mb_utils 2.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mb_utils-2.0.3/PKG-INFO +9 -0
- mb_utils-2.0.3/README.md +165 -0
- mb_utils-2.0.3/mb/utils/__init__.py +0 -0
- mb_utils-2.0.3/mb/utils/deprecated.py +85 -0
- mb_utils-2.0.3/mb/utils/extra.py +46 -0
- mb_utils-2.0.3/mb/utils/logging.py +108 -0
- mb_utils-2.0.3/mb/utils/path_checker.py +35 -0
- mb_utils-2.0.3/mb/utils/pip_update.py +19 -0
- mb_utils-2.0.3/mb/utils/profiler.py +63 -0
- mb_utils-2.0.3/mb/utils/retry_decorator.py +41 -0
- mb_utils-2.0.3/mb/utils/s3.py +138 -0
- mb_utils-2.0.3/mb/utils/terminal.py +24 -0
- mb_utils-2.0.3/mb/utils/verify_image.py +54 -0
- mb_utils-2.0.3/mb/utils/version.py +5 -0
- mb_utils-2.0.3/mb_utils.egg-info/PKG-INFO +9 -0
- mb_utils-2.0.3/mb_utils.egg-info/SOURCES.txt +19 -0
- mb_utils-2.0.3/mb_utils.egg-info/dependency_links.txt +1 -0
- mb_utils-2.0.3/mb_utils.egg-info/top_level.txt +1 -0
- mb_utils-2.0.3/pyproject.toml +3 -0
- mb_utils-2.0.3/setup.cfg +4 -0
- mb_utils-2.0.3/setup.py +27 -0
mb_utils-2.0.3/PKG-INFO
ADDED
mb_utils-2.0.3/README.md
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# MB Utils
|
|
2
|
+
|
|
3
|
+
[](https://www.python.org/)
|
|
4
|
+
[](LICENSE)
|
|
5
|
+
[](https://github.com/bigmb/mb_utils/graphs/commit-activity)
|
|
6
|
+
[](https://pepy.tech/project/mb_utils)
|
|
7
|
+
|
|
8
|
+
A collection of utility functions and tools to simplify common Python development tasks. Part of the `mb` namespace — install as `mb_utils`, import as `mb.utils`.
|
|
9
|
+
|
|
10
|
+
## Features
|
|
11
|
+
|
|
12
|
+
- **Logging**: Easy-to-use logging with colored console output, rotating file handlers, and a safe `LoggerWrapper` (`logg`) that skips logging when no logger is provided
|
|
13
|
+
- **File Operations**: Concurrent path checking and validation
|
|
14
|
+
- **Decorators**: Deprecation warnings and retry logic
|
|
15
|
+
- **Image Verification**: Validate image files (path, type, shape) with multithreading
|
|
16
|
+
- **S3 Integration**: Simplified AWS S3 file and directory operations
|
|
17
|
+
- **Profiling**: Function profiling with SnakeViz, line-by-line profiling
|
|
18
|
+
- **Utilities**: Timer decorator, batch creation
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install mb_utils
|
|
24
|
+
# or
|
|
25
|
+
uv pip install mb_utils
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
This installs under the `mb` namespace. Import everything via `mb.utils.*`.
|
|
29
|
+
|
|
30
|
+
## Usage
|
|
31
|
+
|
|
32
|
+
### Logging
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from mb.utils.logging import make_logger, logg
|
|
36
|
+
|
|
37
|
+
# Create a logger with colored console + rotating file output
|
|
38
|
+
logger = make_logger('myapp')
|
|
39
|
+
logger.info("Direct logger usage")
|
|
40
|
+
|
|
41
|
+
# Safe logging wrapper — no need for `if logger:` checks
|
|
42
|
+
logg.info("This message logs", logger) # logs normally
|
|
43
|
+
logg.info("This is silenced", None) # does nothing
|
|
44
|
+
|
|
45
|
+
# Set a default logger so you don't have to pass it every time
|
|
46
|
+
logg.set_default(logger)
|
|
47
|
+
logg.info("Uses default logger") # logs via default
|
|
48
|
+
logg.warning("Also works")
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Path Checking
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from mb.utils.path_checker import check_path
|
|
55
|
+
|
|
56
|
+
# Check a list of paths concurrently (returns list of bools)
|
|
57
|
+
results = check_path(['/path/to/file1', '/path/to/file2'], max_threads=16)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Retry Decorator
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from mb.utils.retry_decorator import retry
|
|
64
|
+
|
|
65
|
+
@retry(times=3, exceptions=(ValueError, TypeError))
|
|
66
|
+
def might_fail():
|
|
67
|
+
pass
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Deprecation Decorator
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from mb.utils.deprecated import deprecated_func
|
|
74
|
+
|
|
75
|
+
@deprecated_func(deprecated_version="1.0", suggested_func="new_func", removed_version="3.0")
|
|
76
|
+
def old_function():
|
|
77
|
+
pass
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### S3 Operations
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from mb.utils.s3 import upload_file, download_file, upload_dir, download_dir, list_objects
|
|
84
|
+
|
|
85
|
+
# Upload / download a single file
|
|
86
|
+
upload_file('bucket-name', 'remote_key.txt', 'local_file.txt')
|
|
87
|
+
download_file('bucket-name', 'remote_key.txt', 'local_file.txt')
|
|
88
|
+
|
|
89
|
+
# Upload / download entire directories
|
|
90
|
+
upload_dir('bucket-name', 's3/prefix', '/local/dir')
|
|
91
|
+
download_dir('bucket-name', 's3/prefix', '/local/dir')
|
|
92
|
+
|
|
93
|
+
# List objects
|
|
94
|
+
list_objects('bucket-name')
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Timer & Batch Utilities
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from mb.utils.extra import timer, batch_generator, batch_create
|
|
101
|
+
|
|
102
|
+
@timer
|
|
103
|
+
def slow_function():
|
|
104
|
+
pass
|
|
105
|
+
|
|
106
|
+
# Generator-based batching
|
|
107
|
+
for batch in batch_generator(range(100), batch_size=10):
|
|
108
|
+
process(batch)
|
|
109
|
+
|
|
110
|
+
# List-based batching
|
|
111
|
+
batches = batch_create(my_list, n=10)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Image Verification
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from mb.utils.verify_image import verify_image
|
|
118
|
+
|
|
119
|
+
results = verify_image(
|
|
120
|
+
image_paths=['/path/img1.jpg', '/path/img2.png'],
|
|
121
|
+
image_type='JPEG', # optional: check format
|
|
122
|
+
image_shape=(1920, 1080), # optional: check dimensions (width, height)
|
|
123
|
+
max_workers=16
|
|
124
|
+
)
|
|
125
|
+
# Returns list: True, False, 'image_type_mismatch', 'image_shape_mismatch', 'unknown_image_format'
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Profiling
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from mb.utils.profiler import run_with_snakeviz, line_profile
|
|
132
|
+
|
|
133
|
+
# Profile and visualize with SnakeViz
|
|
134
|
+
@run_with_snakeviz
|
|
135
|
+
def process_data(data):
|
|
136
|
+
pass
|
|
137
|
+
|
|
138
|
+
# Save profile without opening SnakeViz
|
|
139
|
+
run_with_snakeviz(my_func, arg1, arg2, save_only=True, file_path="output.prof")
|
|
140
|
+
|
|
141
|
+
# Line-by-line profiling
|
|
142
|
+
@line_profile
|
|
143
|
+
def process_item(item):
|
|
144
|
+
result = item * 2
|
|
145
|
+
return result
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Available Modules
|
|
149
|
+
|
|
150
|
+
| Module | Description | Import Path |
|
|
151
|
+
|--------|-------------|-------------|
|
|
152
|
+
| logging | Logger with colored output, file rotation, safe wrapper | `from mb.utils.logging import make_logger, logg` |
|
|
153
|
+
| path_checker | Concurrent path validation | `from mb.utils.path_checker import check_path` |
|
|
154
|
+
| deprecated | Function deprecation decorator | `from mb.utils.deprecated import deprecated_func` |
|
|
155
|
+
| verify_image | Image verification (path, type, shape) | `from mb.utils.verify_image import verify_image` |
|
|
156
|
+
| retry_decorator | Retry mechanism for functions | `from mb.utils.retry_decorator import retry` |
|
|
157
|
+
| s3 | AWS S3 upload/download/list operations | `from mb.utils.s3 import *` |
|
|
158
|
+
| extra | Timer decorator, batch utilities | `from mb.utils.extra import *` |
|
|
159
|
+
| profiler | SnakeViz and line profiling | `from mb.utils.profiler import *` |
|
|
160
|
+
| terminal | Terminal size utilities | `from mb.utils.terminal import stty_size` |
|
|
161
|
+
| version | Package version info | `from mb.utils.version import version` |
|
|
162
|
+
|
|
163
|
+
## Included Scripts
|
|
164
|
+
|
|
165
|
+
- `verify_images_script`: Utility script for batch image verification
|
|
File without changes
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
'''Utilities to deprecate a function or a module.
|
|
2
|
+
MTbase function.
|
|
3
|
+
'''
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
from functools import wraps
|
|
7
|
+
import traceback as _tb
|
|
8
|
+
from .logging import logg
|
|
9
|
+
|
|
10
|
+
__all__ = ['deprecated_func']
|
|
11
|
+
|
|
12
|
+
def extract_stack_compact():
    '''Return the current call stack as a list of text lines, in a compact format.

    Frames belonging to the import machinery ('frozen importlib') are filtered out
    so the trace shows only user-relevant frames.
    '''
    formatted = "".join(_tb.format_list(_tb.extract_stack()))
    return [ln for ln in formatted.split('\n') if 'frozen importlib' not in ln]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def deprecated_func(deprecated_version, suggested_func=None, removed_version=None, docstring_prefix="", logger=None):
    '''A decorator to warn the user that the function has been deprecated and will be removed in future.

    Parameters
    ----------
    deprecated_version : str
        the version since which the function has been deprecated
    suggested_func : str or list of strings, optional
        the function to be used in replacement of the deprecated function
    removed_version : str, optional
        the future version from which the function will be removed
    docstring_prefix : str
        prefix string to be inserted at the beginning of every new line in the docstring
    logger : logging.Logger, optional
        logger for the warning messages; silent when None
    '''

    def deprecated_decorator(func):
        @wraps(func)
        def func_wrapper(*args, **kwargs):
            # warn only on the first invocation of each decorated function
            if not deprecated_func_warned[func]:
                lines = extract_stack_compact()
                # bug fix: LoggerWrapper defines `warning`, not `warn`; the
                # original `logg.warn(...)` calls raised AttributeError here
                if len(lines) > 7:
                    logg.warning("IMPORT: Deprecated function '{}' invoked at:".format(func.__name__), logger)
                    for x in lines[-7:-5]:
                        logg.warning(x, logger)
                    logg.warning(" It has been deprecated since version {}.".format(deprecated_version), logger)
                else:
                    logg.warning("IMPORT: Function {} has been deprecated since version {}.".format(func.__name__, deprecated_version), logger)
                if removed_version:
                    logg.warning(" It will be removed in version {}.".format(removed_version), logger)
                if suggested_func:
                    if isinstance(suggested_func, str):
                        logg.warning(" Use function '{}' instead.".format(suggested_func), logger)
                    else:
                        logg.warning(" Use a function in {} instead.".format(suggested_func), logger)
                deprecated_func_warned[func] = True
            return func(*args, **kwargs)

        deprecated_func_warned[func] = False  # register the function

        # append a Sphinx-style deprecation notice to the wrapped docstring
        the_doc = func_wrapper.__doc__

        msg = "{}.. deprecated:: {}\n".format(docstring_prefix, deprecated_version)
        if not the_doc or len(the_doc) == 0:
            the_doc = msg
        else:
            if the_doc[-1] != '\n':
                the_doc += '\n'
            the_doc += '\n' + msg

        if removed_version:
            the_doc += "{} It will be removed in version {}.\n".format(docstring_prefix, removed_version)

        if suggested_func:
            if isinstance(suggested_func, str):
                msg = ':func:`{}`'.format(suggested_func)
            else:
                msg = ' or '.join([':func:`{}`'.format(x) for x in suggested_func])
            the_doc += "{} Use {} instead.\n".format(docstring_prefix, msg)

        func_wrapper.__doc__ = the_doc
        return func_wrapper

    return deprecated_decorator


# map: deprecated function -> warned
deprecated_func_warned = {}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
##Extra functions - batch creation, timer wrapper, etc.
|
|
2
|
+
import time
|
|
3
|
+
from .logging import logg
|
|
4
|
+
|
|
5
|
+
__all__ = ['timer', 'batch_generator', 'batch_create']
|
|
6
|
+
|
|
7
|
+
def timer(func, logger=None):
    """
    Decorator that logs the wall-clock runtime of a function.
    Input:
        func: function to be timed
        logger: optional logger for the timing message (silent when None)
    """
    def wrapper(*args, **kwargs):
        started = time.time()
        result = func(*args, **kwargs)
        logg.info('function time : {} seconds'.format(time.time() - started), logger)
        return result
    return wrapper
|
|
19
|
+
|
|
20
|
+
def batch_generator(iterable, batch_size):
    """
    Generator yielding consecutive slices of ``iterable`` of length ``batch_size``.
    Input:
        iterable: sliceable sequence to be batched
        batch_size: size of the batches (last batch may be shorter)
    Output:
        batch: batch of the given size
    """
    total = len(iterable)
    start = 0
    while start < total:
        # Python slicing clamps at the end, so no explicit min() is needed
        yield iterable[start:start + batch_size]
        start += batch_size
|
|
32
|
+
|
|
33
|
+
def batch_create(l, n, logger=None):
    """
    Create batches in a list of a size from a given list.
    Input:
        l: list to be batched
        n: size of the batches (last batch may be shorter)
        logger: optional logger for the batch-count message
    Output:
        batch(list): list of batches of the given size
    """
    batches = [l[start:start + n] for start in range(0, len(l), n)]
    logg.info("batches created : {}".format(len(batches)), logger)
    return batches
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from logging import *
|
|
2
|
+
import os
|
|
3
|
+
import logging.handlers
|
|
4
|
+
from colorama import Fore, Style
|
|
5
|
+
from colorama import init as _colorama_init
|
|
6
|
+
from .terminal import stty_size
|
|
7
|
+
_colorama_init()
|
|
8
|
+
|
|
9
|
+
__all__ = ['make_logger','logger', 'logg']
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class LoggerWrapper:
    """
    Safe logging front-end: log calls become no-ops when no logger is available.

    Usage:
        from mb_utils.logging import logg

        # With explicit logger:
        logg.info('hello', logger)  # If logger is None, does nothing. If logger exists, logs.

        # With default logger (set via set_default):
        logg.set_default(make_logger('myapp'))
        logg.info('hello')  # Uses default logger

        # Explicit None overrides default (does nothing):
        logg.info('hello', None)
    """

    def __init__(self):
        self._default_logger = None

    def set_default(self, logger):
        """Set a default logger to use when none is provided."""
        self._default_logger = logger

    def _log(self, level, msg, logger=..., *args, **kwargs):
        # Ellipsis is the "argument not supplied" sentinel: an explicit None
        # must win over the default and silence the call entirely.
        target = self._default_logger if logger is ... else logger
        if target is None:
            return
        getattr(target, level)(msg, *args, **kwargs)

    def debug(self, msg, logger=..., *args, **kwargs):
        self._log('debug', msg, logger, *args, **kwargs)

    def info(self, msg, logger=..., *args, **kwargs):
        self._log('info', msg, logger, *args, **kwargs)

    def warning(self, msg, logger=..., *args, **kwargs):
        self._log('warning', msg, logger, *args, **kwargs)

    def error(self, msg, logger=..., *args, **kwargs):
        self._log('error', msg, logger, *args, **kwargs)

    def critical(self, msg, logger=..., *args, **kwargs):
        self._log('critical', msg, logger, *args, **kwargs)

    def exception(self, msg, logger=..., *args, **kwargs):
        self._log('exception', msg, logger, *args, **kwargs)


logg = LoggerWrapper()
|
|
63
|
+
|
|
64
|
+
def make_logger(name):
    """
    Build a logger with a colored console handler and a rotating file handler.
    Input:
        name: name of the logger
    Output:
        logger object (writes to console and to 'logs/logger.log')
    """
    logger = getLogger(name)
    # bug fix: getLogger() never returns None, so the old `if logger is None:`
    # branch was dead (and would have crashed calling addHandler on None).
    # Instead, guard against stacking duplicate handlers when make_logger is
    # called more than once for the same name.
    if logger.handlers:
        return logger
    logger.setLevel(1)  # getting all logs

    # determine some max string lengths from the terminal width
    column_length = stty_size()[1] - 13
    log_lvl_length = min(max(int(column_length * 0.03), 1), 8)
    s1 = '{}.{}s '.format(log_lvl_length, log_lvl_length)
    column_length -= log_lvl_length
    s5 = '-{}.{}s'.format(column_length, column_length)

    # race-free replacement for the original mkdir-if-not-exists expression
    os.makedirs('logs', exist_ok=True)
    should_roll_over = os.path.isfile('logs/logger.log')
    file_handler = logging.handlers.RotatingFileHandler('logs/logger.log', mode='w', maxBytes=1000000, backupCount=3)
    if should_roll_over:  # log already exists, roll over!
        file_handler.doRollover()
    file_handler.setLevel(DEBUG)
    file_formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(file_formatter)

    std_handler = StreamHandler()
    std_handler.setLevel(DEBUG)
    fmt_str = Fore.CYAN + '%(asctime)s ' + Fore.LIGHTGREEN_EX + '%(levelname)' + s1 + \
        Fore.LIGHTWHITE_EX + '%(message)' + s5 + Fore.RESET
    formatter = Formatter(fmt_str)
    formatter.default_time_format = "%a %H:%M:%S"
    std_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(std_handler)

    return logger


logger = make_logger('mb_utils')
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
##Function to check paths
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
6
|
+
from tqdm.auto import tqdm
|
|
7
|
+
from .logging import logg
|
|
8
|
+
|
|
9
|
+
__all__ = ['check_path']
|
|
10
|
+
|
|
11
|
+
def check_path(path, logger=None, path_column=None, max_threads=16) -> list:
    """
    Function to check whether each path exists.
    Tqdm progress bar is used to show the progress and it updates every 1 second.
    Input:
        path (list): paths to be checked (list, pandas.DataFrame or pandas.Series).
        path_column: column name if path is pandas.DataFrame (str, optional)
        logger: logger object (optional)
        max_threads: maximum number of threads to use (int, optional, default=16)
    Output:
        results: list of bools indicating if each path exists.
    """
    if not isinstance(path, list):
        # bug fix: the original `type(path) != pd.DataFrame or type(path) != pd.Series`
        # is always True (should have been `and`), so DataFrame/Series inputs
        # always raised TypeError instead of being converted.
        if isinstance(path, pd.DataFrame):
            path = path[path_column].tolist()
        elif isinstance(path, pd.Series):
            # a Series has no columns; use its values directly
            path = path.tolist()
        else:
            raise TypeError("Input should be a list or pandas.DataFrame or pandas.Series")

    def check_single_path(p):
        return os.path.exists(p)

    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        results = list(tqdm(executor.map(check_single_path, path), total=len(path), mininterval=1))

    logg.info('Path not found: {}'.format(results.count(False)), logger)
    return results
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from .logging import logg
|
|
4
|
+
|
|
5
|
+
__all__ = ['update_package']
|
|
6
|
+
|
|
7
|
+
def update_package(package_name, logger: Optional[str]=None) -> None:
    """
    Upgrade ``package_name`` via pip unless it reports as up-to-date.
    Input:
        package_name: name of the package to upgrade
        logger: optional logger for status messages (silent when None)
    Output:
        None
    """
    # NOTE(review): `pip check` verifies dependency consistency and does not
    # normally print "up-to-date" for a single package — confirm the intended
    # command (perhaps `pip install --upgrade --dry-run` or `pip list -o`).
    result = subprocess.run(['pip', 'check', package_name], capture_output=True, text=True)
    if 'up-to-date' in result.stdout:
        logg.info(f"{package_name} is already up-to-date.", logger)
        return

    # Install latest version of package
    result = subprocess.run(['pip', 'install', '--upgrade', package_name], capture_output=True, text=True)
    if result.returncode == 0:
        logg.info(f"{package_name} has been updated.", logger)
    else:
        # bug fix: the `logger` argument was missing here, so upgrade errors
        # were silently dropped (default logger is None)
        logg.info(f"Error updating {package_name}: {result.stderr}", logger)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import cProfile
|
|
2
|
+
import subprocess
|
|
3
|
+
import os
|
|
4
|
+
import functools
|
|
5
|
+
import sys
|
|
6
|
+
from .logging import logg
|
|
7
|
+
|
|
8
|
+
__all__ = ['run_with_snakeviz','line_profile']
|
|
9
|
+
|
|
10
|
+
def run_with_snakeviz(func, *args, save_only=False, file_path=None, logger=None, **kwargs):
    """
    Profile a single call of ``func`` with cProfile, save the stats file,
    and optionally open SnakeViz on it.

    Note: this executes ``func`` immediately and returns its result — call it
    as ``run_with_snakeviz(my_func, arg1, save_only=True)``. It is NOT a
    ``@``-decorator (the original docstring example was misleading: applying
    it with ``@`` would run the function at import time and replace it with
    its return value).

    Args:
        func: The function to profile.
        *args, **kwargs: Arguments to pass to the function.
        save_only (bool): If True, only saves the file and does not launch SnakeViz.
        file_path (str): Path to save the profile file. If None, saves to 'profiler.prof' in the current directory.
        logger: optional logger for status messages.

    Returns:
        Whatever ``func(*args, **kwargs)`` returns.
    """
    if file_path is None:
        file_path = os.path.join(os.getcwd(), "profiler.prof")

    profiler = cProfile.Profile()
    profiler.enable()
    try:
        result = func(*args, **kwargs)
    finally:
        # stats are dumped even when func raises, so the partial profile survives
        profiler.disable()
        profiler.dump_stats(file_path)
        logg.info(f"[Profiler] Saved to {file_path}", logger)

    if not save_only:
        logg.info("[Profiler] Launching SnakeViz", logger)
        subprocess.run(["snakeviz", file_path])

    return result
|
|
43
|
+
|
|
44
|
+
def line_profile(func, logger=None):
    """
    Decorator that prints a line-by-line profile of ``func`` after every call.

    Relies on the third-party ``line_profiler`` package (imported lazily);
    works in IPython/Jupyter as well.

    Examples:
        @line_profile
        def my_function():
            pass
    """
    from line_profiler import LineProfiler

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        lp = LineProfiler()
        lp.add_function(func)
        outcome = lp(func)(*args, **kwargs)
        lp.print_stats(stream=sys.stdout)
        logg.info("[Profiler] Line profile complete", logger)
        return outcome

    return wrapper
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
##function to retry a function if it fails
|
|
2
|
+
# Path: mb_utils/src/retry_decorator.py
|
|
3
|
+
|
|
4
|
+
from .logging import logg
|
|
5
|
+
|
|
6
|
+
__all__ = ['retry']
|
|
7
|
+
|
|
8
|
+
def retry(times, exceptions, logger=None):
    """
    Retry Decorator
    Retries the wrapped function/method `times` times if the exceptions listed
    in ``exceptions`` are thrown; the last failure is re-raised to the caller.
    Input:
        times (int): The number of attempts of the wrapped function/method
        exceptions (tuple of exceptions): The exceptions to catch
        logger: optional logger for retry messages (silent when None)
    Output:
        The wrapped function/method
    """
    def decorator(func):
        def newfn(*args, **kwargs):
            # bug fix: the original looped `times` times and then called the
            # function one extra (unlogged) time, giving times+1 attempts;
            # here exactly `times` attempts are made and the final exception
            # propagates to the caller.
            for attempt in range(1, times + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    logg.error(
                        'Exception thrown when attempting to run %s, attempt '
                        '%d of %d' % (func, attempt, times), logger)
                    if attempt == times:
                        raise
        return newfn
    return decorator
|
|
33
|
+
|
|
34
|
+
##example of how to use the retry decorator
|
|
35
|
+
# @retry(times=3, exceptions=(ValueError, TypeError))
|
|
36
|
+
# def foo1():
|
|
37
|
+
# print('Some code here ....')
|
|
38
|
+
# print('Oh no, we have exception')
|
|
39
|
+
# raise ValueError('Some error')
|
|
40
|
+
|
|
41
|
+
# foo1()
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
##file for s3 download and upload
|
|
2
|
+
|
|
3
|
+
import boto3
|
|
4
|
+
import os
|
|
5
|
+
from functools import partial
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
7
|
+
from tqdm.auto import tqdm
|
|
8
|
+
from .logging import logg
|
|
9
|
+
|
|
10
|
+
__all__ = ['download_file', 'upload_file', 'upload_dir', 'download_dir','list_objects']
|
|
11
|
+
|
|
12
|
+
def download_file(bucket_name, file_name, local_file_name, logger=None):
    """
    download file from s3
    Input:
        bucket_name: name of the bucket
        file_name: name of the file in s3
        local_file_name: name of the file in local
        logger: optional logger for status messages
    Output:
        None
    """
    bucket = boto3.resource('s3').Bucket(bucket_name)
    try:
        bucket.download_file(file_name, local_file_name)
    except Exception as e:
        logg.error('Error in downloading file from s3', logger)
        logg.error(e, logger)
        raise e
    logg.info('Downloaded file from s3', logger)
|
|
30
|
+
|
|
31
|
+
def upload_file(bucket_name, file_name, local_file_name, logger=None):
    """
    upload file to s3
    Input:
        bucket_name: name of the bucket
        file_name: name of the file in s3
        local_file_name: name of the file in local
        logger: optional logger for status messages
    Output:
        None
    """
    s3 = boto3.resource('s3')
    try:
        s3.Bucket(bucket_name).upload_file(local_file_name, file_name)
    except Exception as e:
        logg.error('Error in uploading file to s3', logger)
        # bug fix: the `logger` argument was missing, so the exception text was
        # silently dropped (matches the pattern used in download_file)
        logg.error(e, logger)
        raise e
    logg.info('File uploaded to s3', logger)
|
|
49
|
+
|
|
50
|
+
def upload_dir(bucket_name, dir_name, local_dir_name, logger=None):
    """
    upload directory to s3
    Input:
        bucket_name: name of the bucket
        dir_name: name of the directory in s3
        local_dir_name: name of the directory in local
        logger: optional logger for status messages
    Output:
        results (List) : list of uploaded files location. False if error in uploading individual file
    """
    s3 = boto3.resource('s3')

    def _get_all_files(root):
        # recursively collect every file path under the local directory
        return [os.path.join(dp, f) for dp, _, filenames in os.walk(root) for f in filenames]

    file_list = _get_all_files(local_dir_name)

    # bug fix: the mapped argument must be the FIRST positional parameter.
    # The original signature was (bucket, dir_name, file) with bucket/dir_name
    # bound by keyword; executor.map then passed each file positionally into
    # `bucket`, raising "got multiple values for argument 'bucket'".
    def _upload_file(file, bucket, dir_name):
        try:
            file_name = file.split('/')[-1]
            file_loc = os.path.join(dir_name, file_name)
            s3.Bucket(bucket).upload_file(file, file_loc)
            return file_loc
        except Exception:
            return False

    upload_func = partial(_upload_file, bucket=bucket_name, dir_name=dir_name)

    with ThreadPoolExecutor(max_workers=8) as executor:
        results = list(tqdm(executor.map(upload_func, file_list), total=len(file_list), mininterval=1))

    if results.count(False) > 0:
        logg.error('Error in uploading files : {i}'.format(i=results.count(False)), logger)
    else:
        logg.info('All files uploaded to s3 : {i}'.format(i=len(results)), logger)
    return results
|
|
87
|
+
|
|
88
|
+
def download_dir(bucket_name, dir_name, local_dir_name=None, max_workers=8):
    """
    download directory from s3
    Input:
        bucket_name: name of the bucket
        dir_name: name of the directory (prefix) in s3
        local_dir_name: name of the directory in local; when None, keys are
            written relative to the current directory using their full key path
        max_workers: number of parallel workers to use
    Output:
        results (List) : list of downloaded files location
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)

    # bug fixes vs. the original:
    # - objects were filtered by Prefix=local_dir_name instead of dir_name
    # - executor.map iterated over [local_dir_name] (a single element), so at
    #   most one target was ever returned and the workers were unused
    # - the partial bound both parameters by keyword and then map passed a
    #   positional arg into `bucket`, raising TypeError
    keys = [obj.key for obj in bucket.objects.filter(Prefix=dir_name) if not obj.key.endswith('/')]

    def _download_one(key):
        if local_dir_name is None:
            target = key
        else:
            target = os.path.join(local_dir_name, os.path.relpath(key, dir_name))
        target_dir = os.path.dirname(target)
        if target_dir and not os.path.exists(target_dir):
            os.makedirs(target_dir, exist_ok=True)
        bucket.download_file(key, target)
        return target

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(tqdm(executor.map(_download_one, keys), total=len(keys), mininterval=1))
    return results
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def list_objects(bucket_name, logger=None, **kwargs):
    """
    List all the objects in the bucket
    Args:
        bucket_name : str
            Name of the bucket
        logger : optional logger used to print each key
        **kwargs : extra arguments forwarded to list_objects_v2 (e.g. Prefix)
    Returns:
        List of objects in the bucket
    """
    # bug fix: list_objects_v2 is a low-level client API; boto3.resource('s3')
    # has no such method and the original raised AttributeError. Also forward
    # **kwargs, which were accepted but never used.
    s3 = boto3.client('s3')
    objects = s3.list_objects_v2(Bucket=bucket_name, **kwargs)

    if 'Contents' in objects:
        for obj in objects['Contents']:
            logg.info(obj['Key'], logger)
        return objects['Contents']
    else:
        logg.info(f"No objects found in {bucket_name}", logger)
        return []
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
##Functions related to terminal size
|
|
2
|
+
#Functions taken from mtbase : https://github.com/inteplus/mtbase/tree/master/mt/base
|
|
3
|
+
import shutil
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
__all__ = ['stty_imgres', 'stty_size']
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def stty_size() -> list:
    '''Gets the terminal size.

    Returns the Linux-compatible console's number of rows and number of characters per
    row. If the information does not exist, returns (72, 128).'''

    columns, rows = shutil.get_terminal_size(fallback=(128, 72))
    return [rows, columns]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def stty_imgres() -> list:
    '''Gets the terminal resolution.

    Returns the Linux-compatible console's number of letters per row and the number of
    rows. If the information does not exist, returns (128, 72).'''

    width, height = shutil.get_terminal_size(fallback=(128, 72))
    return [width, height]
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
##Function to verify the image
|
|
2
|
+
import PIL.Image as Image
|
|
3
|
+
import os
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
5
|
+
from functools import partial
|
|
6
|
+
from tqdm.auto import tqdm
|
|
7
|
+
from .logging import logg
|
|
8
|
+
|
|
9
|
+
__all__ = ['verify_image']
|
|
10
|
+
|
|
11
|
+
def verify_image(image_paths: list, image_type=None, image_shape=None, logger=None, max_workers=16) -> list:
    """
    Function to verify the image. Checks Path, Type and Size of the image if the later two are provided.
    tqdm progress bar is used to show the progress and it updates every 1 second.
    Input:
        image_paths: list of paths to the images
        image_type (str : optional): type of the image
        image_shape (Tuple : optional): verify if image shape. (width, height)
        logger: optional logger for summary messages
        max_workers: number of worker threads (default 16)
    Output:
        results (list : bool): lists of bools indicating if each image is valid. Also returns size_mismatch if image_shape is provided and not correct.
    """

    def verify_single_image(image_path, image_type=None, image_shape=None):
        if not os.path.isfile(image_path):
            return False
        # bug fix: use a context manager so the file handle is released on
        # EVERY return path — the original leaked handles whenever a mismatch
        # branch returned before im.close()
        with Image.open(image_path) as im:
            if image_type and im.format != image_type.upper():
                return 'image_type_mismatch'
            # NOTE(review): Pillow reports JPEG files as 'JPEG', never 'JPG' —
            # the 'JPG' entry is likely redundant; confirm before removing
            elif not image_type and im.format not in ('JPEG', 'PNG', 'GIF', 'BMP', 'TIFF', 'JPG'):
                return 'unknown_image_format'
            if image_shape and im.size != image_shape:  # PIL uses (width, height)
                return 'image_shape_mismatch'
        return True

    verify_func = partial(verify_single_image, image_type=image_type, image_shape=image_shape)  # fixes the optional arguments

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(tqdm(executor.map(verify_func, image_paths), total=len(image_paths), mininterval=1))
    if image_shape:
        if logger:
            logger.info('Image shape mismatch: {}'.format(results.count('image_shape_mismatch')))
        else:
            print('Image shape mismatch: {}'.format(results.count('image_shape_mismatch')))
    if image_type:
        if logger:
            logger.info('Image type mismatch: {}'.format(results.count('image_type_mismatch')))
        else:
            print('Image type mismatch: {}'.format(results.count('image_type_mismatch')))

    logg.info('Image not found: {}'.format(results.count(False)), logger)
    return results
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
setup.py
|
|
4
|
+
mb/utils/__init__.py
|
|
5
|
+
mb/utils/deprecated.py
|
|
6
|
+
mb/utils/extra.py
|
|
7
|
+
mb/utils/logging.py
|
|
8
|
+
mb/utils/path_checker.py
|
|
9
|
+
mb/utils/pip_update.py
|
|
10
|
+
mb/utils/profiler.py
|
|
11
|
+
mb/utils/retry_decorator.py
|
|
12
|
+
mb/utils/s3.py
|
|
13
|
+
mb/utils/terminal.py
|
|
14
|
+
mb/utils/verify_image.py
|
|
15
|
+
mb/utils/version.py
|
|
16
|
+
mb_utils.egg-info/PKG-INFO
|
|
17
|
+
mb_utils.egg-info/SOURCES.txt
|
|
18
|
+
mb_utils.egg-info/dependency_links.txt
|
|
19
|
+
mb_utils.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mb
|
mb_utils-2.0.3/setup.cfg
ADDED
mb_utils-2.0.3/setup.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Packaging script for mb_utils (namespace package ``mb.utils``)."""

import os

from setuptools import find_namespace_packages, setup

# Version is managed by setuptools-git-versioning, anchored in VERSION.txt.
VERSION_FILE = os.path.join(os.path.dirname(__file__), "VERSION.txt")

setup(
    name="mb_utils",
    description="Extra mb python utilities",
    # NOTE(review): setuptools expects `author` as a string, not a list — confirm
    author=["Malav Bateriwala"],
    packages=find_namespace_packages(include=["mb.*"]),
    scripts=[],
    install_requires=[],
    setup_requires=["setuptools-git-versioning<2"],
    python_requires='>=3.8',
    setuptools_git_versioning={
        "enabled": True,
        "version_file": VERSION_FILE,
        "count_commits_from_version_file": True,
        "template": "{tag}",
        "dev_template": "{tag}.dev{ccount}+{branch}",
        "dirty_template": "{tag}.post{ccount}",
    },
)
|