mb_utils 2.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: mb_utils
3
+ Version: 2.0.3
4
+ Summary: Extra mb python utilities
5
+ Author: ['Malav Bateriwala']
6
+ Requires-Python: >=3.8
7
+ Dynamic: author
8
+ Dynamic: requires-python
9
+ Dynamic: summary
@@ -0,0 +1,165 @@
1
+ # MB Utils
2
+
3
+ [![Python Version](https://img.shields.io/badge/python-3.8+-blue)](https://www.python.org/)
4
+ [![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
5
+ [![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/bigmb/mb_utils/graphs/commit-activity)
6
+ [![Downloads](https://static.pepy.tech/badge/mb_utils)](https://pepy.tech/project/mb_utils)
7
+
8
+ A collection of utility functions and tools to simplify common Python development tasks. Part of the `mb` namespace — install as `mb_utils`, import as `mb.utils`.
9
+
10
+ ## Features
11
+
12
+ - **Logging**: Easy-to-use logging with colored console output, rotating file handlers, and a safe `LoggerWrapper` (`logg`) that skips logging when no logger is provided
13
+ - **File Operations**: Concurrent path checking and validation
14
+ - **Decorators**: Deprecation warnings and retry logic
15
+ - **Image Verification**: Validate image files (path, type, shape) with multithreading
16
+ - **S3 Integration**: Simplified AWS S3 file and directory operations
17
+ - **Profiling**: Function profiling with SnakeViz, line-by-line profiling
18
+ - **Utilities**: Timer decorator, batch creation
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ pip install mb_utils
24
+ # or
25
+ uv pip install mb_utils
26
+ ```
27
+
28
+ This installs under the `mb` namespace. Import everything via `mb.utils.*`.
29
+
30
+ ## Usage
31
+
32
+ ### Logging
33
+
34
+ ```python
35
+ from mb.utils.logging import make_logger, logg
36
+
37
+ # Create a logger with colored console + rotating file output
38
+ logger = make_logger('myapp')
39
+ logger.info("Direct logger usage")
40
+
41
+ # Safe logging wrapper — no need for `if logger:` checks
42
+ logg.info("This message logs", logger) # logs normally
43
+ logg.info("This is silenced", None) # does nothing
44
+
45
+ # Set a default logger so you don't have to pass it every time
46
+ logg.set_default(logger)
47
+ logg.info("Uses default logger") # logs via default
48
+ logg.warning("Also works")
49
+ ```
50
+
51
+ ### Path Checking
52
+
53
+ ```python
54
+ from mb.utils.path_checker import check_path
55
+
56
+ # Check a list of paths concurrently (returns list of bools)
57
+ results = check_path(['/path/to/file1', '/path/to/file2'], max_threads=16)
58
+ ```
59
+
60
+ ### Retry Decorator
61
+
62
+ ```python
63
+ from mb.utils.retry_decorator import retry
64
+
65
+ @retry(times=3, exceptions=(ValueError, TypeError))
66
+ def might_fail():
67
+ pass
68
+ ```
69
+
70
+ ### Deprecation Decorator
71
+
72
+ ```python
73
+ from mb.utils.deprecated import deprecated_func
74
+
75
+ @deprecated_func(deprecated_version="1.0", suggested_func="new_func", removed_version="3.0")
76
+ def old_function():
77
+ pass
78
+ ```
79
+
80
+ ### S3 Operations
81
+
82
+ ```python
83
+ from mb.utils.s3 import upload_file, download_file, upload_dir, download_dir, list_objects
84
+
85
+ # Upload / download a single file
86
+ upload_file('bucket-name', 'remote_key.txt', 'local_file.txt')
87
+ download_file('bucket-name', 'remote_key.txt', 'local_file.txt')
88
+
89
+ # Upload / download entire directories
90
+ upload_dir('bucket-name', 's3/prefix', '/local/dir')
91
+ download_dir('bucket-name', 's3/prefix', '/local/dir')
92
+
93
+ # List objects
94
+ list_objects('bucket-name')
95
+ ```
96
+
97
+ ### Timer & Batch Utilities
98
+
99
+ ```python
100
+ from mb.utils.extra import timer, batch_generator, batch_create
101
+
102
+ @timer
103
+ def slow_function():
104
+ pass
105
+
106
+ # Generator-based batching
107
+ for batch in batch_generator(range(100), batch_size=10):
108
+ process(batch)
109
+
110
+ # List-based batching
111
+ batches = batch_create(my_list, n=10)
112
+ ```
113
+
114
+ ### Image Verification
115
+
116
+ ```python
117
+ from mb.utils.verify_image import verify_image
118
+
119
+ results = verify_image(
120
+ image_paths=['/path/img1.jpg', '/path/img2.png'],
121
+ image_type='JPEG', # optional: check format
122
+ image_shape=(1920, 1080), # optional: check dimensions (width, height)
123
+ max_workers=16
124
+ )
125
+ # Returns list: True, False, 'image_type_mismatch', 'image_shape_mismatch', 'unknown_image_format'
126
+ ```
127
+
128
+ ### Profiling
129
+
130
+ ```python
131
+ from mb.utils.profiler import run_with_snakeviz, line_profile
132
+
133
+ # Profile and visualize with SnakeViz
134
+ @run_with_snakeviz
135
+ def process_data(data):
136
+ pass
137
+
138
+ # Save profile without opening SnakeViz
139
+ run_with_snakeviz(my_func, arg1, arg2, save_only=True, file_path="output.prof")
140
+
141
+ # Line-by-line profiling
142
+ @line_profile
143
+ def process_item(item):
144
+ result = item * 2
145
+ return result
146
+ ```
147
+
148
+ ## Available Modules
149
+
150
+ | Module | Description | Import Path |
151
+ |--------|-------------|-------------|
152
+ | logging | Logger with colored output, file rotation, safe wrapper | `from mb.utils.logging import make_logger, logg` |
153
+ | path_checker | Concurrent path validation | `from mb.utils.path_checker import check_path` |
154
+ | deprecated | Function deprecation decorator | `from mb.utils.deprecated import deprecated_func` |
155
+ | verify_image | Image verification (path, type, shape) | `from mb.utils.verify_image import verify_image` |
156
+ | retry_decorator | Retry mechanism for functions | `from mb.utils.retry_decorator import retry` |
157
+ | s3 | AWS S3 upload/download/list operations | `from mb.utils.s3 import *` |
158
+ | extra | Timer decorator, batch utilities | `from mb.utils.extra import *` |
159
+ | profiler | SnakeViz and line profiling | `from mb.utils.profiler import *` |
160
+ | terminal | Terminal size utilities | `from mb.utils.terminal import stty_size` |
161
+ | version | Package version info | `from mb.utils.version import version` |
162
+
163
+ ## Included Scripts
164
+
165
+ - `verify_images_script`: Utility script for batch image verification
File without changes
@@ -0,0 +1,85 @@
1
+ '''Utilities to deprecate a function or a module.
2
+ MTbase function.
3
+ '''
4
+
5
+
6
+ from functools import wraps
7
+ import traceback as _tb
8
+ from .logging import logg
9
+
10
+ __all__ = ['deprecated_func']
11
+
12
def extract_stack_compact():
    '''Return the current call stack as a compact list of lines.

    Formats the stack via the traceback module, splits it into individual
    lines, and drops the noise produced by the import machinery.
    '''
    formatted = "".join(_tb.format_list(_tb.extract_stack()))
    return [ln for ln in formatted.split('\n') if 'frozen importlib' not in ln]
18
+
19
+
20
# map: deprecated function -> whether its one-time warning has been emitted
deprecated_func_warned = {}


def deprecated_func(deprecated_version, suggested_func=None, removed_version=None, docstring_prefix="", logger=None):
    '''A decorator to warn the user that the function has been deprecated and will be removed in future.

    The warning is emitted at most once per decorated function (tracked in
    ``deprecated_func_warned``), and a ``.. deprecated::`` note is appended to
    the function's docstring at decoration time.

    Parameters
    ----------
    deprecated_version : str
        the version since which the function has been deprecated
    suggested_func : str or list of strings, optional
        the function to be used in replacement of the deprecated function
    removed_version : str, optional
        the future version from which the function will be removed
    docstring_prefix : str
        prefix string to be inserted at the beginning of every new line in the docstring
    logger : logging.Logger, optional
        logger for the warning; silenced when None and no default is set on ``logg``
    '''

    def deprecated_decorator(func):
        @wraps(func)
        def func_wrapper(*args, **kwargs):
            if not deprecated_func_warned[func]:
                lines = extract_stack_compact()
                # FIX: LoggerWrapper defines `warning`, not `warn`; the old
                # `logg.warn(...)` calls raised AttributeError on first use.
                if len(lines) > 7:
                    logg.warning("IMPORT: Deprecated function '{}' invoked at:".format(func.__name__), logger)
                    for x in lines[-7:-5]:
                        logg.warning(x, logger)
                    logg.warning("  It has been deprecated since version {}.".format(deprecated_version), logger)
                else:
                    logg.warning("IMPORT: Function {} has been deprecated since version {}.".format(func.__name__, deprecated_version), logger)
                if removed_version:
                    logg.warning("  It will be removed in version {}.".format(removed_version), logger)
                if suggested_func:
                    if isinstance(suggested_func, str):
                        logg.warning("  Use function '{}' instead.".format(suggested_func), logger)
                    else:
                        logg.warning("  Use a function in {} instead.".format(suggested_func), logger)
                deprecated_func_warned[func] = True
            return func(*args, **kwargs)

        deprecated_func_warned[func] = False  # register the function

        # Append the deprecation notice to the wrapped function's docstring.
        the_doc = func_wrapper.__doc__

        msg = "{}.. deprecated:: {}\n".format(docstring_prefix, deprecated_version)
        if not the_doc or len(the_doc) == 0:
            the_doc = msg
        else:
            if the_doc[-1] != '\n':
                the_doc += '\n'
            the_doc += '\n' + msg

        if removed_version:
            the_doc += "{} It will be removed in version {}.\n".format(docstring_prefix, removed_version)

        if suggested_func:
            if isinstance(suggested_func, str):
                msg = ':func:`{}`'.format(suggested_func)
            else:
                msg = ' or '.join([':func:`{}`'.format(x) for x in suggested_func])
            the_doc += "{} Use {} instead.\n".format(docstring_prefix, msg)

        func_wrapper.__doc__ = the_doc
        return func_wrapper

    return deprecated_decorator
@@ -0,0 +1,46 @@
1
+ ##Extra functions - batch creation, timer wrapper, etc.
2
+ import time
3
+ from .logging import logg
4
+
5
+ __all__ = ['timer', 'batch_generator', 'batch_create']
6
+
7
def timer(func, logger=None):
    """
    Decorator that logs a function's wall-clock execution time.
    Input:
        func: function to be timed
        logger: optional logger passed through to ``logg`` (silenced when None)
    Output:
        wrapped function behaving like ``func`` but logging its runtime
    """
    from functools import wraps  # local import: module has no top-level functools import

    @wraps(func)  # preserve the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        before = time.time()
        result = func(*args, **kwargs)
        logg.info('function time : {} seconds'.format(time.time() - before), logger)
        return result
    return wrapper
19
+
20
def batch_generator(iterable, batch_size):
    """
    Yield successive slices of at most ``batch_size`` items from a sequence.
    Input:
        iterable: sliceable sequence (must support len() and slicing)
        batch_size: maximum size of each yielded batch
    Output:
        batch: consecutive slices of the input; the last one may be shorter
    """
    total = len(iterable)
    start = 0
    while start < total:
        yield iterable[start:start + batch_size]
        start += batch_size
32
+
33
def batch_create(l, n, logger=None):
    """
    Split a list into consecutive batches of size ``n``.
    Input:
        l: list to be batched
        n: size of the batches
        logger: optional logger for the summary message (silenced when None)
    Output:
        batch (list): list of batches; the last one may be shorter
    """
    batches = [l[start:start + n] for start in range(0, len(l), n)]
    logg.info("batches created : {}".format(len(batches)), logger)
    return batches
@@ -0,0 +1,108 @@
1
+ from logging import *
2
+ import os
3
+ import logging.handlers
4
+ from colorama import Fore, Style
5
+ from colorama import init as _colorama_init
6
+ from .terminal import stty_size
7
+ _colorama_init()
8
+
9
+ __all__ = ['make_logger','logger', 'logg']
10
+
11
+
12
class LoggerWrapper:
    """
    A wrapper that allows calling log methods without checking if logger is None.

    Usage:
        from mb_utils.logging import logg

        # With explicit logger:
        logg.info('hello', logger)  # If logger is None, does nothing. If logger exists, logs.

        # With default logger (set via set_default):
        logg.set_default(make_logger('myapp'))
        logg.info('hello')  # Uses default logger

        # Explicit None overrides default (does nothing):
        logg.info('hello', None)
    """

    def __init__(self):
        # No default logger until set_default() is called.
        self._default_logger = None

    def set_default(self, logger):
        """Set a default logger to use when none is provided."""
        self._default_logger = logger

    def _log(self, level, msg, logger=..., *args, **kwargs):
        # Ellipsis sentinel distinguishes "argument omitted" (fall back to the
        # default logger) from an explicit None (silence the message).
        if logger is ...:
            logger = self._default_logger
        if logger is not None:
            getattr(logger, level)(msg, *args, **kwargs)

    def debug(self, msg, logger=..., *args, **kwargs):
        self._log('debug', msg, logger, *args, **kwargs)

    def info(self, msg, logger=..., *args, **kwargs):
        self._log('info', msg, logger, *args, **kwargs)

    def warning(self, msg, logger=..., *args, **kwargs):
        self._log('warning', msg, logger, *args, **kwargs)

    # FIX: backward-compatible alias. Sibling modules (e.g. deprecated.py)
    # call logg.warn(...), which previously raised AttributeError.
    def warn(self, msg, logger=..., *args, **kwargs):
        self._log('warning', msg, logger, *args, **kwargs)

    def error(self, msg, logger=..., *args, **kwargs):
        self._log('error', msg, logger, *args, **kwargs)

    def critical(self, msg, logger=..., *args, **kwargs):
        self._log('critical', msg, logger, *args, **kwargs)

    def exception(self, msg, logger=..., *args, **kwargs):
        self._log('exception', msg, logger, *args, **kwargs)


# Module-level singleton used throughout the package.
logg = LoggerWrapper()
63
+
64
def make_logger(name):
    """
    Create a logger with a colored console handler and a rotating file handler.
    Input:
        name: name of the logger
    Output:
        logger object (level set to 1, so every record is processed)
    """
    logger = getLogger(name)
    # NOTE: getLogger() never returns None, so the previous
    # `if logger is None: logger.addHandler(NullHandler())` branch was dead
    # code (and would have crashed had it ever run); it has been removed.
    logger.setLevel(1)  # getting all logs

    # determine some max string lengths from the terminal width
    column_length = stty_size()[1] - 13
    log_lvl_length = min(max(int(column_length * 0.03), 1), 8)
    s1 = '{}.{}s '.format(log_lvl_length, log_lvl_length)
    column_length -= log_lvl_length
    s5 = '-{}.{}s'.format(column_length, column_length)

    # Rotating file handler: roll over at startup if a previous log exists.
    os.makedirs('logs', exist_ok=True)  # race-free replacement for mkdir-if-missing
    should_roll_over = os.path.isfile('logs/logger.log')
    file_handler = logging.handlers.RotatingFileHandler('logs/logger.log', mode='w', maxBytes=1000000, backupCount=3)
    if should_roll_over:  # log already exists, roll over!
        file_handler.doRollover()
    file_handler.setLevel(DEBUG)
    file_formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(file_formatter)

    # Colored console handler.
    std_handler = StreamHandler()
    std_handler.setLevel(DEBUG)
    fmt_str = Fore.CYAN+'%(asctime)s '+Fore.LIGHTGREEN_EX+'%(levelname)'+s1+\
        Fore.LIGHTWHITE_EX+'%(message)'+s5+Fore.RESET
    formatter = Formatter(fmt_str)
    formatter.default_time_format = "%a %H:%M:%S"
    std_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(std_handler)

    return logger


# Default package logger created at import time (this also creates ./logs).
logger = make_logger('mb_utils')
@@ -0,0 +1,35 @@
1
+ ##Function to check paths
2
+
3
+ import os
4
+ import pandas as pd
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm.auto import tqdm
7
+ from .logging import logg
8
+
9
+ __all__ = ['check_path']
10
+
11
def check_path(path, logger=None, path_column=None, max_threads=16) -> list:
    """
    Function to check the path.
    Tqdm progress bar is used to show the progress and it updates every 1 second.
    Input:
        path: paths to be checked (list, pandas.DataFrame or pandas.Series)
        logger: logger object (optional)
        path_column: column name if path is pandas.DataFrame (str, optional)
        max_threads: maximum number of threads to use (int, optional, default=16)
    Output:
        results: list of bools indicating if each path exists.
    Raises:
        TypeError: if ``path`` is not a list, DataFrame or Series.
        ValueError: if ``path`` is a DataFrame and ``path_column`` is not given.
    """
    # FIX: the previous check used `or` between two `!=` comparisons, which is
    # always True, so every non-list input (including valid DataFrames) raised
    # TypeError before the column could be extracted.
    if not isinstance(path, list):
        if isinstance(path, pd.DataFrame):
            if path_column is None:
                raise ValueError("path_column must be given when path is a pandas.DataFrame")
            path = path[path_column].tolist()
        elif isinstance(path, pd.Series):
            # A Series already holds the paths; no column lookup needed.
            path = path.tolist()
        else:
            raise TypeError("Input should be a list or pandas.DataFrame or pandas.Series")

    def check_single_path(p):
        return os.path.exists(p)

    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        results = list(tqdm(executor.map(check_single_path, path), total=len(path), mininterval=1))

    logg.info('Path not found: {}'.format(results.count(False)), logger)
    return results
@@ -0,0 +1,19 @@
1
+ import subprocess
2
+ from typing import Optional
3
+ from .logging import logg
4
+
5
+ __all__ = ['update_package']
6
+
7
def update_package(package_name, logger=None) -> None:
    """
    Upgrade an installed pip package to its latest version.
    Input:
        package_name: name of the package to upgrade (str)
        logger: optional logger for status messages (silenced when None)
    Output:
        None
    """
    import sys  # local import: run pip with the interpreter executing this process

    # NOTE(review): `pip check` validates dependency compatibility and does not
    # print "up-to-date", so this guard rarely short-circuits — kept for
    # backward compatibility; `pip list --outdated` would be more accurate.
    result = subprocess.run([sys.executable, '-m', 'pip', 'check', package_name],
                            capture_output=True, text=True)
    if 'up-to-date' in result.stdout:
        logg.info(f"{package_name} is already up-to-date.", logger)
        return

    # Install latest version of package
    result = subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade', package_name],
                            capture_output=True, text=True)
    if result.returncode == 0:
        logg.info(f"{package_name} has been updated.", logger)
    else:
        # FIX: the logger argument was previously omitted here, so failures
        # were silently dropped unless a default logger had been set.
        logg.info(f"Error updating {package_name}: {result.stderr}", logger)
@@ -0,0 +1,63 @@
1
+ import cProfile
2
+ import subprocess
3
+ import os
4
+ import functools
5
+ import sys
6
+ from .logging import logg
7
+
8
+ __all__ = ['run_with_snakeviz','line_profile']
9
+
10
def run_with_snakeviz(func, *args, save_only=False, file_path=None, logger=None, **kwargs):
    """
    Profiles a function, saves the stats to 'profiler.prof' in the current
    directory (or ``file_path``), and optionally opens SnakeViz.

    Supports both documented call styles:
      * decorator: ``@run_with_snakeviz`` (no extra arguments at decoration time)
      * direct call: ``run_with_snakeviz(my_func, arg1, arg2, save_only=True)``

    Args:
        func: The function to profile.
        *args, **kwargs: Arguments to pass to the function (direct-call style).
        save_only (bool): If True, only saves the file and does not launch SnakeViz.
        file_path (str): Path to save the profile file. If None, saves to 'profiler.prof' in the current directory.
        logger: optional logger for status messages (silenced when None).

    Returns:
        The wrapped function (decorator style) or the profiled call's result
        (direct-call style).

    Examples:
        @run_with_snakeviz
        def my_function():
            pass
    """

    def _profiled_call(call_args, call_kwargs):
        # Run ``func`` under cProfile; always dump stats, even when it raises.
        target = file_path if file_path is not None else os.path.join(os.getcwd(), "profiler.prof")
        profiler = cProfile.Profile()
        profiler.enable()
        try:
            result = func(*call_args, **call_kwargs)
        finally:
            profiler.disable()
            profiler.dump_stats(target)
            logg.info(f"[Profiler] Saved to {target}", logger)
        if not save_only:
            logg.info("[Profiler] Launching SnakeViz", logger)
            subprocess.run(["snakeviz", target])
        return result

    if not args and not kwargs:
        # FIX: decorator usage. Previously ``@run_with_snakeviz`` executed the
        # function immediately (with no arguments) at decoration time instead
        # of returning a wrapper, breaking the documented usage above.
        @functools.wraps(func)
        def wrapper(*w_args, **w_kwargs):
            return _profiled_call(w_args, w_kwargs)
        return wrapper

    return _profiled_call(args, kwargs)
43
+
44
def line_profile(func, logger=None):
    """
    A decorator that profiles the function line-by-line using line_profiler.
    Compatible with IPython/Jupyter.

    Examples:
        @line_profile
        def my_function():
            pass
    """
    from line_profiler import LineProfiler

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        lp = LineProfiler()
        lp.add_function(func)
        value = lp(func)(*args, **kwargs)
        # Print the per-line statistics before handing back the result.
        lp.print_stats(stream=sys.stdout)
        logg.info("[Profiler] Line profile complete", logger)
        return value

    return wrapper
@@ -0,0 +1,41 @@
1
+ ##function to retry a function if it fails
2
+ # Path: mb_utils/src/retry_decorator.py
3
+
4
+ from .logging import logg
5
+
6
+ __all__ = ['retry']
7
+
8
def retry(times, exceptions, logger=None):
    """
    Retry Decorator
    Retries the wrapped function/method `times` times if the exceptions listed
    in ``exceptions`` are thrown; the final attempt is made outside the loop so
    its exception (if any) propagates to the caller.
    Input:
        times (int): The number of times to retry the wrapped function/method
        exceptions (tuple of exceptions): The exceptions to catch
        logger: optional logger for retry messages (silenced when None)
    Output:
        The wrapped function/method
    """
    from functools import wraps  # local import: module has no top-level functools import

    def decorator(func):
        @wraps(func)  # preserve the wrapped function's metadata
        def newfn(*args, **kwargs):
            attempt = 0
            while attempt < times:
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    # 1-based attempt count reads naturally in the log output
                    logg.error(
                        'Exception thrown when attempting to run %s, attempt '
                        '%d of %d' % (func, attempt + 1, times), logger)
                    attempt += 1
            # Final attempt: any exception here is deliberately not caught.
            return func(*args, **kwargs)
        return newfn
    return decorator
33
+
34
+ ##example of how to use the retry decorator
35
+ # @retry(times=3, exceptions=(ValueError, TypeError))
36
+ # def foo1():
37
+ # print('Some code here ....')
38
+ # print('Oh no, we have exception')
39
+ # raise ValueError('Some error')
40
+
41
+ # foo1()
@@ -0,0 +1,138 @@
1
+ ##file for s3 download and upload
2
+
3
+ import boto3
4
+ import os
5
+ from functools import partial
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ from tqdm.auto import tqdm
8
+ from .logging import logg
9
+
10
+ __all__ = ['download_file', 'upload_file', 'upload_dir', 'download_dir','list_objects']
11
+
12
def download_file(bucket_name, file_name, local_file_name, logger=None):
    """
    Download a single file from an S3 bucket.
    Input:
        bucket_name: name of the bucket
        file_name: key of the file in s3
        local_file_name: destination path on the local filesystem
        logger: optional logger (silenced when None)
    Output:
        None
    """
    bucket = boto3.resource('s3').Bucket(bucket_name)
    try:
        bucket.download_file(file_name, local_file_name)
    except Exception as e:
        # Surface the failure in the logs before propagating it.
        logg.error('Error in downloading file from s3', logger)
        logg.error(e, logger)
        raise e
    logg.info('Downloaded file from s3', logger)
30
+
31
def upload_file(bucket_name, file_name, local_file_name, logger=None):
    """
    Upload a single file to an S3 bucket.
    Input:
        bucket_name: name of the bucket
        file_name: destination key of the file in s3
        local_file_name: path of the file on the local filesystem
        logger: optional logger (silenced when None)
    Output:
        None
    """
    s3 = boto3.resource('s3')
    try:
        s3.Bucket(bucket_name).upload_file(local_file_name, file_name)
    except Exception as e:
        logg.error('Error in uploading file to s3', logger)
        # FIX: pass the logger through — it was previously dropped, so the
        # exception detail was silenced unless a default logger had been set.
        logg.error(e, logger)
        raise e
    logg.info('File uploaded to s3', logger)
49
+
50
def upload_dir(bucket_name, dir_name, local_dir_name, logger=None):
    """
    Upload every file under a local directory to an S3 prefix, in parallel.

    NOTE: files are stored flat under ``dir_name`` using their basename, so
    the local sub-directory structure is not preserved (matches the original
    key-naming behavior).

    Input:
        bucket_name: name of the bucket
        dir_name: destination prefix in s3
        local_dir_name: local directory to upload (walked recursively)
        logger: optional logger (silenced when None)
    Output:
        results (List): uploaded key for each file, or False when that file failed
    """
    s3 = boto3.resource('s3')

    # Collect every file (recursively) under the local directory.
    file_list = [os.path.join(dp, f)
                 for dp, _, filenames in os.walk(local_dir_name)
                 for f in filenames]

    def _upload_file(local_path):
        # FIX: mapping over file paths with a closure. The previous
        # partial(_upload_file, bucket=..., dir_name=...) fed the mapped file
        # path into the keyword-bound ``bucket`` parameter, raising
        # "got multiple values for argument 'bucket'" on every call.
        try:
            key = os.path.join(dir_name, os.path.basename(local_path))
            s3.Bucket(bucket_name).upload_file(local_path, key)
            return key
        except Exception:
            return False

    with ThreadPoolExecutor(max_workers=8) as executor:
        results = list(tqdm(executor.map(_upload_file, file_list), total=len(file_list), mininterval=1))

    if results.count(False) > 0:
        logg.error('Error in uploading files : {i}'.format(i=results.count(False)), logger)
    else:
        logg.info('All files uploaded to s3 : {i}'.format(i=len(results)), logger)
    return results
87
+
88
def download_dir(bucket_name, dir_name, local_dir_name=None, max_workers=8):
    """
    Download every object under an S3 prefix, in parallel.
    Input:
        bucket_name: name of the bucket
        dir_name: prefix of the objects in s3
        local_dir_name: local destination directory; when None the objects'
            keys are used as-is relative to the current directory
        max_workers: number of parallel workers to use
    Output:
        results (List): local path of each downloaded file
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)

    # FIX: enumerate objects once using the *remote* prefix. The old code
    # filtered on Prefix=local_dir_name, mapped a single element, and its
    # keyword-bound partial raised TypeError on invocation — so nothing was
    # ever downloaded in parallel and only the last target was returned.
    objects = [obj for obj in bucket.objects.filter(Prefix=dir_name)
               if not obj.key.endswith('/')]  # skip "directory" placeholder keys

    def _download_object(obj):
        if local_dir_name is None:
            target = obj.key
        else:
            target = os.path.join(local_dir_name, os.path.relpath(obj.key, dir_name))
        target_dir = os.path.dirname(target)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)  # race-free with parallel workers
        bucket.download_file(obj.key, target)
        return target

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(tqdm(executor.map(_download_object, objects), total=len(objects), mininterval=1))
    return results
118
+
119
+
120
def list_objects(bucket_name, logger=None, **kwargs):
    """
    List all the objects in the bucket
    Args:
        bucket_name : str
            Name of the bucket
        logger: optional logger used to log each key (silenced when None)
        **kwargs: extra arguments forwarded to ``list_objects_v2`` (e.g. Prefix)
    Returns:
        List of objects in the bucket (empty list when the bucket has none)
    """
    # FIX: list_objects_v2 is a *client* API; calling it on a boto3 resource
    # raised AttributeError.
    s3_client = boto3.client('s3')
    objects = s3_client.list_objects_v2(Bucket=bucket_name, **kwargs)

    if 'Contents' in objects:
        for obj in objects['Contents']:
            logg.info(obj['Key'], logger)
        return objects['Contents']
    else:
        logg.info(f"No objects found in {bucket_name}", logger)
        return []
@@ -0,0 +1,24 @@
1
+ ##Functions related to terminal size
2
+ #Functions taken from mtbase : https://github.com/inteplus/mtbase/tree/master/mt/base
3
+ import shutil
4
+
5
+
6
+ __all__ = ['stty_imgres', 'stty_size']
7
+
8
+
9
def stty_size() -> list:
    '''Gets the terminal size.
    Returns the Linux-compatible console's number of rows and number of characters per
    row. If the information does not exist, returns (72, 128).'''

    cols, rows = shutil.get_terminal_size(fallback=(128, 72))
    return [rows, cols]
16
+
17
+
18
def stty_imgres() -> list:
    '''Gets the terminal resolution.
    Returns the Linux-compatible console's number of letters per row and the number of
    rows. If the information does not exist, returns (128, 72).'''

    cols, rows = shutil.get_terminal_size(fallback=(128, 72))
    return [cols, rows]
@@ -0,0 +1,54 @@
1
+ ##Function to verify the image
2
+ import PIL.Image as Image
3
+ import os
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ from functools import partial
6
+ from tqdm.auto import tqdm
7
+ from .logging import logg
8
+
9
+ __all__ = ['verify_image']
10
+
11
def verify_image(image_paths: list, image_type=None, image_shape=None, logger=None, max_workers=16) -> list:
    """
    Function to verify the image. Checks Path, Type and Size of the image if the later two are provided.
    tqdm progress bar is used to show the progress and it updates every 1 second.
    Input:
        image_paths: list of paths to the images
        image_type (str : optional): type of the image (e.g. 'JPEG')
        image_shape (Tuple : optional): verify if image shape. (width, height)
        logger: optional logger; mismatch summaries fall back to print() when None
        max_workers: maximum number of threads to use
    Output:
        results (list): per image one of True, False (missing or unreadable),
        'image_type_mismatch', 'image_shape_mismatch', 'unknown_image_format'
    """

    def verify_single_image(image_path, image_type=None, image_shape=None):
        if not os.path.isfile(image_path):
            return False
        try:
            # FIX: context manager guarantees the file handle is closed on
            # every branch — the old code leaked it on the mismatch
            # early-returns, which exhausts file descriptors on large batches.
            with Image.open(image_path) as im:
                if image_type and im.format != image_type.upper():
                    return 'image_type_mismatch'
                elif not image_type and im.format not in ('JPEG', 'PNG', 'GIF', 'BMP', 'TIFF', 'JPG'):
                    return 'unknown_image_format'
                if image_shape and im.size != image_shape:  # PIL uses (width, height)
                    return 'image_shape_mismatch'
        except Exception:
            # FIX: a corrupt/unreadable file previously raised inside the
            # worker and aborted the whole executor.map iteration.
            return False
        return True

    verify_func = partial(verify_single_image, image_type=image_type, image_shape=image_shape)  # fix the optional checks

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(tqdm(executor.map(verify_func, image_paths), total=len(image_paths), mininterval=1))

    if image_shape:
        if logger:
            logger.info('Image shape mismatch: {}'.format(results.count('image_shape_mismatch')))
        else:
            print('Image shape mismatch: {}'.format(results.count('image_shape_mismatch')))
    if image_type:
        if logger:
            logger.info('Image type mismatch: {}'.format(results.count('image_type_mismatch')))
        else:
            print('Image type mismatch: {}'.format(results.count('image_type_mismatch')))

    logg.info('Image not found: {}'.format(results.count(False)), logger)
    return results
@@ -0,0 +1,5 @@
1
# Package version, kept in sync with the released distribution metadata.
MAJOR_VERSION = 2
MINOR_VERSION = 0
PATCH_VERSION = 3  # was 0, out of sync with the 2.0.3 version in PKG-INFO
version = '{}.{}.{}'.format(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
__all__ = ['MAJOR_VERSION', 'MINOR_VERSION', 'PATCH_VERSION', 'version']
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: mb_utils
3
+ Version: 2.0.3
4
+ Summary: Extra mb python utilities
5
+ Author: ['Malav Bateriwala']
6
+ Requires-Python: >=3.8
7
+ Dynamic: author
8
+ Dynamic: requires-python
9
+ Dynamic: summary
@@ -0,0 +1,19 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ mb/utils/__init__.py
5
+ mb/utils/deprecated.py
6
+ mb/utils/extra.py
7
+ mb/utils/logging.py
8
+ mb/utils/path_checker.py
9
+ mb/utils/pip_update.py
10
+ mb/utils/profiler.py
11
+ mb/utils/retry_decorator.py
12
+ mb/utils/s3.py
13
+ mb/utils/terminal.py
14
+ mb/utils/verify_image.py
15
+ mb/utils/version.py
16
+ mb_utils.egg-info/PKG-INFO
17
+ mb_utils.egg-info/SOURCES.txt
18
+ mb_utils.egg-info/dependency_links.txt
19
+ mb_utils.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ mb
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,27 @@
1
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Build script for the ``mb_utils`` distribution (mb.* namespace packages)."""

import os

from setuptools import find_namespace_packages, setup

# Version file consumed by setuptools-git-versioning.
VERSION_FILE = os.path.join(os.path.dirname(__file__), "VERSION.txt")

setup(
    name="mb_utils",
    description="Extra mb python utilities",
    # FIX: author must be a plain string; a list leaked into the built
    # metadata as "Author: ['Malav Bateriwala']" (visible in PKG-INFO).
    author="Malav Bateriwala",
    packages=find_namespace_packages(include=["mb.*"]),
    scripts=[],
    install_requires=[],
    setup_requires=["setuptools-git-versioning<2"],
    python_requires='>=3.8',
    setuptools_git_versioning={
        "enabled": True,
        "version_file": VERSION_FILE,
        "count_commits_from_version_file": True,
        "template": "{tag}",
        "dev_template": "{tag}.dev{ccount}+{branch}",
        "dirty_template": "{tag}.post{ccount}",
    },
)