fkat 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fkat/__init__.py +147 -0
- fkat/data/__init__.py +15 -0
- fkat/data/data_module.py +198 -0
- fkat/data/datasets/__init__.py +19 -0
- fkat/data/datasets/dict.py +78 -0
- fkat/data/datasets/json.py +176 -0
- fkat/data/datasets/map.py +90 -0
- fkat/data/datasets/parquet.py +242 -0
- fkat/data/datasets/sized.py +31 -0
- fkat/data/dict.py +42 -0
- fkat/data/samplers/__init__.py +9 -0
- fkat/data/samplers/dict.py +38 -0
- fkat/data/samplers/sized.py +16 -0
- fkat/data/samplers/strategies.py +68 -0
- fkat/data/sharded.py +718 -0
- fkat/data/shm.py +364 -0
- fkat/predict.py +32 -0
- fkat/py.typed +0 -0
- fkat/pytorch/__init__.py +3 -0
- fkat/pytorch/actions/__init__.py +11 -0
- fkat/pytorch/actions/aws/__init__.py +3 -0
- fkat/pytorch/actions/aws/batch.py +29 -0
- fkat/pytorch/actions/aws/ec2.py +61 -0
- fkat/pytorch/callbacks/__init__.py +2 -0
- fkat/pytorch/callbacks/cuda/__init__.py +16 -0
- fkat/pytorch/callbacks/cuda/cache.py +115 -0
- fkat/pytorch/callbacks/cuda/memory.py +200 -0
- fkat/pytorch/callbacks/cuda/nsys.py +199 -0
- fkat/pytorch/callbacks/cuda/nvtx.py +288 -0
- fkat/pytorch/callbacks/cuda/xid.py +173 -0
- fkat/pytorch/callbacks/debugging/__init__.py +9 -0
- fkat/pytorch/callbacks/debugging/introspection.py +569 -0
- fkat/pytorch/callbacks/debugging/optimizer.py +45 -0
- fkat/pytorch/callbacks/gc.py +146 -0
- fkat/pytorch/callbacks/loggers.py +211 -0
- fkat/pytorch/callbacks/logging/__init__.py +12 -0
- fkat/pytorch/callbacks/logging/heartbeat.py +76 -0
- fkat/pytorch/callbacks/logging/throughput.py +253 -0
- fkat/pytorch/callbacks/logging/validation_metrics.py +94 -0
- fkat/pytorch/callbacks/monitoring/__init__.py +14 -0
- fkat/pytorch/callbacks/monitoring/crash.py +162 -0
- fkat/pytorch/callbacks/monitoring/dp.py +130 -0
- fkat/pytorch/callbacks/monitoring/hardware_stats.py +135 -0
- fkat/pytorch/callbacks/monitoring/shutdown.py +170 -0
- fkat/pytorch/callbacks/profiling/__init__.py +13 -0
- fkat/pytorch/callbacks/profiling/flops.py +574 -0
- fkat/pytorch/callbacks/profiling/memray.py +212 -0
- fkat/pytorch/callbacks/profiling/torch.py +197 -0
- fkat/pytorch/callbacks/profiling/viztracer.py +197 -0
- fkat/pytorch/loggers.py +284 -0
- fkat/pytorch/schedule/__init__.py +27 -0
- fkat/pytorch/schedule/base.py +308 -0
- fkat/pytorch/schedule/mlflow.py +143 -0
- fkat/pytorch/utilities.py +49 -0
- fkat/test.py +31 -0
- fkat/train.py +32 -0
- fkat/utils/__init__.py +28 -0
- fkat/utils/aws/__init__.py +3 -0
- fkat/utils/aws/imds.py +137 -0
- fkat/utils/boto3.py +24 -0
- fkat/utils/config.py +194 -0
- fkat/utils/cuda/__init__.py +3 -0
- fkat/utils/cuda/preflight/__init__.py +3 -0
- fkat/utils/cuda/preflight/health_check/aws_instance_config.py +82 -0
- fkat/utils/cuda/preflight/health_check/constants.py +23 -0
- fkat/utils/cuda/preflight/health_check/ddb_client.py +82 -0
- fkat/utils/cuda/preflight/health_check/gpu_connection_test.py +104 -0
- fkat/utils/cuda/preflight/health_check/gpu_stress_test.py +122 -0
- fkat/utils/cuda/preflight/health_check/helpers.py +297 -0
- fkat/utils/cuda/preflight/health_check/logger.py +205 -0
- fkat/utils/cuda/preflight/health_check/timer.py +31 -0
- fkat/utils/cuda/preflight/run.py +560 -0
- fkat/utils/cuda/xid.py +48 -0
- fkat/utils/logging.py +28 -0
- fkat/utils/mlflow.py +33 -0
- fkat/utils/pandas.py +25 -0
- fkat/utils/pdb.py +84 -0
- fkat/utils/pool.py +81 -0
- fkat/utils/profiler.py +18 -0
- fkat/utils/pyarrow.py +21 -0
- fkat/utils/rng.py +27 -0
- fkat/utils/shm.py +184 -0
- fkat/validate.py +31 -0
- fkat-0.1.2.dist-info/METADATA +134 -0
- fkat-0.1.2.dist-info/RECORD +88 -0
- fkat-0.1.2.dist-info/WHEEL +4 -0
- fkat-0.1.2.dist-info/licenses/LICENSE +175 -0
- fkat-0.1.2.dist-info/licenses/NOTICE +1 -0
fkat/utils/pdb.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
import os
|
|
4
|
+
import pdb
|
|
5
|
+
import sys
|
|
6
|
+
from typing import Any
|
|
7
|
+
from types import FrameType, TracebackType
|
|
8
|
+
from typing_extensions import override
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ForkedPdb(pdb.Pdb):
|
|
12
|
+
def __init__(self) -> None:
|
|
13
|
+
super().__init__()
|
|
14
|
+
self.rank = os.environ.get(
|
|
15
|
+
"RANK", # PyTorch DDP
|
|
16
|
+
os.environ.get(
|
|
17
|
+
"PMI_RANK", # MPI
|
|
18
|
+
os.environ.get(
|
|
19
|
+
"OMPI_COMM_WORLD_RANK", # OpenMPI
|
|
20
|
+
"unknown",
|
|
21
|
+
),
|
|
22
|
+
),
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
@override
|
|
26
|
+
def interaction(self, frame: FrameType | None, traceback: TracebackType | None, *args: Any, **kwargs: Any) -> None:
|
|
27
|
+
_stdin = sys.stdin
|
|
28
|
+
try:
|
|
29
|
+
sys.stdin = open("/dev/stdin")
|
|
30
|
+
self.print_rank_info()
|
|
31
|
+
pdb.Pdb.interaction(self, frame, traceback, *args, **kwargs)
|
|
32
|
+
finally:
|
|
33
|
+
sys.stdin = _stdin
|
|
34
|
+
|
|
35
|
+
def print_rank_info(self) -> None:
|
|
36
|
+
print(f"\n[RANK={self.rank}, PID={os.getpid()}]:")
|
|
37
|
+
|
|
38
|
+
@override
|
|
39
|
+
def default(self, line: str) -> None:
|
|
40
|
+
self.print_rank_info()
|
|
41
|
+
super().default(line)
|
|
42
|
+
|
|
43
|
+
@override
|
|
44
|
+
def do_continue(self, arg: str) -> bool | None:
|
|
45
|
+
self.print_rank_info()
|
|
46
|
+
return super().do_continue(arg)
|
|
47
|
+
|
|
48
|
+
@override
|
|
49
|
+
def do_next(self, arg: str) -> bool | None:
|
|
50
|
+
self.print_rank_info()
|
|
51
|
+
return super().do_next(arg)
|
|
52
|
+
|
|
53
|
+
@override
|
|
54
|
+
def do_step(self, arg: str) -> bool | None:
|
|
55
|
+
self.print_rank_info()
|
|
56
|
+
return super().do_step(arg)
|
|
57
|
+
|
|
58
|
+
@override
|
|
59
|
+
def do_return(self, arg: str) -> bool | None:
|
|
60
|
+
self.print_rank_info()
|
|
61
|
+
return super().do_return(arg)
|
|
62
|
+
|
|
63
|
+
@override
|
|
64
|
+
def do_quit(self, arg: str) -> bool | None:
|
|
65
|
+
self.print_rank_info()
|
|
66
|
+
return super().do_quit(arg)
|
|
67
|
+
|
|
68
|
+
@override
|
|
69
|
+
def do_jump(self, arg: str) -> bool | None:
|
|
70
|
+
self.print_rank_info()
|
|
71
|
+
return super().do_jump(arg)
|
|
72
|
+
|
|
73
|
+
@override
|
|
74
|
+
def precmd(self, line: str) -> str:
|
|
75
|
+
self.print_rank_info()
|
|
76
|
+
return line
|
|
77
|
+
|
|
78
|
+
def post_mortem(self, tb: TracebackType | None) -> None:
|
|
79
|
+
self.reset()
|
|
80
|
+
self.interaction(None, tb)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def post_mortem() -> None:
    """Install an excepthook that opens a ForkedPdb session on any uncaught exception."""
    sys.excepthook = lambda exc_type, exc_value, tb: ForkedPdb().post_mortem(tb)
|
fkat/utils/pool.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
from concurrent.futures import ThreadPoolExecutor, Future
|
|
4
|
+
from typing import Any, TypeVar
|
|
5
|
+
import multiprocessing as mp
|
|
6
|
+
from multiprocessing.pool import AsyncResult
|
|
7
|
+
from collections.abc import Callable, Iterable, Mapping
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Invariant value type carried by FutureResult.
T = TypeVar("T", covariant=False)
# Covariant result type of the callable submitted via ThreadPool.apply_async.
T_co = TypeVar("T_co", covariant=True)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FutureResult(AsyncResult[T]):
    """Adapt a :class:`concurrent.futures.Future` to the multiprocessing
    ``AsyncResult`` interface, so thread- and process-pool results can be
    handled uniformly."""

    def __init__(self, fut: Future[T]) -> None:
        # Keep the wrapped future; all methods delegate to it.
        self.fut = fut

    def ready(self) -> bool:
        """Whether the future has finished (with a result or an exception)."""
        return self.fut.done()

    def get(self, timeout: float | None = None) -> T:
        """Return the result, re-raising any exception from the callable."""
        return self.fut.result(timeout)

    def wait(self, timeout: float | None = None) -> None:
        """Block until completion without raising on failure."""
        # Future.exception() waits like result() but returns the error
        # instead of raising it, which is exactly wait() semantics.
        self.fut.exception(timeout)

    def successful(self) -> bool:
        """True if the callable completed without raising."""
        return self.fut.exception() is None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ThreadPool:
    """Thread-backed stand-in for ``multiprocessing.pool.Pool``.

    Only the subset of the Pool API this package needs is provided:
    ``apply_async``, ``close`` and ``join``.
    """

    def __init__(self, **kwargs: Any) -> None:
        # Keyword arguments are forwarded verbatim to ThreadPoolExecutor
        # (e.g. max_workers, thread_name_prefix).
        self.pool = ThreadPoolExecutor(**kwargs)

    def apply_async(
        self,
        func: Callable[..., T_co],
        args: Iterable[Any] | None = None,
        kwds: Mapping[str, Any] | None = None,
    ) -> FutureResult[T_co]:
        """Schedule ``func(*args, **kwds)`` and return an AsyncResult-like handle."""
        positional = tuple(args) if args else ()
        keyword = dict(kwds) if kwds else {}
        return FutureResult(self.pool.submit(func, *positional, **keyword))

    def close(self) -> None:
        """Shut the executor down, waiting for queued work to finish."""
        self.pool.shutdown()

    def join(self) -> None:
        """Block until all previously submitted work has completed."""
        if self.pool._shutdown:
            self.close()
        else:
            # Submitting a no-op and waiting on its result drains the queue
            # without shutting the executor down.
            self.pool.submit(lambda: None).result()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class NoDaemonProcess(mp.Process):
    """A Process whose daemon flag is permanently pinned to False.

    multiprocessing marks pool workers daemonic, and daemonic processes may
    not spawn children; forcing the flag off lets workers create their own
    subprocesses.
    """

    @property
    def daemon(self) -> bool:
        # Always report non-daemonic, regardless of any assignment.
        return False

    @daemon.setter
    def daemon(self, value: bool) -> None:
        # Silently ignore assignments made by multiprocessing.Pool internals.
        pass
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class NoDaemonContext(type(mp.get_context())):  # type: ignore[misc]
    """A multiprocessing Context that uses NoDaemonProcess.

    multiprocessing picks the process class off its context's ``Process``
    attribute, so overriding it makes every process spawned through this
    context non-daemonic.
    """

    # Class attribute consulted by multiprocessing when creating processes.
    Process = NoDaemonProcess
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class NoDaemonPool(mp.pool.Pool):  # type: ignore[unresolved-attribute]
    """A multiprocessing Pool whose workers may fork children.

    Forces the pool onto :class:`NoDaemonContext`, so worker processes are
    created non-daemonic (see :class:`NoDaemonProcess`).
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Override any caller-supplied context with the non-daemonic one.
        options = dict(kwargs)
        options["context"] = NoDaemonContext()
        super().__init__(*args, **options)
|
fkat/utils/profiler.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
import atexit
|
|
4
|
+
|
|
5
|
+
from lightning.pytorch.profilers import Profiler
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def profile_until_exit(profiler: Profiler, action: str, filename_suffix: str | None = None) -> None:
    """Start profiling ``action`` and stop it only when the interpreter exits.

    Registers an ``atexit`` hook that stops the profiler and emits its summary,
    so the profile covers the remainder of the process lifetime.

    Args:
        profiler: Lightning profiler instance to drive.
        action: Name of the profiled action passed to ``start``/``stop``.
        filename_suffix: Optional suffix appended to the profiler's output
            filename (only applied when the profiler has a filename set).
    """

    def _finalize() -> None:
        # Runs at interpreter shutdown: close the action and report results.
        profiler.stop(action)
        profiler.summary()
        profiler.describe()

    atexit.register(_finalize)

    if profiler.filename and filename_suffix:
        profiler.filename += filename_suffix
    profiler.start(action)
|
fkat/utils/pyarrow.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
import pyarrow as pa
|
|
4
|
+
from typing import Any
|
|
5
|
+
from collections.abc import Iterator
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def iter_rows(table: pa.Table, chunk_size: int) -> Iterator[dict[str, Any]]:
    """Yield each row of a PyArrow table as a plain dict, converting in chunks.

    Args:
        table (pa.Table): PyArrow table to iterate.
        chunk_size (int): Maximum number of rows converted to Python objects
            at a time, bounding peak memory.

    Yields:
        Dict[str, Any]: One ``{column_name: value}`` dict per row, in order.
    """
    for batch in table.to_batches(chunk_size):
        as_columns = batch.to_pydict()
        for row_idx in range(batch.num_rows):
            yield {name: values[row_idx] for name, values in as_columns.items()}
|
fkat/utils/rng.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
from random import getstate as python_get_rng_state
|
|
4
|
+
from random import setstate as python_set_rng_state
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import torch
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_rng_states() -> dict[str, Any]:
    r"""Snapshot the global (CPU) random state of :mod:`torch`, :mod:`numpy` and Python.

    Returns:
        Dict with keys ``"torch"``, ``"numpy"`` and ``"python"`` mapping to the
        respective library's opaque RNG state, suitable for :func:`set_rng_states`.
    """
    return {
        "torch": torch.get_rng_state(),
        "numpy": np.random.get_state(),
        "python": python_get_rng_state(),
    }
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def set_rng_states(rng_state_dict: dict[str, Any]) -> None:
    r"""Restore the global (CPU) random state of :mod:`torch`, :mod:`numpy` and
    Python in the current process from a dict shaped like the output of
    :func:`get_rng_states`."""
    torch.set_rng_state(rng_state_dict["torch"])
    np.random.set_state(rng_state_dict["numpy"])
    # Python's internal state must be a tuple; after (de)serialization it may
    # arrive as a list, so coerce it back before calling setstate.
    py_version, py_state, py_gauss = rng_state_dict["python"]
    python_set_rng_state((py_version, tuple(py_state), py_gauss))
|
fkat/utils/shm.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Distributed Shared Memory Utility for Dataloader"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import mmap
|
|
7
|
+
import os
|
|
8
|
+
import pickle
|
|
9
|
+
import shutil
|
|
10
|
+
import time
|
|
11
|
+
import uuid
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, SupportsIndex
|
|
14
|
+
from collections.abc import Callable, Iterable, Iterator
|
|
15
|
+
|
|
16
|
+
import torch.distributed as dist
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)

# Marker file touched inside a serialized folder once all payload files have
# been fully written; readers poll for its existence before loading.
COMPLETE = ".complete"


__all__ = ["save", "load"]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def save(obj: Any, path: Path | None = None) -> Path:
    """Serialize ``obj`` with out-of-band data to ``path`` for zero-copy sharing.

    If the object to be serialized, or the objects it uses for data storage
    (such as numpy arrays), expose their payload through the pickle protocol 5
    ``pickle.PickleBuffer`` type in ``__reduce_ex__``, those buffers are stored
    out-of-band as numbered ``*.bin`` files in ``path`` so they can
    subsequently be re-used for zero-copy sharing across processes.

    Args:
        obj (object):
            Object to serialize. For example a PyArrow Table, a Pandas
            Dataframe or any type that relies on NumPy to store binary data.
        path (pathlib.Path, optional):
            Empty folder used to save serialized data. Usually a folder in
            /dev/shm. A fresh folder is generated when omitted.
    Returns:
        pathlib.Path where the data was serialized
    """
    buffer_count = 0
    target: Path = path or generate_path()
    target.mkdir(parents=True, exist_ok=True)

    def write_out_of_band(buf: pickle.PickleBuffer) -> None:
        # Each protocol-5 buffer lands in its own sequentially numbered file.
        nonlocal buffer_count
        with open(target / f"{buffer_count}.bin", "wb") as f:
            f.write(buf)
        buffer_count += 1

    with open(target / "meta.pkl", "wb") as f:
        pickle.dump(obj, f, protocol=5, buffer_callback=write_out_of_band)

    # Touch the completion marker last so readers never observe partial data.
    (target / COMPLETE).touch()
    return target
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def generate_path() -> Path:
    """Build a unique /dev/shm path namespaced by this process's global rank."""
    # Rank 0 is assumed when torch.distributed has not been initialized.
    rank = dist.get_rank() if dist.is_initialized() else 0  # type: ignore[possibly-unbound-attribute]
    return Path(f"/dev/shm/{rank}-{uuid.uuid4()}")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def save_iter(
    it: Iterable[Any],
    path: Path | None = None,
    max_items: int = 0,
    should_stop: Callable[[], bool] = lambda: False,
    truncation_threshold: int | None = None,
) -> Path:
    """Save each element of ``it`` as a numbered sub-folder under ``path``.

    Producer half of a folder-based queue: element ``i`` is written to
    ``path/i`` via :func:`save`, and the ``POISON_PILL`` sentinel is saved
    after the last element so :func:`load_iter` knows when to stop.

    Args:
        it: Elements to serialize, one sub-folder each.
        path: Destination folder; a fresh folder is generated when omitted.
        max_items: When > 0, block while ``path`` already holds this many
            sub-folders (back-pressure; the consumer deletes folders as it
            loads them).
        should_stop: Polled during back-pressure waits and before each save;
            returning True aborts the producer early.
        truncation_threshold: When given, stop after saving this many elements.

    Returns:
        The folder the elements (and the terminating sentinel) were written to.
    """
    logger.debug("save iter %r ... started", path)
    path = path or generate_path()
    next_idx = 0
    for i, e in enumerate(it):
        logger.debug("save iter %r ...", path)
        if max_items > 0:
            # Back-pressure: wait until the consumer has drained some folders.
            while (cnt := sum(x.is_dir() for x in path.iterdir()) if path.exists() else 0) >= max_items:
                logger.debug("save iter ... %r dirs of %r stop? %r", cnt, max_items, should_stop())
                if should_stop():
                    break
                time.sleep(0.001)  # busy wait
        # Re-checked here so a stop requested during the wait (or between
        # elements) aborts before the next save.
        if should_stop():
            break
        if truncation_threshold is not None and i == truncation_threshold:
            logger.info(f"reached {truncation_threshold=}, stop saving microbatches")
            break
        save(e, path / str(i))
        next_idx = i + 1
    # Terminate the stream so consumers stop waiting for further elements.
    save(POISON_PILL, path / str(next_idx))
    logger.debug("save iter %r ... finished after %r microbatches", path, next_idx)
    return path
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class Sentinel:
|
|
100
|
+
"""
|
|
101
|
+
Create a unique sentinel object that is pickled as a constant.
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
def __init__(self, name: str) -> None:
|
|
105
|
+
self.name = name
|
|
106
|
+
|
|
107
|
+
def __repr__(self) -> str:
|
|
108
|
+
return self.name # pragma: no cover
|
|
109
|
+
|
|
110
|
+
def __copy__(self) -> "Sentinel":
|
|
111
|
+
return self # pragma: no cover
|
|
112
|
+
|
|
113
|
+
def __deepcopy__(self, memo: Any) -> "Sentinel":
|
|
114
|
+
return self # pragma: no cover
|
|
115
|
+
|
|
116
|
+
def __reduce__(self) -> str | tuple[Any, ...]:
|
|
117
|
+
return self.name
|
|
118
|
+
|
|
119
|
+
def __reduce_ex__(self, protocol: SupportsIndex) -> str | tuple[Any, ...]:
|
|
120
|
+
return self.name
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# Stream terminator: save_iter writes it after the last element and load_iter
# stops (via an identity check) when it reads it back.
POISON_PILL = Sentinel("POISON_PILL")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def load(path: Path) -> Any:
    """Load a serialized object with out-of-band data from ``path``, then delete it.

    Reads ``meta.pkl`` with pickle protocol 5, supplying the numbered ``*.bin``
    files as out-of-band buffers (memory-mapped read-only). The folder is
    removed after loading, so this is a consume-once operation.

    Args:
        path (pathlib.Path):
            Folder used to save serialized data with serialize(). Usually a folder /dev/shm
    Returns:
        Raw deserialized data
    Raises:
        RuntimeError: If the folder lacks the completion marker (partial write).
    """
    if not saved(path):
        raise RuntimeError(f"The object at {path} is corrupted or not saved")
    buffers: list[pickle.PickleBuffer | mmap.mmap] = []
    num_buffers = len(list(path.iterdir())) - 2  # exclude meta.pkl and .complete
    for idx in range(num_buffers):
        fpath = path / f"{idx}.bin"
        if os.stat(fpath).st_size == 0:
            # mmap cannot map zero-length files; substitute an empty buffer.
            buffers.append(pickle.PickleBuffer(b""))
        else:
            # POSIX-only: mmap.PROT_READ is unavailable on Windows.
            with open(fpath, "rb") as f:
                buffers.append(mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ))
    with open(path / "meta.pkl", "rb") as f:
        obj = pickle.load(f, buffers=buffers)
    shutil.rmtree(path)
    logger.debug("removed %r", path)
    # NOTE(review): releasing/closing the buffers here assumes the unpickled
    # object copied the data out during load; if obj still held views into
    # these mmaps, close() would raise BufferError — confirm for the
    # supported payload types.
    for b in buffers:
        if isinstance(b, pickle.PickleBuffer):
            b.release()
        else:
            b.close()
    return obj
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def saved(path: Path) -> bool:
    """Whether ``path`` holds a completely written object (marker file present)."""
    marker = path / COMPLETE
    return marker.exists()
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def load_iter(
    path: Path, next_timeout: int = 10 * 60, wait_callback: Callable[[], None] = lambda: None
) -> Iterator[Any]:
    """Consume the folder-queue produced by :func:`save_iter`, yielding elements.

    Polls ``path/0``, ``path/1``, ... in order, busy-waiting (1 ms sleeps) for
    each sub-folder's completion marker, and stops once the ``POISON_PILL``
    sentinel is read. Each loaded sub-folder is deleted by :func:`load`.

    Args:
        path: Folder being filled by a concurrent :func:`save_iter`.
        next_timeout: Seconds to wait for each next element before raising.
        wait_callback: Invoked on every poll iteration while waiting (e.g. to
            surface producer-side failures).

    Yields:
        The deserialized elements, in production order.

    Raises:
        TimeoutError: If the next element does not appear within ``next_timeout``.
    """
    idx = 0
    while True:
        start_time = time.time()
        # The timeout window restarts for every element.
        wait_time_threshold = start_time + next_timeout
        chunk_path = path / str(idx)
        while not saved(chunk_path):
            wait_callback()
            logger.debug("waiting for data in %r", chunk_path)
            if time.time() > wait_time_threshold:
                logger.error("timed out waiting for %r", chunk_path)
                raise TimeoutError
            time.sleep(0.001)  # busy wait
        chunk = load(chunk_path)
        # Identity check: the sentinel unpickles to the module-level constant.
        if chunk is POISON_PILL:
            logger.debug("poison pill!")
            break
        logger.debug("fetching microbatch took %r s", time.time() - start_time)
        yield chunk
        idx += 1
    return
|
fkat/validate.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#!/usr/bin/env python
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
The ``fkat.validate`` entrypoint processes the provided config,
|
|
7
|
+
instantiates the ``trainer``, ``model`` and ``data`` sections and calls ``trainer.validate()``.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import hydra
|
|
11
|
+
import lightning as L
|
|
12
|
+
from omegaconf import DictConfig
|
|
13
|
+
|
|
14
|
+
from fkat import initialize, run_main
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@hydra.main(version_base="1.3")
def main(cfg: DictConfig) -> None:
    """Instantiate the configured trainer/model/data and run validation."""
    s = initialize(cfg)
    kwargs = {"ckpt_path": s.ckpt_path}
    if isinstance(s.data, L.LightningDataModule):
        # A full DataModule is handed to Lightning as-is.
        kwargs["datamodule"] = s.data
    else:
        # Otherwise pull the validation dataloader(s) directly, if data exists.
        kwargs["val_dataloaders"] = s.data.val_dataloader() if s.data else None
    s.trainer.validate(s.model, **kwargs)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
if __name__ == "__main__":
    # Hydra CLI entrypoint (e.g. `python -m fkat.validate key=value ...`).
    run_main(main)
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fkat
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Foundational Kit for AI Training
|
|
5
|
+
Project-URL: homepage, https://github.com/amzn/fkat
|
|
6
|
+
Author: FKAT Contributors
|
|
7
|
+
License: Apache-2.0
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
License-File: NOTICE
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Requires-Dist: awswrangler>=3.5.1
|
|
12
|
+
Requires-Dist: boto3>=1.35.89
|
|
13
|
+
Requires-Dist: datasets>=3.0.0
|
|
14
|
+
Requires-Dist: evaluate
|
|
15
|
+
Requires-Dist: fsspec[s3]
|
|
16
|
+
Requires-Dist: hydra-core
|
|
17
|
+
Requires-Dist: importlib-metadata
|
|
18
|
+
Requires-Dist: lightning!=2022.*
|
|
19
|
+
Requires-Dist: lightning-utilities
|
|
20
|
+
Requires-Dist: mlflow-skinny<=3.8.1
|
|
21
|
+
Requires-Dist: nvidia-ml-py
|
|
22
|
+
Requires-Dist: pandas
|
|
23
|
+
Requires-Dist: pyarrow<21.0.0,>=15.0.0
|
|
24
|
+
Requires-Dist: tensorboard
|
|
25
|
+
Requires-Dist: torch!=2.3.0,!=2.6.0,>=2.0.1
|
|
26
|
+
Requires-Dist: torchaudio
|
|
27
|
+
Requires-Dist: torchmetrics>=0.11.4
|
|
28
|
+
Requires-Dist: torchvision
|
|
29
|
+
Requires-Dist: transformers
|
|
30
|
+
Requires-Dist: wandb
|
|
31
|
+
Provides-Extra: docs
|
|
32
|
+
Requires-Dist: docutils<0.21,>=0.16; extra == 'docs'
|
|
33
|
+
Requires-Dist: myst-parser<3.0.0,>=0.18.1; extra == 'docs'
|
|
34
|
+
Requires-Dist: pandoc<=2.3,>=1.0; extra == 'docs'
|
|
35
|
+
Requires-Dist: sphinx-autobuild; extra == 'docs'
|
|
36
|
+
Requires-Dist: sphinx-autodoc-typehints==1.23.4; extra == 'docs'
|
|
37
|
+
Requires-Dist: sphinx-book-theme; extra == 'docs'
|
|
38
|
+
Requires-Dist: sphinx-copybutton<=0.5.2,>=0.3; extra == 'docs'
|
|
39
|
+
Requires-Dist: sphinx-multiproject; extra == 'docs'
|
|
40
|
+
Requires-Dist: sphinx-paramlinks<=0.6.0,>=0.5.1; extra == 'docs'
|
|
41
|
+
Requires-Dist: sphinx-prompt==1.5.0; extra == 'docs'
|
|
42
|
+
Requires-Dist: sphinx-rtd-dark-mode==1.3.0; extra == 'docs'
|
|
43
|
+
Requires-Dist: sphinx-togglebutton<=0.3.2,>=0.2; extra == 'docs'
|
|
44
|
+
Requires-Dist: sphinx-toolbox==3.5.0; extra == 'docs'
|
|
45
|
+
Requires-Dist: sphinx<6.0,>5.0; extra == 'docs'
|
|
46
|
+
Requires-Dist: sphinxcontrib-mockautodoc; extra == 'docs'
|
|
47
|
+
Requires-Dist: sphinxcontrib-video==0.2.0; extra == 'docs'
|
|
48
|
+
Provides-Extra: test
|
|
49
|
+
Requires-Dist: captum>=0.4.0; extra == 'test'
|
|
50
|
+
Requires-Dist: contourpy<1.3.1; extra == 'test'
|
|
51
|
+
Requires-Dist: deepspeed; extra == 'test'
|
|
52
|
+
Requires-Dist: ipython==8.18.1; extra == 'test'
|
|
53
|
+
Requires-Dist: moto==5.0.18; extra == 'test'
|
|
54
|
+
Requires-Dist: nvidia-ml-py; extra == 'test'
|
|
55
|
+
Requires-Dist: nvtx; extra == 'test'
|
|
56
|
+
Requires-Dist: peft; extra == 'test'
|
|
57
|
+
Requires-Dist: pre-commit; extra == 'test'
|
|
58
|
+
Requires-Dist: pre-commit-hooks; extra == 'test'
|
|
59
|
+
Requires-Dist: pytest; extra == 'test'
|
|
60
|
+
Requires-Dist: pytest-cov; extra == 'test'
|
|
61
|
+
Requires-Dist: pytest-timeout; extra == 'test'
|
|
62
|
+
Requires-Dist: pytest-xdist; extra == 'test'
|
|
63
|
+
Requires-Dist: ruff; extra == 'test'
|
|
64
|
+
Requires-Dist: tox-venv; extra == 'test'
|
|
65
|
+
Requires-Dist: tox-wheel==1.0.0; extra == 'test'
|
|
66
|
+
Requires-Dist: tox==3.26.0; extra == 'test'
|
|
67
|
+
Requires-Dist: trl==0.7.1; extra == 'test'
|
|
68
|
+
Requires-Dist: twine; extra == 'test'
|
|
69
|
+
Requires-Dist: ty; extra == 'test'
|
|
70
|
+
Requires-Dist: types-boto3[batch,dynamodb,ec2,s3]; extra == 'test'
|
|
71
|
+
Requires-Dist: viztracer; extra == 'test'
|
|
72
|
+
Description-Content-Type: text/markdown
|
|
73
|
+
|
|
74
|
+
# FKAT
|
|
75
|
+
|
|
76
|
+
[](https://amzn.github.io/fkat/)
|
|
77
|
+
[](LICENSE)
|
|
78
|
+
[](https://www.python.org/downloads/)
|
|
79
|
+
[](https://pytorch.org/)
|
|
80
|
+
[](https://lightning.ai/)
|
|
81
|
+
[](https://github.com/astral-sh/ruff)
|
|
82
|
+
|
|
83
|
+
Foundational Kit for AI Training
|
|
84
|
+
|
|
85
|
+
## Documentation
|
|
86
|
+
|
|
87
|
+
📚 **[Read the full documentation](https://amzn.github.io/fkat/)**
|
|
88
|
+
|
|
89
|
+
## Dependencies
|
|
90
|
+
|
|
91
|
+
This project depends on third-party open source packages that are installed via PyPI.
|
|
92
|
+
|
|
93
|
+
Key dependencies include:
|
|
94
|
+
- PyTorch (BSD-3-Clause)
|
|
95
|
+
- Lightning (Apache-2.0)
|
|
96
|
+
- Transformers (Apache-2.0)
|
|
97
|
+
- Hydra (MIT)
|
|
98
|
+
- MLflow (Apache-2.0)
|
|
99
|
+
- AWS SDK for Python / Boto3 (Apache-2.0)
|
|
100
|
+
- PyArrow (Apache-2.0)
|
|
101
|
+
|
|
102
|
+
For a complete list of dependencies and their licenses, see `pyproject.toml` and run `pip-licenses` after installation.
|
|
103
|
+
|
|
104
|
+
## Setup
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
pip install hatch
|
|
108
|
+
hatch env create
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Development
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
hatch run test:test
|
|
115
|
+
hatch run lint:check
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Documentation
|
|
119
|
+
|
|
120
|
+
Docs are automatically built and deployed to GitHub Pages on push to main/mainline.
|
|
121
|
+
|
|
122
|
+
Build locally:
|
|
123
|
+
```bash
|
|
124
|
+
hatch run docs:build
|
|
125
|
+
hatch run docs:serve
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Contributing
|
|
129
|
+
|
|
130
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
131
|
+
|
|
132
|
+
## Code of Conduct
|
|
133
|
+
|
|
134
|
+
See [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md).
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
fkat/__init__.py,sha256=kuYbZ-7cqDIEA5dNggMVe49LibdC-RwE1pL3yPhH-2U,5149
|
|
2
|
+
fkat/predict.py,sha256=e2FSuw5HIz_eocoxTbncklNch18e_5mffIgObJXBrFk,878
|
|
3
|
+
fkat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
fkat/test.py,sha256=CKqP0MLmXqVdXlBTcjtN5CmPT_YnCi2COyUg7a7rMBM,811
|
|
5
|
+
fkat/train.py,sha256=Gx_td6GyhJ6VAIQEAwMOa2me9FT2t3_OL1PTt6wwOVI,892
|
|
6
|
+
fkat/validate.py,sha256=cHg9zW9EpHm_B8x30jb7xBSzu_x8N5haJY5d00sNaXE,821
|
|
7
|
+
fkat/data/__init__.py,sha256=261egC2KYrpUnaDXnCLnOFoudKcywYkFZ25QlrdJWkk,419
|
|
8
|
+
fkat/data/data_module.py,sha256=RACUbTNdFtYXpKc-pvj7iN4kp1dPDqouQfbrcNz9QVA,7793
|
|
9
|
+
fkat/data/dict.py,sha256=-ivMLelLGDOhVcw9gEygp4LLdOgiFBOVmta-mZCYHiw,1442
|
|
10
|
+
fkat/data/sharded.py,sha256=6BSi975P4RYJfewwZJklT3Oyd9CcD4Y1AJbCnEv6ayc,29219
|
|
11
|
+
fkat/data/shm.py,sha256=_1ZeRhZRih_TMZabSQPq3Gm9PInZtwed0Lgy4My4yZ8,14337
|
|
12
|
+
fkat/data/datasets/__init__.py,sha256=hUU88kfAIWNHhkz-v5w3fHqNcHihYIKj8VUf-ps65D4,540
|
|
13
|
+
fkat/data/datasets/dict.py,sha256=9d1PWVaLP5G_E6w0V9AO0mFZsx6-HqqKjve9RYWnpyg,2700
|
|
14
|
+
fkat/data/datasets/json.py,sha256=oNn4czQThutKBpixnLdG1UBAlIa2pkWcNb1OC1C4rc8,6318
|
|
15
|
+
fkat/data/datasets/map.py,sha256=gE_xnUVtl6h-ouTBZbiAvu7C0-dPZ7OTR-DxHG0mvhM,2568
|
|
16
|
+
fkat/data/datasets/parquet.py,sha256=bGSjrrFJw3DE5WEFGIuxkA_XH9WjC9l_3TFVYH1ZMt8,8819
|
|
17
|
+
fkat/data/datasets/sized.py,sha256=tmQX1cd9_0b4jmnJ3560oPX2r_o9OvXljeweQb5f9xk,724
|
|
18
|
+
fkat/data/samplers/__init__.py,sha256=HDPLdpb7UwttzARf16Z6-vABaHfzEXXpqHv-HPAxdo8,233
|
|
19
|
+
fkat/data/samplers/dict.py,sha256=nppgM6z0WfG16LkOiT7CIzqyC7IkePu5VjI_VeFQ1gI,1559
|
|
20
|
+
fkat/data/samplers/sized.py,sha256=uo1Y-XgH10qaH6HYiKv39YCiRWQuzKMJ4Ha6Oqu3Gj0,410
|
|
21
|
+
fkat/data/samplers/strategies.py,sha256=jsG5C8jCP6QNgwlNbMRx0UF2Kp0hfUTDEB6sRkqDxeE,1902
|
|
22
|
+
fkat/pytorch/__init__.py,sha256=cdA5mS4EFfP6aggb1H9JDyT2ZTGwRDJwFEAtOEOLBSg,108
|
|
23
|
+
fkat/pytorch/loggers.py,sha256=sSFTlv8e5s5Nyz4B50HPbQWjhJmgB-BK4wDAeWAOs5I,9683
|
|
24
|
+
fkat/pytorch/utilities.py,sha256=VIMjJ7OjSh-dA-7OjPi10WjLY9ZMFtopggtEOJHXyXU,1446
|
|
25
|
+
fkat/pytorch/actions/__init__.py,sha256=YukooKLQ5FhRrJHIVIKusClD8BXFJYpcWF0_vKCblBs,412
|
|
26
|
+
fkat/pytorch/actions/aws/__init__.py,sha256=cdA5mS4EFfP6aggb1H9JDyT2ZTGwRDJwFEAtOEOLBSg,108
|
|
27
|
+
fkat/pytorch/actions/aws/batch.py,sha256=nMGvAuSOtHuY4DnxXojI_cbhFCq8HpIal9hV5cBpxQo,976
|
|
28
|
+
fkat/pytorch/actions/aws/ec2.py,sha256=xwsy9QFdFCkhkYWka5kl_f6tdRDJZCsfKU1kCrsAbMg,2631
|
|
29
|
+
fkat/pytorch/callbacks/__init__.py,sha256=LTiYHnMO8kpe1MOUjnqYLW55bRL2vI3iW6616zwBCCI,107
|
|
30
|
+
fkat/pytorch/callbacks/gc.py,sha256=FHwjci0Kj6znp5hfnr5fgNeTblSSICkKJCyRqMuWBmI,5087
|
|
31
|
+
fkat/pytorch/callbacks/loggers.py,sha256=hQNpajn6YwUbEE_hfrtJ0WL1inAi4WUNWaTrPk8320E,7297
|
|
32
|
+
fkat/pytorch/callbacks/cuda/__init__.py,sha256=gILx7c3Gu4jXEIrDVrdD388X-xq4qkRbD_AEaDjPHRU,330
|
|
33
|
+
fkat/pytorch/callbacks/cuda/cache.py,sha256=nqUJVxQlRw76NqtM0-vxwAxp3NgVRWIySD3T8sPNbLQ,3788
|
|
34
|
+
fkat/pytorch/callbacks/cuda/memory.py,sha256=N_JucoxovoAeawWgTwXu89o23T8gk0fRgmsF34YaeUs,8575
|
|
35
|
+
fkat/pytorch/callbacks/cuda/nsys.py,sha256=6MrWSZHvGCNeCcSfF6BX5xyXV_VqGDyu3pwZeznKcY4,7039
|
|
36
|
+
fkat/pytorch/callbacks/cuda/nvtx.py,sha256=zWWdl-kcRSVeo2R7VaoeQ5qT0m_9rl07qddfcCKXEbM,11179
|
|
37
|
+
fkat/pytorch/callbacks/cuda/xid.py,sha256=RZaZDjmRutimg5LTBQ1waRygmUkfzUTmItjzMHfZ6Oo,6736
|
|
38
|
+
fkat/pytorch/callbacks/debugging/__init__.py,sha256=npRowOXLh-swYsIUBjTKWkX9T00ISA9gIB4qdhzjnUY,250
|
|
39
|
+
fkat/pytorch/callbacks/debugging/introspection.py,sha256=ENaqSgNoh-0fAoS4LsJRKqqCtUuypaMjYHYCsQNv54w,21565
|
|
40
|
+
fkat/pytorch/callbacks/debugging/optimizer.py,sha256=suzSAJu0stuJo9W9j-1f63Y5LzZrAimibRavUWJjQjM,1696
|
|
41
|
+
fkat/pytorch/callbacks/logging/__init__.py,sha256=Br3Hwt5HexjdFZcuvO5kWxa3yzO7pVtWBvXIKVk52jk,301
|
|
42
|
+
fkat/pytorch/callbacks/logging/heartbeat.py,sha256=Re29g8zy-5Hucvkr7CjWkssB-Sywmy84aivmzqnajz8,2801
|
|
43
|
+
fkat/pytorch/callbacks/logging/throughput.py,sha256=E42193nY3FqWo04YWHQ-hDArOu_O1jFoTTX2Mxmaj_c,9427
|
|
44
|
+
fkat/pytorch/callbacks/logging/validation_metrics.py,sha256=nyImmN0EeUG0gbpmoXtXPaRabVgHu5Az202K851iFO8,3680
|
|
45
|
+
fkat/pytorch/callbacks/monitoring/__init__.py,sha256=-0w2E1H0698aLNeyz4QozY67ioQviQhkI0y3Z7m1jyQ,354
|
|
46
|
+
fkat/pytorch/callbacks/monitoring/crash.py,sha256=AIGMUZl73YK8MMrRXIZXBNl2J7Qr77SNRfRFkI9aRgU,6077
|
|
47
|
+
fkat/pytorch/callbacks/monitoring/dp.py,sha256=NEcMg6cpi5--xnA77BHQfbqxhHyojwK1z75loO5b5j4,4698
|
|
48
|
+
fkat/pytorch/callbacks/monitoring/hardware_stats.py,sha256=_yrpdi8RJdCm74YflUj_Xx3e_odhuufiqVctSMGSE9A,5394
|
|
49
|
+
fkat/pytorch/callbacks/monitoring/shutdown.py,sha256=2luwvueNeK1_am4Y8gttEunzJOgdwqedpVbINw9iT7Y,6469
|
|
50
|
+
fkat/pytorch/callbacks/profiling/__init__.py,sha256=4TSwRLkqEGRcYCx-hgqgGrLRV6D4nUjwVd-TFtCU72A,293
|
|
51
|
+
fkat/pytorch/callbacks/profiling/flops.py,sha256=UWNim5nUYRQJu4Sg_8plKiLktRC-OySdb2xLzHnFF4k,23281
|
|
52
|
+
fkat/pytorch/callbacks/profiling/memray.py,sha256=mG18EuNqRG2hCAxwa13RVLPAsKDRf0-cQMOIMym-fgA,7694
|
|
53
|
+
fkat/pytorch/callbacks/profiling/torch.py,sha256=SZ5Nd_ypbtP56lYHQmfdfgXN8cqCy-8JfLMHv7xRB3U,7558
|
|
54
|
+
fkat/pytorch/callbacks/profiling/viztracer.py,sha256=TKc-4A1tO2jxjhIZeKpEUeVtVNbXWg8q2pPb8G7OnOc,7421
|
|
55
|
+
fkat/pytorch/schedule/__init__.py,sha256=ritz02ozwavSurhRaK4tpU7Oj5edBlXO1uwuAs2X4RA,458
|
|
56
|
+
fkat/pytorch/schedule/base.py,sha256=1HuDBBPK-Czc5xX2ppWZaBdzJpjMGNuMtSwsJ5w2_eA,9462
|
|
57
|
+
fkat/pytorch/schedule/mlflow.py,sha256=3hDH85yrejnht4eupiBmiIJu9bebvFF2skBBCIR-sJw,5980
|
|
58
|
+
fkat/utils/__init__.py,sha256=gmLZEQAEHy2pth2jU-oM-afQKXqVl7NJxtgBpY2OW3w,808
|
|
59
|
+
fkat/utils/boto3.py,sha256=vFmgaM4AbpvUkhnNybK_7AIbvPT6PKMEGDcbdiUo9co,634
|
|
60
|
+
fkat/utils/config.py,sha256=ypvvU-T7ibQwlGMOWre_tuJtVjdAjbyOJsUTH_EZ7O0,6238
|
|
61
|
+
fkat/utils/logging.py,sha256=7k-7-rAxtut9fMJyGkGTv8vy1udanzPHKE7w-UKjvQs,838
|
|
62
|
+
fkat/utils/mlflow.py,sha256=gUyE6JrD_8glhsHkg77eHA6fXI_6gu3pgvuRHPgs4HA,1075
|
|
63
|
+
fkat/utils/pandas.py,sha256=kUcNHhuWFK45oQ9c5iEyBrTfBmzubL-2uF8MfByYJ34,925
|
|
64
|
+
fkat/utils/pdb.py,sha256=q-yzt0_iJKfN7wFHvm8SIwPiVWZO7R0WEl_qlP6aT_Q,2329
|
|
65
|
+
fkat/utils/pool.py,sha256=2TxOGBZTxR_ZyM923VYJeiU-kLSLCIwBxDtfrfrUFmk,2309
|
|
66
|
+
fkat/utils/profiler.py,sha256=lp-VMMTSnoe7JmomfZ4AoTBo3X9SotiXE7VOdcFMr2E,547
|
|
67
|
+
fkat/utils/pyarrow.py,sha256=QooFPFRgKd27405JygZyxhVD_9dQHUSBsEwNApKnzvE,733
|
|
68
|
+
fkat/utils/rng.py,sha256=2dUlU4IgZgAnrHZZ7MnrKdv9aRO8buki9nv1SGPWUDo,971
|
|
69
|
+
fkat/utils/shm.py,sha256=hTYgycIzLI-MskK-JHkTEIFb9c29XJ4z8QtI--6Kbjg,5941
|
|
70
|
+
fkat/utils/aws/__init__.py,sha256=cdA5mS4EFfP6aggb1H9JDyT2ZTGwRDJwFEAtOEOLBSg,108
|
|
71
|
+
fkat/utils/aws/imds.py,sha256=xv60coWcD2gp4HiC5rWtEujsGEHMrMnDMNtPrx6OKdc,4474
|
|
72
|
+
fkat/utils/cuda/__init__.py,sha256=cdA5mS4EFfP6aggb1H9JDyT2ZTGwRDJwFEAtOEOLBSg,108
|
|
73
|
+
fkat/utils/cuda/xid.py,sha256=xdfqQ6gBnNjSjktPAdkrAwZmktJL5uyxuwdDJNi2B30,1611
|
|
74
|
+
fkat/utils/cuda/preflight/__init__.py,sha256=cdA5mS4EFfP6aggb1H9JDyT2ZTGwRDJwFEAtOEOLBSg,108
|
|
75
|
+
fkat/utils/cuda/preflight/run.py,sha256=p_MlLcyrQbqbyvuqeO5Q42IKc88UDS-yk9IMaBHcgiw,21561
|
|
76
|
+
fkat/utils/cuda/preflight/health_check/aws_instance_config.py,sha256=db54ylrFRpHvfihaZOmmHIA-4C9vpG4maQcnQwtHDsg,1989
|
|
77
|
+
fkat/utils/cuda/preflight/health_check/constants.py,sha256=Xblr9IyLBVYG54rOBusKkVeSSpSzlko-WdyRn4vmf9Y,738
|
|
78
|
+
fkat/utils/cuda/preflight/health_check/ddb_client.py,sha256=x0FLnoao8iEM9yDJqhfDNZDZ8VtVbXuirQh6ODpXT04,3250
|
|
79
|
+
fkat/utils/cuda/preflight/health_check/gpu_connection_test.py,sha256=Dmmq4lDoXYPABY-V51oV-cl3c64oFqa4um9288cC1Dw,4185
|
|
80
|
+
fkat/utils/cuda/preflight/health_check/gpu_stress_test.py,sha256=YAZNWtRgkPCqthhIxmr-U6vuWW-e8_6ZZdsbT-JTGCA,4693
|
|
81
|
+
fkat/utils/cuda/preflight/health_check/helpers.py,sha256=rXw6fhKN36y4Xui4yDVQpym0RmY1pnhMLTScVZuQneQ,11291
|
|
82
|
+
fkat/utils/cuda/preflight/health_check/logger.py,sha256=XAIJPvf3Pp0gPTCWE_W9HBMrqh7T2y794YvsG2pXLGM,8894
|
|
83
|
+
fkat/utils/cuda/preflight/health_check/timer.py,sha256=lvMEEH8EBLLIMj0hVlaL4OuCaq7GYlBNsB6JXmh0YRA,912
|
|
84
|
+
fkat-0.1.2.dist-info/METADATA,sha256=7Cn7bE7zumd2mDduxMdj8QMcX5_CBSEv9k6x9Lm67PQ,4480
|
|
85
|
+
fkat-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
86
|
+
fkat-0.1.2.dist-info/licenses/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
|
|
87
|
+
fkat-0.1.2.dist-info/licenses/NOTICE,sha256=1CkO1kwu3Q_OHYTj-d-yiBJA_lNN73a4zSntavaD4oc,67
|
|
88
|
+
fkat-0.1.2.dist-info/RECORD,,
|