model-research 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import hashlib
|
|
5
|
+
import importlib.resources as resources
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import tempfile
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
import zstandard as zstd
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Parse the JSON manifest shipped inside this package; runs once at import time.
_MANIFEST = json.loads(
    resources.files(__package__)
    .joinpath("asset_manifest.json")
    .read_text(encoding="utf-8")
)
|
|
15
|
+
# Ordered (module name, shard filename) pairs; dict insertion order fixes the
# sequence in which the shards are listed.
_SHARDS = list(
    {
        "model_research_part01": "part-0001.bin",
        "model_research_part02": "part-0002.bin",
        "model_research_part03": "part-0003.bin",
        "model_research_part04": "part-0004.bin",
        "model_research_part05": "part-0005.bin",
        "model_research_part06": "part-0006.bin",
        "model_research_part07": "part-0007.bin",
    }.items()
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _default_target_dir() -> Path:
    """Return the directory used when the caller supplies no target.

    On Windows (``os.name == "nt"``) this is the current working directory;
    on every other platform it is ``/tmp``.
    """
    return Path.cwd() if os.name == "nt" else Path("/tmp")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _file_digest(path: Path, chunk_size: int = 1024 * 1024) -> tuple[str, int]:
    """Hash a file with SHA-256 while counting its bytes.

    Args:
        path: File to read; it is opened in binary mode.
        chunk_size: Number of bytes requested from the file per read call.

    Returns:
        A ``(hex_digest, byte_count)`` pair describing the file's contents.
    """
    hasher = hashlib.sha256()
    total_bytes = 0
    with path.open("rb") as stream:
        # Keep reading until read() hands back an empty bytes object (EOF).
        while piece := stream.read(chunk_size):
            hasher.update(piece)
            total_bytes += len(piece)
    return hasher.hexdigest(), total_bytes
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _target_path(target_dir: str | os.PathLike[str] | None) -> Path:
    """Compute the absolute file path at which the asset should live.

    The chosen directory (``target_dir``, or the platform default when it is
    ``None``) is created if missing, including any missing parents.

    Returns:
        The resolved directory joined with the manifest's
        ``original_filename`` entry.
    """
    if target_dir is None:
        folder = _default_target_dir()
    else:
        folder = Path(target_dir)
    folder.mkdir(parents=True, exist_ok=True)
    return folder.resolve() / _MANIFEST["original_filename"]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def ensure_asset(target_dir: str | os.PathLike[str] | None = None, force: bool = False) -> str:
    """Restore the packaged asset into ``target_dir`` and return its path.

    The shards listed in ``_SHARDS`` are read in order, fed through a single
    zstd streaming decompressor and written to a temporary file created next
    to the destination. The temporary file replaces the destination only
    after the compressed digest, the restored digest and the restored size
    all match the manifest.

    Args:
        target_dir: Directory to write into; ``None`` selects the platform
            default directory.
        force: Rebuild even when a file matching the manifest already exists.

    Returns:
        The destination path as a string.

    Raises:
        RuntimeError: If a digest or the restored size differs from the
            manifest.
    """
    destination = _target_path(target_dir)
    if destination.exists() and not force:
        # Reuse the existing file only when both hash and size match.
        current_hash, current_size = _file_digest(destination)
        if current_hash == _MANIFEST["original_sha256"] and current_size == _MANIFEST["original_size"]:
            return str(destination)

    # Write into the destination's own directory so os.replace() below stays
    # on one filesystem.
    temp_handle = tempfile.NamedTemporaryFile(
        mode="wb",
        prefix=f"{destination.name}.",
        suffix=".tmp",
        delete=False,
        dir=str(destination.parent),
    )
    tmp_path = Path(temp_handle.name)
    original_hash = hashlib.sha256()
    compressed_hash = hashlib.sha256()
    written_size = 0
    try:
        with temp_handle:
            # One decompressor for all shards: they are consumed as a single
            # continuous compressed stream.
            decompressor = zstd.ZstdDecompressor().decompressobj()
            for module_name, part_filename in _SHARDS:
                resource = resources.files(f"{module_name}._data").joinpath(part_filename)
                with resource.open("rb") as shard_handle:
                    while chunk := shard_handle.read(1024 * 1024):
                        compressed_hash.update(chunk)
                        output = decompressor.decompress(chunk)
                        if output:
                            original_hash.update(output)
                            written_size += len(output)
                            temp_handle.write(output)
            # Collect anything the decompressor still reports at end of input.
            tail = decompressor.flush()
            if tail:
                original_hash.update(tail)
                written_size += len(tail)
                temp_handle.write(tail)

        if compressed_hash.hexdigest() != _MANIFEST["compressed_sha256"]:
            raise RuntimeError("Compressed shard digest mismatch.")
        if original_hash.hexdigest() != _MANIFEST["original_sha256"]:
            raise RuntimeError("Restored asset digest mismatch.")
        if written_size != _MANIFEST["original_size"]:
            raise RuntimeError("Restored asset size mismatch.")

        os.replace(tmp_path, destination)
        return str(destination)
    except BaseException:
        # BaseException (not just Exception) so that KeyboardInterrupt or
        # SystemExit during the restore does not leave a partial temporary
        # file behind. missing_ok avoids an exists()/unlink() race.
        tmp_path.unlink(missing_ok=True)
        raise
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def unzip_asset(
    asset_path: str | os.PathLike[str] | None = None,
    extract_path: str | os.PathLike[str] = "./",
    force: bool = False,
) -> str:
    """Extract the asset archive into ``extract_path``.

    Args:
        asset_path: Existing archive to extract. When ``None`` the asset is
            first restored into the current directory via ``ensure_asset``.
        extract_path: Directory to extract into; created if it is missing.
        force: Forwarded to ``ensure_asset`` when ``asset_path`` is ``None``.

    Returns:
        The resolved extraction directory as a string.

    Raises:
        FileNotFoundError: If the archive path does not exist.
        RuntimeError: If the manifest's ``archive_format`` is neither ``zip``
            nor ``7z``, or it is ``7z`` and ``py7zr`` cannot be imported.
    """
    if asset_path is None:
        archive_path = Path(ensure_asset(".", force=force))
    else:
        archive_path = Path(asset_path).expanduser()
    if not archive_path.exists():
        raise FileNotFoundError(f"Archive asset does not exist: {archive_path}")

    # The output directory is created before the format is inspected.
    destination = Path(extract_path).expanduser()
    destination.mkdir(parents=True, exist_ok=True)

    archive_format = _MANIFEST.get("archive_format")
    if archive_format not in ("zip", "7z"):
        raise RuntimeError(f"Unsupported archive format: {archive_format}")

    if archive_format == "zip":
        import zipfile

        with zipfile.ZipFile(archive_path, "r") as bundle:
            bundle.extractall(destination)
        return str(destination.resolve())

    # Only the 7z case remains; py7zr is imported lazily because it is optional.
    try:
        import py7zr
    except ModuleNotFoundError as exc:
        raise RuntimeError("py7zr is required to extract .7z assets.") from exc

    with py7zr.SevenZipFile(archive_path, mode="r") as bundle:
        bundle.extractall(path=str(destination))
    return str(destination.resolve())
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: restore the asset and print its path.

    Args:
        argv: Arguments to parse; ``None`` is handed to argparse unchanged.

    Returns:
        ``0`` once the restored path has been printed.
    """
    cli = argparse.ArgumentParser(
        description="Restore the packaged asset into a target directory.",
    )
    cli.add_argument(
        "--target-dir",
        default=None,
        help="Directory where the restored asset will be written.",
    )
    cli.add_argument(
        "--force",
        action="store_true",
        help="Force regeneration even if the target already matches.",
    )
    options = cli.parse_args(argv)
    restored_path = ensure_asset(options.target_dir, force=options.force)
    print(restored_path)
    return 0
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
|
|
2
|
+
"archive_format": "zip",
|
|
3
|
+
"chunk_bytes": 100663296,
|
|
4
|
+
"compressed_sha256": "ee2a577841c84a04e884aea1d4c325598888e6ea9eb0c01b82e1bf909b5e52b5",
|
|
5
|
+
"compression_level": 22,
|
|
6
|
+
"is_zip_asset": true,
|
|
7
|
+
"original_filename": "cancer_generation.zip",
|
|
8
|
+
"original_sha256": "64758df881102dfb6f1dc4d1a3f6fe5c7c9cd946ce0057a926d9d8dd1a18d262",
|
|
9
|
+
"original_size": 633003159,
|
|
10
|
+
"package_version": "0.1.0",
|
|
11
|
+
"shard_count": 7
|
|
12
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: model-research
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Loader package for the model-research asset.
|
|
5
|
+
Requires-Dist: model-research-part01==0.1.0
|
|
6
|
+
Requires-Dist: model-research-part02==0.1.0
|
|
7
|
+
Requires-Dist: model-research-part03==0.1.0
|
|
8
|
+
Requires-Dist: model-research-part04==0.1.0
|
|
9
|
+
Requires-Dist: model-research-part05==0.1.0
|
|
10
|
+
Requires-Dist: model-research-part06==0.1.0
|
|
11
|
+
Requires-Dist: model-research-part07==0.1.0
|
|
12
|
+
Requires-Dist: zstandard>=0.23.0
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
model_research/__init__.py,sha256=BnP7D9CUdJnsriF1FWV8_rCAyAj6az3Q9Xw39PJdQPg,5456
|
|
2
|
+
model_research/__main__.py,sha256=7L9asSJNpo-bjIqFDRk3qkXtybFElw3oEE1WPxUuzSg,82
|
|
3
|
+
model_research/asset_manifest.json,sha256=PYgMI533r68g_3-DeCERDpU6oFKqEJZeTLbMfr5qM6Q,429
|
|
4
|
+
model_research-0.1.0.dist-info/METADATA,sha256=odLeTpY6okUlj3OlHsmfpJg-1i6mhNfs8Km2vd6DDYs,465
|
|
5
|
+
model_research-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
6
|
+
model_research-0.1.0.dist-info/top_level.txt,sha256=WZEX-4jMFPPBtgJmR1Jeb-1nojcmqwQ7B9lOe9csC_M,15
|
|
7
|
+
model_research-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
model_research
|