mini-imggen-numpy-lib 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/PKG-INFO +1 -1
- mini_imggen_numpy_lib-0.1.2/mini_imggen_numpy_lib/__init__.py +40 -0
- mini_imggen_numpy_lib-0.1.2/mini_imggen_numpy_lib/mini_imggen_numpy_lib.py +463 -0
- {mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/mini_imggen_numpy_lib.egg-info/PKG-INFO +1 -1
- {mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/mini_imggen_numpy_lib.egg-info/SOURCES.txt +2 -0
- mini_imggen_numpy_lib-0.1.2/setup.py +18 -0
- mini_imggen_numpy_lib-0.1.0/setup.py +0 -21
- {mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/README.md +0 -0
- {mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/mini_imggen_numpy_lib.egg-info/dependency_links.txt +0 -0
- {mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/mini_imggen_numpy_lib.egg-info/requires.txt +0 -0
- {mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/mini_imggen_numpy_lib.egg-info/top_level.txt +0 -0
- {mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/pyproject.toml +0 -0
- {mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/setup.cfg +0 -0
{mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mini_imggen_numpy_lib
-Version: 0.1.0
+Version: 0.1.2
 Summary: A lightweight educational Python library for toy image & text generation using NumPy only.
 Home-page: https://github.com/Leo62-glitch/mini_imggen_numpy_lib
 Author: Léo
mini_imggen_numpy_lib-0.1.2/mini_imggen_numpy_lib/__init__.py
@@ -0,0 +1,40 @@
+# mini_imggen_numpy_lib/__init__.py
+
+"""
+mini_imggen_numpy_lib
+--------------------
+Python library for image and text generation with NumPy, without a graphical interface.
+
+Main exports:
+- TextVocab
+- ARBigramText, Config
+- Image functions: build_dataset, load_and_preprocess_image, image_to_tokens, tokens_to_image, train_model, generate_image_from_model
+- Text functions: load_text_dataset, train_text_model, generate_text_from_model
+"""
+
+# Import utilities and classes from the main module
+from .mini_imggen_numpy_lib import (
+    TOKEN_VOCAB_SIZE,
+    set_seed,
+    tokenize_text,
+    TextVocab,
+    load_and_preprocess_image,
+    image_to_tokens,
+    tokens_to_image,
+    ARBigramText,
+    Config,
+    build_dataset,
+    train_model,
+    generate_image_from_model,
+    load_text_dataset,
+    train_text_model,
+    generate_text_from_model
+)
+
+# Symbols exported for `import *`
+__all__ = [
+    'TOKEN_VOCAB_SIZE', 'set_seed', 'tokenize_text',
+    'TextVocab', 'load_and_preprocess_image', 'image_to_tokens', 'tokens_to_image',
+    'ARBigramText', 'Config', 'build_dataset', 'train_model', 'generate_image_from_model',
+    'load_text_dataset', 'train_text_model', 'generate_text_from_model'
+]
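For readers new to the API re-exported above, here is a minimal, hypothetical sketch of the text-vocabulary round trip using only names that `__init__.py` exposes (tokenize_text, TextVocab); the sample caption is illustrative and not part of the package:

    from mini_imggen_numpy_lib import TextVocab, tokenize_text

    # Tokenize a caption, build a word-level vocab, then encode/decode it.
    words = tokenize_text("A small tabby cat on a red chair")
    vocab = TextVocab()
    vocab.build(words, min_freq=1)   # unseen words map to index 0 ("<unk>")
    ids = vocab.encode(words)        # list of integer ids
    print(vocab.decode(ids))         # recovers the lowercased tokens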
mini_imggen_numpy_lib-0.1.2/mini_imggen_numpy_lib/mini_imggen_numpy_lib.py
@@ -0,0 +1,463 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+mini_imggen_numpy_lib.py
+------------------------
+Refactor of the `mini_imggen_numpy_ui.py` script into a Python library
+without a graphical interface.
+
+Main exports:
+- TextVocab: build / encode / decode / save / load for the text vocabulary
+- ARBigramText: simplified autoregressive model shared by image and text
+- build_dataset, load_and_preprocess_image, image_to_tokens, tokens_to_image
+- train_model / generate_image_from_model
+- train_text_model / generate_text_from_model
+
+Design notes:
+- All functions accept a ``log_cb: Optional[Callable[[str], None]]``
+  to receive log messages (useful for integration into an app).
+- Weights and vocabs are saved/loaded as NPZ/JSON compatible with the old
+  format, for backwards compatibility.
+
+Quick usage:
+>>> from mini_imggen_numpy_lib import train_model, generate_image_from_model
+>>> train_model('images/', 'ckpt', epochs=3)
+>>> generate_image_from_model('ckpt', 'chat tigré', 'out.png')
+
+"""
+from __future__ import annotations
+import os
+import re
+import json
+from dataclasses import dataclass
+from typing import List, Tuple, Dict, Optional, Callable
+
+import numpy as np
+from PIL import Image
+
+# -------------------------
+# Utilities & tokenizers
+# -------------------------
+
+TOKEN_VOCAB_SIZE = 256  # token space 0..255 (for images)
+
+def set_seed(seed: int = 42) -> None:
+    np.random.seed(seed)
+
+
+def tokenize_text(s: str) -> List[str]:
+    toks = re.split(r"[^\w]+", s.lower())
+    return [t for t in toks if t]
+
+# -------------------------
+# Text vocab
+# -------------------------
+
+class TextVocab:
+    """Simple vocab for the text side.
+
+    On-disk format:
+    - JSON containing 'stoi' and 'itos'
+    """
+    def __init__(self):
+        self.stoi: Dict[str, int] = {"<unk>": 0}
+        self.itos: List[str] = ["<unk>"]
+
+    def build(self, words: List[str], min_freq: int = 1) -> None:
+        from collections import Counter
+        c = Counter(words)
+        for w, f in c.items():
+            if f >= min_freq and w not in self.stoi:
+                self.stoi[w] = len(self.itos)
+                self.itos.append(w)
+
+    def encode(self, words: List[str]) -> List[int]:
+        return [self.stoi.get(w, 0) for w in words]
+
+    def decode(self, ids: List[int]) -> List[str]:
+        return [self.itos[i] if 0 <= i < len(self.itos) else "<unk>" for i in ids]
+
+    def save(self, path: str) -> None:
+        os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
+        with open(path, 'w', encoding='utf-8') as f:
+            json.dump({"stoi": self.stoi, "itos": self.itos}, f, ensure_ascii=False, indent=2)
+
+    @staticmethod
+    def load(path: str) -> 'TextVocab':
+        with open(path, 'r', encoding='utf-8') as f:
+            d = json.load(f)
+        tv = TextVocab()
+        tv.stoi = {k: int(v) for k, v in d["stoi"].items()}
+        tv.itos = list(d["itos"])
+        return tv
+
+# -------------------------
+# Image loading / preprocessing
+# -------------------------
+
+
+def load_and_preprocess_image(path: str, size: int = 32) -> np.ndarray:
+    img = Image.open(path).convert('RGB')
+    img = img.resize((size, size), Image.BICUBIC)
+    arr = np.array(img, dtype=np.uint8)
+    return arr
+
+
+def image_to_tokens(img_arr: np.ndarray) -> np.ndarray:
+    # flatten the channels -> tokens 0..255
+    return img_arr.flatten()
+
+
+def tokens_to_image(tokens: np.ndarray, size: int = 32) -> np.ndarray:
+    return tokens.reshape(size, size, 3)
+
+# -------------------------
+# Config & simplified AR model (text+image)
+# -------------------------
+
+@dataclass
+class Config:
+    d_model: int = 64
+    hidden: int = 128
+
+
+class ARBigramText:
+    """Minimal autoregressive model shared by image and text.
+
+    Note: a toy model, useful for educational experiments and
+    naive text-conditioned generation.
+    """
+    def __init__(self, vocab_size_text: int, cfg: Config):
+        self.cfg = cfg
+        d = cfg.d_model
+        h = cfg.hidden
+        self.E_tok = 0.02 * np.random.randn(TOKEN_VOCAB_SIZE, d).astype(np.float32)
+        self.E_txt = 0.02 * np.random.randn(vocab_size_text, d).astype(np.float32)
+        self.W1 = 0.02 * np.random.randn(2 * d, h).astype(np.float32)
+        self.b1 = np.zeros((h,), dtype=np.float32)
+        self.W2 = 0.02 * np.random.randn(h, TOKEN_VOCAB_SIZE).astype(np.float32)
+        self.b2 = np.zeros((TOKEN_VOCAB_SIZE,), dtype=np.float32)
+        self.zero_grads()
+
+    def zero_grads(self) -> None:
+        self.gE_tok = np.zeros_like(self.E_tok)
+        self.gE_txt = np.zeros_like(self.E_txt)
+        self.gW1 = np.zeros_like(self.W1)
+        self.gb1 = np.zeros_like(self.b1)
+        self.gW2 = np.zeros_like(self.W2)
+        self.gb2 = np.zeros_like(self.b2)
+
+    @staticmethod
+    def relu(x):
+        return np.maximum(0, x)
+
+    @staticmethod
+    def softmax(x):
+        x = x - x.max(axis=1, keepdims=True)
+        ex = np.exp(x)
+        return ex / ex.sum(axis=1, keepdims=True)
+
+    def text_embed(self, text_ids: np.ndarray) -> np.ndarray:
+        if text_ids is None or text_ids.size == 0:
+            return np.zeros((1, self.cfg.d_model), dtype=np.float32)
+        emb = self.E_txt[text_ids]
+        return emb.mean(axis=0, keepdims=True)
+
+    def forward(self, prev_tokens: np.ndarray, text_ids: np.ndarray) -> Tuple[np.ndarray, dict]:
+        N = prev_tokens.shape[0]
+        emb_prev = self.E_tok[prev_tokens]
+        emb_text = np.repeat(self.text_embed(text_ids), N, axis=0)
+        x = np.concatenate([emb_prev, emb_text], axis=1)
+        hpre = x @ self.W1 + self.b1
+        h = self.relu(hpre)
+        logits = h @ self.W2 + self.b2
+        cache = {'x': x, 'hpre': hpre, 'h': h, 'emb_prev_idx': prev_tokens, 'emb_text_ids': text_ids}
+        return logits, cache
+
+    def loss_and_grads(self, prev_tokens: np.ndarray, targets: np.ndarray, text_ids: np.ndarray) -> Tuple[float, np.ndarray]:
+        logits, cache = self.forward(prev_tokens, text_ids)
+        probs = self.softmax(logits)
+        N = targets.size
+        loss = -np.log(probs[np.arange(N), targets] + 1e-12).mean()
+        dlogits = probs
+        dlogits[np.arange(N), targets] -= 1
+        dlogits /= N
+        self.gW2 += cache['h'].T @ dlogits
+        self.gb2 += dlogits.sum(axis=0)
+        dh = dlogits @ self.W2.T
+        dhpre = dh * (cache['hpre'] > 0)
+        self.gW1 += cache['x'].T @ dhpre
+        self.gb1 += dhpre.sum(axis=0)
+        dx = dhpre @ self.W1.T
+        d_emb_prev = dx[:, :self.cfg.d_model]
+        d_emb_text = dx[:, self.cfg.d_model:]
+        np.add.at(self.gE_tok, cache['emb_prev_idx'], d_emb_prev)
+        text_ids_arr = cache['emb_text_ids']
+        if text_ids_arr is not None and text_ids_arr.size > 0:
+            d_text_mean = d_emb_text.mean(axis=0, keepdims=True)
+            for idx in text_ids_arr:
+                self.gE_txt[idx] += d_text_mean[0]
+        return loss, probs
+
+    def sgd_step(self, lr: float = 1e-2) -> None:
+        for p, g in [
+            (self.E_tok, self.gE_tok), (self.E_txt, self.gE_txt),
+            (self.W1, self.gW1), (self.b1, self.gb1),
+            (self.W2, self.gW2), (self.b2, self.gb2)
+        ]:
+            p -= lr * g
+        self.zero_grads()
+
+    def predict_next(self, prev_token: int, text_ids: np.ndarray, temperature: float = 1.0) -> int:
+        logits, _ = self.forward(np.array([prev_token], dtype=np.int32), text_ids)
+        logits = logits / max(1e-6, temperature)
+        probs = self.softmax(logits)[0]
+        return int(np.random.choice(TOKEN_VOCAB_SIZE, p=probs))
+
+    def init_token_from_text(self, text_ids: np.ndarray) -> int:
+        d = self.cfg.d_model
+        emb_text = self.text_embed(text_ids)
+        x0 = np.concatenate([np.zeros((1, d), dtype=np.float32), emb_text], axis=1)
+        h0 = self.relu(x0 @ self.W1 + self.b1)
+        logits0 = h0 @ self.W2 + self.b2
+        probs0 = self.softmax(logits0)[0]
+        return int(np.argmax(probs0))
+
+    def generate(self, text_ids: np.ndarray, length: int, mode: str = "quick",
+                 prefix: Optional[List[int]] = None, temperature: float = 1.0) -> np.ndarray:
+        seq: List[int] = []
+        if prefix and len(prefix) > 0:
+            seq.extend(prefix)
+        else:
+            if mode == "quick":
+                seq.append(self.init_token_from_text(text_ids))
+            else:
+                seq.append(0)
+        while len(seq) < length:
+            nxt = self.predict_next(seq[-1], text_ids, temperature=temperature)
+            seq.append(nxt)
+        return np.array(seq, dtype=np.uint8)
+
+    def save(self, path: str) -> None:
+        os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
+        np.savez_compressed(path,
+                            E_tok=self.E_tok, E_txt=self.E_txt,
+                            W1=self.W1, b1=self.b1, W2=self.W2, b2=self.b2,
+                            d_model=self.cfg.d_model, hidden=self.cfg.hidden)
+
+    @staticmethod
+    def load(path: str) -> 'ARBigramText':
+        d = np.load(path)
+        cfg = Config(int(d['d_model']), int(d['hidden']))
+        model = ARBigramText(vocab_size_text=d['E_txt'].shape[0], cfg=cfg)
+        model.E_tok = d['E_tok']
+        model.E_txt = d['E_txt']
+        model.W1 = d['W1']
+        model.b1 = d['b1']
+        model.W2 = d['W2']
+        model.b2 = d['b2']
+        model.zero_grads()
+        return model
+
+# -------------------------
+# Image dataset helpers
+# -------------------------
+
+
+def build_dataset(data_dir: str, size: int = 32) -> Tuple[List[np.ndarray], List[List[str]]]:
+    imgs = []
+    texts = []
+    for fname in os.listdir(data_dir):
+        if not fname.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
+            continue
+        path = os.path.join(data_dir, fname)
+        try:
+            arr = load_and_preprocess_image(path, size=size)
+        except Exception:
+            continue
+        toks = image_to_tokens(arr)
+        imgs.append(toks)
+        name = os.path.splitext(fname)[0]
+        words = tokenize_text(name)
+        texts.append(words)
+    return imgs, texts
+
+
+def make_batches(img_tokens: List[np.ndarray], text_ids_list: List[np.ndarray], batch_size: int = 16):
+    n = len(img_tokens)
+    if n == 0:
+        raise RuntimeError("Aucun exemple dans make_batches")
+    while True:
+        idxs = np.random.choice(n, size=batch_size)
+        prev_list = []
+        targ_list = []
+        txt_ids_sample = None
+        for i in idxs:
+            seq = img_tokens[i]
+            if seq.size < 2:
+                continue
+            pos = np.random.randint(1, seq.size)
+            prev_list.append(seq[pos-1])
+            targ_list.append(seq[pos])
+            txt_ids_sample = text_ids_list[i]
+        if not prev_list:
+            continue
+        prev = np.array(prev_list, dtype=np.int32)
+        targ = np.array(targ_list, dtype=np.int32)
+        yield prev, targ, txt_ids_sample
+
+# -------------------------
+# Image training / generation
+# -------------------------
+
+
+def train_model(data_dir: str, out_dir: str, size: int = 32, d_model: int = 64, hidden: int = 128,
+                batch_size: int = 32, epochs: int = 3, lr: float = 0.02, seed: int = 42,
+                log_cb: Optional[Callable[[str], None]] = None) -> Tuple[str, str]:
+    set_seed(seed)
+    imgs, texts = build_dataset(data_dir, size=size)
+    if not imgs:
+        raise RuntimeError("Aucune image trouvée dans le dossier sélectionné.")
+    vocab = TextVocab()
+    vocab.build([w for doc in texts for w in doc], min_freq=1)
+    text_ids_list = [np.array(vocab.encode(doc), dtype=np.int32) for doc in texts]
+    model = ARBigramText(vocab_size_text=len(vocab.itos), cfg=Config(d_model=d_model, hidden=hidden))
+    batches = make_batches(imgs, text_ids_list, batch_size=batch_size)
+    steps_per_epoch = max(100, len(imgs))
+    if log_cb:
+        log_cb(f"[IMG] Dataset: {len(imgs)} images | Vocab texte: {len(vocab.itos)}")
+    for epoch in range(1, epochs + 1):
+        running = 0.0
+        for step in range(steps_per_epoch):
+            prev, targ, txt_ids = next(batches)
+            loss, _ = model.loss_and_grads(prev, targ, txt_ids)
+            model.sgd_step(lr=lr)
+            running += loss
+            if (step+1) % 50 == 0 and log_cb:
+                log_cb(f"[IMG] Epoch {epoch} step {step+1}/{steps_per_epoch} avg_loss={(running/(step+1)):.4f}")
+        if log_cb:
+            log_cb(f"[IMG] Epoch {epoch}: loss={(running/steps_per_epoch):.4f}")
+    os.makedirs(out_dir, exist_ok=True)
+    model_path = os.path.join(out_dir, 'model_weights.npz')
+    vocab_path = os.path.join(out_dir, 'vocab.json')
+    model.save(model_path)
+    vocab.save(vocab_path)
+    if log_cb:
+        log_cb(f"[IMG] Sauvé: {model_path}\n[IMG] Sauvé: {vocab_path}")
+    return model_path, vocab_path
+
+
+def generate_image_from_model(model_dir: str, prompt: str, out_path: str, size: int = 32, mode: str = 'quick',
+                              prefix_path: Optional[str] = None, prefix_ratio: float = 0.1, temperature: float = 1.0) -> str:
+    vocab = TextVocab.load(os.path.join(model_dir, 'vocab.json'))
+    model = ARBigramText.load(os.path.join(model_dir, 'model_weights.npz'))
+    words_all = tokenize_text(prompt)
+    words_known = [w for w in words_all if w in vocab.stoi]
+    if not words_known:
+        words_known = ["<unk>"]
+    text_ids = np.array(vocab.encode(words_known), dtype=np.int32)
+    length = size * size
+    prefix = None
+    if prefix_path:
+        arr = load_and_preprocess_image(prefix_path, size=size)
+        toks = image_to_tokens(arr)
+        k = max(1, int(len(toks) * prefix_ratio))
+        prefix = toks[:k].tolist()
+    seq = model.generate(text_ids, length=length, mode=mode, prefix=prefix, temperature=temperature)
+    seq = np.array(seq, dtype=np.uint8)
+    side = int(np.sqrt(len(seq)))
+    img_flat = seq[:side * side].reshape((side, side))
+    img_rgb = np.stack((img_flat,)*3, axis=-1).astype(np.uint8)
+    Image.fromarray(img_rgb, mode='RGB').save(out_path)
+    return out_path
+
+# -------------------------
+# Text generator
+# -------------------------
+
+
+def load_text_dataset(json_path: str) -> List[List[str]]:
+    with open(json_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    texts = data.get('texts', [])
+    return [tokenize_text(s) for s in texts if isinstance(s, str) and s.strip()]
+
+
+def make_text_batches(seqs: List[List[int]], batch_size: int = 16):
+    data = [np.array(s, dtype=np.int32) for s in seqs if len(s) > 1]
+    if not data:
+        raise RuntimeError("Dataset texte trop petit ou mal formaté.")
+    while True:
+        prev, targ = [], []
+        for _ in range(batch_size):
+            seq = data[np.random.randint(len(data))]
+            pos = np.random.randint(1, len(seq))
+            prev.append(seq[pos-1])
+            targ.append(seq[pos])
+        yield np.array(prev, dtype=np.int32), np.array(targ, dtype=np.int32)
+
+
+def train_text_model(dataset_json: str, out_dir: str, d_model: int = 64, hidden: int = 128,
+                     batch_size: int = 32, epochs: int = 3, lr: float = 0.02,
+                     log_cb: Optional[Callable[[str], None]] = None) -> Tuple[str, str]:
+    texts = load_text_dataset(dataset_json)
+    if not texts:
+        raise RuntimeError("Aucun texte trouvé dans le JSON.")
+    vocab = TextVocab()
+    vocab.build([w for doc in texts for w in doc], min_freq=1)
+    seqs = [vocab.encode(doc) for doc in texts]
+    model = ARBigramText(vocab_size_text=len(vocab.itos), cfg=Config(d_model=d_model, hidden=hidden))
+    batches = make_text_batches(seqs, batch_size=batch_size)
+    steps_per_epoch = max(100, len(seqs))
+    if log_cb:
+        log_cb(f"[TEXT] Dataset: {len(seqs)} phrases | Vocab: {len(vocab.itos)} mots")
+    for epoch in range(1, epochs+1):
+        running = 0.0
+        for step in range(steps_per_epoch):
+            prev, targ = next(batches)
+            loss, _ = model.loss_and_grads(prev, targ, np.array([], dtype=np.int32))
+            model.sgd_step(lr=lr)
+            running += loss
+            if (step+1) % 100 == 0 and log_cb:
+                log_cb(f"[TEXT] Epoch {epoch} step {step+1}/{steps_per_epoch} avg_loss={(running/(step+1)):.4f}")
+        if log_cb:
+            log_cb(f"[TEXT] Epoch {epoch}: loss={(running/steps_per_epoch):.4f}")
+    os.makedirs(out_dir, exist_ok=True)
+    model_path = os.path.join(out_dir, 'text_model.npz')
+    vocab_path = os.path.join(out_dir, 'text_vocab.json')
+    model.save(model_path)
+    vocab.save(vocab_path)
+    if log_cb:
+        log_cb(f"[TEXT] Sauvé: {model_path}\n[TEXT] Sauvé: {vocab_path}")
+    return model_path, vocab_path
+
+
+def generate_text_from_model(model_dir: str, prompt: str, length: int = 30, temperature: float = 1.0) -> str:
+    vocab = TextVocab.load(os.path.join(model_dir, 'text_vocab.json'))
+    model = ARBigramText.load(os.path.join(model_dir, 'text_model.npz'))
+    words = tokenize_text(prompt)
+    if not words:
+        words = ["<unk>"]
+    ids = vocab.encode(words)
+    start_id = ids[-1] if ids else 0
+    seq = model.generate(np.array([start_id], dtype=np.int32), length=length, mode="quick", prefix=None, temperature=temperature)
+    mapped_ids = [int(tok) % len(vocab.itos) for tok in seq.tolist()]
+    return " ".join(vocab.decode(mapped_ids))
+
+# -------------------------
+# Exports
+# -------------------------
+
+__all__ = [
+    'TOKEN_VOCAB_SIZE', 'set_seed', 'tokenize_text',
+    'TextVocab', 'load_and_preprocess_image', 'image_to_tokens', 'tokens_to_image',
+    'ARBigramText', 'Config', 'build_dataset', 'train_model', 'generate_image_from_model',
+    'load_text_dataset', 'train_text_model', 'generate_text_from_model'
+]
+
+# -------------------------
+# Usage example (commented out)
+# -------------------------
+
+if __name__ == '__main__':
+    print('mini_imggen_numpy_lib: importez la librairie et appelez les fonctions depuis votre application.')
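The module's docstring shows a quick image workflow; the text side follows the same shape. Below is a minimal sketch, assuming the package is installed and using illustrative file paths (texts.json, ckpt_text/); load_text_dataset reads a JSON object with a "texts" list, as the function above shows:

    import json
    from mini_imggen_numpy_lib import train_text_model, generate_text_from_model

    # Tiny toy dataset in the {"texts": [...]} layout read by load_text_dataset.
    with open("texts.json", "w", encoding="utf-8") as f:
        json.dump({"texts": ["a tabby cat sleeps", "a black cat jumps", "a dog runs fast"]}, f)

    # Train the toy bigram model; writes text_model.npz / text_vocab.json into ckpt_text/.
    train_text_model("texts.json", "ckpt_text", epochs=2, log_cb=print)

    # Sample a short continuation conditioned on the prompt's last known word.
    print(generate_text_from_model("ckpt_text", "a cat", length=10, temperature=1.0))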
{mini_imggen_numpy_lib-0.1.0 → mini_imggen_numpy_lib-0.1.2}/mini_imggen_numpy_lib.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mini_imggen_numpy_lib
-Version: 0.1.0
+Version: 0.1.2
 Summary: A lightweight educational Python library for toy image & text generation using NumPy only.
 Home-page: https://github.com/Leo62-glitch/mini_imggen_numpy_lib
 Author: Léo
mini_imggen_numpy_lib-0.1.2/setup.py
@@ -0,0 +1,18 @@
+from setuptools import setup, find_packages
+
+setup(
+    name="mini_imggen_numpy_lib",
+    version="0.1.2",  # ↑ bump the version before republishing
+    description="A lightweight educational Python library for toy image & text generation using NumPy only.",
+    author="Léo",
+    url="https://github.com/Leo62-glitch/mini_imggen_numpy_lib",
+    packages=find_packages(),  # <-- instead of py_modules
+    install_requires=["numpy", "pillow"],
+    python_requires=">=3.8",
+    include_package_data=True,
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+)
mini_imggen_numpy_lib-0.1.0/setup.py
@@ -1,21 +0,0 @@
-from setuptools import setup, find_packages
-
-
-setup(
-    name="mini_imggen_numpy_lib",
-    version="0.1.0",
-    description="A lightweight educational Python library for toy image & text generation using NumPy only.",
-    author="Léo",
-    url="https://github.com/Leo62-glitch/mini_imggen_numpy_lib",
-    py_modules=["mini_imggen_numpy_lib"],
-    install_requires=[
-        "numpy",
-        "pillow"
-    ],
-    python_requires=">=3.8",
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-    ],
-)
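The packaging change between the two setup.py files above is the switch from py_modules=["mini_imggen_numpy_lib"] (a single top-level module) to packages=find_packages() (a package directory with an __init__.py). A small, hypothetical smoke test of the 0.1.2 layout, assuming the release was installed with pip:

    # Both the package-level re-exports and the full module path should resolve in 0.1.2.
    import mini_imggen_numpy_lib
    from mini_imggen_numpy_lib import train_model, generate_image_from_model
    from mini_imggen_numpy_lib.mini_imggen_numpy_lib import ARBigramText, Config

    print(mini_imggen_numpy_lib.TOKEN_VOCAB_SIZE)  # 256, defined in the main module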
Files without changes: README.md, mini_imggen_numpy_lib.egg-info/dependency_links.txt, mini_imggen_numpy_lib.egg-info/requires.txt, mini_imggen_numpy_lib.egg-info/top_level.txt, pyproject.toml, setup.cfg