claudient 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +42 -0
- package/CONTEXT.md +58 -0
- package/README.md +165 -0
- package/agents/build-resolvers/de/python-resolver.md +64 -0
- package/agents/build-resolvers/de/typescript-resolver.md +65 -0
- package/agents/build-resolvers/es/python-resolver.md +64 -0
- package/agents/build-resolvers/es/typescript-resolver.md +65 -0
- package/agents/build-resolvers/fr/python-resolver.md +64 -0
- package/agents/build-resolvers/fr/typescript-resolver.md +65 -0
- package/agents/build-resolvers/nl/python-resolver.md +64 -0
- package/agents/build-resolvers/nl/typescript-resolver.md +65 -0
- package/agents/build-resolvers/python-resolver.md +62 -0
- package/agents/build-resolvers/typescript-resolver.md +63 -0
- package/agents/core/architect.md +64 -0
- package/agents/core/code-reviewer.md +78 -0
- package/agents/core/de/architect.md +66 -0
- package/agents/core/de/code-reviewer.md +80 -0
- package/agents/core/de/planner.md +63 -0
- package/agents/core/de/security-reviewer.md +93 -0
- package/agents/core/es/architect.md +66 -0
- package/agents/core/es/code-reviewer.md +80 -0
- package/agents/core/es/planner.md +63 -0
- package/agents/core/es/security-reviewer.md +93 -0
- package/agents/core/fr/architect.md +66 -0
- package/agents/core/fr/code-reviewer.md +80 -0
- package/agents/core/fr/planner.md +63 -0
- package/agents/core/fr/security-reviewer.md +93 -0
- package/agents/core/nl/architect.md +66 -0
- package/agents/core/nl/code-reviewer.md +80 -0
- package/agents/core/nl/planner.md +63 -0
- package/agents/core/nl/security-reviewer.md +93 -0
- package/agents/core/planner.md +61 -0
- package/agents/core/security-reviewer.md +91 -0
- package/guides/agent-orchestration.md +231 -0
- package/guides/de/agent-orchestration.md +174 -0
- package/guides/de/getting-started.md +164 -0
- package/guides/de/hooks-cookbook.md +160 -0
- package/guides/de/memory-management.md +153 -0
- package/guides/de/security.md +180 -0
- package/guides/de/skill-authoring.md +214 -0
- package/guides/de/token-optimization.md +156 -0
- package/guides/es/agent-orchestration.md +174 -0
- package/guides/es/getting-started.md +164 -0
- package/guides/es/hooks-cookbook.md +160 -0
- package/guides/es/memory-management.md +153 -0
- package/guides/es/security.md +180 -0
- package/guides/es/skill-authoring.md +214 -0
- package/guides/es/token-optimization.md +156 -0
- package/guides/fr/agent-orchestration.md +174 -0
- package/guides/fr/getting-started.md +164 -0
- package/guides/fr/hooks-cookbook.md +227 -0
- package/guides/fr/memory-management.md +169 -0
- package/guides/fr/security.md +180 -0
- package/guides/fr/skill-authoring.md +214 -0
- package/guides/fr/token-optimization.md +158 -0
- package/guides/getting-started.md +164 -0
- package/guides/hooks-cookbook.md +423 -0
- package/guides/memory-management.md +192 -0
- package/guides/nl/agent-orchestration.md +174 -0
- package/guides/nl/getting-started.md +164 -0
- package/guides/nl/hooks-cookbook.md +160 -0
- package/guides/nl/memory-management.md +153 -0
- package/guides/nl/security.md +180 -0
- package/guides/nl/skill-authoring.md +214 -0
- package/guides/nl/token-optimization.md +156 -0
- package/guides/security.md +229 -0
- package/guides/skill-authoring.md +226 -0
- package/guides/token-optimization.md +169 -0
- package/hooks/lifecycle/cost-tracker.md +49 -0
- package/hooks/lifecycle/cost-tracker.sh +59 -0
- package/hooks/lifecycle/pre-compact-save.md +56 -0
- package/hooks/lifecycle/pre-compact-save.sh +37 -0
- package/hooks/lifecycle/session-start.md +50 -0
- package/hooks/lifecycle/session-start.sh +47 -0
- package/hooks/post-tool-use/audit-log.md +53 -0
- package/hooks/post-tool-use/audit-log.sh +53 -0
- package/hooks/post-tool-use/prettier.md +53 -0
- package/hooks/post-tool-use/prettier.sh +49 -0
- package/hooks/pre-tool-use/block-dangerous.md +48 -0
- package/hooks/pre-tool-use/block-dangerous.sh +76 -0
- package/hooks/pre-tool-use/git-push-confirm.md +46 -0
- package/hooks/pre-tool-use/git-push-confirm.sh +36 -0
- package/mcp/configs/github.json +11 -0
- package/mcp/configs/postgres.json +11 -0
- package/mcp/de/recommended-servers.md +170 -0
- package/mcp/es/recommended-servers.md +170 -0
- package/mcp/fr/recommended-servers.md +170 -0
- package/mcp/nl/recommended-servers.md +170 -0
- package/mcp/recommended-servers.md +168 -0
- package/package.json +45 -0
- package/prompts/project-starters/de/fastapi-project.md +62 -0
- package/prompts/project-starters/de/nextjs-project.md +82 -0
- package/prompts/project-starters/es/fastapi-project.md +62 -0
- package/prompts/project-starters/es/nextjs-project.md +82 -0
- package/prompts/project-starters/fastapi-project.md +60 -0
- package/prompts/project-starters/fr/fastapi-project.md +62 -0
- package/prompts/project-starters/fr/nextjs-project.md +82 -0
- package/prompts/project-starters/nextjs-project.md +80 -0
- package/prompts/project-starters/nl/fastapi-project.md +62 -0
- package/prompts/project-starters/nl/nextjs-project.md +82 -0
- package/prompts/system-prompts/ai-product.md +80 -0
- package/prompts/system-prompts/data-pipeline.md +76 -0
- package/prompts/system-prompts/de/ai-product.md +82 -0
- package/prompts/system-prompts/de/data-pipeline.md +78 -0
- package/prompts/system-prompts/de/saas-backend.md +71 -0
- package/prompts/system-prompts/es/ai-product.md +82 -0
- package/prompts/system-prompts/es/data-pipeline.md +78 -0
- package/prompts/system-prompts/es/saas-backend.md +71 -0
- package/prompts/system-prompts/fr/ai-product.md +82 -0
- package/prompts/system-prompts/fr/data-pipeline.md +78 -0
- package/prompts/system-prompts/fr/saas-backend.md +71 -0
- package/prompts/system-prompts/nl/ai-product.md +82 -0
- package/prompts/system-prompts/nl/data-pipeline.md +78 -0
- package/prompts/system-prompts/nl/saas-backend.md +71 -0
- package/prompts/system-prompts/saas-backend.md +69 -0
- package/prompts/task-specific/changelog.md +81 -0
- package/prompts/task-specific/de/changelog.md +83 -0
- package/prompts/task-specific/de/debugging.md +78 -0
- package/prompts/task-specific/de/pr-description.md +69 -0
- package/prompts/task-specific/debugging.md +76 -0
- package/prompts/task-specific/es/changelog.md +83 -0
- package/prompts/task-specific/es/debugging.md +78 -0
- package/prompts/task-specific/es/pr-description.md +69 -0
- package/prompts/task-specific/fr/changelog.md +83 -0
- package/prompts/task-specific/fr/debugging.md +78 -0
- package/prompts/task-specific/fr/pr-description.md +69 -0
- package/prompts/task-specific/nl/changelog.md +83 -0
- package/prompts/task-specific/nl/debugging.md +78 -0
- package/prompts/task-specific/nl/pr-description.md +69 -0
- package/prompts/task-specific/pr-description.md +67 -0
- package/rules/common/coding-style.md +45 -0
- package/rules/common/de/coding-style.md +47 -0
- package/rules/common/de/git.md +48 -0
- package/rules/common/de/performance.md +40 -0
- package/rules/common/de/security.md +45 -0
- package/rules/common/de/testing.md +45 -0
- package/rules/common/es/coding-style.md +47 -0
- package/rules/common/es/git.md +48 -0
- package/rules/common/es/performance.md +40 -0
- package/rules/common/es/security.md +45 -0
- package/rules/common/es/testing.md +45 -0
- package/rules/common/fr/coding-style.md +47 -0
- package/rules/common/fr/git.md +48 -0
- package/rules/common/fr/performance.md +40 -0
- package/rules/common/fr/security.md +45 -0
- package/rules/common/fr/testing.md +45 -0
- package/rules/common/git.md +46 -0
- package/rules/common/nl/coding-style.md +47 -0
- package/rules/common/nl/git.md +48 -0
- package/rules/common/nl/performance.md +40 -0
- package/rules/common/nl/security.md +45 -0
- package/rules/common/nl/testing.md +45 -0
- package/rules/common/performance.md +38 -0
- package/rules/common/security.md +43 -0
- package/rules/common/testing.md +43 -0
- package/rules/language-specific/de/go.md +48 -0
- package/rules/language-specific/de/python.md +38 -0
- package/rules/language-specific/de/typescript.md +51 -0
- package/rules/language-specific/es/go.md +48 -0
- package/rules/language-specific/es/python.md +38 -0
- package/rules/language-specific/es/typescript.md +51 -0
- package/rules/language-specific/fr/go.md +48 -0
- package/rules/language-specific/fr/python.md +38 -0
- package/rules/language-specific/fr/typescript.md +51 -0
- package/rules/language-specific/go.md +46 -0
- package/rules/language-specific/nl/go.md +48 -0
- package/rules/language-specific/nl/python.md +38 -0
- package/rules/language-specific/nl/typescript.md +51 -0
- package/rules/language-specific/python.md +36 -0
- package/rules/language-specific/typescript.md +49 -0
- package/scripts/cli.js +161 -0
- package/scripts/link-skills.sh +35 -0
- package/scripts/list-skills.sh +34 -0
- package/skills/ai-engineering/agent-construction.md +285 -0
- package/skills/ai-engineering/claude-api.md +248 -0
- package/skills/ai-engineering/de/agent-construction.md +287 -0
- package/skills/ai-engineering/de/claude-api.md +250 -0
- package/skills/ai-engineering/es/agent-construction.md +287 -0
- package/skills/ai-engineering/es/claude-api.md +250 -0
- package/skills/ai-engineering/fr/agent-construction.md +287 -0
- package/skills/ai-engineering/fr/claude-api.md +250 -0
- package/skills/ai-engineering/nl/agent-construction.md +287 -0
- package/skills/ai-engineering/nl/claude-api.md +250 -0
- package/skills/backend/dotnet/csharp.md +304 -0
- package/skills/backend/dotnet/de/csharp.md +306 -0
- package/skills/backend/dotnet/es/csharp.md +306 -0
- package/skills/backend/dotnet/fr/csharp.md +306 -0
- package/skills/backend/dotnet/nl/csharp.md +306 -0
- package/skills/backend/go/de/go.md +307 -0
- package/skills/backend/go/es/go.md +307 -0
- package/skills/backend/go/fr/go.md +307 -0
- package/skills/backend/go/go.md +305 -0
- package/skills/backend/go/nl/go.md +307 -0
- package/skills/backend/nodejs/de/nestjs.md +274 -0
- package/skills/backend/nodejs/de/nextjs.md +222 -0
- package/skills/backend/nodejs/es/nestjs.md +274 -0
- package/skills/backend/nodejs/es/nextjs.md +222 -0
- package/skills/backend/nodejs/fr/nestjs.md +274 -0
- package/skills/backend/nodejs/fr/nextjs.md +222 -0
- package/skills/backend/nodejs/nestjs.md +272 -0
- package/skills/backend/nodejs/nextjs.md +220 -0
- package/skills/backend/nodejs/nl/nestjs.md +274 -0
- package/skills/backend/nodejs/nl/nextjs.md +222 -0
- package/skills/backend/python/de/django.md +285 -0
- package/skills/backend/python/de/fastapi.md +244 -0
- package/skills/backend/python/django.md +283 -0
- package/skills/backend/python/es/django.md +285 -0
- package/skills/backend/python/es/fastapi.md +244 -0
- package/skills/backend/python/fastapi.md +242 -0
- package/skills/backend/python/fr/django.md +285 -0
- package/skills/backend/python/fr/fastapi.md +244 -0
- package/skills/backend/python/nl/django.md +285 -0
- package/skills/backend/python/nl/fastapi.md +244 -0
- package/skills/data-ml/dbt-data-pipelines.md +155 -0
- package/skills/data-ml/de/dbt-data-pipelines.md +157 -0
- package/skills/data-ml/de/pandas-polars.md +147 -0
- package/skills/data-ml/de/pytorch-tensorflow.md +171 -0
- package/skills/data-ml/es/dbt-data-pipelines.md +157 -0
- package/skills/data-ml/es/pandas-polars.md +147 -0
- package/skills/data-ml/es/pytorch-tensorflow.md +171 -0
- package/skills/data-ml/fr/dbt-data-pipelines.md +157 -0
- package/skills/data-ml/fr/pandas-polars.md +147 -0
- package/skills/data-ml/fr/pytorch-tensorflow.md +171 -0
- package/skills/data-ml/nl/dbt-data-pipelines.md +157 -0
- package/skills/data-ml/nl/pandas-polars.md +147 -0
- package/skills/data-ml/nl/pytorch-tensorflow.md +171 -0
- package/skills/data-ml/pandas-polars.md +145 -0
- package/skills/data-ml/pytorch-tensorflow.md +169 -0
- package/skills/database/de/graphql.md +181 -0
- package/skills/database/es/graphql.md +181 -0
- package/skills/database/fr/graphql.md +181 -0
- package/skills/database/graphql.md +179 -0
- package/skills/database/nl/graphql.md +181 -0
- package/skills/devops-infra/de/docker.md +133 -0
- package/skills/devops-infra/de/github-actions.md +179 -0
- package/skills/devops-infra/de/kubernetes.md +129 -0
- package/skills/devops-infra/de/terraform.md +130 -0
- package/skills/devops-infra/docker.md +131 -0
- package/skills/devops-infra/es/docker.md +133 -0
- package/skills/devops-infra/es/github-actions.md +179 -0
- package/skills/devops-infra/es/kubernetes.md +129 -0
- package/skills/devops-infra/es/terraform.md +130 -0
- package/skills/devops-infra/fr/docker.md +133 -0
- package/skills/devops-infra/fr/github-actions.md +179 -0
- package/skills/devops-infra/fr/kubernetes.md +129 -0
- package/skills/devops-infra/fr/terraform.md +130 -0
- package/skills/devops-infra/github-actions.md +177 -0
- package/skills/devops-infra/kubernetes.md +127 -0
- package/skills/devops-infra/nl/docker.md +133 -0
- package/skills/devops-infra/nl/github-actions.md +179 -0
- package/skills/devops-infra/nl/kubernetes.md +129 -0
- package/skills/devops-infra/nl/terraform.md +130 -0
- package/skills/devops-infra/terraform.md +128 -0
- package/skills/finance-payments/de/stripe.md +187 -0
- package/skills/finance-payments/es/stripe.md +187 -0
- package/skills/finance-payments/fr/stripe.md +187 -0
- package/skills/finance-payments/nl/stripe.md +187 -0
- package/skills/finance-payments/stripe.md +185 -0
- package/workflows/code-review.md +151 -0
- package/workflows/de/code-review.md +153 -0
- package/workflows/de/debugging-session.md +146 -0
- package/workflows/de/feature-development.md +155 -0
- package/workflows/de/new-project-bootstrap.md +175 -0
- package/workflows/de/refactor-safely.md +150 -0
- package/workflows/debugging-session.md +144 -0
- package/workflows/es/code-review.md +153 -0
- package/workflows/es/debugging-session.md +146 -0
- package/workflows/es/feature-development.md +155 -0
- package/workflows/es/new-project-bootstrap.md +175 -0
- package/workflows/es/refactor-safely.md +150 -0
- package/workflows/feature-development.md +153 -0
- package/workflows/fr/code-review.md +153 -0
- package/workflows/fr/debugging-session.md +146 -0
- package/workflows/fr/feature-development.md +155 -0
- package/workflows/fr/new-project-bootstrap.md +175 -0
- package/workflows/fr/refactor-safely.md +150 -0
- package/workflows/new-project-bootstrap.md +173 -0
- package/workflows/nl/code-review.md +153 -0
- package/workflows/nl/debugging-session.md +146 -0
- package/workflows/nl/feature-development.md +155 -0
- package/workflows/nl/new-project-bootstrap.md +175 -0
- package/workflows/nl/refactor-safely.md +150 -0
- package/workflows/refactor-safely.md +148 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
> 🇩🇪 Dies ist die deutsche Übersetzung. [Englische Version](../pytorch-tensorflow.md).
|
|
2
|
+
|
|
3
|
+
# PyTorch / TensorFlow Skill
|
|
4
|
+
|
|
5
|
+
## Wann aktivieren
|
|
6
|
+
- Trainingsschleifen für neuronale Netze in PyTorch schreiben
|
|
7
|
+
- Keras/TensorFlow-Modelle bauen und trainieren
|
|
8
|
+
- Benutzerdefinierte Verlustfunktionen oder Modellarchitekturen implementieren
|
|
9
|
+
- GPU-Training mit Geräteverwaltung einrichten
|
|
10
|
+
- Datenlader und Vorverarbeitungs-Pipelines für das Modelltraining schreiben
|
|
11
|
+
- Modellauswertung, Checkpointing und Early Stopping implementieren
|
|
12
|
+
- NaN-Verluste, explodierende Gradienten oder Trainingsinstabilität debuggen
|
|
13
|
+
- Modelle zwischen PyTorch und TensorFlow portieren
|
|
14
|
+
|
|
15
|
+
## Wann NICHT verwenden
|
|
16
|
+
- scikit-learn-Aufgaben (Klassifikation, Regression, Clustering auf tabellarischen Daten) — kein Deep Learning
|
|
17
|
+
- Pandas/Polars-Datenmanipulation vor dem Modellierungsschritt
|
|
18
|
+
- Hugging Face Fine-Tuning mit Trainer API (anderer Workflow)
|
|
19
|
+
- Nur-Inferenz-Deployments ohne Trainingscode
|
|
20
|
+
|
|
21
|
+
## Anweisungen
|
|
22
|
+
|
|
23
|
+
### PyTorch-Trainingsschleife — Standardstruktur
|
|
24
|
+
```python
|
|
25
|
+
import torch
|
|
26
|
+
import torch.nn as nn
|
|
27
|
+
from torch.utils.data import DataLoader
|
|
28
|
+
|
|
29
|
+
def train(model, train_loader, val_loader, epochs, lr, device):
|
|
30
|
+
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-2)
|
|
31
|
+
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
|
|
32
|
+
criterion = nn.CrossEntropyLoss()
|
|
33
|
+
|
|
34
|
+
best_val_loss = float('inf')
|
|
35
|
+
|
|
36
|
+
for epoch in range(epochs):
|
|
37
|
+
# Training
|
|
38
|
+
model.train()
|
|
39
|
+
train_loss = 0.0
|
|
40
|
+
for batch in train_loader:
|
|
41
|
+
inputs, targets = batch
|
|
42
|
+
inputs, targets = inputs.to(device), targets.to(device)
|
|
43
|
+
|
|
44
|
+
optimizer.zero_grad()
|
|
45
|
+
outputs = model(inputs)
|
|
46
|
+
loss = criterion(outputs, targets)
|
|
47
|
+
loss.backward()
|
|
48
|
+
|
|
49
|
+
# Gradientenclipping — immer für Stabilität
|
|
50
|
+
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
|
|
51
|
+
|
|
52
|
+
optimizer.step()
|
|
53
|
+
train_loss += loss.item()
|
|
54
|
+
|
|
55
|
+
# Validierung
|
|
56
|
+
model.eval()
|
|
57
|
+
val_loss = 0.0
|
|
58
|
+
with torch.no_grad():
|
|
59
|
+
for batch in val_loader:
|
|
60
|
+
inputs, targets = batch
|
|
61
|
+
inputs, targets = inputs.to(device), targets.to(device)
|
|
62
|
+
outputs = model(inputs)
|
|
63
|
+
val_loss += criterion(outputs, targets).item()
|
|
64
|
+
|
|
65
|
+
scheduler.step()
|
|
66
|
+
|
|
67
|
+
# Bestes Modell speichern
|
|
68
|
+
if val_loss < best_val_loss:
|
|
69
|
+
best_val_loss = val_loss
|
|
70
|
+
torch.save(model.state_dict(), 'best_model.pt')
|
|
71
|
+
|
|
72
|
+
print(f"Epoch {epoch+1}/{epochs} | Train: {train_loss/len(train_loader):.4f} | Val: {val_loss/len(val_loader):.4f}")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Geräteverwaltung
|
|
76
|
+
```python
|
|
77
|
+
# Immer explizite Geräteauswahl
|
|
78
|
+
device = torch.device('cuda' if torch.cuda.is_available() else
|
|
79
|
+
'mps' if torch.backends.mps.is_available() else
|
|
80
|
+
'cpu')
|
|
81
|
+
model = model.to(device)
|
|
82
|
+
```
|
|
83
|
+
Niemals `'cuda'` hardcoden — immer Verfügbarkeit prüfen.
|
|
84
|
+
|
|
85
|
+
### Benutzerdefinierte Modellstruktur
|
|
86
|
+
```python
|
|
87
|
+
class MyModel(nn.Module):
|
|
88
|
+
def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.3):
|
|
89
|
+
super().__init__()
|
|
90
|
+
self.network = nn.Sequential(
|
|
91
|
+
nn.Linear(input_dim, hidden_dim),
|
|
92
|
+
nn.LayerNorm(hidden_dim),
|
|
93
|
+
nn.GELU(),
|
|
94
|
+
nn.Dropout(dropout),
|
|
95
|
+
nn.Linear(hidden_dim, output_dim)
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
def forward(self, x):
|
|
99
|
+
return self.network(x)
|
|
100
|
+
```
|
|
101
|
+
`nn.Sequential` für einfache Feedforward-Netze bevorzugen; `forward()`-Überschreibung für komplexe Verzweigungen verwenden.
|
|
102
|
+
|
|
103
|
+
### Trainingsinstabilität debuggen
|
|
104
|
+
1. **NaN-Verlust** → auf log(0) im Verlust, explodierende Eingaben oder Division durch Null bei der Vorverarbeitung prüfen
|
|
105
|
+
2. **Explodierende Gradienten** → `clip_grad_norm_` hinzufügen (bereits in obiger Vorlage)
|
|
106
|
+
3. **Verschwindende Gradienten** → Aktivierungsfunktionen prüfen (sigmoid/tanh in tiefen Netzwerken vermeiden), Residualverbindungen verwenden
|
|
107
|
+
4. **Verlust sinkt nicht** → LR um Faktor 10 reduzieren, DataLoader-Shuffling prüfen, Labels verifizieren
|
|
108
|
+
5. **GPU OOM** → Batch-Größe reduzieren, Gradient Checkpointing verwenden, Mixed Precision verwenden
|
|
109
|
+
|
|
110
|
+
### Mixed Precision Training (PyTorch)
|
|
111
|
+
```python
|
|
112
|
+
from torch.cuda.amp import autocast, GradScaler
|
|
113
|
+
|
|
114
|
+
scaler = GradScaler()
|
|
115
|
+
|
|
116
|
+
for batch in train_loader:
|
|
117
|
+
optimizer.zero_grad()
|
|
118
|
+
with autocast():
|
|
119
|
+
outputs = model(inputs)
|
|
120
|
+
loss = criterion(outputs, targets)
|
|
121
|
+
scaler.scale(loss).backward()
|
|
122
|
+
scaler.unscale_(optimizer)
|
|
123
|
+
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
|
|
124
|
+
scaler.step(optimizer)
|
|
125
|
+
scaler.update()
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### TensorFlow/Keras — Standardstruktur
|
|
129
|
+
```python
|
|
130
|
+
import tensorflow as tf
|
|
131
|
+
|
|
132
|
+
model = tf.keras.Sequential([
|
|
133
|
+
tf.keras.layers.Dense(256, activation='relu'),
|
|
134
|
+
tf.keras.layers.Dropout(0.3),
|
|
135
|
+
tf.keras.layers.Dense(10, activation='softmax')
|
|
136
|
+
])
|
|
137
|
+
|
|
138
|
+
model.compile(
|
|
139
|
+
optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-2),
|
|
140
|
+
loss='sparse_categorical_crossentropy',
|
|
141
|
+
metrics=['accuracy']
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
callbacks = [
|
|
145
|
+
tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
|
|
146
|
+
tf.keras.callbacks.ModelCheckpoint('best_model.keras', save_best_only=True),
|
|
147
|
+
tf.keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5)
|
|
148
|
+
]
|
|
149
|
+
|
|
150
|
+
history = model.fit(
|
|
151
|
+
train_dataset,
|
|
152
|
+
validation_data=val_dataset,
|
|
153
|
+
epochs=100,
|
|
154
|
+
callbacks=callbacks
|
|
155
|
+
)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Beispiel
|
|
159
|
+
|
|
160
|
+
**Benutzer:** Einen PyTorch-Textklassifikator für Sentiment-Analyse (binär) mit Embedding, LSTM und Dropout bauen.
|
|
161
|
+
|
|
162
|
+
**Erwartete Ausgabe:**
|
|
163
|
+
- `SentimentLSTM(nn.Module)` — Embedding-Layer, LSTM, Dropout, linearer Kopf
|
|
164
|
+
- `forward()` — verarbeitet gepackte Sequenzen oder aufgefüllte Eingaben
|
|
165
|
+
- Trainingsschleife mit Gradientenclipping, Validierung pro Epoche und Checkpoint des besten Modells
|
|
166
|
+
- `device` automatisch erkannt (CUDA/MPS/CPU)
|
|
167
|
+
- Train/Val-Aufteilung via `DataLoader` mit Shuffling nur beim Training
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
> **Mit uns arbeiten:** Claudient wird von [Uitbreiden](https://uitbreiden.com/) unterstützt — wir bauen KI-Produkte und B2B-Lösungen mit Entwickler-Communities. ML-Modelle oder KI-gestützte Produkte bauen? [uitbreiden.com](https://uitbreiden.com/)
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
> 🇪🇸 Esta es la traducción en español. [Versión en inglés](../dbt-data-pipelines.md).
|
|
2
|
+
|
|
3
|
+
# Skill de dbt Data Pipelines
|
|
4
|
+
|
|
5
|
+
## Cuándo activar
|
|
6
|
+
- Escribir modelos dbt (capas staging, intermediate, mart)
|
|
7
|
+
- Configurar fuentes, refs y dependencias de dbt
|
|
8
|
+
- Escribir tests de dbt (tests de esquema, tests singulares, tests genéricos personalizados)
|
|
9
|
+
- Configurar la estructura del proyecto dbt para un nuevo data warehouse
|
|
10
|
+
- Escribir macros dbt para lógica SQL reutilizable
|
|
11
|
+
- Configurar documentación dbt y verificaciones de frescura
|
|
12
|
+
- Depurar errores de compilación de dbt o ejecuciones de modelos fallidas
|
|
13
|
+
- Configurar dbt con BigQuery, Snowflake, Redshift o DuckDB
|
|
14
|
+
|
|
15
|
+
## Cuándo NO usar
|
|
16
|
+
- Pipelines ETL sin un warehouse (usar Airflow, Prefect o Dagster en su lugar)
|
|
17
|
+
- Datos de streaming en tiempo real (dbt es solo batch)
|
|
18
|
+
- Transformaciones en memoria con Pandas/Polars (usar el skill de pandas-polars)
|
|
19
|
+
- Ingesta de datos (dbt transforma, no ingiere)
|
|
20
|
+
|
|
21
|
+
## Instrucciones
|
|
22
|
+
|
|
23
|
+
### Arquitectura de capas del proyecto
|
|
24
|
+
Siempre separa los modelos en tres capas:
|
|
25
|
+
```
|
|
26
|
+
models/
|
|
27
|
+
├── staging/ ← 1:1 con tablas fuente. Solo limpieza ligera. Sin joins.
|
|
28
|
+
│ ├── stg_orders.sql
|
|
29
|
+
│ └── stg_customers.sql
|
|
30
|
+
├── intermediate/ ← Lógica de negocio. Se permiten joins. No expuesto a herramientas de BI.
|
|
31
|
+
│ └── int_orders_with_customers.sql
|
|
32
|
+
└── marts/ ← Entidades de negocio finales. Expuesto a BI. Las agregaciones viven aquí.
|
|
33
|
+
├── finance/
|
|
34
|
+
│ └── fct_revenue.sql
|
|
35
|
+
└── marketing/
|
|
36
|
+
└── dim_customers.sql
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Reglas de Staging:**
|
|
40
|
+
- Renombrar columnas a las convenciones del proyecto (snake_case)
|
|
41
|
+
- Castear tipos explícitamente
|
|
42
|
+
- Sin lógica de negocio — sin joins, sin agregaciones
|
|
43
|
+
- Prefijo `stg_`
|
|
44
|
+
|
|
45
|
+
**Reglas de Mart:**
|
|
46
|
+
- Prefijo `fct_` para tablas de hechos (eventos, transacciones)
|
|
47
|
+
- Prefijo `dim_` para tablas de dimensiones (clientes, productos)
|
|
48
|
+
- Siempre documentar en schema.yml
|
|
49
|
+
|
|
50
|
+
### Configuración de modelos
|
|
51
|
+
```sql
|
|
52
|
+
-- models/marts/finance/fct_revenue.sql
|
|
53
|
+
{{
|
|
54
|
+
config(
|
|
55
|
+
materialized='incremental',
|
|
56
|
+
unique_key='order_id',
|
|
57
|
+
on_schema_change='fail'
|
|
58
|
+
)
|
|
59
|
+
}}
|
|
60
|
+
|
|
61
|
+
with orders as (
|
|
62
|
+
select * from {{ ref('int_orders_with_customers') }}
|
|
63
|
+
{% if is_incremental() %}
|
|
64
|
+
where created_at > (select max(created_at) from {{ this }})
|
|
65
|
+
{% endif %}
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
select
|
|
69
|
+
order_id,
|
|
70
|
+
customer_id,
|
|
71
|
+
amount,
|
|
72
|
+
created_at
|
|
73
|
+
from orders
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**Opciones de materialización:**
|
|
77
|
+
- `view`: por defecto — bueno para modelos de staging e intermediate
|
|
78
|
+
- `table`: para consultas costosas consultadas con frecuencia
|
|
79
|
+
- `incremental`: para tablas de hechos grandes que crecen con el tiempo
|
|
80
|
+
- `ephemeral`: CTEs, no materializadas — usar para transformaciones simples llamadas una vez
|
|
81
|
+
|
|
82
|
+
### Testing — requerido en cada modelo mart
|
|
83
|
+
```yaml
|
|
84
|
+
# models/marts/finance/schema.yml
|
|
85
|
+
version: 2
|
|
86
|
+
|
|
87
|
+
models:
|
|
88
|
+
- name: fct_revenue
|
|
89
|
+
description: "Una fila por pedido completado"
|
|
90
|
+
columns:
|
|
91
|
+
- name: order_id
|
|
92
|
+
description: "Clave primaria"
|
|
93
|
+
tests:
|
|
94
|
+
- unique
|
|
95
|
+
- not_null
|
|
96
|
+
- name: customer_id
|
|
97
|
+
tests:
|
|
98
|
+
- not_null
|
|
99
|
+
- relationships:
|
|
100
|
+
to: ref('dim_customers')
|
|
101
|
+
field: customer_id
|
|
102
|
+
- name: amount
|
|
103
|
+
tests:
|
|
104
|
+
- not_null
|
|
105
|
+
- dbt_utils.accepted_range:
|
|
106
|
+
min_value: 0
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Tests mínimos en cada modelo mart: `unique` + `not_null` en la clave primaria, `not_null` en las claves foráneas críticas.
|
|
110
|
+
|
|
111
|
+
### Configuración de fuentes
|
|
112
|
+
```yaml
|
|
113
|
+
# models/staging/sources.yml
|
|
114
|
+
version: 2
|
|
115
|
+
|
|
116
|
+
sources:
|
|
117
|
+
- name: raw_stripe
|
|
118
|
+
database: raw
|
|
119
|
+
schema: stripe
|
|
120
|
+
freshness:
|
|
121
|
+
warn_after: {count: 12, period: hour}
|
|
122
|
+
error_after: {count: 24, period: hour}
|
|
123
|
+
loaded_at_field: _ingested_at
|
|
124
|
+
tables:
|
|
125
|
+
- name: charges
|
|
126
|
+
description: "Cargos de Stripe sin procesar de Fivetran"
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Siempre establece `freshness` en las fuentes — los datos de fuente obsoletos son un fallo silencioso.
|
|
130
|
+
|
|
131
|
+
### Macros para lógica reutilizable
|
|
132
|
+
```sql
|
|
133
|
+
-- macros/cents_to_dollars.sql
|
|
134
|
+
{% macro cents_to_dollars(column_name) %}
|
|
135
|
+
({{ column_name }} / 100.0)::numeric(10, 2)
|
|
136
|
+
{% endmacro %}
|
|
137
|
+
|
|
138
|
+
-- Uso en el modelo
|
|
139
|
+
select
|
|
140
|
+
{{ cents_to_dollars('amount_cents') }} as amount_dollars
|
|
141
|
+
from orders
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Ejemplo
|
|
145
|
+
|
|
146
|
+
**Usuario:** Crear modelos de staging y mart para datos de pagos de Stripe (cargos, reembolsos) con tests y verificaciones de frescura.
|
|
147
|
+
|
|
148
|
+
**Salida esperada:**
|
|
149
|
+
- `models/staging/stripe/sources.yml` — fuente con verificación de frescura en `_ingested_at`
|
|
150
|
+
- `models/staging/stripe/stg_stripe_charges.sql` — renombrar, castear, sin joins
|
|
151
|
+
- `models/staging/stripe/stg_stripe_refunds.sql` — mismo patrón
|
|
152
|
+
- `models/marts/finance/fct_payments.sql` — join de cargos + reembolsos, importe neto, materialización incremental
|
|
153
|
+
- `models/marts/finance/schema.yml` — `unique` + `not_null` en `charge_id`, test de relación en `customer_id`
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
> **Trabaja con nosotros:** Claudient está respaldado por [Uitbreiden](https://uitbreiden.com/) — construimos productos de IA y soluciones B2B con comunidades de desarrolladores. ¿Construyendo pipelines de datos para IA o productos de analítica? [uitbreiden.com](https://uitbreiden.com/)
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
> 🇪🇸 Esta es la traducción en español. [Versión en inglés](../pandas-polars.md).
|
|
2
|
+
|
|
3
|
+
# Skill de Pandas / Polars
|
|
4
|
+
|
|
5
|
+
## Cuándo activar
|
|
6
|
+
- Limpiar, transformar o agregar datos tabulares en Python
|
|
7
|
+
- Hacer merge, join o reshape de DataFrames
|
|
8
|
+
- Escribir validaciones de datos o verificaciones de calidad
|
|
9
|
+
- Convertir entre formatos (CSV, Parquet, JSON, Excel)
|
|
10
|
+
- Perfilar o explorar un nuevo conjunto de datos
|
|
11
|
+
- Optimizar código lento de Pandas para grandes conjuntos de datos
|
|
12
|
+
- Migrar código de Pandas a Polars por rendimiento
|
|
13
|
+
|
|
14
|
+
## Cuándo NO usar
|
|
15
|
+
- SQL en una base de datos (empujar las transformaciones a la base de datos cuando los datos ya están allí)
|
|
16
|
+
- Spark/computación distribuida (usar el skill de PySpark para conjuntos de datos > RAM disponible)
|
|
17
|
+
- Modelos dbt (transformaciones basadas en SQL en un warehouse)
|
|
18
|
+
- Operaciones de arrays NumPy sobre datos no tabulares
|
|
19
|
+
|
|
20
|
+
## Instrucciones
|
|
21
|
+
|
|
22
|
+
### Pandas — reglas de rendimiento
|
|
23
|
+
```python
|
|
24
|
+
import pandas as pd
|
|
25
|
+
import numpy as np
|
|
26
|
+
|
|
27
|
+
# Nunca uses iterrows() — vectoriza en su lugar
|
|
28
|
+
# Malo:
|
|
29
|
+
for idx, row in df.iterrows():
|
|
30
|
+
df.at[idx, 'tax'] = row['price'] * 0.2
|
|
31
|
+
|
|
32
|
+
# Bueno:
|
|
33
|
+
df['tax'] = df['price'] * 0.2
|
|
34
|
+
|
|
35
|
+
# Usa .loc para acceso basado en etiquetas, .iloc para acceso basado en posición
|
|
36
|
+
# Nunca encadenes sin asignación — causa SettingWithCopyWarning
|
|
37
|
+
df.loc[df['status'] == 'active', 'flag'] = True
|
|
38
|
+
|
|
39
|
+
# Dtype categórico para columnas de cadenas de baja cardinalidad (ahorro masivo de memoria)
|
|
40
|
+
df['country'] = df['country'].astype('category')
|
|
41
|
+
|
|
42
|
+
# Reducir tipos numéricos para reducir memoria
|
|
43
|
+
df['quantity'] = pd.to_numeric(df['quantity'], downcast='integer')
|
|
44
|
+
df['price'] = pd.to_numeric(df['price'], downcast='float')
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Pandas — agregación y groupby
|
|
48
|
+
```python
|
|
49
|
+
# Groupby con múltiples agregaciones
|
|
50
|
+
summary = (
|
|
51
|
+
df.groupby(['region', 'category'])
|
|
52
|
+
.agg(
|
|
53
|
+
total_revenue=('revenue', 'sum'),
|
|
54
|
+
order_count=('order_id', 'nunique'),
|
|
55
|
+
avg_order_value=('revenue', 'mean'),
|
|
56
|
+
)
|
|
57
|
+
.reset_index()
|
|
58
|
+
.sort_values('total_revenue', ascending=False)
|
|
59
|
+
)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Pandas — merging
|
|
63
|
+
```python
|
|
64
|
+
# Siempre especifica how= explícitamente — nunca confíes en el valor por defecto (inner)
|
|
65
|
+
result = pd.merge(
|
|
66
|
+
orders,
|
|
67
|
+
customers,
|
|
68
|
+
on='customer_id',
|
|
69
|
+
how='left', # explícito
|
|
70
|
+
validate='m:1', # valida la cardinalidad — lanza excepción si se viola
|
|
71
|
+
suffixes=('_order', '_customer')
|
|
72
|
+
)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Polars — cuándo usar en lugar de Pandas
|
|
76
|
+
Usa Polars cuando:
|
|
77
|
+
- El conjunto de datos tiene > 1M filas (Polars es 5–100x más rápido para muchas operaciones)
|
|
78
|
+
- Necesitas evaluación lazy (optimización de consultas antes de la ejecución)
|
|
79
|
+
- El paralelismo importa (Polars usa todos los núcleos de CPU por defecto)
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import polars as pl
|
|
83
|
+
|
|
84
|
+
# API Lazy — las consultas se optimizan antes de ejecutarse
|
|
85
|
+
result = (
|
|
86
|
+
pl.scan_parquet("orders.parquet") # Escaneo lazy — no se cargan datos aún
|
|
87
|
+
.filter(pl.col("status") == "completed")
|
|
88
|
+
.group_by(["region", "category"])
|
|
89
|
+
.agg([
|
|
90
|
+
pl.col("revenue").sum().alias("total_revenue"),
|
|
91
|
+
pl.col("order_id").n_unique().alias("order_count"),
|
|
92
|
+
pl.col("revenue").mean().alias("avg_order_value"),
|
|
93
|
+
])
|
|
94
|
+
.sort("total_revenue", descending=True)
|
|
95
|
+
.collect() # Ejecutar ahora
|
|
96
|
+
)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Polars — expresiones (sin indexación encadenada)
|
|
100
|
+
```python
|
|
101
|
+
# Polars: sin SettingWithCopyWarning, sin indexación encadenada
|
|
102
|
+
df = df.with_columns([
|
|
103
|
+
(pl.col("price") * 0.2).alias("tax"),
|
|
104
|
+
pl.col("name").str.to_uppercase().alias("name_upper"),
|
|
105
|
+
pl.when(pl.col("quantity") > 10)
|
|
106
|
+
.then(pl.lit("bulk"))
|
|
107
|
+
.otherwise(pl.lit("standard"))
|
|
108
|
+
.alias("order_type"),
|
|
109
|
+
])
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Patrón de validación de datos
|
|
113
|
+
```python
|
|
114
|
+
def validate_orders(df: pd.DataFrame) -> None:
|
|
115
|
+
assert df['order_id'].notna().all(), "order_id has nulls"
|
|
116
|
+
assert df['order_id'].is_unique, "order_id has duplicates"
|
|
117
|
+
assert (df['amount'] >= 0).all(), "amount has negative values"
|
|
118
|
+
assert df['status'].isin(['pending', 'completed', 'cancelled']).all(), "invalid status values"
|
|
119
|
+
assert pd.to_datetime(df['created_at'], errors='coerce').notna().all(), "created_at has invalid dates"
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Conversión de formatos
|
|
123
|
+
```python
|
|
124
|
+
# Leer
|
|
125
|
+
df = pd.read_parquet("data.parquet", columns=['id', 'name', 'amount']) # Selección de columnas al leer
|
|
126
|
+
df = pd.read_csv("data.csv", dtype={'id': str}, parse_dates=['created_at'])
|
|
127
|
+
|
|
128
|
+
# Escribir — siempre usa Parquet sobre CSV para grandes conjuntos de datos
|
|
129
|
+
df.to_parquet("output.parquet", index=False, compression='snappy')
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Ejemplo
|
|
133
|
+
|
|
134
|
+
**Usuario:** Limpiar un CSV de pedidos sin procesar: corregir dtypes, eliminar duplicados, manejar nulos, agregar columnas derivadas (revenue_after_tax, order_size_bucket) y generar un archivo Parquet validado.
|
|
135
|
+
|
|
136
|
+
**Salida esperada:**
|
|
137
|
+
- Leer con `dtype=` explícito y `parse_dates=`
|
|
138
|
+
- Eliminar filas duplicadas de `order_id` (mantener el último)
|
|
139
|
+
- Rellenar nulos: `quantity` → 0, `discount` → 0.0, eliminar filas donde `customer_id` es nulo
|
|
140
|
+
- Derivar: `revenue_after_tax = price * quantity * (1 - discount) * 0.8`
|
|
141
|
+
- Bucket: `order_size_bucket` = 'small' (<100), 'medium' (100–1000), 'large' (>1000)
|
|
142
|
+
- Validar con aserciones antes de escribir
|
|
143
|
+
- Escribir en Parquet con compresión snappy
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
> **Trabaja con nosotros:** Claudient está respaldado por [Uitbreiden](https://uitbreiden.com/) — construimos productos de IA y soluciones B2B con comunidades de desarrolladores. ¿Construyendo pipelines de datos o productos de datos con IA? [uitbreiden.com](https://uitbreiden.com/)
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
> 🇪🇸 Esta es la traducción en español. [Versión en inglés](../pytorch-tensorflow.md).
|
|
2
|
+
|
|
3
|
+
# Skill de PyTorch / TensorFlow
|
|
4
|
+
|
|
5
|
+
## Cuándo activar
|
|
6
|
+
- Escribir bucles de entrenamiento de redes neuronales en PyTorch
|
|
7
|
+
- Construir y entrenar modelos Keras/TensorFlow
|
|
8
|
+
- Implementar funciones de pérdida personalizadas o arquitecturas de modelos
|
|
9
|
+
- Configurar entrenamiento en GPU con gestión de dispositivos
|
|
10
|
+
- Escribir cargadores de datos y pipelines de preprocesamiento para entrenamiento de modelos
|
|
11
|
+
- Implementar evaluación de modelos, checkpointing y early stopping
|
|
12
|
+
- Depurar pérdidas NaN, gradientes explosivos o inestabilidad de entrenamiento
|
|
13
|
+
- Portar modelos entre PyTorch y TensorFlow
|
|
14
|
+
|
|
15
|
+
## Cuándo NO usar
|
|
16
|
+
- Tareas de scikit-learn (clasificación, regresión, clustering en datos tabulares) — no es deep learning
|
|
17
|
+
- Manipulación de datos con Pandas/Polars antes del paso de modelado
|
|
18
|
+
- Fine-tuning con la API trainer de Hugging Face (flujo de trabajo diferente)
|
|
19
|
+
- Despliegues solo de inferencia sin código de entrenamiento
|
|
20
|
+
|
|
21
|
+
## Instrucciones
|
|
22
|
+
|
|
23
|
+
### Bucle de entrenamiento de PyTorch — estructura estándar
|
|
24
|
+
```python
|
|
25
|
+
import torch
|
|
26
|
+
import torch.nn as nn
|
|
27
|
+
from torch.utils.data import DataLoader
|
|
28
|
+
|
|
29
|
+
def train(model, train_loader, val_loader, epochs, lr, device):
|
|
30
|
+
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-2)
|
|
31
|
+
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
|
|
32
|
+
criterion = nn.CrossEntropyLoss()
|
|
33
|
+
|
|
34
|
+
best_val_loss = float('inf')
|
|
35
|
+
|
|
36
|
+
for epoch in range(epochs):
|
|
37
|
+
# Entrenamiento
|
|
38
|
+
model.train()
|
|
39
|
+
train_loss = 0.0
|
|
40
|
+
for batch in train_loader:
|
|
41
|
+
inputs, targets = batch
|
|
42
|
+
inputs, targets = inputs.to(device), targets.to(device)
|
|
43
|
+
|
|
44
|
+
optimizer.zero_grad()
|
|
45
|
+
outputs = model(inputs)
|
|
46
|
+
loss = criterion(outputs, targets)
|
|
47
|
+
loss.backward()
|
|
48
|
+
|
|
49
|
+
# Recorte de gradientes — siempre para estabilidad
|
|
50
|
+
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
|
|
51
|
+
|
|
52
|
+
optimizer.step()
|
|
53
|
+
train_loss += loss.item()
|
|
54
|
+
|
|
55
|
+
# Validación
|
|
56
|
+
model.eval()
|
|
57
|
+
val_loss = 0.0
|
|
58
|
+
with torch.no_grad():
|
|
59
|
+
for batch in val_loader:
|
|
60
|
+
inputs, targets = batch
|
|
61
|
+
inputs, targets = inputs.to(device), targets.to(device)
|
|
62
|
+
outputs = model(inputs)
|
|
63
|
+
val_loss += criterion(outputs, targets).item()
|
|
64
|
+
|
|
65
|
+
scheduler.step()
|
|
66
|
+
|
|
67
|
+
# Checkpoint del mejor modelo
|
|
68
|
+
if val_loss < best_val_loss:
|
|
69
|
+
best_val_loss = val_loss
|
|
70
|
+
torch.save(model.state_dict(), 'best_model.pt')
|
|
71
|
+
|
|
72
|
+
print(f"Epoch {epoch+1}/{epochs} | Train: {train_loss/len(train_loader):.4f} | Val: {val_loss/len(val_loader):.4f}")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Gestión de dispositivos
|
|
76
|
+
```python
|
|
77
|
+
# Siempre selección explícita del dispositivo
|
|
78
|
+
device = torch.device('cuda' if torch.cuda.is_available() else
|
|
79
|
+
'mps' if torch.backends.mps.is_available() else
|
|
80
|
+
'cpu')
|
|
81
|
+
model = model.to(device)
|
|
82
|
+
```
|
|
83
|
+
Nunca hardcodees `'cuda'` — siempre verifica la disponibilidad.
|
|
84
|
+
|
|
85
|
+
### Estructura del modelo personalizado
|
|
86
|
+
```python
|
|
87
|
+
class MyModel(nn.Module):
|
|
88
|
+
def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.3):
|
|
89
|
+
super().__init__()
|
|
90
|
+
self.network = nn.Sequential(
|
|
91
|
+
nn.Linear(input_dim, hidden_dim),
|
|
92
|
+
nn.LayerNorm(hidden_dim),
|
|
93
|
+
nn.GELU(),
|
|
94
|
+
nn.Dropout(dropout),
|
|
95
|
+
nn.Linear(hidden_dim, output_dim)
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
def forward(self, x):
|
|
99
|
+
return self.network(x)
|
|
100
|
+
```
|
|
101
|
+
Prefiere `nn.Sequential` para feedforward simple; usa la sobreescritura de `forward()` para ramificaciones complejas.
|
|
102
|
+
|
|
103
|
+
### Depuración de inestabilidad de entrenamiento
|
|
104
|
+
1. **Pérdida NaN** → verifica log(0) en la pérdida, entradas explosivas o división por cero en el preprocesamiento
|
|
105
|
+
2. **Gradientes explosivos** → agrega `clip_grad_norm_` (ya en la plantilla anterior)
|
|
106
|
+
3. **Gradientes que desaparecen** → verifica las funciones de activación (evita sigmoid/tanh en redes profundas), usa conexiones residuales
|
|
107
|
+
4. **La pérdida no disminuye** → reduce LR 10x, verifica el shuffling del data loader, verifica que las etiquetas son correctas
|
|
108
|
+
5. **OOM en GPU** → reduce el tamaño del batch, usa gradient checkpointing, usa precisión mixta
|
|
109
|
+
|
|
110
|
+
### Entrenamiento con precisión mixta (PyTorch)
|
|
111
|
+
```python
|
|
112
|
+
from torch.cuda.amp import autocast, GradScaler
|
|
113
|
+
|
|
114
|
+
scaler = GradScaler()
|
|
115
|
+
|
|
116
|
+
for batch in train_loader:
|
|
117
|
+
optimizer.zero_grad()
|
|
118
|
+
with autocast():
|
|
119
|
+
outputs = model(inputs)
|
|
120
|
+
loss = criterion(outputs, targets)
|
|
121
|
+
scaler.scale(loss).backward()
|
|
122
|
+
scaler.unscale_(optimizer)
|
|
123
|
+
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
|
|
124
|
+
scaler.step(optimizer)
|
|
125
|
+
scaler.update()
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### TensorFlow/Keras — estructura estándar
|
|
129
|
+
```python
|
|
130
|
+
import tensorflow as tf
|
|
131
|
+
|
|
132
|
+
model = tf.keras.Sequential([
|
|
133
|
+
tf.keras.layers.Dense(256, activation='relu'),
|
|
134
|
+
tf.keras.layers.Dropout(0.3),
|
|
135
|
+
tf.keras.layers.Dense(10, activation='softmax')
|
|
136
|
+
])
|
|
137
|
+
|
|
138
|
+
model.compile(
|
|
139
|
+
optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-2),
|
|
140
|
+
loss='sparse_categorical_crossentropy',
|
|
141
|
+
metrics=['accuracy']
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
callbacks = [
|
|
145
|
+
tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
|
|
146
|
+
tf.keras.callbacks.ModelCheckpoint('best_model.keras', save_best_only=True),
|
|
147
|
+
tf.keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5)
|
|
148
|
+
]
|
|
149
|
+
|
|
150
|
+
history = model.fit(
|
|
151
|
+
train_dataset,
|
|
152
|
+
validation_data=val_dataset,
|
|
153
|
+
epochs=100,
|
|
154
|
+
callbacks=callbacks
|
|
155
|
+
)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Ejemplo
|
|
159
|
+
|
|
160
|
+
**Usuario:** Construir un clasificador de texto en PyTorch para análisis de sentimientos (binario) con embedding, LSTM y dropout.
|
|
161
|
+
|
|
162
|
+
**Salida esperada:**
|
|
163
|
+
- `SentimentLSTM(nn.Module)` — capa de embedding, LSTM, dropout, cabeza lineal
|
|
164
|
+
- `forward()` — maneja secuencias empaquetadas o entrada con padding
|
|
165
|
+
- Bucle de entrenamiento con recorte de gradientes, validación por época, checkpoint del mejor modelo
|
|
166
|
+
- `device` auto-detectado (CUDA/MPS/CPU)
|
|
167
|
+
- División train/val mediante `DataLoader` con shuffling solo en train
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
> **Trabaja con nosotros:** Claudient está respaldado por [Uitbreiden](https://uitbreiden.com/) — construimos productos de IA y soluciones B2B con comunidades de desarrolladores. ¿Construyendo modelos ML o productos impulsados por IA? [uitbreiden.com](https://uitbreiden.com/)
|