@girardmedia/bootspring 3.3.2 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/agents/accessibility-auditor.md +39 -0
- package/assets/agents/api-designer.md +40 -0
- package/assets/agents/auth-implementer.md +64 -0
- package/assets/agents/bug-hunter.md +42 -0
- package/assets/agents/bundle-analyzer.md +40 -0
- package/assets/agents/cache-optimizer.md +55 -0
- package/assets/agents/changelog-writer.md +55 -0
- package/assets/agents/ci-cd-builder.md +40 -0
- package/assets/agents/code-explainer.md +39 -0
- package/assets/agents/code-reviewer.md +39 -0
- package/assets/agents/cost-optimizer.md +57 -0
- package/assets/agents/cron-scheduler.md +51 -0
- package/assets/agents/data-seeder.md +56 -0
- package/assets/agents/database-architect.md +40 -0
- package/assets/agents/dependency-updater.md +40 -0
- package/assets/agents/deploy-checker.md +40 -0
- package/assets/agents/docker-optimizer.md +40 -0
- package/assets/agents/documentation-writer.md +40 -0
- package/assets/agents/email-builder.md +55 -0
- package/assets/agents/env-setup.md +40 -0
- package/assets/agents/error-handler.md +40 -0
- package/assets/agents/eslint-fixer.md +46 -0
- package/assets/agents/feature-flagger.md +69 -0
- package/assets/agents/git-detective.md +39 -0
- package/assets/agents/graphql-builder.md +60 -0
- package/assets/agents/incident-responder.md +59 -0
- package/assets/agents/log-analyzer.md +39 -0
- package/assets/agents/migration-planner.md +41 -0
- package/assets/agents/monorepo-navigator.md +39 -0
- package/assets/agents/nextjs-expert.md +57 -0
- package/assets/agents/notification-builder.md +56 -0
- package/assets/agents/onboarding-guide.md +39 -0
- package/assets/agents/performance-profiler.md +40 -0
- package/assets/agents/prisma-expert.md +57 -0
- package/assets/agents/rate-limiter.md +58 -0
- package/assets/agents/react-expert.md +58 -0
- package/assets/agents/refactorer.md +42 -0
- package/assets/agents/regex-builder.md +46 -0
- package/assets/agents/release-manager.md +40 -0
- package/assets/agents/s3-manager.md +58 -0
- package/assets/agents/schema-validator.md +40 -0
- package/assets/agents/search-builder.md +62 -0
- package/assets/agents/security-auditor.md +39 -0
- package/assets/agents/sitemap-generator.md +53 -0
- package/assets/agents/stripe-integrator.md +59 -0
- package/assets/agents/tailwind-expert.md +55 -0
- package/assets/agents/tech-debt-tracker.md +39 -0
- package/assets/agents/test-writer.md +42 -0
- package/assets/agents/type-fixer.md +45 -0
- package/assets/agents/webhook-builder.md +54 -0
- package/assets/rules/cpp.md +53 -0
- package/assets/rules/css.md +52 -0
- package/assets/rules/go.md +50 -0
- package/assets/rules/html.md +52 -0
- package/assets/rules/java.md +51 -0
- package/assets/rules/kotlin.md +50 -0
- package/assets/rules/php.md +51 -0
- package/assets/rules/python.md +51 -0
- package/assets/rules/ruby.md +51 -0
- package/assets/rules/rust.md +49 -0
- package/assets/rules/shell.md +52 -0
- package/assets/rules/sql.md +49 -0
- package/assets/rules/swift.md +50 -0
- package/assets/rules/typescript.md +52 -0
- package/assets/rules/yaml-json.md +51 -0
- package/assets/skills/accessibility.md +210 -0
- package/assets/skills/agent-patterns.md +387 -0
- package/assets/skills/ai-integration.md +263 -0
- package/assets/skills/animation-patterns.md +224 -0
- package/assets/skills/api-design.md +218 -0
- package/assets/skills/api-gateway.md +341 -0
- package/assets/skills/api-versioning.md +226 -0
- package/assets/skills/astro-patterns.md +233 -0
- package/assets/skills/auth-patterns.md +248 -0
- package/assets/skills/aws-patterns.md +171 -0
- package/assets/skills/background-jobs.md +162 -0
- package/assets/skills/browser-extensions.md +309 -0
- package/assets/skills/caching-patterns.md +253 -0
- package/assets/skills/ci-cd.md +251 -0
- package/assets/skills/cli-development.md +296 -0
- package/assets/skills/code-review.md +185 -0
- package/assets/skills/cron-patterns.md +327 -0
- package/assets/skills/data-fetching.md +231 -0
- package/assets/skills/database-migrations.md +346 -0
- package/assets/skills/database-patterns.md +219 -0
- package/assets/skills/debugging.md +281 -0
- package/assets/skills/design-system.md +289 -0
- package/assets/skills/django-patterns.md +182 -0
- package/assets/skills/docker-patterns.md +235 -0
- package/assets/skills/e2e-testing.md +287 -0
- package/assets/skills/edge-computing.md +268 -0
- package/assets/skills/electron-patterns.md +266 -0
- package/assets/skills/email-templates.md +206 -0
- package/assets/skills/error-handling.md +265 -0
- package/assets/skills/event-driven.md +232 -0
- package/assets/skills/express-patterns.md +239 -0
- package/assets/skills/fastapi-patterns.md +198 -0
- package/assets/skills/feature-flags.md +212 -0
- package/assets/skills/figma-to-code.md +298 -0
- package/assets/skills/file-upload.md +228 -0
- package/assets/skills/forms-patterns.md +264 -0
- package/assets/skills/gcp-patterns.md +189 -0
- package/assets/skills/git-workflow.md +187 -0
- package/assets/skills/golang-patterns.md +185 -0
- package/assets/skills/graphql-patterns.md +244 -0
- package/assets/skills/i18n-patterns.md +172 -0
- package/assets/skills/image-processing.md +350 -0
- package/assets/skills/java-springboot.md +226 -0
- package/assets/skills/kotlin-patterns.md +207 -0
- package/assets/skills/kubernetes-patterns.md +326 -0
- package/assets/skills/laravel-patterns.md +261 -0
- package/assets/skills/llm-fine-tuning.md +335 -0
- package/assets/skills/load-testing.md +303 -0
- package/assets/skills/logging-observability.md +228 -0
- package/assets/skills/markdown-processing.md +318 -0
- package/assets/skills/mcp-server-patterns.md +292 -0
- package/assets/skills/microservices.md +272 -0
- package/assets/skills/migration-patterns.md +239 -0
- package/assets/skills/mongodb-patterns.md +189 -0
- package/assets/skills/monorepo-patterns.md +287 -0
- package/assets/skills/nextjs-app-router.md +237 -0
- package/assets/skills/notification-patterns.md +348 -0
- package/assets/skills/oauth-patterns.md +246 -0
- package/assets/skills/payment-integration.md +222 -0
- package/assets/skills/pdf-generation.md +307 -0
- package/assets/skills/performance-optimization.md +277 -0
- package/assets/skills/php-patterns.md +210 -0
- package/assets/skills/prisma-patterns.md +241 -0
- package/assets/skills/prompt-engineering.md +193 -0
- package/assets/skills/pwa-patterns.md +247 -0
- package/assets/skills/python-patterns.md +158 -0
- package/assets/skills/python-testing.md +172 -0
- package/assets/skills/queue-patterns.md +295 -0
- package/assets/skills/rag-patterns.md +159 -0
- package/assets/skills/rate-limiting.md +319 -0
- package/assets/skills/react-components.md +201 -0
- package/assets/skills/react-native-patterns.md +299 -0
- package/assets/skills/real-time-patterns.md +181 -0
- package/assets/skills/redis-patterns.md +188 -0
- package/assets/skills/refactoring.md +218 -0
- package/assets/skills/regex-patterns.md +191 -0
- package/assets/skills/remix-patterns.md +262 -0
- package/assets/skills/responsive-design.md +199 -0
- package/assets/skills/ruby-rails-patterns.md +178 -0
- package/assets/skills/rust-patterns.md +211 -0
- package/assets/skills/search-patterns.md +227 -0
- package/assets/skills/security-hardening.md +237 -0
- package/assets/skills/seo-patterns.md +179 -0
- package/assets/skills/serverless-patterns.md +223 -0
- package/assets/skills/sql-optimization.md +154 -0
- package/assets/skills/state-management.md +254 -0
- package/assets/skills/storybook-patterns.md +330 -0
- package/assets/skills/svelte-patterns.md +258 -0
- package/assets/skills/swift-patterns.md +227 -0
- package/assets/skills/tailwind-patterns.md +272 -0
- package/assets/skills/tdd-workflow.md +199 -0
- package/assets/skills/terraform-patterns.md +270 -0
- package/assets/skills/testing-react.md +240 -0
- package/assets/skills/testing-vitest.md +232 -0
- package/assets/skills/typescript-strict.md +159 -0
- package/assets/skills/video-processing.md +340 -0
- package/assets/skills/vue-patterns.md +247 -0
- package/assets/skills/web-workers.md +327 -0
- package/assets/skills/webhooks-patterns.md +283 -0
- package/assets/skills/websocket-patterns.md +306 -0
- package/dist/cli/index.js +941 -958
- package/dist/core/index.d.ts +341 -11
- package/dist/core.js +58 -95
- package/dist/mcp/index.d.ts +33 -1
- package/dist/mcp-server.js +177 -255
- package/package.json +4 -1
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: laravel-patterns
|
|
3
|
+
description: Laravel patterns for Eloquent, middleware, jobs, events, policies, Blade, and Livewire.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Laravel Patterns
|
|
7
|
+
|
|
8
|
+
## When to Use
|
|
9
|
+
|
|
10
|
+
Apply these patterns when building Laravel 10+ applications. Use this skill for
|
|
11
|
+
structuring Eloquent models and relationships, writing middleware, dispatching
|
|
12
|
+
background jobs, handling events, authorizing with policies, building Blade
|
|
13
|
+
templates, and adding interactivity with Livewire.
|
|
14
|
+
|
|
15
|
+
## How It Works
|
|
16
|
+
|
|
17
|
+
### Eloquent Models
|
|
18
|
+
|
|
19
|
+
Use scopes for reusable query constraints. Define relationships explicitly.
|
|
20
|
+
Use casts for attribute transformation. Always set `$fillable` or `$guarded`.
|
|
21
|
+
|
|
22
|
+
```php
|
|
23
|
+
class Article extends Model
|
|
24
|
+
{
|
|
25
|
+
protected $fillable = ['title', 'body', 'status', 'published_at'];
|
|
26
|
+
|
|
27
|
+
protected $casts = [
|
|
28
|
+
'published_at' => 'datetime',
|
|
29
|
+
'metadata' => 'array',
|
|
30
|
+
'status' => ArticleStatus::class, // backed enum
|
|
31
|
+
];
|
|
32
|
+
|
|
33
|
+
// Relationships
|
|
34
|
+
public function author(): BelongsTo
|
|
35
|
+
{
|
|
36
|
+
return $this->belongsTo(User::class, 'author_id');
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
public function tags(): BelongsToMany
|
|
40
|
+
{
|
|
41
|
+
return $this->belongsToMany(Tag::class)->withTimestamps();
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Scopes
|
|
45
|
+
public function scopePublished(Builder $query): Builder
|
|
46
|
+
{
|
|
47
|
+
return $query->where('status', ArticleStatus::Published)
|
|
48
|
+
->where('published_at', '<=', now());
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
public function scopeByAuthor(Builder $query, User $user): Builder
|
|
52
|
+
{
|
|
53
|
+
return $query->where('author_id', $user->id);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Usage: Article::published()->byAuthor($user)->paginate(20)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Middleware
|
|
61
|
+
|
|
62
|
+
Register middleware in `bootstrap/app.php` (Laravel 11+) or `app/Http/Kernel.php` (Laravel 10). Keep middleware focused
|
|
63
|
+
on a single concern. Use constructor injection for dependencies.
|
|
64
|
+
|
|
65
|
+
```php
|
|
66
|
+
class EnsureApiVersion
|
|
67
|
+
{
|
|
68
|
+
public function handle(Request $request, Closure $next, string $version = 'v1'): Response
|
|
69
|
+
{
|
|
70
|
+
if ($request->header('X-API-Version', 'v1') !== $version) {
|
|
71
|
+
return response()->json(['error' => 'Unsupported API version'], 400);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
$response = $next($request);
|
|
75
|
+
$response->headers->set('X-API-Version', $version);
|
|
76
|
+
|
|
77
|
+
return $response;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// bootstrap/app.php
|
|
82
|
+
->withMiddleware(function (Middleware $middleware) {
|
|
83
|
+
$middleware->api(append: [EnsureApiVersion::class]);
|
|
84
|
+
})
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Jobs and Queues
|
|
88
|
+
|
|
89
|
+
Use jobs for heavy processing: email sending, PDF generation, API calls. Implement
|
|
90
|
+
`ShouldQueue` for async. Set `$tries`, `$backoff`, and `$timeout`.
|
|
91
|
+
|
|
92
|
+
```php
|
|
93
|
+
class ProcessUpload implements ShouldQueue
|
|
94
|
+
{
|
|
95
|
+
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
|
|
96
|
+
|
|
97
|
+
public int $tries = 3;
|
|
98
|
+
public array $backoff = [10, 60, 300];
|
|
99
|
+
public int $timeout = 120;
|
|
100
|
+
|
|
101
|
+
public function __construct(
|
|
102
|
+
private Upload $upload,
|
|
103
|
+
) {}
|
|
104
|
+
|
|
105
|
+
public function handle(StorageService $storage): void
|
|
106
|
+
{
|
|
107
|
+
$storage->processFile($this->upload->path);
|
|
108
|
+
$this->upload->update(['status' => 'processed']);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
public function failed(Throwable $exception): void
|
|
112
|
+
{
|
|
113
|
+
$this->upload->update(['status' => 'failed']);
|
|
114
|
+
Log::error('Upload processing failed', [
|
|
115
|
+
'upload_id' => $this->upload->id,
|
|
116
|
+
'error' => $exception->getMessage(),
|
|
117
|
+
]);
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// Dispatch
|
|
122
|
+
ProcessUpload::dispatch($upload)->onQueue('uploads');
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Events and Listeners
|
|
126
|
+
|
|
127
|
+
Use events for decoupled side effects. One event can have multiple listeners.
|
|
128
|
+
Queue listeners for non-blocking processing.
|
|
129
|
+
|
|
130
|
+
```php
|
|
131
|
+
// Events
|
|
132
|
+
class OrderPlaced
|
|
133
|
+
{
|
|
134
|
+
public function __construct(public readonly Order $order) {}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// Listeners
|
|
138
|
+
class SendOrderConfirmation implements ShouldQueue
|
|
139
|
+
{
|
|
140
|
+
public function handle(OrderPlaced $event): void
|
|
141
|
+
{
|
|
142
|
+
Mail::to($event->order->customer_email)
|
|
143
|
+
->send(new OrderConfirmationMail($event->order));
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
class UpdateInventory
|
|
148
|
+
{
|
|
149
|
+
public function handle(OrderPlaced $event): void
|
|
150
|
+
{
|
|
151
|
+
foreach ($event->order->items as $item) {
|
|
152
|
+
$item->product->decrement('stock', $item->quantity);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// Dispatch
|
|
158
|
+
event(new OrderPlaced($order));
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Policies for Authorization
|
|
162
|
+
|
|
163
|
+
One policy per model. Methods map to controller actions. Use `Gate::authorize`
|
|
164
|
+
or `$this->authorize` in controllers (on Laravel 11+, add the `AuthorizesRequests` trait to the controller first).
|
|
165
|
+
|
|
166
|
+
```php
|
|
167
|
+
class ArticlePolicy
|
|
168
|
+
{
|
|
169
|
+
public function update(User $user, Article $article): bool
|
|
170
|
+
{
|
|
171
|
+
return $user->id === $article->author_id
|
|
172
|
+
|| $user->hasRole('editor');
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
public function delete(User $user, Article $article): bool
|
|
176
|
+
{
|
|
177
|
+
return $user->id === $article->author_id
|
|
178
|
+
|| $user->hasRole('admin');
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Controller
|
|
183
|
+
public function update(Request $request, Article $article)
|
|
184
|
+
{
|
|
185
|
+
$this->authorize('update', $article);
|
|
186
|
+
// ... proceed with update
|
|
187
|
+
}
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Blade Components
|
|
191
|
+
|
|
192
|
+
Use class-based components for logic, anonymous components for simple markup.
|
|
193
|
+
Pass data via attributes. Use `{{ }}` for escaped output, `@` directives for
|
|
194
|
+
control flow.
|
|
195
|
+
|
|
196
|
+
```blade
|
|
197
|
+
<!-- resources/views/components/alert.blade.php -->
|
|
198
|
+
@props(['type' => 'info', 'dismissible' => false])
|
|
199
|
+
|
|
200
|
+
<div {{ $attributes->merge(['class' => "alert alert-{$type}"]) }}>
|
|
201
|
+
{{ $slot }}
|
|
202
|
+
@if($dismissible)
|
|
203
|
+
<button type="button" class="btn-close" data-bs-dismiss="alert"></button>
|
|
204
|
+
@endif
|
|
205
|
+
</div>
|
|
206
|
+
|
|
207
|
+
<!-- Usage -->
|
|
208
|
+
<x-alert type="success" dismissible>Order placed successfully.</x-alert>
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Livewire for Interactivity
|
|
212
|
+
|
|
213
|
+
Use Livewire 3 for server-rendered interactivity without writing JavaScript.
|
|
214
|
+
Keep components focused. Use `wire:model.live` for real-time binding. Paginated components need the `WithPagination` trait, which provides `resetPage()`.
|
|
215
|
+
|
|
216
|
+
```php
|
|
217
|
+
class SearchArticles extends Component
|
|
218
|
+
{
|
|
219
|
+
public string $query = '';
|
|
220
|
+
public int $perPage = 10;
|
|
221
|
+
|
|
222
|
+
public function updatedQuery(): void
|
|
223
|
+
{
|
|
224
|
+
$this->resetPage();
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
public function render(): View
|
|
228
|
+
{
|
|
229
|
+
return view('livewire.search-articles', [
|
|
230
|
+
'articles' => Article::published()
|
|
231
|
+
->where('title', 'like', "%{$this->query}%")
|
|
232
|
+
->paginate($this->perPage),
|
|
233
|
+
]);
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Examples
|
|
239
|
+
|
|
240
|
+
| Scenario | Pattern | Key Class/Method |
|
|
241
|
+
|----------|---------|-----------------|
|
|
242
|
+
| Reusable query filters | Eloquent scopes | `scopePublished()`, `scopeByAuthor()` |
|
|
243
|
+
| Background file processing | Queued job | `ProcessUpload implements ShouldQueue` |
|
|
244
|
+
| Decoupled side effects | Event + Listener | `event(new OrderPlaced($order))` |
|
|
245
|
+
| Model-level authorization | Policy | `$this->authorize('update', $article)` |
|
|
246
|
+
| Reusable UI components | Blade component | `<x-alert type="success">` |
|
|
247
|
+
| Real-time search/filter | Livewire | `wire:model.live` binding |
|
|
248
|
+
| Retry failed background work | Job backoff | `$backoff = [10, 60, 300]` |
|
|
249
|
+
|
|
250
|
+
## Checklist
|
|
251
|
+
|
|
252
|
+
- [ ] Eloquent models use scopes, casts, and explicit `$fillable`
|
|
253
|
+
- [ ] `with()` / `load()` for eager loading (no N+1 queries)
|
|
254
|
+
- [ ] Middleware does one thing; registered in `bootstrap/app.php`
|
|
255
|
+
- [ ] Jobs implement `ShouldQueue` with `$tries`, `$backoff`, `$timeout`
|
|
256
|
+
- [ ] `failed()` method on every job for error handling
|
|
257
|
+
- [ ] Events for decoupled side effects; listeners queued for heavy work
|
|
258
|
+
- [ ] Policies for model-level authorization, checked in controllers
|
|
259
|
+
- [ ] Blade components for reusable UI; `$attributes->merge()` for defaults
|
|
260
|
+
- [ ] Livewire components are small; `wire:model.live` for reactive search/filter
|
|
261
|
+
- [ ] Enum casts for status fields (`ArticleStatus::class`)
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: llm-fine-tuning
|
|
3
|
+
description: LLM fine-tuning patterns for dataset preparation, LoRA/QLoRA training, evaluation metrics, deployment, and cost optimization.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# LLM Fine-Tuning Patterns
|
|
7
|
+
|
|
8
|
+
## When to Use
|
|
9
|
+
Fine-tune an LLM when prompt engineering alone cannot achieve the required output quality, consistency, or format adherence. Common scenarios: domain-specific terminology, consistent structured output, style matching, classification tasks, and reducing prompt length. Start with prompt engineering, then few-shot examples, then fine-tuning only when those approaches fall short. Fine-tuning is most cost-effective for high-volume, narrow tasks.
|
|
10
|
+
|
|
11
|
+
## How It Works
|
|
12
|
+
|
|
13
|
+
### Dataset Preparation
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
// scripts/prepare-dataset.ts
|
|
17
|
+
import fs from 'fs/promises';
|
|
18
|
+
|
|
19
|
+
interface TrainingExample {
|
|
20
|
+
messages: Array<{
|
|
21
|
+
role: 'system' | 'user' | 'assistant';
|
|
22
|
+
content: string;
|
|
23
|
+
}>;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
interface RawDataItem {
|
|
27
|
+
input: string;
|
|
28
|
+
output: string;
|
|
29
|
+
category?: string;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const SYSTEM_PROMPT = `You are a customer support assistant for Acme Corp. Respond in a helpful, concise manner. Always include the relevant ticket ID when available.`;
|
|
33
|
+
|
|
34
|
+
async function prepareDataset(rawPath: string, outputPath: string) {
|
|
35
|
+
const raw: RawDataItem[] = JSON.parse(await fs.readFile(rawPath, 'utf-8'));
|
|
36
|
+
|
|
37
|
+
// Validate and filter
|
|
38
|
+
const valid = raw.filter((item) => {
|
|
39
|
+
if (!item.input?.trim() || !item.output?.trim()) return false;
|
|
40
|
+
if (item.input.length < 10 || item.output.length < 10) return false;
|
|
41
|
+
if (item.input.length > 4000 || item.output.length > 4000) return false;
|
|
42
|
+
return true;
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
console.log(`Valid examples: ${valid.length}/${raw.length}`);
|
|
46
|
+
|
|
47
|
+
// Convert to chat format
|
|
48
|
+
const training: TrainingExample[] = valid.map((item) => ({
|
|
49
|
+
messages: [
|
|
50
|
+
{ role: 'system', content: SYSTEM_PROMPT },
|
|
51
|
+
{ role: 'user', content: item.input },
|
|
52
|
+
{ role: 'assistant', content: item.output },
|
|
53
|
+
],
|
|
54
|
+
}));
|
|
55
|
+
|
|
56
|
+
// Shuffle
|
|
57
|
+
for (let i = training.length - 1; i > 0; i--) {
|
|
58
|
+
const j = Math.floor(Math.random() * (i + 1));
|
|
59
|
+
[training[i], training[j]] = [training[j], training[i]];
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// Split: 90% train, 10% validation
|
|
63
|
+
const splitIdx = Math.floor(training.length * 0.9);
|
|
64
|
+
const train = training.slice(0, splitIdx);
|
|
65
|
+
const val = training.slice(splitIdx);
|
|
66
|
+
|
|
67
|
+
// Write JSONL format
|
|
68
|
+
await fs.writeFile(
|
|
69
|
+
outputPath.replace('.jsonl', '_train.jsonl'),
|
|
70
|
+
train.map((e) => JSON.stringify(e)).join('\n')
|
|
71
|
+
);
|
|
72
|
+
await fs.writeFile(
|
|
73
|
+
outputPath.replace('.jsonl', '_val.jsonl'),
|
|
74
|
+
val.map((e) => JSON.stringify(e)).join('\n')
|
|
75
|
+
);
|
|
76
|
+
|
|
77
|
+
console.log(`Train: ${train.length}, Validation: ${val.length}`);
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Dataset Quality Checks
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
// scripts/validate-dataset.ts
|
|
85
|
+
interface DatasetStats {
|
|
86
|
+
totalExamples: number;
|
|
87
|
+
avgInputTokens: number;
|
|
88
|
+
avgOutputTokens: number;
|
|
89
|
+
maxInputTokens: number;
|
|
90
|
+
maxOutputTokens: number;
|
|
91
|
+
duplicateInputs: number;
|
|
92
|
+
categoryDistribution: Record<string, number>;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
async function validateDataset(path: string): Promise<DatasetStats> {
|
|
96
|
+
const lines = (await fs.readFile(path, 'utf-8')).trim().split('\n');
|
|
97
|
+
const examples = lines.map((l) => JSON.parse(l) as TrainingExample);
|
|
98
|
+
|
|
99
|
+
const inputSet = new Set<string>();
|
|
100
|
+
let duplicates = 0;
|
|
101
|
+
let totalInputTokens = 0;
|
|
102
|
+
let totalOutputTokens = 0;
|
|
103
|
+
let maxInput = 0;
|
|
104
|
+
let maxOutput = 0;
|
|
105
|
+
|
|
106
|
+
for (const ex of examples) {
|
|
107
|
+
const userMsg = ex.messages.find((m) => m.role === 'user')?.content ?? '';
|
|
108
|
+
const assistantMsg = ex.messages.find((m) => m.role === 'assistant')?.content ?? '';
|
|
109
|
+
|
|
110
|
+
const inputTokens = Math.ceil(userMsg.length / 4); // rough estimate
|
|
111
|
+
const outputTokens = Math.ceil(assistantMsg.length / 4);
|
|
112
|
+
|
|
113
|
+
totalInputTokens += inputTokens;
|
|
114
|
+
totalOutputTokens += outputTokens;
|
|
115
|
+
maxInput = Math.max(maxInput, inputTokens);
|
|
116
|
+
maxOutput = Math.max(maxOutput, outputTokens);
|
|
117
|
+
|
|
118
|
+
if (inputSet.has(userMsg)) duplicates++;
|
|
119
|
+
inputSet.add(userMsg);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
return {
|
|
123
|
+
totalExamples: examples.length,
|
|
124
|
+
avgInputTokens: Math.round(totalInputTokens / examples.length),
|
|
125
|
+
avgOutputTokens: Math.round(totalOutputTokens / examples.length),
|
|
126
|
+
maxInputTokens: maxInput,
|
|
127
|
+
maxOutputTokens: maxOutput,
|
|
128
|
+
duplicateInputs: duplicates,
|
|
129
|
+
categoryDistribution: {},
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### OpenAI Fine-Tuning API
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
// scripts/finetune.ts
|
|
138
|
+
import OpenAI from 'openai';
|
|
139
|
+
|
|
140
|
+
const openai = new OpenAI();
|
|
141
|
+
|
|
142
|
+
async function startFineTuning(trainFile: string, valFile: string) {
|
|
143
|
+
// Upload training file
|
|
144
|
+
const trainUpload = await openai.files.create({
|
|
145
|
+
file: fs.createReadStream(trainFile),
|
|
146
|
+
purpose: 'fine-tune',
|
|
147
|
+
});
|
|
148
|
+
console.log(`Training file uploaded: ${trainUpload.id}`);
|
|
149
|
+
|
|
150
|
+
// Upload validation file
|
|
151
|
+
const valUpload = await openai.files.create({
|
|
152
|
+
file: fs.createReadStream(valFile),
|
|
153
|
+
purpose: 'fine-tune',
|
|
154
|
+
});
|
|
155
|
+
console.log(`Validation file uploaded: ${valUpload.id}`);
|
|
156
|
+
|
|
157
|
+
// Create fine-tuning job
|
|
158
|
+
const job = await openai.fineTuning.jobs.create({
|
|
159
|
+
training_file: trainUpload.id,
|
|
160
|
+
validation_file: valUpload.id,
|
|
161
|
+
model: 'gpt-4o-mini-2024-07-18',
|
|
162
|
+
hyperparameters: {
|
|
163
|
+
n_epochs: 3,
|
|
164
|
+
batch_size: 'auto',
|
|
165
|
+
learning_rate_multiplier: 'auto',
|
|
166
|
+
},
|
|
167
|
+
suffix: 'customer-support-v1',
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
console.log(`Fine-tuning job created: ${job.id}`);
|
|
171
|
+
return job.id;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
async function monitorJob(jobId: string) {
|
|
175
|
+
while (true) {
|
|
176
|
+
const job = await openai.fineTuning.jobs.retrieve(jobId);
|
|
177
|
+
console.log(`Status: ${job.status}`);
|
|
178
|
+
|
|
179
|
+
if (job.status === 'succeeded') {
|
|
180
|
+
console.log(`Model ready: ${job.fine_tuned_model}`);
|
|
181
|
+
return job.fine_tuned_model;
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
if (job.status === 'failed') {
|
|
185
|
+
console.error(`Failed: ${job.error?.message}`);
|
|
186
|
+
throw new Error(job.error?.message);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// List recent events
|
|
190
|
+
const events = await openai.fineTuning.jobs.listEvents(jobId, { limit: 5 });
|
|
191
|
+
for (const event of events.data) {
|
|
192
|
+
console.log(` [${event.level}] ${event.message}`);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
await new Promise((r) => setTimeout(r, 60_000)); // check every minute
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### LoRA Configuration (Hugging Face / PEFT)
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
# train_lora.py
|
|
204
|
+
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
|
|
205
|
+
from peft import LoraConfig, get_peft_model, TaskType
|
|
206
|
+
from trl import SFTTrainer
|
|
207
|
+
from datasets import load_dataset
|
|
208
|
+
|
|
209
|
+
# Load base model
|
|
210
|
+
model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
|
211
|
+
model = AutoModelForCausalLM.from_pretrained(
|
|
212
|
+
model_name,
|
|
213
|
+
torch_dtype="auto",
|
|
214
|
+
device_map="auto",
|
|
215
|
+
load_in_4bit=True, # QLoRA: 4-bit quantization
|
|
216
|
+
)
|
|
217
|
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
218
|
+
tokenizer.pad_token = tokenizer.eos_token
|
|
219
|
+
|
|
220
|
+
# LoRA configuration
|
|
221
|
+
lora_config = LoraConfig(
|
|
222
|
+
task_type=TaskType.CAUSAL_LM,
|
|
223
|
+
r=16, # rank — higher = more capacity, more VRAM
|
|
224
|
+
lora_alpha=32, # scaling factor
|
|
225
|
+
lora_dropout=0.05,
|
|
226
|
+
target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], # attention layers
|
|
227
|
+
bias="none",
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
model = get_peft_model(model, lora_config)
|
|
231
|
+
model.print_trainable_parameters()
|
|
232
|
+
# trainable params: ~13.6M || all params: ~8B || trainable%: ~0.17%
|
|
233
|
+
|
|
234
|
+
# Training
|
|
235
|
+
dataset = load_dataset("json", data_files={"train": "train.jsonl", "validation": "val.jsonl"})
|
|
236
|
+
|
|
237
|
+
training_args = TrainingArguments(
|
|
238
|
+
output_dir="./output",
|
|
239
|
+
num_train_epochs=3,
|
|
240
|
+
per_device_train_batch_size=4,
|
|
241
|
+
gradient_accumulation_steps=4,
|
|
242
|
+
learning_rate=2e-4,
|
|
243
|
+
warmup_ratio=0.1,
|
|
244
|
+
logging_steps=10,
|
|
245
|
+
evaluation_strategy="steps",
|
|
246
|
+
eval_steps=100,
|
|
247
|
+
save_strategy="steps",
|
|
248
|
+
save_steps=100,
|
|
249
|
+
bf16=True,
|
|
250
|
+
report_to="wandb",
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
trainer = SFTTrainer(
|
|
254
|
+
model=model,
|
|
255
|
+
args=training_args,
|
|
256
|
+
train_dataset=dataset["train"],
|
|
257
|
+
eval_dataset=dataset["validation"],
|
|
258
|
+
tokenizer=tokenizer,
|
|
259
|
+
max_seq_length=2048,
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
trainer.train()
|
|
263
|
+
model.save_pretrained("./lora-adapter")
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
### Evaluation
|
|
267
|
+
|
|
268
|
+
```typescript
|
|
269
|
+
// scripts/evaluate.ts
|
|
270
|
+
interface EvalResult {
|
|
271
|
+
example: TrainingExample;
|
|
272
|
+
predicted: string;
|
|
273
|
+
reference: string;
|
|
274
|
+
metrics: { exact_match: boolean; bleu: number; rouge_l: number };
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
async function evaluateModel(modelId: string, testFile: string): Promise<void> {
|
|
278
|
+
const examples = (await fs.readFile(testFile, 'utf-8')).trim().split('\n')
|
|
279
|
+
.map((l) => JSON.parse(l) as TrainingExample);
|
|
280
|
+
|
|
281
|
+
const results: EvalResult[] = [];
|
|
282
|
+
|
|
283
|
+
for (const ex of examples) {
|
|
284
|
+
const messages = ex.messages.filter((m) => m.role !== 'assistant');
|
|
285
|
+
const reference = ex.messages.find((m) => m.role === 'assistant')!.content;
|
|
286
|
+
|
|
287
|
+
const response = await openai.chat.completions.create({
|
|
288
|
+
model: modelId,
|
|
289
|
+
messages,
|
|
290
|
+
max_tokens: 500,
|
|
291
|
+
temperature: 0,
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
const predicted = response.choices[0].message.content ?? '';
|
|
295
|
+
results.push({
|
|
296
|
+
example: ex,
|
|
297
|
+
predicted,
|
|
298
|
+
reference,
|
|
299
|
+
metrics: {
|
|
300
|
+
exact_match: predicted.trim() === reference.trim(),
|
|
301
|
+
bleu: computeBleu(predicted, reference),
|
|
302
|
+
rouge_l: computeRougeL(predicted, reference),
|
|
303
|
+
},
|
|
304
|
+
});
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
const avgBleu = results.reduce((s, r) => s + r.metrics.bleu, 0) / results.length;
|
|
308
|
+
const avgRouge = results.reduce((s, r) => s + r.metrics.rouge_l, 0) / results.length;
|
|
309
|
+
const exactMatch = results.filter((r) => r.metrics.exact_match).length / results.length;
|
|
310
|
+
|
|
311
|
+
console.log(`Results (${results.length} examples):`);
|
|
312
|
+
console.log(` Exact Match: ${(exactMatch * 100).toFixed(1)}%`);
|
|
313
|
+
console.log(` Avg BLEU: ${(avgBleu * 100).toFixed(1)}`);
|
|
314
|
+
console.log(` Avg ROUGE-L: ${(avgRouge * 100).toFixed(1)}`);
|
|
315
|
+
}
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
## Examples
|
|
319
|
+
|
|
320
|
+
| Approach | Dataset Size | Cost | Latency Impact | Best For |
|
|
321
|
+
|----------|-------------|------|----------------|----------|
|
|
322
|
+
| Prompt engineering | 0 | Free | None | Initial prototyping |
|
|
323
|
+
| Few-shot examples | 5-20 | Token cost | Higher latency | Format adherence |
|
|
324
|
+
| OpenAI fine-tune | 50-10K | $3-25/M tokens | Lower latency | Consistent behavior |
|
|
325
|
+
| LoRA (self-hosted) | 1K-100K | GPU hours | You control | Full customization |
|
|
326
|
+
|
|
327
|
+
## Checklist
|
|
328
|
+
- [ ] Dataset has at least 50 examples (200+ recommended for quality)
|
|
329
|
+
- [ ] Training data cleaned: no duplicates, validated formats, balanced categories
|
|
330
|
+
- [ ] Held-out validation set (10%) for monitoring overfitting
|
|
331
|
+
- [ ] System prompt consistent across all training examples
|
|
332
|
+
- [ ] Evaluation metrics defined before training (BLEU, ROUGE, exact match)
|
|
333
|
+
- [ ] Fine-tuned model compared against base model on evaluation set
|
|
334
|
+
- [ ] Training costs estimated before starting (tokens x price per token)
|
|
335
|
+
- [ ] Model versioned with suffix and deployment tested before production use
|