effect-gpt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -0
- package/data/chat_training_data.json +55 -0
- package/data/pretraining_data.json +27 -0
- package/package.json +25 -0
- package/src/cli/errors.ts +51 -0
- package/src/cli/main.ts +163 -0
- package/src/config.ts +3 -0
- package/src/data/Dataset.ts +168 -0
- package/src/errors.ts +73 -0
- package/src/index.ts +88 -0
- package/src/model/Embeddings.ts +108 -0
- package/src/model/FeedForward.ts +121 -0
- package/src/model/LLM.ts +124 -0
- package/src/model/LayerNorm.ts +138 -0
- package/src/model/ModelLayer.ts +10 -0
- package/src/model/OutputProjection.ts +76 -0
- package/src/model/SelfAttention.ts +169 -0
- package/src/model/TransformerBlock.ts +53 -0
- package/src/services/Logger.ts +124 -0
- package/src/services/Metrics.ts +260 -0
- package/src/services/Random.ts +98 -0
- package/src/services/SeedLayer.ts +39 -0
- package/src/services/index.ts +32 -0
- package/src/tensor/Tensor2D.ts +42 -0
- package/src/tensor/ops.ts +371 -0
- package/src/tensor/random.ts +32 -0
- package/src/tokenize/split.ts +27 -0
- package/src/tokenize/tokenize.ts +28 -0
- package/src/training/Adam.ts +61 -0
- package/src/training/clip.ts +16 -0
- package/src/training/loss.ts +35 -0
- package/src/training/train.ts +203 -0
- package/src/vocab/Vocab.ts +79 -0
- package/tests/fixtures/csv_bad.csv +2 -0
- package/tests/fixtures/csv_good.csv +3 -0
- package/tests/ts/cli_error_format.test.ts +26 -0
- package/tests/ts/dataset.test.ts +35 -0
- package/tests/ts/embeddings.test.ts +81 -0
- package/tests/ts/errors.test.ts +36 -0
- package/tests/ts/feed_forward.test.ts +74 -0
- package/tests/ts/initNormal.test.ts +41 -0
- package/tests/ts/layer_norm.test.ts +96 -0
- package/tests/ts/llm_parameters.test.ts +96 -0
- package/tests/ts/llm_predict.test.ts +98 -0
- package/tests/ts/llm_tokenize.test.ts +69 -0
- package/tests/ts/output_projection.test.ts +78 -0
- package/tests/ts/random.test.ts +44 -0
- package/tests/ts/self_attention.test.ts +63 -0
- package/tests/ts/support/factories.ts +126 -0
- package/tests/ts/support/runEffect.ts +29 -0
- package/tests/ts/support/seed.ts +12 -0
- package/tests/ts/support/stubs.ts +58 -0
- package/tests/ts/support/tensorMatchers.ts +96 -0
- package/tests/ts/support.test.ts +165 -0
- package/tests/ts/train_loop.test.ts +229 -0
- package/tests/ts/transformer_block.test.ts +72 -0
- package/tsconfig.json +20 -0
- package/tsconfig.test.json +8 -0

package/src/training/train.ts
@@ -0,0 +1,203 @@
+import * as Effect from "effect/Effect"
+import * as Stream from "effect/Stream"
+import * as Chunk from "effect/Chunk"
+import * as Context from "effect/Context"
+import * as Layer from "effect/Layer"
+import * as Ref from "effect/Ref"
+import * as Option from "effect/Option"
+import type { ShapeError } from "../tensor/ops"
+import * as Ops from "../tensor/ops"
+import * as T from "../tensor/Tensor2D"
+import { tokenize } from "../tokenize/tokenize"
+import type { LLM } from "../model/LLM"
+import { softmaxRows, crossEntropyLoss, dLogits } from "./loss"
+import { clipGlobalL2 } from "./clip"
+import type { LoggerServiceId } from "../services/Logger"
+import { info } from "../services/Logger"
+import type { MetricsServiceId } from "../services/Metrics"
+import { counter, gauge, timed } from "../services/Metrics"
+import { TrainingError } from "../errors"
+import type { TrainingError as TrainingErrorType } from "../errors"
+
+export interface TrainingConfig {
+  readonly epochs: number
+  readonly learningRate: number
+  readonly clipNorm?: number
+  readonly preprocessConcurrency?: number | "unbounded"
+  readonly preprocessBatchSize?: number
+  readonly trainConcurrency?: number
+}
+
+interface TrainingConfigId {
+  readonly TrainingConfig: unique symbol
+}
+interface LLMServiceId {
+  readonly LLMService: unique symbol
+}
+
+export interface PreprocessSettings {
+  readonly concurrency: number | "unbounded"
+  readonly batchSize: number
+}
+
+interface PreprocessSettingsId {
+  readonly PreprocessSettings: unique symbol
+}
+
+export const TrainingConfig = Context.GenericTag<TrainingConfigId, TrainingConfig>("TrainingConfig")
+export const LLMService = Context.GenericTag<LLMServiceId, LLM>("LLMService")
+export const PreprocessSettings = Context.GenericTag<PreprocessSettingsId, PreprocessSettings>(
+  "PreprocessSettings"
+)
+
+export const makeLLMLayer = (llm: LLM) => Layer.succeed(LLMService, llm)
+export const makeTrainingConfigLayer = (config: TrainingConfig) =>
+  Layer.succeed(TrainingConfig, config)
+export const makePreprocessSettingsLayer = (settings: PreprocessSettings) =>
+  Layer.succeed(PreprocessSettings, settings)
+
+type TrainEnv =
+  | TrainingConfigId
+  | LLMServiceId
+  | LoggerServiceId
+  | MetricsServiceId
+  | PreprocessSettingsId
+
+const mapShapeError = <A, R>(effect: Effect.Effect<A, ShapeError, R>) =>
+  effect.pipe(Effect.mapError(TrainingError.shape))
+
+const mapShapeUnknown = (error: unknown): TrainingErrorType =>
+  error instanceof Ops.ShapeError ? TrainingError.shape(error) : TrainingError.fromUnknown(error)
+
+const wrapThrowing = <A>(
+  thunk: () => A,
+  mapError: (error: unknown) => TrainingErrorType = TrainingError.fromUnknown
+) =>
+  Effect.try({
+    try: thunk,
+    catch: (error) => mapError(error)
+  })
+
+const clampConcurrency = (value: number | undefined, fallback: number): number =>
+  value === undefined ? fallback : Math.max(1, value)
+
+const trainWithStreamFactory = <E, R>(
+  makeStream: () => Stream.Stream<string, E, R>
+): Effect.Effect<void, TrainingErrorType, R | TrainEnv> =>
+  Effect.gen(function* () {
+    const llm = yield* LLMService
+    const config = yield* TrainingConfig
+    const preprocessSettings = yield* Effect.gen(function* () {
+      const env = (yield* Effect.context<R | TrainEnv>()) as Context.Context<R | TrainEnv>
+      const maybeSettings = Context.getOption(env, PreprocessSettings)
+      if (maybeSettings._tag === "Some") {
+        return maybeSettings.value
+      }
+      return {
+        concurrency: config.preprocessConcurrency ?? "unbounded",
+        batchSize: config.preprocessBatchSize ?? 1
+      } satisfies PreprocessSettings
+    })
+
+    const endTokenId = llm.vocab.encode("</s>")
+    if (endTokenId._tag === "None") {
+      return yield* Effect.fail(TrainingError.config("End token </s> not found in vocabulary"))
+    }
+
+    const clipNorm = config.clipNorm ?? 5.0
+    const concurrency = preprocessSettings.concurrency
+    const batchSize = Math.max(1, preprocessSettings.batchSize)
+    const trainConcurrency = clampConcurrency(config.trainConcurrency, 4)
+
+    const epochCounter = yield* counter("epochs_completed")
+    const lossGauge = yield* gauge("epoch_loss")
+    const examplesCounter = yield* counter("examples_processed")
+
+    for (let epoch = 0; epoch < config.epochs; epoch++) {
+      const epochResult = yield* timed(`epoch_${epoch}`, Effect.gen(function* () {
+        const totalLossRef = yield* Ref.make(0)
+        const totalExamplesRef = yield* Ref.make(0)
+
+        const preprocess = (text: string) =>
+          Effect.sync(() => {
+            const tokens = [...tokenize(text, llm.vocab)]
+            if (tokens.length < 2) {
+              return Option.none<{ inputIds: number[]; targetIds: number[] }>()
+            }
+
+            return Option.some({
+              inputIds: tokens.slice(0, tokens.length - 1),
+              targetIds: tokens.slice(1)
+            })
+          })
+
+        const preprocessed = makeStream()
+          .pipe(
+            Stream.mapError(TrainingError.fromUnknown),
+            Stream.mapChunks(Chunk.chunksOf(batchSize)),
+            Stream.flattenChunks,
+            Stream.mapEffect(preprocess, { concurrency }),
+            Stream.filterMap((value) => value)
+          )
+
+        const trainExample = ({ inputIds, targetIds }: { inputIds: number[]; targetIds: number[] }) =>
+          Effect.gen(function* () {
+            let input = T.fromArray(1, inputIds.length, inputIds)
+            for (const layer of llm.network) {
+              input = yield* mapShapeError(layer.forward(input))
+            }
+
+            const logits = input
+            const probs = yield* wrapThrowing(() => softmaxRows(logits), mapShapeUnknown)
+            const loss = yield* wrapThrowing(() => crossEntropyLoss(probs, targetIds), mapShapeUnknown)
+            yield* Ref.update(totalLossRef, (current) => current + loss)
+            yield* Ref.update(totalExamplesRef, (current) => current + 1)
+
+            let grads = yield* wrapThrowing(() => dLogits(probs, targetIds), mapShapeUnknown)
+            clipGlobalL2(grads, clipNorm)
+
+            for (let i = llm.network.length - 1; i >= 0; i--) {
+              grads = yield* mapShapeError(llm.network[i]!.backward(grads, config.learningRate))
+            }
+
+            const tokens = Ops.argmaxRows(probs)
+            const nextToken = tokens[tokens.length - 1]
+            if (nextToken === endTokenId.value) {
+              return
+            }
+          })
+
+        yield* Effect.scoped(
+          Stream.runDrain(
+            Stream.mapEffect(preprocessed, trainExample, { concurrency: trainConcurrency })
+          )
+        )
+
+        const totalLoss = yield* Ref.get(totalLossRef)
+        const totalExamples = yield* Ref.get(totalExamplesRef)
+        yield* examplesCounter.inc(totalExamples)
+        return { totalLoss, totalExamples }
+      }))
+
+      const { totalLoss, totalExamples } = epochResult.value
+      const avgLoss = totalExamples > 0 ? totalLoss / totalExamples : 0
+
+      yield* lossGauge.set(avgLoss)
+      yield* epochCounter.inc()
+      yield* info(`Epoch ${epoch}: Loss = ${avgLoss.toFixed(4)}`, {
+        epoch,
+        loss: avgLoss,
+        examples: totalExamples,
+        durationMs: epochResult.durationMs
+      })
+    }
+  })
+
+export const train = (
+  examples: ReadonlyArray<string>
+): Effect.Effect<void, TrainingErrorType, TrainEnv> =>
+  trainWithStreamFactory(() => Stream.fromIterable(examples))
+
+export const trainStream = <E, R>(
+  makeStream: () => Stream.Stream<string, E, R>
+): Effect.Effect<void, TrainingErrorType, R | TrainEnv> => trainWithStreamFactory(makeStream)
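
For orientation, a minimal sketch of wiring these exports together and running `train`. The `llm` value and the combined Logger/Metrics layer are assumptions (they come from src/model/LLM.ts and src/services/, which this hunk does not show), as is the relative import path:

    import * as Effect from "effect/Effect"
    import * as Layer from "effect/Layer"
    import type { LLM } from "./src/model/LLM"
    import {
      train,
      makeLLMLayer,
      makeTrainingConfigLayer,
      makePreprocessSettingsLayer
    } from "./src/training/train"

    declare const llm: LLM                        // hypothetical: a model built elsewhere
    declare const servicesLive: Layer.Layer<any>  // hypothetical: Logger + Metrics layers

    // TrainEnv lists PreprocessSettings even though train has a runtime fallback
    // via Context.getOption, so the sketch provides it explicitly to keep the
    // type-level environment fully satisfied.
    const env = Layer.mergeAll(
      makeLLMLayer(llm),
      makeTrainingConfigLayer({ epochs: 2, learningRate: 0.01, clipNorm: 5.0 }),
      makePreprocessSettingsLayer({ concurrency: "unbounded", batchSize: 1 }),
      servicesLive
    )

    await Effect.runPromise(train(["hello world </s>"]).pipe(Effect.provide(env)))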

package/src/vocab/Vocab.ts
@@ -0,0 +1,79 @@
+import * as HashMap from "effect/HashMap"
+import * as HashSet from "effect/HashSet"
+import * as Option from "effect/Option"
+import * as Stream from "effect/Stream"
+import * as Effect from "effect/Effect"
+import { splitWordToTokens } from "../tokenize/split"
+
+export class Vocab {
+  readonly encodeMap: HashMap.HashMap<string, number>
+  readonly decodeMap: HashMap.HashMap<number, string>
+  readonly words: ReadonlyArray<string>
+
+  private constructor(
+    encodeMap: HashMap.HashMap<string, number>,
+    decodeMap: HashMap.HashMap<number, string>,
+    words: ReadonlyArray<string>
+  ) {
+    this.encodeMap = encodeMap
+    this.decodeMap = decodeMap
+    this.words = words
+  }
+
+  static make(words: ReadonlyArray<string>): Vocab {
+    let encodeMap = HashMap.empty<string, number>()
+    let decodeMap = HashMap.empty<number, string>()
+
+    for (let i = 0; i < words.length; i += 1) {
+      const word = words[i]
+      encodeMap = HashMap.set(encodeMap, word, i)
+      decodeMap = HashMap.set(decodeMap, i, word)
+    }
+
+    return new Vocab(encodeMap, decodeMap, words)
+  }
+
+  static defaultWords(): ReadonlyArray<string> {
+    return ["hello", "world", "this", "is", "rust", "</s>"]
+  }
+
+  encode(word: string): Option.Option<number> {
+    return HashMap.get(this.encodeMap, word)
+  }
+
+  decode(id: number): Option.Option<string> {
+    return HashMap.get(this.decodeMap, id)
+  }
+
+  private static addTokensToSet(
+    set: HashSet.HashSet<string>,
+    text: string
+  ): HashSet.HashSet<string> {
+    const words = text.split(/\s+/).filter((w) => w.length > 0)
+    let next = set
+
+    for (const word of words) {
+      const parts = splitWordToTokens(word)
+      for (const part of parts) {
+        next = HashSet.add(next, part)
+      }
+    }
+
+    return next
+  }
+
+  static processTextForVocab(texts: ReadonlyArray<string>): HashSet.HashSet<string> {
+    let vocabSet = HashSet.add(HashSet.empty<string>(), "</s>")
+    for (const text of texts) {
+      vocabSet = Vocab.addTokensToSet(vocabSet, text)
+    }
+    return vocabSet
+  }
+
+  static processStreamForVocab<E, R>(
+    stream: Stream.Stream<string, E, R>
+  ): Effect.Effect<HashSet.HashSet<string>, E, R> {
+    const initial = HashSet.add(HashSet.empty<string>(), "</s>")
+    return Stream.runFold(stream, initial, (set, text) => Vocab.addTokensToSet(set, text))
+  }
+}
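
A small round-trip sketch of the Vocab API above; only the relative import path is an assumption:

    import * as Option from "effect/Option"
    import { Vocab } from "./src/vocab/Vocab"

    const vocab = Vocab.make(Vocab.defaultWords())          // ids assigned in array order
    const id = vocab.encode("world")                        // Option.some(1)
    const back = Option.flatMap(id, (i) => vocab.decode(i)) // Option.some("world")
    const missing = vocab.encode("zig")                     // Option.none()

    // Deriving a token set from raw text (split via src/tokenize/split.ts);
    // "</s>" is always seeded into the set.
    const tokenSet = Vocab.processTextForVocab(["hello world"])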

package/tests/ts/cli_error_format.test.ts
@@ -0,0 +1,26 @@
+import { describe, expect, test } from "bun:test"
+import { formatTrainingError } from "../../src/cli/errors"
+import { TrainingError } from "../../src/errors"
+import { ShapeError } from "../../src/tensor/ops"
+
+describe("formatTrainingError", () => {
+  test("formats dataset error with path", () => {
+    const err = TrainingError.dataset({ _tag: "DatasetLoadError", path: "/tmp/data", error: new Error("io") } as any)
+    const message = formatTrainingError(err)
+    expect(message).toContain("Dataset error")
+    expect(message).toContain("/tmp/data")
+  })
+
+  test("formats shape error cause", () => {
+    const err = TrainingError.shape(new ShapeError("bad shape"))
+    const message = formatTrainingError(err)
+    expect(message).toContain("Shape error")
+    expect(message).toContain("bad shape")
+  })
+
+  test("formats plain Error as unexpected error", () => {
+    const message = formatTrainingError(new Error("boom"))
+    expect(message).toContain("Unexpected error")
+    expect(message).toContain("boom")
+  })
+})

package/tests/ts/dataset.test.ts
@@ -0,0 +1,35 @@
+import { describe, test, expect } from "bun:test"
+import * as Effect from "effect/Effect"
+import * as Exit from "effect/Exit"
+import { BunFileSystem } from "@effect/platform-bun"
+import { Dataset, DatasetParseError } from "../../src/data/Dataset.js"
+
+describe("Dataset CSV", () => {
+  const good = "tests/fixtures/csv_good.csv"
+  const bad = "tests/fixtures/csv_bad.csv"
+
+  test("collect reads CSV and joins fields", async () => {
+    const streams = Dataset.load({ pretrainingPath: good, chatPath: good, format: "csv" })
+    const program = Dataset.collect(streams).pipe(Effect.provide(BunFileSystem.layer))
+    const result = await Effect.runPromise(program)
+
+    expect(result.pretrainingData).toEqual(["hello,world", "quoted,field,foo", 'say "hi",bar'])
+    expect(result.chatTrainingData).toEqual(["hello,world", "quoted,field,foo", 'say "hi",bar'])
+  })
+
+  test("malformed CSV surfaces DatasetParseError", async () => {
+    const streams = Dataset.load({ pretrainingPath: bad, chatPath: bad, format: "csv" })
+    const program = Dataset.collect(streams).pipe(Effect.provide(BunFileSystem.layer))
+    const exit = await Effect.runPromiseExit(program)
+
+    expect(exit._tag).toBe("Failure")
+    if (exit._tag === "Failure") {
+      const cause = exit.cause
+      if (cause._tag === "Fail") {
+        expect(cause.error).toBeInstanceOf(DatasetParseError)
+      } else {
+        throw new Error(`Unexpected failure cause: ${cause._tag}`)
+      }
+    }
+  })
+})
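
The CSV fixtures are separate files in this release (csv_good.csv is three lines and csv_bad.csv two, per the file list above). Inferring from the assertions rather than the shipped files, csv_good.csv plausibly looks like the following, with the quoted third record collapsing to 'say "hi",bar' once fields are joined:

    hello,world
    quoted,field,foo
    "say ""hi""",bar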

package/tests/ts/embeddings.test.ts
@@ -0,0 +1,81 @@
+import { describe, test, expect } from "bun:test"
+import { runEffect } from "./support/runEffect"
+import { expectShape, expectNotClose, expectFinite } from "./support/tensorMatchers"
+import { makeEmbeddings } from "./support/factories"
+import * as T from "../../src/tensor/Tensor2D"
+import { EMBEDDING_DIM, MAX_SEQ_LEN } from "../../src/config"
+
+describe("Embeddings", () => {
+  test("embed single token → [1, EMBEDDING_DIM]", () => {
+    const embeddings = makeEmbeddings(10)
+    const input = T.fromArray(1, 1, [0])
+    const output = runEffect(embeddings.forward(input))
+    expectShape(output, [1, EMBEDDING_DIM])
+    expectFinite(output)
+  })
+
+  test("embed multiple tokens → [seqLen, EMBEDDING_DIM]", () => {
+    const embeddings = makeEmbeddings(10)
+    const input = T.fromArray(1, 3, [0, 1, 2])
+    const output = runEffect(embeddings.forward(input))
+    expectShape(output, [3, EMBEDDING_DIM])
+    expectFinite(output)
+  })
+
+  test("positional embeddings differ across positions", () => {
+    const embeddings = makeEmbeddings(10)
+    const input = T.fromArray(1, 3, [0, 0, 0])
+    const output = runEffect(embeddings.forward(input))
+    expectShape(output, [3, EMBEDDING_DIM])
+
+    const row0Val = T.get(output, 0, 0)
+    const row1Val = T.get(output, 1, 0)
+    const row2Val = T.get(output, 2, 0)
+
+    expect(row0Val).not.toBe(row1Val)
+    expect(row1Val).not.toBe(row2Val)
+    expect(row0Val).not.toBe(row2Val)
+  })
+
+  test("shape across sequence lengths 1..5", () => {
+    const embeddings = makeEmbeddings(10)
+    for (let seqLen = 1; seqLen <= 4; seqLen++) {
+      const tokens = Array.from({ length: seqLen }, (_, i) => i % 10)
+      const input = T.fromArray(1, seqLen, tokens)
+      const output = runEffect(embeddings.forward(input))
+      expectShape(output, [seqLen, EMBEDDING_DIM])
+      expectFinite(output)
+    }
+  })
+
+  test("max sequence length boundary", () => {
+    const embeddings = makeEmbeddings(10)
+    const tokens = Array.from({ length: MAX_SEQ_LEN }, (_, i) => i % 10)
+    const input = T.fromArray(1, MAX_SEQ_LEN, tokens)
+    const output = runEffect(embeddings.forward(input))
+    expectShape(output, [MAX_SEQ_LEN, EMBEDDING_DIM])
+    expectFinite(output)
+  })
+
+  test("backward updates token & positional embeddings", () => {
+    const embeddings = makeEmbeddings(10)
+    const input = T.fromArray(1, 3, [0, 1, 2])
+
+    const tokenBefore = T.clone(embeddings.tokenEmbeddings)
+    const positionalBefore = T.clone(embeddings.positionalEmbeddings)
+
+    runEffect(embeddings.forward(input))
+    const grad = T.ones(3, EMBEDDING_DIM)
+    runEffect(embeddings.backward(grad, 0.01))
+
+    expectNotClose(embeddings.tokenEmbeddings, tokenBefore)
+    expectNotClose(embeddings.positionalEmbeddings, positionalBefore)
+  })
+
+  test("parametersCount", () => {
+    const vocabSize = 10
+    const embeddings = makeEmbeddings(vocabSize)
+    const expectedCount = vocabSize * EMBEDDING_DIM + MAX_SEQ_LEN * EMBEDDING_DIM
+    expect(embeddings.parametersCount).toBe(expectedCount)
+  })
+})
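
The parametersCount assertion is the bookkeeping of the layer's two lookup tables: a vocabSize × EMBEDDING_DIM token table plus a MAX_SEQ_LEN × EMBEDDING_DIM positional table. With the test's vocabSize of 10 and hypothetical dims EMBEDDING_DIM = 16 and MAX_SEQ_LEN = 32 (the real constants live in src/config.ts, not shown here), that would come to 10·16 + 32·16 = 672 parameters.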

package/tests/ts/errors.test.ts
@@ -0,0 +1,36 @@
+import { describe, expect, test } from "bun:test"
+import { TrainingError } from "../../src/errors"
+import { ShapeError } from "../../src/tensor/ops"
+import { DatasetLoadError, DatasetParseError } from "../../src/data/Dataset"
+
+describe("TrainingError.fromUnknown", () => {
+  test("maps ShapeError to TrainingShapeError", () => {
+    const err = TrainingError.fromUnknown(new ShapeError("bad shape"))
+    expect(err._tag).toBe("TrainingShapeError")
+    expect((err as any).cause.message).toBe("bad shape")
+  })
+
+  test("maps DatasetLoadError to TrainingDatasetError", () => {
+    const loadErr = new DatasetLoadError({ path: "x", error: new Error("io") })
+    const err = TrainingError.fromUnknown(loadErr)
+    expect(err._tag).toBe("TrainingDatasetError")
+    expect((err as any).cause).toBe(loadErr)
+  })
+
+  test("maps DatasetParseError to TrainingDatasetError", () => {
+    const parseErr = new DatasetParseError({ path: "x", error: new Error("parse") })
+    const err = TrainingError.fromUnknown(parseErr)
+    expect(err._tag).toBe("TrainingDatasetError")
+  })
+
+  test("passes through existing TrainingError", () => {
+    const existing = TrainingError.optimizer("boom")
+    const err = TrainingError.fromUnknown(existing)
+    expect(err).toBe(existing)
+  })
+
+  test("wraps unknown errors in TrainingUnknownError", () => {
+    const err = TrainingError.fromUnknown(new Error("boom"))
+    expect(err._tag).toBe("TrainingUnknownError")
+  })
+})

package/tests/ts/feed_forward.test.ts
@@ -0,0 +1,74 @@
+import { describe, test, expect } from "bun:test"
+import { runEffect } from "./support/runEffect"
+import { expectShape, expectNotClose, expectFinite } from "./support/tensorMatchers"
+import { makeFeedForward } from "./support/factories"
+import * as T from "../../src/tensor/Tensor2D"
+import { EMBEDDING_DIM, HIDDEN_DIM } from "../../src/config"
+
+describe("FeedForward", () => {
+  test("forward shape matches input", () => {
+    const ff = makeFeedForward()
+    const input = T.ones(3, EMBEDDING_DIM)
+    const output = runEffect(ff.forward(input))
+    expectShape(output, [3, EMBEDDING_DIM])
+  })
+
+  test("shape across sequence lengths 1..5", () => {
+    const ff = makeFeedForward()
+    for (let seqLen = 1; seqLen <= 4; seqLen++) {
+      const input = T.ones(seqLen, EMBEDDING_DIM)
+      const output = runEffect(ff.forward(input))
+      expectShape(output, [seqLen, EMBEDDING_DIM])
+    }
+  })
+
+  test("output contains finite values", () => {
+    const ff = makeFeedForward()
+    const input = T.ones(3, EMBEDDING_DIM)
+    const output = runEffect(ff.forward(input))
+    expectFinite(output)
+  })
+
+  test("backward returns gradient with correct shape", () => {
+    const ff = makeFeedForward()
+    const input = T.ones(3, EMBEDDING_DIM)
+    runEffect(ff.forward(input))
+    const grads = T.ones(3, EMBEDDING_DIM)
+    const gradInput = runEffect(ff.backward(grads, 0.01))
+    expectShape(gradInput, [3, EMBEDDING_DIM])
+  })
+
+  test("backward output differs from forward output", () => {
+    const ff = makeFeedForward()
+    const input = T.ones(3, EMBEDDING_DIM)
+    const output = runEffect(ff.forward(input))
+    const grads = T.ones(3, EMBEDDING_DIM)
+    const gradInput = runEffect(ff.backward(grads, 0.01))
+    expectNotClose(output, gradInput)
+  })
+
+  test("backward updates w1/b1/w2/b2", () => {
+    const ff = makeFeedForward()
+    const w1Before = T.clone(ff.w1)
+    const b1Before = T.clone(ff.b1)
+    const w2Before = T.clone(ff.w2)
+    const b2Before = T.clone(ff.b2)
+
+    const input = T.ones(3, EMBEDDING_DIM)
+    runEffect(ff.forward(input))
+    const grads = T.ones(3, EMBEDDING_DIM)
+    runEffect(ff.backward(grads, 0.01))
+
+    expectNotClose(ff.w1, w1Before)
+    expectNotClose(ff.b1, b1Before)
+    expectNotClose(ff.w2, w2Before)
+    expectNotClose(ff.b2, b2Before)
+  })
+
+  test("parametersCount", () => {
+    const ff = makeFeedForward()
+    const expected =
+      EMBEDDING_DIM * HIDDEN_DIM + HIDDEN_DIM + HIDDEN_DIM * EMBEDDING_DIM + EMBEDDING_DIM
+    expect(ff.parametersCount).toBe(expected)
+  })
+})
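
The parametersCount expression mirrors a standard two-layer MLP: a d×h weight with h biases for the expansion, and an h×d weight with d biases for the projection back. With hypothetical dims d = 8 and h = 32 (again, the real EMBEDDING_DIM/HIDDEN_DIM come from src/config.ts), that is 8·32 + 32 + 32·8 + 8 = 552.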

package/tests/ts/initNormal.test.ts
@@ -0,0 +1,41 @@
+import { describe, expect, it } from "bun:test"
+import { initNormal } from "../../src/tensor/ops"
+import { seeded } from "../../src/tensor/random"
+
+describe("initNormal with seeded RNG", () => {
+  it("produces deterministic tensors with same seed", () => {
+    const rng1 = seeded(42)
+    const rng2 = seeded(42)
+
+    const t1 = initNormal(4, 4, 0, 1, rng1)
+    const t2 = initNormal(4, 4, 0, 1, rng2)
+
+    expect(Array.from(t1.data)).toEqual(Array.from(t2.data))
+  })
+
+  it("produces different tensors with different seeds", () => {
+    const rng1 = seeded(42)
+    const rng2 = seeded(43)
+
+    const t1 = initNormal(4, 4, 0, 1, rng1)
+    const t2 = initNormal(4, 4, 0, 1, rng2)
+
+    expect(Array.from(t1.data)).not.toEqual(Array.from(t2.data))
+  })
+
+  it("respects mean and std parameters", () => {
+    const rng = seeded(1337)
+    const mean = 5
+    const std = 0.1
+    const t = initNormal(100, 100, mean, std, rng)
+
+    let sum = 0
+    for (let i = 0; i < t.data.length; i++) {
+      sum += t.data[i]
+    }
+    const actualMean = sum / t.data.length
+
+    // Mean should be close to target (within ~3 std errors)
+    expect(Math.abs(actualMean - mean)).toBeLessThan(0.1)
+  })
+})
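
A note on the tolerance in the final test: a 100×100 tensor yields n = 10,000 samples, so the standard error of the sample mean is σ/√n = 0.1/√10000 = 0.001, and three standard errors is only ±0.003. The asserted bound of 0.1 is therefore roughly 100 standard errors, far looser than the inline comment suggests; it is generous enough to be robust against RNG quality rather than a tight statistical check.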

package/tests/ts/layer_norm.test.ts
@@ -0,0 +1,96 @@
+import { describe, test, expect } from "bun:test"
+import { runEffect } from "./support/runEffect"
+import { expectShape, expectNotClose, expectFinite } from "./support/tensorMatchers"
+import { makeLayerNorm } from "./support/factories"
+import * as T from "../../src/tensor/Tensor2D"
+import { EMBEDDING_DIM } from "../../src/config"
+
+const rowMean = (t: T.Tensor2D, row: number): number => {
+  let sum = 0
+  for (let j = 0; j < t.cols; j++) sum += T.get(t, row, j)
+  return sum / t.cols
+}
+
+const rowVariance = (t: T.Tensor2D, row: number): number => {
+  const mean = rowMean(t, row)
+  let sumSq = 0
+  for (let j = 0; j < t.cols; j++) {
+    const diff = T.get(t, row, j) - mean
+    sumSq += diff * diff
+  }
+  return sumSq / t.cols
+}
+
+const makeVariedInput = (): T.Tensor2D => {
+  const input = T.zeros(3, EMBEDDING_DIM)
+  for (let i = 0; i < input.data.length; i++) {
+    input.data[i] = (i % 7) * 0.5 - 1.5
+  }
+  return input
+}
+
+describe("LayerNorm", () => {
+  test("forward shape preserved", () => {
+    const ln = makeLayerNorm()
+    const input = T.ones(3, EMBEDDING_DIM)
+    const output = runEffect(ln.forward(input))
+    expectShape(output, [3, EMBEDDING_DIM])
+  })
+
+  test("per-row mean approximately zero", () => {
+    const ln = makeLayerNorm()
+    const input = makeVariedInput()
+    const output = runEffect(ln.forward(input))
+
+    for (let row = 0; row < output.rows; row++) {
+      const mean = rowMean(output, row)
+      expect(Math.abs(mean)).toBeLessThan(1e-5)
+    }
+  })
+
+  test("per-row variance approximately one", () => {
+    const ln = makeLayerNorm()
+    const input = makeVariedInput()
+    const output = runEffect(ln.forward(input))
+
+    for (let row = 0; row < output.rows; row++) {
+      const variance = rowVariance(output, row)
+      expect(Math.abs(variance - 1)).toBeLessThan(1e-4)
+    }
+  })
+
+  test("output contains finite values", () => {
+    const ln = makeLayerNorm()
+    const input = makeVariedInput()
+    const output = runEffect(ln.forward(input))
+    expectFinite(output)
+  })
+
+  test("backward shape preserved", () => {
+    const ln = makeLayerNorm()
+    const input = makeVariedInput()
+    runEffect(ln.forward(input))
+    const dOut = T.ones(3, EMBEDDING_DIM)
+    const grad = runEffect(ln.backward(dOut, 0.01))
+    expectShape(grad, [3, EMBEDDING_DIM])
+  })
+
+  test("backward updates gamma and beta", () => {
+    const ln = makeLayerNorm()
+    const input = makeVariedInput()
+    const gammaBefore = T.clone(ln.gamma)
+    const betaBefore = T.clone(ln.beta)
+
+    runEffect(ln.forward(input))
+    const dOut = T.ones(3, EMBEDDING_DIM)
+    runEffect(ln.backward(dOut, 0.01))
+
+    expectNotClose(ln.gamma, gammaBefore)
+    expectNotClose(ln.beta, betaBefore)
+  })
+
+  test("parametersCount", () => {
+    const ln = makeLayerNorm()
+    expect(ln.parametersCount).toBe(2 * EMBEDDING_DIM)
+  })
+})
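
For reference, these assertions pin down the per-row LayerNorm definition y = γ ⊙ (x − μ) / √(σ² + ε) + β. The mean ≈ 0 and variance ≈ 1 checks can only pass on a freshly constructed layer if γ is initialized to ones and β to zeros (an assumption the tests imply but this hunk does not show), and the learnable γ and β vectors account exactly for the 2 · EMBEDDING_DIM parameter count.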