effect-gpt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +50 -0
  2. package/data/chat_training_data.json +55 -0
  3. package/data/pretraining_data.json +27 -0
  4. package/package.json +25 -0
  5. package/src/cli/errors.ts +51 -0
  6. package/src/cli/main.ts +163 -0
  7. package/src/config.ts +3 -0
  8. package/src/data/Dataset.ts +168 -0
  9. package/src/errors.ts +73 -0
  10. package/src/index.ts +88 -0
  11. package/src/model/Embeddings.ts +108 -0
  12. package/src/model/FeedForward.ts +121 -0
  13. package/src/model/LLM.ts +124 -0
  14. package/src/model/LayerNorm.ts +138 -0
  15. package/src/model/ModelLayer.ts +10 -0
  16. package/src/model/OutputProjection.ts +76 -0
  17. package/src/model/SelfAttention.ts +169 -0
  18. package/src/model/TransformerBlock.ts +53 -0
  19. package/src/services/Logger.ts +124 -0
  20. package/src/services/Metrics.ts +260 -0
  21. package/src/services/Random.ts +98 -0
  22. package/src/services/SeedLayer.ts +39 -0
  23. package/src/services/index.ts +32 -0
  24. package/src/tensor/Tensor2D.ts +42 -0
  25. package/src/tensor/ops.ts +371 -0
  26. package/src/tensor/random.ts +32 -0
  27. package/src/tokenize/split.ts +27 -0
  28. package/src/tokenize/tokenize.ts +28 -0
  29. package/src/training/Adam.ts +61 -0
  30. package/src/training/clip.ts +16 -0
  31. package/src/training/loss.ts +35 -0
  32. package/src/training/train.ts +203 -0
  33. package/src/vocab/Vocab.ts +79 -0
  34. package/tests/fixtures/csv_bad.csv +2 -0
  35. package/tests/fixtures/csv_good.csv +3 -0
  36. package/tests/ts/cli_error_format.test.ts +26 -0
  37. package/tests/ts/dataset.test.ts +35 -0
  38. package/tests/ts/embeddings.test.ts +81 -0
  39. package/tests/ts/errors.test.ts +36 -0
  40. package/tests/ts/feed_forward.test.ts +74 -0
  41. package/tests/ts/initNormal.test.ts +41 -0
  42. package/tests/ts/layer_norm.test.ts +96 -0
  43. package/tests/ts/llm_parameters.test.ts +96 -0
  44. package/tests/ts/llm_predict.test.ts +98 -0
  45. package/tests/ts/llm_tokenize.test.ts +69 -0
  46. package/tests/ts/output_projection.test.ts +78 -0
  47. package/tests/ts/random.test.ts +44 -0
  48. package/tests/ts/self_attention.test.ts +63 -0
  49. package/tests/ts/support/factories.ts +126 -0
  50. package/tests/ts/support/runEffect.ts +29 -0
  51. package/tests/ts/support/seed.ts +12 -0
  52. package/tests/ts/support/stubs.ts +58 -0
  53. package/tests/ts/support/tensorMatchers.ts +96 -0
  54. package/tests/ts/support.test.ts +165 -0
  55. package/tests/ts/train_loop.test.ts +229 -0
  56. package/tests/ts/transformer_block.test.ts +72 -0
  57. package/tsconfig.json +20 -0
  58. package/tsconfig.test.json +8 -0
package/src/training/train.ts
@@ -0,0 +1,203 @@
+ import * as Effect from "effect/Effect"
+ import * as Stream from "effect/Stream"
+ import * as Chunk from "effect/Chunk"
+ import * as Context from "effect/Context"
+ import * as Layer from "effect/Layer"
+ import * as Ref from "effect/Ref"
+ import * as Option from "effect/Option"
+ import type { ShapeError } from "../tensor/ops"
+ import * as Ops from "../tensor/ops"
+ import * as T from "../tensor/Tensor2D"
+ import { tokenize } from "../tokenize/tokenize"
+ import type { LLM } from "../model/LLM"
+ import { softmaxRows, crossEntropyLoss, dLogits } from "./loss"
+ import { clipGlobalL2 } from "./clip"
+ import type { LoggerServiceId } from "../services/Logger"
+ import { info } from "../services/Logger"
+ import type { MetricsServiceId } from "../services/Metrics"
+ import { counter, gauge, timed } from "../services/Metrics"
+ import { TrainingError } from "../errors"
+ import type { TrainingError as TrainingErrorType } from "../errors"
+
+ export interface TrainingConfig {
+   readonly epochs: number
+   readonly learningRate: number
+   readonly clipNorm?: number
+   readonly preprocessConcurrency?: number | "unbounded"
+   readonly preprocessBatchSize?: number
+   readonly trainConcurrency?: number
+ }
+
+ interface TrainingConfigId {
+   readonly TrainingConfig: unique symbol
+ }
+ interface LLMServiceId {
+   readonly LLMService: unique symbol
+ }
+
+ export interface PreprocessSettings {
+   readonly concurrency: number | "unbounded"
+   readonly batchSize: number
+ }
+
+ interface PreprocessSettingsId {
+   readonly PreprocessSettings: unique symbol
+ }
+
+ export const TrainingConfig = Context.GenericTag<TrainingConfigId, TrainingConfig>("TrainingConfig")
+ export const LLMService = Context.GenericTag<LLMServiceId, LLM>("LLMService")
+ export const PreprocessSettings = Context.GenericTag<PreprocessSettingsId, PreprocessSettings>(
+   "PreprocessSettings"
+ )
+
+ export const makeLLMLayer = (llm: LLM) => Layer.succeed(LLMService, llm)
+ export const makeTrainingConfigLayer = (config: TrainingConfig) =>
+   Layer.succeed(TrainingConfig, config)
+ export const makePreprocessSettingsLayer = (settings: PreprocessSettings) =>
+   Layer.succeed(PreprocessSettings, settings)
+
+ type TrainEnv =
+   | TrainingConfigId
+   | LLMServiceId
+   | LoggerServiceId
+   | MetricsServiceId
+   | PreprocessSettingsId
+
+ const mapShapeError = <A, R>(effect: Effect.Effect<A, ShapeError, R>) =>
+   effect.pipe(Effect.mapError(TrainingError.shape))
+
+ const mapShapeUnknown = (error: unknown): TrainingErrorType =>
+   error instanceof Ops.ShapeError ? TrainingError.shape(error) : TrainingError.fromUnknown(error)
+
+ const wrapThrowing = <A>(
+   thunk: () => A,
+   mapError: (error: unknown) => TrainingErrorType = TrainingError.fromUnknown
+ ) =>
+   Effect.try({
+     try: thunk,
+     catch: (error) => mapError(error)
+   })
+
+ const clampConcurrency = (value: number | undefined, fallback: number): number =>
+   value === undefined ? fallback : Math.max(1, value)
+
+ const trainWithStreamFactory = <E, R>(
+   makeStream: () => Stream.Stream<string, E, R>
+ ): Effect.Effect<void, TrainingErrorType, R | TrainEnv> =>
+   Effect.gen(function* () {
+     const llm = yield* LLMService
+     const config = yield* TrainingConfig
+     const preprocessSettings = yield* Effect.gen(function* () {
+       const env = (yield* Effect.context<R | TrainEnv>()) as Context.Context<R | TrainEnv>
+       const maybeSettings = Context.getOption(env, PreprocessSettings)
+       if (maybeSettings._tag === "Some") {
+         return maybeSettings.value
+       }
+       return {
+         concurrency: config.preprocessConcurrency ?? "unbounded",
+         batchSize: config.preprocessBatchSize ?? 1
+       } satisfies PreprocessSettings
+     })
+
+     const endTokenId = llm.vocab.encode("</s>")
+     if (endTokenId._tag === "None") {
+       return yield* Effect.fail(TrainingError.config("End token </s> not found in vocabulary"))
+     }
+
+     const clipNorm = config.clipNorm ?? 5.0
+     const concurrency = preprocessSettings.concurrency
+     const batchSize = Math.max(1, preprocessSettings.batchSize)
+     const trainConcurrency = clampConcurrency(config.trainConcurrency, 4)
+
+     const epochCounter = yield* counter("epochs_completed")
+     const lossGauge = yield* gauge("epoch_loss")
+     const examplesCounter = yield* counter("examples_processed")
+
+     for (let epoch = 0; epoch < config.epochs; epoch++) {
+       const epochResult = yield* timed(`epoch_${epoch}`, Effect.gen(function* () {
+         const totalLossRef = yield* Ref.make(0)
+         const totalExamplesRef = yield* Ref.make(0)
+
+         const preprocess = (text: string) =>
+           Effect.sync(() => {
+             const tokens = [...tokenize(text, llm.vocab)]
+             if (tokens.length < 2) {
+               return Option.none<{ inputIds: number[]; targetIds: number[] }>()
+             }
+
+             return Option.some({
+               inputIds: tokens.slice(0, tokens.length - 1),
+               targetIds: tokens.slice(1)
+             })
+           })
+
+         const preprocessed = makeStream()
+           .pipe(
+             Stream.mapError(TrainingError.fromUnknown),
+             Stream.mapChunks(Chunk.chunksOf(batchSize)),
+             Stream.flattenChunks,
+             Stream.mapEffect(preprocess, { concurrency }),
+             Stream.filterMap((value) => value)
+           )
+
+         const trainExample = ({ inputIds, targetIds }: { inputIds: number[]; targetIds: number[] }) =>
+           Effect.gen(function* () {
+             let input = T.fromArray(1, inputIds.length, inputIds)
+             for (const layer of llm.network) {
+               input = yield* mapShapeError(layer.forward(input))
+             }
+
+             const logits = input
+             const probs = yield* wrapThrowing(() => softmaxRows(logits), mapShapeUnknown)
+             const loss = yield* wrapThrowing(() => crossEntropyLoss(probs, targetIds), mapShapeUnknown)
+             yield* Ref.update(totalLossRef, (current) => current + loss)
+             yield* Ref.update(totalExamplesRef, (current) => current + 1)
+
+             let grads = yield* wrapThrowing(() => dLogits(probs, targetIds), mapShapeUnknown)
+             clipGlobalL2(grads, clipNorm)
+
+             for (let i = llm.network.length - 1; i >= 0; i--) {
+               grads = yield* mapShapeError(llm.network[i]!.backward(grads, config.learningRate))
+             }
+
+             const tokens = Ops.argmaxRows(probs)
+             const nextToken = tokens[tokens.length - 1]
+             if (nextToken === endTokenId.value) {
+               return
+             }
+           })
+
+         yield* Effect.scoped(
+           Stream.runDrain(
+             Stream.mapEffect(preprocessed, trainExample, { concurrency: trainConcurrency })
+           )
+         )
+
+         const totalLoss = yield* Ref.get(totalLossRef)
+         const totalExamples = yield* Ref.get(totalExamplesRef)
+         yield* examplesCounter.inc(totalExamples)
+         return { totalLoss, totalExamples }
+       }))
+
+       const { totalLoss, totalExamples } = epochResult.value
+       const avgLoss = totalExamples > 0 ? totalLoss / totalExamples : 0
+
+       yield* lossGauge.set(avgLoss)
+       yield* epochCounter.inc()
+       yield* info(`Epoch ${epoch}: Loss = ${avgLoss.toFixed(4)}`, {
+         epoch,
+         loss: avgLoss,
+         examples: totalExamples,
+         durationMs: epochResult.durationMs
+       })
+     }
+   })
+
+ export const train = (
+   examples: ReadonlyArray<string>
+ ): Effect.Effect<void, TrainingErrorType, TrainEnv> =>
+   trainWithStreamFactory(() => Stream.fromIterable(examples))
+
+ export const trainStream = <E, R>(
+   makeStream: () => Stream.Stream<string, E, R>
+ ): Effect.Effect<void, TrainingErrorType, R | TrainEnv> => trainWithStreamFactory(makeStream)
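Usage note: `train` and `trainStream` are plain Effects whose requirements (`TrainEnv`) are satisfied with Layers. Below is a minimal wiring sketch; `llm`, `LoggerLive`, and `MetricsLive` are hypothetical stand-ins for constructors that live elsewhere in the package (src/model/LLM.ts, src/services), not actual exports of this file.

    import * as Effect from "effect/Effect"
    import * as Layer from "effect/Layer"

    // Hypothetical stand-ins; the real constructors are defined elsewhere in the package.
    declare const llm: LLM
    declare const LoggerLive: Layer.Layer<LoggerServiceId>
    declare const MetricsLive: Layer.Layer<MetricsServiceId>

    const TrainingLive = Layer.mergeAll(
      makeLLMLayer(llm),
      makeTrainingConfigLayer({ epochs: 3, learningRate: 0.01, clipNorm: 5.0 }),
      makePreprocessSettingsLayer({ concurrency: "unbounded", batchSize: 1 }),
      LoggerLive,
      MetricsLive
    )

    // Streams the examples through preprocessing and the train loop,
    // logging average loss once per epoch.
    const program = train(["hello world </s>", "this is rust </s>"]).pipe(
      Effect.provide(TrainingLive)
    )
    await Effect.runPromise(program)

Providing `PreprocessSettings` explicitly is optional at runtime (the loop falls back to the config fields via `Context.getOption`), but `TrainEnv` still lists it, so the layer is included here to keep the sketch well-typed.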
package/src/vocab/Vocab.ts
@@ -0,0 +1,79 @@
+ import * as HashMap from "effect/HashMap"
+ import * as HashSet from "effect/HashSet"
+ import * as Option from "effect/Option"
+ import * as Stream from "effect/Stream"
+ import * as Effect from "effect/Effect"
+ import { splitWordToTokens } from "../tokenize/split"
+
+ export class Vocab {
+   readonly encodeMap: HashMap.HashMap<string, number>
+   readonly decodeMap: HashMap.HashMap<number, string>
+   readonly words: ReadonlyArray<string>
+
+   private constructor(
+     encodeMap: HashMap.HashMap<string, number>,
+     decodeMap: HashMap.HashMap<number, string>,
+     words: ReadonlyArray<string>
+   ) {
+     this.encodeMap = encodeMap
+     this.decodeMap = decodeMap
+     this.words = words
+   }
+
+   static make(words: ReadonlyArray<string>): Vocab {
+     let encodeMap = HashMap.empty<string, number>()
+     let decodeMap = HashMap.empty<number, string>()
+
+     for (let i = 0; i < words.length; i += 1) {
+       const word = words[i]
+       encodeMap = HashMap.set(encodeMap, word, i)
+       decodeMap = HashMap.set(decodeMap, i, word)
+     }
+
+     return new Vocab(encodeMap, decodeMap, words)
+   }
+
+   static defaultWords(): ReadonlyArray<string> {
+     return ["hello", "world", "this", "is", "rust", "</s>"]
+   }
+
+   encode(word: string): Option.Option<number> {
+     return HashMap.get(this.encodeMap, word)
+   }
+
+   decode(id: number): Option.Option<string> {
+     return HashMap.get(this.decodeMap, id)
+   }
+
+   private static addTokensToSet(
+     set: HashSet.HashSet<string>,
+     text: string
+   ): HashSet.HashSet<string> {
+     const words = text.split(/\s+/).filter((w) => w.length > 0)
+     let next = set
+
+     for (const word of words) {
+       const parts = splitWordToTokens(word)
+       for (const part of parts) {
+         next = HashSet.add(next, part)
+       }
+     }
+
+     return next
+   }
+
+   static processTextForVocab(texts: ReadonlyArray<string>): HashSet.HashSet<string> {
+     let vocabSet = HashSet.add(HashSet.empty<string>(), "</s>")
+     for (const text of texts) {
+       vocabSet = Vocab.addTokensToSet(vocabSet, text)
+     }
+     return vocabSet
+   }
+
+   static processStreamForVocab<E, R>(
+     stream: Stream.Stream<string, E, R>
+   ): Effect.Effect<HashSet.HashSet<string>, E, R> {
+     const initial = HashSet.add(HashSet.empty<string>(), "</s>")
+     return Stream.runFold(stream, initial, (set, text) => Vocab.addTokensToSet(set, text))
+   }
+ }
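For orientation, a small usage sketch of the Vocab round-trip; ids follow the order of `defaultWords()`, so "hello" maps to 0:

    import * as Option from "effect/Option"

    const vocab = Vocab.make(Vocab.defaultWords())
    vocab.encode("hello")                                            // Option.some(0)
    Option.flatMap(vocab.encode("hello"), (id) => vocab.decode(id))  // Option.some("hello")
    vocab.encode("missing")                                          // Option.none()

    // Building a token set from raw text; "</s>" is always seeded first.
    const tokens = Vocab.processTextForVocab(["hello world"])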
package/tests/fixtures/csv_bad.csv
@@ -0,0 +1,2 @@
+ hello
+ "unclosed
package/tests/fixtures/csv_good.csv
@@ -0,0 +1,3 @@
+ hello,world
+ "quoted,field",foo
+ "say ""hi""",bar
package/tests/ts/cli_error_format.test.ts
@@ -0,0 +1,26 @@
+ import { describe, expect, test } from "bun:test"
+ import { formatTrainingError } from "../../src/cli/errors"
+ import { TrainingError } from "../../src/errors"
+ import { ShapeError } from "../../src/tensor/ops"
+
+ describe("formatTrainingError", () => {
+   test("formats dataset error with path", () => {
+     const err = TrainingError.dataset({ _tag: "DatasetLoadError", path: "/tmp/data", error: new Error("io") } as any)
+     const message = formatTrainingError(err)
+     expect(message).toContain("Dataset error")
+     expect(message).toContain("/tmp/data")
+   })
+
+   test("formats shape error cause", () => {
+     const err = TrainingError.shape(new ShapeError("bad shape"))
+     const message = formatTrainingError(err)
+     expect(message).toContain("Shape error")
+     expect(message).toContain("bad shape")
+   })
+
+   test("formats plain Error as unexpected error", () => {
+     const message = formatTrainingError(new Error("boom"))
+     expect(message).toContain("Unexpected error")
+     expect(message).toContain("boom")
+   })
+ })
package/tests/ts/dataset.test.ts
@@ -0,0 +1,35 @@
+ import { describe, test, expect } from "bun:test"
+ import * as Effect from "effect/Effect"
+ import * as Exit from "effect/Exit"
+ import { BunFileSystem } from "@effect/platform-bun"
+ import { Dataset, DatasetParseError } from "../../src/data/Dataset.js"
+
+ describe("Dataset CSV", () => {
+   const good = "tests/fixtures/csv_good.csv"
+   const bad = "tests/fixtures/csv_bad.csv"
+
+   test("collect reads CSV and joins fields", async () => {
+     const streams = Dataset.load({ pretrainingPath: good, chatPath: good, format: "csv" })
+     const program = Dataset.collect(streams).pipe(Effect.provide(BunFileSystem.layer))
+     const result = await Effect.runPromise(program)
+
+     expect(result.pretrainingData).toEqual(["hello,world", "quoted,field,foo", 'say "hi",bar'])
+     expect(result.chatTrainingData).toEqual(["hello,world", "quoted,field,foo", 'say "hi",bar'])
+   })
+
+   test("malformed CSV surfaces DatasetParseError", async () => {
+     const streams = Dataset.load({ pretrainingPath: bad, chatPath: bad, format: "csv" })
+     const program = Dataset.collect(streams).pipe(Effect.provide(BunFileSystem.layer))
+     const exit = await Effect.runPromiseExit(program)
+
+     expect(exit._tag).toBe("Failure")
+     if (exit._tag === "Failure") {
+       const cause = exit.cause
+       if (cause._tag === "Fail") {
+         expect(cause.error).toBeInstanceOf(DatasetParseError)
+       } else {
+         throw new Error(`Unexpected failure cause: ${cause._tag}`)
+       }
+     }
+   })
+ })
package/tests/ts/embeddings.test.ts
@@ -0,0 +1,81 @@
+ import { describe, test, expect } from "bun:test"
+ import { runEffect } from "./support/runEffect"
+ import { expectShape, expectNotClose, expectFinite } from "./support/tensorMatchers"
+ import { makeEmbeddings } from "./support/factories"
+ import * as T from "../../src/tensor/Tensor2D"
+ import { EMBEDDING_DIM, MAX_SEQ_LEN } from "../../src/config"
+
+ describe("Embeddings", () => {
+   test("embed single token → [1, EMBEDDING_DIM]", () => {
+     const embeddings = makeEmbeddings(10)
+     const input = T.fromArray(1, 1, [0])
+     const output = runEffect(embeddings.forward(input))
+     expectShape(output, [1, EMBEDDING_DIM])
+     expectFinite(output)
+   })
+
+   test("embed multiple tokens → [seqLen, EMBEDDING_DIM]", () => {
+     const embeddings = makeEmbeddings(10)
+     const input = T.fromArray(1, 3, [0, 1, 2])
+     const output = runEffect(embeddings.forward(input))
+     expectShape(output, [3, EMBEDDING_DIM])
+     expectFinite(output)
+   })
+
+   test("positional embeddings differ across positions", () => {
+     const embeddings = makeEmbeddings(10)
+     const input = T.fromArray(1, 3, [0, 0, 0])
+     const output = runEffect(embeddings.forward(input))
+     expectShape(output, [3, EMBEDDING_DIM])
+
+     const row0Val = T.get(output, 0, 0)
+     const row1Val = T.get(output, 1, 0)
+     const row2Val = T.get(output, 2, 0)
+
+     expect(row0Val).not.toBe(row1Val)
+     expect(row1Val).not.toBe(row2Val)
+     expect(row0Val).not.toBe(row2Val)
+   })
+
+   test("shape across sequence lengths 1..4", () => {
+     const embeddings = makeEmbeddings(10)
+     for (let seqLen = 1; seqLen <= 4; seqLen++) {
+       const tokens = Array.from({ length: seqLen }, (_, i) => i % 10)
+       const input = T.fromArray(1, seqLen, tokens)
+       const output = runEffect(embeddings.forward(input))
+       expectShape(output, [seqLen, EMBEDDING_DIM])
+       expectFinite(output)
+     }
+   })
+
+   test("max sequence length boundary", () => {
+     const embeddings = makeEmbeddings(10)
+     const tokens = Array.from({ length: MAX_SEQ_LEN }, (_, i) => i % 10)
+     const input = T.fromArray(1, MAX_SEQ_LEN, tokens)
+     const output = runEffect(embeddings.forward(input))
+     expectShape(output, [MAX_SEQ_LEN, EMBEDDING_DIM])
+     expectFinite(output)
+   })
+
+   test("backward updates token & positional embeddings", () => {
+     const embeddings = makeEmbeddings(10)
+     const input = T.fromArray(1, 3, [0, 1, 2])
+
+     const tokenBefore = T.clone(embeddings.tokenEmbeddings)
+     const positionalBefore = T.clone(embeddings.positionalEmbeddings)
+
+     runEffect(embeddings.forward(input))
+     const grad = T.ones(3, EMBEDDING_DIM)
+     runEffect(embeddings.backward(grad, 0.01))
+
+     expectNotClose(embeddings.tokenEmbeddings, tokenBefore)
+     expectNotClose(embeddings.positionalEmbeddings, positionalBefore)
+   })
+
+   test("parametersCount", () => {
+     const vocabSize = 10
+     const embeddings = makeEmbeddings(vocabSize)
+     const expectedCount = vocabSize * EMBEDDING_DIM + MAX_SEQ_LEN * EMBEDDING_DIM
+     expect(embeddings.parametersCount).toBe(expectedCount)
+   })
+ })
package/tests/ts/errors.test.ts
@@ -0,0 +1,36 @@
+ import { describe, expect, test } from "bun:test"
+ import { TrainingError } from "../../src/errors"
+ import { ShapeError } from "../../src/tensor/ops"
+ import { DatasetLoadError, DatasetParseError } from "../../src/data/Dataset"
+
+ describe("TrainingError.fromUnknown", () => {
+   test("maps ShapeError to TrainingShapeError", () => {
+     const err = TrainingError.fromUnknown(new ShapeError("bad shape"))
+     expect(err._tag).toBe("TrainingShapeError")
+     expect((err as any).cause.message).toBe("bad shape")
+   })
+
+   test("maps DatasetLoadError to TrainingDatasetError", () => {
+     const loadErr = new DatasetLoadError({ path: "x", error: new Error("io") })
+     const err = TrainingError.fromUnknown(loadErr)
+     expect(err._tag).toBe("TrainingDatasetError")
+     expect((err as any).cause).toBe(loadErr)
+   })
+
+   test("maps DatasetParseError to TrainingDatasetError", () => {
+     const parseErr = new DatasetParseError({ path: "x", error: new Error("parse") })
+     const err = TrainingError.fromUnknown(parseErr)
+     expect(err._tag).toBe("TrainingDatasetError")
+   })
+
+   test("passes through existing TrainingError", () => {
+     const existing = TrainingError.optimizer("boom")
+     const err = TrainingError.fromUnknown(existing)
+     expect(err).toBe(existing)
+   })
+
+   test("wraps unknown errors in TrainingUnknownError", () => {
+     const err = TrainingError.fromUnknown(new Error("boom"))
+     expect(err._tag).toBe("TrainingUnknownError")
+   })
+ })
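Read together, these tests pin down a four-way dispatch. The following is a sketch consistent with the assertions above, not the actual body of src/errors.ts; `isTrainingError` and the `unknown` constructor name are assumptions:

    // Sketch of the dispatch the tests assert (hypothetical helper names).
    const fromUnknownSketch = (error: unknown) => {
      if (isTrainingError(error)) return error                   // passed through as-is
      if (error instanceof ShapeError) {
        return TrainingError.shape(error)                        // _tag: "TrainingShapeError"
      }
      if (error instanceof DatasetLoadError || error instanceof DatasetParseError) {
        return TrainingError.dataset(error)                      // _tag: "TrainingDatasetError"
      }
      return TrainingError.unknown(error)                        // _tag: "TrainingUnknownError"
    }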
package/tests/ts/feed_forward.test.ts
@@ -0,0 +1,74 @@
+ import { describe, test, expect } from "bun:test"
+ import { runEffect } from "./support/runEffect"
+ import { expectShape, expectNotClose, expectFinite } from "./support/tensorMatchers"
+ import { makeFeedForward } from "./support/factories"
+ import * as T from "../../src/tensor/Tensor2D"
+ import { EMBEDDING_DIM, HIDDEN_DIM } from "../../src/config"
+
+ describe("FeedForward", () => {
+   test("forward shape matches input", () => {
+     const ff = makeFeedForward()
+     const input = T.ones(3, EMBEDDING_DIM)
+     const output = runEffect(ff.forward(input))
+     expectShape(output, [3, EMBEDDING_DIM])
+   })
+
+   test("shape across sequence lengths 1..4", () => {
+     const ff = makeFeedForward()
+     for (let seqLen = 1; seqLen <= 4; seqLen++) {
+       const input = T.ones(seqLen, EMBEDDING_DIM)
+       const output = runEffect(ff.forward(input))
+       expectShape(output, [seqLen, EMBEDDING_DIM])
+     }
+   })
+
+   test("output contains finite values", () => {
+     const ff = makeFeedForward()
+     const input = T.ones(3, EMBEDDING_DIM)
+     const output = runEffect(ff.forward(input))
+     expectFinite(output)
+   })
+
+   test("backward returns gradient with correct shape", () => {
+     const ff = makeFeedForward()
+     const input = T.ones(3, EMBEDDING_DIM)
+     runEffect(ff.forward(input))
+     const grads = T.ones(3, EMBEDDING_DIM)
+     const gradInput = runEffect(ff.backward(grads, 0.01))
+     expectShape(gradInput, [3, EMBEDDING_DIM])
+   })
+
+   test("backward output differs from forward output", () => {
+     const ff = makeFeedForward()
+     const input = T.ones(3, EMBEDDING_DIM)
+     const output = runEffect(ff.forward(input))
+     const grads = T.ones(3, EMBEDDING_DIM)
+     const gradInput = runEffect(ff.backward(grads, 0.01))
+     expectNotClose(output, gradInput)
+   })
+
+   test("backward updates w1/b1/w2/b2", () => {
+     const ff = makeFeedForward()
+     const w1Before = T.clone(ff.w1)
+     const b1Before = T.clone(ff.b1)
+     const w2Before = T.clone(ff.w2)
+     const b2Before = T.clone(ff.b2)
+
+     const input = T.ones(3, EMBEDDING_DIM)
+     runEffect(ff.forward(input))
+     const grads = T.ones(3, EMBEDDING_DIM)
+     runEffect(ff.backward(grads, 0.01))
+
+     expectNotClose(ff.w1, w1Before)
+     expectNotClose(ff.b1, b1Before)
+     expectNotClose(ff.w2, w2Before)
+     expectNotClose(ff.b2, b2Before)
+   })
+
+   test("parametersCount", () => {
+     const ff = makeFeedForward()
+     const expected =
+       EMBEDDING_DIM * HIDDEN_DIM + HIDDEN_DIM + HIDDEN_DIM * EMBEDDING_DIM + EMBEDDING_DIM
+     expect(ff.parametersCount).toBe(expected)
+   })
+ })
package/tests/ts/initNormal.test.ts
@@ -0,0 +1,41 @@
+ import { describe, expect, it } from "bun:test"
+ import { initNormal } from "../../src/tensor/ops"
+ import { seeded } from "../../src/tensor/random"
+
+ describe("initNormal with seeded RNG", () => {
+   it("produces deterministic tensors with same seed", () => {
+     const rng1 = seeded(42)
+     const rng2 = seeded(42)
+
+     const t1 = initNormal(4, 4, 0, 1, rng1)
+     const t2 = initNormal(4, 4, 0, 1, rng2)
+
+     expect(Array.from(t1.data)).toEqual(Array.from(t2.data))
+   })
+
+   it("produces different tensors with different seeds", () => {
+     const rng1 = seeded(42)
+     const rng2 = seeded(43)
+
+     const t1 = initNormal(4, 4, 0, 1, rng1)
+     const t2 = initNormal(4, 4, 0, 1, rng2)
+
+     expect(Array.from(t1.data)).not.toEqual(Array.from(t2.data))
+   })
+
+   it("respects mean and std parameters", () => {
+     const rng = seeded(1337)
+     const mean = 5
+     const std = 0.1
+     const t = initNormal(100, 100, mean, std, rng)
+
+     let sum = 0
+     for (let i = 0; i < t.data.length; i++) {
+       sum += t.data[i]
+     }
+     const actualMean = sum / t.data.length
+
+     // With 10_000 samples and std = 0.1, the standard error of the mean is 0.001, so 0.1 is a very loose bound
+     expect(Math.abs(actualMean - mean)).toBeLessThan(0.1)
+   })
+ })
package/tests/ts/layer_norm.test.ts
@@ -0,0 +1,96 @@
+ import { describe, test, expect } from "bun:test"
+ import { runEffect } from "./support/runEffect"
+ import { expectShape, expectNotClose, expectFinite } from "./support/tensorMatchers"
+ import { makeLayerNorm } from "./support/factories"
+ import * as T from "../../src/tensor/Tensor2D"
+ import { EMBEDDING_DIM } from "../../src/config"
+
+ const rowMean = (t: T.Tensor2D, row: number): number => {
+   let sum = 0
+   for (let j = 0; j < t.cols; j++) sum += T.get(t, row, j)
+   return sum / t.cols
+ }
+
+ const rowVariance = (t: T.Tensor2D, row: number): number => {
+   const mean = rowMean(t, row)
+   let sumSq = 0
+   for (let j = 0; j < t.cols; j++) {
+     const diff = T.get(t, row, j) - mean
+     sumSq += diff * diff
+   }
+   return sumSq / t.cols
+ }
+
+ const makeVariedInput = (): T.Tensor2D => {
+   const input = T.zeros(3, EMBEDDING_DIM)
+   for (let i = 0; i < input.data.length; i++) {
+     input.data[i] = (i % 7) * 0.5 - 1.5
+   }
+   return input
+ }
+
+ describe("LayerNorm", () => {
+   test("forward shape preserved", () => {
+     const ln = makeLayerNorm()
+     const input = T.ones(3, EMBEDDING_DIM)
+     const output = runEffect(ln.forward(input))
+     expectShape(output, [3, EMBEDDING_DIM])
+   })
+
+   test("per-row mean approximately zero", () => {
+     const ln = makeLayerNorm()
+     const input = makeVariedInput()
+     const output = runEffect(ln.forward(input))
+
+     for (let row = 0; row < output.rows; row++) {
+       const mean = rowMean(output, row)
+       expect(Math.abs(mean)).toBeLessThan(1e-5)
+     }
+   })
+
+   test("per-row variance approximately one", () => {
+     const ln = makeLayerNorm()
+     const input = makeVariedInput()
+     const output = runEffect(ln.forward(input))
+
+     for (let row = 0; row < output.rows; row++) {
+       const variance = rowVariance(output, row)
+       expect(Math.abs(variance - 1)).toBeLessThan(1e-4)
+     }
+   })
+
+   test("output contains finite values", () => {
+     const ln = makeLayerNorm()
+     const input = makeVariedInput()
+     const output = runEffect(ln.forward(input))
+     expectFinite(output)
+   })
+
+   test("backward shape preserved", () => {
+     const ln = makeLayerNorm()
+     const input = makeVariedInput()
+     runEffect(ln.forward(input))
+     const dOut = T.ones(3, EMBEDDING_DIM)
+     const grad = runEffect(ln.backward(dOut, 0.01))
+     expectShape(grad, [3, EMBEDDING_DIM])
+   })
+
+   test("backward updates gamma and beta", () => {
+     const ln = makeLayerNorm()
+     const input = makeVariedInput()
+     const gammaBefore = T.clone(ln.gamma)
+     const betaBefore = T.clone(ln.beta)
+
+     runEffect(ln.forward(input))
+     const dOut = T.ones(3, EMBEDDING_DIM)
+     runEffect(ln.backward(dOut, 0.01))
+
+     expectNotClose(ln.gamma, gammaBefore)
+     expectNotClose(ln.beta, betaBefore)
+   })
+
+   test("parametersCount", () => {
+     const ln = makeLayerNorm()
+     expect(ln.parametersCount).toBe(2 * EMBEDDING_DIM)
+   })
+ })
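For reference, the property these mean/variance tests check follows from the layer-norm definition, sketched below. The epsilon value and the per-column gamma/beta application are assumptions about the implementation in src/model/LayerNorm.ts:

    // y[i][j] = gamma[j] * (x[i][j] - mean_i) / sqrt(var_i + eps) + beta[j]
    const layerNormRow = (row: number[], gamma: number[], beta: number[], eps = 1e-5): number[] => {
      const mean = row.reduce((a, b) => a + b, 0) / row.length
      const variance = row.reduce((a, x) => a + (x - mean) ** 2, 0) / row.length
      return row.map((x, j) => (gamma[j]! * (x - mean)) / Math.sqrt(variance + eps) + beta[j]!)
    }

With gamma all ones and beta all zeros, each output row has mean 0 (up to float error) and variance var/(var + eps), slightly below 1, which is why the variance assertion uses the looser 1e-4 tolerance while the mean assertion uses 1e-5.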