effect-gpt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +50 -0
  2. package/data/chat_training_data.json +55 -0
  3. package/data/pretraining_data.json +27 -0
  4. package/package.json +25 -0
  5. package/src/cli/errors.ts +51 -0
  6. package/src/cli/main.ts +163 -0
  7. package/src/config.ts +3 -0
  8. package/src/data/Dataset.ts +168 -0
  9. package/src/errors.ts +73 -0
  10. package/src/index.ts +88 -0
  11. package/src/model/Embeddings.ts +108 -0
  12. package/src/model/FeedForward.ts +121 -0
  13. package/src/model/LLM.ts +124 -0
  14. package/src/model/LayerNorm.ts +138 -0
  15. package/src/model/ModelLayer.ts +10 -0
  16. package/src/model/OutputProjection.ts +76 -0
  17. package/src/model/SelfAttention.ts +169 -0
  18. package/src/model/TransformerBlock.ts +53 -0
  19. package/src/services/Logger.ts +124 -0
  20. package/src/services/Metrics.ts +260 -0
  21. package/src/services/Random.ts +98 -0
  22. package/src/services/SeedLayer.ts +39 -0
  23. package/src/services/index.ts +32 -0
  24. package/src/tensor/Tensor2D.ts +42 -0
  25. package/src/tensor/ops.ts +371 -0
  26. package/src/tensor/random.ts +32 -0
  27. package/src/tokenize/split.ts +27 -0
  28. package/src/tokenize/tokenize.ts +28 -0
  29. package/src/training/Adam.ts +61 -0
  30. package/src/training/clip.ts +16 -0
  31. package/src/training/loss.ts +35 -0
  32. package/src/training/train.ts +203 -0
  33. package/src/vocab/Vocab.ts +79 -0
  34. package/tests/fixtures/csv_bad.csv +2 -0
  35. package/tests/fixtures/csv_good.csv +3 -0
  36. package/tests/ts/cli_error_format.test.ts +26 -0
  37. package/tests/ts/dataset.test.ts +35 -0
  38. package/tests/ts/embeddings.test.ts +81 -0
  39. package/tests/ts/errors.test.ts +36 -0
  40. package/tests/ts/feed_forward.test.ts +74 -0
  41. package/tests/ts/initNormal.test.ts +41 -0
  42. package/tests/ts/layer_norm.test.ts +96 -0
  43. package/tests/ts/llm_parameters.test.ts +96 -0
  44. package/tests/ts/llm_predict.test.ts +98 -0
  45. package/tests/ts/llm_tokenize.test.ts +69 -0
  46. package/tests/ts/output_projection.test.ts +78 -0
  47. package/tests/ts/random.test.ts +44 -0
  48. package/tests/ts/self_attention.test.ts +63 -0
  49. package/tests/ts/support/factories.ts +126 -0
  50. package/tests/ts/support/runEffect.ts +29 -0
  51. package/tests/ts/support/seed.ts +12 -0
  52. package/tests/ts/support/stubs.ts +58 -0
  53. package/tests/ts/support/tensorMatchers.ts +96 -0
  54. package/tests/ts/support.test.ts +165 -0
  55. package/tests/ts/train_loop.test.ts +229 -0
  56. package/tests/ts/transformer_block.test.ts +72 -0
  57. package/tsconfig.json +20 -0
  58. package/tsconfig.test.json +8 -0
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Custom tensor assertion helpers for deterministic testing.
3
+ */
4
+ import { expect } from "bun:test"
5
+ import type { Tensor2D } from "../../../src/tensor/Tensor2D"
6
+
7
+ const DEFAULT_EPSILON = 1e-5
8
+
9
/**
 * Asserts that a tensor's `rows` and `cols` match the expected `[rows, cols]` pair.
 *
 * @param tensor - Tensor whose dimensions are checked.
 * @param shape - Expected dimensions as `[rows, cols]`.
 */
export const expectShape = (tensor: Tensor2D, shape: [number, number]): void => {
  const [expectedRows, expectedCols] = shape
  // Rows are asserted first, so a row mismatch is reported before a column mismatch.
  expect(tensor.rows).toBe(expectedRows)
  expect(tensor.cols).toBe(expectedCols)
}
16
+
17
/**
 * Asserts that two tensors have the same shape and that every pair of
 * corresponding elements differs by at most `epsilon`.
 *
 * Elements that are exactly equal (including matching infinities) always
 * count as close. Any other pairing whose difference is NaN — a NaN element,
 * or an element missing from one of the data buffers — fails the assertion
 * instead of slipping through the comparison.
 *
 * @param actual - Tensor under test.
 * @param expected - Reference tensor.
 * @param epsilon - Maximum allowed absolute difference per element.
 * @throws Error when the data buffers have different lengths, or naming the
 *   first element whose difference is not within `epsilon`.
 */
export const expectClose = (
  actual: Tensor2D,
  expected: Tensor2D,
  epsilon: number = DEFAULT_EPSILON
): void => {
  expectShape(actual, [expected.rows, expected.cols])

  // Matching rows/cols does not guarantee matching buffers; compare lengths
  // explicitly so a truncated buffer cannot pass unnoticed.
  if (actual.data.length !== expected.data.length) {
    throw new Error(
      `Tensor data length mismatch: got ${actual.data.length}, expected ${expected.data.length}`
    )
  }

  for (let i = 0; i < actual.data.length; i++) {
    const a = actual.data[i]
    const e = expected.data[i]
    // Exact equality short-circuits to 0 so Infinity vs Infinity (whose
    // subtraction is NaN) is still treated as a match.
    const diff = a === e ? 0 : Math.abs(a - e)
    // Negated `<=` rather than `>`: every comparison with NaN is false, so
    // `diff > epsilon` would let NaN values pass.
    if (!(diff <= epsilon)) {
      const row = Math.floor(i / actual.cols)
      const col = i % actual.cols
      throw new Error(
        `Tensor mismatch at [${row}, ${col}]: got ${a}, expected ${e}, diff ${diff} > epsilon ${epsilon}`
      )
    }
  }
}
40
+
41
/**
 * Asserts that every element of a tensor is within `epsilon` of a target value.
 *
 * An element exactly equal to `value` always passes (this covers an infinite
 * target). Otherwise a NaN difference — for example a NaN element — fails the
 * assertion rather than being silently accepted.
 *
 * @param tensor - Tensor whose elements are checked.
 * @param value - Target value every element should be close to.
 * @param epsilon - Maximum allowed absolute difference per element.
 * @throws Error naming the first element that is not within `epsilon` of `value`.
 */
export const expectAllClose = (
  tensor: Tensor2D,
  value: number,
  epsilon: number = DEFAULT_EPSILON
): void => {
  for (let i = 0; i < tensor.data.length; i++) {
    const v = tensor.data[i]
    // Exact equality short-circuits to 0 so Infinity vs Infinity (whose
    // subtraction is NaN) still counts as a match.
    const diff = v === value ? 0 : Math.abs(v - value)
    // Negated `<=` rather than `>`: every comparison with NaN is false, so
    // `diff > epsilon` would let NaN elements pass.
    if (!(diff <= epsilon)) {
      const row = Math.floor(i / tensor.cols)
      const col = i % tensor.cols
      throw new Error(
        `Tensor element at [${row}, ${col}]: got ${v}, expected ~${value}, diff ${diff} > epsilon ${epsilon}`
      )
    }
  }
}
61
+
62
/**
 * Asserts that two tensors are NOT equal within `epsilon`.
 *
 * Tensors with different `rows`/`cols` are considered different without
 * inspecting their data. Otherwise the assertion passes as soon as one pair
 * of elements differs by more than `epsilon`. A NaN difference never counts
 * as exceeding `epsilon`.
 *
 * @param actual - Tensor under test.
 * @param expected - Tensor that `actual` should differ from.
 * @param epsilon - Absolute difference that must be exceeded by some element.
 * @throws Error when the shapes match and no element differs by more than `epsilon`.
 */
export const expectNotClose = (
  actual: Tensor2D,
  expected: Tensor2D,
  epsilon: number = DEFAULT_EPSILON
): void => {
  const sameShape = actual.rows === expected.rows && actual.cols === expected.cols
  if (!sameShape) {
    // A shape mismatch already proves the tensors are not equal.
    return
  }

  let index = 0
  while (index < actual.data.length) {
    if (Math.abs(actual.data[index] - expected.data[index]) > epsilon) {
      // One differing element is enough.
      return
    }
    index += 1
  }

  throw new Error("Expected tensors to differ, but they are equal within epsilon")
}
83
+
84
/**
 * Asserts that every element of a tensor is a finite number.
 *
 * @param tensor - Tensor whose elements are checked.
 * @throws Error naming the `[row, col]` position and value of the first NaN
 *   or infinite element.
 */
export const expectFinite = (tensor: Tensor2D): void => {
  const { data, cols } = tensor
  for (let index = 0; index < data.length; index += 1) {
    const value = data[index]
    if (Number.isFinite(value)) {
      continue
    }
    // Convert the flat index into a [row, col] position for the message.
    const row = Math.floor(index / cols)
    const col = index % cols
    throw new Error(`Non-finite value at [${row}, ${col}]: ${value}`)
  }
}
@@ -0,0 +1,165 @@
1
+ /**
2
+ * Tests for Tier 2 test utilities.
3
+ */
4
+ import { describe, expect, it } from "bun:test"
5
+ import * as T from "../../src/tensor/Tensor2D"
6
+ import {
7
+ expectShape,
8
+ expectClose,
9
+ expectAllClose,
10
+ expectNotClose,
11
+ expectFinite
12
+ } from "./support/tensorMatchers"
13
+ import { runEffect, runEffectFail } from "./support/runEffect"
14
+ import {
15
+ makeEmbeddings,
16
+ makeSelfAttention,
17
+ makeFeedForward,
18
+ makeLayerNorm,
19
+ makeTransformerBlock,
20
+ makeOutputProjection,
21
+ makeRng
22
+ } from "./support/factories"
23
+ import * as Effect from "effect/Effect"
24
+ import { ShapeError } from "../../src/tensor/ops"
25
+ import { EMBEDDING_DIM, HIDDEN_DIM } from "../../src/config"
26
+
27
// Self-tests for the tensor assertion helpers: each helper is exercised on a
// passing input and, where a test exists, on an input that must make it throw.
describe("tensorMatchers", () => {
  // Builds a fresh 2x2 tensor from four values.
  const square = (values: Array<number>) => T.fromArray(2, 2, values)

  it("expectShape passes for correct shape", () => {
    expectShape(T.zeros(3, 4), [3, 4])
  })

  it("expectShape fails for wrong shape", () => {
    const tensor = T.zeros(3, 4)
    const check = () => expectShape(tensor, [3, 5])
    expect(check).toThrow()
  })

  it("expectClose passes for identical tensors", () => {
    const left = square([1, 2, 3, 4])
    const right = square([1, 2, 3, 4])
    expectClose(left, right)
  })

  it("expectClose passes within epsilon", () => {
    const exact = square([1.0, 2.0, 3.0, 4.0])
    const nudged = square([1.000001, 2.000001, 3.000001, 4.000001])
    expectClose(exact, nudged)
  })

  it("expectClose fails beyond epsilon", () => {
    const left = square([1, 2, 3, 4])
    const right = square([1, 2, 3, 5])
    const check = () => expectClose(left, right)
    expect(check).toThrow()
  })

  it("expectAllClose passes when all near value", () => {
    const nearOne = square([1.0, 1.000005, 0.999995, 1.0])
    expectAllClose(nearOne, 1.0)
  })

  it("expectNotClose passes when tensors differ", () => {
    const left = square([1, 2, 3, 4])
    const right = square([1, 2, 3, 100])
    expectNotClose(left, right)
  })

  it("expectNotClose fails when tensors are equal", () => {
    const left = square([1, 2, 3, 4])
    const right = square([1, 2, 3, 4])
    const check = () => expectNotClose(left, right)
    expect(check).toThrow()
  })

  it("expectFinite passes for finite values", () => {
    expectFinite(square([1, 2, 3, 4]))
  })

  it("expectFinite fails for NaN", () => {
    const withNaN = square([1, NaN, 3, 4])
    const check = () => expectFinite(withNaN)
    expect(check).toThrow()
  })

  it("expectFinite fails for Infinity", () => {
    const withInfinity = square([1, Infinity, 3, 4])
    const check = () => expectFinite(withInfinity)
    expect(check).toThrow()
  })
})
88
+
89
// Self-tests for the synchronous Effect runners used across the test suite.
describe("runEffect", () => {
  // Builds a fresh effect that fails with a ShapeError carrying "test error".
  const failingEffect = () => Effect.fail(new ShapeError("test error"))

  it("returns value from successful effect", () => {
    const result = runEffect(Effect.succeed(42))
    expect(result).toBe(42)
  })

  it("throws on failed effect", () => {
    const effect = failingEffect()
    const run = () => runEffect(effect)
    expect(run).toThrow()
  })

  it("runEffectFail returns the error", () => {
    const failure = runEffectFail(failingEffect())
    expect(failure).toBeInstanceOf(ShapeError)
    expect(failure.message).toBe("test error")
  })
})
107
+
108
// Self-tests for the seeded model factories: the same seed must reproduce the
// same weights, and a different seed must not.
describe("factories", () => {
  const SEED = 42
  const VOCAB_SIZE = 100

  it("makeEmbeddings produces deterministic weights", () => {
    const first = makeEmbeddings(VOCAB_SIZE, { seed: SEED })
    const second = makeEmbeddings(VOCAB_SIZE, { seed: SEED })
    expectClose(first.tokenEmbeddings, second.tokenEmbeddings)
    expectClose(first.positionalEmbeddings, second.positionalEmbeddings)
  })

  it("makeEmbeddings with different seeds produces different weights", () => {
    const first = makeEmbeddings(VOCAB_SIZE, { seed: SEED })
    const other = makeEmbeddings(VOCAB_SIZE, { seed: 43 })
    expectNotClose(first.tokenEmbeddings, other.tokenEmbeddings)
  })

  it("makeSelfAttention produces deterministic weights", () => {
    const first = makeSelfAttention({ seed: SEED })
    const second = makeSelfAttention({ seed: SEED })
    expectClose(first.wQ, second.wQ)
    expectClose(first.wK, second.wK)
    expectClose(first.wV, second.wV)
  })

  it("makeFeedForward produces deterministic weights", () => {
    const first = makeFeedForward({ seed: SEED })
    const second = makeFeedForward({ seed: SEED })
    expectClose(first.w1, second.w1)
    expectClose(first.w2, second.w2)
  })

  it("makeLayerNorm initializes correctly", () => {
    const { gamma, beta } = makeLayerNorm()
    expectShape(gamma, [1, EMBEDDING_DIM])
    expectShape(beta, [1, EMBEDDING_DIM])
    expectAllClose(gamma, 1.0)
    expectAllClose(beta, 0.0)
  })

  it("makeTransformerBlock produces deterministic weights", () => {
    const first = makeTransformerBlock({ seed: SEED })
    const second = makeTransformerBlock({ seed: SEED })
    expectClose(first.attention.wQ, second.attention.wQ)
    expectClose(first.feedForward.w1, second.feedForward.w1)
  })

  it("makeOutputProjection produces deterministic weights", () => {
    const first = makeOutputProjection(VOCAB_SIZE, { seed: SEED })
    const second = makeOutputProjection(VOCAB_SIZE, { seed: SEED })
    expectClose(first.wOut, second.wOut)
  })

  it("makeRng produces deterministic sequences", () => {
    const firstRng = makeRng(SEED)
    const secondRng = makeRng(SEED)
    // Draw five values from each generator, one generator at a time.
    const firstDraws = Array.from({ length: 5 }, () => firstRng.next())
    const secondDraws = Array.from({ length: 5 }, () => secondRng.next())
    expect(firstDraws).toEqual(secondDraws)
  })
})
@@ -0,0 +1,229 @@
1
+ import { describe, test, expect } from "bun:test"
2
+ import * as Effect from "effect/Effect"
3
+ import * as Layer from "effect/Layer"
4
+ import { makeLLM } from "./support/factories"
5
+ import { expectNotClose } from "./support/tensorMatchers"
6
+ import { CANONICAL_SEED } from "./support/seed"
7
+ import {
8
+ train,
9
+ makeLLMLayer,
10
+ makeTrainingConfigLayer,
11
+ makePreprocessSettingsLayer
12
+ } from "../../src/training/train"
13
+ import { Embeddings } from "../../src/model/Embeddings"
14
+ import { TransformerBlock } from "../../src/model/TransformerBlock"
15
+ import { OutputProjection } from "../../src/model/OutputProjection"
16
+ import * as T from "../../src/tensor/Tensor2D"
17
+ import { SilentLoggerLive, Logger } from "../../src/services/Logger"
18
+ import { NoOpMetricsLive } from "../../src/services/Metrics"
19
+ import { TestServicesLayer as BaseTestServicesLayer } from "./support/stubs"
20
+
21
// Preprocessing settings used by the training tests: unbounded concurrency, batch size 1.
const preprocessSettingsLayer = makePreprocessSettingsLayer({
  concurrency: "unbounded",
  batchSize: 1
})

// Shared stub services merged with the preprocessing settings above.
const TestServicesLayer = Layer.mergeAll(BaseTestServicesLayer, preprocessSettingsLayer)
25
+
26
// End-to-end checks of the training loop on a tiny vocabulary and corpus:
// weights must change, loss must drop, and seeded runs must be reproducible.
describe("Train Loop", () => {
  const tinyVocab = ["hello", "world", "is", "this", "test", "</s>"]
  const tinyCorpus = ["hello world </s>", "this is </s>", "test world </s>"]

  const createTinyLLM = (seed: number = CANONICAL_SEED) =>
    makeLLM({ seed, vocabWords: tinyVocab, numTransformerBlocks: 1 })

  /**
   * Synchronously trains `llm` on `corpus` for exactly one epoch using the
   * shared test services. Centralizes the layer wiring that every
   * single-epoch test needs, so each test only states what it varies.
   */
  const trainOneEpoch = (
    llm: ReturnType<typeof createTinyLLM>,
    corpus: Array<string>,
    learningRate: number = 0.01
  ) =>
    Effect.runSync(
      train(corpus).pipe(
        Effect.provide(makeLLMLayer(llm)),
        Effect.provide(makeTrainingConfigLayer({ epochs: 1, learningRate })),
        Effect.provide(TestServicesLayer)
      )
    )

  test("training mutates embeddings weights", () => {
    const llm = createTinyLLM()
    const embeddings = llm.network[0] as Embeddings
    const tokenEmbeddingsBefore = T.clone(embeddings.tokenEmbeddings)

    trainOneEpoch(llm, tinyCorpus)

    expectNotClose(embeddings.tokenEmbeddings, tokenEmbeddingsBefore)
  })

  test("training mutates transformer weights", () => {
    const llm = createTinyLLM()
    const transformer = llm.network[1] as TransformerBlock
    const w1Before = T.clone(transformer.feedForward.w1)
    const wQBefore = T.clone(transformer.attention.wQ)

    trainOneEpoch(llm, tinyCorpus)

    expectNotClose(transformer.feedForward.w1, w1Before)
    expectNotClose(transformer.attention.wQ, wQBefore)
  })

  test("training mutates output projection weights", () => {
    const llm = createTinyLLM()
    const output = llm.network[2] as OutputProjection
    const wOutBefore = T.clone(output.wOut)

    trainOneEpoch(llm, tinyCorpus)

    expectNotClose(output.wOut, wOutBefore)
  })

  test("loss decreases over epochs", async () => {
    const llm = createTinyLLM()
    const losses: Array<number> = []

    // Records the `loss` field of a log payload when one is present.
    const recordLoss = (data?: Record<string, unknown>) => {
      if (data?.loss !== undefined) {
        losses.push(data.loss as number)
      }
      return Effect.void
    }

    // Logger stub that captures losses from `log` and `info` and ignores the rest.
    const makeCaptureLossLogger = () => {
      const service = {
        log: (_level: unknown, _message: string, data?: Record<string, unknown>) =>
          recordLoss(data),
        debug: () => Effect.void,
        info: (_message: string, data?: Record<string, unknown>) => recordLoss(data),
        warn: () => Effect.void,
        error: () => Effect.void
      }
      return Layer.succeed(Logger, service)
    }

    // This test wires its own services (instead of TestServicesLayer) so the
    // capturing logger is the one training talks to.
    const program = train(tinyCorpus).pipe(
      Effect.provide(makeLLMLayer(llm)),
      Effect.provide(makeTrainingConfigLayer({ epochs: 3, learningRate: 0.01 })),
      Effect.provide(makeCaptureLossLogger()),
      Effect.provide(NoOpMetricsLive),
      Effect.provide(makePreprocessSettingsLayer({ concurrency: "unbounded", batchSize: 1 }))
    )

    await Effect.runPromise(program)

    // One captured loss per configured epoch.
    expect(losses.length).toBe(3)
    expect(losses[2]).toBeLessThan(losses[0]!)
  })

  test("seeded RNG produces deterministic training", () => {
    const llm1 = createTinyLLM(CANONICAL_SEED)
    const llm2 = createTinyLLM(CANONICAL_SEED)

    trainOneEpoch(llm1, tinyCorpus)
    trainOneEpoch(llm2, tinyCorpus)

    const embeddings1 = llm1.network[0] as Embeddings
    const embeddings2 = llm2.network[0] as Embeddings

    // Exact equality, not closeness: identical seeds must give identical weights.
    for (let i = 0; i < embeddings1.tokenEmbeddings.data.length; i++) {
      expect(embeddings1.tokenEmbeddings.data[i]).toBe(embeddings2.tokenEmbeddings.data[i])
    }
  })

  test("empty corpus does not crash", () => {
    const llm = createTinyLLM()
    expect(() => trainOneEpoch(llm, [])).not.toThrow()
  })

  test("single example corpus trains", () => {
    const llm = createTinyLLM()
    const embeddings = llm.network[0] as Embeddings
    const before = T.clone(embeddings.tokenEmbeddings)

    trainOneEpoch(llm, ["hello world </s>"])

    expectNotClose(embeddings.tokenEmbeddings, before)
  })

  test("multiple epochs further mutate weights", () => {
    const llm = createTinyLLM()
    const embeddings = llm.network[0] as Embeddings

    trainOneEpoch(llm, tinyCorpus)
    const afterEpoch1 = T.clone(embeddings.tokenEmbeddings)

    // A second, separate run on the same model must keep moving the weights.
    trainOneEpoch(llm, tinyCorpus)
    expectNotClose(embeddings.tokenEmbeddings, afterEpoch1)
  })

  test("training with higher learning rate causes larger weight changes", () => {
    const llm1 = createTinyLLM(CANONICAL_SEED)
    const llm2 = createTinyLLM(CANONICAL_SEED)

    const embeddings1 = llm1.network[0] as Embeddings
    const embeddings2 = llm2.network[0] as Embeddings
    // Both models are built from the same seed, so this snapshot is used as
    // the starting point for both.
    const initial = T.clone(embeddings1.tokenEmbeddings)

    trainOneEpoch(llm1, tinyCorpus, 0.001)
    trainOneEpoch(llm2, tinyCorpus, 0.1)

    // Total absolute movement of the token embeddings for each learning rate.
    let diff1 = 0
    let diff2 = 0
    for (let i = 0; i < initial.data.length; i++) {
      diff1 += Math.abs(embeddings1.tokenEmbeddings.data[i]! - initial.data[i]!)
      diff2 += Math.abs(embeddings2.tokenEmbeddings.data[i]! - initial.data[i]!)
    }

    expect(diff2).toBeGreaterThan(diff1)
  })
})
@@ -0,0 +1,72 @@
1
+ import { describe, test, expect } from "bun:test"
2
+ import { runEffect } from "./support/runEffect"
3
+ import { expectShape, expectNotClose, expectFinite } from "./support/tensorMatchers"
4
+ import { makeTransformerBlock } from "./support/factories"
5
+ import * as T from "../../src/tensor/Tensor2D"
6
+ import { EMBEDDING_DIM, HIDDEN_DIM } from "../../src/config"
7
+
8
// Forward/backward behaviour and parameter accounting of a single transformer block.
describe("TransformerBlock", () => {
  test("forward shape preserved [1, EMBEDDING_DIM]", () => {
    const block = makeTransformerBlock()
    const result = runEffect(block.forward(T.ones(1, EMBEDDING_DIM)))
    expectShape(result, [1, EMBEDDING_DIM])
  })

  test("forward shape preserved across sequence lengths", () => {
    const block = makeTransformerBlock()
    // The same block instance is reused for every sequence length.
    for (const seqLen of [1, 2, 3, 4]) {
      const result = runEffect(block.forward(T.ones(seqLen, EMBEDDING_DIM)))
      expectShape(result, [seqLen, EMBEDDING_DIM])
    }
  })

  test("output contains finite values", () => {
    const block = makeTransformerBlock()
    const result = runEffect(block.forward(T.ones(2, EMBEDDING_DIM)))
    expectFinite(result)
  })

  test("backward shape matches input", () => {
    const block = makeTransformerBlock()
    // Forward is run first, before backward is called.
    runEffect(block.forward(T.ones(2, EMBEDDING_DIM)))
    const upstream = T.ones(2, EMBEDDING_DIM)
    const inputGrad = runEffect(block.backward(upstream, 0.01))
    expectShape(inputGrad, [2, EMBEDDING_DIM])
  })

  test("backward updates sub-layer weights", () => {
    const block = makeTransformerBlock()
    const input = T.zeros(2, EMBEDDING_DIM)
    // Fill the input with a repeating 0.0 .. 0.9 ramp so it is not constant.
    for (let idx = 0; idx < input.data.length; idx += 1) {
      input.data[idx] = (idx % 10) * 0.1
    }

    // Snapshot one parameter tensor from each sub-layer before the update.
    const snapshots = {
      wQ: T.clone(block.attention.wQ),
      w1: T.clone(block.feedForward.w1),
      beta: T.clone(block.norm1.beta)
    }

    runEffect(block.forward(input))
    runEffect(block.backward(T.ones(2, EMBEDDING_DIM), 0.01))

    expectNotClose(block.attention.wQ, snapshots.wQ)
    expectNotClose(block.feedForward.w1, snapshots.w1)
    expectNotClose(block.norm1.beta, snapshots.beta)
  })

  test("parametersCount equals sum of components", () => {
    const block = makeTransformerBlock()

    // Three EMBEDDING_DIM x EMBEDDING_DIM attention matrices.
    const attentionParams = 3 * EMBEDDING_DIM * EMBEDDING_DIM
    // Two weight-matrix terms plus one HIDDEN_DIM and one EMBEDDING_DIM term.
    const firstLayerParams = EMBEDDING_DIM * HIDDEN_DIM + HIDDEN_DIM
    const secondLayerParams = HIDDEN_DIM * EMBEDDING_DIM + EMBEDDING_DIM
    const feedForwardParams = firstLayerParams + secondLayerParams
    // Two groups of 2 * EMBEDDING_DIM normalization parameters.
    const normParams = 2 * (2 * EMBEDDING_DIM)

    expect(block.parametersCount).toBe(attentionParams + feedForwardParams + normParams)
  })
})
package/tsconfig.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "ESNext",
5
+ "moduleResolution": "Bundler",
6
+ "lib": ["ES2022"],
7
+ "types": ["bun-types"],
8
+ "strict": true,
9
+ "esModuleInterop": true,
10
+ "skipLibCheck": true,
11
+ "forceConsistentCasingInFileNames": true,
12
+ "declaration": true,
13
+ "declarationMap": true,
14
+ "sourceMap": true,
15
+ "outDir": "dist",
16
+ "rootDir": "src"
17
+ },
18
+ "include": ["src"],
19
+ "exclude": ["node_modules", "dist"]
20
+ }
@@ -0,0 +1,8 @@
1
+ {
2
+ "extends": "./tsconfig.json",
3
+ "compilerOptions": {
4
+ "types": ["bun-types"],
5
+ "rootDir": "."
6
+ },
7
+ "include": ["src", "tests/ts"]
8
+ }