tensorgrad 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/LICENSE +21 -0
  2. package/README.md +121 -0
  3. package/SPEC.md +293 -0
  4. package/dist/adam.d.ts +31 -0
  5. package/dist/adam.d.ts.map +1 -0
  6. package/dist/adam.js +66 -0
  7. package/dist/adam.js.map +1 -0
  8. package/dist/buffers.d.ts +56 -0
  9. package/dist/buffers.d.ts.map +1 -0
  10. package/dist/buffers.js +114 -0
  11. package/dist/buffers.js.map +1 -0
  12. package/dist/codegen.d.ts +23 -0
  13. package/dist/codegen.d.ts.map +1 -0
  14. package/dist/codegen.js +709 -0
  15. package/dist/codegen.js.map +1 -0
  16. package/dist/compile.d.ts +53 -0
  17. package/dist/compile.d.ts.map +1 -0
  18. package/dist/compile.js +76 -0
  19. package/dist/compile.js.map +1 -0
  20. package/dist/grad.d.ts +8 -0
  21. package/dist/grad.d.ts.map +1 -0
  22. package/dist/grad.js +404 -0
  23. package/dist/grad.js.map +1 -0
  24. package/dist/index.d.ts +12 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +37 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/ir.d.ts +204 -0
  29. package/dist/ir.d.ts.map +1 -0
  30. package/dist/ir.js +60 -0
  31. package/dist/ir.js.map +1 -0
  32. package/dist/module.d.ts +21 -0
  33. package/dist/module.d.ts.map +1 -0
  34. package/dist/module.js +113 -0
  35. package/dist/module.js.map +1 -0
  36. package/dist/ops.d.ts +35 -0
  37. package/dist/ops.d.ts.map +1 -0
  38. package/dist/ops.js +270 -0
  39. package/dist/ops.js.map +1 -0
  40. package/dist/runtime.d.ts +26 -0
  41. package/dist/runtime.d.ts.map +1 -0
  42. package/dist/runtime.js +190 -0
  43. package/dist/runtime.js.map +1 -0
  44. package/dist/shape.d.ts +24 -0
  45. package/dist/shape.d.ts.map +1 -0
  46. package/dist/shape.js +259 -0
  47. package/dist/shape.js.map +1 -0
  48. package/dist/trace.d.ts +8 -0
  49. package/dist/trace.d.ts.map +1 -0
  50. package/dist/trace.js +93 -0
  51. package/dist/trace.js.map +1 -0
  52. package/package.json +62 -0
  53. package/src/adam.ts +95 -0
  54. package/src/buffers.ts +173 -0
  55. package/src/codegen.ts +758 -0
  56. package/src/compile.ts +120 -0
  57. package/src/grad.ts +459 -0
  58. package/src/index.ts +40 -0
  59. package/src/ir.ts +197 -0
  60. package/src/module.ts +126 -0
  61. package/src/ops.ts +311 -0
  62. package/src/runtime.ts +232 -0
  63. package/src/shape.ts +263 -0
  64. package/src/trace.ts +101 -0
package/src/buffers.ts ADDED
@@ -0,0 +1,173 @@
+ // Buffer planning: walk a Graph and decide which GPU buffer each Tensor maps to.
+ //
+ // v1 strategy: one GPU buffer per IR Tensor. Static shapes mean every buffer's
+ // size is known at compile time and lifetimes don't overlap between steps —
+ // so no pooling needed. Total memory is the sum of every intermediate tensor.
+ // For our transformer at B=256: ~30 MB of activations + grads. Easily fits.
+ //
+ // Categorization is what the runtime cares about:
+ //   * param        — uploaded by user via uploadParams; persistent across steps
+ //   * param_grad   — written each step by the backward pass; readable for inspection
+ //   * tensor_input — uploaded each step (tokens, targets, masks)
+ //   * intermediate — produced by an op; lifetime = within a single step
+ //   * output       — special intermediate that should be made readable (loss)
+
+ import type { Graph, Tensor, Dtype, Shape, OpNode } from './ir.js'
+
+ export interface BufferSpec {
+   /** Matches tensor.id. */
+   id: number
+   byteSize: number
+   dtype: Dtype
+   shape: Shape
+   kind: 'param' | 'param_grad' | 'tensor_input' | 'state' | 'intermediate' | 'output'
+   /** External name for param/param_grad/tensor_input/state bindings. null otherwise. */
+   name: string | null
+   /** For state buffers: the value to fill on initial allocation. 0 by default. */
+   initValue?: number
+ }
+
+ /**
+  * After step(), copy `source`'s buffer into `dest`'s buffer.
+  * Used to write back updated optimizer state and updated parameters into
+  * their persistent home buffers.
+  */
+ export interface Writeback {
+   source: number // buffer id of the tensor holding the new value
+   dest: number   // buffer id of the persistent state/param to overwrite
+   bytes: number
+ }
+
+ export interface BufferPlan {
+   buffers: BufferSpec[]
+   /** Tensor id -> buffer id (currently 1:1 but kept opaque for future pooling). */
+   tensorToBuffer: Map<number, number>
+   /** Easy lookup tables for the runtime. */
+   paramsByName: Map<string, number>     // name -> buffer id
+   inputsByName: Map<string, number>     // name -> buffer id
+   paramGradsByName: Map<string, number> // name -> buffer id
+   statesByName: Map<string, number>     // name -> buffer id (persistent state homes)
+   outputBufferIds: number[]             // graph.outputs mapped through
+   /** End-of-step writebacks (Adam updates for params, m, v, etc.) */
+   writebacks: Writeback[]
+ }
+
+ const dtypeBytes: Record<Dtype, number> = { f32: 4, i32: 4, bool: 4 }
+
+ function shapeSize(shape: Shape): number {
+   let n = 1
+   for (const d of shape) n *= d
+   return n
+ }
+
+ /**
+  * Caller-supplied writeback declarations: "after each step, copy this Tensor's
+  * buffer into the persistent home of this param/state."
+  */
+ export interface WritebackDecl {
+   /** The Tensor (output of some op) holding the new value to write back. */
+   source: Tensor
+   /** Either a param name (writes to that param's home buffer) or a state name. */
+   destName: string
+   destKind: 'param' | 'state'
+ }
+
+ /**
+  * Build a BufferPlan from a graph + the param-grad map produced by appendGrad.
+  * @param graph the full graph (forward + backward + any optimizer ops)
+  * @param paramGrads map from param name -> the Tensor that holds its gradient
+  * @param writebackDecls list of end-of-step writebacks (e.g. from appendAdam).
+  *   Empty when there's no optimizer in the graph.
+  */
+ export function planBuffers(
+   graph: Graph,
+   paramGrads: Record<string, Tensor>,
+   writebackDecls: WritebackDecl[] = [],
+ ): BufferPlan {
+   const buffers: BufferSpec[] = []
+   const tensorToBuffer = new Map<number, number>()
+   const paramsByName = new Map<string, number>()
+   const inputsByName = new Map<string, number>()
+   const paramGradsByName = new Map<string, number>()
+   const statesByName = new Map<string, number>()
+
+   // Build a quick reverse map: tensorId -> param name (for grads).
+   const gradTensorIdToName = new Map<number, string>()
+   for (const [name, tensor] of Object.entries(paramGrads)) {
+     gradTensorIdToName.set(tensor.id, name)
+   }
+   // ...and tensorId -> param/input op (so we can name the buffer correctly).
+   const opByOutId = new Map<number, OpNode>()
+   for (const op of graph.ops) opByOutId.set(op.out, op)
+
+   const outputSet = new Set(graph.outputs)
+
+   // Walk all tensors in id order. Categorize each.
+   for (const t of graph.tensors) {
+     const op = opByOutId.get(t.id)
+     let kind: BufferSpec['kind'] = 'intermediate'
+     let name: string | null = null
+     let initValue: number | undefined
+
+     if (op?.kind === 'param_input') {
+       kind = 'param'
+       name = op.name
+     } else if (op?.kind === 'tensor_input') {
+       kind = 'tensor_input'
+       name = op.name
+     } else if (op?.kind === 'state_input') {
+       kind = 'state'
+       name = op.name
+       initValue = op.initValue
+     } else if (gradTensorIdToName.has(t.id)) {
+       kind = 'param_grad'
+       name = gradTensorIdToName.get(t.id)!
+     } else if (outputSet.has(t.id)) {
+       kind = 'output'
+     }
+
+     const spec: BufferSpec = {
+       id: t.id,
+       byteSize: Math.max(4, shapeSize(t.shape) * dtypeBytes[t.dtype]),
+       dtype: t.dtype,
+       shape: t.shape,
+       kind,
+       name,
+       ...(initValue !== undefined ? { initValue } : {}),
+     }
+     buffers.push(spec)
+     tensorToBuffer.set(t.id, t.id) // 1:1 for v1
+
+     if (kind === 'param') paramsByName.set(name!, t.id)
+     if (kind === 'tensor_input') inputsByName.set(name!, t.id)
+     if (kind === 'param_grad') paramGradsByName.set(name!, t.id)
+     if (kind === 'state') statesByName.set(name!, t.id)
+   }
+
+   const outputBufferIds = graph.outputs.map(id => tensorToBuffer.get(id)!)
+
+   // Resolve writeback declarations to (source, dest) buffer-id pairs.
+   const writebacks: Writeback[] = writebackDecls.map(decl => {
+     const sourceBufId = tensorToBuffer.get(decl.source.id)
+     if (sourceBufId === undefined) {
+       throw new Error(`planBuffers: writeback source tensor #${decl.source.id} not in graph`)
+     }
+     const destBufId = decl.destKind === 'param'
+       ? paramsByName.get(decl.destName)
+       : statesByName.get(decl.destName)
+     if (destBufId === undefined) {
+       throw new Error(`planBuffers: writeback dest ${decl.destKind}:'${decl.destName}' not found`)
+     }
+     const sourceSpec = buffers[sourceBufId]!
+     const destSpec = buffers[destBufId]!
+     if (sourceSpec.byteSize !== destSpec.byteSize) {
+       throw new Error(
+         `planBuffers: writeback size mismatch for ${decl.destKind}:'${decl.destName}' ` +
+         `(source ${sourceSpec.byteSize} bytes vs dest ${destSpec.byteSize})`,
+       )
+     }
+     return { source: sourceBufId, dest: destBufId, bytes: sourceSpec.byteSize }
+   })
+
+   return { buffers, tensorToBuffer, paramsByName, inputsByName, paramGradsByName, statesByName, outputBufferIds, writebacks }
+ }
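
For orientation, here is a minimal usage sketch, not taken from the package's docs or tests: it drives planBuffers with a tiny hand-built graph. The Graph and op literal shapes are inferred from the fields this file reads (graph.tensors, graph.ops[].kind/name/out, graph.outputs) and may not match the real ir.ts definitions exactly, hence the cast; the 'matmul' op kind is likewise illustrative.

import type { Graph } from './ir.js'
import { planBuffers } from './buffers.js'

// Hand-built graph: one param, one per-step input, one output tensor.
const graph = {
  tensors: [
    { id: 0, shape: [4, 8], dtype: 'f32' }, // weight param
    { id: 1, shape: [2, 4], dtype: 'f32' }, // per-step input (e.g. a batch)
    { id: 2, shape: [2, 8], dtype: 'f32' }, // op result, listed in graph.outputs
  ],
  ops: [
    { kind: 'param_input', name: 'w', out: 0 },
    { kind: 'tensor_input', name: 'x', out: 1 },
    { kind: 'matmul', inputs: [1, 0], out: 2 }, // illustrative op shape
  ],
  outputs: [2],
} as unknown as Graph

const plan = planBuffers(graph, {}) // no grads, no optimizer writebacks here

plan.paramsByName.get('w') // -> 0: upload weights into this buffer via uploadParams
plan.inputsByName.get('x') // -> 1: re-uploaded each step
plan.outputBufferIds       // -> [2]: categorized 'output', made readable (loss)
// Tensor 0's byteSize is shapeSize([4, 8]) * 4 = 128 bytes for f32; the
// Math.max(4, ...) floor only matters for zero-element shapes.

// With an optimizer appended, appendAdam-style code would pass declarations like
//   [{ source: updatedW, destName: 'w', destKind: 'param' }]
// which resolve to { source, dest, bytes } copies the runtime runs after each step.

Since tensorToBuffer is the identity map in v1, a buffer id can index plan.buffers directly, which is exactly what the writeback size check in planBuffers relies on.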