simplex-tensor 1.0.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/PKG-INFO +1 -1
  2. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/pyproject.toml +1 -1
  3. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/Cargo.lock +2 -2
  4. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/setup.py +1 -1
  5. simplex_tensor-1.1.0/rust-engine/Cargo.lock +574 -0
  6. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/src/x86_emitter.rs +293 -0
  7. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/README.md +0 -0
  8. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/.gitignore +0 -0
  9. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/Cargo.toml +0 -0
  10. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/README.md +0 -0
  11. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/benchmarks/bench_physics.py +0 -0
  12. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/benchmarks/bench_results.json +0 -0
  13. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/benchmarks/bench_symplex.py +0 -0
  14. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/demo.py +0 -0
  15. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/src/lib.rs +0 -0
  16. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/tests/test_jit.py +0 -0
  17. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/tests/test_math.py +0 -0
  18. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/python/tests/test_purity.py +0 -0
  19. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/.gitignore +0 -0
  20. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/Cargo.toml +0 -0
  21. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/benches/physics_bench.rs +0 -0
  22. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/src/cuda_backend.rs +0 -0
  23. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/src/ffi.rs +0 -0
  24. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/src/fusion_engine.rs +0 -0
  25. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/src/lib.rs +0 -0
  26. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/src/phase3_jit.rs +0 -0
  27. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/src/polyhedral.rs +0 -0
  28. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/src/tracing_jit.rs +0 -0
  29. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/src/types.rs +0 -0
  30. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/rust-engine/tests/integration.rs +0 -0
  31. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/symplex/__init__.py +0 -0
  32. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/symplex/_array.py +0 -0
  33. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/symplex/_ast_checker.py +0 -0
  34. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/symplex/_errors.py +0 -0
  35. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/symplex/_jit.py +0 -0
  36. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/symplex/_tracer.cpp +0 -0
  37. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/symplex/_tracer.py +0 -0
  38. {simplex_tensor-1.0.0 → simplex_tensor-1.1.0}/symplex/linalg.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: simplex-tensor
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Intended Audience :: Science/Research
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "simplex-tensor"
7
- version = "1.0.0"
7
+ version = "1.1.0"
8
8
  description = "SympleX – Polyhedral Tensor Superoptimizer with JAX-style purity enforcement and x86-64 JIT compilation"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -205,9 +205,9 @@ dependencies = [
205
205
 
206
206
  [[package]]
207
207
  name = "log"
208
- version = "0.4.31"
208
+ version = "0.4.32"
209
209
  source = "registry+https://github.com/rust-lang/crates.io-index"
210
- checksum = "113b30b4cd05f7c06868fdb2854f66a7b9fece9a48425351cd532e810d74024f"
210
+ checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a"
211
211
 
212
212
  [[package]]
213
213
  name = "matrixmultiply"
@@ -63,7 +63,7 @@ ext = Extension(
63
63
 
64
64
  setup(
65
65
  name="simplex-tensor",
66
- version="1.0.0",
66
+ version="1.1.0",
67
67
  packages=["symplex"],
68
68
  ext_modules=[ext],
69
69
  cmdclass={"build_ext": CMakeBuild},
@@ -0,0 +1,574 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "autocfg"
7
+ version = "1.5.1"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
10
+
11
+ [[package]]
12
+ name = "bumpalo"
13
+ version = "3.20.3"
14
+ source = "registry+https://github.com/rust-lang/crates.io-index"
15
+ checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
16
+
17
+ [[package]]
18
+ name = "cfg-if"
19
+ version = "1.0.4"
20
+ source = "registry+https://github.com/rust-lang/crates.io-index"
21
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
22
+
23
+ [[package]]
24
+ name = "crossbeam-deque"
25
+ version = "0.8.6"
26
+ source = "registry+https://github.com/rust-lang/crates.io-index"
27
+ checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
28
+ dependencies = [
29
+ "crossbeam-epoch",
30
+ "crossbeam-utils",
31
+ ]
32
+
33
+ [[package]]
34
+ name = "crossbeam-epoch"
35
+ version = "0.9.18"
36
+ source = "registry+https://github.com/rust-lang/crates.io-index"
37
+ checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
38
+ dependencies = [
39
+ "crossbeam-utils",
40
+ ]
41
+
42
+ [[package]]
43
+ name = "crossbeam-utils"
44
+ version = "0.8.21"
45
+ source = "registry+https://github.com/rust-lang/crates.io-index"
46
+ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
47
+
48
+ [[package]]
49
+ name = "cudarc"
50
+ version = "0.19.7"
51
+ source = "registry+https://github.com/rust-lang/crates.io-index"
52
+ checksum = "1cea5f10a99e025c1b44ae2354c2d8326b25ddbd0baf76bde8e55cfd4018a2cc"
53
+ dependencies = [
54
+ "libloading",
55
+ ]
56
+
57
+ [[package]]
58
+ name = "egg"
59
+ version = "0.6.0"
60
+ source = "registry+https://github.com/rust-lang/crates.io-index"
61
+ checksum = "05a6c0bbc92278f84e742f08c0ab9cb16a987376cd2bc39d228ef9c74d98d6f7"
62
+ dependencies = [
63
+ "indexmap",
64
+ "instant",
65
+ "log",
66
+ "once_cell",
67
+ "smallvec",
68
+ "symbolic_expressions",
69
+ ]
70
+
71
+ [[package]]
72
+ name = "either"
73
+ version = "1.16.0"
74
+ source = "registry+https://github.com/rust-lang/crates.io-index"
75
+ checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
76
+
77
+ [[package]]
78
+ name = "fnv"
79
+ version = "1.0.7"
80
+ source = "registry+https://github.com/rust-lang/crates.io-index"
81
+ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
82
+
83
+ [[package]]
84
+ name = "futures-core"
85
+ version = "0.3.32"
86
+ source = "registry+https://github.com/rust-lang/crates.io-index"
87
+ checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
88
+
89
+ [[package]]
90
+ name = "futures-task"
91
+ version = "0.3.32"
92
+ source = "registry+https://github.com/rust-lang/crates.io-index"
93
+ checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
94
+
95
+ [[package]]
96
+ name = "futures-util"
97
+ version = "0.3.32"
98
+ source = "registry+https://github.com/rust-lang/crates.io-index"
99
+ checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
100
+ dependencies = [
101
+ "futures-core",
102
+ "futures-task",
103
+ "pin-project-lite",
104
+ "slab",
105
+ ]
106
+
107
+ [[package]]
108
+ name = "good_lp"
109
+ version = "1.15.2"
110
+ source = "registry+https://github.com/rust-lang/crates.io-index"
111
+ checksum = "745190412d5ff4a54335cd16229a475ad3fb8f5474a5c1358292d62932187ea7"
112
+ dependencies = [
113
+ "fnv",
114
+ "microlp",
115
+ ]
116
+
117
+ [[package]]
118
+ name = "hashbrown"
119
+ version = "0.12.3"
120
+ source = "registry+https://github.com/rust-lang/crates.io-index"
121
+ checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
122
+
123
+ [[package]]
124
+ name = "heck"
125
+ version = "0.5.0"
126
+ source = "registry+https://github.com/rust-lang/crates.io-index"
127
+ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
128
+
129
+ [[package]]
130
+ name = "hermit-abi"
131
+ version = "0.5.2"
132
+ source = "registry+https://github.com/rust-lang/crates.io-index"
133
+ checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
134
+
135
+ [[package]]
136
+ name = "iced-x86"
137
+ version = "1.21.0"
138
+ source = "registry+https://github.com/rust-lang/crates.io-index"
139
+ checksum = "7c447cff8c7f384a7d4f741cfcff32f75f3ad02b406432e8d6c878d56b1edf6b"
140
+ dependencies = [
141
+ "lazy_static",
142
+ ]
143
+
144
+ [[package]]
145
+ name = "indexmap"
146
+ version = "1.9.3"
147
+ source = "registry+https://github.com/rust-lang/crates.io-index"
148
+ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
149
+ dependencies = [
150
+ "autocfg",
151
+ "hashbrown",
152
+ ]
153
+
154
+ [[package]]
155
+ name = "indoc"
156
+ version = "2.0.7"
157
+ source = "registry+https://github.com/rust-lang/crates.io-index"
158
+ checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
159
+ dependencies = [
160
+ "rustversion",
161
+ ]
162
+
163
+ [[package]]
164
+ name = "instant"
165
+ version = "0.1.13"
166
+ source = "registry+https://github.com/rust-lang/crates.io-index"
167
+ checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
168
+ dependencies = [
169
+ "cfg-if",
170
+ ]
171
+
172
+ [[package]]
173
+ name = "js-sys"
174
+ version = "0.3.99"
175
+ source = "registry+https://github.com/rust-lang/crates.io-index"
176
+ checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11"
177
+ dependencies = [
178
+ "cfg-if",
179
+ "futures-util",
180
+ "once_cell",
181
+ "wasm-bindgen",
182
+ ]
183
+
184
+ [[package]]
185
+ name = "lazy_static"
186
+ version = "1.5.0"
187
+ source = "registry+https://github.com/rust-lang/crates.io-index"
188
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
189
+
190
+ [[package]]
191
+ name = "libc"
192
+ version = "0.2.186"
193
+ source = "registry+https://github.com/rust-lang/crates.io-index"
194
+ checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
195
+
196
+ [[package]]
197
+ name = "libloading"
198
+ version = "0.9.0"
199
+ source = "registry+https://github.com/rust-lang/crates.io-index"
200
+ checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60"
201
+ dependencies = [
202
+ "cfg-if",
203
+ "windows-link",
204
+ ]
205
+
206
+ [[package]]
207
+ name = "log"
208
+ version = "0.4.32"
209
+ source = "registry+https://github.com/rust-lang/crates.io-index"
210
+ checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a"
211
+
212
+ [[package]]
213
+ name = "matrixmultiply"
214
+ version = "0.3.10"
215
+ source = "registry+https://github.com/rust-lang/crates.io-index"
216
+ checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
217
+ dependencies = [
218
+ "autocfg",
219
+ "rawpointer",
220
+ ]
221
+
222
+ [[package]]
223
+ name = "memoffset"
224
+ version = "0.9.1"
225
+ source = "registry+https://github.com/rust-lang/crates.io-index"
226
+ checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
227
+ dependencies = [
228
+ "autocfg",
229
+ ]
230
+
231
+ [[package]]
232
+ name = "microlp"
233
+ version = "0.4.0"
234
+ source = "registry+https://github.com/rust-lang/crates.io-index"
235
+ checksum = "458ed987196f802dc47c69d4c5afcd19002d6c1c5f8f75c76d129bcf2425057a"
236
+ dependencies = [
237
+ "log",
238
+ "sprs",
239
+ "web-time",
240
+ ]
241
+
242
+ [[package]]
243
+ name = "ndarray"
244
+ version = "0.17.2"
245
+ source = "registry+https://github.com/rust-lang/crates.io-index"
246
+ checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d"
247
+ dependencies = [
248
+ "matrixmultiply",
249
+ "num-complex",
250
+ "num-integer",
251
+ "num-traits",
252
+ "portable-atomic",
253
+ "portable-atomic-util",
254
+ "rawpointer",
255
+ ]
256
+
257
+ [[package]]
258
+ name = "num-complex"
259
+ version = "0.4.6"
260
+ source = "registry+https://github.com/rust-lang/crates.io-index"
261
+ checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
262
+ dependencies = [
263
+ "num-traits",
264
+ ]
265
+
266
+ [[package]]
267
+ name = "num-integer"
268
+ version = "0.1.46"
269
+ source = "registry+https://github.com/rust-lang/crates.io-index"
270
+ checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
271
+ dependencies = [
272
+ "num-traits",
273
+ ]
274
+
275
+ [[package]]
276
+ name = "num-traits"
277
+ version = "0.2.19"
278
+ source = "registry+https://github.com/rust-lang/crates.io-index"
279
+ checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
280
+ dependencies = [
281
+ "autocfg",
282
+ ]
283
+
284
+ [[package]]
285
+ name = "num_cpus"
286
+ version = "1.17.0"
287
+ source = "registry+https://github.com/rust-lang/crates.io-index"
288
+ checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
289
+ dependencies = [
290
+ "hermit-abi",
291
+ "libc",
292
+ ]
293
+
294
+ [[package]]
295
+ name = "once_cell"
296
+ version = "1.21.4"
297
+ source = "registry+https://github.com/rust-lang/crates.io-index"
298
+ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
299
+
300
+ [[package]]
301
+ name = "pin-project-lite"
302
+ version = "0.2.17"
303
+ source = "registry+https://github.com/rust-lang/crates.io-index"
304
+ checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
305
+
306
+ [[package]]
307
+ name = "portable-atomic"
308
+ version = "1.13.1"
309
+ source = "registry+https://github.com/rust-lang/crates.io-index"
310
+ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
311
+
312
+ [[package]]
313
+ name = "portable-atomic-util"
314
+ version = "0.2.7"
315
+ source = "registry+https://github.com/rust-lang/crates.io-index"
316
+ checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618"
317
+ dependencies = [
318
+ "portable-atomic",
319
+ ]
320
+
321
+ [[package]]
322
+ name = "proc-macro2"
323
+ version = "1.0.106"
324
+ source = "registry+https://github.com/rust-lang/crates.io-index"
325
+ checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
326
+ dependencies = [
327
+ "unicode-ident",
328
+ ]
329
+
330
+ [[package]]
331
+ name = "pyo3"
332
+ version = "0.23.5"
333
+ source = "registry+https://github.com/rust-lang/crates.io-index"
334
+ checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
335
+ dependencies = [
336
+ "cfg-if",
337
+ "indoc",
338
+ "libc",
339
+ "memoffset",
340
+ "once_cell",
341
+ "portable-atomic",
342
+ "pyo3-build-config",
343
+ "pyo3-ffi",
344
+ "pyo3-macros",
345
+ "unindent",
346
+ ]
347
+
348
+ [[package]]
349
+ name = "pyo3-build-config"
350
+ version = "0.23.5"
351
+ source = "registry+https://github.com/rust-lang/crates.io-index"
352
+ checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb"
353
+ dependencies = [
354
+ "once_cell",
355
+ "target-lexicon",
356
+ ]
357
+
358
+ [[package]]
359
+ name = "pyo3-ffi"
360
+ version = "0.23.5"
361
+ source = "registry+https://github.com/rust-lang/crates.io-index"
362
+ checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d"
363
+ dependencies = [
364
+ "libc",
365
+ "pyo3-build-config",
366
+ ]
367
+
368
+ [[package]]
369
+ name = "pyo3-macros"
370
+ version = "0.23.5"
371
+ source = "registry+https://github.com/rust-lang/crates.io-index"
372
+ checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da"
373
+ dependencies = [
374
+ "proc-macro2",
375
+ "pyo3-macros-backend",
376
+ "quote",
377
+ "syn",
378
+ ]
379
+
380
+ [[package]]
381
+ name = "pyo3-macros-backend"
382
+ version = "0.23.5"
383
+ source = "registry+https://github.com/rust-lang/crates.io-index"
384
+ checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028"
385
+ dependencies = [
386
+ "heck",
387
+ "proc-macro2",
388
+ "pyo3-build-config",
389
+ "quote",
390
+ "syn",
391
+ ]
392
+
393
+ [[package]]
394
+ name = "quote"
395
+ version = "1.0.45"
396
+ source = "registry+https://github.com/rust-lang/crates.io-index"
397
+ checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
398
+ dependencies = [
399
+ "proc-macro2",
400
+ ]
401
+
402
+ [[package]]
403
+ name = "rawpointer"
404
+ version = "0.2.1"
405
+ source = "registry+https://github.com/rust-lang/crates.io-index"
406
+ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
407
+
408
+ [[package]]
409
+ name = "rayon"
410
+ version = "1.12.0"
411
+ source = "registry+https://github.com/rust-lang/crates.io-index"
412
+ checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
413
+ dependencies = [
414
+ "either",
415
+ "rayon-core",
416
+ ]
417
+
418
+ [[package]]
419
+ name = "rayon-core"
420
+ version = "1.13.0"
421
+ source = "registry+https://github.com/rust-lang/crates.io-index"
422
+ checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
423
+ dependencies = [
424
+ "crossbeam-deque",
425
+ "crossbeam-utils",
426
+ ]
427
+
428
+ [[package]]
429
+ name = "rustc-hash"
430
+ version = "2.1.2"
431
+ source = "registry+https://github.com/rust-lang/crates.io-index"
432
+ checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
433
+
434
+ [[package]]
435
+ name = "rustversion"
436
+ version = "1.0.22"
437
+ source = "registry+https://github.com/rust-lang/crates.io-index"
438
+ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
439
+
440
+ [[package]]
441
+ name = "slab"
442
+ version = "0.4.12"
443
+ source = "registry+https://github.com/rust-lang/crates.io-index"
444
+ checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
445
+
446
+ [[package]]
447
+ name = "smallvec"
448
+ version = "1.15.1"
449
+ source = "registry+https://github.com/rust-lang/crates.io-index"
450
+ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
451
+
452
+ [[package]]
453
+ name = "sprs"
454
+ version = "0.11.4"
455
+ source = "registry+https://github.com/rust-lang/crates.io-index"
456
+ checksum = "6dca58a33be2188d4edc71534f8bafa826e787cc28ca1c47f31be3423f0d6e55"
457
+ dependencies = [
458
+ "ndarray",
459
+ "num-complex",
460
+ "num-traits",
461
+ "smallvec",
462
+ ]
463
+
464
+ [[package]]
465
+ name = "symbolic_expressions"
466
+ version = "5.0.3"
467
+ source = "registry+https://github.com/rust-lang/crates.io-index"
468
+ checksum = "7c68d531d83ec6c531150584c42a4290911964d5f0d79132b193b67252a23b71"
469
+
470
+ [[package]]
471
+ name = "symplex-engine"
472
+ version = "1.0.0"
473
+ dependencies = [
474
+ "cudarc",
475
+ "egg",
476
+ "good_lp",
477
+ "iced-x86",
478
+ "lazy_static",
479
+ "libc",
480
+ "num_cpus",
481
+ "pyo3",
482
+ "rayon",
483
+ "rustc-hash",
484
+ ]
485
+
486
+ [[package]]
487
+ name = "syn"
488
+ version = "2.0.117"
489
+ source = "registry+https://github.com/rust-lang/crates.io-index"
490
+ checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
491
+ dependencies = [
492
+ "proc-macro2",
493
+ "quote",
494
+ "unicode-ident",
495
+ ]
496
+
497
+ [[package]]
498
+ name = "target-lexicon"
499
+ version = "0.12.16"
500
+ source = "registry+https://github.com/rust-lang/crates.io-index"
501
+ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
502
+
503
+ [[package]]
504
+ name = "unicode-ident"
505
+ version = "1.0.24"
506
+ source = "registry+https://github.com/rust-lang/crates.io-index"
507
+ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
508
+
509
+ [[package]]
510
+ name = "unindent"
511
+ version = "0.2.4"
512
+ source = "registry+https://github.com/rust-lang/crates.io-index"
513
+ checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
514
+
515
+ [[package]]
516
+ name = "wasm-bindgen"
517
+ version = "0.2.122"
518
+ source = "registry+https://github.com/rust-lang/crates.io-index"
519
+ checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409"
520
+ dependencies = [
521
+ "cfg-if",
522
+ "once_cell",
523
+ "rustversion",
524
+ "wasm-bindgen-macro",
525
+ "wasm-bindgen-shared",
526
+ ]
527
+
528
+ [[package]]
529
+ name = "wasm-bindgen-macro"
530
+ version = "0.2.122"
531
+ source = "registry+https://github.com/rust-lang/crates.io-index"
532
+ checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6"
533
+ dependencies = [
534
+ "quote",
535
+ "wasm-bindgen-macro-support",
536
+ ]
537
+
538
+ [[package]]
539
+ name = "wasm-bindgen-macro-support"
540
+ version = "0.2.122"
541
+ source = "registry+https://github.com/rust-lang/crates.io-index"
542
+ checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e"
543
+ dependencies = [
544
+ "bumpalo",
545
+ "proc-macro2",
546
+ "quote",
547
+ "syn",
548
+ "wasm-bindgen-shared",
549
+ ]
550
+
551
+ [[package]]
552
+ name = "wasm-bindgen-shared"
553
+ version = "0.2.122"
554
+ source = "registry+https://github.com/rust-lang/crates.io-index"
555
+ checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437"
556
+ dependencies = [
557
+ "unicode-ident",
558
+ ]
559
+
560
+ [[package]]
561
+ name = "web-time"
562
+ version = "1.1.0"
563
+ source = "registry+https://github.com/rust-lang/crates.io-index"
564
+ checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
565
+ dependencies = [
566
+ "js-sys",
567
+ "wasm-bindgen",
568
+ ]
569
+
570
+ [[package]]
571
+ name = "windows-link"
572
+ version = "0.2.1"
573
+ source = "registry+https://github.com/rust-lang/crates.io-index"
574
+ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
@@ -1466,9 +1466,293 @@ pub fn simd_reduce_f64(op: u8, data_ptr: usize, n: usize) -> f64 {
1466
1466
  }
1467
1467
  }
1468
1468
 
1469
+ // ── BLIS-Style Cache-Blocked Matmul ──────────────────────────────────────
1470
+ //
1471
+ // Implements a 5-loop BLIS-style algorithm with:
1472
+ // - AVX2 6×16 micro-kernel (12 YMM accumulators, 12 FMAs per k-step)
1473
+ // - B-matrix packing (contiguous access in micro-kernel, eliminates stride)
1474
+ // - A-matrix packing (column-major micro-panels for sequential broadcast)
1475
+ // - L1/L2/L3 cache blocking (MC=128, NC=128, KC=128; see the BLIS_* constants below)
1476
+ // - Rayon parallelism on the outermost (i2) loop
1477
+ //
1478
+ // Packed layouts (critical for micro-kernel performance):
1479
+ // A micro-panel [MR rows × kc cols]: column-major within panel
1480
+ // Layout: a[0,0] a[1,0] ... a[MR-1,0] a[0,1] a[1,1] ... a[MR-1,kc-1]
1481
+ // Stride between k-steps = MR (micro-kernel does a_ptr += MR)
1482
+ //
1483
+ // B micro-panel [kc rows × NR cols]: row-major within panel, padded to NR
1484
+ // Layout: b[0,0] b[0,1] ... b[0,NR-1] b[1,0] ... b[kc-1,NR-1]
1485
+ // Stride between k-steps = NR (micro-kernel does b_ptr += NR)
1486
+
1487
+ /// Micro-kernel row count: each micro-tile spans 6 rows, each held in two YMM accumulators.
1488
+ const BLIS_MR: usize = 6;
1489
+ /// Micro-kernel column count: each micro-tile spans 16 f32 columns (2 YMM registers of 8 lanes).
1490
+ const BLIS_NR: usize = 16;
1491
+ /// Row-block size: maximum number of rows of A packed and processed per outer block.
1492
+ const BLIS_MC: usize = 128;
1493
+ /// Column-block size: maximum columns of B packed per panel; also the row stride of packed B.
1494
+ const BLIS_NC: usize = 128;
1495
+ /// Depth-block size: maximum number of shared-dimension (k) steps packed per panel.
1496
+ const BLIS_KC: usize = 128;
1497
+
1498
+ /// AVX2 micro-kernel for MR×NR rank-1 update.
1499
+ ///
1500
+ /// Accumulates into C[i:i+mr, j:j+nr] += A_panel * B_panel
1501
+ /// using 12 YMM accumulators (6 rows × 2 YMM columns = 12 YMMs).
1502
+ ///
1503
+ /// A_packed layout (column-major within panel):
1504
+ /// For each k step k, BLIS_MR consecutive values starting at a_packed + k*BLIS_MR:
1505
+ /// a[0,k], a[1,k], ..., a[BLIS_MR-1,k]
1506
+ ///
1507
+ /// B_packed layout (row-major within panel, b_stride between k-rows):
1508
+ /// For each k step k, BLIS_NR consecutive values starting at b_packed + k*b_stride:
1509
+ /// b[k,0], b[k,1], ..., b[k,BLIS_NR-1]
1510
+ /// b_stride must be >= BLIS_NR (padded so loads are always valid).
1511
+ #[cfg(target_arch = "x86_64")]
1512
+ #[target_feature(enable = "avx2")]
1513
+ unsafe fn micro_kernel_6x16(
1514
+ a_packed: *const f32,
1515
+ b_packed: *const f32,
1516
+ b_stride: usize, // stride between k-rows in packed B
1517
+ c: *mut f32,
1518
+ ldc: usize,
1519
+ kc: usize,
1520
+ mr: usize, // actual mr (may be < BLIS_MR at edges)
1521
+ nr: usize, // actual nr (may be < BLIS_NR at edges)
1522
+ ) {
1523
+ use std::arch::x86_64::*;
1524
+
1525
+ // 12 YMM accumulators: acc[row][col_block]
1526
+ // row 0: acc[0], acc[1] row 1: acc[2], acc[3] row 2: acc[4], acc[5]
1527
+ // row 3: acc[6], acc[7] row 4: acc[8], acc[9] row 5: acc[10], acc[11]
1528
+ let mut acc = [_mm256_setzero_ps(); 12];
1529
+
1530
+ for p in 0..kc {
1531
+ // Load 2 YMM from B row p (16 f32 contiguous, padded so loads are always valid)
1532
+ let b_row = b_packed.add(p * b_stride);
1533
+ let b0 = _mm256_loadu_ps(b_row);
1534
+ let b1 = _mm256_loadu_ps(b_row.add(8));
1535
+
1536
+ // Broadcast each A row value and do 12 FMAs
1537
+ // A is stored column-major: a_packed + p*BLIS_MR has all MR row values for this k
1538
+ let a_row = a_packed.add(p * BLIS_MR);
1539
+ for row in 0..BLIS_MR {
1540
+ let a_val = _mm256_broadcast_ss(&*a_row.add(row));
1541
+ acc[row * 2] = _mm256_fmadd_ps(a_val, b0, acc[row * 2]);
1542
+ acc[row * 2 + 1] = _mm256_fmadd_ps(a_val, b1, acc[row * 2 + 1]);
1543
+ }
1544
+ }
1545
+
1546
+ // Store accumulators to C with stride ldc
1547
+ for row in 0..mr {
1548
+ let c_row = c.add(row * ldc);
1549
+ if nr >= 16 {
1550
+ _mm256_storeu_ps(c_row, acc[row * 2]);
1551
+ _mm256_storeu_ps(c_row.add(8), acc[row * 2 + 1]);
1552
+ } else if nr >= 8 {
1553
+ _mm256_storeu_ps(c_row, acc[row * 2]);
1554
+ if nr > 8 {
1555
+ // Partial second YMM: extract and store only the valid elements
1556
+ let acc1_arr: [f32; 8] = std::mem::transmute(acc[row * 2 + 1]);
1557
+ for j in 8..nr {
1558
+ *c_row.add(j) = acc1_arr[j - 8];
1559
+ }
1560
+ }
1561
+ } else {
1562
+ // nr < 8: store element by element from first YMM
1563
+ let acc0_arr: [f32; 8] = std::mem::transmute(acc[row * 2]);
1564
+ for j in 0..nr {
1565
+ *c_row.add(j) = acc0_arr[j];
1566
+ }
1567
+ }
1568
+ }
1569
+ }
1570
+
1571
+ /// Core BLIS-style blocked matmul body (shared between serial and parallel).
1572
+ /// Processes a single MC-row block starting at row i2.
1573
+ /// Uses raw pointers for C to allow sharing across rayon threads safely.
1574
+ fn blis_process_block(
1575
+ a: &[f32],
1576
+ b: &[f32],
1577
+ c_ptr: *mut f32,
1578
+ _m: usize,
1579
+ n: usize,
1580
+ k_dim: usize, // renamed from `k` to avoid conflict with iced-x86's k2 register
1581
+ i2: usize,
1582
+ mc: usize,
1583
+ packed_b: &mut [f32],
1584
+ packed_a: &mut [f32],
1585
+ ) {
1586
+ let has_avx2 = is_x86_feature_detected!("avx2");
1587
+
1588
+ // packed_b layout: [kc][BLIS_NC] with stride BLIS_NC (>= BLIS_NR, multiple of BLIS_NR)
1589
+ // BLIS_NC = 64 which is a multiple of BLIS_NR = 16, so loads at j1 offsets are safe
1590
+ let b_stride = BLIS_NC;
1591
+
1592
+ for j2 in (0..n).step_by(BLIS_NC) {
1593
+ let nc = std::cmp::min(BLIS_NC, n - j2);
1594
+
1595
+ for kk2 in (0..k_dim).step_by(BLIS_KC) {
1596
+ let kc = std::cmp::min(BLIS_KC, k_dim - kk2);
1597
+
1598
+ // Pack B[kk2:kk2+kc, j2:j2+nc] with stride b_stride = BLIS_NC
1599
+ // Layout: [kc][BLIS_NC] row-major, zero-padded beyond nc
1600
+ for p in 0..kc {
1601
+ let k_idx = kk2 + p;
1602
+ let row_start = p * b_stride;
1603
+ // Copy actual data
1604
+ for jj in 0..nc {
1605
+ packed_b[row_start + jj] = b[k_idx * n + j2 + jj];
1606
+ }
1607
+ // Zero-pad remainder (only if nc < BLIS_NC)
1608
+ if nc < BLIS_NC {
1609
+ for jj in nc..BLIS_NC {
1610
+ packed_b[row_start + jj] = 0.0f32;
1611
+ }
1612
+ }
1613
+ }
1614
+
1615
+ // Pack A[i2:i2+mc, kk2:kk2+kc] in column-major order within micro-panels
1616
+ // Layout within micro-panel (at offset i1*kc in packed_a):
1617
+ // for each k step p: BLIS_MR consecutive values
1618
+ // packed_a[i1*kc + p*BLIS_MR + row] = A[i2+i1+row, kk2+p]
1619
+ for i1 in (0..mc).step_by(BLIS_MR) {
1620
+ let mr = std::cmp::min(BLIS_MR, mc - i1);
1621
+ let panel_base = i1 * kc;
1622
+ for p in 0..kc {
1623
+ let col_start = panel_base + p * BLIS_MR;
1624
+ // Copy actual rows
1625
+ for row in 0..mr {
1626
+ let i_idx = i2 + i1 + row;
1627
+ packed_a[col_start + row] = a[i_idx * k_dim + kk2 + p];
1628
+ }
1629
+ // Zero-pad remainder (only if mr < BLIS_MR)
1630
+ if mr < BLIS_MR {
1631
+ for row in mr..BLIS_MR {
1632
+ packed_a[col_start + row] = 0.0f32;
1633
+ }
1634
+ }
1635
+ }
1636
+ }
1637
+
1638
+ // Micro-kernel loops
1639
+ for i1 in (0..mc).step_by(BLIS_MR) {
1640
+ let mr = std::cmp::min(BLIS_MR, mc - i1);
1641
+
1642
+ for j1 in (0..nc).step_by(BLIS_NR) {
1643
+ let nr = std::cmp::min(BLIS_NR, nc - j1);
1644
+
1645
+ let a_panel_ptr = packed_a.as_ptr().wrapping_add(i1 * kc);
1646
+ // B micro-panel starts at column j1 within the packed NC-wide panel
1647
+ let b_panel_ptr = packed_b.as_ptr().wrapping_add(j1);
1648
+ let c_row_ptr = unsafe { c_ptr.add((i2 + i1) * n + j2 + j1) };
1649
+
1650
+ if has_avx2 {
1651
+ unsafe {
1652
+ micro_kernel_6x16(
1653
+ a_panel_ptr,
1654
+ b_panel_ptr,
1655
+ b_stride, // stride between k-rows in packed B
1656
+ c_row_ptr,
1657
+ n, // ldc
1658
+ kc,
1659
+ mr,
1660
+ nr,
1661
+ );
1662
+ }
1663
+ } else {
1664
+ // Scalar fallback
1665
+ for i in 0..mr {
1666
+ for j in 0..nr {
1667
+ let mut sum = 0.0f32;
1668
+ for p in 0..kc {
1669
+ let a_val = packed_a[i1 * kc + p * BLIS_MR + i];
1670
+ let b_val = packed_b[p * b_stride + j1 + j];
1671
+ sum += a_val * b_val;
1672
+ }
1673
+ unsafe {
1674
+ *c_ptr.add((i2 + i1 + i) * n + j2 + j1 + j) += sum;
1675
+ }
1676
+ }
1677
+ }
1678
+ }
1679
+ }
1680
+ }
1681
+ }
1682
+ }
1683
+ }
1684
+
1685
+ /// BLIS-style cache-blocked matmul (serial version).
1686
+ pub fn cache_blocked_matmul(a: &[f32], b: &[f32], c: &mut [f32], m: usize, n: usize, k: usize) {
1687
+ if m == 0 || n == 0 || k == 0 { return; }
1688
+
1689
+ // Pre-allocate packing buffers (reused across loops)
1690
+ // A pack needs extra space for zero-padding: round up MC to next MR multiple
1691
+ let a_pack_rows = ((BLIS_MC + BLIS_MR - 1) / BLIS_MR) * BLIS_MR;
1692
+ let b_pack_size = BLIS_KC * BLIS_NC; // B panel [kc][BLIS_NC]
1693
+ let a_pack_size = a_pack_rows * BLIS_KC; // A panel (rounded mc × kc)
1694
+ let mut packed_b = vec![0.0f32; b_pack_size];
1695
+ let mut packed_a = vec![0.0f32; a_pack_size];
1696
+
1697
+ // Zero C
1698
+ for val in c.iter_mut().take(m * n) {
1699
+ *val = 0.0f32;
1700
+ }
1701
+
1702
+ let c_ptr = c.as_mut_ptr();
1703
+
1704
+ // Process all MC-row blocks serially
1705
+ for i2 in (0..m).step_by(BLIS_MC) {
1706
+ let mc = std::cmp::min(BLIS_MC, m - i2);
1707
+ blis_process_block(a, b, c_ptr, m, n, k, i2, mc, &mut packed_b, &mut packed_a);
1708
+ }
1709
+ }
1710
+
1711
+ /// Parallel version of cache_blocked_matmul using rayon.
1712
+ /// Parallelizes over the outermost loop (MC-row blocks).
1713
+ pub fn parallel_cache_blocked_matmul(a: &[f32], b: &[f32], c: &mut [f32], m: usize, n: usize, k: usize) {
1714
+ use rayon::prelude::*;
1715
+ if m == 0 || n == 0 || k == 0 { return; }
1716
+
1717
+ // Zero C first
1718
+ for val in c.iter_mut().take(m * n) {
1719
+ *val = 0.0f32;
1720
+ }
1721
+
1722
+ let num_blocks = (m + BLIS_MC - 1) / BLIS_MC;
1723
+
1724
+ // Convert pointer to usize for safe Send+Sync capture across rayon threads.
1725
+ // Safety: each thread writes to disjoint rows of C (different i2 blocks).
1726
+ let c_addr = c.as_mut_ptr() as usize;
1727
+
1728
+ // Process each MC-row block in parallel (each thread gets its own packing buffers)
1729
+ (0..num_blocks).into_par_iter().for_each(|block_idx| {
1730
+ let i2 = block_idx * BLIS_MC;
1731
+ let mc = std::cmp::min(BLIS_MC, m - i2);
1732
+ let c_ptr = c_addr as *mut f32;
1733
+
1734
+ // Per-thread packing buffers
1735
+ let a_pack_rows = ((BLIS_MC + BLIS_MR - 1) / BLIS_MR) * BLIS_MR;
1736
+ let b_pack_size = BLIS_KC * BLIS_NC;
1737
+ let a_pack_size = a_pack_rows * BLIS_KC;
1738
+ let mut packed_b = vec![0.0f32; b_pack_size];
1739
+ let mut packed_a = vec![0.0f32; a_pack_size];
1740
+
1741
+ blis_process_block(a, b, c_ptr, m, n, k, i2, mc, &mut packed_b, &mut packed_a);
1742
+ });
1743
+ }
1744
+
1469
1745
  pub fn parallel_matmul(a: &[f32], b: &[f32], c: &mut [f32], m: usize, n: usize, k: usize) {
1470
1746
  use rayon::prelude::*;
1471
1747
  if m == 0 || n == 0 || k == 0 { return; }
1748
+
1749
+ // Use cache-blocked matmul when AVX2 is available for significant speedup
1750
+ if is_x86_feature_detected!("avx2") {
1751
+ parallel_cache_blocked_matmul(a, b, c, m, n, k);
1752
+ return;
1753
+ }
1754
+
1755
+ // Scalar fallback
1472
1756
  c.par_chunks_mut(n).enumerate().for_each(|(i, c_row)| {
1473
1757
  c_row.fill(0.0f32);
1474
1758
  let a_row = &a[i * k..(i + 1) * k];
@@ -1482,6 +1766,8 @@ pub fn parallel_matmul(a: &[f32], b: &[f32], c: &mut [f32], m: usize, n: usize,
1482
1766
 
1483
1767
  pub fn jit_parallel_matmul(a: &[f32], b: &[f32], c: &mut [f32], m: usize, n: usize, k: usize) {
1484
1768
  if m == 0 || n == 0 || k == 0 { return; }
1769
+
1770
+ // Use JIT AVX-512 kernel for large matrices where it excels
1485
1771
  let isa = detect_isa_level();
1486
1772
  if isa == ISALevel::AVX512 && n >= 64 {
1487
1773
  let kernel = CompiledKernel::compile_matmul_avx512(m, n, k);
@@ -1490,6 +1776,13 @@ pub fn jit_parallel_matmul(a: &[f32], b: &[f32], c: &mut [f32], m: usize, n: usi
1490
1776
  return;
1491
1777
  }
1492
1778
  }
1779
+
1780
+ // Use cache-blocked matmul when AVX2 is available
1781
+ if is_x86_feature_detected!("avx2") {
1782
+ parallel_cache_blocked_matmul(a, b, c, m, n, k);
1783
+ return;
1784
+ }
1785
+
1493
1786
  parallel_matmul(a, b, c, m, n, k);
1494
1787
  }
1495
1788
 
File without changes