aneforge 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. aneforge-0.1.0/.githooks/pre-commit +35 -0
  2. aneforge-0.1.0/.github/workflows/ci.yml +103 -0
  3. aneforge-0.1.0/.github/workflows/release.yml +70 -0
  4. aneforge-0.1.0/.gitignore +111 -0
  5. aneforge-0.1.0/.readthedocs.yaml +15 -0
  6. aneforge-0.1.0/CITATION.cff +12 -0
  7. aneforge-0.1.0/CONTRIBUTING.md +53 -0
  8. aneforge-0.1.0/LICENSE +31 -0
  9. aneforge-0.1.0/PKG-INFO +220 -0
  10. aneforge-0.1.0/README.md +183 -0
  11. aneforge-0.1.0/SECURITY.md +50 -0
  12. aneforge-0.1.0/aneforge/__init__.py +136 -0
  13. aneforge-0.1.0/aneforge/_blob.py +112 -0
  14. aneforge-0.1.0/aneforge/_bridges/__init__.py +3 -0
  15. aneforge-0.1.0/aneforge/_bridges/_netplist.py +3241 -0
  16. aneforge-0.1.0/aneforge/_bridges/ane_cost_volume_fused.py +97 -0
  17. aneforge-0.1.0/aneforge/_bridges/ane_cross_correlation_fused.py +88 -0
  18. aneforge-0.1.0/aneforge/_bridges/ane_cross_product_fused.py +71 -0
  19. aneforge-0.1.0/aneforge/_bridges/ane_dynamic_slice_fused.py +65 -0
  20. aneforge-0.1.0/aneforge/_bridges/ane_fps_fused.py +93 -0
  21. aneforge-0.1.0/aneforge/_bridges/ane_input_view_fused.py +69 -0
  22. aneforge-0.1.0/aneforge/_bridges/ane_radius_search_fused.py +91 -0
  23. aneforge-0.1.0/aneforge/_bridges/ane_rank_fused.py +255 -0
  24. aneforge-0.1.0/aneforge/_bridges/ane_rearrange_fused.py +315 -0
  25. aneforge-0.1.0/aneforge/_bridges/ane_sdpa_fused.py +261 -0
  26. aneforge-0.1.0/aneforge/_bridges/ane_structural_fused.py +111 -0
  27. aneforge-0.1.0/aneforge/_bridges/lrn_fused.py +137 -0
  28. aneforge-0.1.0/aneforge/_bridges/minmax_norm_fused.py +136 -0
  29. aneforge-0.1.0/aneforge/_bridges/scaled_elementwise_fused.py +125 -0
  30. aneforge-0.1.0/aneforge/_capabilities.py +1033 -0
  31. aneforge-0.1.0/aneforge/_circuit.py +76 -0
  32. aneforge-0.1.0/aneforge/_compile.py +1735 -0
  33. aneforge-0.1.0/aneforge/_cost.py +859 -0
  34. aneforge-0.1.0/aneforge/_invokers/README.md +22 -0
  35. aneforge-0.1.0/aneforge/_invokers/layer_invoker.mm +562 -0
  36. aneforge-0.1.0/aneforge/_invokers/persistent_worker.mm +488 -0
  37. aneforge-0.1.0/aneforge/_invokers/rank_invoker.mm +406 -0
  38. aneforge-0.1.0/aneforge/_invokers/sdpa_invoker.mm +520 -0
  39. aneforge-0.1.0/aneforge/_lib/ane_e5rt_dispatch.mm +1026 -0
  40. aneforge-0.1.0/aneforge/_lib/build.sh +12 -0
  41. aneforge-0.1.0/aneforge/_lib/e5rt_api.h +133 -0
  42. aneforge-0.1.0/aneforge/_netplist_worker.py +326 -0
  43. aneforge-0.1.0/aneforge/_op_catalog.py +272 -0
  44. aneforge-0.1.0/aneforge/_optimize.py +886 -0
  45. aneforge-0.1.0/aneforge/_paired.py +223 -0
  46. aneforge-0.1.0/aneforge/_rewrite.py +333 -0
  47. aneforge-0.1.0/aneforge/_runtime.py +347 -0
  48. aneforge-0.1.0/aneforge/_targets.py +462 -0
  49. aneforge-0.1.0/aneforge/ane_cost_model.json +1954 -0
  50. aneforge-0.1.0/aneforge/autograd.py +1444 -0
  51. aneforge-0.1.0/aneforge/build.py +81 -0
  52. aneforge-0.1.0/aneforge/costmodel_curves.json +1482 -0
  53. aneforge-0.1.0/aneforge/dsp.py +650 -0
  54. aneforge-0.1.0/aneforge/einsum.py +385 -0
  55. aneforge-0.1.0/aneforge/fft.py +590 -0
  56. aneforge-0.1.0/aneforge/full_mil_vocabulary_sweep.json +1662 -0
  57. aneforge-0.1.0/aneforge/graph.py +1193 -0
  58. aneforge-0.1.0/aneforge/linalg.py +1060 -0
  59. aneforge-0.1.0/aneforge/models.py +238 -0
  60. aneforge-0.1.0/aneforge/special.py +440 -0
  61. aneforge-0.1.0/aneforge/streaming.py +112 -0
  62. aneforge-0.1.0/bench/README.md +31 -0
  63. aneforge-0.1.0/bench/below_ridge_fusion.py +229 -0
  64. aneforge-0.1.0/bench/compress_speedup_bench.py +112 -0
  65. aneforge-0.1.0/bench/cross_path_compress_bench.py +119 -0
  66. aneforge-0.1.0/bench/decode_int8_accuracy.py +117 -0
  67. aneforge-0.1.0/bench/decode_measurement.py +511 -0
  68. aneforge-0.1.0/bench/device_bandwidth_roofline.py +513 -0
  69. aneforge-0.1.0/bench/device_compare.py +640 -0
  70. aneforge-0.1.0/bench/device_compare_wattcomplete.py +810 -0
  71. aneforge-0.1.0/bench/device_saturation_sweep.py +428 -0
  72. aneforge-0.1.0/bench/device_serving_sweep.py +564 -0
  73. aneforge-0.1.0/bench/encoder_serving_crosspath.py +316 -0
  74. aneforge-0.1.0/bench/fused_gpu_baseline.py +403 -0
  75. aneforge-0.1.0/bench/gemv_bandwidth_sweep.py +271 -0
  76. aneforge-0.1.0/bench/model_int4_bench.py +312 -0
  77. aneforge-0.1.0/bench/real_models_fp16.py +219 -0
  78. aneforge-0.1.0/bench/results/below_ridge_fusion.json +94 -0
  79. aneforge-0.1.0/bench/results/compress_speedup_bench.json +503 -0
  80. aneforge-0.1.0/bench/results/cross_path_compress_bench.json +152 -0
  81. aneforge-0.1.0/bench/results/decode_int8_accuracy.json +32 -0
  82. aneforge-0.1.0/bench/results/decode_measurement_results.json +485 -0
  83. aneforge-0.1.0/bench/results/device_bandwidth_roofline_results.json +1410 -0
  84. aneforge-0.1.0/bench/results/device_compare_wattcomplete_results.json +1124 -0
  85. aneforge-0.1.0/bench/results/device_compare_wattcomplete_results_M1.json +964 -0
  86. aneforge-0.1.0/bench/results/device_compare_wattcomplete_results_M2.json +1124 -0
  87. aneforge-0.1.0/bench/results/device_compare_wattcomplete_results_M5.json +1124 -0
  88. aneforge-0.1.0/bench/results/device_saturation_sweep_results.json +708 -0
  89. aneforge-0.1.0/bench/results/device_serving_sweep_results.json +1838 -0
  90. aneforge-0.1.0/bench/results/encoder_serving_crosspath.json +245 -0
  91. aneforge-0.1.0/bench/results/fused_gpu_baseline_results.json +301 -0
  92. aneforge-0.1.0/bench/results/gemv_bandwidth_sweep_results.json +364 -0
  93. aneforge-0.1.0/bench/results/model_int4_bench.json +158 -0
  94. aneforge-0.1.0/bench/results/real_models_fp16_results.json +89 -0
  95. aneforge-0.1.0/bench/results/roofline_analysis_results.json +615 -0
  96. aneforge-0.1.0/bench/roofline_analysis.py +611 -0
  97. aneforge-0.1.0/docs/aneforge-api.md +322 -0
  98. aneforge-0.1.0/docs/api/compile.md +25 -0
  99. aneforge-0.1.0/docs/api/graph.md +6 -0
  100. aneforge-0.1.0/docs/api/index.md +12 -0
  101. aneforge-0.1.0/docs/api/math.md +25 -0
  102. aneforge-0.1.0/docs/api/models.md +5 -0
  103. aneforge-0.1.0/docs/api/training.md +10 -0
  104. aneforge-0.1.0/docs/assets/demo.png +0 -0
  105. aneforge-0.1.0/docs/assets/demo.tape +35 -0
  106. aneforge-0.1.0/docs/assets/fluid_vorticity.png +0 -0
  107. aneforge-0.1.0/docs/capabilities.json +2174 -0
  108. aneforge-0.1.0/docs/capabilities.md +283 -0
  109. aneforge-0.1.0/docs/cross-chip.md +258 -0
  110. aneforge-0.1.0/docs/development.md +104 -0
  111. aneforge-0.1.0/docs/dispatch.md +161 -0
  112. aneforge-0.1.0/docs/e5rt-dispatch-reference.md +335 -0
  113. aneforge-0.1.0/docs/faq.md +259 -0
  114. aneforge-0.1.0/docs/gen_op_catalog.py +60 -0
  115. aneforge-0.1.0/docs/getting-started.md +131 -0
  116. aneforge-0.1.0/docs/glossary.md +203 -0
  117. aneforge-0.1.0/docs/index.md +56 -0
  118. aneforge-0.1.0/docs/mil-primer.md +317 -0
  119. aneforge-0.1.0/docs/op-catalog.md +248 -0
  120. aneforge-0.1.0/docs/reproducibility.md +88 -0
  121. aneforge-0.1.0/docs/requirements.txt +6 -0
  122. aneforge-0.1.0/docs/roadmap.md +303 -0
  123. aneforge-0.1.0/docs/stylesheets/extra.css +15 -0
  124. aneforge-0.1.0/docs/training.md +353 -0
  125. aneforge-0.1.0/examples/README.md +111 -0
  126. aneforge-0.1.0/examples/_common.py +79 -0
  127. aneforge-0.1.0/examples/autotune.py +271 -0
  128. aneforge-0.1.0/examples/benchmarks/README.md +12 -0
  129. aneforge-0.1.0/examples/benchmarks/bench_encoder_batched.py +102 -0
  130. aneforge-0.1.0/examples/benchmarks/bench_encoder_gpu.py +66 -0
  131. aneforge-0.1.0/examples/benchmarks/rank_worker_bench.py +98 -0
  132. aneforge-0.1.0/examples/benchmarks/sdpa_worker_bench.py +93 -0
  133. aneforge-0.1.0/examples/benchmarks/topk_worker_bench.py +95 -0
  134. aneforge-0.1.0/examples/cifar_data.py +44 -0
  135. aneforge-0.1.0/examples/compress_weights.py +145 -0
  136. aneforge-0.1.0/examples/data/mnist_subset.npz +0 -0
  137. aneforge-0.1.0/examples/demo.py +153 -0
  138. aneforge-0.1.0/examples/demos/README.md +71 -0
  139. aneforge-0.1.0/examples/demos/ane_vs_gpu_cpu.py +63 -0
  140. aneforge-0.1.0/examples/demos/batching_amortization.py +42 -0
  141. aneforge-0.1.0/examples/demos/capability_surface.py +40 -0
  142. aneforge-0.1.0/examples/demos/chaining_depth.py +49 -0
  143. aneforge-0.1.0/examples/demos/cross_chip_cost_model.py +53 -0
  144. aneforge-0.1.0/examples/demos/dispatch_no_coreml.py +46 -0
  145. aneforge-0.1.0/examples/demos/entitlement_boundary.py +39 -0
  146. aneforge-0.1.0/examples/demos/execution_model_floor.py +46 -0
  147. aneforge-0.1.0/examples/demos/hidden_layers.py +51 -0
  148. aneforge-0.1.0/examples/demos/llm_attention_kvcache.py +59 -0
  149. aneforge-0.1.0/examples/demos/mil_dialect.py +37 -0
  150. aneforge-0.1.0/examples/demos/numerical_scientific.py +47 -0
  151. aneforge-0.1.0/examples/demos/numerics_fp16.py +49 -0
  152. aneforge-0.1.0/examples/demos/optimization_autotune.py +53 -0
  153. aneforge-0.1.0/examples/demos/pitfalls_limits.py +53 -0
  154. aneforge-0.1.0/examples/demos/power_efficiency.py +53 -0
  155. aneforge-0.1.0/examples/demos/resident_state.py +44 -0
  156. aneforge-0.1.0/examples/demos/roofline_bandwidth.py +44 -0
  157. aneforge-0.1.0/examples/demos/roofline_compute.py +45 -0
  158. aneforge-0.1.0/examples/demos/single_in_flight.py +55 -0
  159. aneforge-0.1.0/examples/demos/training_on_ane.py +55 -0
  160. aneforge-0.1.0/examples/demos/vision_conv_encoder.py +63 -0
  161. aneforge-0.1.0/examples/demos/weights_compression.py +54 -0
  162. aneforge-0.1.0/examples/demos/what_the_ane_is.py +41 -0
  163. aneforge-0.1.0/examples/demos/zero_copy_io.py +56 -0
  164. aneforge-0.1.0/examples/eigenvalues_svd.py +66 -0
  165. aneforge-0.1.0/examples/factorize.py +39 -0
  166. aneforge-0.1.0/examples/fft.py +37 -0
  167. aneforge-0.1.0/examples/fluid_vorticity.py +287 -0
  168. aneforge-0.1.0/examples/gpt_generate_ane.py +168 -0
  169. aneforge-0.1.0/examples/gpt_multilayer_resident.py +116 -0
  170. aneforge-0.1.0/examples/heat_equation.py +142 -0
  171. aneforge-0.1.0/examples/llama_block_causal.py +79 -0
  172. aneforge-0.1.0/examples/make_mnist_subset.py +63 -0
  173. aneforge-0.1.0/examples/native_geometry.py +78 -0
  174. aneforge-0.1.0/examples/native_norms.py +59 -0
  175. aneforge-0.1.0/examples/native_pixel_ops.py +126 -0
  176. aneforge-0.1.0/examples/native_ranking.py +51 -0
  177. aneforge-0.1.0/examples/nbody.py +109 -0
  178. aneforge-0.1.0/examples/paired_fp16.py +244 -0
  179. aneforge-0.1.0/examples/pointcloud.py +88 -0
  180. aneforge-0.1.0/examples/poisson_spectral.py +101 -0
  181. aneforge-0.1.0/examples/quickstart.py +92 -0
  182. aneforge-0.1.0/examples/resnet18.py +38 -0
  183. aneforge-0.1.0/examples/sd15.py +504 -0
  184. aneforge-0.1.0/examples/sd_unet.py +105 -0
  185. aneforge-0.1.0/examples/sd_vae.py +98 -0
  186. aneforge-0.1.0/examples/sdpa.py +65 -0
  187. aneforge-0.1.0/examples/sentence_embeddings.py +61 -0
  188. aneforge-0.1.0/examples/solve_linear_systems.py +40 -0
  189. aneforge-0.1.0/examples/spectral_analysis.py +142 -0
  190. aneforge-0.1.0/examples/superres_espcn.py +82 -0
  191. aneforge-0.1.0/examples/train_charlm.py +110 -0
  192. aneforge-0.1.0/examples/train_charlm_corpus.py +120 -0
  193. aneforge-0.1.0/examples/train_charlm_deep.py +137 -0
  194. aneforge-0.1.0/examples/train_cifar_cnn.py +116 -0
  195. aneforge-0.1.0/examples/train_llama_block.py +64 -0
  196. aneforge-0.1.0/examples/train_mnist_cnn.py +67 -0
  197. aneforge-0.1.0/examples/train_mnist_mlp.py +65 -0
  198. aneforge-0.1.0/examples/train_transformer.py +105 -0
  199. aneforge-0.1.0/examples/train_transformer_prenorm.py +66 -0
  200. aneforge-0.1.0/examples/vit.py +259 -0
  201. aneforge-0.1.0/mkdocs.yml +100 -0
  202. aneforge-0.1.0/pyproject.toml +110 -0
  203. aneforge-0.1.0/scripts/reproduce.sh +126 -0
  204. aneforge-0.1.0/tests/_corpus.py +162 -0
  205. aneforge-0.1.0/tests/conftest.py +23 -0
  206. aneforge-0.1.0/tests/op_smoketest.py +99 -0
  207. aneforge-0.1.0/tests/run_corpus.py +62 -0
  208. aneforge-0.1.0/tests/test_autograd.py +1349 -0
  209. aneforge-0.1.0/tests/test_blas.py +347 -0
  210. aneforge-0.1.0/tests/test_broad.py +155 -0
  211. aneforge-0.1.0/tests/test_builder_guards.py +55 -0
  212. aneforge-0.1.0/tests/test_compile_breaker.py +114 -0
  213. aneforge-0.1.0/tests/test_compile_targets.py +105 -0
  214. aneforge-0.1.0/tests/test_compress.py +528 -0
  215. aneforge-0.1.0/tests/test_conv_int8.py +139 -0
  216. aneforge-0.1.0/tests/test_corners.py +280 -0
  217. aneforge-0.1.0/tests/test_cost_model_analytic.py +208 -0
  218. aneforge-0.1.0/tests/test_cross_chip_ops.py +130 -0
  219. aneforge-0.1.0/tests/test_cross_compile.py +81 -0
  220. aneforge-0.1.0/tests/test_cross_compile_matrix.py +68 -0
  221. aneforge-0.1.0/tests/test_decoder_block.py +119 -0
  222. aneforge-0.1.0/tests/test_dispatch_floor_warning.py +41 -0
  223. aneforge-0.1.0/tests/test_dynamic_conv.py +66 -0
  224. aneforge-0.1.0/tests/test_fft2.py +80 -0
  225. aneforge-0.1.0/tests/test_fp16_cross_chip.py +120 -0
  226. aneforge-0.1.0/tests/test_group_norm_tiling.py +83 -0
  227. aneforge-0.1.0/tests/test_image_input.py +137 -0
  228. aneforge-0.1.0/tests/test_lapack.py +205 -0
  229. aneforge-0.1.0/tests/test_linalg.py +231 -0
  230. aneforge-0.1.0/tests/test_multilayer_resident.py +14 -0
  231. aneforge-0.1.0/tests/test_new_ops.py +119 -0
  232. aneforge-0.1.0/tests/test_nn_blocks.py +446 -0
  233. aneforge-0.1.0/tests/test_numerical.py +707 -0
  234. aneforge-0.1.0/tests/test_op_catalog.py +40 -0
  235. aneforge-0.1.0/tests/test_op_coverage.py +169 -0
  236. aneforge-0.1.0/tests/test_pde_ode.py +866 -0
  237. aneforge-0.1.0/tests/test_routes.py +172 -0
  238. aneforge-0.1.0/tests/test_sdpa_causal.py +80 -0
  239. aneforge-0.1.0/tests/test_shapes.py +152 -0
  240. aneforge-0.1.0/tests/test_special_trig.py +62 -0
  241. aneforge-0.1.0/tests/test_spectral_sci.py +656 -0
  242. aneforge-0.1.0/tests/test_streaming.py +104 -0
  243. aneforge-0.1.0/tests/test_synthetic.py +361 -0
  244. aneforge-0.1.0/tests/test_targets.py +268 -0
  245. aneforge-0.1.0/tests/test_train_cifar.py +140 -0
  246. aneforge-0.1.0/tests/test_tune_guards.py +230 -0
  247. aneforge-0.1.0/tests/test_vjp_sweep.py +69 -0
  248. aneforge-0.1.0/tests/test_zero_copy_io.py +54 -0
@@ -0,0 +1,35 @@
1
+ #!/bin/sh
2
+ # Pre-commit gate for ANEForge: the off-hardware CI checks, run before every commit so
3
+ # broken code cannot land. Mirrors the lint job and the compileall step of .github/workflows/ci.yml:
4
+ # 1. ruff check (lint)
5
+ # 2. compileall aneforge (every module byte-compiles)
6
+ # The on-device corpus (tests/run_corpus.py) and pytest suites need ANE hardware, so they
7
+ # are NOT run here (they gate separately; see docs/development.md).
8
+ #
9
+ # Enable once per clone (git does not auto-trust committed hooks):
10
+ # git config core.hooksPath .githooks
11
+ # Bypass a single commit with: git commit --no-verify
12
+ set -e
13
+ cd "$(git rev-parse --show-toplevel)"
14
+
15
+ if [ -x .venv/bin/ruff ]; then RUFF=.venv/bin/ruff
16
+ elif command -v ruff >/dev/null 2>&1; then RUFF=ruff
17
+ else echo "pre-commit: ruff not found - run: pip install -e '.[dev]'" >&2; exit 1; fi
18
+
19
+ if [ -x .venv/bin/python ]; then PY=.venv/bin/python
20
+ elif command -v python3 >/dev/null 2>&1; then PY=python3
21
+ else echo "pre-commit: python3 not found" >&2; exit 1; fi
22
+
23
+ echo "pre-commit: ruff check"
24
+ if ! "$RUFF" check; then
25
+ echo "pre-commit: BLOCKED - ruff check failed. Fix (or '$RUFF check --fix'), or bypass with 'git commit --no-verify'." >&2
26
+ exit 1
27
+ fi
28
+
29
+ echo "pre-commit: compileall aneforge"
30
+ if ! "$PY" -m compileall -q aneforge; then
31
+ echo "pre-commit: BLOCKED - a module in aneforge/ failed to byte-compile." >&2
32
+ exit 1
33
+ fi
34
+
35
+ echo "pre-commit: OK"
@@ -0,0 +1,103 @@
1
+ name: CI
2
+
3
+ # ANEForge dispatches to the Apple Neural Engine, which GitHub's hosted runners cannot
4
+ # reach: their macOS images are Apple Silicon but virtualized, and the ANE is not passed
5
+ # through to the guest (the same reason CoreML falls back to CPU in a VM). So the on-device
6
+ # correctness corpus (tests/run_corpus.py) is run by hand on a Mac with an ANE, e.g.
7
+ # `bash scripts/reproduce.sh gates`. This workflow covers everything CI *can* verify:
8
+ # lint, packaging, a multi-version compile + import matrix, the hardware-free unit tests,
9
+ # the documentation build, and — on a hosted macOS runner — that the Objective-C++
10
+ # dispatch dylib actually compiles (which the Linux jobs cannot do).
11
+
12
+ on:
13
+ push:
14
+ branches: [main]
15
+ pull_request:
16
+
17
+ concurrency:
18
+ group: ci-${{ github.ref }}
19
+ cancel-in-progress: true
20
+
21
+ jobs:
22
+ lint:
23
+ name: lint (ruff)
24
+ runs-on: ubuntu-latest
25
+ steps:
26
+ - uses: actions/checkout@v6
27
+ - uses: actions/setup-python@v6
28
+ with:
29
+ python-version: "3.12"
30
+ - run: pip install ruff
31
+ - run: ruff check
32
+
33
+ build:
34
+ name: package (build + twine)
35
+ runs-on: ubuntu-latest
36
+ steps:
37
+ - uses: actions/checkout@v6
38
+ - uses: actions/setup-python@v6
39
+ with:
40
+ python-version: "3.12"
41
+ - run: pip install build twine
42
+ - run: python -m build
43
+ - run: twine check dist/*
44
+
45
+ compile-import:
46
+ name: compile + import (py ${{ matrix.python }})
47
+ runs-on: ubuntu-latest
48
+ strategy:
49
+ fail-fast: false
50
+ matrix:
51
+ python: ["3.10", "3.11", "3.12", "3.13"]
52
+ steps:
53
+ - uses: actions/checkout@v6
54
+ - uses: actions/setup-python@v6
55
+ with:
56
+ python-version: ${{ matrix.python }}
57
+ # Every module byte-compiles (no import, so no dylib/ANE needed).
58
+ - run: python -m compileall -q aneforge
59
+ # The package imports with only NumPy and builds a graph; compile/dispatch is lazy
60
+ # and needs the ANE, so it is exercised on-device, not here.
61
+ - run: pip install -e .
62
+ - run: python -c "import aneforge as af; print('aneforge', af.__version__); af.input((1, 3, 8, 8))"
63
+
64
+ unit-tests:
65
+ name: off-device unit tests
66
+ runs-on: ubuntu-latest
67
+ steps:
68
+ - uses: actions/checkout@v6
69
+ - uses: actions/setup-python@v6
70
+ with:
71
+ python-version: "3.12"
72
+ - run: pip install -e ".[dev]"
73
+ # The compile-backoff rate-limiter is a pure unit test (fake clock, no ANE).
74
+ - run: pytest tests/test_compile_breaker.py -q
75
+
76
+ docs:
77
+ name: docs (mkdocs build)
78
+ runs-on: ubuntu-latest
79
+ steps:
80
+ - uses: actions/checkout@v6
81
+ - uses: actions/setup-python@v6
82
+ with:
83
+ python-version: "3.12"
84
+ - run: pip install -r docs/requirements.txt
85
+ - run: mkdocs build --strict
86
+
87
+ macos:
88
+ name: macOS build + smoke (Apple Silicon, no ANE)
89
+ runs-on: macos-14
90
+ steps:
91
+ - uses: actions/checkout@v6
92
+ - uses: actions/setup-python@v6
93
+ with:
94
+ python-version: "3.12"
95
+ # The Objective-C++ dispatch shim links Apple frameworks and only builds on macOS;
96
+ # this proves it compiles. The hosted runner has no ANE, so it is never dispatched
97
+ # (the dylib loads lazily on first dispatch, which does not happen here).
98
+ - run: sh aneforge/_lib/build.sh
99
+ - run: python -m compileall -q aneforge
100
+ - run: pip install -e ".[dev]"
101
+ # Import + graph build on macOS, and the hardware-free unit test.
102
+ - run: python -c "import aneforge as af; print('aneforge', af.__version__); af.input((1, 3, 8, 8))"
103
+ - run: pytest tests/test_compile_breaker.py -q
@@ -0,0 +1,70 @@
1
+ # Build and publish aneforge to PyPI via Trusted Publishing (OIDC, no stored token).
2
+ #
3
+ # One-time setup on PyPI (https://pypi.org/manage/account/publishing/): add a pending
4
+ # publisher for project "aneforge", owner "sbryngelson", repo "ANEForge", workflow
5
+ # "release.yml", environment "pypi". Repeat on https://test.pypi.org with environment
6
+ # "testpypi" if you want the dry-run path.
7
+ #
8
+ # Publish to PyPI: push a tag matching v* (e.g. `git tag v0.1.0 && git push --tags`).
9
+ # Dry-run to TestPyPI: Actions tab -> Release -> Run workflow -> target = testpypi.
10
+ name: Release
11
+
12
+ on:
13
+ push:
14
+ tags: ["v*"]
15
+ workflow_dispatch:
16
+ inputs:
17
+ target:
18
+ description: "Publish target"
19
+ type: choice
20
+ options: [testpypi, pypi]
21
+ default: testpypi
22
+
23
+ permissions:
24
+ contents: read
25
+
26
+ jobs:
27
+ build:
28
+ runs-on: ubuntu-latest
29
+ steps:
30
+ - uses: actions/checkout@v6
31
+ - uses: actions/setup-python@v6
32
+ with:
33
+ python-version: "3.12"
34
+ - run: python -m pip install --upgrade build
35
+ - run: python -m build
36
+ - run: python -m pip install --upgrade twine && python -m twine check dist/*
37
+ - uses: actions/upload-artifact@v4
38
+ with:
39
+ name: dist
40
+ path: dist/
41
+
42
+ publish-testpypi:
43
+ needs: build
44
+ if: github.event_name == 'workflow_dispatch' && inputs.target == 'testpypi'
45
+ runs-on: ubuntu-latest
46
+ environment: testpypi
47
+ permissions:
48
+ id-token: write
49
+ steps:
50
+ - uses: actions/download-artifact@v4
51
+ with:
52
+ name: dist
53
+ path: dist/
54
+ - uses: pypa/gh-action-pypi-publish@release/v1
55
+ with:
56
+ repository-url: https://test.pypi.org/legacy/
57
+
58
+ publish-pypi:
59
+ needs: build
60
+ if: github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && inputs.target == 'pypi')
61
+ runs-on: ubuntu-latest
62
+ environment: pypi
63
+ permissions:
64
+ id-token: write
65
+ steps:
66
+ - uses: actions/download-artifact@v4
67
+ with:
68
+ name: dist
69
+ path: dist/
70
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,111 @@
1
+ # =====================================================================
2
+ # ANEForge .gitignore
3
+ #
4
+ # Source only. Everything generated (compiled binaries, traces, model
5
+ # dumps, runtime sweeps) stays local and is not tracked.
6
+ # =====================================================================
7
+
8
+ # Generated artifacts: runtime outputs, traces, model dumps.
9
+ ane_artifacts/
10
+
11
+ # Build outputs: per-machine compiled invoker/probe binaries, dSYM bundles, cache.
12
+ # Invokers rebuild from aneforge/_invokers/; the dispatch dylib from aneforge/_lib/.
13
+ ane_build/
14
+
15
+ # The package's bundled dispatch dylib is built on the target Mac (sh
16
+ # aneforge/_lib/build.sh); we ship the source, not the per-machine binary.
17
+ aneforge/_lib/*.dylib
18
+
19
+ # Local RE/paper test tooling (fuzzers, capability census + its sweep data).
20
+ # Untracked on purpose: not user-facing proof, not collected by pytest, not shipped.
21
+ utils/
22
+
23
+ # Python build artifacts (python -m build / setuptools).
24
+ build/
25
+ dist/
26
+
27
+ # =====================================================================
28
+ # macOS / OS junk
29
+ # =====================================================================
30
+ .DS_Store
31
+ ._.DS_Store
32
+ **/.DS_Store
33
+
34
+ # =====================================================================
35
+ # Python
36
+ # =====================================================================
37
+ __pycache__/
38
+ *.py[cod]
39
+ *$py.class
40
+ *.so
41
+ .Python
42
+ *.egg-info/
43
+ .eggs/
44
+ .pytest_cache/
45
+ .mypy_cache/
46
+ .ruff_cache/
47
+ *.venv/
48
+ venv/
49
+ .venv/
50
+
51
+ # =====================================================================
52
+ # Editors
53
+ # =====================================================================
54
+ *.swp
55
+ *.swo
56
+ *.swn
57
+ .vscode/
58
+ .idea/
59
+ .cursor/
60
+
61
+ # =====================================================================
62
+ # Local scratch / temp output the fuzzer or probes may drop into the repo
63
+ # =====================================================================
64
+ /tmp_*/
65
+ /scratch_*/
66
+ *.tmp
67
+
68
+ # =====================================================================
69
+ # Local AI-tool session files
70
+ # =====================================================================
71
+ .codex/
72
+ .claude/
73
+
74
+ # =====================================================================
75
+ # Secrets — defensive, none expected in this tree
76
+ # =====================================================================
77
+ *.env
78
+ *.env.local
79
+ .env.*
80
+ credentials.json
81
+ *_credentials.json
82
+ *.pem
83
+ *.key
84
+ auth.json
85
+
86
+ # =====================================================================
87
+ # Example outputs and on-disk caches (regenerated on demand)
88
+ # =====================================================================
89
+ .aneforge_cache/
90
+ examples/sd15_out.png
91
+
92
+ # vhs intermediate; the shipped README asset is demo.png (APNG), see demo.tape.
93
+ docs/assets/demo.gif
94
+
95
+ # LaTeX figure build artifacts
96
+ docs/figures/*.aux
97
+ docs/figures/*.log
98
+ docs/figures/*.synctex.gz
99
+
100
+ # Full-MNIST loader cache (fetched, never committed)
101
+ examples/data/mnist_cache/
102
+
103
+ # Debug matrix dumps from the solver probes
104
+ .dbg_slmat/
105
+
106
+ # CIFAR-10 dataset downloaded by examples/cifar_data.py (large, regenerated on demand)
107
+ examples/data/cifar10/
108
+ examples/data/cifar_cnn.npz
109
+
110
+ # MkDocs build output
111
+ site/
@@ -0,0 +1,15 @@
1
+ # Read the Docs build configuration for the ANEForge package documentation.
2
+ # https://docs.readthedocs.io/en/stable/config-file/v2.html
3
+ version: 2
4
+
5
+ build:
6
+ os: ubuntu-24.04
7
+ tools:
8
+ python: "3.12"
9
+
10
+ mkdocs:
11
+ configuration: mkdocs.yml
12
+
13
+ python:
14
+ install:
15
+ - requirements: docs/requirements.txt
@@ -0,0 +1,12 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use this software, please cite it as below."
3
+ title: "ANEForge: a direct, CoreML-free Apple Neural Engine backend"
4
+ type: software
5
+ authors:
6
+ - family-names: Bryngelson
7
+ given-names: "Spencer H."
8
+ affiliation: "Georgia Institute of Technology"
9
+ repository-code: "https://github.com/sbryngelson/ANEForge"
10
+ license: MIT
11
+ version: "0.1.0"
12
+ date-released: "2026-06-10"
@@ -0,0 +1,53 @@
1
+ # Contributing
2
+
3
+ Build, test, and "adding an operator" details are in
4
+ [`docs/development.md`](docs/development.md). This is the short version.
5
+
6
+ ## Bug reports
7
+
8
+ Behavior on the ANE is per-chip and per-OS, so include:
9
+
10
+ - The chip (M1 through M5, or the A-series equivalent) and the macOS version.
11
+ - A minimal graph that reproduces it: ops, shapes, dtypes.
12
+ - Expected versus actual: numbers, traceback, or compile error.
13
+
14
+ Two things are expected rather than bugs: a graph that compiles for one family
15
+ but overflows a dimension cap on another (caps are per family, see
16
+ [`docs/capabilities.md`](docs/capabilities.md)), and small CPU/ANE divergence at
17
+ boundary fp16 values. A wrong result well inside the fp16 range is a real bug.
18
+
19
+ Report security issues privately, not in a public issue: see
20
+ [`SECURITY.md`](SECURITY.md).
21
+
22
+ ## Changes
23
+
24
+ Operator-coverage gaps are the place to start: anything in
25
+ [`docs/capabilities.md`](docs/capabilities.md) not yet covered, via the four-step
26
+ path in [`docs/development.md`](docs/development.md#adding-an-operator). Open an
27
+ issue first for larger or architectural changes.
28
+
29
+ ## Setup
30
+
31
+ ```sh
32
+ pip install -e ".[dev]" # ruff + pytest
33
+ sh aneforge/_lib/build.sh # build the dispatch dylib (needs a Mac)
34
+ git config core.hooksPath .githooks # off-hardware pre-commit checks
35
+ ```
36
+
37
+ The corpus is the gate and must pass before a change lands:
38
+
39
+ ```sh
40
+ KMP_DUPLICATE_LIB_OK=TRUE PYTHONPATH=. python3 tests/run_corpus.py
41
+ ```
42
+
43
+ Most tests need a real ANE, so CI runs only the off-hardware checks. Run the
44
+ corpus and the pytest suite on your Mac before opening a pull request.
45
+
46
+ ## Style
47
+
48
+ Python 3.10+, linted with `ruff`. Match the surrounding packed style; do not
49
+ reformat existing code.
50
+
51
+ ## License
52
+
53
+ Contributions are licensed under the [MIT License](LICENSE).
aneforge-0.1.0/LICENSE ADDED
@@ -0,0 +1,31 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Spencer H. Bryngelson
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ ---
24
+
25
+ NOTE ON SCOPE. ANEForge is an independent research project. It dispatches to the
26
+ Apple Neural Engine through private, undocumented system symbols (the e5rt
27
+ runtime) and is not an Apple product, is not endorsed by Apple, and relies on no
28
+ Apple API contract. "Apple", "Apple Neural Engine", and related marks belong to
29
+ Apple Inc. The MIT grant above covers this project's own source only; it confers
30
+ no rights in Apple software, and the private interfaces it calls may change or
31
+ break without notice. Use is intended for research and interoperability.
@@ -0,0 +1,220 @@
1
+ Metadata-Version: 2.4
2
+ Name: aneforge
3
+ Version: 0.1.0
4
+ Summary: Direct Apple Neural Engine (ANE) backend. A CoreML-free Python frontend that compiles operator graphs into a single fused e5rt program and dispatches them to the ANE.
5
+ Project-URL: Homepage, https://github.com/sbryngelson/ANEForge
6
+ Project-URL: Repository, https://github.com/sbryngelson/ANEForge
7
+ Project-URL: Documentation, https://aneforge.readthedocs.io
8
+ Project-URL: Issues, https://github.com/sbryngelson/ANEForge/issues
9
+ Author-email: Spencer Bryngelson <shb@gatech.edu>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: ane,apple-neural-engine,apple-silicon,e5rt,espresso,inference,machine-learning
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Operating System :: MacOS :: MacOS X
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: 3.14
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: numpy
25
+ Provides-Extra: bench
26
+ Requires-Dist: mlx>=0.31; extra == 'bench'
27
+ Requires-Dist: torch; extra == 'bench'
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest; extra == 'dev'
30
+ Requires-Dist: pytest-forked; extra == 'dev'
31
+ Requires-Dist: ruff; extra == 'dev'
32
+ Provides-Extra: models
33
+ Requires-Dist: torch; extra == 'models'
34
+ Requires-Dist: torchvision; extra == 'models'
35
+ Requires-Dist: transformers; extra == 'models'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # ANEForge
39
+
40
+ [![CI](https://github.com/sbryngelson/ANEForge/actions/workflows/ci.yml/badge.svg)](https://github.com/sbryngelson/ANEForge/actions/workflows/ci.yml)
41
+ [![License: MIT](https://img.shields.io/badge/License-MIT-2e6b69.svg)](LICENSE)
42
+ [![Apple Silicon](https://img.shields.io/badge/Apple%20Silicon-macOS%2014%2B-b84310.svg)](#install)
43
+
44
+ **Train and run neural networks directly on the Apple Neural Engine, from
45
+ Python, with no CoreML.**
46
+
47
+ <p align="center">
48
+ <img src="docs/assets/demo.png" width="680"
49
+ alt="A small transformer trains from scratch and generates text live on the Apple Neural Engine">
50
+ </p>
51
+
52
+ <p align="center">
53
+ <sub>A transformer training from scratch on the engine (forward, backward, and
54
+ Adam), then completing a prompt. Reproduce with <a href="examples/demo.py"><code>python examples/demo.py</code></a>.</sub>
55
+ </p>
56
+
57
+ Apple exposes the Neural Engine only through CoreML, and only for inference.
58
+ CoreML decides whether your model lands on the engine or quietly falls back to the
59
+ CPU or GPU, and it gives you no way to train there. ANEForge skips it: it compiles
60
+ a tensor graph into one ANE program and dispatches that program through the same
61
+ private `aned` stack CoreML, MPSGraph, and Espresso use internally. From there:
62
+
63
+ - **Training runs on the engine.** The forward pass, the backward pass, and the
64
+ Adam update all compile to ANE programs. A CNN trains from scratch on CIFAR-10 to
65
+ 71% accuracy, on a chip Apple ships for inference only.
66
+ - **Hardware layers CoreML can't reach.** `af.sdpa` drives the engine's
67
+ fused-attention layer directly, the one Apple's compiler decomposes and never
68
+ emits; 18 other native layers (`argmax`, `topk`, `sort`, geometry) come the same way.
69
+ - **The engine, never a fallback.** A pretrained ResNet-18 runs end to end in
70
+ 0.33 ms, matching the reference to cosine 1.0000, at a fraction of the GPU's
71
+ energy (table below).
72
+ - **Cross-compilation for chips you don't own.** Lower and gate a graph for any of
73
+ 28 ANE targets (M1-M5) from one machine, and estimate its latency without running it.
74
+
75
+ ```python
76
+ import aneforge as af
77
+
78
+ x = af.input((1, 3, 32, 32)) # a lazy graph input
79
+ y = af.conv(x, W, pad=1).relu().mean((2, 3))
80
+ net = af.compile(y, compress="int8") # graph -> one fused ANE program
81
+ out = net(image) # callable; runs on ANE silicon
82
+
83
+ # ...or load a pretrained model
84
+ enc = af.load(".../all-MiniLM-L6-v2") # MiniLM sentence encoder
85
+ vec = enc(tokens) # on-device, cosine 1.0000 vs reference
86
+ ```
87
+
88
+ A graph is built from 58 fused operators plus 19 native bridge operators, lowered
89
+ into one program and reused across calls, near a 70 us dispatch floor.
90
+
91
+ > **Status:** research project on Apple Silicon / macOS, verified on M5 Pro and M1
92
+ > Max. Relies on private framework symbols that may change without notice. Not
93
+ > affiliated with Apple.
94
+
95
+ ## Install
96
+
97
+ Requirements: an Apple Silicon Mac, macOS 14+, Xcode command-line tools, and Python 3.10+.
98
+
99
+ ```sh
100
+ git clone https://github.com/sbryngelson/ANEForge.git
101
+ cd ANEForge
102
+ pip install -e . # core dependency is just NumPy
103
+ PYTHONPATH=. python3 tests/op_smoketest.py # compile + run each op on the ANE
104
+ ```
105
+
106
+ The `e5rt` dispatch shim links Apple frameworks, so it compiles from source on your
107
+ Mac. That happens automatically the first time you dispatch to the ANE; build it
108
+ ahead of time with `python -m aneforge.build` if you prefer.
109
+
110
+ Optional extras: `pip install -e ".[models]"` (torch / torchvision / transformers
111
+ for the pretrained loaders) and `".[bench]"` (mlx / torch for the GPU-comparison
112
+ tools). Then browse [`examples/`](examples/), starting with
113
+ [`examples/quickstart.py`](examples/quickstart.py).
114
+
115
+ ## How it compares
116
+
117
+ | | On the ANE | No CoreML | Trains on it |
118
+ | --------------------- | :---------------: | :-------: | :----------: |
119
+ | CoreML / coremltools | scheduler chooses | -- | no |
120
+ | MLX, PyTorch (MPS) | no (GPU) | yes | on the GPU |
121
+ | **ANEForge** | **yes (direct)** | **yes** | **yes** |
122
+
123
+ CoreML is the only public door to the engine, and it only ever decides whether to
124
+ use it. ANEForge compiles to the engine directly, from an ordinary user process,
125
+ with no entitlement and without disabling system integrity protection.
126
+
127
+ ## Measured
128
+
129
+ Single input, fp16, on an M5 Pro. The GPU baseline is PyTorch on Metal (MPS) at
130
+ fp16; energy is whole-package, read with `powermetrics`.
131
+
132
+ | Pretrained model | ANE | GPU (fp16) | ANE energy | GPU energy |
133
+ | ---------------- | ------: | ---------: | ---------: | ---------: |
134
+ | ResNet-18 | 0.33 ms | 2.03 ms | 2.2 mJ | 35 mJ |
135
+ | MiniLM encoder | 0.53 ms | 1.92 ms | 2.4 mJ | 21 mJ |
136
+ | ViT-B/16 | 18.3 ms | 15.9 ms | 75 mJ | 612 mJ |
137
+
138
+ The engine is faster on the convolutional and encoder workloads and 8 to 16x more
139
+ energy efficient on all three, even on ViT-B/16 where the GPU edges it on latency.
140
+ Reproduce with
141
+ [`bench/device_compare_wattcomplete.py`](bench/device_compare_wattcomplete.py)
142
+ and [`bench/real_models_fp16.py`](bench/real_models_fp16.py); the full per-workload
143
+ device map (16 classes, measured on M1 / M2 / M5) is in
144
+ [`bench/results/`](bench/results/).
145
+
146
+ ## A fluid simulation on the Neural Engine
147
+
148
+ <p align="center">
149
+ <img src="docs/assets/fluid_vorticity.png" width="400"
150
+ alt="A passive dye shaped as the word ANEForge stirred into glowing filaments by a fluid simulation on the Apple Neural Engine">
151
+ </p>
152
+
153
+ A passive dye is painted as the word ANEForge, and a 2-D incompressible
154
+ Navier-Stokes flow (pseudo-spectral) stirs it into thin glowing filaments. Every
155
+ Fourier transform in the 2,200-step loop runs on the ANE, and the whole
156
+ simulation costs about 9 J at the measured 1.48 W rail. Reproduce with
157
+ [`python examples/fluid_vorticity.py`](examples/fluid_vorticity.py).
158
+
159
+ ## What it does
160
+
161
+ - **Graph -> compile -> run.** 58 fused operators (conv/pool, `matmul`/`bmm`/`einsum`,
162
+ activations, reductions, norms, softmax, attention, shape/geometry) compile into one
163
+ program with int8/int4/fp16 weights, plus a bridge route for 19 native ops the
164
+ public toolchain never emits.
165
+ - **Streaming weight compression.** int8, int4-LUT, or sparse weights streamed from
166
+ the engine's dequant path (~4x smaller for int4), accuracy-gated.
167
+ - **On-device uint8 image input,** dequantized in-graph, so raw camera or video
168
+ bytes feed the model directly.
169
+ - **Resident state.** KV-cache and optimizer state kept on the engine across steps
170
+ via buffer aliasing (`share_buffer`).
171
+ - **Accuracy-preserving optimizer.** `af.tune` measures equivalent lowerings on the
172
+ engine and returns the lossless pick.
173
+ - **Linear algebra and spectral methods.** `aneforge.linalg` and `aneforge.fft` as
174
+ static-dataflow graphs.
175
+
176
+ ## What runs
177
+
178
+ Pretrained models, each fused into one ANE program:
179
+
180
+ | Model | Task | Fidelity vs reference |
181
+ | ------------------ | -------------------------- | ----------------------- |
182
+ | ResNet-18 | ImageNet classification | cosine 1.0000 |
183
+ | ViT-B/16 | vision transformer encoder | cosine 1.0000 |
184
+ | all-MiniLM-L6-v2 | sentence embedding | cosine 1.0000 |
185
+ | ESPCN | super-resolution | runs end to end |
186
+ | Stable Diffusion 1.5 | U-Net + VAE (per component) | U-Net 1.5%, VAE 4.4% rel. error |
187
+
188
+ Trained from scratch on the engine: an MLP, a CNN (CIFAR-10 to 71% accuracy), a transformer
189
+ block, a LLaMA-style block, and a character language model. Operator coverage is
190
+ tracked op by op across M1 to M5 in the [op catalog](docs/op-catalog.md), the
191
+ exhaustive native-MIL-op x device table; [capabilities](docs/capabilities.md) has
192
+ the dtype matrix and the known limits.
193
+
194
+ ## Verify
195
+
196
+ The correctness corpus compiles and runs every op and kernel on the ANE, and is
197
+ the project's reproducibility gate:
198
+
199
+ ```sh
200
+ KMP_DUPLICATE_LIB_OK=TRUE PYTHONPATH=. python3 tests/run_corpus.py
201
+ KMP_DUPLICATE_LIB_OK=TRUE PYTHONPATH=. python3 -m pytest tests/ -q
202
+ ```
203
+
204
+ ## Documentation
205
+
206
+ The manual lives in [`docs/`](docs/) (MkDocs; `pip install -r docs/requirements.txt`,
207
+ then `mkdocs serve`), starting at [`docs/index.md`](docs/index.md). The API is
208
+ documented in the module docstrings, and runnable usage is in [`examples/`](examples/).
209
+
210
+ ## Contributing
211
+
212
+ [`CONTRIBUTING.md`](CONTRIBUTING.md) has the bug-report checklist (include your
213
+ chip and macOS version), the development setup, and where to start. Report security
214
+ issues privately per [`SECURITY.md`](SECURITY.md).
215
+
216
+ ## License
217
+
218
+ [MIT](LICENSE). The Apple Neural Engine is proprietary hardware, and the framework
219
+ symbols this project calls are private, undocumented, and may change at any time.
220
+ Nothing here is endorsed by, or constitutes an API contract from, Apple.