react-native-executorch 0.4.7 → 0.5.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (1013)
  1. package/android/CMakeLists.txt +17 -0
  2. package/android/build.gradle +76 -13
  3. package/android/libs/classes.jar +0 -0
  4. package/android/src/main/cpp/CMakeLists.txt +73 -0
  5. package/android/src/main/cpp/ETInstallerModule.cpp +76 -0
  6. package/android/src/main/cpp/ETInstallerModule.h +43 -0
  7. package/android/src/main/java/com/swmansion/rnexecutorch/ETInstaller.kt +66 -0
  8. package/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +3 -3
  9. package/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +7 -113
  10. package/common/ada/ada.cpp +17406 -0
  11. package/common/ada/ada.h +10274 -0
  12. package/common/pfft/pfft.c +2205 -0
  13. package/common/pfft/pfft.h +185 -0
  14. package/common/rnexecutorch/Log.h +489 -0
  15. package/common/rnexecutorch/RnExecutorchInstaller.cpp +78 -0
  16. package/common/rnexecutorch/RnExecutorchInstaller.h +112 -0
  17. package/common/rnexecutorch/TokenizerModule.cpp +52 -0
  18. package/common/rnexecutorch/TokenizerModule.h +26 -0
  19. package/common/rnexecutorch/data_processing/FFT.cpp +21 -0
  20. package/common/rnexecutorch/data_processing/FFT.h +23 -0
  21. package/common/rnexecutorch/data_processing/FileUtils.h +30 -0
  22. package/common/rnexecutorch/data_processing/ImageProcessing.cpp +240 -0
  23. package/common/rnexecutorch/data_processing/ImageProcessing.h +55 -0
  24. package/common/rnexecutorch/data_processing/Numerical.cpp +82 -0
  25. package/common/rnexecutorch/data_processing/Numerical.h +23 -0
  26. package/common/rnexecutorch/data_processing/base64.cpp +110 -0
  27. package/common/rnexecutorch/data_processing/base64.h +46 -0
  28. package/common/rnexecutorch/data_processing/dsp.cpp +65 -0
  29. package/common/rnexecutorch/data_processing/dsp.h +12 -0
  30. package/common/rnexecutorch/host_objects/JSTensorViewIn.h +12 -0
  31. package/common/rnexecutorch/host_objects/JSTensorViewOut.h +22 -0
  32. package/common/rnexecutorch/host_objects/JsiConversions.h +410 -0
  33. package/common/rnexecutorch/host_objects/ModelHostObject.h +239 -0
  34. package/common/rnexecutorch/jsi/JsiHostObject.cpp +108 -0
  35. package/common/rnexecutorch/jsi/JsiHostObject.h +87 -0
  36. package/common/rnexecutorch/jsi/OwningArrayBuffer.h +40 -0
  37. package/common/rnexecutorch/jsi/Promise.cpp +20 -0
  38. package/common/rnexecutorch/jsi/Promise.h +69 -0
  39. package/common/rnexecutorch/jsi/RuntimeAwareCache.h +58 -0
  40. package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.cpp +53 -0
  41. package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.h +35 -0
  42. package/common/rnexecutorch/metaprogramming/ConstructorHelpers.h +131 -0
  43. package/common/rnexecutorch/metaprogramming/FunctionHelpers.h +50 -0
  44. package/common/rnexecutorch/metaprogramming/TypeConcepts.h +37 -0
  45. package/common/rnexecutorch/models/BaseModel.cpp +181 -0
  46. package/common/rnexecutorch/models/BaseModel.h +47 -0
  47. package/common/rnexecutorch/models/EncoderDecoderBase.cpp +21 -0
  48. package/common/rnexecutorch/models/EncoderDecoderBase.h +31 -0
  49. package/common/rnexecutorch/models/classification/Classification.cpp +72 -0
  50. package/common/rnexecutorch/models/classification/Classification.h +26 -0
  51. package/{ios/RnExecutorch/models/classification/Constants.mm → common/rnexecutorch/models/classification/Constants.h} +7 -2
  52. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +27 -0
  53. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +17 -0
  54. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +45 -0
  55. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +23 -0
  56. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +61 -0
  57. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +26 -0
  58. package/{ios/RnExecutorch/models/image_segmentation/Constants.mm → common/rnexecutorch/models/image_segmentation/Constants.h} +7 -2
  59. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +173 -0
  60. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h +43 -0
  61. package/{ios/RnExecutorch/utils/Constants.mm → common/rnexecutorch/models/object_detection/Constants.h} +9 -2
  62. package/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +82 -0
  63. package/common/rnexecutorch/models/object_detection/ObjectDetection.h +31 -0
  64. package/{ios/RnExecutorch/utils/ObjectDetectionUtils.mm → common/rnexecutorch/models/object_detection/Utils.cpp} +10 -30
  65. package/common/rnexecutorch/models/object_detection/Utils.h +17 -0
  66. package/common/rnexecutorch/models/ocr/CTCLabelConverter.cpp +88 -0
  67. package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +29 -0
  68. package/common/rnexecutorch/models/ocr/Constants.h +34 -0
  69. package/common/rnexecutorch/models/ocr/Detector.cpp +102 -0
  70. package/common/rnexecutorch/models/ocr/Detector.h +30 -0
  71. package/common/rnexecutorch/models/ocr/DetectorUtils.cpp +703 -0
  72. package/common/rnexecutorch/models/ocr/DetectorUtils.h +80 -0
  73. package/common/rnexecutorch/models/ocr/OCR.cpp +52 -0
  74. package/common/rnexecutorch/models/ocr/OCR.h +36 -0
  75. package/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +107 -0
  76. package/common/rnexecutorch/models/ocr/RecognitionHandler.h +40 -0
  77. package/common/rnexecutorch/models/ocr/RecognitionHandlerUtils.cpp +153 -0
  78. package/common/rnexecutorch/models/ocr/RecognitionHandlerUtils.h +72 -0
  79. package/common/rnexecutorch/models/ocr/Recognizer.cpp +80 -0
  80. package/common/rnexecutorch/models/ocr/Recognizer.h +36 -0
  81. package/common/rnexecutorch/models/ocr/RecognizerUtils.cpp +202 -0
  82. package/common/rnexecutorch/models/ocr/RecognizerUtils.h +70 -0
  83. package/common/rnexecutorch/models/ocr/Types.h +37 -0
  84. package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +64 -0
  85. package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +31 -0
  86. package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +27 -0
  87. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +50 -0
  88. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +25 -0
  89. package/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +55 -0
  90. package/common/rnexecutorch/models/style_transfer/StyleTransfer.h +29 -0
  91. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +92 -0
  92. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +49 -0
  93. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +180 -0
  94. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +78 -0
  95. package/common/rnexecutorch/tests/LogTest.cpp +530 -0
  96. package/common/rnexecutorch/tests/README.md +20 -0
  97. package/common/rnexecutorch/tests/run_all_tests.sh +14 -0
  98. package/common/rnexecutorch/tests/run_test.sh +18 -0
  99. package/ios/ExecutorchLib.xcframework/Info.plist +4 -4
  100. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
  101. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
  102. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
  103. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
  104. package/ios/RnExecutorch/ETInstaller.h +8 -0
  105. package/ios/RnExecutorch/ETInstaller.mm +56 -0
  106. package/ios/RnExecutorch/utils/Conversions.h +8 -9
  107. package/ios/RnExecutorch/utils/Numerical.h +2 -0
  108. package/ios/RnExecutorch.xcodeproj/project.pbxproj +73 -0
  109. package/lib/module/Error.js +8 -6
  110. package/lib/module/Error.js.map +1 -1
  111. package/lib/module/common/Logger.js +23 -0
  112. package/lib/module/common/Logger.js.map +1 -0
  113. package/lib/module/constants/llmDefaults.js +8 -0
  114. package/lib/module/constants/llmDefaults.js.map +1 -1
  115. package/lib/module/constants/modelUrls.js +328 -84
  116. package/lib/module/constants/modelUrls.js.map +1 -1
  117. package/lib/module/constants/ocr/models.js +181 -286
  118. package/lib/module/constants/ocr/models.js.map +1 -1
  119. package/lib/module/constants/ocr/symbols.js +63 -63
  120. package/lib/module/controllers/LLMController.js +17 -11
  121. package/lib/module/controllers/LLMController.js.map +1 -1
  122. package/lib/module/controllers/OCRController.js +16 -9
  123. package/lib/module/controllers/OCRController.js.map +1 -1
  124. package/lib/module/controllers/VerticalOCRController.js +16 -9
  125. package/lib/module/controllers/VerticalOCRController.js.map +1 -1
  126. package/lib/module/hooks/computer_vision/useClassification.js +5 -5
  127. package/lib/module/hooks/computer_vision/useClassification.js.map +1 -1
  128. package/lib/module/hooks/computer_vision/useImageEmbeddings.js +13 -0
  129. package/lib/module/hooks/computer_vision/useImageEmbeddings.js.map +1 -0
  130. package/lib/module/hooks/computer_vision/useImageSegmentation.js +4 -4
  131. package/lib/module/hooks/computer_vision/useImageSegmentation.js.map +1 -1
  132. package/lib/module/hooks/computer_vision/useOCR.js +14 -15
  133. package/lib/module/hooks/computer_vision/useOCR.js.map +1 -1
  134. package/lib/module/hooks/computer_vision/useObjectDetection.js +5 -5
  135. package/lib/module/hooks/computer_vision/useObjectDetection.js.map +1 -1
  136. package/lib/module/hooks/computer_vision/useStyleTransfer.js +5 -5
  137. package/lib/module/hooks/computer_vision/useStyleTransfer.js.map +1 -1
  138. package/lib/module/hooks/computer_vision/useVerticalOCR.js +16 -17
  139. package/lib/module/hooks/computer_vision/useVerticalOCR.js.map +1 -1
  140. package/lib/module/hooks/general/useExecutorchModule.js +5 -3
  141. package/lib/module/hooks/general/useExecutorchModule.js.map +1 -1
  142. package/lib/module/hooks/natural_language_processing/useLLM.js +22 -25
  143. package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
  144. package/lib/module/hooks/natural_language_processing/useSpeechToText.js +72 -33
  145. package/lib/module/hooks/natural_language_processing/useSpeechToText.js.map +1 -1
  146. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js +4 -5
  147. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js.map +1 -1
  148. package/lib/module/hooks/natural_language_processing/useTokenizer.js +20 -19
  149. package/lib/module/hooks/natural_language_processing/useTokenizer.js.map +1 -1
  150. package/lib/module/hooks/useNonStaticModule.js +52 -0
  151. package/lib/module/hooks/useNonStaticModule.js.map +1 -0
  152. package/lib/module/index.js +15 -4
  153. package/lib/module/index.js.map +1 -1
  154. package/lib/module/modules/BaseModule.js +6 -3
  155. package/lib/module/modules/BaseModule.js.map +1 -1
  156. package/lib/module/modules/BaseNonStaticModule.js +17 -0
  157. package/lib/module/modules/BaseNonStaticModule.js.map +1 -0
  158. package/lib/module/modules/computer_vision/ClassificationModule.js +13 -8
  159. package/lib/module/modules/computer_vision/ClassificationModule.js.map +1 -1
  160. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js +19 -0
  161. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js.map +1 -0
  162. package/lib/module/modules/computer_vision/ImageSegmentationModule.js +21 -19
  163. package/lib/module/modules/computer_vision/ImageSegmentationModule.js.map +1 -1
  164. package/lib/module/modules/computer_vision/OCRModule.js +13 -10
  165. package/lib/module/modules/computer_vision/OCRModule.js.map +1 -1
  166. package/lib/module/modules/computer_vision/ObjectDetectionModule.js +13 -8
  167. package/lib/module/modules/computer_vision/ObjectDetectionModule.js.map +1 -1
  168. package/lib/module/modules/computer_vision/StyleTransferModule.js +13 -8
  169. package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
  170. package/lib/module/modules/computer_vision/VerticalOCRModule.js +15 -10
  171. package/lib/module/modules/computer_vision/VerticalOCRModule.js.map +1 -1
  172. package/lib/module/modules/general/ExecutorchModule.js +10 -36
  173. package/lib/module/modules/general/ExecutorchModule.js.map +1 -1
  174. package/lib/module/modules/natural_language_processing/LLMModule.js +18 -22
  175. package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
  176. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +79 -27
  177. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
  178. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js +15 -8
  179. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js.map +1 -1
  180. package/lib/module/modules/natural_language_processing/TokenizerModule.js +20 -14
  181. package/lib/module/modules/natural_language_processing/TokenizerModule.js.map +1 -1
  182. package/lib/module/native/NativeETInstaller.js +5 -0
  183. package/lib/module/native/NativeETInstaller.js.map +1 -0
  184. package/lib/module/native/RnExecutorchModules.js +2 -11
  185. package/lib/module/native/RnExecutorchModules.js.map +1 -1
  186. package/lib/module/types/common.js +25 -8
  187. package/lib/module/types/common.js.map +1 -1
  188. package/lib/module/types/stt.js +1 -79
  189. package/lib/module/types/stt.js.map +1 -1
  190. package/lib/module/utils/ResourceFetcher.js +276 -114
  191. package/lib/module/utils/ResourceFetcher.js.map +1 -1
  192. package/lib/module/utils/ResourceFetcherUtils.js +155 -0
  193. package/lib/module/utils/ResourceFetcherUtils.js.map +1 -0
  194. package/lib/module/utils/SpeechToTextModule/ASR.js +191 -0
  195. package/lib/module/utils/SpeechToTextModule/ASR.js.map +1 -0
  196. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js +73 -0
  197. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js.map +1 -0
  198. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js +56 -0
  199. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js.map +1 -0
  200. package/lib/module/utils/llm.js +41 -1
  201. package/lib/module/utils/llm.js.map +1 -1
  202. package/lib/typescript/Error.d.ts +2 -0
  203. package/lib/typescript/Error.d.ts.map +1 -1
  204. package/lib/typescript/common/Logger.d.ts +9 -0
  205. package/lib/typescript/common/Logger.d.ts.map +1 -0
  206. package/lib/typescript/constants/llmDefaults.d.ts +1 -0
  207. package/lib/typescript/constants/llmDefaults.d.ts.map +1 -1
  208. package/lib/typescript/constants/modelUrls.d.ts +240 -79
  209. package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
  210. package/lib/typescript/constants/ocr/models.d.ts +882 -284
  211. package/lib/typescript/constants/ocr/models.d.ts.map +1 -1
  212. package/lib/typescript/controllers/LLMController.d.ts +3 -4
  213. package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
  214. package/lib/typescript/controllers/OCRController.d.ts +5 -6
  215. package/lib/typescript/controllers/OCRController.d.ts.map +1 -1
  216. package/lib/typescript/controllers/VerticalOCRController.d.ts +5 -6
  217. package/lib/typescript/controllers/VerticalOCRController.d.ts.map +1 -1
  218. package/lib/typescript/hooks/computer_vision/useClassification.d.ts +8 -6
  219. package/lib/typescript/hooks/computer_vision/useClassification.d.ts.map +1 -1
  220. package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts +16 -0
  221. package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts.map +1 -0
  222. package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts +5 -3
  223. package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts.map +1 -1
  224. package/lib/typescript/hooks/computer_vision/useOCR.d.ts +4 -4
  225. package/lib/typescript/hooks/computer_vision/useOCR.d.ts.map +1 -1
  226. package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts +5 -3
  227. package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts.map +1 -1
  228. package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts +5 -3
  229. package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts.map +1 -1
  230. package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts +3 -5
  231. package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts.map +1 -1
  232. package/lib/typescript/hooks/general/useExecutorchModule.d.ts +1 -1
  233. package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts +6 -4
  234. package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
  235. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +15 -22
  236. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts.map +1 -1
  237. package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts +9 -5
  238. package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts.map +1 -1
  239. package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts +6 -4
  240. package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts.map +1 -1
  241. package/lib/typescript/hooks/useNonStaticModule.d.ts +21 -0
  242. package/lib/typescript/hooks/useNonStaticModule.d.ts.map +1 -0
  243. package/lib/typescript/index.d.ts +17 -4
  244. package/lib/typescript/index.d.ts.map +1 -1
  245. package/lib/typescript/modules/BaseModule.d.ts +1 -1
  246. package/lib/typescript/modules/BaseModule.d.ts.map +1 -1
  247. package/lib/typescript/modules/BaseNonStaticModule.d.ts +10 -0
  248. package/lib/typescript/modules/BaseNonStaticModule.d.ts.map +1 -0
  249. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts +6 -6
  250. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts.map +1 -1
  251. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts +9 -0
  252. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts.map +1 -0
  253. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts +8 -28
  254. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts.map +1 -1
  255. package/lib/typescript/modules/computer_vision/OCRModule.d.ts +8 -7
  256. package/lib/typescript/modules/computer_vision/OCRModule.d.ts.map +1 -1
  257. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts +7 -5
  258. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts.map +1 -1
  259. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +6 -5
  260. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
  261. package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts +7 -8
  262. package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts.map +1 -1
  263. package/lib/typescript/modules/general/ExecutorchModule.d.ts +5 -8
  264. package/lib/typescript/modules/general/ExecutorchModule.d.ts.map +1 -1
  265. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +16 -16
  266. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
  267. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +20 -13
  268. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
  269. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts +7 -5
  270. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts.map +1 -1
  271. package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts +10 -9
  272. package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts.map +1 -1
  273. package/lib/typescript/native/{NativeStyleTransfer.d.ts → NativeETInstaller.d.ts} +2 -3
  274. package/lib/typescript/native/NativeETInstaller.d.ts.map +1 -0
  275. package/lib/typescript/native/RnExecutorchModules.d.ts +3 -21
  276. package/lib/typescript/native/RnExecutorchModules.d.ts.map +1 -1
  277. package/lib/typescript/types/common.d.ts +30 -2
  278. package/lib/typescript/types/common.d.ts.map +1 -1
  279. package/lib/typescript/types/stt.d.ts +18 -88
  280. package/lib/typescript/types/stt.d.ts.map +1 -1
  281. package/lib/typescript/utils/ResourceFetcher.d.ts +18 -10
  282. package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
  283. package/lib/typescript/utils/ResourceFetcherUtils.d.ts +55 -0
  284. package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -0
  285. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts +27 -0
  286. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts.map +1 -0
  287. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts +23 -0
  288. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts.map +1 -0
  289. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts +13 -0
  290. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts.map +1 -0
  291. package/lib/typescript/utils/llm.d.ts +4 -0
  292. package/lib/typescript/utils/llm.d.ts.map +1 -1
  293. package/package.json +23 -65
  294. package/react-native-executorch.podspec +75 -3
  295. package/src/Error.ts +8 -10
  296. package/src/common/Logger.ts +25 -0
  297. package/src/constants/llmDefaults.ts +11 -0
  298. package/src/constants/modelUrls.ts +401 -168
  299. package/src/constants/ocr/models.ts +826 -395
  300. package/src/constants/ocr/symbols.ts +63 -63
  301. package/src/controllers/LLMController.ts +28 -18
  302. package/src/controllers/OCRController.ts +24 -15
  303. package/src/controllers/VerticalOCRController.ts +24 -14
  304. package/src/hooks/computer_vision/useClassification.ts +10 -11
  305. package/src/hooks/computer_vision/useImageEmbeddings.ts +15 -0
  306. package/src/hooks/computer_vision/useImageSegmentation.ts +5 -8
  307. package/src/hooks/computer_vision/useOCR.ts +29 -21
  308. package/src/hooks/computer_vision/useObjectDetection.ts +6 -9
  309. package/src/hooks/computer_vision/useStyleTransfer.ts +6 -6
  310. package/src/hooks/computer_vision/useVerticalOCR.ts +30 -27
  311. package/src/hooks/general/useExecutorchModule.ts +3 -3
  312. package/src/hooks/natural_language_processing/useLLM.ts +38 -28
  313. package/src/hooks/natural_language_processing/useSpeechToText.ts +91 -88
  314. package/src/hooks/natural_language_processing/useTextEmbeddings.ts +11 -11
  315. package/src/hooks/natural_language_processing/useTokenizer.ts +22 -22
  316. package/src/hooks/useNonStaticModule.ts +74 -0
  317. package/src/index.ts +100 -0
  318. package/src/modules/BaseModule.ts +9 -3
  319. package/src/modules/BaseNonStaticModule.ts +26 -0
  320. package/src/modules/computer_vision/ClassificationModule.ts +20 -11
  321. package/src/modules/computer_vision/ImageEmbeddingsModule.ts +26 -0
  322. package/src/modules/computer_vision/ImageSegmentationModule.ts +35 -27
  323. package/src/modules/computer_vision/OCRModule.ts +23 -15
  324. package/src/modules/computer_vision/ObjectDetectionModule.ts +24 -11
  325. package/src/modules/computer_vision/StyleTransferModule.ts +20 -11
  326. package/src/modules/computer_vision/VerticalOCRModule.ts +25 -21
  327. package/src/modules/general/ExecutorchModule.ts +18 -48
  328. package/src/modules/natural_language_processing/LLMModule.ts +27 -30
  329. package/src/modules/natural_language_processing/SpeechToTextModule.ts +85 -68
  330. package/src/modules/natural_language_processing/TextEmbeddingsModule.ts +27 -12
  331. package/src/modules/natural_language_processing/TokenizerModule.ts +27 -17
  332. package/src/native/NativeETInstaller.ts +8 -0
  333. package/src/native/RnExecutorchModules.ts +4 -46
  334. package/src/types/common.ts +40 -12
  335. package/src/types/stt.ts +98 -89
  336. package/src/utils/ResourceFetcher.ts +338 -119
  337. package/src/utils/ResourceFetcherUtils.ts +186 -0
  338. package/src/utils/SpeechToTextModule/ASR.ts +303 -0
  339. package/src/utils/SpeechToTextModule/OnlineProcessor.ts +87 -0
  340. package/src/utils/SpeechToTextModule/hypothesisBuffer.ts +79 -0
  341. package/src/utils/llm.ts +65 -1
  342. package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
  343. package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
  344. package/third-party/android/libs/opencv/arm64-v8a/libopencv_core.a +0 -0
  345. package/third-party/android/libs/opencv/arm64-v8a/libopencv_features2d.a +0 -0
  346. package/third-party/android/libs/opencv/arm64-v8a/libopencv_highgui.a +0 -0
  347. package/third-party/android/libs/opencv/arm64-v8a/libopencv_imgproc.a +0 -0
  348. package/third-party/android/libs/opencv/arm64-v8a/libopencv_photo.a +0 -0
  349. package/third-party/android/libs/opencv/arm64-v8a/libopencv_video.a +0 -0
  350. package/third-party/android/libs/opencv/x86_64/libopencv_core.a +0 -0
  351. package/third-party/android/libs/opencv/x86_64/libopencv_features2d.a +0 -0
  352. package/third-party/android/libs/opencv/x86_64/libopencv_highgui.a +0 -0
  353. package/third-party/android/libs/opencv/x86_64/libopencv_imgproc.a +0 -0
  354. package/third-party/android/libs/opencv/x86_64/libopencv_photo.a +0 -0
  355. package/third-party/android/libs/opencv/x86_64/libopencv_video.a +0 -0
  356. package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv.a +0 -0
  357. package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_hal.a +0 -0
  358. package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_thread.a +0 -0
  359. package/third-party/include/c10/macros/Export.h +163 -0
  360. package/third-party/include/c10/macros/Macros.h +497 -0
  361. package/third-party/include/c10/util/BFloat16-inl.h +342 -0
  362. package/third-party/include/c10/util/BFloat16-math.h +266 -0
  363. package/third-party/include/c10/util/BFloat16.h +125 -0
  364. package/third-party/include/c10/util/Half-inl.h +347 -0
  365. package/third-party/include/c10/util/Half.h +416 -0
  366. package/third-party/include/c10/util/TypeSafeSignMath.h +133 -0
  367. package/third-party/include/c10/util/bit_cast.h +43 -0
  368. package/third-party/include/c10/util/floating_point_utils.h +33 -0
  369. package/third-party/include/c10/util/irange.h +107 -0
  370. package/third-party/include/executorch/ExecuTorch.h +13 -0
  371. package/third-party/include/executorch/ExecuTorchError.h +16 -0
  372. package/third-party/include/executorch/ExecuTorchLog.h +76 -0
  373. package/third-party/include/executorch/ExecuTorchModule.h +286 -0
  374. package/third-party/include/executorch/ExecuTorchTensor.h +742 -0
  375. package/third-party/include/executorch/ExecuTorchValue.h +219 -0
  376. package/third-party/include/executorch/extension/module/module.h +492 -0
  377. package/third-party/include/executorch/extension/tensor/tensor.h +13 -0
  378. package/third-party/include/executorch/extension/tensor/tensor_accessor.h +190 -0
  379. package/third-party/include/executorch/extension/tensor/tensor_ptr.h +347 -0
  380. package/third-party/include/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
  381. package/third-party/include/executorch/runtime/backend/backend_execution_context.h +71 -0
  382. package/third-party/include/executorch/runtime/backend/backend_init_context.h +72 -0
  383. package/third-party/include/executorch/runtime/backend/interface.h +166 -0
  384. package/third-party/include/executorch/runtime/core/array_ref.h +235 -0
  385. package/third-party/include/executorch/runtime/core/data_loader.h +136 -0
  386. package/third-party/include/executorch/runtime/core/defines.h +20 -0
  387. package/third-party/include/executorch/runtime/core/error.h +229 -0
  388. package/third-party/include/executorch/runtime/core/evalue.h +521 -0
  389. package/third-party/include/executorch/runtime/core/event_tracer.h +565 -0
  390. package/third-party/include/executorch/runtime/core/event_tracer_hooks.h +323 -0
  391. package/third-party/include/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
  392. package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
  393. package/third-party/include/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
  394. package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
  395. package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
  396. package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
  397. package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
  398. package/third-party/include/executorch/runtime/core/freeable_buffer.h +107 -0
  399. package/third-party/include/executorch/runtime/core/hierarchical_allocator.h +107 -0
  400. package/third-party/include/executorch/runtime/core/memory_allocator.h +198 -0
  401. package/third-party/include/executorch/runtime/core/named_data_map.h +86 -0
  402. package/third-party/include/executorch/runtime/core/portable_type/bfloat16.h +27 -0
  403. package/third-party/include/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
  404. package/third-party/include/executorch/runtime/core/portable_type/bits_types.h +83 -0
  405. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
  406. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
  407. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
  408. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
  409. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
  410. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
  411. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
  412. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
  413. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
  414. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
  415. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
  416. package/third-party/include/executorch/runtime/core/portable_type/complex.h +44 -0
  417. package/third-party/include/executorch/runtime/core/portable_type/device.h +70 -0
  418. package/third-party/include/executorch/runtime/core/portable_type/half.h +27 -0
  419. package/third-party/include/executorch/runtime/core/portable_type/optional.h +36 -0
  420. package/third-party/include/executorch/runtime/core/portable_type/qint_types.h +83 -0
  421. package/third-party/include/executorch/runtime/core/portable_type/scalar.h +110 -0
  422. package/third-party/include/executorch/runtime/core/portable_type/scalar_type.h +154 -0
  423. package/third-party/include/executorch/runtime/core/portable_type/string_view.h +29 -0
  424. package/third-party/include/executorch/runtime/core/portable_type/tensor.h +142 -0
  425. package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
  426. package/third-party/include/executorch/runtime/core/portable_type/tensor_options.h +60 -0
  427. package/third-party/include/executorch/runtime/core/result.h +258 -0
  428. package/third-party/include/executorch/runtime/core/span.h +93 -0
  429. package/third-party/include/executorch/runtime/core/tag.h +71 -0
  430. package/third-party/include/executorch/runtime/core/tensor_layout.h +79 -0
  431. package/third-party/include/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
  432. package/third-party/include/executorch/runtime/executor/memory_manager.h +113 -0
  433. package/third-party/include/executorch/runtime/executor/method.h +387 -0
  434. package/third-party/include/executorch/runtime/executor/method_meta.h +251 -0
  435. package/third-party/include/executorch/runtime/executor/program.h +320 -0
  436. package/third-party/include/executorch/runtime/executor/pte_data_map.h +144 -0
  437. package/third-party/include/executorch/runtime/executor/tensor_parser.h +156 -0
  438. package/third-party/include/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
  439. package/third-party/include/executorch/runtime/kernel/operator_registry.h +278 -0
  440. package/third-party/include/executorch/runtime/platform/abort.h +36 -0
  441. package/third-party/include/executorch/runtime/platform/assert.h +119 -0
  442. package/third-party/include/executorch/runtime/platform/clock.h +43 -0
  443. package/third-party/include/executorch/runtime/platform/compat_unistd.h +75 -0
  444. package/third-party/include/executorch/runtime/platform/compiler.h +191 -0
  445. package/third-party/include/executorch/runtime/platform/log.h +177 -0
  446. package/third-party/include/executorch/runtime/platform/platform.h +133 -0
  447. package/third-party/include/executorch/runtime/platform/profiler.h +292 -0
  448. package/third-party/include/executorch/runtime/platform/runtime.h +35 -0
  449. package/third-party/include/executorch/runtime/platform/system.h +49 -0
  450. package/third-party/include/executorch/runtime/platform/types.h +24 -0
  451. package/third-party/include/executorch/schema/extended_header.h +76 -0
  452. package/third-party/include/opencv2/core/affine.hpp +676 -0
  453. package/third-party/include/opencv2/core/async.hpp +107 -0
  454. package/third-party/include/opencv2/core/base.hpp +735 -0
  455. package/third-party/include/opencv2/core/bindings_utils.hpp +279 -0
  456. package/third-party/include/opencv2/core/bufferpool.hpp +39 -0
  457. package/third-party/include/opencv2/core/check.hpp +231 -0
  458. package/third-party/include/opencv2/core/core.hpp +55 -0
  459. package/third-party/include/opencv2/core/core_c.h +3261 -0
  460. package/third-party/include/opencv2/core/cv_cpu_dispatch.h +404 -0
  461. package/third-party/include/opencv2/core/cv_cpu_helper.h +856 -0
  462. package/third-party/include/opencv2/core/cvdef.h +1003 -0
  463. package/third-party/include/opencv2/core/cvstd.hpp +196 -0
  464. package/third-party/include/opencv2/core/cvstd.inl.hpp +188 -0
  465. package/third-party/include/opencv2/core/cvstd_wrapper.hpp +187 -0
  466. package/third-party/include/opencv2/core/detail/async_promise.hpp +73 -0
  467. package/third-party/include/opencv2/core/detail/dispatch_helper.impl.hpp +48 -0
  468. package/third-party/include/opencv2/core/detail/exception_ptr.hpp +24 -0
  469. package/third-party/include/opencv2/core/dualquaternion.hpp +1054 -0
  470. package/third-party/include/opencv2/core/dualquaternion.inl.hpp +464 -0
  471. package/third-party/include/opencv2/core/eigen.hpp +405 -0
  472. package/third-party/include/opencv2/core/fast_math.hpp +433 -0
  473. package/third-party/include/opencv2/core/hal/hal.hpp +451 -0
  474. package/third-party/include/opencv2/core/hal/interface.h +191 -0
  475. package/third-party/include/opencv2/core/hal/intrin.hpp +1222 -0
  476. package/third-party/include/opencv2/core/hal/intrin_avx.hpp +3378 -0
  477. package/third-party/include/opencv2/core/hal/intrin_avx512.hpp +3688 -0
  478. package/third-party/include/opencv2/core/hal/intrin_cpp.hpp +3446 -0
  479. package/third-party/include/opencv2/core/hal/intrin_forward.hpp +195 -0
  480. package/third-party/include/opencv2/core/hal/intrin_lasx.hpp +3243 -0
  481. package/third-party/include/opencv2/core/hal/intrin_lsx.hpp +2671 -0
  482. package/third-party/include/opencv2/core/hal/intrin_math.hpp +772 -0
  483. package/third-party/include/opencv2/core/hal/intrin_msa.hpp +1973 -0
  484. package/third-party/include/opencv2/core/hal/intrin_neon.hpp +2710 -0
  485. package/third-party/include/opencv2/core/hal/intrin_rvv071.hpp +3452 -0
  486. package/third-party/include/opencv2/core/hal/intrin_rvv_scalable.hpp +2559 -0
  487. package/third-party/include/opencv2/core/hal/intrin_sse.hpp +3528 -0
  488. package/third-party/include/opencv2/core/hal/intrin_sse_em.hpp +175 -0
  489. package/third-party/include/opencv2/core/hal/intrin_vsx.hpp +1756 -0
  490. package/third-party/include/opencv2/core/hal/intrin_wasm.hpp +2911 -0
  491. package/third-party/include/opencv2/core/hal/msa_macros.h +2079 -0
  492. package/third-party/include/opencv2/core/hal/simd_utils.impl.hpp +313 -0
  493. package/third-party/include/opencv2/core/mat.hpp +3842 -0
  494. package/third-party/include/opencv2/core/mat.inl.hpp +2753 -0
  495. package/third-party/include/opencv2/core/matx.hpp +603 -0
  496. package/third-party/include/opencv2/core/matx.inl.hpp +1132 -0
  497. package/third-party/include/opencv2/core/neon_utils.hpp +127 -0
  498. package/third-party/include/opencv2/core/operations.hpp +610 -0
  499. package/third-party/include/opencv2/core/optim.hpp +362 -0
  500. package/third-party/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp +66 -0
  501. package/third-party/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp +148 -0
  502. package/third-party/include/opencv2/core/parallel/parallel_backend.hpp +108 -0
  503. package/third-party/include/opencv2/core/persistence.hpp +1321 -0
  504. package/third-party/include/opencv2/core/quaternion.hpp +1889 -0
  505. package/third-party/include/opencv2/core/quaternion.inl.hpp +907 -0
  506. package/third-party/include/opencv2/core/saturate.hpp +347 -0
  507. package/third-party/include/opencv2/core/simd_intrinsics.hpp +90 -0
  508. package/third-party/include/opencv2/core/softfloat.hpp +657 -0
  509. package/third-party/include/opencv2/core/sse_utils.hpp +861 -0
  510. package/third-party/include/opencv2/core/traits.hpp +417 -0
  511. package/third-party/include/opencv2/core/types.hpp +2368 -0
  512. package/third-party/include/opencv2/core/types_c.h +2064 -0
  513. package/third-party/include/opencv2/core/utility.hpp +1296 -0
  514. package/third-party/include/opencv2/core/utils/allocator_stats.hpp +31 -0
  515. package/third-party/include/opencv2/core/utils/allocator_stats.impl.hpp +111 -0
  516. package/third-party/include/opencv2/core/utils/filesystem.hpp +91 -0
  517. package/third-party/include/opencv2/core/utils/fp_control_utils.hpp +70 -0
  518. package/third-party/include/opencv2/core/utils/instrumentation.hpp +127 -0
  519. package/third-party/include/opencv2/core/utils/logger.defines.hpp +50 -0
  520. package/third-party/include/opencv2/core/utils/logger.hpp +258 -0
  521. package/third-party/include/opencv2/core/utils/logtag.hpp +27 -0
  522. package/third-party/include/opencv2/core/utils/tls.hpp +230 -0
  523. package/third-party/include/opencv2/core/utils/trace.hpp +281 -0
  524. package/third-party/include/opencv2/core/version.hpp +29 -0
  525. package/third-party/include/opencv2/core/vsx_utils.hpp +1115 -0
  526. package/third-party/include/opencv2/core.hpp +3699 -0
  527. package/third-party/include/opencv2/cvconfig.h +155 -0
  528. package/third-party/include/opencv2/dnn/dnn.hpp +51 -0
  529. package/third-party/include/opencv2/dnn.hpp +17 -0
  530. package/third-party/include/opencv2/features2d/features2d.hpp +55 -0
  531. package/third-party/include/opencv2/features2d/hal/interface.h +32 -0
  532. package/third-party/include/opencv2/features2d.hpp +1756 -0
  533. package/third-party/include/opencv2/highgui/highgui.hpp +113 -0
  534. package/third-party/include/opencv2/highgui.hpp +17 -0
  535. package/third-party/include/opencv2/imgproc/bindings.hpp +34 -0
  536. package/third-party/include/opencv2/imgproc/detail/gcgraph.hpp +355 -0
  537. package/third-party/include/opencv2/imgproc/detail/legacy.hpp +35 -0
  538. package/third-party/include/opencv2/imgproc/hal/hal.hpp +246 -0
  539. package/third-party/include/opencv2/imgproc/hal/interface.h +52 -0
  540. package/third-party/include/opencv2/imgproc/imgproc.hpp +55 -0
  541. package/third-party/include/opencv2/imgproc/imgproc_c.h +1261 -0
  542. package/third-party/include/opencv2/imgproc/segmentation.hpp +168 -0
  543. package/third-party/include/opencv2/imgproc/types_c.h +632 -0
  544. package/third-party/include/opencv2/imgproc.hpp +5956 -0
  545. package/third-party/include/opencv2/opencv.hpp +102 -0
  546. package/third-party/include/opencv2/opencv_modules.hpp +19 -0
  547. package/third-party/include/opencv2/photo/legacy/constants_c.h +10 -0
  548. package/third-party/include/opencv2/photo/photo.hpp +55 -0
  549. package/third-party/include/opencv2/photo.hpp +975 -0
  550. package/third-party/include/opencv2/video/background_segm.hpp +341 -0
  551. package/third-party/include/opencv2/video/detail/tracking.detail.hpp +435 -0
  552. package/third-party/include/opencv2/video/legacy/constants_c.h +15 -0
  553. package/third-party/include/opencv2/video/tracking.hpp +1014 -0
  554. package/third-party/include/opencv2/video/video.hpp +55 -0
  555. package/third-party/include/opencv2/video.hpp +65 -0
  556. package/third-party/include/tokenizers-cpp/tokenizers_c.h +61 -0
  557. package/third-party/include/tokenizers-cpp/tokenizers_cpp.h +118 -0
  558. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.h +27 -0
  559. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.mm +249 -0
  560. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h +14 -0
  561. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm +80 -0
  562. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h +32 -0
  563. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm +95 -0
  564. package/third-party/ios/ExecutorchLib/ExecutorchLib/InputType.h +12 -0
  565. package/third-party/ios/ExecutorchLib/ExecutorchLib/Utils.hpp +217 -0
  566. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.cpp +11 -0
  567. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.h +11 -0
  568. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/irunner.h +48 -0
  569. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.cpp +278 -0
  570. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.h +67 -0
  571. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/stats.h +164 -0
  572. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.cpp +65 -0
  573. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.h +105 -0
  574. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.cpp +91 -0
  575. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.h +51 -0
  576. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_token_generator.h +162 -0
  577. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/util.h +108 -0
  578. package/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp +193 -0
  579. package/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h +64 -0
  580. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h +202 -0
  581. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp +313 -0
  582. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h +57 -0
  583. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp +78 -0
  584. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h +23 -0
  585. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp +427 -0
  586. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h +87 -0
  587. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h +76 -0
  588. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj +683 -0
  589. package/third-party/ios/ExecutorchLib/build.sh +44 -0
  590. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/Info.plist +43 -0
  591. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64/libbackend_coreml_ios.a +0 -0
  592. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64-simulator/libbackend_coreml_simulator.a +0 -0
  593. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/Info.plist +43 -0
  594. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64/libbackend_mps_ios.a +0 -0
  595. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64-simulator/libbackend_mps_simulator.a +0 -0
  596. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/Info.plist +43 -0
  597. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64/libbackend_xnnpack_ios.a +0 -0
  598. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64-simulator/libbackend_xnnpack_simulator.a +0 -0
  599. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/Info.plist +47 -0
  600. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Export.h +163 -0
  601. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Macros.h +497 -0
  602. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-inl.h +342 -0
  603. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-math.h +266 -0
  604. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16.h +125 -0
  605. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half-inl.h +347 -0
  606. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half.h +416 -0
  607. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/TypeSafeSignMath.h +133 -0
  608. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/bit_cast.h +43 -0
  609. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/floating_point_utils.h +33 -0
  610. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/irange.h +107 -0
  611. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorch.h +13 -0
  612. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchError.h +16 -0
  613. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchLog.h +76 -0
  614. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchModule.h +286 -0
  615. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchTensor.h +742 -0
  616. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchValue.h +219 -0
  617. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/module/module.h +492 -0
  618. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor.h +13 -0
  619. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_accessor.h +190 -0
  620. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr.h +347 -0
  621. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
  622. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_execution_context.h +71 -0
  623. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_init_context.h +72 -0
  624. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/interface.h +166 -0
  625. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/array_ref.h +235 -0
  626. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/data_loader.h +136 -0
  627. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/defines.h +20 -0
  628. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/error.h +229 -0
  629. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/evalue.h +521 -0
  630. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer.h +565 -0
  631. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks.h +323 -0
  632. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
  633. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
  634. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
  635. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
  636. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
  637. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
  638. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
  639. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/freeable_buffer.h +107 -0
  640. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/hierarchical_allocator.h +107 -0
  641. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/memory_allocator.h +198 -0
  642. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/named_data_map.h +86 -0
  643. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16.h +27 -0
  644. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
  645. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bits_types.h +83 -0
  646. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
  647. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
  648. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
  649. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
  650. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
  651. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
  652. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
  653. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
  654. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
  655. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
  656. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
  657. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/complex.h +44 -0
  658. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/device.h +70 -0
  659. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/half.h +27 -0
  660. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/optional.h +36 -0
  661. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/qint_types.h +83 -0
  662. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar.h +110 -0
  663. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar_type.h +154 -0
  664. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/string_view.h +29 -0
  665. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor.h +142 -0
  666. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
  667. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_options.h +60 -0
  668. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/result.h +258 -0
  669. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/span.h +93 -0
  670. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tag.h +71 -0
  671. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_layout.h +79 -0
  672. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
  673. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/memory_manager.h +113 -0
  674. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method.h +387 -0
  675. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method_meta.h +251 -0
  676. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/program.h +320 -0
  677. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/pte_data_map.h +144 -0
  678. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/tensor_parser.h +156 -0
  679. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
  680. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/operator_registry.h +278 -0
  681. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/abort.h +36 -0
  682. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/assert.h +119 -0
  683. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/clock.h +43 -0
  684. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compat_unistd.h +75 -0
  685. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compiler.h +191 -0
  686. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/log.h +177 -0
  687. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/platform.h +133 -0
  688. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/profiler.h +292 -0
  689. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/runtime.h +35 -0
  690. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/system.h +49 -0
  691. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/types.h +24 -0
  692. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/schema/extended_header.h +76 -0
  693. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/module.modulemap +5 -0
  694. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/libexecutorch_ios.a +0 -0
  695. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Export.h +163 -0
  696. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Macros.h +497 -0
  697. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-inl.h +342 -0
  698. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-math.h +266 -0
  699. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16.h +125 -0
  700. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half-inl.h +347 -0
  701. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half.h +416 -0
  702. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/TypeSafeSignMath.h +133 -0
  703. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/bit_cast.h +43 -0
  704. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/floating_point_utils.h +33 -0
  705. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/irange.h +107 -0
  706. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorch.h +13 -0
  707. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchError.h +16 -0
  708. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchLog.h +76 -0
  709. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchModule.h +286 -0
  710. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchTensor.h +742 -0
  711. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchValue.h +219 -0
  712. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/module/module.h +492 -0
  713. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor.h +13 -0
  714. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_accessor.h +190 -0
  715. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr.h +347 -0
  716. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
  717. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_execution_context.h +71 -0
  718. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_init_context.h +72 -0
  719. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/interface.h +166 -0
  720. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/array_ref.h +235 -0
  721. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/data_loader.h +136 -0
  722. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/defines.h +20 -0
  723. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/error.h +229 -0
  724. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/evalue.h +521 -0
  725. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer.h +565 -0
  726. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks.h +323 -0
  727. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
  728. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
  729. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
  730. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
  731. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
  732. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
  733. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
  734. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/freeable_buffer.h +107 -0
  735. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/hierarchical_allocator.h +107 -0
  736. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/memory_allocator.h +198 -0
  737. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/named_data_map.h +86 -0
  738. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16.h +27 -0
  739. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
  740. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bits_types.h +83 -0
  741. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
  742. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
  743. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
  744. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
  745. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
  746. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
  747. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
  748. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
  749. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
  750. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
  751. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
  752. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/complex.h +44 -0
  753. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/device.h +70 -0
  754. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/half.h +27 -0
  755. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/optional.h +36 -0
  756. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/qint_types.h +83 -0
  757. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar.h +110 -0
  758. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar_type.h +154 -0
  759. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/string_view.h +29 -0
  760. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor.h +142 -0
  761. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
  762. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_options.h +60 -0
  763. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/result.h +258 -0
  764. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/span.h +93 -0
  765. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tag.h +71 -0
  766. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_layout.h +79 -0
  767. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
  768. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/memory_manager.h +113 -0
  769. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method.h +387 -0
  770. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method_meta.h +251 -0
  771. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/program.h +320 -0
  772. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/pte_data_map.h +144 -0
  773. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/tensor_parser.h +156 -0
  774. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
  775. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/operator_registry.h +278 -0
  776. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/abort.h +36 -0
  777. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/assert.h +119 -0
  778. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/clock.h +43 -0
  779. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compat_unistd.h +75 -0
  780. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compiler.h +191 -0
  781. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/log.h +177 -0
  782. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/platform.h +133 -0
  783. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/profiler.h +292 -0
  784. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/runtime.h +35 -0
  785. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/system.h +49 -0
  786. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/types.h +24 -0
  787. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/schema/extended_header.h +76 -0
  788. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/module.modulemap +5 -0
  789. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/libexecutorch_simulator.a +0 -0
  790. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/Info.plist +43 -0
  791. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64/libkernels_custom_ios.a +0 -0
  792. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64-simulator/libkernels_custom_simulator.a +0 -0
  793. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/Info.plist +43 -0
  794. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64/libkernels_optimized_ios.a +0 -0
  795. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64-simulator/libkernels_optimized_simulator.a +0 -0
  796. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/Info.plist +43 -0
  797. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64/libkernels_portable_ios.a +0 -0
  798. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64-simulator/libkernels_portable_simulator.a +0 -0
  799. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/Info.plist +43 -0
  800. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64/libkernels_quantized_ios.a +0 -0
  801. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64-simulator/libkernels_quantized_simulator.a +0 -0
  802. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/Info.plist +43 -0
  803. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/bitmap256.h +82 -0
  804. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/filtered_re2.h +111 -0
  805. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/pod_array.h +43 -0
  806. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter.h +130 -0
  807. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter_tree.h +139 -0
  808. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prog.h +483 -0
  809. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/re2.h +994 -0
  810. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/regexp.h +692 -0
  811. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/set.h +85 -0
  812. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_array.h +367 -0
  813. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_set.h +241 -0
  814. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/stringpiece.h +205 -0
  815. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_casefold.h +78 -0
  816. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_groups.h +64 -0
  817. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/walker-inl.h +235 -0
  818. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Info.plist +26 -0
  819. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/re2 +0 -0
  820. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/bitmap256.h +82 -0
  821. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/filtered_re2.h +111 -0
  822. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/pod_array.h +43 -0
  823. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter.h +130 -0
  824. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter_tree.h +139 -0
  825. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prog.h +483 -0
  826. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/re2.h +994 -0
  827. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/regexp.h +692 -0
  828. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/set.h +85 -0
  829. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_array.h +367 -0
  830. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_set.h +241 -0
  831. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/stringpiece.h +205 -0
  832. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_casefold.h +78 -0
  833. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_groups.h +64 -0
  834. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/walker-inl.h +235 -0
  835. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Info.plist +26 -0
  836. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/re2 +0 -0
  837. package/third-party/ios/ios.toolchain.cmake +1122 -0
  838. package/LICENSE +0 -79
  839. package/README.md +0 -148
  840. package/android/src/main/java/com/swmansion/rnexecutorch/Classification.kt +0 -64
  841. package/android/src/main/java/com/swmansion/rnexecutorch/ETModule.kt +0 -90
  842. package/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt +0 -58
  843. package/android/src/main/java/com/swmansion/rnexecutorch/OCR.kt +0 -90
  844. package/android/src/main/java/com/swmansion/rnexecutorch/ObjectDetection.kt +0 -64
  845. package/android/src/main/java/com/swmansion/rnexecutorch/SpeechToText.kt +0 -91
  846. package/android/src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt +0 -54
  847. package/android/src/main/java/com/swmansion/rnexecutorch/TextEmbeddings.kt +0 -51
  848. package/android/src/main/java/com/swmansion/rnexecutorch/Tokenizer.kt +0 -86
  849. package/android/src/main/java/com/swmansion/rnexecutorch/VerticalOCR.kt +0 -179
  850. package/android/src/main/java/com/swmansion/rnexecutorch/models/BaseModel.kt +0 -54
  851. package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsModel.kt +0 -48
  852. package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsUtils.kt +0 -37
  853. package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt +0 -46
  854. package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/Constants.kt +0 -1005
  855. package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt +0 -26
  856. package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt +0 -142
  857. package/android/src/main/java/com/swmansion/rnexecutorch/models/objectDetection/SSDLiteLargeModel.kt +0 -74
  858. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Detector.kt +0 -82
  859. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/RecognitionHandler.kt +0 -117
  860. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Recognizer.kt +0 -51
  861. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/VerticalDetector.kt +0 -89
  862. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/CTCLabelConverter.kt +0 -58
  863. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/Constants.kt +0 -31
  864. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/DetectorUtils.kt +0 -608
  865. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/RecognizerUtils.kt +0 -430
  866. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TDecoder.kt +0 -39
  867. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TModule.kt +0 -43
  868. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Moonshine.kt +0 -16
  869. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineDecoder.kt +0 -23
  870. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineEncoder.kt +0 -20
  871. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Whisper.kt +0 -16
  872. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperDecoder.kt +0 -22
  873. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperEncoder.kt +0 -29
  874. package/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt +0 -43
  875. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ArrayUtils.kt +0 -87
  876. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ETError.kt +0 -34
  877. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ImageProcessor.kt +0 -237
  878. package/android/src/main/java/com/swmansion/rnexecutorch/utils/Numerical.kt +0 -8
  879. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ObjectDetectionUtils.kt +0 -201
  880. package/android/src/main/java/com/swmansion/rnexecutorch/utils/STFT.kt +0 -50
  881. package/android/src/main/java/com/swmansion/rnexecutorch/utils/TensorUtils.kt +0 -103
  882. package/ios/RnExecutorch/Classification.h +0 -5
  883. package/ios/RnExecutorch/Classification.mm +0 -54
  884. package/ios/RnExecutorch/ETModule.h +0 -5
  885. package/ios/RnExecutorch/ETModule.mm +0 -75
  886. package/ios/RnExecutorch/ImageSegmentation.h +0 -5
  887. package/ios/RnExecutorch/ImageSegmentation.mm +0 -60
  888. package/ios/RnExecutorch/OCR.h +0 -5
  889. package/ios/RnExecutorch/OCR.mm +0 -96
  890. package/ios/RnExecutorch/ObjectDetection.h +0 -5
  891. package/ios/RnExecutorch/ObjectDetection.mm +0 -56
  892. package/ios/RnExecutorch/SpeechToText.h +0 -5
  893. package/ios/RnExecutorch/SpeechToText.mm +0 -125
  894. package/ios/RnExecutorch/StyleTransfer.h +0 -5
  895. package/ios/RnExecutorch/StyleTransfer.mm +0 -55
  896. package/ios/RnExecutorch/TextEmbeddings.h +0 -5
  897. package/ios/RnExecutorch/TextEmbeddings.mm +0 -62
  898. package/ios/RnExecutorch/Tokenizer.h +0 -5
  899. package/ios/RnExecutorch/Tokenizer.mm +0 -83
  900. package/ios/RnExecutorch/VerticalOCR.h +0 -5
  901. package/ios/RnExecutorch/VerticalOCR.mm +0 -183
  902. package/ios/RnExecutorch/models/BaseModel.h +0 -21
  903. package/ios/RnExecutorch/models/BaseModel.mm +0 -43
  904. package/ios/RnExecutorch/models/classification/ClassificationModel.h +0 -10
  905. package/ios/RnExecutorch/models/classification/ClassificationModel.mm +0 -53
  906. package/ios/RnExecutorch/models/classification/Constants.h +0 -3
  907. package/ios/RnExecutorch/models/image_segmentation/Constants.h +0 -4
  908. package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +0 -10
  909. package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +0 -146
  910. package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.hpp +0 -11
  911. package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.mm +0 -64
  912. package/ios/RnExecutorch/models/ocr/Detector.h +0 -9
  913. package/ios/RnExecutorch/models/ocr/Detector.mm +0 -101
  914. package/ios/RnExecutorch/models/ocr/RecognitionHandler.h +0 -16
  915. package/ios/RnExecutorch/models/ocr/RecognitionHandler.mm +0 -135
  916. package/ios/RnExecutorch/models/ocr/Recognizer.h +0 -8
  917. package/ios/RnExecutorch/models/ocr/Recognizer.mm +0 -77
  918. package/ios/RnExecutorch/models/ocr/VerticalDetector.h +0 -10
  919. package/ios/RnExecutorch/models/ocr/VerticalDetector.mm +0 -118
  920. package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.h +0 -16
  921. package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.mm +0 -80
  922. package/ios/RnExecutorch/models/ocr/utils/Constants.h +0 -26
  923. package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.h +0 -31
  924. package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.mm +0 -754
  925. package/ios/RnExecutorch/models/ocr/utils/OCRUtils.h +0 -10
  926. package/ios/RnExecutorch/models/ocr/utils/OCRUtils.mm +0 -67
  927. package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.h +0 -35
  928. package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.mm +0 -331
  929. package/ios/RnExecutorch/models/stt/Moonshine.hpp +0 -13
  930. package/ios/RnExecutorch/models/stt/Moonshine.mm +0 -64
  931. package/ios/RnExecutorch/models/stt/MoonshineDecoder.hpp +0 -16
  932. package/ios/RnExecutorch/models/stt/MoonshineDecoder.mm +0 -24
  933. package/ios/RnExecutorch/models/stt/MoonshineEncoder.hpp +0 -15
  934. package/ios/RnExecutorch/models/stt/MoonshineEncoder.mm +0 -18
  935. package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.hpp +0 -26
  936. package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.mm +0 -19
  937. package/ios/RnExecutorch/models/stt/Whisper.hpp +0 -12
  938. package/ios/RnExecutorch/models/stt/Whisper.mm +0 -68
  939. package/ios/RnExecutorch/models/stt/WhisperDecoder.hpp +0 -16
  940. package/ios/RnExecutorch/models/stt/WhisperDecoder.mm +0 -22
  941. package/ios/RnExecutorch/models/stt/WhisperEncoder.hpp +0 -15
  942. package/ios/RnExecutorch/models/stt/WhisperEncoder.mm +0 -21
  943. package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h +0 -11
  944. package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm +0 -50
  945. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.h +0 -15
  946. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.mm +0 -45
  947. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.h +0 -8
  948. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.mm +0 -49
  949. package/ios/RnExecutorch/utils/Constants.h +0 -8
  950. package/ios/RnExecutorch/utils/ObjectDetectionUtils.hpp +0 -23
  951. package/ios/RnExecutorch/utils/SFFT.hpp +0 -13
  952. package/ios/RnExecutorch/utils/SFFT.mm +0 -71
  953. package/lib/module/constants/sttDefaults.js +0 -72
  954. package/lib/module/constants/sttDefaults.js.map +0 -1
  955. package/lib/module/controllers/SpeechToTextController.js +0 -307
  956. package/lib/module/controllers/SpeechToTextController.js.map +0 -1
  957. package/lib/module/native/NativeClassification.js +0 -5
  958. package/lib/module/native/NativeClassification.js.map +0 -1
  959. package/lib/module/native/NativeETModule.js +0 -5
  960. package/lib/module/native/NativeETModule.js.map +0 -1
  961. package/lib/module/native/NativeImageSegmentation.js +0 -5
  962. package/lib/module/native/NativeImageSegmentation.js.map +0 -1
  963. package/lib/module/native/NativeOCR.js +0 -5
  964. package/lib/module/native/NativeOCR.js.map +0 -1
  965. package/lib/module/native/NativeObjectDetection.js +0 -5
  966. package/lib/module/native/NativeObjectDetection.js.map +0 -1
  967. package/lib/module/native/NativeSpeechToText.js +0 -5
  968. package/lib/module/native/NativeSpeechToText.js.map +0 -1
  969. package/lib/module/native/NativeStyleTransfer.js +0 -5
  970. package/lib/module/native/NativeStyleTransfer.js.map +0 -1
  971. package/lib/module/native/NativeTextEmbeddings.js +0 -5
  972. package/lib/module/native/NativeTextEmbeddings.js.map +0 -1
  973. package/lib/module/native/NativeTokenizer.js +0 -5
  974. package/lib/module/native/NativeTokenizer.js.map +0 -1
  975. package/lib/module/native/NativeVerticalOCR.js +0 -5
  976. package/lib/module/native/NativeVerticalOCR.js.map +0 -1
  977. package/lib/module/package.json +0 -1
  978. package/lib/typescript/constants/sttDefaults.d.ts +0 -28
  979. package/lib/typescript/constants/sttDefaults.d.ts.map +0 -1
  980. package/lib/typescript/controllers/SpeechToTextController.d.ts +0 -52
  981. package/lib/typescript/controllers/SpeechToTextController.d.ts.map +0 -1
  982. package/lib/typescript/native/NativeClassification.d.ts +0 -10
  983. package/lib/typescript/native/NativeClassification.d.ts.map +0 -1
  984. package/lib/typescript/native/NativeETModule.d.ts +0 -9
  985. package/lib/typescript/native/NativeETModule.d.ts.map +0 -1
  986. package/lib/typescript/native/NativeImageSegmentation.d.ts +0 -10
  987. package/lib/typescript/native/NativeImageSegmentation.d.ts.map +0 -1
  988. package/lib/typescript/native/NativeOCR.d.ts +0 -9
  989. package/lib/typescript/native/NativeOCR.d.ts.map +0 -1
  990. package/lib/typescript/native/NativeObjectDetection.d.ts +0 -9
  991. package/lib/typescript/native/NativeObjectDetection.d.ts.map +0 -1
  992. package/lib/typescript/native/NativeSpeechToText.d.ts +0 -12
  993. package/lib/typescript/native/NativeSpeechToText.d.ts.map +0 -1
  994. package/lib/typescript/native/NativeStyleTransfer.d.ts.map +0 -1
  995. package/lib/typescript/native/NativeTextEmbeddings.d.ts +0 -8
  996. package/lib/typescript/native/NativeTextEmbeddings.d.ts.map +0 -1
  997. package/lib/typescript/native/NativeTokenizer.d.ts +0 -12
  998. package/lib/typescript/native/NativeTokenizer.d.ts.map +0 -1
  999. package/lib/typescript/native/NativeVerticalOCR.d.ts +0 -9
  1000. package/lib/typescript/native/NativeVerticalOCR.d.ts.map +0 -1
  1001. package/src/constants/sttDefaults.ts +0 -86
  1002. package/src/controllers/SpeechToTextController.ts +0 -458
  1003. package/src/index.tsx +0 -47
  1004. package/src/native/NativeClassification.ts +0 -9
  1005. package/src/native/NativeETModule.ts +0 -14
  1006. package/src/native/NativeImageSegmentation.ts +0 -14
  1007. package/src/native/NativeOCR.ts +0 -16
  1008. package/src/native/NativeObjectDetection.ts +0 -10
  1009. package/src/native/NativeSpeechToText.ts +0 -17
  1010. package/src/native/NativeStyleTransfer.ts +0 -10
  1011. package/src/native/NativeTextEmbeddings.ts +0 -9
  1012. package/src/native/NativeTokenizer.ts +0 -13
  1013. package/src/native/NativeVerticalOCR.ts +0 -16
@@ -0,0 +1,2205 @@
1
+ /* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )
2
+
3
+ Based on original fortran 77 code from FFTPACKv4 from NETLIB
4
+ (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber
5
+ of NCAR, in 1985.
6
+
7
+ As confirmed by the NCAR fftpack software curators, the following
8
+ FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
9
+ released under the same terms.
10
+
11
+ FFTPACK license:
12
+
13
+ http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
14
+
15
+ Copyright (c) 2004 the University Corporation for Atmospheric
16
+ Research ("UCAR"). All rights reserved. Developed by NCAR's
17
+ Computational and Information Systems Laboratory, UCAR,
18
+ www.cisl.ucar.edu.
19
+
20
+ Redistribution and use of the Software in source and binary forms,
21
+ with or without modification, is permitted provided that the
22
+ following conditions are met:
23
+
24
+ - Neither the names of NCAR's Computational and Information Systems
25
+ Laboratory, the University Corporation for Atmospheric Research,
26
+ nor the names of its sponsors or contributors may be used to
27
+ endorse or promote products derived from this Software without
28
+ specific prior written permission.
29
+
30
+ - Redistributions of source code must retain the above copyright
31
+ notices, this list of conditions, and the disclaimer below.
32
+
33
+ - Redistributions in binary form must reproduce the above copyright
34
+ notice, this list of conditions, and the disclaimer below in the
35
+ documentation and/or other materials provided with the
36
+ distribution.
37
+
38
+ THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
39
+ EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
40
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
41
+ NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
42
+ HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
43
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
44
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
45
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
46
+ SOFTWARE.
47
+
48
+
49
+ PFFFT : a Pretty Fast FFT.
50
+
51
+ This file is largely based on the original FFTPACK implementation, modified
52
+ in order to take advantage of SIMD instructions of modern CPUs.
53
+ */
54
+
55
+ /*
56
+ ChangeLog:
57
+ - 2011/10/02, version 1: This is the very first release of this file.
58
+ */
59
+
60
+ #ifndef _USE_MATH_DEFINES
61
+ #define _USE_MATH_DEFINES // ask gently MSVC to define M_PI, M_SQRT2 etc.
62
+ #endif
63
+
64
+ #include <assert.h>
65
+ #include <math.h>
66
+ #include <pfft/pfft.h>
67
+ #include <stdio.h>
68
+ #include <stdlib.h>
69
+
/* detect compiler flavour */
#if defined(_MSC_VER)
#define COMPILER_MSVC
#elif defined(__GNUC__)
#define COMPILER_GCC
#endif

/*
   Compiler abstraction macros:
     ALWAYS_INLINE(T)  - declare a function returning T that must be inlined
     NEVER_INLINE(T)   - declare a function returning T that must not be inlined
     RESTRICT          - no-alias pointer qualifier
     VLA_ARRAY_ON_STACK(type, name, n) - declare `name` as stack storage for n
                                         elements of `type`

   Note: the GCC and fallback forms of VLA_ARRAY_ON_STACK already end with a
   ';' while the MSVC form does not; callers should always write a trailing
   ';' after the macro so that all variants compile.
*/
#if defined(COMPILER_GCC)
#define ALWAYS_INLINE(return_type)                                             \
  inline return_type __attribute__((always_inline))
#define NEVER_INLINE(return_type) return_type __attribute__((noinline))
#define RESTRICT __restrict
#define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__];
#elif defined(COMPILER_MSVC)
#define ALWAYS_INLINE(return_type) __forceinline return_type
#define NEVER_INLINE(return_type) __declspec(noinline) return_type
#define RESTRICT __restrict
/* size__ is parenthesized so that expressions such as `a + b` scale correctly */
#define VLA_ARRAY_ON_STACK(type__, varname__, size__)                          \
  type__ *varname__ = (type__ *)_alloca((size__) * sizeof(type__))
#else
/* unknown compiler: fall back to plain C99 so the file still builds */
#define ALWAYS_INLINE(return_type) inline return_type
#define NEVER_INLINE(return_type) return_type
#define RESTRICT
#define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__];
#endif
90
+
91
+ /*
92
+ vector support macros: the rest of the code is independent of
93
+ SSE/Altivec/NEON -- adding support for other platforms with 4-element
94
+ vectors should be limited to these macros
95
+ */
96
+
97
+ // define PFFFT_SIMD_DISABLE if you want to use scalar code instead of simd code
98
+ // #define PFFFT_SIMD_DISABLE
99
+
100
+ /* select which SIMD intrinsics will be used */
101
+ #if !defined(PFFFT_SIMD_DISABLE)
102
+ #if (defined(__ppc__) || defined(__ppc64__) || defined(__powerpc__) || \
103
+ defined(__powerpc64__)) && \
104
+ (defined(__VEC__) || defined(__ALTIVEC__))
105
+ #define PFFFT_SIMD_ALTIVEC
106
+ #elif defined(__ARM_NEON) || defined(__aarch64__) || defined(__arm64) || \
107
+ defined(_M_ARM64) || defined(_M_ARM64EC) || defined(__wasm_simd128__)
108
+ // we test _M_ARM64EC before _M_X64 because when _M_ARM64EC is defined, the
109
+ // microsoft compiler also defines _M_X64
110
+ #define PFFFT_SIMD_NEON
111
+ #elif defined(__x86_64__) || defined(__SSE__) || defined(_M_X64) || \
112
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
113
+ #define PFFFT_SIMD_SSE
114
+ #endif
115
+ #endif // PFFFT_SIMD_DISABLE
116
+
/*
   SIMD backend definitions.

   Exactly one branch below is compiled, chosen by the PFFFT_SIMD_* macro
   selected earlier. Every branch provides the same vocabulary:
     v4sf                       - the 4-float vector type
     SIMD_SZ                    - number of floats per vector
     VZERO/VMUL/VADD/VMADD/VSUB - element-wise arithmetic (VMADD(a,b,c)=a*b+c)
     LD_PS1(p)                  - broadcast the float lvalue p to all lanes
     INTERLEAVE2/UNINTERLEAVE2  - zip / unzip two vectors
     VTRANSPOSE4                - in-place 4x4 transpose of four vectors
     VSWAPHL(a, b)              - low half of b followed by high half of a
     VALIGNED(ptr)              - non-zero when ptr is suitably aligned
   If no backend matches, PFFFT_SIMD_DISABLE is defined so that the scalar
   fallback is used instead.
*/

/*
   Altivec support macros
*/
#ifdef PFFFT_SIMD_ALTIVEC
#include <altivec.h>
typedef vector float v4sf;
#define SIMD_SZ 4
#define VZERO() ((vector float)vec_splat_u8(0))
#define VMUL(a, b) vec_madd(a, b, VZERO())
#define VADD(a, b) vec_add(a, b)
#define VMADD(a, b, c) vec_madd(a, b, c)
#define VSUB(a, b) vec_sub(a, b)
/* `static` is required: a plain C99/C11 `inline` definition provides no
   external definition, so a call that is not inlined would fail to link. */
static inline v4sf ld_ps1(const float *p) {
  v4sf v = vec_lde(0, p);
  return vec_splat(vec_perm(v, v, vec_lvsl(0, p)), 0);
}
#define LD_PS1(p) ld_ps1(&(p))
#define INTERLEAVE2(in1, in2, out1, out2)                                      \
  {                                                                            \
    v4sf tmp__ = vec_mergeh(in1, in2);                                         \
    out2 = vec_mergel(in1, in2);                                               \
    out1 = tmp__;                                                              \
  }
#define UNINTERLEAVE2(in1, in2, out1, out2)                                    \
  {                                                                            \
    vector unsigned char vperm1 = (vector unsigned char){                      \
        0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27};             \
    vector unsigned char vperm2 = (vector unsigned char){                      \
        4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31};           \
    v4sf tmp__ = vec_perm(in1, in2, vperm1);                                   \
    out2 = vec_perm(in1, in2, vperm2);                                         \
    out1 = tmp__;                                                              \
  }
#define VTRANSPOSE4(x0, x1, x2, x3)                                            \
  {                                                                            \
    v4sf y0 = vec_mergeh(x0, x2);                                              \
    v4sf y1 = vec_mergel(x0, x2);                                              \
    v4sf y2 = vec_mergeh(x1, x3);                                              \
    v4sf y3 = vec_mergel(x1, x3);                                              \
    x0 = vec_mergeh(y0, y2);                                                   \
    x1 = vec_mergel(y0, y2);                                                   \
    x2 = vec_mergeh(y1, y3);                                                   \
    x3 = vec_mergel(y1, y3);                                                   \
  }
#define VSWAPHL(a, b)                                                          \
  vec_perm(a, b,                                                               \
           (vector unsigned char){16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10,    \
                                  11, 12, 13, 14, 15})
#define VALIGNED(ptr) ((((size_t)(ptr)) & 0xF) == 0)

/*
  SSE1 support macros
*/
#elif defined(PFFFT_SIMD_SSE)

#include <xmmintrin.h>
typedef __m128 v4sf;
// 4 floats by simd vector -- this is pretty much hardcoded in the
// preprocess/finalize functions anyway so you will have to work if you want
// to enable AVX with its 256-bit vectors.
#define SIMD_SZ 4
#define VZERO() _mm_setzero_ps()
#define VMUL(a, b) _mm_mul_ps(a, b)
#define VADD(a, b) _mm_add_ps(a, b)
#define VMADD(a, b, c) _mm_add_ps(_mm_mul_ps(a, b), c)
#define VSUB(a, b) _mm_sub_ps(a, b)
#define LD_PS1(p) _mm_set1_ps(p)
#define INTERLEAVE2(in1, in2, out1, out2)                                      \
  {                                                                            \
    v4sf tmp__ = _mm_unpacklo_ps(in1, in2);                                    \
    out2 = _mm_unpackhi_ps(in1, in2);                                          \
    out1 = tmp__;                                                              \
  }
#define UNINTERLEAVE2(in1, in2, out1, out2)                                    \
  {                                                                            \
    v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2, 0, 2, 0));            \
    out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3, 1, 3, 1));                  \
    out1 = tmp__;                                                              \
  }
#define VTRANSPOSE4(x0, x1, x2, x3) _MM_TRANSPOSE4_PS(x0, x1, x2, x3)
#define VSWAPHL(a, b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3, 2, 1, 0))
#define VALIGNED(ptr) ((((size_t)(ptr)) & 0xF) == 0)

/*
  ARM NEON support macros
*/
#elif defined(PFFFT_SIMD_NEON)
#include <arm_neon.h>
typedef float32x4_t v4sf;
#define SIMD_SZ 4
#define VZERO() vdupq_n_f32(0)
#define VMUL(a, b) vmulq_f32(a, b)
#define VADD(a, b) vaddq_f32(a, b)
#define VMADD(a, b, c) vmlaq_f32(c, a, b)
#define VSUB(a, b) vsubq_f32(a, b)
#define LD_PS1(p) vld1q_dup_f32(&(p))
#define INTERLEAVE2(in1, in2, out1, out2)                                      \
  {                                                                            \
    float32x4x2_t tmp__ = vzipq_f32(in1, in2);                                 \
    out1 = tmp__.val[0];                                                       \
    out2 = tmp__.val[1];                                                       \
  }
#define UNINTERLEAVE2(in1, in2, out1, out2)                                    \
  {                                                                            \
    float32x4x2_t tmp__ = vuzpq_f32(in1, in2);                                 \
    out1 = tmp__.val[0];                                                       \
    out2 = tmp__.val[1];                                                       \
  }
#define VTRANSPOSE4(x0, x1, x2, x3)                                            \
  {                                                                            \
    float32x4x2_t t0_ = vzipq_f32(x0, x2);                                     \
    float32x4x2_t t1_ = vzipq_f32(x1, x3);                                     \
    float32x4x2_t u0_ = vzipq_f32(t0_.val[0], t1_.val[0]);                     \
    float32x4x2_t u1_ = vzipq_f32(t0_.val[1], t1_.val[1]);                     \
    x0 = u0_.val[0];                                                           \
    x1 = u0_.val[1];                                                           \
    x2 = u1_.val[0];                                                           \
    x3 = u1_.val[1];                                                           \
  }
// marginally faster version
// #  define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32
// %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2),
// "+w"(x3)::); }
#define VSWAPHL(a, b) vcombine_f32(vget_low_f32(b), vget_high_f32(a))
#define VALIGNED(ptr) ((((size_t)(ptr)) & 0x3) == 0)
#else
#if !defined(PFFFT_SIMD_DISABLE)
#warning "building with simd disabled !\n";
#define PFFFT_SIMD_DISABLE // fallback to scalar code
#endif
#endif
248
+
249
+ // fallback mode for situations where no supported SIMD backend
250
+ // (Altivec/SSE/NEON) is available: use scalar code instead
251
+ #ifdef PFFFT_SIMD_DISABLE
252
+ typedef float v4sf;
253
+ #define SIMD_SZ 1
254
+ #define VZERO() 0.f
255
+ #define VMUL(a, b) ((a) * (b))
256
+ #define VADD(a, b) ((a) + (b))
257
+ #define VMADD(a, b, c) ((a) * (b) + (c))
258
+ #define VSUB(a, b) ((a) - (b))
259
+ #define LD_PS1(p) (p)
260
+ #define VALIGNED(ptr) ((((size_t)(ptr)) & 0x3) == 0)
261
+ #endif
262
+
// shortcuts for complex multiplications
//
// VCPLXMUL(ar, ai, br, bi):     (ar + i*ai) *= (br + i*bi)
// VCPLXMULCONJ(ar, ai, br, bi): (ar + i*ai) *= (br - i*bi)
//
// ar and ai must be modifiable lvalues; br and bi are expanded twice each.
// The scratch variable is named tmp__ (as in the INTERLEAVE2 macros) so that
// a caller variable called `tmp` can safely be passed as an argument.
#define VCPLXMUL(ar, ai, br, bi)                                               \
  {                                                                            \
    v4sf tmp__;                                                                \
    tmp__ = VMUL(ar, bi);                                                      \
    ar = VMUL(ar, br);                                                         \
    ar = VSUB(ar, VMUL(ai, bi));                                               \
    ai = VMUL(ai, br);                                                         \
    ai = VADD(ai, tmp__);                                                      \
  }
#define VCPLXMULCONJ(ar, ai, br, bi)                                           \
  {                                                                            \
    v4sf tmp__;                                                                \
    tmp__ = VMUL(ar, bi);                                                      \
    ar = VMUL(ar, br);                                                         \
    ar = VADD(ar, VMUL(ai, bi));                                               \
    ai = VMUL(ai, br);                                                         \
    ai = VSUB(ai, tmp__);                                                      \
  }
#ifndef SVMUL
// multiply a scalar with a vector (f must be an lvalue, see LD_PS1)
#define SVMUL(f, v) VMUL(LD_PS1(f), v)
#endif
286
+
287
+ #if !defined(PFFFT_SIMD_DISABLE)
288
+ typedef union v4sf_union {
289
+ v4sf v;
290
+ float f[4];
291
+ } v4sf_union;
292
+
293
+ #include <string.h>
294
+
295
+ #define assertv4(v, f0, f1, f2, f3) \
296
+ assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3))
297
+
298
+ /* detect bugs with the vector support macros */
299
+ void validate_pffft_simd(void) {
300
+ float f[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
301
+ v4sf_union a0, a1, a2, a3, t, u;
302
+ memcpy(a0.f, f, 4 * sizeof(float));
303
+ memcpy(a1.f, f + 4, 4 * sizeof(float));
304
+ memcpy(a2.f, f + 8, 4 * sizeof(float));
305
+ memcpy(a3.f, f + 12, 4 * sizeof(float));
306
+
307
+ t = a0;
308
+ u = a1;
309
+ t.v = VZERO();
310
+ printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
311
+ assertv4(t, 0, 0, 0, 0);
312
+ t.v = VADD(a1.v, a2.v);
313
+ printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
314
+ assertv4(t, 12, 14, 16, 18);
315
+ t.v = VMUL(a1.v, a2.v);
316
+ printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
317
+ assertv4(t, 32, 45, 60, 77);
318
+ t.v = VMADD(a1.v, a2.v, a0.v);
319
+ printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2],
320
+ t.f[3]);
321
+ assertv4(t, 32, 46, 62, 80);
322
+
323
+ INTERLEAVE2(a1.v, a2.v, t.v, u.v);
324
+ printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0],
325
+ t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
326
+ assertv4(t, 4, 8, 5, 9);
327
+ assertv4(u, 6, 10, 7, 11);
328
+ UNINTERLEAVE2(a1.v, a2.v, t.v, u.v);
329
+ printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
330
+ t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
331
+ assertv4(t, 4, 6, 8, 10);
332
+ assertv4(u, 5, 7, 9, 11);
333
+
334
+ t.v = LD_PS1(f[15]);
335
+ printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
336
+ assertv4(t, 15, 15, 15, 15);
337
+ t.v = VSWAPHL(a1.v, a2.v);
338
+ printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2],
339
+ t.f[3]);
340
+ assertv4(t, 8, 9, 6, 7);
341
+ VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
342
+ printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] "
343
+ "[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
344
+ a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
345
+ a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2],
346
+ a3.f[3]);
347
+ assertv4(a0, 0, 4, 8, 12);
348
+ assertv4(a1, 1, 5, 9, 13);
349
+ assertv4(a2, 2, 6, 10, 14);
350
+ assertv4(a3, 3, 7, 11, 15);
351
+ }
352
+ #else
353
+ void validate_pffft_simd() {
354
+ } // allow test_pffft.c to call this function even when simd is not available..
355
+ #endif //! PFFFT_SIMD_DISABLE
356
+
/* SSE and co like 16-bytes aligned pointers */
// with a 64-byte alignment, we are even aligned on L2 cache lines...
#define MALLOC_V4SF_ALIGNMENT 64

/*
   Allocate nb_bytes of storage whose address is a multiple of
   MALLOC_V4SF_ALIGNMENT.

   The block is over-allocated by MALLOC_V4SF_ALIGNMENT bytes; the pointer
   returned by malloc is stored in the pointer-sized slot located just before
   the aligned address, where pffft_aligned_free() reads it back.

   Returns NULL when malloc fails or when nb_bytes is so large that adding
   the padding would wrap around size_t. The result must be released with
   pffft_aligned_free(), never with free().
*/
void *pffft_aligned_malloc(size_t nb_bytes) {
  void *raw, *aligned;

  /* without this check a huge request wraps to a tiny malloc() */
  if (nb_bytes > (size_t)-1 - MALLOC_V4SF_ALIGNMENT)
    return NULL;

  raw = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
  if (!raw)
    return NULL;

  /* next multiple of the alignment strictly above raw: this always leaves
     room in front of `aligned` for the bookkeeping slot */
  aligned = (void *)(((size_t)raw + MALLOC_V4SF_ALIGNMENT) &
                     (~((size_t)(MALLOC_V4SF_ALIGNMENT - 1))));
  *((void **)aligned - 1) = raw;
  return aligned;
}
369
+
/*
   Release a block obtained from pffft_aligned_malloc(). The pointer that
   malloc originally returned is read from the slot just before p and handed
   to free(). Passing NULL is a no-op.
*/
void pffft_aligned_free(void *p) {
  void **slot;

  if (p == NULL)
    return;
  slot = (void **)p;
  free(slot[-1]);
}
374
+
375
+ int pffft_simd_size(void) { return SIMD_SZ; }
376
+
377
+ /*
378
+ passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2
379
+ */
380
+ static NEVER_INLINE(void) passf2_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
381
+ const float *wa1, float fsign) {
382
+ int k, i;
383
+ int l1ido = l1 * ido;
384
+ if (ido <= 2) {
385
+ for (k = 0; k < l1ido; k += ido, ch += ido, cc += 2 * ido) {
386
+ ch[0] = VADD(cc[0], cc[ido + 0]);
387
+ ch[l1ido] = VSUB(cc[0], cc[ido + 0]);
388
+ ch[1] = VADD(cc[1], cc[ido + 1]);
389
+ ch[l1ido + 1] = VSUB(cc[1], cc[ido + 1]);
390
+ }
391
+ } else {
392
+ for (k = 0; k < l1ido; k += ido, ch += ido, cc += 2 * ido) {
393
+ for (i = 0; i < ido - 1; i += 2) {
394
+ v4sf tr2 = VSUB(cc[i + 0], cc[i + ido + 0]);
395
+ v4sf ti2 = VSUB(cc[i + 1], cc[i + ido + 1]);
396
+ v4sf wr = LD_PS1(wa1[i]), wi = VMUL(LD_PS1(fsign), LD_PS1(wa1[i + 1]));
397
+ ch[i] = VADD(cc[i + 0], cc[i + ido + 0]);
398
+ ch[i + 1] = VADD(cc[i + 1], cc[i + ido + 1]);
399
+ VCPLXMUL(tr2, ti2, wr, wi);
400
+ ch[i + l1ido] = tr2;
401
+ ch[i + l1ido + 1] = ti2;
402
+ }
403
+ }
404
+ }
405
+ }
406
+
407
+ /*
408
+ passf3 and passb3 has been merged here, fsign = -1 for passf3, +1 for passb3
409
+ */
410
+ static NEVER_INLINE(void)
411
+ passf3_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1,
412
+ const float *wa2, float fsign) {
413
+ static const float taur = -0.5f;
414
+ float taui = 0.866025403784439f * fsign;
415
+ int i, k;
416
+ v4sf tr2, ti2, cr2, ci2, cr3, ci3, dr2, di2, dr3, di3;
417
+ int l1ido = l1 * ido;
418
+ float wr1, wi1, wr2, wi2;
419
+ assert(ido > 2);
420
+ for (k = 0; k < l1ido; k += ido, cc += 3 * ido, ch += ido) {
421
+ for (i = 0; i < ido - 1; i += 2) {
422
+ tr2 = VADD(cc[i + ido], cc[i + 2 * ido]);
423
+ cr2 = VADD(cc[i], SVMUL(taur, tr2));
424
+ ch[i] = VADD(cc[i], tr2);
425
+ ti2 = VADD(cc[i + ido + 1], cc[i + 2 * ido + 1]);
426
+ ci2 = VADD(cc[i + 1], SVMUL(taur, ti2));
427
+ ch[i + 1] = VADD(cc[i + 1], ti2);
428
+ cr3 = SVMUL(taui, VSUB(cc[i + ido], cc[i + 2 * ido]));
429
+ ci3 = SVMUL(taui, VSUB(cc[i + ido + 1], cc[i + 2 * ido + 1]));
430
+ dr2 = VSUB(cr2, ci3);
431
+ dr3 = VADD(cr2, ci3);
432
+ di2 = VADD(ci2, cr3);
433
+ di3 = VSUB(ci2, cr3);
434
+ wr1 = wa1[i];
435
+ wi1 = fsign * wa1[i + 1];
436
+ wr2 = wa2[i];
437
+ wi2 = fsign * wa2[i + 1];
438
+ VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1));
439
+ ch[i + l1ido] = dr2;
440
+ ch[i + l1ido + 1] = di2;
441
+ VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2));
442
+ ch[i + 2 * l1ido] = dr3;
443
+ ch[i + 2 * l1ido + 1] = di3;
444
+ }
445
+ }
446
+ } /* passf3 */
447
+
448
+ static NEVER_INLINE(void)
449
+ passf4_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1,
450
+ const float *wa2, const float *wa3, float fsign) {
451
+ /* isign == -1 for forward transform and +1 for backward transform */
452
+
453
+ int i, k;
454
+ v4sf ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
455
+ int l1ido = l1 * ido;
456
+ if (ido == 2) {
457
+ for (k = 0; k < l1ido; k += ido, ch += ido, cc += 4 * ido) {
458
+ tr1 = VSUB(cc[0], cc[2 * ido + 0]);
459
+ tr2 = VADD(cc[0], cc[2 * ido + 0]);
460
+ ti1 = VSUB(cc[1], cc[2 * ido + 1]);
461
+ ti2 = VADD(cc[1], cc[2 * ido + 1]);
462
+ ti4 = VMUL(VSUB(cc[1 * ido + 0], cc[3 * ido + 0]), LD_PS1(fsign));
463
+ tr4 = VMUL(VSUB(cc[3 * ido + 1], cc[1 * ido + 1]), LD_PS1(fsign));
464
+ tr3 = VADD(cc[ido + 0], cc[3 * ido + 0]);
465
+ ti3 = VADD(cc[ido + 1], cc[3 * ido + 1]);
466
+
467
+ ch[0 * l1ido + 0] = VADD(tr2, tr3);
468
+ ch[0 * l1ido + 1] = VADD(ti2, ti3);
469
+ ch[1 * l1ido + 0] = VADD(tr1, tr4);
470
+ ch[1 * l1ido + 1] = VADD(ti1, ti4);
471
+ ch[2 * l1ido + 0] = VSUB(tr2, tr3);
472
+ ch[2 * l1ido + 1] = VSUB(ti2, ti3);
473
+ ch[3 * l1ido + 0] = VSUB(tr1, tr4);
474
+ ch[3 * l1ido + 1] = VSUB(ti1, ti4);
475
+ }
476
+ } else {
477
+ for (k = 0; k < l1ido; k += ido, ch += ido, cc += 4 * ido) {
478
+ for (i = 0; i < ido - 1; i += 2) {
479
+ float wr1, wi1, wr2, wi2, wr3, wi3;
480
+ tr1 = VSUB(cc[i + 0], cc[i + 2 * ido + 0]);
481
+ tr2 = VADD(cc[i + 0], cc[i + 2 * ido + 0]);
482
+ ti1 = VSUB(cc[i + 1], cc[i + 2 * ido + 1]);
483
+ ti2 = VADD(cc[i + 1], cc[i + 2 * ido + 1]);
484
+ tr4 =
485
+ VMUL(VSUB(cc[i + 3 * ido + 1], cc[i + 1 * ido + 1]), LD_PS1(fsign));
486
+ ti4 =
487
+ VMUL(VSUB(cc[i + 1 * ido + 0], cc[i + 3 * ido + 0]), LD_PS1(fsign));
488
+ tr3 = VADD(cc[i + ido + 0], cc[i + 3 * ido + 0]);
489
+ ti3 = VADD(cc[i + ido + 1], cc[i + 3 * ido + 1]);
490
+
491
+ ch[i] = VADD(tr2, tr3);
492
+ cr3 = VSUB(tr2, tr3);
493
+ ch[i + 1] = VADD(ti2, ti3);
494
+ ci3 = VSUB(ti2, ti3);
495
+
496
+ cr2 = VADD(tr1, tr4);
497
+ cr4 = VSUB(tr1, tr4);
498
+ ci2 = VADD(ti1, ti4);
499
+ ci4 = VSUB(ti1, ti4);
500
+ wr1 = wa1[i];
501
+ wi1 = fsign * wa1[i + 1];
502
+ VCPLXMUL(cr2, ci2, LD_PS1(wr1), LD_PS1(wi1));
503
+ wr2 = wa2[i];
504
+ wi2 = fsign * wa2[i + 1];
505
+ ch[i + l1ido] = cr2;
506
+ ch[i + l1ido + 1] = ci2;
507
+
508
+ VCPLXMUL(cr3, ci3, LD_PS1(wr2), LD_PS1(wi2));
509
+ wr3 = wa3[i];
510
+ wi3 = fsign * wa3[i + 1];
511
+ ch[i + 2 * l1ido] = cr3;
512
+ ch[i + 2 * l1ido + 1] = ci3;
513
+
514
+ VCPLXMUL(cr4, ci4, LD_PS1(wr3), LD_PS1(wi3));
515
+ ch[i + 3 * l1ido] = cr4;
516
+ ch[i + 3 * l1ido + 1] = ci4;
517
+ }
518
+ }
519
+ }
520
+ } /* passf4 */
521
+
522
+ /*
523
+ passf5 and passb5 has been merged here, fsign = -1 for passf5, +1 for passb5
524
+ */
525
+ static NEVER_INLINE(void)
526
+ passf5_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1,
527
+ const float *wa2, const float *wa3, const float *wa4,
528
+ float fsign) {
529
+ static const float tr11 = .309016994374947f;
530
+ const float ti11 = .951056516295154f * fsign;
531
+ static const float tr12 = -.809016994374947f;
532
+ const float ti12 = .587785252292473f * fsign;
533
+
534
+ /* Local variables */
535
+ int i, k;
536
+ v4sf ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
537
+ ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
538
+
539
+ float wr1, wi1, wr2, wi2, wr3, wi3, wr4, wi4;
540
+
541
+ #define cc_ref(a_1, a_2) cc[(a_2 - 1) * ido + a_1 + 1]
542
+ #define ch_ref(a_1, a_3) ch[(a_3 - 1) * l1 * ido + a_1 + 1]
543
+
544
+ assert(ido > 2);
545
+ for (k = 0; k < l1; ++k, cc += 5 * ido, ch += ido) {
546
+ for (i = 0; i < ido - 1; i += 2) {
547
+ ti5 = VSUB(cc_ref(i, 2), cc_ref(i, 5));
548
+ ti2 = VADD(cc_ref(i, 2), cc_ref(i, 5));
549
+ ti4 = VSUB(cc_ref(i, 3), cc_ref(i, 4));
550
+ ti3 = VADD(cc_ref(i, 3), cc_ref(i, 4));
551
+ tr5 = VSUB(cc_ref(i - 1, 2), cc_ref(i - 1, 5));
552
+ tr2 = VADD(cc_ref(i - 1, 2), cc_ref(i - 1, 5));
553
+ tr4 = VSUB(cc_ref(i - 1, 3), cc_ref(i - 1, 4));
554
+ tr3 = VADD(cc_ref(i - 1, 3), cc_ref(i - 1, 4));
555
+ ch_ref(i - 1, 1) = VADD(cc_ref(i - 1, 1), VADD(tr2, tr3));
556
+ ch_ref(i, 1) = VADD(cc_ref(i, 1), VADD(ti2, ti3));
557
+ cr2 = VADD(cc_ref(i - 1, 1), VADD(SVMUL(tr11, tr2), SVMUL(tr12, tr3)));
558
+ ci2 = VADD(cc_ref(i, 1), VADD(SVMUL(tr11, ti2), SVMUL(tr12, ti3)));
559
+ cr3 = VADD(cc_ref(i - 1, 1), VADD(SVMUL(tr12, tr2), SVMUL(tr11, tr3)));
560
+ ci3 = VADD(cc_ref(i, 1), VADD(SVMUL(tr12, ti2), SVMUL(tr11, ti3)));
561
+ cr5 = VADD(SVMUL(ti11, tr5), SVMUL(ti12, tr4));
562
+ ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4));
563
+ cr4 = VSUB(SVMUL(ti12, tr5), SVMUL(ti11, tr4));
564
+ ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4));
565
+ dr3 = VSUB(cr3, ci4);
566
+ dr4 = VADD(cr3, ci4);
567
+ di3 = VADD(ci3, cr4);
568
+ di4 = VSUB(ci3, cr4);
569
+ dr5 = VADD(cr2, ci5);
570
+ dr2 = VSUB(cr2, ci5);
571
+ di5 = VSUB(ci2, cr5);
572
+ di2 = VADD(ci2, cr5);
573
+ wr1 = wa1[i];
574
+ wi1 = fsign * wa1[i + 1];
575
+ wr2 = wa2[i];
576
+ wi2 = fsign * wa2[i + 1];
577
+ wr3 = wa3[i];
578
+ wi3 = fsign * wa3[i + 1];
579
+ wr4 = wa4[i];
580
+ wi4 = fsign * wa4[i + 1];
581
+ VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1));
582
+ ch_ref(i - 1, 2) = dr2;
583
+ ch_ref(i, 2) = di2;
584
+ VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2));
585
+ ch_ref(i - 1, 3) = dr3;
586
+ ch_ref(i, 3) = di3;
587
+ VCPLXMUL(dr4, di4, LD_PS1(wr3), LD_PS1(wi3));
588
+ ch_ref(i - 1, 4) = dr4;
589
+ ch_ref(i, 4) = di4;
590
+ VCPLXMUL(dr5, di5, LD_PS1(wr4), LD_PS1(wi4));
591
+ ch_ref(i - 1, 5) = dr5;
592
+ ch_ref(i, 5) = di5;
593
+ }
594
+ }
595
+ #undef ch_ref
596
+ #undef cc_ref
597
+ }
598
+
599
+ static NEVER_INLINE(void) radf2_ps(int ido, int l1, const v4sf *RESTRICT cc,
600
+ v4sf *RESTRICT ch, const float *wa1) {
601
+ static const float minus_one = -1.f;
602
+ int i, k, l1ido = l1 * ido;
603
+ for (k = 0; k < l1ido; k += ido) {
604
+ v4sf a = cc[k], b = cc[k + l1ido];
605
+ ch[2 * k] = VADD(a, b);
606
+ ch[2 * (k + ido) - 1] = VSUB(a, b);
607
+ }
608
+ if (ido < 2)
609
+ return;
610
+ if (ido != 2) {
611
+ for (k = 0; k < l1ido; k += ido) {
612
+ for (i = 2; i < ido; i += 2) {
613
+ v4sf tr2 = cc[i - 1 + k + l1ido], ti2 = cc[i + k + l1ido];
614
+ v4sf br = cc[i - 1 + k], bi = cc[i + k];
615
+ VCPLXMULCONJ(tr2, ti2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
616
+ ch[i + 2 * k] = VADD(bi, ti2);
617
+ ch[2 * (k + ido) - i] = VSUB(ti2, bi);
618
+ ch[i - 1 + 2 * k] = VADD(br, tr2);
619
+ ch[2 * (k + ido) - i - 1] = VSUB(br, tr2);
620
+ }
621
+ }
622
+ if (ido % 2 == 1)
623
+ return;
624
+ }
625
+ for (k = 0; k < l1ido; k += ido) {
626
+ ch[2 * k + ido] = SVMUL(minus_one, cc[ido - 1 + k + l1ido]);
627
+ ch[2 * k + ido - 1] = cc[k + ido - 1];
628
+ }
629
+ } /* radf2 */
630
+
631
+ static NEVER_INLINE(void)
632
+ radb2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1) {
633
+ static const float minus_two = -2;
634
+ int i, k, l1ido = l1 * ido;
635
+ v4sf a, b, c, d, tr2, ti2;
636
+ for (k = 0; k < l1ido; k += ido) {
637
+ a = cc[2 * k];
638
+ b = cc[2 * (k + ido) - 1];
639
+ ch[k] = VADD(a, b);
640
+ ch[k + l1ido] = VSUB(a, b);
641
+ }
642
+ if (ido < 2)
643
+ return;
644
+ if (ido != 2) {
645
+ for (k = 0; k < l1ido; k += ido) {
646
+ for (i = 2; i < ido; i += 2) {
647
+ a = cc[i - 1 + 2 * k];
648
+ b = cc[2 * (k + ido) - i - 1];
649
+ c = cc[i + 0 + 2 * k];
650
+ d = cc[2 * (k + ido) - i + 0];
651
+ ch[i - 1 + k] = VADD(a, b);
652
+ tr2 = VSUB(a, b);
653
+ ch[i + 0 + k] = VSUB(c, d);
654
+ ti2 = VADD(c, d);
655
+ VCPLXMUL(tr2, ti2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
656
+ ch[i - 1 + k + l1ido] = tr2;
657
+ ch[i + 0 + k + l1ido] = ti2;
658
+ }
659
+ }
660
+ if (ido % 2 == 1)
661
+ return;
662
+ }
663
+ for (k = 0; k < l1ido; k += ido) {
664
+ a = cc[2 * k + ido - 1];
665
+ b = cc[2 * k + ido];
666
+ ch[k + ido - 1] = VADD(a, a);
667
+ ch[k + ido - 1 + l1ido] = SVMUL(minus_two, b);
668
+ }
669
+ } /* radb2 */
670
+
671
+ static void radf3_ps(int ido, int l1, const v4sf *RESTRICT cc,
672
+ v4sf *RESTRICT ch, const float *wa1, const float *wa2) {
673
+ static const float taur = -0.5f;
674
+ static const float taui = 0.866025403784439f;
675
+ int i, k, ic;
676
+ v4sf ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3, wr1, wi1, wr2, wi2;
677
+ for (k = 0; k < l1; k++) {
678
+ cr2 = VADD(cc[(k + l1) * ido], cc[(k + 2 * l1) * ido]);
679
+ ch[3 * k * ido] = VADD(cc[k * ido], cr2);
680
+ ch[(3 * k + 2) * ido] =
681
+ SVMUL(taui, VSUB(cc[(k + l1 * 2) * ido], cc[(k + l1) * ido]));
682
+ ch[ido - 1 + (3 * k + 1) * ido] = VADD(cc[k * ido], SVMUL(taur, cr2));
683
+ }
684
+ if (ido == 1)
685
+ return;
686
+ for (k = 0; k < l1; k++) {
687
+ for (i = 2; i < ido; i += 2) {
688
+ ic = ido - i;
689
+ wr1 = LD_PS1(wa1[i - 2]);
690
+ wi1 = LD_PS1(wa1[i - 1]);
691
+ dr2 = cc[i - 1 + (k + l1) * ido];
692
+ di2 = cc[i + (k + l1) * ido];
693
+ VCPLXMULCONJ(dr2, di2, wr1, wi1);
694
+
695
+ wr2 = LD_PS1(wa2[i - 2]);
696
+ wi2 = LD_PS1(wa2[i - 1]);
697
+ dr3 = cc[i - 1 + (k + l1 * 2) * ido];
698
+ di3 = cc[i + (k + l1 * 2) * ido];
699
+ VCPLXMULCONJ(dr3, di3, wr2, wi2);
700
+
701
+ cr2 = VADD(dr2, dr3);
702
+ ci2 = VADD(di2, di3);
703
+ ch[i - 1 + 3 * k * ido] = VADD(cc[i - 1 + k * ido], cr2);
704
+ ch[i + 3 * k * ido] = VADD(cc[i + k * ido], ci2);
705
+ tr2 = VADD(cc[i - 1 + k * ido], SVMUL(taur, cr2));
706
+ ti2 = VADD(cc[i + k * ido], SVMUL(taur, ci2));
707
+ tr3 = SVMUL(taui, VSUB(di2, di3));
708
+ ti3 = SVMUL(taui, VSUB(dr3, dr2));
709
+ ch[i - 1 + (3 * k + 2) * ido] = VADD(tr2, tr3);
710
+ ch[ic - 1 + (3 * k + 1) * ido] = VSUB(tr2, tr3);
711
+ ch[i + (3 * k + 2) * ido] = VADD(ti2, ti3);
712
+ ch[ic + (3 * k + 1) * ido] = VSUB(ti3, ti2);
713
+ }
714
+ }
715
+ } /* radf3 */
716
+
717
+ static void radb3_ps(int ido, int l1, const v4sf *RESTRICT cc,
718
+ v4sf *RESTRICT ch, const float *wa1, const float *wa2) {
719
+ static const float taur = -0.5f;
720
+ static const float taui = 0.866025403784439f;
721
+ static const float taui_2 = 0.866025403784439f * 2;
722
+ int i, k, ic;
723
+ v4sf ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
724
+ for (k = 0; k < l1; k++) {
725
+ tr2 = cc[ido - 1 + (3 * k + 1) * ido];
726
+ tr2 = VADD(tr2, tr2);
727
+ cr2 = VMADD(LD_PS1(taur), tr2, cc[3 * k * ido]);
728
+ ch[k * ido] = VADD(cc[3 * k * ido], tr2);
729
+ ci3 = SVMUL(taui_2, cc[(3 * k + 2) * ido]);
730
+ ch[(k + l1) * ido] = VSUB(cr2, ci3);
731
+ ch[(k + 2 * l1) * ido] = VADD(cr2, ci3);
732
+ }
733
+ if (ido == 1)
734
+ return;
735
+ for (k = 0; k < l1; k++) {
736
+ for (i = 2; i < ido; i += 2) {
737
+ ic = ido - i;
738
+ tr2 = VADD(cc[i - 1 + (3 * k + 2) * ido], cc[ic - 1 + (3 * k + 1) * ido]);
739
+ cr2 = VMADD(LD_PS1(taur), tr2, cc[i - 1 + 3 * k * ido]);
740
+ ch[i - 1 + k * ido] = VADD(cc[i - 1 + 3 * k * ido], tr2);
741
+ ti2 = VSUB(cc[i + (3 * k + 2) * ido], cc[ic + (3 * k + 1) * ido]);
742
+ ci2 = VMADD(LD_PS1(taur), ti2, cc[i + 3 * k * ido]);
743
+ ch[i + k * ido] = VADD(cc[i + 3 * k * ido], ti2);
744
+ cr3 = SVMUL(taui, VSUB(cc[i - 1 + (3 * k + 2) * ido],
745
+ cc[ic - 1 + (3 * k + 1) * ido]));
746
+ ci3 = SVMUL(taui,
747
+ VADD(cc[i + (3 * k + 2) * ido], cc[ic + (3 * k + 1) * ido]));
748
+ dr2 = VSUB(cr2, ci3);
749
+ dr3 = VADD(cr2, ci3);
750
+ di2 = VADD(ci2, cr3);
751
+ di3 = VSUB(ci2, cr3);
752
+ VCPLXMUL(dr2, di2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
753
+ ch[i - 1 + (k + l1) * ido] = dr2;
754
+ ch[i + (k + l1) * ido] = di2;
755
+ VCPLXMUL(dr3, di3, LD_PS1(wa2[i - 2]), LD_PS1(wa2[i - 1]));
756
+ ch[i - 1 + (k + 2 * l1) * ido] = dr3;
757
+ ch[i + (k + 2 * l1) * ido] = di3;
758
+ }
759
+ }
760
+ } /* radb3 */
761
+
762
+ static NEVER_INLINE(void)
763
+ radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch,
764
+ const float *RESTRICT wa1, const float *RESTRICT wa2,
765
+ const float *RESTRICT wa3) {
766
+ static const float minus_hsqt2 = (float)-0.7071067811865475;
767
+ int i, k, l1ido = l1 * ido;
768
+ {
769
+ const v4sf *RESTRICT cc_ = cc, *RESTRICT cc_end = cc + l1ido;
770
+ v4sf *RESTRICT ch_ = ch;
771
+ while (cc < cc_end) {
772
+ // this loop represents between 25% and 40% of total radf4_ps cost !
773
+ v4sf a0 = cc[0], a1 = cc[l1ido];
774
+ v4sf a2 = cc[2 * l1ido], a3 = cc[3 * l1ido];
775
+ v4sf tr1 = VADD(a1, a3);
776
+ v4sf tr2 = VADD(a0, a2);
777
+ ch[2 * ido - 1] = VSUB(a0, a2);
778
+ ch[2 * ido] = VSUB(a3, a1);
779
+ ch[0] = VADD(tr1, tr2);
780
+ ch[4 * ido - 1] = VSUB(tr2, tr1);
781
+ cc += ido;
782
+ ch += 4 * ido;
783
+ }
784
+ cc = cc_;
785
+ ch = ch_;
786
+ }
787
+ if (ido < 2)
788
+ return;
789
+ if (ido != 2) {
790
+ for (k = 0; k < l1ido; k += ido) {
791
+ const v4sf *RESTRICT pc = (v4sf *)(cc + 1 + k);
792
+ for (i = 2; i < ido; i += 2, pc += 2) {
793
+ int ic = ido - i;
794
+ v4sf wr, wi, cr2, ci2, cr3, ci3, cr4, ci4;
795
+ v4sf tr1, ti1, tr2, ti2, tr3, ti3, tr4, ti4;
796
+
797
+ cr2 = pc[1 * l1ido + 0];
798
+ ci2 = pc[1 * l1ido + 1];
799
+ wr = LD_PS1(wa1[i - 2]);
800
+ wi = LD_PS1(wa1[i - 1]);
801
+ VCPLXMULCONJ(cr2, ci2, wr, wi);
802
+
803
+ cr3 = pc[2 * l1ido + 0];
804
+ ci3 = pc[2 * l1ido + 1];
805
+ wr = LD_PS1(wa2[i - 2]);
806
+ wi = LD_PS1(wa2[i - 1]);
807
+ VCPLXMULCONJ(cr3, ci3, wr, wi);
808
+
809
+ cr4 = pc[3 * l1ido];
810
+ ci4 = pc[3 * l1ido + 1];
811
+ wr = LD_PS1(wa3[i - 2]);
812
+ wi = LD_PS1(wa3[i - 1]);
813
+ VCPLXMULCONJ(cr4, ci4, wr, wi);
814
+
815
+ /* at this point, on SSE, five of "cr2 cr3 cr4 ci2 ci3 ci4" should be
816
+ * loaded in registers */
817
+
818
+ tr1 = VADD(cr2, cr4);
819
+ tr4 = VSUB(cr4, cr2);
820
+ tr2 = VADD(pc[0], cr3);
821
+ tr3 = VSUB(pc[0], cr3);
822
+ ch[i - 1 + 4 * k] = VADD(tr1, tr2);
823
+ ch[ic - 1 + 4 * k + 3 * ido] =
824
+ VSUB(tr2, tr1); // at this point tr1 and tr2 can be disposed
825
+ ti1 = VADD(ci2, ci4);
826
+ ti4 = VSUB(ci2, ci4);
827
+ ch[i - 1 + 4 * k + 2 * ido] = VADD(ti4, tr3);
828
+ ch[ic - 1 + 4 * k + 1 * ido] = VSUB(tr3, ti4); // dispose tr3, ti4
829
+ ti2 = VADD(pc[1], ci3);
830
+ ti3 = VSUB(pc[1], ci3);
831
+ ch[i + 4 * k] = VADD(ti1, ti2);
832
+ ch[ic + 4 * k + 3 * ido] = VSUB(ti1, ti2);
833
+ ch[i + 4 * k + 2 * ido] = VADD(tr4, ti3);
834
+ ch[ic + 4 * k + 1 * ido] = VSUB(tr4, ti3);
835
+ }
836
+ }
837
+ if (ido % 2 == 1)
838
+ return;
839
+ }
840
+ for (k = 0; k < l1ido; k += ido) {
841
+ v4sf a = cc[ido - 1 + k + l1ido], b = cc[ido - 1 + k + 3 * l1ido];
842
+ v4sf c = cc[ido - 1 + k], d = cc[ido - 1 + k + 2 * l1ido];
843
+ v4sf ti1 = SVMUL(minus_hsqt2, VADD(a, b));
844
+ v4sf tr1 = SVMUL(minus_hsqt2, VSUB(b, a));
845
+ ch[ido - 1 + 4 * k] = VADD(tr1, c);
846
+ ch[ido - 1 + 4 * k + 2 * ido] = VSUB(c, tr1);
847
+ ch[4 * k + 1 * ido] = VSUB(ti1, d);
848
+ ch[4 * k + 3 * ido] = VADD(ti1, d);
849
+ }
850
+ } /* radf4 */
851
+
852
/*
 * radb4_ps: radix-4 butterfly pass of the backward real FFT (dispatched from
 * rfftb1_ps when the current factor is 4).
 *
 * cc is the input array and ch the output array (both of v4sf); wa1, wa2 and
 * wa3 hold scalar twiddle factors that are broadcast with LD_PS1 and applied
 * with VCPLXMUL.
 *
 * The work is done in three stages:
 *   1. the first column of each of the l1 groups (always),
 *   2. the interior column pairs (only when ido > 2),
 *   3. the last column (only when ido is even).
 */
+ static NEVER_INLINE(void)
853
+ radb4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch,
854
+ const float *RESTRICT wa1, const float *RESTRICT wa2,
855
+ const float *RESTRICT wa3) {
856
+ static const float minus_sqrt2 = (float)-1.414213562373095;
857
+ static const float two = 2.f;
858
+ int i, k, l1ido = l1 * ido;
859
+ v4sf ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
860
/* Stage 1: cc and ch are advanced in place, then restored from cc_ / ch_. */
+ {
861
+ const v4sf *RESTRICT cc_ = cc, *RESTRICT ch_end = ch + l1ido;
862
+ v4sf *ch_ = ch;
863
+ while (ch < ch_end) {
864
+ v4sf a = cc[0], b = cc[4 * ido - 1];
865
+ v4sf c = cc[2 * ido], d = cc[2 * ido - 1];
866
+ tr3 = SVMUL(two, d);
867
+ tr2 = VADD(a, b);
868
+ tr1 = VSUB(a, b);
869
+ tr4 = SVMUL(two, c);
870
+ ch[0 * l1ido] = VADD(tr2, tr3);
871
+ ch[2 * l1ido] = VSUB(tr2, tr3);
872
+ ch[1 * l1ido] = VSUB(tr1, tr4);
873
+ ch[3 * l1ido] = VADD(tr1, tr4);
874
+
875
+ cc += 4 * ido;
876
+ ch += ido;
877
+ }
878
+ cc = cc_;
879
+ ch = ch_;
880
+ }
881
+ if (ido < 2)
882
+ return;
883
/* Stage 2: interior columns, two at a time; ph walks the four output planes
   (spaced l1ido apart) and is rewound by 3 * l1ido at the end of each step. */
+ if (ido != 2) {
884
+ for (k = 0; k < l1ido; k += ido) {
885
+ const v4sf *RESTRICT pc = (v4sf *)(cc - 1 + 4 * k);
886
+ v4sf *RESTRICT ph = (v4sf *)(ch + k + 1);
887
+ for (i = 2; i < ido; i += 2) {
888
+
889
+ tr1 = VSUB(pc[i], pc[4 * ido - i]);
890
+ tr2 = VADD(pc[i], pc[4 * ido - i]);
891
+ ti4 = VSUB(pc[2 * ido + i], pc[2 * ido - i]);
892
+ tr3 = VADD(pc[2 * ido + i], pc[2 * ido - i]);
893
+ ph[0] = VADD(tr2, tr3);
894
+ cr3 = VSUB(tr2, tr3);
895
+
896
+ ti3 = VSUB(pc[2 * ido + i + 1], pc[2 * ido - i + 1]);
897
+ tr4 = VADD(pc[2 * ido + i + 1], pc[2 * ido - i + 1]);
898
+ cr2 = VSUB(tr1, tr4);
899
+ cr4 = VADD(tr1, tr4);
900
+
901
+ ti1 = VADD(pc[i + 1], pc[4 * ido - i + 1]);
902
+ ti2 = VSUB(pc[i + 1], pc[4 * ido - i + 1]);
903
+
904
+ ph[1] = VADD(ti2, ti3);
905
+ ph += l1ido;
906
+ ci3 = VSUB(ti2, ti3);
907
+ ci2 = VADD(ti1, ti4);
908
+ ci4 = VSUB(ti1, ti4);
909
+ VCPLXMUL(cr2, ci2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
910
+ ph[0] = cr2;
911
+ ph[1] = ci2;
912
+ ph += l1ido;
913
+ VCPLXMUL(cr3, ci3, LD_PS1(wa2[i - 2]), LD_PS1(wa2[i - 1]));
914
+ ph[0] = cr3;
915
+ ph[1] = ci3;
916
+ ph += l1ido;
917
+ VCPLXMUL(cr4, ci4, LD_PS1(wa3[i - 2]), LD_PS1(wa3[i - 1]));
918
+ ph[0] = cr4;
919
+ ph[1] = ci4;
920
+ ph = ph - 3 * l1ido + 2;
921
+ }
922
+ }
923
+ if (ido % 2 == 1)
924
+ return;
925
+ }
926
/* Stage 3: last column of every group (reached only when ido is even). */
+ for (k = 0; k < l1ido; k += ido) {
927
+ int i0 = 4 * k + ido;
928
+ v4sf c = cc[i0 - 1], d = cc[i0 + 2 * ido - 1];
929
+ v4sf a = cc[i0 + 0], b = cc[i0 + 2 * ido + 0];
930
+ tr1 = VSUB(c, d);
931
+ tr2 = VADD(c, d);
932
+ ti1 = VADD(b, a);
933
+ ti2 = VSUB(b, a);
934
+ ch[ido - 1 + k + 0 * l1ido] = VADD(tr2, tr2);
935
+ ch[ido - 1 + k + 1 * l1ido] = SVMUL(minus_sqrt2, VSUB(ti1, tr1));
936
+ ch[ido - 1 + k + 2 * l1ido] = VADD(ti2, ti2);
937
+ ch[ido - 1 + k + 3 * l1ido] = SVMUL(minus_sqrt2, VADD(ti1, tr1));
938
+ }
939
+ } /* radb4 */
940
+
941
/*
 * radf5_ps: radix-5 butterfly pass of the forward real FFT (dispatched from
 * rfftf1_ps when the current factor is 5).
 *
 * cc is the input array and ch the output array; wa1..wa4 hold scalar twiddle
 * factors that are broadcast with LD_PS1 and applied with VCPLXMULCONJ.
 *
 * tr11/ti11 are numerically cos(2*pi/5)/sin(2*pi/5) and tr12/ti12 are
 * cos(4*pi/5)/sin(4*pi/5).
 *
 * The cc_ref/ch_ref macros, combined with the pointer offsets applied under
 * "Parameter adjustments", let the body use 1-based subscripts: element
 * (1, 1, 1) of either array is the first element the caller passed in.
 */
+ static void radf5_ps(int ido, int l1, const v4sf *RESTRICT cc,
942
+ v4sf *RESTRICT ch, const float *wa1, const float *wa2,
943
+ const float *wa3, const float *wa4) {
944
+ static const float tr11 = .309016994374947f;
945
+ static const float ti11 = .951056516295154f;
946
+ static const float tr12 = -.809016994374947f;
947
+ static const float ti12 = .587785252292473f;
948
+
949
+ /* System generated locals */
950
+ int cc_offset, ch_offset;
951
+
952
+ /* Local variables */
953
+ int i, k, ic;
954
+ v4sf ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3, dr4, dr5,
955
+ cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
956
+ int idp2;
957
+
958
+ #define cc_ref(a_1, a_2, a_3) cc[((a_3) * l1 + (a_2)) * ido + a_1]
959
+ #define ch_ref(a_1, a_2, a_3) ch[((a_3) * 5 + (a_2)) * ido + a_1]
960
+
961
+ /* Parameter adjustments */
962
+ ch_offset = 1 + ido * 6;
963
+ ch -= ch_offset;
964
+ cc_offset = 1 + ido * (1 + l1);
965
+ cc -= cc_offset;
966
+
967
+ /* Function Body */
968
/* First column of each of the l1 groups: no twiddle multiplication. */
+ for (k = 1; k <= l1; ++k) {
969
+ cr2 = VADD(cc_ref(1, k, 5), cc_ref(1, k, 2));
970
+ ci5 = VSUB(cc_ref(1, k, 5), cc_ref(1, k, 2));
971
+ cr3 = VADD(cc_ref(1, k, 4), cc_ref(1, k, 3));
972
+ ci4 = VSUB(cc_ref(1, k, 4), cc_ref(1, k, 3));
973
+ ch_ref(1, 1, k) = VADD(cc_ref(1, k, 1), VADD(cr2, cr3));
974
+ ch_ref(ido, 2, k) =
975
+ VADD(cc_ref(1, k, 1), VADD(SVMUL(tr11, cr2), SVMUL(tr12, cr3)));
976
+ ch_ref(1, 3, k) = VADD(SVMUL(ti11, ci5), SVMUL(ti12, ci4));
977
+ ch_ref(ido, 4, k) =
978
+ VADD(cc_ref(1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3)));
979
+ ch_ref(1, 5, k) = VSUB(SVMUL(ti12, ci5), SVMUL(ti11, ci4));
980
+ // printf("pffft: radf5, k=%d ch_ref=%f, ci4=%f\n", k, ch_ref(1, 5, k),
981
+ // ci4);
982
+ }
983
+ if (ido == 1) {
984
+ return;
985
+ }
986
+ idp2 = ido + 2;
987
/* Remaining columns, two at a time; ic = ido + 2 - i is the mirrored output
   column written alongside column i. */
+ for (k = 1; k <= l1; ++k) {
988
+ for (i = 3; i <= ido; i += 2) {
989
+ ic = idp2 - i;
990
+ dr2 = LD_PS1(wa1[i - 3]);
991
+ di2 = LD_PS1(wa1[i - 2]);
992
+ dr3 = LD_PS1(wa2[i - 3]);
993
+ di3 = LD_PS1(wa2[i - 2]);
994
+ dr4 = LD_PS1(wa3[i - 3]);
995
+ di4 = LD_PS1(wa3[i - 2]);
996
+ dr5 = LD_PS1(wa4[i - 3]);
997
+ di5 = LD_PS1(wa4[i - 2]);
998
+ VCPLXMULCONJ(dr2, di2, cc_ref(i - 1, k, 2), cc_ref(i, k, 2));
999
+ VCPLXMULCONJ(dr3, di3, cc_ref(i - 1, k, 3), cc_ref(i, k, 3));
1000
+ VCPLXMULCONJ(dr4, di4, cc_ref(i - 1, k, 4), cc_ref(i, k, 4));
1001
+ VCPLXMULCONJ(dr5, di5, cc_ref(i - 1, k, 5), cc_ref(i, k, 5));
1002
+ cr2 = VADD(dr2, dr5);
1003
+ ci5 = VSUB(dr5, dr2);
1004
+ cr5 = VSUB(di2, di5);
1005
+ ci2 = VADD(di2, di5);
1006
+ cr3 = VADD(dr3, dr4);
1007
+ ci4 = VSUB(dr4, dr3);
1008
+ cr4 = VSUB(di3, di4);
1009
+ ci3 = VADD(di3, di4);
1010
+ ch_ref(i - 1, 1, k) = VADD(cc_ref(i - 1, k, 1), VADD(cr2, cr3));
1011
+ ch_ref(i, 1, k) = VSUB(cc_ref(i, k, 1), VADD(ci2, ci3)); //
1012
+ tr2 = VADD(cc_ref(i - 1, k, 1), VADD(SVMUL(tr11, cr2), SVMUL(tr12, cr3)));
1013
+ ti2 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr11, ci2), SVMUL(tr12, ci3))); //
1014
+ tr3 = VADD(cc_ref(i - 1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3)));
1015
+ ti3 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr12, ci2), SVMUL(tr11, ci3))); //
1016
+ tr5 = VADD(SVMUL(ti11, cr5), SVMUL(ti12, cr4));
1017
+ ti5 = VADD(SVMUL(ti11, ci5), SVMUL(ti12, ci4));
1018
+ tr4 = VSUB(SVMUL(ti12, cr5), SVMUL(ti11, cr4));
1019
+ ti4 = VSUB(SVMUL(ti12, ci5), SVMUL(ti11, ci4));
1020
+ ch_ref(i - 1, 3, k) = VSUB(tr2, tr5);
1021
+ ch_ref(ic - 1, 2, k) = VADD(tr2, tr5);
1022
+ ch_ref(i, 3, k) = VADD(ti2, ti5);
1023
+ ch_ref(ic, 2, k) = VSUB(ti5, ti2);
1024
+ ch_ref(i - 1, 5, k) = VSUB(tr3, tr4);
1025
+ ch_ref(ic - 1, 4, k) = VADD(tr3, tr4);
1026
+ ch_ref(i, 5, k) = VADD(ti3, ti4);
1027
+ ch_ref(ic, 4, k) = VSUB(ti4, ti3);
1028
+ }
1029
+ }
1030
+ #undef cc_ref
1031
+ #undef ch_ref
1032
+ } /* radf5 */
1033
+
1034
/*
 * radb5_ps: radix-5 butterfly pass of the backward real FFT (dispatched from
 * rfftb1_ps when the current factor is 5).
 *
 * cc is the input array and ch the output array; wa1..wa4 hold scalar twiddle
 * factors that are broadcast with LD_PS1 and applied with VCPLXMUL.
 *
 * tr11/ti11 are numerically cos(2*pi/5)/sin(2*pi/5) and tr12/ti12 are
 * cos(4*pi/5)/sin(4*pi/5).
 *
 * The cc_ref/ch_ref macros, combined with the pointer offsets applied under
 * "Parameter adjustments", let the body use 1-based subscripts. Compared with
 * radf5_ps the two index layouts are swapped: here cc uses the "* 5" layout
 * and ch the "* l1" layout.
 */
+ static void radb5_ps(int ido, int l1, const v4sf *RESTRICT cc,
1035
+ v4sf *RESTRICT ch, const float *wa1, const float *wa2,
1036
+ const float *wa3, const float *wa4) {
1037
+ static const float tr11 = .309016994374947f;
1038
+ static const float ti11 = .951056516295154f;
1039
+ static const float tr12 = -.809016994374947f;
1040
+ static const float ti12 = .587785252292473f;
1041
+
1042
+ int cc_offset, ch_offset;
1043
+
1044
+ /* Local variables */
1045
+ int i, k, ic;
1046
+ v4sf ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
1047
+ ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
1048
+ int idp2;
1049
+
1050
+ #define cc_ref(a_1, a_2, a_3) cc[((a_3) * 5 + (a_2)) * ido + a_1]
1051
+ #define ch_ref(a_1, a_2, a_3) ch[((a_3) * l1 + (a_2)) * ido + a_1]
1052
+
1053
+ /* Parameter adjustments */
1054
+ ch_offset = 1 + ido * (1 + l1);
1055
+ ch -= ch_offset;
1056
+ cc_offset = 1 + ido * 6;
1057
+ cc -= cc_offset;
1058
+
1059
+ /* Function Body */
1060
/* First column of each of the l1 groups: no twiddle multiplication. */
+ for (k = 1; k <= l1; ++k) {
1061
+ ti5 = VADD(cc_ref(1, 3, k), cc_ref(1, 3, k));
1062
+ ti4 = VADD(cc_ref(1, 5, k), cc_ref(1, 5, k));
1063
+ tr2 = VADD(cc_ref(ido, 2, k), cc_ref(ido, 2, k));
1064
+ tr3 = VADD(cc_ref(ido, 4, k), cc_ref(ido, 4, k));
1065
+ ch_ref(1, k, 1) = VADD(cc_ref(1, 1, k), VADD(tr2, tr3));
1066
+ cr2 = VADD(cc_ref(1, 1, k), VADD(SVMUL(tr11, tr2), SVMUL(tr12, tr3)));
1067
+ cr3 = VADD(cc_ref(1, 1, k), VADD(SVMUL(tr12, tr2), SVMUL(tr11, tr3)));
1068
+ ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4));
1069
+ ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4));
1070
+ ch_ref(1, k, 2) = VSUB(cr2, ci5);
1071
+ ch_ref(1, k, 3) = VSUB(cr3, ci4);
1072
+ ch_ref(1, k, 4) = VADD(cr3, ci4);
1073
+ ch_ref(1, k, 5) = VADD(cr2, ci5);
1074
+ }
1075
+ if (ido == 1) {
1076
+ return;
1077
+ }
1078
+ idp2 = ido + 2;
1079
/* Remaining columns, two at a time; ic = ido + 2 - i is the mirrored input
   column read alongside column i. */
+ for (k = 1; k <= l1; ++k) {
1080
+ for (i = 3; i <= ido; i += 2) {
1081
+ ic = idp2 - i;
1082
+ ti5 = VADD(cc_ref(i, 3, k), cc_ref(ic, 2, k));
1083
+ ti2 = VSUB(cc_ref(i, 3, k), cc_ref(ic, 2, k));
1084
+ ti4 = VADD(cc_ref(i, 5, k), cc_ref(ic, 4, k));
1085
+ ti3 = VSUB(cc_ref(i, 5, k), cc_ref(ic, 4, k));
1086
+ tr5 = VSUB(cc_ref(i - 1, 3, k), cc_ref(ic - 1, 2, k));
1087
+ tr2 = VADD(cc_ref(i - 1, 3, k), cc_ref(ic - 1, 2, k));
1088
+ tr4 = VSUB(cc_ref(i - 1, 5, k), cc_ref(ic - 1, 4, k));
1089
+ tr3 = VADD(cc_ref(i - 1, 5, k), cc_ref(ic - 1, 4, k));
1090
+ ch_ref(i - 1, k, 1) = VADD(cc_ref(i - 1, 1, k), VADD(tr2, tr3));
1091
+ ch_ref(i, k, 1) = VADD(cc_ref(i, 1, k), VADD(ti2, ti3));
1092
+ cr2 = VADD(cc_ref(i - 1, 1, k), VADD(SVMUL(tr11, tr2), SVMUL(tr12, tr3)));
1093
+ ci2 = VADD(cc_ref(i, 1, k), VADD(SVMUL(tr11, ti2), SVMUL(tr12, ti3)));
1094
+ cr3 = VADD(cc_ref(i - 1, 1, k), VADD(SVMUL(tr12, tr2), SVMUL(tr11, tr3)));
1095
+ ci3 = VADD(cc_ref(i, 1, k), VADD(SVMUL(tr12, ti2), SVMUL(tr11, ti3)));
1096
+ cr5 = VADD(SVMUL(ti11, tr5), SVMUL(ti12, tr4));
1097
+ ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4));
1098
+ cr4 = VSUB(SVMUL(ti12, tr5), SVMUL(ti11, tr4));
1099
+ ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4));
1100
+ dr3 = VSUB(cr3, ci4);
1101
+ dr4 = VADD(cr3, ci4);
1102
+ di3 = VADD(ci3, cr4);
1103
+ di4 = VSUB(ci3, cr4);
1104
+ dr5 = VADD(cr2, ci5);
1105
+ dr2 = VSUB(cr2, ci5);
1106
+ di5 = VSUB(ci2, cr5);
1107
+ di2 = VADD(ci2, cr5);
1108
+ VCPLXMUL(dr2, di2, LD_PS1(wa1[i - 3]), LD_PS1(wa1[i - 2]));
1109
+ VCPLXMUL(dr3, di3, LD_PS1(wa2[i - 3]), LD_PS1(wa2[i - 2]));
1110
+ VCPLXMUL(dr4, di4, LD_PS1(wa3[i - 3]), LD_PS1(wa3[i - 2]));
1111
+ VCPLXMUL(dr5, di5, LD_PS1(wa4[i - 3]), LD_PS1(wa4[i - 2]));
1112
+
1113
+ ch_ref(i - 1, k, 2) = dr2;
1114
+ ch_ref(i, k, 2) = di2;
1115
+ ch_ref(i - 1, k, 3) = dr3;
1116
+ ch_ref(i, k, 3) = di3;
1117
+ ch_ref(i - 1, k, 4) = dr4;
1118
+ ch_ref(i, k, 4) = di4;
1119
+ ch_ref(i - 1, k, 5) = dr5;
1120
+ ch_ref(i, k, 5) = di5;
1121
+ }
1122
+ }
1123
+ #undef cc_ref
1124
+ #undef ch_ref
1125
+ } /* radb5 */
1126
+
1127
+ static NEVER_INLINE(v4sf *)
1128
+ rfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
1129
+ const float *wa, const int *ifac) {
1130
+ v4sf *in = (v4sf *)input_readonly;
1131
+ v4sf *out = (in == work2 ? work1 : work2);
1132
+ int nf = ifac[1], k1;
1133
+ int l2 = n;
1134
+ int iw = n - 1;
1135
+ assert(in != out && work1 != work2);
1136
+ for (k1 = 1; k1 <= nf; ++k1) {
1137
+ int kh = nf - k1;
1138
+ int ip = ifac[kh + 2];
1139
+ int l1 = l2 / ip;
1140
+ int ido = n / l2;
1141
+ iw -= (ip - 1) * ido;
1142
+ switch (ip) {
1143
+ case 5: {
1144
+ int ix2 = iw + ido;
1145
+ int ix3 = ix2 + ido;
1146
+ int ix4 = ix3 + ido;
1147
+ radf5_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]);
1148
+ } break;
1149
+ case 4: {
1150
+ int ix2 = iw + ido;
1151
+ int ix3 = ix2 + ido;
1152
+ radf4_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3]);
1153
+ } break;
1154
+ case 3: {
1155
+ int ix2 = iw + ido;
1156
+ radf3_ps(ido, l1, in, out, &wa[iw], &wa[ix2]);
1157
+ } break;
1158
+ case 2:
1159
+ radf2_ps(ido, l1, in, out, &wa[iw]);
1160
+ break;
1161
+ default:
1162
+ assert(0);
1163
+ break;
1164
+ }
1165
+ l2 = l1;
1166
+ if (out == work2) {
1167
+ out = work1;
1168
+ in = work2;
1169
+ } else {
1170
+ out = work2;
1171
+ in = work1;
1172
+ }
1173
+ }
1174
+ return in; /* this is in fact the output .. */
1175
+ } /* rfftf1 */
1176
+
1177
+ static NEVER_INLINE(v4sf *)
1178
+ rfftb1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
1179
+ const float *wa, const int *ifac) {
1180
+ v4sf *in = (v4sf *)input_readonly;
1181
+ v4sf *out = (in == work2 ? work1 : work2);
1182
+ int nf = ifac[1], k1;
1183
+ int l1 = 1;
1184
+ int iw = 0;
1185
+ assert(in != out);
1186
+ for (k1 = 1; k1 <= nf; k1++) {
1187
+ int ip = ifac[k1 + 1];
1188
+ int l2 = ip * l1;
1189
+ int ido = n / l2;
1190
+ switch (ip) {
1191
+ case 5: {
1192
+ int ix2 = iw + ido;
1193
+ int ix3 = ix2 + ido;
1194
+ int ix4 = ix3 + ido;
1195
+ radb5_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]);
1196
+ } break;
1197
+ case 4: {
1198
+ int ix2 = iw + ido;
1199
+ int ix3 = ix2 + ido;
1200
+ radb4_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3]);
1201
+ } break;
1202
+ case 3: {
1203
+ int ix2 = iw + ido;
1204
+ radb3_ps(ido, l1, in, out, &wa[iw], &wa[ix2]);
1205
+ } break;
1206
+ case 2:
1207
+ radb2_ps(ido, l1, in, out, &wa[iw]);
1208
+ break;
1209
+ default:
1210
+ assert(0);
1211
+ break;
1212
+ }
1213
+ l1 = l2;
1214
+ iw += (ip - 1) * ido;
1215
+
1216
+ if (out == work2) {
1217
+ out = work1;
1218
+ in = work2;
1219
+ } else {
1220
+ out = work2;
1221
+ in = work1;
1222
+ }
1223
+ }
1224
+ return in; /* this is in fact the output .. */
1225
+ }
1226
+
1227
#define IFAC_MAX_SIZE \
  25 /* max number of integer factors for the decomposition, +2 */

/*
 * decompose: factor n using only the trial divisors listed in ntryh.
 *
 * n      number to factor
 * ifac   output table with room for IFAC_MAX_SIZE ints; on return
 *        ifac[0] = n, ifac[1] = number of factors found, and the factors
 *        themselves start at ifac[2]
 * ntryh  zero-terminated list of candidate divisors, tried in order; each is
 *        divided out as many times as it fits
 *
 * Whenever a factor 2 is found and it is not the very first factor, it is
 * moved to the front of the list (ifac[2]) and the others shift up by one.
 *
 * The product of the returned factors equals n only when n is fully
 * decomposable over ntryh; callers are expected to check that.
 *
 * Non-positive n is reported as having no factors. Without that guard n == 0
 * never leaves the division loop (0 is divisible by every candidate), which
 * trips the assert in debug builds and writes past ifac when NDEBUG is set.
 *
 * Returns the number of factors found.
 */
static int decompose(int n, int *ifac, const int *ntryh) {
  int nl = n, nf = 0, j;

  if (n <= 0) {
    ifac[0] = n;
    ifac[1] = 0;
    return 0;
  }

  for (j = 0; ntryh[j]; ++j) {
    const int ntry = ntryh[j];

    while (nl != 1 && nl % ntry == 0) {
      assert(2 + nf < IFAC_MAX_SIZE);
      ifac[2 + nf++] = ntry;
      nl /= ntry;

      if (ntry == 2 && nf != 1) {
        /* keep the factor 2 in front: shift ifac[2..nf] up by one slot */
        int ib;
        for (ib = nf; ib >= 2; --ib) {
          ifac[ib + 1] = ifac[ib];
        }
        ifac[2] = 2;
      }
    }
  }

  ifac[0] = n;
  ifac[1] = nf;
  return nf;
}
1255
+
1256
+ static void rffti1_ps(int n, float *wa, int *ifac) {
1257
+ static const int ntryh[] = {4, 2, 3, 5, 0};
1258
+ int k1, j, ii;
1259
+
1260
+ int nf = decompose(n, ifac, ntryh);
1261
+ float argh = (2 * M_PI) / n;
1262
+ int is = 0;
1263
+ int nfm1 = nf - 1;
1264
+ int l1 = 1;
1265
+ for (k1 = 1; k1 <= nfm1; k1++) {
1266
+ int ip = ifac[k1 + 1];
1267
+ int ld = 0;
1268
+ int l2 = l1 * ip;
1269
+ int ido = n / l2;
1270
+ int ipm = ip - 1;
1271
+ for (j = 1; j <= ipm; ++j) {
1272
+ float argld;
1273
+ int i = is, fi = 0;
1274
+ ld += l1;
1275
+ argld = ld * argh;
1276
+ for (ii = 3; ii <= ido; ii += 2) {
1277
+ i += 2;
1278
+ fi += 1;
1279
+ wa[i - 2] = cos(fi * argld);
1280
+ wa[i - 1] = sin(fi * argld);
1281
+ }
1282
+ is += ido;
1283
+ }
1284
+ l1 = l2;
1285
+ }
1286
+ } /* rffti1 */
1287
+
1288
+ void cffti1_ps(int n, float *wa, int *ifac) {
1289
+ static const int ntryh[] = {5, 3, 4, 2, 0};
1290
+ int k1, j, ii;
1291
+
1292
+ int nf = decompose(n, ifac, ntryh);
1293
+ float argh = (2 * M_PI) / (float)n;
1294
+ int i = 1;
1295
+ int l1 = 1;
1296
+ for (k1 = 1; k1 <= nf; k1++) {
1297
+ int ip = ifac[k1 + 1];
1298
+ int ld = 0;
1299
+ int l2 = l1 * ip;
1300
+ int ido = n / l2;
1301
+ int idot = ido + ido + 2;
1302
+ int ipm = ip - 1;
1303
+ for (j = 1; j <= ipm; j++) {
1304
+ float argld;
1305
+ int i1 = i, fi = 0;
1306
+ wa[i - 1] = 1;
1307
+ wa[i] = 0;
1308
+ ld += l1;
1309
+ argld = ld * argh;
1310
+ for (ii = 4; ii <= idot; ii += 2) {
1311
+ i += 2;
1312
+ fi += 1;
1313
+ wa[i - 1] = cos(fi * argld);
1314
+ wa[i] = sin(fi * argld);
1315
+ }
1316
+ if (ip > 5) {
1317
+ wa[i1 - 1] = wa[i - 1];
1318
+ wa[i1] = wa[i];
1319
+ }
1320
+ }
1321
+ l1 = l2;
1322
+ }
1323
+ } /* cffti1 */
1324
+
1325
+ v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
1326
+ const float *wa, const int *ifac, int isign) {
1327
+ v4sf *in = (v4sf *)input_readonly;
1328
+ v4sf *out = (in == work2 ? work1 : work2);
1329
+ int nf = ifac[1], k1;
1330
+ int l1 = 1;
1331
+ int iw = 0;
1332
+ assert(in != out && work1 != work2);
1333
+ for (k1 = 2; k1 <= nf + 1; k1++) {
1334
+ int ip = ifac[k1];
1335
+ int l2 = ip * l1;
1336
+ int ido = n / l2;
1337
+ int idot = ido + ido;
1338
+ switch (ip) {
1339
+ case 5: {
1340
+ int ix2 = iw + idot;
1341
+ int ix3 = ix2 + idot;
1342
+ int ix4 = ix3 + idot;
1343
+ passf5_ps(idot, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4],
1344
+ isign);
1345
+ } break;
1346
+ case 4: {
1347
+ int ix2 = iw + idot;
1348
+ int ix3 = ix2 + idot;
1349
+ passf4_ps(idot, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], isign);
1350
+ } break;
1351
+ case 2: {
1352
+ passf2_ps(idot, l1, in, out, &wa[iw], isign);
1353
+ } break;
1354
+ case 3: {
1355
+ int ix2 = iw + idot;
1356
+ passf3_ps(idot, l1, in, out, &wa[iw], &wa[ix2], isign);
1357
+ } break;
1358
+ default:
1359
+ assert(0);
1360
+ }
1361
+ l1 = l2;
1362
+ iw += (ip - 1) * idot;
1363
+ if (out == work2) {
1364
+ out = work1;
1365
+ in = work2;
1366
+ } else {
1367
+ out = work2;
1368
+ in = work1;
1369
+ }
1370
+ }
1371
+
1372
+ return in; /* this is in fact the output .. */
1373
+ }
1374
+
1375
/*
 * Precomputed state for one transform size and type, created by
 * pffft_new_setup and released by pffft_destroy_setup.
 * 'data' is the only allocation; 'e' and 'twiddle' both point into it.
 */
+ struct PFFFT_Setup {
1376
+ int N;
1377
+ int Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if
1378
+ // PFFFT_REAL)
1379
+ // hold the decomposition into small integers of N
1380
+ int ifac[IFAC_MAX_SIZE]; // N , number of factors, factors (admitted values:
1381
+ // 2, 3, 4 or 5)
1382
+ pffft_transform_t transform;
1383
+ v4sf *data; // allocated room for twiddle coefs
1384
+ float *e; // points into 'data' , N/4*3 elements
1385
+ float *twiddle; // points into 'data', N/4 elements
1386
+ };
1387
+
1388
+ PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) {
1389
+ // validate N for negative values or potential int overflow
1390
+ if (N < 0) {
1391
+ return 0;
1392
+ }
1393
+ if (N > (1 << 26)) {
1394
+ // higher values of N will make you enter in the integer overflow world...
1395
+ assert(0);
1396
+ return 0;
1397
+ }
1398
+ PFFFT_Setup *s = (PFFFT_Setup *)malloc(sizeof(PFFFT_Setup));
1399
+ int k, m;
1400
+ /* unfortunately, the fft size must be a multiple of 16 for complex FFTs
1401
+ and 32 for real FFTs -- a lot of stuff would need to be rewritten to
1402
+ handle other cases (or maybe just switch to a scalar fft, I don't know..)
1403
+ */
1404
+ if (transform == PFFFT_REAL) {
1405
+ assert((N % (2 * SIMD_SZ * SIMD_SZ)) == 0 && N > 0);
1406
+ }
1407
+ if (transform == PFFFT_COMPLEX) {
1408
+ assert((N % (SIMD_SZ * SIMD_SZ)) == 0 && N > 0);
1409
+ }
1410
+ // assert((N % 32) == 0);
1411
+ s->N = N;
1412
+ s->transform = transform;
1413
+ /* nb of complex simd vectors */
1414
+ s->Ncvec = (transform == PFFFT_REAL ? N / 2 : N) / SIMD_SZ;
1415
+ s->data = (v4sf *)pffft_aligned_malloc(2 * s->Ncvec * sizeof(v4sf));
1416
+ s->e = (float *)s->data;
1417
+ s->twiddle = (float *)(s->data + (2 * s->Ncvec * (SIMD_SZ - 1)) / SIMD_SZ);
1418
+
1419
+ for (k = 0; k < s->Ncvec; ++k) {
1420
+ int i = k / SIMD_SZ;
1421
+ int j = k % SIMD_SZ;
1422
+ for (m = 0; m < SIMD_SZ - 1; ++m) {
1423
+ float A = -2 * M_PI * (m + 1) * k / N;
1424
+ s->e[(2 * (i * 3 + m) + 0) * SIMD_SZ + j] = cos(A);
1425
+ s->e[(2 * (i * 3 + m) + 1) * SIMD_SZ + j] = sin(A);
1426
+ }
1427
+ }
1428
+
1429
+ if (transform == PFFFT_REAL) {
1430
+ rffti1_ps(N / SIMD_SZ, s->twiddle, s->ifac);
1431
+ } else {
1432
+ cffti1_ps(N / SIMD_SZ, s->twiddle, s->ifac);
1433
+ }
1434
+
1435
+ /* check that N is decomposable with allowed prime factors */
1436
+ for (k = 0, m = 1; k < s->ifac[1]; ++k) {
1437
+ m *= s->ifac[2 + k];
1438
+ }
1439
+ if (m != N / SIMD_SZ) {
1440
+ pffft_destroy_setup(s);
1441
+ s = 0;
1442
+ }
1443
+
1444
+ return s;
1445
+ }
1446
+
1447
+ void pffft_destroy_setup(PFFFT_Setup *s) {
1448
+ pffft_aligned_free(s->data);
1449
+ free(s);
1450
+ }
1451
+
1452
+ #if !defined(PFFFT_SIMD_DISABLE)
1453
+
1454
/*
 * reversed_copy: helper of pffft_zreorder (forward direction).
 *
 * Reads N vector pairs (in[0], in[1]), stepping `in` by in_stride vectors
 * between pairs, and writes 2 * N vectors going backwards from `out`: every
 * store is a pre-decrement, so `out` must point one past the end of the
 * destination range. g0, the first interleaved vector, is kept until the end
 * and combined with the last one in the final store.
 */
+ /* [0 0 1 2 3 4 5 6 7 8] -> [0 8 7 6 5 4 3 2 1] */
1455
+ static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) {
1456
+ v4sf g0, g1;
1457
+ int k;
1458
+ INTERLEAVE2(in[0], in[1], g0, g1);
1459
+ in += in_stride;
1460
+
1461
+ *--out = VSWAPHL(g0, g1); // [g0l, g0h], [g1l g1h] -> [g1l, g0h]
1462
+ for (k = 1; k < N; ++k) {
1463
+ v4sf h0, h1;
1464
+ INTERLEAVE2(in[0], in[1], h0, h1);
1465
+ in += in_stride;
1466
+ *--out = VSWAPHL(g1, h0);
1467
+ *--out = VSWAPHL(h0, h1);
1468
+ g1 = h1;
1469
+ }
1470
+ *--out = VSWAPHL(g1, g0);
1471
+ }
1472
+
1473
/*
 * unreversed_copy: helper of pffft_zreorder (backward direction);
 * presumably the inverse of reversed_copy -- confirm.
 *
 * Reads 2 * N consecutive vectors from `in` and writes N vector pairs
 * (out[0], out[1]), stepping `out` by out_stride vectors between pairs
 * (the caller in this file passes a negative stride). The first input vector
 * is kept in g0 and reused to build the last output pair.
 */
+ static void unreversed_copy(int N, const v4sf *in, v4sf *out, int out_stride) {
1474
+ v4sf g0, g1, h0, h1;
1475
+ int k;
1476
+ g0 = g1 = in[0];
1477
+ ++in;
1478
+ for (k = 1; k < N; ++k) {
1479
+ h0 = *in++;
1480
+ h1 = *in++;
1481
+ g1 = VSWAPHL(g1, h0);
1482
+ h0 = VSWAPHL(h0, h1);
1483
+ UNINTERLEAVE2(h0, g1, out[0], out[1]);
1484
+ out += out_stride;
1485
+ g1 = h1;
1486
+ }
1487
+ h0 = *in++;
1488
+ h1 = g0;
1489
+ g1 = VSWAPHL(g1, h0);
1490
+ h0 = VSWAPHL(h0, h1);
1491
+ UNINTERLEAVE2(h0, g1, out[0], out[1]);
1492
+ }
1493
+
1494
/*
 * pffft_zreorder: permute the data in `in` into `out` according to the
 * transform type stored in `setup` and to `direction`.
 *
 * in and out must be different buffers (asserted); both are accessed as
 * arrays of v4sf.
 *
 * PFFFT_REAL:    works on N / 32 blocks of 8 vectors; vectors 0,1 and 4,5 of
 *                each block are (un)interleaved directly, vectors 2,3 and 6,7
 *                go through reversed_copy / unreversed_copy.
 * PFFFT_COMPLEX: vector pair k is exchanged with pair
 *                kk = k / 4 + (k % 4) * (Ncvec / 4).
 *
 * PFFFT_FORWARD uses INTERLEAVE2; any other direction uses UNINTERLEAVE2 with
 * source and destination indices swapped.
 */
+ void pffft_zreorder(PFFFT_Setup *setup, const float *in, float *out,
1495
+ pffft_direction_t direction) {
1496
+ int k, N = setup->N, Ncvec = setup->Ncvec;
1497
+ const v4sf *vin = (const v4sf *)in;
1498
+ v4sf *vout = (v4sf *)out;
1499
+ assert(in != out);
1500
+ if (setup->transform == PFFFT_REAL) {
1501
/* NOTE(review): this inner k shadows the k declared at function scope. */
+ int k, dk = N / 32;
1502
+ if (direction == PFFFT_FORWARD) {
1503
+ for (k = 0; k < dk; ++k) {
1504
+ INTERLEAVE2(vin[k * 8 + 0], vin[k * 8 + 1], vout[2 * (0 * dk + k) + 0],
1505
+ vout[2 * (0 * dk + k) + 1]);
1506
+ INTERLEAVE2(vin[k * 8 + 4], vin[k * 8 + 5], vout[2 * (2 * dk + k) + 0],
1507
+ vout[2 * (2 * dk + k) + 1]);
1508
+ }
1509
+ reversed_copy(dk, vin + 2, 8, (v4sf *)(out + N / 2));
1510
+ reversed_copy(dk, vin + 6, 8, (v4sf *)(out + N));
1511
+ } else {
1512
+ for (k = 0; k < dk; ++k) {
1513
+ UNINTERLEAVE2(vin[2 * (0 * dk + k) + 0], vin[2 * (0 * dk + k) + 1],
1514
+ vout[k * 8 + 0], vout[k * 8 + 1]);
1515
+ UNINTERLEAVE2(vin[2 * (2 * dk + k) + 0], vin[2 * (2 * dk + k) + 1],
1516
+ vout[k * 8 + 4], vout[k * 8 + 5]);
1517
+ }
1518
+ unreversed_copy(dk, (v4sf *)(in + N / 4), (v4sf *)(out + N - 6 * SIMD_SZ),
1519
+ -8);
1520
+ unreversed_copy(dk, (v4sf *)(in + 3 * N / 4),
1521
+ (v4sf *)(out + N - 2 * SIMD_SZ), -8);
1522
+ }
1523
+ } else {
1524
+ if (direction == PFFFT_FORWARD) {
1525
+ for (k = 0; k < Ncvec; ++k) {
1526
+ int kk = (k / 4) + (k % 4) * (Ncvec / 4);
1527
+ INTERLEAVE2(vin[k * 2], vin[k * 2 + 1], vout[kk * 2], vout[kk * 2 + 1]);
1528
+ }
1529
+ } else {
1530
+ for (k = 0; k < Ncvec; ++k) {
1531
+ int kk = (k / 4) + (k % 4) * (Ncvec / 4);
1532
+ UNINTERLEAVE2(vin[kk * 2], vin[kk * 2 + 1], vout[k * 2],
1533
+ vout[k * 2 + 1]);
1534
+ }
1535
+ }
1536
+ }
1537
+ }
1538
+
1539
/*
 * pffft_cplx_finalize: last stage of the forward complex transform (called
 * by pffft_transform_internal after cfftf1_ps).
 *
 * Processes Ncvec / SIMD_SZ blocks of 8 vectors (4 real + 4 imaginary). For
 * each block it transposes the real and imaginary 4x4 matrices, multiplies
 * rows 1..3 by the twiddles e[6k .. 6k+5] (VCPLXMUL), applies the 4-point
 * butterfly described by the matrix in the comment below, and appends the 8
 * resulting vectors to `out`.
 *
 * in and out must be different buffers (asserted).
 */
+ void pffft_cplx_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
1540
+ int k, dk = Ncvec / SIMD_SZ; // number of 4x4 matrix blocks
1541
+ v4sf r0, i0, r1, i1, r2, i2, r3, i3;
1542
+ v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
1543
+ assert(in != out);
1544
+ for (k = 0; k < dk; ++k) {
1545
+ r0 = in[8 * k + 0];
1546
+ i0 = in[8 * k + 1];
1547
+ r1 = in[8 * k + 2];
1548
+ i1 = in[8 * k + 3];
1549
+ r2 = in[8 * k + 4];
1550
+ i2 = in[8 * k + 5];
1551
+ r3 = in[8 * k + 6];
1552
+ i3 = in[8 * k + 7];
1553
+ VTRANSPOSE4(r0, r1, r2, r3);
1554
+ VTRANSPOSE4(i0, i1, i2, i3);
1555
+ VCPLXMUL(r1, i1, e[k * 6 + 0], e[k * 6 + 1]);
1556
+ VCPLXMUL(r2, i2, e[k * 6 + 2], e[k * 6 + 3]);
1557
+ VCPLXMUL(r3, i3, e[k * 6 + 4], e[k * 6 + 5]);
1558
+
1559
+ sr0 = VADD(r0, r2);
1560
+ dr0 = VSUB(r0, r2);
1561
+ sr1 = VADD(r1, r3);
1562
+ dr1 = VSUB(r1, r3);
1563
+ si0 = VADD(i0, i2);
1564
+ di0 = VSUB(i0, i2);
1565
+ si1 = VADD(i1, i3);
1566
+ di1 = VSUB(i1, i3);
1567
+
1568
+ /*
1569
+ transformation for each column is:
1570
+
1571
+ [1 1 1 1 0 0 0 0] [r0]
1572
+ [1 0 -1 0 0 -1 0 1] [r1]
1573
+ [1 -1 1 -1 0 0 0 0] [r2]
1574
+ [1 0 -1 0 0 1 0 -1] [r3]
1575
+ [0 0 0 0 1 1 1 1] * [i0]
1576
+ [0 1 0 -1 1 0 -1 0] [i1]
1577
+ [0 0 0 0 1 -1 1 -1] [i2]
1578
+ [0 -1 0 1 1 0 -1 0] [i3]
1579
+ */
1580
+
1581
+ r0 = VADD(sr0, sr1);
1582
+ i0 = VADD(si0, si1);
1583
+ r1 = VADD(dr0, di1);
1584
+ i1 = VSUB(di0, dr1);
1585
+ r2 = VSUB(sr0, sr1);
1586
+ i2 = VSUB(si0, si1);
1587
+ r3 = VSUB(dr0, di1);
1588
+ i3 = VADD(di0, dr1);
1589
+
1590
+ *out++ = r0;
1591
+ *out++ = i0;
1592
+ *out++ = r1;
1593
+ *out++ = i1;
1594
+ *out++ = r2;
1595
+ *out++ = i2;
1596
+ *out++ = r3;
1597
+ *out++ = i3;
1598
+ }
1599
+ }
1600
+
1601
/*
 * pffft_cplx_preprocess: counterpart of pffft_cplx_finalize, with the steps
 * applied in the opposite order.
 *
 * Processes Ncvec / SIMD_SZ blocks of 8 vectors (4 real + 4 imaginary). For
 * each block it applies a 4-point butterfly, multiplies rows 1..3 by the
 * conjugated twiddles e[6k .. 6k+5] (VCPLXMULCONJ), transposes the real and
 * imaginary 4x4 matrices and appends the 8 resulting vectors to `out`.
 *
 * in and out must be different buffers (asserted).
 */
+ void pffft_cplx_preprocess(int Ncvec, const v4sf *in, v4sf *out,
1602
+ const v4sf *e) {
1603
+ int k, dk = Ncvec / SIMD_SZ; // number of 4x4 matrix blocks
1604
+ v4sf r0, i0, r1, i1, r2, i2, r3, i3;
1605
+ v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
1606
+ assert(in != out);
1607
+ for (k = 0; k < dk; ++k) {
1608
+ r0 = in[8 * k + 0];
1609
+ i0 = in[8 * k + 1];
1610
+ r1 = in[8 * k + 2];
1611
+ i1 = in[8 * k + 3];
1612
+ r2 = in[8 * k + 4];
1613
+ i2 = in[8 * k + 5];
1614
+ r3 = in[8 * k + 6];
1615
+ i3 = in[8 * k + 7];
1616
+
1617
+ sr0 = VADD(r0, r2);
1618
+ dr0 = VSUB(r0, r2);
1619
+ sr1 = VADD(r1, r3);
1620
+ dr1 = VSUB(r1, r3);
1621
+ si0 = VADD(i0, i2);
1622
+ di0 = VSUB(i0, i2);
1623
+ si1 = VADD(i1, i3);
1624
+ di1 = VSUB(i1, i3);
1625
+
1626
+ r0 = VADD(sr0, sr1);
1627
+ i0 = VADD(si0, si1);
1628
+ r1 = VSUB(dr0, di1);
1629
+ i1 = VADD(di0, dr1);
1630
+ r2 = VSUB(sr0, sr1);
1631
+ i2 = VSUB(si0, si1);
1632
+ r3 = VADD(dr0, di1);
1633
+ i3 = VSUB(di0, dr1);
1634
+
1635
+ VCPLXMULCONJ(r1, i1, e[k * 6 + 0], e[k * 6 + 1]);
1636
+ VCPLXMULCONJ(r2, i2, e[k * 6 + 2], e[k * 6 + 3]);
1637
+ VCPLXMULCONJ(r3, i3, e[k * 6 + 4], e[k * 6 + 5]);
1638
+
1639
+ VTRANSPOSE4(r0, r1, r2, r3);
1640
+ VTRANSPOSE4(i0, i1, i2, i3);
1641
+
1642
+ *out++ = r0;
1643
+ *out++ = i0;
1644
+ *out++ = r1;
1645
+ *out++ = i1;
1646
+ *out++ = r2;
1647
+ *out++ = i2;
1648
+ *out++ = r3;
1649
+ *out++ = i3;
1650
+ }
1651
+ }
1652
+
1653
/*
 * pffft_real_finalize_4x4: process one block of 8 vectors for
 * pffft_real_finalize.
 *
 * in0, in1  pointers to the vectors used as r0 and i0
 * in        the next 6 vectors, read in order as r1, i1, r2, i2, r3, i3
 * e         6 twiddle vectors applied to rows 1..3 with VCPLXMUL
 * out       receives 8 vectors: r0, i0, r1, i1, r2, i2, r3, i3
 *
 * Steps: transpose the real and imaginary 4x4 matrices, apply the twiddles,
 * then apply the butterfly described by the matrix in the comment below.
 */
+ static ALWAYS_INLINE(void)
1654
+ pffft_real_finalize_4x4(const v4sf *in0, const v4sf *in1, const v4sf *in,
1655
+ const v4sf *e, v4sf *out) {
1656
+ v4sf r0, i0, r1, i1, r2, i2, r3, i3;
1657
+ v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
1658
+ r0 = *in0;
1659
+ i0 = *in1;
1660
+ r1 = *in++;
1661
+ i1 = *in++;
1662
+ r2 = *in++;
1663
+ i2 = *in++;
1664
+ r3 = *in++;
1665
+ i3 = *in++;
1666
+ VTRANSPOSE4(r0, r1, r2, r3);
1667
+ VTRANSPOSE4(i0, i1, i2, i3);
1668
+
1669
+ /*
1670
+ transformation for each column is:
1671
+
1672
+ [1 1 1 1 0 0 0 0] [r0]
1673
+ [1 0 -1 0 0 -1 0 1] [r1]
1674
+ [1 0 -1 0 0 1 0 -1] [r2]
1675
+ [1 -1 1 -1 0 0 0 0] [r3]
1676
+ [0 0 0 0 1 1 1 1] * [i0]
1677
+ [0 -1 0 1 -1 0 1 0] [i1]
1678
+ [0 -1 0 1 1 0 -1 0] [i2]
1679
+ [0 0 0 0 -1 1 -1 1] [i3]
1680
+ */
1681
+
1682
+ // cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 <<
1683
+ // "\n 1: " << r2 << "\n 1: " << r3 << "\n"; cerr << "matrix initial, before
1684
+ // e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " <<
1685
+ // i3 << "\n";
1686
+
1687
+ VCPLXMUL(r1, i1, e[0], e[1]);
1688
+ VCPLXMUL(r2, i2, e[2], e[3]);
1689
+ VCPLXMUL(r3, i3, e[4], e[5]);
1690
+
1691
+ // cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n
1692
+ // 1: " << r2 << "\n 1: " << r3 << "\n"; cerr << "matrix initial, imag part:\n
1693
+ // 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n";
1694
+
1695
+ sr0 = VADD(r0, r2);
1696
+ dr0 = VSUB(r0, r2);
1697
+ sr1 = VADD(r1, r3);
1698
+ dr1 = VSUB(r3, r1);
1699
+ si0 = VADD(i0, i2);
1700
+ di0 = VSUB(i0, i2);
1701
+ si1 = VADD(i1, i3);
1702
+ di1 = VSUB(i3, i1);
1703
+
1704
+ r0 = VADD(sr0, sr1);
1705
+ r3 = VSUB(sr0, sr1);
1706
+ i0 = VADD(si0, si1);
1707
+ i3 = VSUB(si1, si0);
1708
+ r1 = VADD(dr0, di1);
1709
+ r2 = VSUB(dr0, di1);
1710
+ i1 = VSUB(dr1, di0);
1711
+ i2 = VADD(dr1, di0);
1712
+
1713
+ *out++ = r0;
1714
+ *out++ = i0;
1715
+ *out++ = r1;
1716
+ *out++ = i1;
1717
+ *out++ = r2;
1718
+ *out++ = i2;
1719
+ *out++ = r3;
1720
+ *out++ = i3;
1721
+ }
1722
+
1723
/*
 * pffft_real_finalize: last stage of the forward real transform (called by
 * pffft_transform_internal after rfftf1_ps).
 *
 * Ncvec  number of complex SIMD vectors; Ncvec / SIMD_SZ blocks are processed
 * in     input, 2 * Ncvec vectors (in[0] and in[2 * Ncvec - 1] are read
 *        separately into cr / ci)
 * out    output, must differ from `in` (asserted)
 * e      twiddle vectors, 6 per block
 *
 * Block 0 is computed by pffft_real_finalize_4x4 with zero vectors in place
 * of r0/i0; lane 0 of its 8 output vectors is then overwritten with scalar
 * values derived from cr and ci (see the matrix comment below). For blocks
 * k >= 1, r0 is in[8 * (k - 1) + 7], carried over from the previous block in
 * `save`, and i0 is in[8 * k].
 */
+ static NEVER_INLINE(void)
1724
+ pffft_real_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
1725
+ int k, dk = Ncvec / SIMD_SZ; // number of 4x4 matrix blocks
1726
+ /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
1727
+
1728
+ v4sf_union cr, ci, *uout = (v4sf_union *)out;
1729
+ v4sf save = in[7], zero = VZERO();
1730
+ float xr0, xi0, xr1, xi1, xr2, xi2, xr3, xi3;
1731
+ static const float s = (float)(M_SQRT2 / 2);
1732
+
1733
+ cr.v = in[0];
1734
+ ci.v = in[Ncvec * 2 - 1];
1735
+ assert(in != out);
1736
+ pffft_real_finalize_4x4(&zero, &zero, in + 1, e, out);
1737
+
1738
+ /*
1739
+ [cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3]
1740
+
1741
+ [Xr(1)] ] [1 1 1 1 0 0 0 0]
1742
+ [Xr(N/4) ] [0 0 0 0 1 s 0 -s]
1743
+ [Xr(N/2) ] [1 0 -1 0 0 0 0 0]
1744
+ [Xr(3N/4)] [0 0 0 0 1 -s 0 s]
1745
+ [Xi(1) ] [1 -1 1 -1 0 0 0 0]
1746
+ [Xi(N/4) ] [0 0 0 0 0 -s -1 -s]
1747
+ [Xi(N/2) ] [0 -1 0 1 0 0 0 0]
1748
+ [Xi(3N/4)] [0 0 0 0 0 -s 1 -s]
1749
+ */
1750
+
1751
+ xr0 = (cr.f[0] + cr.f[2]) + (cr.f[1] + cr.f[3]);
1752
+ uout[0].f[0] = xr0;
1753
+ xi0 = (cr.f[0] + cr.f[2]) - (cr.f[1] + cr.f[3]);
1754
+ uout[1].f[0] = xi0;
1755
+ xr2 = (cr.f[0] - cr.f[2]);
1756
+ uout[4].f[0] = xr2;
1757
+ xi2 = (cr.f[3] - cr.f[1]);
1758
+ uout[5].f[0] = xi2;
1759
+ xr1 = ci.f[0] + s * (ci.f[1] - ci.f[3]);
1760
+ uout[2].f[0] = xr1;
1761
+ xi1 = -ci.f[2] - s * (ci.f[1] + ci.f[3]);
1762
+ uout[3].f[0] = xi1;
1763
+ xr3 = ci.f[0] - s * (ci.f[1] - ci.f[3]);
1764
+ uout[6].f[0] = xr3;
1765
+ xi3 = ci.f[2] - s * (ci.f[1] + ci.f[3]);
1766
+ uout[7].f[0] = xi3;
1767
+
1768
+ for (k = 1; k < dk; ++k) {
1769
+ v4sf save_next = in[8 * k + 7];
1770
+ pffft_real_finalize_4x4(&save, &in[8 * k + 0], in + 8 * k + 1, e + k * 6,
1771
+ out + k * 8);
1772
+ save = save_next;
1773
+ }
1774
+ }
1775
+
1776
/*
 * pffft_real_preprocess_4x4: process one block of 8 vectors for
 * pffft_real_preprocess.
 *
 * in     8 input vectors, read as r0, i0, r1, i1, r2, i2, r3, i3
 * e      6 twiddle vectors applied to rows 1..3 with VCPLXMULCONJ
 * out    destination
 * first  when non-zero, r0 and i0 are not stored and only 6 vectors are
 *        written (r1, i1, r2, i2, r3, i3); otherwise all 8 are written
 *
 * Steps: apply the butterfly described by the matrix in the comment below,
 * apply the conjugated twiddles, then transpose the real and imaginary 4x4
 * matrices.
 */
+ static ALWAYS_INLINE(void)
1777
+ pffft_real_preprocess_4x4(const v4sf *in, const v4sf *e, v4sf *out,
1778
+ int first) {
1779
+ v4sf r0 = in[0], i0 = in[1], r1 = in[2], i1 = in[3], r2 = in[4], i2 = in[5],
1780
+ r3 = in[6], i3 = in[7];
1781
+ /*
1782
+ transformation for each column is:
1783
+
1784
+ [1 1 1 1 0 0 0 0] [r0]
1785
+ [1 0 0 -1 0 -1 -1 0] [r1]
1786
+ [1 -1 -1 1 0 0 0 0] [r2]
1787
+ [1 0 0 -1 0 1 1 0] [r3]
1788
+ [0 0 0 0 1 -1 1 -1] * [i0]
1789
+ [0 -1 1 0 1 0 0 1] [i1]
1790
+ [0 0 0 0 1 1 -1 -1] [i2]
1791
+ [0 1 -1 0 1 0 0 1] [i3]
1792
+ */
1793
+
1794
+ v4sf sr0 = VADD(r0, r3), dr0 = VSUB(r0, r3);
1795
+ v4sf sr1 = VADD(r1, r2), dr1 = VSUB(r1, r2);
1796
+ v4sf si0 = VADD(i0, i3), di0 = VSUB(i0, i3);
1797
+ v4sf si1 = VADD(i1, i2), di1 = VSUB(i1, i2);
1798
+
1799
+ r0 = VADD(sr0, sr1);
1800
+ r2 = VSUB(sr0, sr1);
1801
+ r1 = VSUB(dr0, si1);
1802
+ r3 = VADD(dr0, si1);
1803
+ i0 = VSUB(di0, di1);
1804
+ i2 = VADD(di0, di1);
1805
+ i1 = VSUB(si0, dr1);
1806
+ i3 = VADD(si0, dr1);
1807
+
1808
+ VCPLXMULCONJ(r1, i1, e[0], e[1]);
1809
+ VCPLXMULCONJ(r2, i2, e[2], e[3]);
1810
+ VCPLXMULCONJ(r3, i3, e[4], e[5]);
1811
+
1812
+ VTRANSPOSE4(r0, r1, r2, r3);
1813
+ VTRANSPOSE4(i0, i1, i2, i3);
1814
+
1815
+ if (!first) {
1816
+ *out++ = r0;
1817
+ *out++ = i0;
1818
+ }
1819
+ *out++ = r1;
1820
+ *out++ = i1;
1821
+ *out++ = r2;
1822
+ *out++ = i2;
1823
+ *out++ = r3;
1824
+ *out++ = i3;
1825
+ }
1826
+
1827
/*
 * pffft_real_preprocess: counterpart of pffft_real_finalize; presumably run
 * before rfftb1_ps on the backward path -- the caller is outside this
 * excerpt, confirm.
 *
 * Ncvec  number of complex SIMD vectors; Ncvec / SIMD_SZ blocks are processed
 * in     input, must differ from `out` (asserted)
 * out    output, 2 * Ncvec vectors
 * e      twiddle vectors, 6 per block
 *
 * Lane 0 of the first 8 input vectors is saved into Xr / Xi before any block
 * is processed. Block 0 is written to out[1..6] (6 vectors), blocks k >= 1 to
 * out[8k - 1 .. 8k + 6]. Finally the first and last output vectors, out[0]
 * and out[2 * Ncvec - 1], are filled lane by lane from Xr / Xi following the
 * matrix in the comment below.
 */
+ static NEVER_INLINE(void)
1828
+ pffft_real_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
1829
+ int k, dk = Ncvec / SIMD_SZ; // number of 4x4 matrix blocks
1830
+ /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
1831
+
1832
+ v4sf_union Xr, Xi, *uout = (v4sf_union *)out;
1833
+ float cr0, ci0, cr1, ci1, cr2, ci2, cr3, ci3;
1834
+ static const float s = (float)M_SQRT2;
1835
+ assert(in != out);
1836
+ for (k = 0; k < 4; ++k) {
1837
+ Xr.f[k] = ((float *)in)[8 * k];
1838
+ Xi.f[k] = ((float *)in)[8 * k + 4];
1839
+ }
1840
+
1841
+ pffft_real_preprocess_4x4(in, e, out + 1, 1); // will write only 6 values
1842
+
1843
+ /*
1844
+ [Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3]
1845
+
1846
+ [cr0] [1 0 2 0 1 0 0 0]
1847
+ [cr1] [1 0 0 0 -1 0 -2 0]
1848
+ [cr2] [1 0 -2 0 1 0 0 0]
1849
+ [cr3] [1 0 0 0 -1 0 2 0]
1850
+ [ci0] [0 2 0 2 0 0 0 0]
1851
+ [ci1] [0 s 0 -s 0 -s 0 -s]
1852
+ [ci2] [0 0 0 0 0 -2 0 2]
1853
+ [ci3] [0 -s 0 s 0 -s 0 -s]
1854
+ */
1855
+ for (k = 1; k < dk; ++k) {
1856
+ pffft_real_preprocess_4x4(in + 8 * k, e + k * 6, out - 1 + k * 8, 0);
1857
+ }
1858
+
1859
+ cr0 = (Xr.f[0] + Xi.f[0]) + 2 * Xr.f[2];
1860
+ uout[0].f[0] = cr0;
1861
+ cr1 = (Xr.f[0] - Xi.f[0]) - 2 * Xi.f[2];
1862
+ uout[0].f[1] = cr1;
1863
+ cr2 = (Xr.f[0] + Xi.f[0]) - 2 * Xr.f[2];
1864
+ uout[0].f[2] = cr2;
1865
+ cr3 = (Xr.f[0] - Xi.f[0]) + 2 * Xi.f[2];
1866
+ uout[0].f[3] = cr3;
1867
+ ci0 = 2 * (Xr.f[1] + Xr.f[3]);
1868
+ uout[2 * Ncvec - 1].f[0] = ci0;
1869
+ ci1 = s * (Xr.f[1] - Xr.f[3]) - s * (Xi.f[1] + Xi.f[3]);
1870
+ uout[2 * Ncvec - 1].f[1] = ci1;
1871
+ ci2 = 2 * (Xi.f[3] - Xi.f[1]);
1872
+ uout[2 * Ncvec - 1].f[2] = ci2;
1873
+ ci3 = -s * (Xr.f[1] - Xr.f[3]) - s * (Xi.f[1] + Xi.f[3]);
1874
+ uout[2 * Ncvec - 1].f[3] = ci3;
1875
+ }
1876
+
1877
/*
 * SIMD implementation shared by pffft_transform (ordered = 0) and
 * pffft_transform_ordered (ordered = 1).
 *
 *   setup     : transform description (Ncvec, ifac, twiddle, e, transform).
 *   finput    : input samples; asserted to satisfy VALIGNED.
 *   foutput   : output samples; asserted to satisfy VALIGNED. May equal finput.
 *   scratch   : work area; when NULL, a 2 * Ncvec v4sf array is taken from the
 *               stack through VLA_ARRAY_ON_STACK instead.
 *   direction : PFFFT_FORWARD selects the forward branch, anything else the
 *               backward branch.
 *   ordered   : non-zero to run pffft_zreorder on the spectrum.
 *
 * The work ping-pongs between buff[0] (the output) and buff[1] (the scratch);
 * `ib` tracks which of the two holds the current result, and a final copy is
 * made if the result did not end up in the output buffer.
 */
+ void pffft_transform_internal(PFFFT_Setup *setup, const float *finput,
1878
+ float *foutput, v4sf *scratch,
1879
+ pffft_direction_t direction, int ordered) {
1880
+ int k, Ncvec = setup->Ncvec;
1881
+ int nf_odd = (setup->ifac[1] & 1);
1882
+
1883
+ // temporary buffer is allocated on the stack if the scratch pointer is NULL
1884
+ int stack_allocate = (scratch == 0 ? Ncvec * 2 : 1);
1885
+ VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate);
1886
+
1887
+ const v4sf *vinput = (const v4sf *)finput;
1888
+ v4sf *voutput = (v4sf *)foutput;
1889
+ v4sf *buff[2] = {voutput, scratch ? scratch : scratch_on_stack};
// Starting buffer index, derived from the parity of ifac[1] and from `ordered`.
1890
+ int ib = (nf_odd ^ ordered ? 1 : 0);
1891
+
1892
+ assert(VALIGNED(finput) && VALIGNED(foutput));
1893
+
1894
+ // assert(finput != foutput);
1895
+ if (direction == PFFFT_FORWARD) {
1896
+ ib = !ib;
1897
+ if (setup->transform == PFFFT_REAL) {
// rfftf1_ps returns the buffer holding its result; ib is set to that buffer's
// index before the finalize pass writes into the other buffer.
1898
+ ib = (rfftf1_ps(Ncvec * 2, vinput, buff[ib], buff[!ib], setup->twiddle,
1899
+ &setup->ifac[0]) == buff[0]
1900
+ ? 0
1901
+ : 1);
1902
+ pffft_real_finalize(Ncvec, buff[ib], buff[!ib], (v4sf *)setup->e);
1903
+ } else {
// Complex input: each pair of vectors goes through UNINTERLEAVE2 into buff[ib]
// before the FFT passes run (sign argument -1).
1904
+ v4sf *tmp = buff[ib];
1905
+ for (k = 0; k < Ncvec; ++k) {
1906
+ UNINTERLEAVE2(vinput[k * 2], vinput[k * 2 + 1], tmp[k * 2],
1907
+ tmp[k * 2 + 1]);
1908
+ }
1909
+ ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib], setup->twiddle,
1910
+ &setup->ifac[0], -1) == buff[0]
1911
+ ? 0
1912
+ : 1);
1913
+ pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], (v4sf *)setup->e);
1914
+ }
// The finalize pass left its result in buff[!ib]: either reorder it back into
// buff[ib], or just flip ib so that it designates the finalized data.
1915
+ if (ordered) {
1916
+ pffft_zreorder(setup, (float *)buff[!ib], (float *)buff[ib],
1917
+ PFFFT_FORWARD);
1918
+ } else
1919
+ ib = !ib;
1920
+ } else {
1921
+ if (vinput == buff[ib]) {
1922
+ ib = !ib; // may happen when finput == foutput
1923
+ }
// Ordered input is first reordered into buff[ib], which then becomes the input
// of the preprocess pass.
1924
+ if (ordered) {
1925
+ pffft_zreorder(setup, (float *)vinput, (float *)buff[ib], PFFFT_BACKWARD);
1926
+ vinput = buff[ib];
1927
+ ib = !ib;
1928
+ }
1929
+ if (setup->transform == PFFFT_REAL) {
1930
+ pffft_real_preprocess(Ncvec, vinput, buff[ib], (v4sf *)setup->e);
1931
+ ib = (rfftb1_ps(Ncvec * 2, buff[ib], buff[0], buff[1], setup->twiddle,
1932
+ &setup->ifac[0]) == buff[0]
1933
+ ? 0
1934
+ : 1);
1935
+ } else {
1936
+ pffft_cplx_preprocess(Ncvec, vinput, buff[ib], (v4sf *)setup->e);
1937
+ ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1], setup->twiddle,
1938
+ &setup->ifac[0], +1) == buff[0]
1939
+ ? 0
1940
+ : 1);
// Complex output: INTERLEAVE2 is applied in place on the result buffer.
1941
+ for (k = 0; k < Ncvec; ++k) {
1942
+ INTERLEAVE2(buff[ib][k * 2], buff[ib][k * 2 + 1], buff[ib][k * 2],
1943
+ buff[ib][k * 2 + 1]);
1944
+ }
1945
+ }
1946
+ }
1947
+
1948
+ if (buff[ib] != voutput) {
1949
+ /* extra copy required -- this situation should only happen when finput ==
1950
+ * foutput */
1951
+ assert(finput == foutput);
// Copies all 2 * Ncvec vectors of the result into the output buffer.
1952
+ for (k = 0; k < Ncvec; ++k) {
1953
+ v4sf a = buff[ib][2 * k], b = buff[ib][2 * k + 1];
1954
+ voutput[2 * k] = a;
1955
+ voutput[2 * k + 1] = b;
1956
+ }
1957
+ ib = !ib;
1958
+ }
1959
+ assert(buff[ib] == voutput);
1960
+ }
1961
+
1962
/*
 * SIMD version of ab += (a * b) * scaling, where a * b is the VCPLXMUL product
 * of consecutive (real, imaginary) vector pairs.
 *
 *   s       : setup; only Ncvec and transform are read.
 *   a, b    : operands, asserted to satisfy VALIGNED.
 *   ab      : accumulator, asserted to satisfy VALIGNED; updated in place.
 *   scaling : factor applied to every product before accumulation.
 *
 * The loop handles two vector pairs per iteration. For PFFFT_REAL setups the
 * first float of ab's vectors 0 and 1 is recomputed afterwards from values
 * saved before the loop, as two independent real products.
 *
 * NOTE(review): the ARM branch defines ZCONVOLVE_USING_INLINE_NEON_ASM, but the
 * conditionals below test ZCONVOLVE_USING_INLINE_ASM. Unless that second name
 * is defined elsewhere, the inline-asm path is never compiled and the C loop is
 * always used -- confirm before renaming either macro.
 */
+ void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b,
1963
+ float *ab, float scaling) {
1964
+ int Ncvec = s->Ncvec;
1965
+ const v4sf *RESTRICT va = (const v4sf *)a;
1966
+ const v4sf *RESTRICT vb = (const v4sf *)b;
1967
+ v4sf *RESTRICT vab = (v4sf *)ab;
1968
+
1969
+ #ifdef __arm__
1970
+ __builtin_prefetch(va);
1971
+ __builtin_prefetch(vb);
1972
+ __builtin_prefetch(vab);
1973
+ __builtin_prefetch(va + 2);
1974
+ __builtin_prefetch(vb + 2);
1975
+ __builtin_prefetch(vab + 2);
1976
+ __builtin_prefetch(va + 4);
1977
+ __builtin_prefetch(vb + 4);
1978
+ __builtin_prefetch(vab + 4);
1979
+ __builtin_prefetch(va + 6);
1980
+ __builtin_prefetch(vb + 6);
1981
+ __builtin_prefetch(vab + 6);
1982
+ #ifndef __clang__
1983
+ #define ZCONVOLVE_USING_INLINE_NEON_ASM
1984
+ #endif
1985
+ #endif
1986
+
1987
+ float ar0, ai0, br0, bi0, abr0, abi0;
1988
+ #ifndef ZCONVOLVE_USING_INLINE_ASM
1989
+ v4sf vscal = LD_PS1(scaling);
1990
+ int i;
1991
+ #endif
1992
+
1993
+ assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
// Save the first float of vectors 0 and 1 of each array before the loop
// overwrites ab; they are needed for the PFFFT_REAL fix-up at the end.
1994
+ ar0 = ((v4sf_union *)va)[0].f[0];
1995
+ ai0 = ((v4sf_union *)va)[1].f[0];
1996
+ br0 = ((v4sf_union *)vb)[0].f[0];
1997
+ bi0 = ((v4sf_union *)vb)[1].f[0];
1998
+ abr0 = ((v4sf_union *)vab)[0].f[0];
1999
+ abi0 = ((v4sf_union *)vab)[1].f[0];
2000
+
2001
+ #ifdef ZCONVOLVE_USING_INLINE_ASM // inline asm version, unfortunately
2002
+ // miscompiled by clang 3.2, at least on
2003
+ // ubuntu.. so this will be restricted to gcc
2004
+ const float *a_ = a, *b_ = b;
2005
+ float *ab_ = ab;
2006
+ int N = Ncvec;
2007
+ asm volatile("mov r8, %2 \n"
2008
+ "vdup.f32 q15, %4 \n"
2009
+ "1: \n"
2010
+ "pld [%0,#64] \n"
2011
+ "pld [%1,#64] \n"
2012
+ "pld [%2,#64] \n"
2013
+ "pld [%0,#96] \n"
2014
+ "pld [%1,#96] \n"
2015
+ "pld [%2,#96] \n"
2016
+ "vld1.f32 {q0,q1}, [%0,:128]! \n"
2017
+ "vld1.f32 {q4,q5}, [%1,:128]! \n"
2018
+ "vld1.f32 {q2,q3}, [%0,:128]! \n"
2019
+ "vld1.f32 {q6,q7}, [%1,:128]! \n"
2020
+ "vld1.f32 {q8,q9}, [r8,:128]! \n"
2021
+
2022
+ "vmul.f32 q10, q0, q4 \n"
2023
+ "vmul.f32 q11, q0, q5 \n"
2024
+ "vmul.f32 q12, q2, q6 \n"
2025
+ "vmul.f32 q13, q2, q7 \n"
2026
+ "vmls.f32 q10, q1, q5 \n"
2027
+ "vmla.f32 q11, q1, q4 \n"
2028
+ "vld1.f32 {q0,q1}, [r8,:128]! \n"
2029
+ "vmls.f32 q12, q3, q7 \n"
2030
+ "vmla.f32 q13, q3, q6 \n"
2031
+ "vmla.f32 q8, q10, q15 \n"
2032
+ "vmla.f32 q9, q11, q15 \n"
2033
+ "vmla.f32 q0, q12, q15 \n"
2034
+ "vmla.f32 q1, q13, q15 \n"
2035
+ "vst1.f32 {q8,q9},[%2,:128]! \n"
2036
+ "vst1.f32 {q0,q1},[%2,:128]! \n"
2037
+ "subs %3, #2 \n"
2038
+ "bne 1b \n"
2039
+ : "+r"(a_), "+r"(b_), "+r"(ab_), "+r"(N)
2040
+ : "r"(scaling)
2041
+ : "r8", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
2042
+ "q9", "q10", "q11", "q12", "q13", "q15", "memory");
2043
+ #else // default routine, works fine for non-arm cpus with current compilers
// Two (real, imaginary) vector pairs per iteration: indices 2*i .. 2*i + 3.
2044
+ for (i = 0; i < Ncvec; i += 2) {
2045
+ v4sf ar, ai, br, bi;
2046
+ ar = va[2 * i + 0];
2047
+ ai = va[2 * i + 1];
2048
+ br = vb[2 * i + 0];
2049
+ bi = vb[2 * i + 1];
2050
+ VCPLXMUL(ar, ai, br, bi);
2051
+ vab[2 * i + 0] = VMADD(ar, vscal, vab[2 * i + 0]);
2052
+ vab[2 * i + 1] = VMADD(ai, vscal, vab[2 * i + 1]);
2053
+ ar = va[2 * i + 2];
2054
+ ai = va[2 * i + 3];
2055
+ br = vb[2 * i + 2];
2056
+ bi = vb[2 * i + 3];
2057
+ VCPLXMUL(ar, ai, br, bi);
2058
+ vab[2 * i + 2] = VMADD(ar, vscal, vab[2 * i + 2]);
2059
+ vab[2 * i + 3] = VMADD(ai, vscal, vab[2 * i + 3]);
2060
+ }
2061
+ #endif
// Real transforms: replace what the loop stored in the first float of vectors
// 0 and 1 with two separate real multiply-accumulates of the saved values.
2062
+ if (s->transform == PFFFT_REAL) {
2063
+ ((v4sf_union *)vab)[0].f[0] = abr0 + ar0 * br0 * scaling;
2064
+ ((v4sf_union *)vab)[1].f[0] = abi0 + ai0 * bi0 * scaling;
2065
+ }
2066
+ }
2067
+
2068
+ #else // defined(PFFFT_SIMD_DISABLE)
2069
+
2070
+ // standard routine using scalar floats, without SIMD stuff.
2071
+
2072
+ #define pffft_zreorder_nosimd pffft_zreorder
2073
+ void pffft_zreorder_nosimd(PFFFT_Setup *setup, const float *in, float *out,
2074
+ pffft_direction_t direction) {
2075
+ int k, N = setup->N;
2076
+ if (setup->transform == PFFFT_COMPLEX) {
2077
+ for (k = 0; k < 2 * N; ++k)
2078
+ out[k] = in[k];
2079
+ return;
2080
+ } else if (direction == PFFFT_FORWARD) {
2081
+ float x_N = in[N - 1];
2082
+ for (k = N - 1; k > 1; --k)
2083
+ out[k] = in[k - 1];
2084
+ out[0] = in[0];
2085
+ out[1] = x_N;
2086
+ } else {
2087
+ float x_N = in[1];
2088
+ for (k = 1; k < N - 1; ++k)
2089
+ out[k] = in[k + 1];
2090
+ out[0] = in[0];
2091
+ out[N - 1] = x_N;
2092
+ }
2093
+ }
2094
+
2095
+ #define pffft_transform_internal_nosimd pffft_transform_internal
2096
+ void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input,
2097
+ float *output, float *scratch,
2098
+ pffft_direction_t direction, int ordered) {
2099
+ int Ncvec = setup->Ncvec;
2100
+ int nf_odd = (setup->ifac[1] & 1);
2101
+
2102
+ // temporary buffer is allocated on the stack if the scratch pointer is NULL
2103
+ int stack_allocate = (scratch == 0 ? Ncvec * 2 : 1);
2104
+ VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate);
2105
+ float *buff[2];
2106
+ int ib;
2107
+ if (scratch == 0)
2108
+ scratch = scratch_on_stack;
2109
+ buff[0] = output;
2110
+ buff[1] = scratch;
2111
+
2112
+ if (setup->transform == PFFFT_COMPLEX)
2113
+ ordered = 0; // it is always ordered.
2114
+ ib = (nf_odd ^ ordered ? 1 : 0);
2115
+
2116
+ if (direction == PFFFT_FORWARD) {
2117
+ if (setup->transform == PFFFT_REAL) {
2118
+ ib = (rfftf1_ps(Ncvec * 2, input, buff[ib], buff[!ib], setup->twiddle,
2119
+ &setup->ifac[0]) == buff[0]
2120
+ ? 0
2121
+ : 1);
2122
+ } else {
2123
+ ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib], setup->twiddle,
2124
+ &setup->ifac[0], -1) == buff[0]
2125
+ ? 0
2126
+ : 1);
2127
+ }
2128
+ if (ordered) {
2129
+ pffft_zreorder(setup, buff[ib], buff[!ib], PFFFT_FORWARD);
2130
+ ib = !ib;
2131
+ }
2132
+ } else {
2133
+ if (input == buff[ib]) {
2134
+ ib = !ib; // may happen when finput == foutput
2135
+ }
2136
+ if (ordered) {
2137
+ pffft_zreorder(setup, input, buff[!ib], PFFFT_BACKWARD);
2138
+ input = buff[!ib];
2139
+ }
2140
+ if (setup->transform == PFFFT_REAL) {
2141
+ ib = (rfftb1_ps(Ncvec * 2, input, buff[ib], buff[!ib], setup->twiddle,
2142
+ &setup->ifac[0]) == buff[0]
2143
+ ? 0
2144
+ : 1);
2145
+ } else {
2146
+ ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib], setup->twiddle,
2147
+ &setup->ifac[0], +1) == buff[0]
2148
+ ? 0
2149
+ : 1);
2150
+ }
2151
+ }
2152
+ if (buff[ib] != output) {
2153
+ int k;
2154
+ // extra copy required -- this situation should happens only when finput ==
2155
+ // foutput
2156
+ assert(input == output);
2157
+ for (k = 0; k < Ncvec; ++k) {
2158
+ float a = buff[ib][2 * k], b = buff[ib][2 * k + 1];
2159
+ output[2 * k] = a;
2160
+ output[2 * k + 1] = b;
2161
+ }
2162
+ ib = !ib;
2163
+ }
2164
+ assert(buff[ib] == output);
2165
+ }
2166
+
2167
+ #define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate
2168
+ void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup *s, const float *a,
2169
+ const float *b, float *ab,
2170
+ float scaling) {
2171
+ int i, Ncvec = s->Ncvec;
2172
+
2173
+ if (s->transform == PFFFT_REAL) {
2174
+ // take care of the fftpack ordering
2175
+ ab[0] += a[0] * b[0] * scaling;
2176
+ ab[2 * Ncvec - 1] += a[2 * Ncvec - 1] * b[2 * Ncvec - 1] * scaling;
2177
+ ++ab;
2178
+ ++a;
2179
+ ++b;
2180
+ --Ncvec;
2181
+ }
2182
+ for (i = 0; i < Ncvec; ++i) {
2183
+ float ar, ai, br, bi;
2184
+ ar = a[2 * i + 0];
2185
+ ai = a[2 * i + 1];
2186
+ br = b[2 * i + 0];
2187
+ bi = b[2 * i + 1];
2188
+ VCPLXMUL(ar, ai, br, bi);
2189
+ ab[2 * i + 0] += ar * scaling;
2190
+ ab[2 * i + 1] += ai * scaling;
2191
+ }
2192
+ }
2193
+
2194
+ #endif // defined(PFFFT_SIMD_DISABLE)
2195
+
2196
+ void pffft_transform(PFFFT_Setup *setup, const float *input, float *output,
2197
+ float *work, pffft_direction_t direction) {
2198
+ pffft_transform_internal(setup, input, output, (v4sf *)work, direction, 0);
2199
+ }
2200
+
2201
+ void pffft_transform_ordered(PFFFT_Setup *setup, const float *input,
2202
+ float *output, float *work,
2203
+ pffft_direction_t direction) {
2204
+ pffft_transform_internal(setup, input, output, (v4sf *)work, direction, 1);
2205
+ }