react-native-executorch 0.4.7 → 0.5.1-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/android/CMakeLists.txt +17 -0
- package/android/build.gradle +76 -13
- package/android/libs/classes.jar +0 -0
- package/android/src/main/cpp/CMakeLists.txt +73 -0
- package/android/src/main/cpp/ETInstallerModule.cpp +76 -0
- package/android/src/main/cpp/ETInstallerModule.h +43 -0
- package/android/src/main/java/com/swmansion/rnexecutorch/ETInstaller.kt +66 -0
- package/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +3 -3
- package/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +7 -113
- package/common/ada/ada.cpp +17406 -0
- package/common/ada/ada.h +10274 -0
- package/common/pfft/pfft.c +2205 -0
- package/common/pfft/pfft.h +185 -0
- package/common/rnexecutorch/Log.h +489 -0
- package/common/rnexecutorch/RnExecutorchInstaller.cpp +78 -0
- package/common/rnexecutorch/RnExecutorchInstaller.h +112 -0
- package/common/rnexecutorch/TokenizerModule.cpp +52 -0
- package/common/rnexecutorch/TokenizerModule.h +26 -0
- package/common/rnexecutorch/data_processing/FFT.cpp +21 -0
- package/common/rnexecutorch/data_processing/FFT.h +23 -0
- package/common/rnexecutorch/data_processing/FileUtils.h +30 -0
- package/common/rnexecutorch/data_processing/ImageProcessing.cpp +240 -0
- package/common/rnexecutorch/data_processing/ImageProcessing.h +55 -0
- package/common/rnexecutorch/data_processing/Numerical.cpp +82 -0
- package/common/rnexecutorch/data_processing/Numerical.h +23 -0
- package/common/rnexecutorch/data_processing/base64.cpp +110 -0
- package/common/rnexecutorch/data_processing/base64.h +46 -0
- package/common/rnexecutorch/data_processing/dsp.cpp +65 -0
- package/common/rnexecutorch/data_processing/dsp.h +12 -0
- package/common/rnexecutorch/host_objects/JSTensorViewIn.h +12 -0
- package/common/rnexecutorch/host_objects/JSTensorViewOut.h +22 -0
- package/common/rnexecutorch/host_objects/JsiConversions.h +410 -0
- package/common/rnexecutorch/host_objects/ModelHostObject.h +239 -0
- package/common/rnexecutorch/jsi/JsiHostObject.cpp +108 -0
- package/common/rnexecutorch/jsi/JsiHostObject.h +87 -0
- package/common/rnexecutorch/jsi/OwningArrayBuffer.h +40 -0
- package/common/rnexecutorch/jsi/Promise.cpp +20 -0
- package/common/rnexecutorch/jsi/Promise.h +69 -0
- package/common/rnexecutorch/jsi/RuntimeAwareCache.h +58 -0
- package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.cpp +53 -0
- package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.h +35 -0
- package/common/rnexecutorch/metaprogramming/ConstructorHelpers.h +131 -0
- package/common/rnexecutorch/metaprogramming/FunctionHelpers.h +50 -0
- package/common/rnexecutorch/metaprogramming/TypeConcepts.h +37 -0
- package/common/rnexecutorch/models/BaseModel.cpp +181 -0
- package/common/rnexecutorch/models/BaseModel.h +47 -0
- package/common/rnexecutorch/models/EncoderDecoderBase.cpp +21 -0
- package/common/rnexecutorch/models/EncoderDecoderBase.h +31 -0
- package/common/rnexecutorch/models/classification/Classification.cpp +72 -0
- package/common/rnexecutorch/models/classification/Classification.h +26 -0
- package/{ios/RnExecutorch/models/classification/Constants.mm → common/rnexecutorch/models/classification/Constants.h} +7 -2
- package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +27 -0
- package/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +17 -0
- package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +45 -0
- package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +23 -0
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +61 -0
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +26 -0
- package/{ios/RnExecutorch/models/image_segmentation/Constants.mm → common/rnexecutorch/models/image_segmentation/Constants.h} +7 -2
- package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +173 -0
- package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h +43 -0
- package/{ios/RnExecutorch/utils/Constants.mm → common/rnexecutorch/models/object_detection/Constants.h} +9 -2
- package/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +82 -0
- package/common/rnexecutorch/models/object_detection/ObjectDetection.h +31 -0
- package/{ios/RnExecutorch/utils/ObjectDetectionUtils.mm → common/rnexecutorch/models/object_detection/Utils.cpp} +10 -30
- package/common/rnexecutorch/models/object_detection/Utils.h +17 -0
- package/common/rnexecutorch/models/ocr/CTCLabelConverter.cpp +88 -0
- package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +29 -0
- package/common/rnexecutorch/models/ocr/Constants.h +34 -0
- package/common/rnexecutorch/models/ocr/Detector.cpp +102 -0
- package/common/rnexecutorch/models/ocr/Detector.h +30 -0
- package/common/rnexecutorch/models/ocr/DetectorUtils.cpp +703 -0
- package/common/rnexecutorch/models/ocr/DetectorUtils.h +80 -0
- package/common/rnexecutorch/models/ocr/OCR.cpp +52 -0
- package/common/rnexecutorch/models/ocr/OCR.h +36 -0
- package/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +107 -0
- package/common/rnexecutorch/models/ocr/RecognitionHandler.h +40 -0
- package/common/rnexecutorch/models/ocr/RecognitionHandlerUtils.cpp +153 -0
- package/common/rnexecutorch/models/ocr/RecognitionHandlerUtils.h +72 -0
- package/common/rnexecutorch/models/ocr/Recognizer.cpp +80 -0
- package/common/rnexecutorch/models/ocr/Recognizer.h +36 -0
- package/common/rnexecutorch/models/ocr/RecognizerUtils.cpp +202 -0
- package/common/rnexecutorch/models/ocr/RecognizerUtils.h +70 -0
- package/common/rnexecutorch/models/ocr/Types.h +37 -0
- package/common/rnexecutorch/models/speech_to_text/MoonshineStrategy.cpp +31 -0
- package/common/rnexecutorch/models/speech_to_text/MoonshineStrategy.h +21 -0
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +70 -0
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +31 -0
- package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +26 -0
- package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +38 -0
- package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +25 -0
- package/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +55 -0
- package/common/rnexecutorch/models/style_transfer/StyleTransfer.h +29 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +92 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +49 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +180 -0
- package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +78 -0
- package/common/rnexecutorch/tests/LogTest.cpp +530 -0
- package/common/rnexecutorch/tests/README.md +20 -0
- package/common/rnexecutorch/tests/run_all_tests.sh +14 -0
- package/common/rnexecutorch/tests/run_test.sh +18 -0
- package/ios/ExecutorchLib.xcframework/Info.plist +4 -4
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
- package/ios/RnExecutorch/ETInstaller.h +8 -0
- package/ios/RnExecutorch/ETInstaller.mm +56 -0
- package/ios/RnExecutorch/utils/Conversions.h +8 -9
- package/ios/RnExecutorch/utils/Numerical.h +2 -0
- package/ios/RnExecutorch.xcodeproj/project.pbxproj +73 -0
- package/lib/common/Logger.d.ts +8 -0
- package/lib/common/Logger.js +19 -0
- package/lib/constants/modelUrls.d.ts +89 -0
- package/lib/constants/modelUrls.js +116 -0
- package/lib/constants/sttDefaults.js +66 -0
- package/lib/controllers/LLMController.js +210 -0
- package/lib/controllers/OCRController.js +65 -0
- package/lib/controllers/SpeechToTextController.d.ts +52 -0
- package/lib/controllers/SpeechToTextController.js +343 -0
- package/lib/hooks/natural_language_processing/useSpeechToText.js +44 -0
- package/lib/index.d.ts +50 -0
- package/{src/index.tsx → lib/index.js} +22 -10
- package/lib/module/Error.js +8 -6
- package/lib/module/Error.js.map +1 -1
- package/lib/module/common/Logger.js +23 -0
- package/lib/module/common/Logger.js.map +1 -0
- package/lib/module/constants/llmDefaults.js +8 -0
- package/lib/module/constants/llmDefaults.js.map +1 -1
- package/lib/module/constants/modelUrls.js +300 -84
- package/lib/module/constants/modelUrls.js.map +1 -1
- package/lib/module/constants/ocr/models.js +181 -286
- package/lib/module/constants/ocr/models.js.map +1 -1
- package/lib/module/constants/ocr/symbols.js +63 -63
- package/lib/module/constants/sttDefaults.js +12 -10
- package/lib/module/constants/sttDefaults.js.map +1 -1
- package/lib/module/controllers/LLMController.js +17 -11
- package/lib/module/controllers/LLMController.js.map +1 -1
- package/lib/module/controllers/OCRController.js +16 -9
- package/lib/module/controllers/OCRController.js.map +1 -1
- package/lib/module/controllers/SpeechToTextController.js +32 -19
- package/lib/module/controllers/SpeechToTextController.js.map +1 -1
- package/lib/module/controllers/VerticalOCRController.js +16 -9
- package/lib/module/controllers/VerticalOCRController.js.map +1 -1
- package/lib/module/hooks/computer_vision/useClassification.js +5 -5
- package/lib/module/hooks/computer_vision/useClassification.js.map +1 -1
- package/lib/module/hooks/computer_vision/useImageEmbeddings.js +13 -0
- package/lib/module/hooks/computer_vision/useImageEmbeddings.js.map +1 -0
- package/lib/module/hooks/computer_vision/useImageSegmentation.js +4 -4
- package/lib/module/hooks/computer_vision/useImageSegmentation.js.map +1 -1
- package/lib/module/hooks/computer_vision/useOCR.js +14 -15
- package/lib/module/hooks/computer_vision/useOCR.js.map +1 -1
- package/lib/module/hooks/computer_vision/useObjectDetection.js +5 -5
- package/lib/module/hooks/computer_vision/useObjectDetection.js.map +1 -1
- package/lib/module/hooks/computer_vision/useStyleTransfer.js +5 -5
- package/lib/module/hooks/computer_vision/useStyleTransfer.js.map +1 -1
- package/lib/module/hooks/computer_vision/useVerticalOCR.js +16 -17
- package/lib/module/hooks/computer_vision/useVerticalOCR.js.map +1 -1
- package/lib/module/hooks/general/useExecutorchModule.js +5 -3
- package/lib/module/hooks/general/useExecutorchModule.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useLLM.js +22 -25
- package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useSpeechToText.js +16 -14
- package/lib/module/hooks/natural_language_processing/useSpeechToText.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js +4 -5
- package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useTokenizer.js +20 -19
- package/lib/module/hooks/natural_language_processing/useTokenizer.js.map +1 -1
- package/lib/module/hooks/useNonStaticModule.js +52 -0
- package/lib/module/hooks/useNonStaticModule.js.map +1 -0
- package/lib/module/index.js +16 -2
- package/lib/module/index.js.map +1 -1
- package/lib/module/modules/BaseModule.js +6 -3
- package/lib/module/modules/BaseModule.js.map +1 -1
- package/lib/module/modules/BaseNonStaticModule.js +17 -0
- package/lib/module/modules/BaseNonStaticModule.js.map +1 -0
- package/lib/module/modules/computer_vision/ClassificationModule.js +13 -8
- package/lib/module/modules/computer_vision/ClassificationModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js +19 -0
- package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js.map +1 -0
- package/lib/module/modules/computer_vision/ImageSegmentationModule.js +21 -19
- package/lib/module/modules/computer_vision/ImageSegmentationModule.js.map +1 -1
- package/lib/module/modules/computer_vision/OCRModule.js +13 -10
- package/lib/module/modules/computer_vision/OCRModule.js.map +1 -1
- package/lib/module/modules/computer_vision/ObjectDetectionModule.js +13 -8
- package/lib/module/modules/computer_vision/ObjectDetectionModule.js.map +1 -1
- package/lib/module/modules/computer_vision/StyleTransferModule.js +13 -8
- package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
- package/lib/module/modules/computer_vision/VerticalOCRModule.js +15 -10
- package/lib/module/modules/computer_vision/VerticalOCRModule.js.map +1 -1
- package/lib/module/modules/general/ExecutorchModule.js +10 -36
- package/lib/module/modules/general/ExecutorchModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/LLMModule.js +18 -22
- package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +27 -16
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js +15 -8
- package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/TokenizerModule.js +20 -14
- package/lib/module/modules/natural_language_processing/TokenizerModule.js.map +1 -1
- package/lib/module/native/NativeETInstaller.js +5 -0
- package/lib/module/native/NativeETInstaller.js.map +1 -0
- package/lib/module/native/RnExecutorchModules.js +2 -11
- package/lib/module/native/RnExecutorchModules.js.map +1 -1
- package/lib/module/types/common.js +25 -8
- package/lib/module/types/common.js.map +1 -1
- package/lib/module/types/stt.js +6 -0
- package/lib/module/types/stt.js.map +1 -1
- package/lib/module/utils/ResourceFetcher.js +276 -114
- package/lib/module/utils/ResourceFetcher.js.map +1 -1
- package/lib/module/utils/ResourceFetcherUtils.js +155 -0
- package/lib/module/utils/ResourceFetcherUtils.js.map +1 -0
- package/lib/module/utils/llm.js +41 -1
- package/lib/module/utils/llm.js.map +1 -1
- package/lib/modules/natural_language_processing/SpeechToTextModule.d.ts +14 -0
- package/lib/modules/natural_language_processing/SpeechToTextModule.js +30 -0
- package/lib/modules/natural_language_processing/TokenizerModule.js +29 -0
- package/lib/native/RnExecutorchModules.d.ts +3 -0
- package/lib/native/RnExecutorchModules.js +16 -0
- package/lib/typescript/Error.d.ts +2 -0
- package/lib/typescript/Error.d.ts.map +1 -1
- package/lib/typescript/common/Logger.d.ts +9 -0
- package/lib/typescript/common/Logger.d.ts.map +1 -0
- package/lib/typescript/constants/llmDefaults.d.ts +1 -0
- package/lib/typescript/constants/llmDefaults.d.ts.map +1 -1
- package/lib/typescript/constants/modelUrls.d.ts +223 -79
- package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
- package/lib/typescript/constants/ocr/models.d.ts +882 -284
- package/lib/typescript/constants/ocr/models.d.ts.map +1 -1
- package/lib/typescript/constants/sttDefaults.d.ts +1 -0
- package/lib/typescript/constants/sttDefaults.d.ts.map +1 -1
- package/lib/typescript/controllers/LLMController.d.ts +3 -4
- package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
- package/lib/typescript/controllers/OCRController.d.ts +5 -6
- package/lib/typescript/controllers/OCRController.d.ts.map +1 -1
- package/lib/typescript/controllers/SpeechToTextController.d.ts +11 -6
- package/lib/typescript/controllers/SpeechToTextController.d.ts.map +1 -1
- package/lib/typescript/controllers/VerticalOCRController.d.ts +5 -6
- package/lib/typescript/controllers/VerticalOCRController.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useClassification.d.ts +8 -6
- package/lib/typescript/hooks/computer_vision/useClassification.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts +16 -0
- package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts.map +1 -0
- package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts +5 -3
- package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useOCR.d.ts +4 -4
- package/lib/typescript/hooks/computer_vision/useOCR.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts +5 -3
- package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts +5 -3
- package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts +3 -5
- package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts.map +1 -1
- package/lib/typescript/hooks/general/useExecutorchModule.d.ts +1 -1
- package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts +6 -4
- package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +7 -5
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts +9 -5
- package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts +6 -4
- package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts.map +1 -1
- package/lib/typescript/hooks/useNonStaticModule.d.ts +21 -0
- package/lib/typescript/hooks/useNonStaticModule.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +18 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/modules/BaseModule.d.ts +1 -1
- package/lib/typescript/modules/BaseModule.d.ts.map +1 -1
- package/lib/typescript/modules/BaseNonStaticModule.d.ts +10 -0
- package/lib/typescript/modules/BaseNonStaticModule.d.ts.map +1 -0
- package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts +6 -6
- package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts +9 -0
- package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts.map +1 -0
- package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts +8 -28
- package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/OCRModule.d.ts +8 -7
- package/lib/typescript/modules/computer_vision/OCRModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts +7 -5
- package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +6 -5
- package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts +7 -8
- package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts.map +1 -1
- package/lib/typescript/modules/general/ExecutorchModule.d.ts +5 -8
- package/lib/typescript/modules/general/ExecutorchModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +16 -16
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +19 -9
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts +7 -5
- package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts +10 -9
- package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts.map +1 -1
- package/lib/typescript/native/{NativeStyleTransfer.d.ts → NativeETInstaller.d.ts} +2 -3
- package/lib/typescript/native/NativeETInstaller.d.ts.map +1 -0
- package/lib/typescript/native/RnExecutorchModules.d.ts +3 -21
- package/lib/typescript/native/RnExecutorchModules.d.ts.map +1 -1
- package/lib/typescript/types/common.d.ts +30 -2
- package/lib/typescript/types/common.d.ts.map +1 -1
- package/lib/typescript/types/stt.d.ts +5 -1
- package/lib/typescript/types/stt.d.ts.map +1 -1
- package/lib/typescript/utils/ResourceFetcher.d.ts +18 -10
- package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts +55 -0
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -0
- package/lib/typescript/utils/llm.d.ts +4 -0
- package/lib/typescript/utils/llm.d.ts.map +1 -1
- package/lib/utils/ResourceFetcherUtils.js +119 -0
- package/lib/utils/llm.js +72 -0
- package/package.json +22 -64
- package/react-native-executorch.podspec +75 -3
- package/src/Error.ts +8 -10
- package/src/common/Logger.ts +25 -0
- package/src/constants/llmDefaults.ts +11 -0
- package/src/constants/modelUrls.ts +365 -168
- package/src/constants/ocr/models.ts +826 -395
- package/src/constants/ocr/symbols.ts +63 -63
- package/src/constants/sttDefaults.ts +14 -18
- package/src/controllers/LLMController.ts +28 -18
- package/src/controllers/OCRController.ts +24 -15
- package/src/controllers/SpeechToTextController.ts +53 -40
- package/src/controllers/VerticalOCRController.ts +24 -14
- package/src/hooks/computer_vision/useClassification.ts +10 -11
- package/src/hooks/computer_vision/useImageEmbeddings.ts +15 -0
- package/src/hooks/computer_vision/useImageSegmentation.ts +5 -8
- package/src/hooks/computer_vision/useOCR.ts +29 -21
- package/src/hooks/computer_vision/useObjectDetection.ts +6 -9
- package/src/hooks/computer_vision/useStyleTransfer.ts +6 -6
- package/src/hooks/computer_vision/useVerticalOCR.ts +30 -27
- package/src/hooks/general/useExecutorchModule.ts +3 -3
- package/src/hooks/natural_language_processing/useLLM.ts +38 -28
- package/src/hooks/natural_language_processing/useSpeechToText.ts +34 -26
- package/src/hooks/natural_language_processing/useTextEmbeddings.ts +11 -11
- package/src/hooks/natural_language_processing/useTokenizer.ts +22 -22
- package/src/hooks/useNonStaticModule.ts +74 -0
- package/src/index.ts +108 -0
- package/src/modules/BaseModule.ts +9 -3
- package/src/modules/BaseNonStaticModule.ts +26 -0
- package/src/modules/computer_vision/ClassificationModule.ts +20 -11
- package/src/modules/computer_vision/ImageEmbeddingsModule.ts +26 -0
- package/src/modules/computer_vision/ImageSegmentationModule.ts +35 -27
- package/src/modules/computer_vision/OCRModule.ts +23 -15
- package/src/modules/computer_vision/ObjectDetectionModule.ts +24 -11
- package/src/modules/computer_vision/StyleTransferModule.ts +20 -11
- package/src/modules/computer_vision/VerticalOCRModule.ts +25 -21
- package/src/modules/general/ExecutorchModule.ts +18 -48
- package/src/modules/natural_language_processing/LLMModule.ts +27 -30
- package/src/modules/natural_language_processing/SpeechToTextModule.ts +42 -37
- package/src/modules/natural_language_processing/TextEmbeddingsModule.ts +27 -12
- package/src/modules/natural_language_processing/TokenizerModule.ts +27 -17
- package/src/native/NativeETInstaller.ts +8 -0
- package/src/native/RnExecutorchModules.ts +4 -46
- package/src/types/common.ts +40 -12
- package/src/types/stt.ts +5 -1
- package/src/utils/ResourceFetcher.ts +338 -119
- package/src/utils/ResourceFetcherUtils.ts +186 -0
- package/src/utils/llm.ts +65 -1
- package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
- package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_core.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_features2d.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_highgui.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_imgproc.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_photo.a +0 -0
- package/third-party/android/libs/opencv/arm64-v8a/libopencv_video.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_core.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_features2d.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_highgui.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_imgproc.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_photo.a +0 -0
- package/third-party/android/libs/opencv/x86_64/libopencv_video.a +0 -0
- package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv.a +0 -0
- package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_hal.a +0 -0
- package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_thread.a +0 -0
- package/third-party/include/c10/macros/Export.h +163 -0
- package/third-party/include/c10/macros/Macros.h +497 -0
- package/third-party/include/c10/util/BFloat16-inl.h +342 -0
- package/third-party/include/c10/util/BFloat16-math.h +266 -0
- package/third-party/include/c10/util/BFloat16.h +125 -0
- package/third-party/include/c10/util/Half-inl.h +347 -0
- package/third-party/include/c10/util/Half.h +416 -0
- package/third-party/include/c10/util/TypeSafeSignMath.h +133 -0
- package/third-party/include/c10/util/bit_cast.h +43 -0
- package/third-party/include/c10/util/floating_point_utils.h +33 -0
- package/third-party/include/c10/util/irange.h +107 -0
- package/third-party/include/executorch/ExecuTorch.h +13 -0
- package/third-party/include/executorch/ExecuTorchError.h +16 -0
- package/third-party/include/executorch/ExecuTorchLog.h +76 -0
- package/third-party/include/executorch/ExecuTorchModule.h +286 -0
- package/third-party/include/executorch/ExecuTorchTensor.h +742 -0
- package/third-party/include/executorch/ExecuTorchValue.h +219 -0
- package/third-party/include/executorch/extension/module/module.h +492 -0
- package/third-party/include/executorch/extension/tensor/tensor.h +13 -0
- package/third-party/include/executorch/extension/tensor/tensor_accessor.h +190 -0
- package/third-party/include/executorch/extension/tensor/tensor_ptr.h +347 -0
- package/third-party/include/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
- package/third-party/include/executorch/runtime/backend/backend_execution_context.h +71 -0
- package/third-party/include/executorch/runtime/backend/backend_init_context.h +72 -0
- package/third-party/include/executorch/runtime/backend/interface.h +166 -0
- package/third-party/include/executorch/runtime/core/array_ref.h +235 -0
- package/third-party/include/executorch/runtime/core/data_loader.h +136 -0
- package/third-party/include/executorch/runtime/core/defines.h +20 -0
- package/third-party/include/executorch/runtime/core/error.h +229 -0
- package/third-party/include/executorch/runtime/core/evalue.h +521 -0
- package/third-party/include/executorch/runtime/core/event_tracer.h +565 -0
- package/third-party/include/executorch/runtime/core/event_tracer_hooks.h +323 -0
- package/third-party/include/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
- package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
- package/third-party/include/executorch/runtime/core/freeable_buffer.h +107 -0
- package/third-party/include/executorch/runtime/core/hierarchical_allocator.h +107 -0
- package/third-party/include/executorch/runtime/core/memory_allocator.h +198 -0
- package/third-party/include/executorch/runtime/core/named_data_map.h +86 -0
- package/third-party/include/executorch/runtime/core/portable_type/bfloat16.h +27 -0
- package/third-party/include/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
- package/third-party/include/executorch/runtime/core/portable_type/bits_types.h +83 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
- package/third-party/include/executorch/runtime/core/portable_type/complex.h +44 -0
- package/third-party/include/executorch/runtime/core/portable_type/device.h +70 -0
- package/third-party/include/executorch/runtime/core/portable_type/half.h +27 -0
- package/third-party/include/executorch/runtime/core/portable_type/optional.h +36 -0
- package/third-party/include/executorch/runtime/core/portable_type/qint_types.h +83 -0
- package/third-party/include/executorch/runtime/core/portable_type/scalar.h +110 -0
- package/third-party/include/executorch/runtime/core/portable_type/scalar_type.h +154 -0
- package/third-party/include/executorch/runtime/core/portable_type/string_view.h +29 -0
- package/third-party/include/executorch/runtime/core/portable_type/tensor.h +142 -0
- package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
- package/third-party/include/executorch/runtime/core/portable_type/tensor_options.h +60 -0
- package/third-party/include/executorch/runtime/core/result.h +258 -0
- package/third-party/include/executorch/runtime/core/span.h +93 -0
- package/third-party/include/executorch/runtime/core/tag.h +71 -0
- package/third-party/include/executorch/runtime/core/tensor_layout.h +79 -0
- package/third-party/include/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
- package/third-party/include/executorch/runtime/executor/memory_manager.h +113 -0
- package/third-party/include/executorch/runtime/executor/method.h +387 -0
- package/third-party/include/executorch/runtime/executor/method_meta.h +251 -0
- package/third-party/include/executorch/runtime/executor/program.h +320 -0
- package/third-party/include/executorch/runtime/executor/pte_data_map.h +144 -0
- package/third-party/include/executorch/runtime/executor/tensor_parser.h +156 -0
- package/third-party/include/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
- package/third-party/include/executorch/runtime/kernel/operator_registry.h +278 -0
- package/third-party/include/executorch/runtime/platform/abort.h +36 -0
- package/third-party/include/executorch/runtime/platform/assert.h +119 -0
- package/third-party/include/executorch/runtime/platform/clock.h +43 -0
- package/third-party/include/executorch/runtime/platform/compat_unistd.h +75 -0
- package/third-party/include/executorch/runtime/platform/compiler.h +191 -0
- package/third-party/include/executorch/runtime/platform/log.h +177 -0
- package/third-party/include/executorch/runtime/platform/platform.h +133 -0
- package/third-party/include/executorch/runtime/platform/profiler.h +292 -0
- package/third-party/include/executorch/runtime/platform/runtime.h +35 -0
- package/third-party/include/executorch/runtime/platform/system.h +49 -0
- package/third-party/include/executorch/runtime/platform/types.h +24 -0
- package/third-party/include/executorch/schema/extended_header.h +76 -0
- package/third-party/include/opencv2/core/affine.hpp +676 -0
- package/third-party/include/opencv2/core/async.hpp +107 -0
- package/third-party/include/opencv2/core/base.hpp +735 -0
- package/third-party/include/opencv2/core/bindings_utils.hpp +279 -0
- package/third-party/include/opencv2/core/bufferpool.hpp +39 -0
- package/third-party/include/opencv2/core/check.hpp +231 -0
- package/third-party/include/opencv2/core/core.hpp +55 -0
- package/third-party/include/opencv2/core/core_c.h +3261 -0
- package/third-party/include/opencv2/core/cv_cpu_dispatch.h +404 -0
- package/third-party/include/opencv2/core/cv_cpu_helper.h +856 -0
- package/third-party/include/opencv2/core/cvdef.h +1003 -0
- package/third-party/include/opencv2/core/cvstd.hpp +196 -0
- package/third-party/include/opencv2/core/cvstd.inl.hpp +188 -0
- package/third-party/include/opencv2/core/cvstd_wrapper.hpp +187 -0
- package/third-party/include/opencv2/core/detail/async_promise.hpp +73 -0
- package/third-party/include/opencv2/core/detail/dispatch_helper.impl.hpp +48 -0
- package/third-party/include/opencv2/core/detail/exception_ptr.hpp +24 -0
- package/third-party/include/opencv2/core/dualquaternion.hpp +1054 -0
- package/third-party/include/opencv2/core/dualquaternion.inl.hpp +464 -0
- package/third-party/include/opencv2/core/eigen.hpp +405 -0
- package/third-party/include/opencv2/core/fast_math.hpp +433 -0
- package/third-party/include/opencv2/core/hal/hal.hpp +451 -0
- package/third-party/include/opencv2/core/hal/interface.h +191 -0
- package/third-party/include/opencv2/core/hal/intrin.hpp +1222 -0
- package/third-party/include/opencv2/core/hal/intrin_avx.hpp +3378 -0
- package/third-party/include/opencv2/core/hal/intrin_avx512.hpp +3688 -0
- package/third-party/include/opencv2/core/hal/intrin_cpp.hpp +3446 -0
- package/third-party/include/opencv2/core/hal/intrin_forward.hpp +195 -0
- package/third-party/include/opencv2/core/hal/intrin_lasx.hpp +3243 -0
- package/third-party/include/opencv2/core/hal/intrin_lsx.hpp +2671 -0
- package/third-party/include/opencv2/core/hal/intrin_math.hpp +772 -0
- package/third-party/include/opencv2/core/hal/intrin_msa.hpp +1973 -0
- package/third-party/include/opencv2/core/hal/intrin_neon.hpp +2710 -0
- package/third-party/include/opencv2/core/hal/intrin_rvv071.hpp +3452 -0
- package/third-party/include/opencv2/core/hal/intrin_rvv_scalable.hpp +2559 -0
- package/third-party/include/opencv2/core/hal/intrin_sse.hpp +3528 -0
- package/third-party/include/opencv2/core/hal/intrin_sse_em.hpp +175 -0
- package/third-party/include/opencv2/core/hal/intrin_vsx.hpp +1756 -0
- package/third-party/include/opencv2/core/hal/intrin_wasm.hpp +2911 -0
- package/third-party/include/opencv2/core/hal/msa_macros.h +2079 -0
- package/third-party/include/opencv2/core/hal/simd_utils.impl.hpp +313 -0
- package/third-party/include/opencv2/core/mat.hpp +3842 -0
- package/third-party/include/opencv2/core/mat.inl.hpp +2753 -0
- package/third-party/include/opencv2/core/matx.hpp +603 -0
- package/third-party/include/opencv2/core/matx.inl.hpp +1132 -0
- package/third-party/include/opencv2/core/neon_utils.hpp +127 -0
- package/third-party/include/opencv2/core/operations.hpp +610 -0
- package/third-party/include/opencv2/core/optim.hpp +362 -0
- package/third-party/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp +66 -0
- package/third-party/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp +148 -0
- package/third-party/include/opencv2/core/parallel/parallel_backend.hpp +108 -0
- package/third-party/include/opencv2/core/persistence.hpp +1321 -0
- package/third-party/include/opencv2/core/quaternion.hpp +1889 -0
- package/third-party/include/opencv2/core/quaternion.inl.hpp +907 -0
- package/third-party/include/opencv2/core/saturate.hpp +347 -0
- package/third-party/include/opencv2/core/simd_intrinsics.hpp +90 -0
- package/third-party/include/opencv2/core/softfloat.hpp +657 -0
- package/third-party/include/opencv2/core/sse_utils.hpp +861 -0
- package/third-party/include/opencv2/core/traits.hpp +417 -0
- package/third-party/include/opencv2/core/types.hpp +2368 -0
- package/third-party/include/opencv2/core/types_c.h +2064 -0
- package/third-party/include/opencv2/core/utility.hpp +1296 -0
- package/third-party/include/opencv2/core/utils/allocator_stats.hpp +31 -0
- package/third-party/include/opencv2/core/utils/allocator_stats.impl.hpp +111 -0
- package/third-party/include/opencv2/core/utils/filesystem.hpp +91 -0
- package/third-party/include/opencv2/core/utils/fp_control_utils.hpp +70 -0
- package/third-party/include/opencv2/core/utils/instrumentation.hpp +127 -0
- package/third-party/include/opencv2/core/utils/logger.defines.hpp +50 -0
- package/third-party/include/opencv2/core/utils/logger.hpp +258 -0
- package/third-party/include/opencv2/core/utils/logtag.hpp +27 -0
- package/third-party/include/opencv2/core/utils/tls.hpp +230 -0
- package/third-party/include/opencv2/core/utils/trace.hpp +281 -0
- package/third-party/include/opencv2/core/version.hpp +29 -0
- package/third-party/include/opencv2/core/vsx_utils.hpp +1115 -0
- package/third-party/include/opencv2/core.hpp +3699 -0
- package/third-party/include/opencv2/cvconfig.h +155 -0
- package/third-party/include/opencv2/dnn/dnn.hpp +51 -0
- package/third-party/include/opencv2/dnn.hpp +17 -0
- package/third-party/include/opencv2/features2d/features2d.hpp +55 -0
- package/third-party/include/opencv2/features2d/hal/interface.h +32 -0
- package/third-party/include/opencv2/features2d.hpp +1756 -0
- package/third-party/include/opencv2/highgui/highgui.hpp +113 -0
- package/third-party/include/opencv2/highgui.hpp +17 -0
- package/third-party/include/opencv2/imgproc/bindings.hpp +34 -0
- package/third-party/include/opencv2/imgproc/detail/gcgraph.hpp +355 -0
- package/third-party/include/opencv2/imgproc/detail/legacy.hpp +35 -0
- package/third-party/include/opencv2/imgproc/hal/hal.hpp +246 -0
- package/third-party/include/opencv2/imgproc/hal/interface.h +52 -0
- package/third-party/include/opencv2/imgproc/imgproc.hpp +55 -0
- package/third-party/include/opencv2/imgproc/imgproc_c.h +1261 -0
- package/third-party/include/opencv2/imgproc/segmentation.hpp +168 -0
- package/third-party/include/opencv2/imgproc/types_c.h +632 -0
- package/third-party/include/opencv2/imgproc.hpp +5956 -0
- package/third-party/include/opencv2/opencv.hpp +102 -0
- package/third-party/include/opencv2/opencv_modules.hpp +19 -0
- package/third-party/include/opencv2/photo/legacy/constants_c.h +10 -0
- package/third-party/include/opencv2/photo/photo.hpp +55 -0
- package/third-party/include/opencv2/photo.hpp +975 -0
- package/third-party/include/opencv2/video/background_segm.hpp +341 -0
- package/third-party/include/opencv2/video/detail/tracking.detail.hpp +435 -0
- package/third-party/include/opencv2/video/legacy/constants_c.h +15 -0
- package/third-party/include/opencv2/video/tracking.hpp +1014 -0
- package/third-party/include/opencv2/video/video.hpp +55 -0
- package/third-party/include/opencv2/video.hpp +65 -0
- package/third-party/include/tokenizers-cpp/tokenizers_c.h +61 -0
- package/third-party/include/tokenizers-cpp/tokenizers_cpp.h +118 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.h +27 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.mm +249 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h +14 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm +80 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h +32 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm +95 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/InputType.h +12 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/Utils.hpp +217 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.cpp +11 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.h +11 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/irunner.h +48 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.cpp +278 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.h +67 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/stats.h +164 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.cpp +65 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.h +105 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.cpp +91 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.h +51 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_token_generator.h +162 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/util.h +108 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp +193 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h +64 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h +202 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp +313 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h +57 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp +78 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h +23 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp +427 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h +87 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h +76 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj +683 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/xcuserdata/norbertklockiewicz.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/xcuserdata/norbertklockiewicz.xcuserdatad/xcschemes/xcschememanagement.plist +14 -0
- package/third-party/ios/ExecutorchLib/build.sh +44 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/Info.plist +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64/libbackend_coreml_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64-simulator/libbackend_coreml_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/Info.plist +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64/libbackend_mps_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64-simulator/libbackend_mps_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/Info.plist +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64/libbackend_xnnpack_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64-simulator/libbackend_xnnpack_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/Info.plist +47 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Export.h +163 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Macros.h +497 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-inl.h +342 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-math.h +266 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16.h +125 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half-inl.h +347 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half.h +416 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/TypeSafeSignMath.h +133 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/bit_cast.h +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/floating_point_utils.h +33 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/irange.h +107 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorch.h +13 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchError.h +16 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchLog.h +76 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchModule.h +286 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchTensor.h +742 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchValue.h +219 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/module/module.h +492 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor.h +13 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_accessor.h +190 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr.h +347 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_execution_context.h +71 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_init_context.h +72 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/interface.h +166 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/array_ref.h +235 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/data_loader.h +136 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/defines.h +20 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/error.h +229 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/evalue.h +521 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer.h +565 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks.h +323 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/freeable_buffer.h +107 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/hierarchical_allocator.h +107 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/memory_allocator.h +198 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/named_data_map.h +86 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16.h +27 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bits_types.h +83 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/complex.h +44 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/device.h +70 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/half.h +27 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/optional.h +36 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/qint_types.h +83 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar.h +110 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar_type.h +154 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/string_view.h +29 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor.h +142 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_options.h +60 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/result.h +258 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/span.h +93 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tag.h +71 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_layout.h +79 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/memory_manager.h +113 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method.h +387 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method_meta.h +251 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/program.h +320 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/pte_data_map.h +144 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/tensor_parser.h +156 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/operator_registry.h +278 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/abort.h +36 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/assert.h +119 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/clock.h +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compat_unistd.h +75 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compiler.h +191 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/log.h +177 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/platform.h +133 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/profiler.h +292 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/runtime.h +35 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/system.h +49 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/types.h +24 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/schema/extended_header.h +76 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/module.modulemap +5 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/libexecutorch_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Export.h +163 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Macros.h +497 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-inl.h +342 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-math.h +266 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16.h +125 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half-inl.h +347 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half.h +416 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/TypeSafeSignMath.h +133 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/bit_cast.h +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/floating_point_utils.h +33 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/irange.h +107 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorch.h +13 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchError.h +16 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchLog.h +76 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchModule.h +286 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchTensor.h +742 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchValue.h +219 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/module/module.h +492 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor.h +13 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_accessor.h +190 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr.h +347 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_execution_context.h +71 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_init_context.h +72 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/interface.h +166 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/array_ref.h +235 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/data_loader.h +136 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/defines.h +20 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/error.h +229 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/evalue.h +521 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer.h +565 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks.h +323 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/freeable_buffer.h +107 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/hierarchical_allocator.h +107 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/memory_allocator.h +198 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/named_data_map.h +86 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16.h +27 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bits_types.h +83 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/complex.h +44 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/device.h +70 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/half.h +27 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/optional.h +36 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/qint_types.h +83 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar.h +110 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar_type.h +154 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/string_view.h +29 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor.h +142 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_options.h +60 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/result.h +258 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/span.h +93 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tag.h +71 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_layout.h +79 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/memory_manager.h +113 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method.h +387 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method_meta.h +251 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/program.h +320 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/pte_data_map.h +144 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/tensor_parser.h +156 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/operator_registry.h +278 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/abort.h +36 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/assert.h +119 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/clock.h +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compat_unistd.h +75 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compiler.h +191 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/log.h +177 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/platform.h +133 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/profiler.h +292 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/runtime.h +35 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/system.h +49 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/types.h +24 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/schema/extended_header.h +76 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/module.modulemap +5 -0
- package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/libexecutorch_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/Info.plist +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64/libkernels_custom_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64-simulator/libkernels_custom_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/Info.plist +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64/libkernels_optimized_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64-simulator/libkernels_optimized_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/Info.plist +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64/libkernels_portable_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64-simulator/libkernels_portable_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/Info.plist +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64/libkernels_quantized_ios.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64-simulator/libkernels_quantized_simulator.a +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/Info.plist +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/bitmap256.h +82 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/filtered_re2.h +111 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/pod_array.h +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter.h +130 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter_tree.h +139 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prog.h +483 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/re2.h +994 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/regexp.h +692 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/set.h +85 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_array.h +367 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_set.h +241 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/stringpiece.h +205 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_casefold.h +78 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_groups.h +64 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/walker-inl.h +235 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Info.plist +26 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/re2 +0 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/bitmap256.h +82 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/filtered_re2.h +111 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/pod_array.h +43 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter.h +130 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter_tree.h +139 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prog.h +483 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/re2.h +994 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/regexp.h +692 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/set.h +85 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_array.h +367 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_set.h +241 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/stringpiece.h +205 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_casefold.h +78 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_groups.h +64 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/walker-inl.h +235 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Info.plist +26 -0
- package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/re2 +0 -0
- package/third-party/ios/ios.toolchain.cmake +1122 -0
- package/LICENSE +0 -79
- package/README.md +0 -148
- package/android/src/main/java/com/swmansion/rnexecutorch/Classification.kt +0 -64
- package/android/src/main/java/com/swmansion/rnexecutorch/ETModule.kt +0 -90
- package/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt +0 -58
- package/android/src/main/java/com/swmansion/rnexecutorch/OCR.kt +0 -90
- package/android/src/main/java/com/swmansion/rnexecutorch/ObjectDetection.kt +0 -64
- package/android/src/main/java/com/swmansion/rnexecutorch/SpeechToText.kt +0 -91
- package/android/src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt +0 -54
- package/android/src/main/java/com/swmansion/rnexecutorch/TextEmbeddings.kt +0 -51
- package/android/src/main/java/com/swmansion/rnexecutorch/Tokenizer.kt +0 -86
- package/android/src/main/java/com/swmansion/rnexecutorch/VerticalOCR.kt +0 -179
- package/android/src/main/java/com/swmansion/rnexecutorch/models/BaseModel.kt +0 -54
- package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsModel.kt +0 -48
- package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsUtils.kt +0 -37
- package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt +0 -46
- package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/Constants.kt +0 -1005
- package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt +0 -26
- package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt +0 -142
- package/android/src/main/java/com/swmansion/rnexecutorch/models/objectDetection/SSDLiteLargeModel.kt +0 -74
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Detector.kt +0 -82
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/RecognitionHandler.kt +0 -117
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Recognizer.kt +0 -51
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/VerticalDetector.kt +0 -89
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/CTCLabelConverter.kt +0 -58
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/Constants.kt +0 -31
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/DetectorUtils.kt +0 -608
- package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/RecognizerUtils.kt +0 -430
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TDecoder.kt +0 -39
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TModule.kt +0 -43
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Moonshine.kt +0 -16
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineDecoder.kt +0 -23
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineEncoder.kt +0 -20
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Whisper.kt +0 -16
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperDecoder.kt +0 -22
- package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperEncoder.kt +0 -29
- package/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt +0 -43
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ArrayUtils.kt +0 -87
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ETError.kt +0 -34
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ImageProcessor.kt +0 -237
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/Numerical.kt +0 -8
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/ObjectDetectionUtils.kt +0 -201
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/STFT.kt +0 -50
- package/android/src/main/java/com/swmansion/rnexecutorch/utils/TensorUtils.kt +0 -103
- package/ios/RnExecutorch/Classification.h +0 -5
- package/ios/RnExecutorch/Classification.mm +0 -54
- package/ios/RnExecutorch/ETModule.h +0 -5
- package/ios/RnExecutorch/ETModule.mm +0 -75
- package/ios/RnExecutorch/ImageSegmentation.h +0 -5
- package/ios/RnExecutorch/ImageSegmentation.mm +0 -60
- package/ios/RnExecutorch/OCR.h +0 -5
- package/ios/RnExecutorch/OCR.mm +0 -96
- package/ios/RnExecutorch/ObjectDetection.h +0 -5
- package/ios/RnExecutorch/ObjectDetection.mm +0 -56
- package/ios/RnExecutorch/SpeechToText.h +0 -5
- package/ios/RnExecutorch/SpeechToText.mm +0 -125
- package/ios/RnExecutorch/StyleTransfer.h +0 -5
- package/ios/RnExecutorch/StyleTransfer.mm +0 -55
- package/ios/RnExecutorch/TextEmbeddings.h +0 -5
- package/ios/RnExecutorch/TextEmbeddings.mm +0 -62
- package/ios/RnExecutorch/Tokenizer.h +0 -5
- package/ios/RnExecutorch/Tokenizer.mm +0 -83
- package/ios/RnExecutorch/VerticalOCR.h +0 -5
- package/ios/RnExecutorch/VerticalOCR.mm +0 -183
- package/ios/RnExecutorch/models/BaseModel.h +0 -21
- package/ios/RnExecutorch/models/BaseModel.mm +0 -43
- package/ios/RnExecutorch/models/classification/ClassificationModel.h +0 -10
- package/ios/RnExecutorch/models/classification/ClassificationModel.mm +0 -53
- package/ios/RnExecutorch/models/classification/Constants.h +0 -3
- package/ios/RnExecutorch/models/image_segmentation/Constants.h +0 -4
- package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +0 -10
- package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +0 -146
- package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.hpp +0 -11
- package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.mm +0 -64
- package/ios/RnExecutorch/models/ocr/Detector.h +0 -9
- package/ios/RnExecutorch/models/ocr/Detector.mm +0 -101
- package/ios/RnExecutorch/models/ocr/RecognitionHandler.h +0 -16
- package/ios/RnExecutorch/models/ocr/RecognitionHandler.mm +0 -135
- package/ios/RnExecutorch/models/ocr/Recognizer.h +0 -8
- package/ios/RnExecutorch/models/ocr/Recognizer.mm +0 -77
- package/ios/RnExecutorch/models/ocr/VerticalDetector.h +0 -10
- package/ios/RnExecutorch/models/ocr/VerticalDetector.mm +0 -118
- package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.h +0 -16
- package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.mm +0 -80
- package/ios/RnExecutorch/models/ocr/utils/Constants.h +0 -26
- package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.h +0 -31
- package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.mm +0 -754
- package/ios/RnExecutorch/models/ocr/utils/OCRUtils.h +0 -10
- package/ios/RnExecutorch/models/ocr/utils/OCRUtils.mm +0 -67
- package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.h +0 -35
- package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.mm +0 -331
- package/ios/RnExecutorch/models/stt/Moonshine.hpp +0 -13
- package/ios/RnExecutorch/models/stt/Moonshine.mm +0 -64
- package/ios/RnExecutorch/models/stt/MoonshineDecoder.hpp +0 -16
- package/ios/RnExecutorch/models/stt/MoonshineDecoder.mm +0 -24
- package/ios/RnExecutorch/models/stt/MoonshineEncoder.hpp +0 -15
- package/ios/RnExecutorch/models/stt/MoonshineEncoder.mm +0 -18
- package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.hpp +0 -26
- package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.mm +0 -19
- package/ios/RnExecutorch/models/stt/Whisper.hpp +0 -12
- package/ios/RnExecutorch/models/stt/Whisper.mm +0 -68
- package/ios/RnExecutorch/models/stt/WhisperDecoder.hpp +0 -16
- package/ios/RnExecutorch/models/stt/WhisperDecoder.mm +0 -22
- package/ios/RnExecutorch/models/stt/WhisperEncoder.hpp +0 -15
- package/ios/RnExecutorch/models/stt/WhisperEncoder.mm +0 -21
- package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h +0 -11
- package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm +0 -50
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.h +0 -15
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.mm +0 -45
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.h +0 -8
- package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.mm +0 -49
- package/ios/RnExecutorch/utils/Constants.h +0 -8
- package/ios/RnExecutorch/utils/ObjectDetectionUtils.hpp +0 -23
- package/ios/RnExecutorch/utils/SFFT.hpp +0 -13
- package/ios/RnExecutorch/utils/SFFT.mm +0 -71
- package/lib/module/native/NativeClassification.js +0 -5
- package/lib/module/native/NativeClassification.js.map +0 -1
- package/lib/module/native/NativeETModule.js +0 -5
- package/lib/module/native/NativeETModule.js.map +0 -1
- package/lib/module/native/NativeImageSegmentation.js +0 -5
- package/lib/module/native/NativeImageSegmentation.js.map +0 -1
- package/lib/module/native/NativeOCR.js +0 -5
- package/lib/module/native/NativeOCR.js.map +0 -1
- package/lib/module/native/NativeObjectDetection.js +0 -5
- package/lib/module/native/NativeObjectDetection.js.map +0 -1
- package/lib/module/native/NativeSpeechToText.js +0 -5
- package/lib/module/native/NativeSpeechToText.js.map +0 -1
- package/lib/module/native/NativeStyleTransfer.js +0 -5
- package/lib/module/native/NativeStyleTransfer.js.map +0 -1
- package/lib/module/native/NativeTextEmbeddings.js +0 -5
- package/lib/module/native/NativeTextEmbeddings.js.map +0 -1
- package/lib/module/native/NativeTokenizer.js +0 -5
- package/lib/module/native/NativeTokenizer.js.map +0 -1
- package/lib/module/native/NativeVerticalOCR.js +0 -5
- package/lib/module/native/NativeVerticalOCR.js.map +0 -1
- package/lib/module/package.json +0 -1
- package/lib/typescript/native/NativeClassification.d.ts +0 -10
- package/lib/typescript/native/NativeClassification.d.ts.map +0 -1
- package/lib/typescript/native/NativeETModule.d.ts +0 -9
- package/lib/typescript/native/NativeETModule.d.ts.map +0 -1
- package/lib/typescript/native/NativeImageSegmentation.d.ts +0 -10
- package/lib/typescript/native/NativeImageSegmentation.d.ts.map +0 -1
- package/lib/typescript/native/NativeOCR.d.ts +0 -9
- package/lib/typescript/native/NativeOCR.d.ts.map +0 -1
- package/lib/typescript/native/NativeObjectDetection.d.ts +0 -9
- package/lib/typescript/native/NativeObjectDetection.d.ts.map +0 -1
- package/lib/typescript/native/NativeSpeechToText.d.ts +0 -12
- package/lib/typescript/native/NativeSpeechToText.d.ts.map +0 -1
- package/lib/typescript/native/NativeStyleTransfer.d.ts.map +0 -1
- package/lib/typescript/native/NativeTextEmbeddings.d.ts +0 -8
- package/lib/typescript/native/NativeTextEmbeddings.d.ts.map +0 -1
- package/lib/typescript/native/NativeTokenizer.d.ts +0 -12
- package/lib/typescript/native/NativeTokenizer.d.ts.map +0 -1
- package/lib/typescript/native/NativeVerticalOCR.d.ts +0 -9
- package/lib/typescript/native/NativeVerticalOCR.d.ts.map +0 -1
- package/src/native/NativeClassification.ts +0 -9
- package/src/native/NativeETModule.ts +0 -14
- package/src/native/NativeImageSegmentation.ts +0 -14
- package/src/native/NativeOCR.ts +0 -16
- package/src/native/NativeObjectDetection.ts +0 -10
- package/src/native/NativeSpeechToText.ts +0 -17
- package/src/native/NativeStyleTransfer.ts +0 -10
- package/src/native/NativeTextEmbeddings.ts +0 -9
- package/src/native/NativeTokenizer.ts +0 -13
- package/src/native/NativeVerticalOCR.ts +0 -16
|
@@ -0,0 +1,2205 @@
|
|
|
1
|
+
/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )
|
|
2
|
+
|
|
3
|
+
Based on original fortran 77 code from FFTPACKv4 from NETLIB
|
|
4
|
+
(http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber
|
|
5
|
+
of NCAR, in 1985.
|
|
6
|
+
|
|
7
|
+
As confirmed by the NCAR fftpack software curators, the following
|
|
8
|
+
FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
|
|
9
|
+
released under the same terms.
|
|
10
|
+
|
|
11
|
+
FFTPACK license:
|
|
12
|
+
|
|
13
|
+
http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
|
|
14
|
+
|
|
15
|
+
Copyright (c) 2004 the University Corporation for Atmospheric
|
|
16
|
+
Research ("UCAR"). All rights reserved. Developed by NCAR's
|
|
17
|
+
Computational and Information Systems Laboratory, UCAR,
|
|
18
|
+
www.cisl.ucar.edu.
|
|
19
|
+
|
|
20
|
+
Redistribution and use of the Software in source and binary forms,
|
|
21
|
+
with or without modification, is permitted provided that the
|
|
22
|
+
following conditions are met:
|
|
23
|
+
|
|
24
|
+
- Neither the names of NCAR's Computational and Information Systems
|
|
25
|
+
Laboratory, the University Corporation for Atmospheric Research,
|
|
26
|
+
nor the names of its sponsors or contributors may be used to
|
|
27
|
+
endorse or promote products derived from this Software without
|
|
28
|
+
specific prior written permission.
|
|
29
|
+
|
|
30
|
+
- Redistributions of source code must retain the above copyright
|
|
31
|
+
notices, this list of conditions, and the disclaimer below.
|
|
32
|
+
|
|
33
|
+
- Redistributions in binary form must reproduce the above copyright
|
|
34
|
+
notice, this list of conditions, and the disclaimer below in the
|
|
35
|
+
documentation and/or other materials provided with the
|
|
36
|
+
distribution.
|
|
37
|
+
|
|
38
|
+
THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
39
|
+
EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
|
|
40
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
41
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
|
|
42
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
|
|
43
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
44
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
45
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
|
46
|
+
SOFTWARE.
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
PFFFT : a Pretty Fast FFT.
|
|
50
|
+
|
|
51
|
+
This file is largely based on the original FFTPACK implementation, modified
|
|
52
|
+
in order to take advantage of SIMD instructions of modern CPUs.
|
|
53
|
+
*/
|
|
54
|
+
|
|
55
|
+
/*
|
|
56
|
+
ChangeLog:
|
|
57
|
+
- 2011/10/02, version 1: This is the very first release of this file.
|
|
58
|
+
*/
|
|
59
|
+
|
|
60
|
+
#ifndef _USE_MATH_DEFINES
|
|
61
|
+
#define _USE_MATH_DEFINES // ask gently MSVC to define M_PI, M_SQRT2 etc.
|
|
62
|
+
#endif
|
|
63
|
+
|
|
64
|
+
#include <assert.h>
|
|
65
|
+
#include <math.h>
|
|
66
|
+
#include <pfft/pfft.h>
|
|
67
|
+
#include <stdio.h>
|
|
68
|
+
#include <stdlib.h>
|
|
69
|
+
|
|
70
|
+
/* detect compiler flavour */
|
|
71
|
+
#if defined(_MSC_VER)
|
|
72
|
+
#define COMPILER_MSVC
|
|
73
|
+
#elif defined(__GNUC__)
|
|
74
|
+
#define COMPILER_GCC
|
|
75
|
+
#endif
|
|
76
|
+
|
|
77
|
+
#if defined(COMPILER_GCC)
|
|
78
|
+
#define ALWAYS_INLINE(return_type) \
|
|
79
|
+
inline return_type __attribute__((always_inline))
|
|
80
|
+
#define NEVER_INLINE(return_type) return_type __attribute__((noinline))
|
|
81
|
+
#define RESTRICT __restrict
|
|
82
|
+
#define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__];
|
|
83
|
+
#elif defined(COMPILER_MSVC)
|
|
84
|
+
#define ALWAYS_INLINE(return_type) __forceinline return_type
|
|
85
|
+
#define NEVER_INLINE(return_type) __declspec(noinline) return_type
|
|
86
|
+
#define RESTRICT __restrict
|
|
87
|
+
#define VLA_ARRAY_ON_STACK(type__, varname__, size__) \
|
|
88
|
+
type__ *varname__ = (type__ *)_alloca(size__ * sizeof(type__))
|
|
89
|
+
#endif
|
|
90
|
+
|
|
91
|
+
/*
|
|
92
|
+
vector support macros: the rest of the code is independent of
|
|
93
|
+
SSE/Altivec/NEON -- adding support for other platforms with 4-element
|
|
94
|
+
vectors should be limited to these macros
|
|
95
|
+
*/
|
|
96
|
+
|
|
97
|
+
// define PFFFT_SIMD_DISABLE if you want to use scalar code instead of simd code
|
|
98
|
+
// #define PFFFT_SIMD_DISABLE
|
|
99
|
+
|
|
100
|
+
/* select which SIMD intrinsics will be used */
|
|
101
|
+
#if !defined(PFFFT_SIMD_DISABLE)
|
|
102
|
+
#if (defined(__ppc__) || defined(__ppc64__) || defined(__powerpc__) || \
|
|
103
|
+
defined(__powerpc64__)) && \
|
|
104
|
+
(defined(__VEC__) || defined(__ALTIVEC__))
|
|
105
|
+
#define PFFFT_SIMD_ALTIVEC
|
|
106
|
+
#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(__arm64) || \
|
|
107
|
+
defined(_M_ARM64) || defined(_M_ARM64EC) || defined(__wasm_simd128__)
|
|
108
|
+
// we test _M_ARM64EC before _M_X64 because when _M_ARM64EC is defined, the
|
|
109
|
+
// microsoft compiler also defines _M_X64
|
|
110
|
+
#define PFFFT_SIMD_NEON
|
|
111
|
+
#elif defined(__x86_64__) || defined(__SSE__) || defined(_M_X64) || \
|
|
112
|
+
(defined(_M_IX86_FP) && _M_IX86_FP >= 1)
|
|
113
|
+
#define PFFFT_SIMD_SSE
|
|
114
|
+
#endif
|
|
115
|
+
#endif // PFFFT_SIMD_DISABLE
|
|
116
|
+
|
|
117
|
+
/*
|
|
118
|
+
Altivec support macros
|
|
119
|
+
*/
|
|
120
|
+
#ifdef PFFFT_SIMD_ALTIVEC
|
|
121
|
+
#include <altivec.h>
|
|
122
|
+
typedef vector float v4sf;
|
|
123
|
+
#define SIMD_SZ 4
|
|
124
|
+
#define VZERO() ((vector float)vec_splat_u8(0))
|
|
125
|
+
#define VMUL(a, b) vec_madd(a, b, VZERO())
|
|
126
|
+
#define VADD(a, b) vec_add(a, b)
|
|
127
|
+
#define VMADD(a, b, c) vec_madd(a, b, c)
|
|
128
|
+
#define VSUB(a, b) vec_sub(a, b)
|
|
129
|
+
inline v4sf ld_ps1(const float *p) {
|
|
130
|
+
v4sf v = vec_lde(0, p);
|
|
131
|
+
return vec_splat(vec_perm(v, v, vec_lvsl(0, p)), 0);
|
|
132
|
+
}
|
|
133
|
+
#define LD_PS1(p) ld_ps1(&p)
|
|
134
|
+
#define INTERLEAVE2(in1, in2, out1, out2) \
|
|
135
|
+
{ \
|
|
136
|
+
v4sf tmp__ = vec_mergeh(in1, in2); \
|
|
137
|
+
out2 = vec_mergel(in1, in2); \
|
|
138
|
+
out1 = tmp__; \
|
|
139
|
+
}
|
|
140
|
+
#define UNINTERLEAVE2(in1, in2, out1, out2) \
|
|
141
|
+
{ \
|
|
142
|
+
vector unsigned char vperm1 = (vector unsigned char){ \
|
|
143
|
+
0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}; \
|
|
144
|
+
vector unsigned char vperm2 = (vector unsigned char){ \
|
|
145
|
+
4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}; \
|
|
146
|
+
v4sf tmp__ = vec_perm(in1, in2, vperm1); \
|
|
147
|
+
out2 = vec_perm(in1, in2, vperm2); \
|
|
148
|
+
out1 = tmp__; \
|
|
149
|
+
}
|
|
150
|
+
#define VTRANSPOSE4(x0, x1, x2, x3) \
|
|
151
|
+
{ \
|
|
152
|
+
v4sf y0 = vec_mergeh(x0, x2); \
|
|
153
|
+
v4sf y1 = vec_mergel(x0, x2); \
|
|
154
|
+
v4sf y2 = vec_mergeh(x1, x3); \
|
|
155
|
+
v4sf y3 = vec_mergel(x1, x3); \
|
|
156
|
+
x0 = vec_mergeh(y0, y2); \
|
|
157
|
+
x1 = vec_mergel(y0, y2); \
|
|
158
|
+
x2 = vec_mergeh(y1, y3); \
|
|
159
|
+
x3 = vec_mergel(y1, y3); \
|
|
160
|
+
}
|
|
161
|
+
#define VSWAPHL(a, b) \
|
|
162
|
+
vec_perm(a, b, \
|
|
163
|
+
(vector unsigned char){16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, \
|
|
164
|
+
11, 12, 13, 14, 15})
|
|
165
|
+
#define VALIGNED(ptr) ((((size_t)(ptr)) & 0xF) == 0)
|
|
166
|
+
|
|
167
|
+
/*
|
|
168
|
+
SSE1 support macros
|
|
169
|
+
*/
|
|
170
|
+
#elif defined(PFFFT_SIMD_SSE)
|
|
171
|
+
|
|
172
|
+
#include <xmmintrin.h>
|
|
173
|
+
typedef __m128 v4sf;
|
|
174
|
+
#define SIMD_SZ \
|
|
175
|
+
4 // 4 floats by simd vector -- this is pretty much hardcoded in the
|
|
176
|
+
// preprocess/finalize functions anyway so you will have to work if you want
|
|
177
|
+
// to enable AVX with its 256-bit vectors.
|
|
178
|
+
#define VZERO() _mm_setzero_ps()
|
|
179
|
+
#define VMUL(a, b) _mm_mul_ps(a, b)
|
|
180
|
+
#define VADD(a, b) _mm_add_ps(a, b)
|
|
181
|
+
#define VMADD(a, b, c) _mm_add_ps(_mm_mul_ps(a, b), c)
|
|
182
|
+
#define VSUB(a, b) _mm_sub_ps(a, b)
|
|
183
|
+
#define LD_PS1(p) _mm_set1_ps(p)
|
|
184
|
+
#define INTERLEAVE2(in1, in2, out1, out2) \
|
|
185
|
+
{ \
|
|
186
|
+
v4sf tmp__ = _mm_unpacklo_ps(in1, in2); \
|
|
187
|
+
out2 = _mm_unpackhi_ps(in1, in2); \
|
|
188
|
+
out1 = tmp__; \
|
|
189
|
+
}
|
|
190
|
+
#define UNINTERLEAVE2(in1, in2, out1, out2) \
|
|
191
|
+
{ \
|
|
192
|
+
v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2, 0, 2, 0)); \
|
|
193
|
+
out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3, 1, 3, 1)); \
|
|
194
|
+
out1 = tmp__; \
|
|
195
|
+
}
|
|
196
|
+
#define VTRANSPOSE4(x0, x1, x2, x3) _MM_TRANSPOSE4_PS(x0, x1, x2, x3)
|
|
197
|
+
#define VSWAPHL(a, b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3, 2, 1, 0))
|
|
198
|
+
#define VALIGNED(ptr) ((((size_t)(ptr)) & 0xF) == 0)
|
|
199
|
+
|
|
200
|
+
/*
|
|
201
|
+
ARM NEON support macros
|
|
202
|
+
*/
|
|
203
|
+
#elif defined(PFFFT_SIMD_NEON)
|
|
204
|
+
#include <arm_neon.h>
|
|
205
|
+
typedef float32x4_t v4sf;
|
|
206
|
+
#define SIMD_SZ 4
|
|
207
|
+
#define VZERO() vdupq_n_f32(0)
|
|
208
|
+
#define VMUL(a, b) vmulq_f32(a, b)
|
|
209
|
+
#define VADD(a, b) vaddq_f32(a, b)
|
|
210
|
+
#define VMADD(a, b, c) vmlaq_f32(c, a, b)
|
|
211
|
+
#define VSUB(a, b) vsubq_f32(a, b)
|
|
212
|
+
#define LD_PS1(p) vld1q_dup_f32(&(p))
|
|
213
|
+
#define INTERLEAVE2(in1, in2, out1, out2) \
|
|
214
|
+
{ \
|
|
215
|
+
float32x4x2_t tmp__ = vzipq_f32(in1, in2); \
|
|
216
|
+
out1 = tmp__.val[0]; \
|
|
217
|
+
out2 = tmp__.val[1]; \
|
|
218
|
+
}
|
|
219
|
+
#define UNINTERLEAVE2(in1, in2, out1, out2) \
|
|
220
|
+
{ \
|
|
221
|
+
float32x4x2_t tmp__ = vuzpq_f32(in1, in2); \
|
|
222
|
+
out1 = tmp__.val[0]; \
|
|
223
|
+
out2 = tmp__.val[1]; \
|
|
224
|
+
}
|
|
225
|
+
#define VTRANSPOSE4(x0, x1, x2, x3) \
|
|
226
|
+
{ \
|
|
227
|
+
float32x4x2_t t0_ = vzipq_f32(x0, x2); \
|
|
228
|
+
float32x4x2_t t1_ = vzipq_f32(x1, x3); \
|
|
229
|
+
float32x4x2_t u0_ = vzipq_f32(t0_.val[0], t1_.val[0]); \
|
|
230
|
+
float32x4x2_t u1_ = vzipq_f32(t0_.val[1], t1_.val[1]); \
|
|
231
|
+
x0 = u0_.val[0]; \
|
|
232
|
+
x1 = u0_.val[1]; \
|
|
233
|
+
x2 = u1_.val[0]; \
|
|
234
|
+
x3 = u1_.val[1]; \
|
|
235
|
+
}
|
|
236
|
+
// marginally faster version
|
|
237
|
+
// # define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32
|
|
238
|
+
// %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2),
|
|
239
|
+
// "+w"(x3)::); }
|
|
240
|
+
#define VSWAPHL(a, b) vcombine_f32(vget_low_f32(b), vget_high_f32(a))
|
|
241
|
+
#define VALIGNED(ptr) ((((size_t)(ptr)) & 0x3) == 0)
|
|
242
|
+
#else
|
|
243
|
+
#if !defined(PFFFT_SIMD_DISABLE)
|
|
244
|
+
#warning "building with simd disabled !\n";
|
|
245
|
+
#define PFFFT_SIMD_DISABLE // fallback to scalar code
|
|
246
|
+
#endif
|
|
247
|
+
#endif
|
|
248
|
+
|
|
249
|
+
// fallback mode for situations where SSE/Altivec/NEON are not available: use scalar
|
|
250
|
+
// mode instead
|
|
251
|
+
#ifdef PFFFT_SIMD_DISABLE
|
|
252
|
+
typedef float v4sf;
|
|
253
|
+
#define SIMD_SZ 1
|
|
254
|
+
#define VZERO() 0.f
|
|
255
|
+
#define VMUL(a, b) ((a) * (b))
|
|
256
|
+
#define VADD(a, b) ((a) + (b))
|
|
257
|
+
#define VMADD(a, b, c) ((a) * (b) + (c))
|
|
258
|
+
#define VSUB(a, b) ((a) - (b))
|
|
259
|
+
#define LD_PS1(p) (p)
|
|
260
|
+
#define VALIGNED(ptr) ((((size_t)(ptr)) & 0x3) == 0)
|
|
261
|
+
#endif
|
|
262
|
+
|
|
263
|
+
// shortcuts for complex multiplications
|
|
264
|
+
/*
 * In-place complex multiply on split real/imaginary operands:
 *   (ar + i*ai) <- (ar + i*ai) * (br + i*bi)
 *
 * ar and ai must be modifiable lvalues. br and bi are each expanded twice, so
 * they must be free of side effects.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else), every argument is
 * parenthesised, and the temporary has a macro-specific name so it cannot
 * capture a caller variable that happens to be called `tmp`.
 */
#define VCPLXMUL(ar, ai, br, bi)                                               \
  do {                                                                         \
    v4sf vcplxmul_tmp_ = VMUL((ar), (bi));                                     \
    (ar) = VMUL((ar), (br));                                                   \
    (ar) = VSUB((ar), VMUL((ai), (bi)));                                       \
    (ai) = VMUL((ai), (br));                                                   \
    (ai) = VADD((ai), vcplxmul_tmp_);                                          \
  } while (0)

/*
 * In-place complex multiply by the conjugate of (br + i*bi):
 *   (ar + i*ai) <- (ar + i*ai) * (br - i*bi)
 *
 * Same argument requirements as VCPLXMUL.
 */
#define VCPLXMULCONJ(ar, ai, br, bi)                                           \
  do {                                                                         \
    v4sf vcplxmulconj_tmp_ = VMUL((ar), (bi));                                 \
    (ar) = VMUL((ar), (br));                                                   \
    (ar) = VADD((ar), VMUL((ai), (bi)));                                       \
    (ai) = VMUL((ai), (br));                                                   \
    (ai) = VSUB((ai), vcplxmulconj_tmp_);                                      \
  } while (0)
|
|
282
|
+
#ifndef SVMUL
|
|
283
|
+
// multiply a scalar with a vector
|
|
284
|
+
#define SVMUL(f, v) VMUL(LD_PS1(f), v)
|
|
285
|
+
#endif
|
|
286
|
+
|
|
287
|
+
#if !defined(PFFFT_SIMD_DISABLE)
|
|
288
|
+
typedef union v4sf_union {
|
|
289
|
+
v4sf v;
|
|
290
|
+
float f[4];
|
|
291
|
+
} v4sf_union;
|
|
292
|
+
|
|
293
|
+
#include <string.h>
|
|
294
|
+
|
|
295
|
+
#define assertv4(v, f0, f1, f2, f3) \
|
|
296
|
+
assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3))
|
|
297
|
+
|
|
298
|
+
/* detect bugs with the vector support macros */
|
|
299
|
+
void validate_pffft_simd(void) {
|
|
300
|
+
float f[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
|
301
|
+
v4sf_union a0, a1, a2, a3, t, u;
|
|
302
|
+
memcpy(a0.f, f, 4 * sizeof(float));
|
|
303
|
+
memcpy(a1.f, f + 4, 4 * sizeof(float));
|
|
304
|
+
memcpy(a2.f, f + 8, 4 * sizeof(float));
|
|
305
|
+
memcpy(a3.f, f + 12, 4 * sizeof(float));
|
|
306
|
+
|
|
307
|
+
t = a0;
|
|
308
|
+
u = a1;
|
|
309
|
+
t.v = VZERO();
|
|
310
|
+
printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
|
311
|
+
assertv4(t, 0, 0, 0, 0);
|
|
312
|
+
t.v = VADD(a1.v, a2.v);
|
|
313
|
+
printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
|
314
|
+
assertv4(t, 12, 14, 16, 18);
|
|
315
|
+
t.v = VMUL(a1.v, a2.v);
|
|
316
|
+
printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
|
317
|
+
assertv4(t, 32, 45, 60, 77);
|
|
318
|
+
t.v = VMADD(a1.v, a2.v, a0.v);
|
|
319
|
+
printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2],
|
|
320
|
+
t.f[3]);
|
|
321
|
+
assertv4(t, 32, 46, 62, 80);
|
|
322
|
+
|
|
323
|
+
INTERLEAVE2(a1.v, a2.v, t.v, u.v);
|
|
324
|
+
printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0],
|
|
325
|
+
t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
|
|
326
|
+
assertv4(t, 4, 8, 5, 9);
|
|
327
|
+
assertv4(u, 6, 10, 7, 11);
|
|
328
|
+
UNINTERLEAVE2(a1.v, a2.v, t.v, u.v);
|
|
329
|
+
printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
|
|
330
|
+
t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
|
|
331
|
+
assertv4(t, 4, 6, 8, 10);
|
|
332
|
+
assertv4(u, 5, 7, 9, 11);
|
|
333
|
+
|
|
334
|
+
t.v = LD_PS1(f[15]);
|
|
335
|
+
printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
|
336
|
+
assertv4(t, 15, 15, 15, 15);
|
|
337
|
+
t.v = VSWAPHL(a1.v, a2.v);
|
|
338
|
+
printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2],
|
|
339
|
+
t.f[3]);
|
|
340
|
+
assertv4(t, 8, 9, 6, 7);
|
|
341
|
+
VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
|
|
342
|
+
printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] "
|
|
343
|
+
"[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
|
|
344
|
+
a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
|
|
345
|
+
a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2],
|
|
346
|
+
a3.f[3]);
|
|
347
|
+
assertv4(a0, 0, 4, 8, 12);
|
|
348
|
+
assertv4(a1, 1, 5, 9, 13);
|
|
349
|
+
assertv4(a2, 2, 6, 10, 14);
|
|
350
|
+
assertv4(a3, 3, 7, 11, 15);
|
|
351
|
+
}
|
|
352
|
+
#else
|
|
353
|
+
/*
 * No-op stand-in compiled when SIMD is disabled. Declared with an explicit
 * (void) parameter list so it is a real prototype and matches the signature
 * of the SIMD variant.
 */
void validate_pffft_simd(void) {
} // allow test_pffft.c to call this function even when simd is not available..
|
|
355
|
+
#endif //! PFFFT_SIMD_DISABLE
|
|
356
|
+
|
|
357
|
+
/* SSE and co like 16-byte aligned pointers */
|
|
358
|
+
#define MALLOC_V4SF_ALIGNMENT                                                  \
  64 // with a 64-byte alignment, we are even aligned on L2 cache lines...

/*
 * Allocate nb_bytes of storage aligned on MALLOC_V4SF_ALIGNMENT bytes.
 *
 * The block is over-allocated by MALLOC_V4SF_ALIGNMENT bytes; the returned
 * pointer is the next aligned address strictly after the malloc() result, and
 * the original malloc() pointer is stored in the slot just before it so that
 * pffft_aligned_free() can recover it.
 *
 * Returns NULL when malloc() fails or when nb_bytes is so large that adding
 * the alignment padding would wrap around size_t (previously such a request
 * silently produced a tiny buffer).
 *
 * The result must be released with pffft_aligned_free(), never with free().
 */
void *pffft_aligned_malloc(size_t nb_bytes) {
  void *p, *p0;

  /* reject sizes for which nb_bytes + padding would overflow size_t */
  if (nb_bytes > (size_t)-1 - MALLOC_V4SF_ALIGNMENT)
    return (void *)0;

  p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
  if (!p0)
    return (void *)0;

  /* round up to the next multiple of the alignment, always moving forward */
  p = (void *)(((size_t)p0 + MALLOC_V4SF_ALIGNMENT) &
               (~((size_t)(MALLOC_V4SF_ALIGNMENT - 1))));
  /* stash the real allocation address right below the aligned block */
  *((void **)p - 1) = p0;
  return p;
}
|
|
369
|
+
|
|
370
|
+
/*
 * Release a block obtained from pffft_aligned_malloc(). The pointer that
 * malloc() originally returned is read back from the slot just before p.
 * Passing a null pointer is a no-op.
 */
void pffft_aligned_free(void *p) {
  void **slot = (void **)p;

  if (!slot)
    return;
  free(slot[-1]);
}
|
|
374
|
+
|
|
375
|
+
/* Report SIMD_SZ, the number of floats held by one v4sf in this build. */
int pffft_simd_size(void) {
  return SIMD_SZ;
}
|
|
376
|
+
|
|
377
|
+
/*
|
|
378
|
+
passf2 and passb2 have been merged here, fsign = -1 for passf2, +1 for passb2
|
|
379
|
+
*/
|
|
380
|
+
/*
 * Two-point complex pass over l1 blocks of ido vectors (interleaved re/im).
 *
 * For each block, the sum of the two input rows goes to ch[...] and the
 * difference goes to ch[l1*ido + ...]. When ido > 2 the difference is also
 * multiplied by the twiddle (wa1[i], fsign * wa1[i+1]).
 */
static NEVER_INLINE(void) passf2_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
                                    const float *wa1, float fsign) {
  const int half = l1 * ido; /* offset of the "difference" outputs in ch */
  int blk, j;

  if (ido <= 2) {
    /* short blocks: plain sum/difference, wa1 and fsign are not read */
    for (blk = 0; blk < half; blk += ido) {
      ch[0] = VADD(cc[0], cc[ido]);
      ch[half] = VSUB(cc[0], cc[ido]);
      ch[1] = VADD(cc[1], cc[ido + 1]);
      ch[half + 1] = VSUB(cc[1], cc[ido + 1]);
      ch += ido;
      cc += 2 * ido;
    }
    return;
  }

  for (blk = 0; blk < half; blk += ido) {
    for (j = 0; j < ido - 1; j += 2) {
      v4sf dre = VSUB(cc[j], cc[j + ido]);
      v4sf dim = VSUB(cc[j + 1], cc[j + ido + 1]);
      v4sf wre = LD_PS1(wa1[j]);
      v4sf wim = VMUL(LD_PS1(fsign), LD_PS1(wa1[j + 1]));
      ch[j] = VADD(cc[j], cc[j + ido]);
      ch[j + 1] = VADD(cc[j + 1], cc[j + ido + 1]);
      VCPLXMUL(dre, dim, wre, wim);
      ch[j + half] = dre;
      ch[j + half + 1] = dim;
    }
    ch += ido;
    cc += 2 * ido;
  }
}
|
|
406
|
+
|
|
407
|
+
/*
|
|
408
|
+
passf3 and passb3 have been merged here, fsign = -1 for passf3, +1 for passb3
|
|
409
|
+
*/
|
|
410
|
+
static NEVER_INLINE(void)
|
|
411
|
+
passf3_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1,
|
|
412
|
+
const float *wa2, float fsign) {
|
|
413
|
+
static const float taur = -0.5f;
|
|
414
|
+
float taui = 0.866025403784439f * fsign;
|
|
415
|
+
int i, k;
|
|
416
|
+
v4sf tr2, ti2, cr2, ci2, cr3, ci3, dr2, di2, dr3, di3;
|
|
417
|
+
int l1ido = l1 * ido;
|
|
418
|
+
float wr1, wi1, wr2, wi2;
|
|
419
|
+
assert(ido > 2);
|
|
420
|
+
for (k = 0; k < l1ido; k += ido, cc += 3 * ido, ch += ido) {
|
|
421
|
+
for (i = 0; i < ido - 1; i += 2) {
|
|
422
|
+
tr2 = VADD(cc[i + ido], cc[i + 2 * ido]);
|
|
423
|
+
cr2 = VADD(cc[i], SVMUL(taur, tr2));
|
|
424
|
+
ch[i] = VADD(cc[i], tr2);
|
|
425
|
+
ti2 = VADD(cc[i + ido + 1], cc[i + 2 * ido + 1]);
|
|
426
|
+
ci2 = VADD(cc[i + 1], SVMUL(taur, ti2));
|
|
427
|
+
ch[i + 1] = VADD(cc[i + 1], ti2);
|
|
428
|
+
cr3 = SVMUL(taui, VSUB(cc[i + ido], cc[i + 2 * ido]));
|
|
429
|
+
ci3 = SVMUL(taui, VSUB(cc[i + ido + 1], cc[i + 2 * ido + 1]));
|
|
430
|
+
dr2 = VSUB(cr2, ci3);
|
|
431
|
+
dr3 = VADD(cr2, ci3);
|
|
432
|
+
di2 = VADD(ci2, cr3);
|
|
433
|
+
di3 = VSUB(ci2, cr3);
|
|
434
|
+
wr1 = wa1[i];
|
|
435
|
+
wi1 = fsign * wa1[i + 1];
|
|
436
|
+
wr2 = wa2[i];
|
|
437
|
+
wi2 = fsign * wa2[i + 1];
|
|
438
|
+
VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1));
|
|
439
|
+
ch[i + l1ido] = dr2;
|
|
440
|
+
ch[i + l1ido + 1] = di2;
|
|
441
|
+
VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2));
|
|
442
|
+
ch[i + 2 * l1ido] = dr3;
|
|
443
|
+
ch[i + 2 * l1ido + 1] = di3;
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
} /* passf3 */
|
|
447
|
+
|
|
448
|
+
static NEVER_INLINE(void)
|
|
449
|
+
passf4_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1,
|
|
450
|
+
const float *wa2, const float *wa3, float fsign) {
|
|
451
|
+
/* isign == -1 for forward transform and +1 for backward transform */
|
|
452
|
+
|
|
453
|
+
int i, k;
|
|
454
|
+
v4sf ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
|
|
455
|
+
int l1ido = l1 * ido;
|
|
456
|
+
if (ido == 2) {
|
|
457
|
+
for (k = 0; k < l1ido; k += ido, ch += ido, cc += 4 * ido) {
|
|
458
|
+
tr1 = VSUB(cc[0], cc[2 * ido + 0]);
|
|
459
|
+
tr2 = VADD(cc[0], cc[2 * ido + 0]);
|
|
460
|
+
ti1 = VSUB(cc[1], cc[2 * ido + 1]);
|
|
461
|
+
ti2 = VADD(cc[1], cc[2 * ido + 1]);
|
|
462
|
+
ti4 = VMUL(VSUB(cc[1 * ido + 0], cc[3 * ido + 0]), LD_PS1(fsign));
|
|
463
|
+
tr4 = VMUL(VSUB(cc[3 * ido + 1], cc[1 * ido + 1]), LD_PS1(fsign));
|
|
464
|
+
tr3 = VADD(cc[ido + 0], cc[3 * ido + 0]);
|
|
465
|
+
ti3 = VADD(cc[ido + 1], cc[3 * ido + 1]);
|
|
466
|
+
|
|
467
|
+
ch[0 * l1ido + 0] = VADD(tr2, tr3);
|
|
468
|
+
ch[0 * l1ido + 1] = VADD(ti2, ti3);
|
|
469
|
+
ch[1 * l1ido + 0] = VADD(tr1, tr4);
|
|
470
|
+
ch[1 * l1ido + 1] = VADD(ti1, ti4);
|
|
471
|
+
ch[2 * l1ido + 0] = VSUB(tr2, tr3);
|
|
472
|
+
ch[2 * l1ido + 1] = VSUB(ti2, ti3);
|
|
473
|
+
ch[3 * l1ido + 0] = VSUB(tr1, tr4);
|
|
474
|
+
ch[3 * l1ido + 1] = VSUB(ti1, ti4);
|
|
475
|
+
}
|
|
476
|
+
} else {
|
|
477
|
+
for (k = 0; k < l1ido; k += ido, ch += ido, cc += 4 * ido) {
|
|
478
|
+
for (i = 0; i < ido - 1; i += 2) {
|
|
479
|
+
float wr1, wi1, wr2, wi2, wr3, wi3;
|
|
480
|
+
tr1 = VSUB(cc[i + 0], cc[i + 2 * ido + 0]);
|
|
481
|
+
tr2 = VADD(cc[i + 0], cc[i + 2 * ido + 0]);
|
|
482
|
+
ti1 = VSUB(cc[i + 1], cc[i + 2 * ido + 1]);
|
|
483
|
+
ti2 = VADD(cc[i + 1], cc[i + 2 * ido + 1]);
|
|
484
|
+
tr4 =
|
|
485
|
+
VMUL(VSUB(cc[i + 3 * ido + 1], cc[i + 1 * ido + 1]), LD_PS1(fsign));
|
|
486
|
+
ti4 =
|
|
487
|
+
VMUL(VSUB(cc[i + 1 * ido + 0], cc[i + 3 * ido + 0]), LD_PS1(fsign));
|
|
488
|
+
tr3 = VADD(cc[i + ido + 0], cc[i + 3 * ido + 0]);
|
|
489
|
+
ti3 = VADD(cc[i + ido + 1], cc[i + 3 * ido + 1]);
|
|
490
|
+
|
|
491
|
+
ch[i] = VADD(tr2, tr3);
|
|
492
|
+
cr3 = VSUB(tr2, tr3);
|
|
493
|
+
ch[i + 1] = VADD(ti2, ti3);
|
|
494
|
+
ci3 = VSUB(ti2, ti3);
|
|
495
|
+
|
|
496
|
+
cr2 = VADD(tr1, tr4);
|
|
497
|
+
cr4 = VSUB(tr1, tr4);
|
|
498
|
+
ci2 = VADD(ti1, ti4);
|
|
499
|
+
ci4 = VSUB(ti1, ti4);
|
|
500
|
+
wr1 = wa1[i];
|
|
501
|
+
wi1 = fsign * wa1[i + 1];
|
|
502
|
+
VCPLXMUL(cr2, ci2, LD_PS1(wr1), LD_PS1(wi1));
|
|
503
|
+
wr2 = wa2[i];
|
|
504
|
+
wi2 = fsign * wa2[i + 1];
|
|
505
|
+
ch[i + l1ido] = cr2;
|
|
506
|
+
ch[i + l1ido + 1] = ci2;
|
|
507
|
+
|
|
508
|
+
VCPLXMUL(cr3, ci3, LD_PS1(wr2), LD_PS1(wi2));
|
|
509
|
+
wr3 = wa3[i];
|
|
510
|
+
wi3 = fsign * wa3[i + 1];
|
|
511
|
+
ch[i + 2 * l1ido] = cr3;
|
|
512
|
+
ch[i + 2 * l1ido + 1] = ci3;
|
|
513
|
+
|
|
514
|
+
VCPLXMUL(cr4, ci4, LD_PS1(wr3), LD_PS1(wi3));
|
|
515
|
+
ch[i + 3 * l1ido] = cr4;
|
|
516
|
+
ch[i + 3 * l1ido + 1] = ci4;
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
} /* passf4 */
|
|
521
|
+
|
|
522
|
+
/*
|
|
523
|
+
passf5 and passb5 have been merged here, fsign = -1 for passf5, +1 for passb5
|
|
524
|
+
*/
|
|
525
|
+
static NEVER_INLINE(void)
|
|
526
|
+
passf5_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1,
|
|
527
|
+
const float *wa2, const float *wa3, const float *wa4,
|
|
528
|
+
float fsign) {
|
|
529
|
+
static const float tr11 = .309016994374947f;
|
|
530
|
+
const float ti11 = .951056516295154f * fsign;
|
|
531
|
+
static const float tr12 = -.809016994374947f;
|
|
532
|
+
const float ti12 = .587785252292473f * fsign;
|
|
533
|
+
|
|
534
|
+
/* Local variables */
|
|
535
|
+
int i, k;
|
|
536
|
+
v4sf ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
|
|
537
|
+
ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
|
|
538
|
+
|
|
539
|
+
float wr1, wi1, wr2, wi2, wr3, wi3, wr4, wi4;
|
|
540
|
+
|
|
541
|
+
#define cc_ref(a_1, a_2) cc[(a_2 - 1) * ido + a_1 + 1]
|
|
542
|
+
#define ch_ref(a_1, a_3) ch[(a_3 - 1) * l1 * ido + a_1 + 1]
|
|
543
|
+
|
|
544
|
+
assert(ido > 2);
|
|
545
|
+
for (k = 0; k < l1; ++k, cc += 5 * ido, ch += ido) {
|
|
546
|
+
for (i = 0; i < ido - 1; i += 2) {
|
|
547
|
+
ti5 = VSUB(cc_ref(i, 2), cc_ref(i, 5));
|
|
548
|
+
ti2 = VADD(cc_ref(i, 2), cc_ref(i, 5));
|
|
549
|
+
ti4 = VSUB(cc_ref(i, 3), cc_ref(i, 4));
|
|
550
|
+
ti3 = VADD(cc_ref(i, 3), cc_ref(i, 4));
|
|
551
|
+
tr5 = VSUB(cc_ref(i - 1, 2), cc_ref(i - 1, 5));
|
|
552
|
+
tr2 = VADD(cc_ref(i - 1, 2), cc_ref(i - 1, 5));
|
|
553
|
+
tr4 = VSUB(cc_ref(i - 1, 3), cc_ref(i - 1, 4));
|
|
554
|
+
tr3 = VADD(cc_ref(i - 1, 3), cc_ref(i - 1, 4));
|
|
555
|
+
ch_ref(i - 1, 1) = VADD(cc_ref(i - 1, 1), VADD(tr2, tr3));
|
|
556
|
+
ch_ref(i, 1) = VADD(cc_ref(i, 1), VADD(ti2, ti3));
|
|
557
|
+
cr2 = VADD(cc_ref(i - 1, 1), VADD(SVMUL(tr11, tr2), SVMUL(tr12, tr3)));
|
|
558
|
+
ci2 = VADD(cc_ref(i, 1), VADD(SVMUL(tr11, ti2), SVMUL(tr12, ti3)));
|
|
559
|
+
cr3 = VADD(cc_ref(i - 1, 1), VADD(SVMUL(tr12, tr2), SVMUL(tr11, tr3)));
|
|
560
|
+
ci3 = VADD(cc_ref(i, 1), VADD(SVMUL(tr12, ti2), SVMUL(tr11, ti3)));
|
|
561
|
+
cr5 = VADD(SVMUL(ti11, tr5), SVMUL(ti12, tr4));
|
|
562
|
+
ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4));
|
|
563
|
+
cr4 = VSUB(SVMUL(ti12, tr5), SVMUL(ti11, tr4));
|
|
564
|
+
ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4));
|
|
565
|
+
dr3 = VSUB(cr3, ci4);
|
|
566
|
+
dr4 = VADD(cr3, ci4);
|
|
567
|
+
di3 = VADD(ci3, cr4);
|
|
568
|
+
di4 = VSUB(ci3, cr4);
|
|
569
|
+
dr5 = VADD(cr2, ci5);
|
|
570
|
+
dr2 = VSUB(cr2, ci5);
|
|
571
|
+
di5 = VSUB(ci2, cr5);
|
|
572
|
+
di2 = VADD(ci2, cr5);
|
|
573
|
+
wr1 = wa1[i];
|
|
574
|
+
wi1 = fsign * wa1[i + 1];
|
|
575
|
+
wr2 = wa2[i];
|
|
576
|
+
wi2 = fsign * wa2[i + 1];
|
|
577
|
+
wr3 = wa3[i];
|
|
578
|
+
wi3 = fsign * wa3[i + 1];
|
|
579
|
+
wr4 = wa4[i];
|
|
580
|
+
wi4 = fsign * wa4[i + 1];
|
|
581
|
+
VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1));
|
|
582
|
+
ch_ref(i - 1, 2) = dr2;
|
|
583
|
+
ch_ref(i, 2) = di2;
|
|
584
|
+
VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2));
|
|
585
|
+
ch_ref(i - 1, 3) = dr3;
|
|
586
|
+
ch_ref(i, 3) = di3;
|
|
587
|
+
VCPLXMUL(dr4, di4, LD_PS1(wr3), LD_PS1(wi3));
|
|
588
|
+
ch_ref(i - 1, 4) = dr4;
|
|
589
|
+
ch_ref(i, 4) = di4;
|
|
590
|
+
VCPLXMUL(dr5, di5, LD_PS1(wr4), LD_PS1(wi4));
|
|
591
|
+
ch_ref(i - 1, 5) = dr5;
|
|
592
|
+
ch_ref(i, 5) = di5;
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
#undef ch_ref
|
|
596
|
+
#undef cc_ref
|
|
597
|
+
}
|
|
598
|
+
|
|
599
|
+
/*
 * Two-input stage; presumably the forward real-data radix-2 step in FFTPACK
 * naming -- confirm against the caller.
 *
 * Reads two planes of cc that are l1*ido vectors apart and writes blocks of
 * 2*ido vectors into ch. The work is split into: the first element of every
 * block, the twiddled interior pairs (ido > 2), and the last element of every
 * block when ido is even.
 */
static NEVER_INLINE(void) radf2_ps(int ido, int l1, const v4sf *RESTRICT cc,
                                   v4sf *RESTRICT ch, const float *wa1) {
  static const float minus_one = -1.f;
  const int l1ido = l1 * ido;
  int blk, j;

  /* first element of each block */
  for (blk = 0; blk < l1ido; blk += ido) {
    v4sf lo = cc[blk];
    v4sf hi = cc[blk + l1ido];
    ch[2 * blk] = VADD(lo, hi);
    ch[2 * (blk + ido) - 1] = VSUB(lo, hi);
  }
  if (ido < 2)
    return;

  if (ido != 2) {
    /* interior pairs: second plane is multiplied by conj(twiddle) first */
    for (blk = 0; blk < l1ido; blk += ido) {
      for (j = 2; j < ido; j += 2) {
        v4sf tr2 = cc[j - 1 + blk + l1ido];
        v4sf ti2 = cc[j + blk + l1ido];
        v4sf br = cc[j - 1 + blk];
        v4sf bi = cc[j + blk];
        VCPLXMULCONJ(tr2, ti2, LD_PS1(wa1[j - 2]), LD_PS1(wa1[j - 1]));
        ch[j + 2 * blk] = VADD(bi, ti2);
        ch[2 * (blk + ido) - j] = VSUB(ti2, bi);
        ch[j - 1 + 2 * blk] = VADD(br, tr2);
        ch[2 * (blk + ido) - j - 1] = VSUB(br, tr2);
      }
    }
    if (ido % 2 == 1)
      return;
  }

  /* even ido: last element of each block */
  for (blk = 0; blk < l1ido; blk += ido) {
    ch[2 * blk + ido] = SVMUL(minus_one, cc[ido - 1 + blk + l1ido]);
    ch[2 * blk + ido - 1] = cc[blk + ido - 1];
  }
} /* radf2 */
|
|
630
|
+
|
|
631
|
+
static NEVER_INLINE(void)
|
|
632
|
+
radb2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1) {
|
|
633
|
+
static const float minus_two = -2;
|
|
634
|
+
int i, k, l1ido = l1 * ido;
|
|
635
|
+
v4sf a, b, c, d, tr2, ti2;
|
|
636
|
+
for (k = 0; k < l1ido; k += ido) {
|
|
637
|
+
a = cc[2 * k];
|
|
638
|
+
b = cc[2 * (k + ido) - 1];
|
|
639
|
+
ch[k] = VADD(a, b);
|
|
640
|
+
ch[k + l1ido] = VSUB(a, b);
|
|
641
|
+
}
|
|
642
|
+
if (ido < 2)
|
|
643
|
+
return;
|
|
644
|
+
if (ido != 2) {
|
|
645
|
+
for (k = 0; k < l1ido; k += ido) {
|
|
646
|
+
for (i = 2; i < ido; i += 2) {
|
|
647
|
+
a = cc[i - 1 + 2 * k];
|
|
648
|
+
b = cc[2 * (k + ido) - i - 1];
|
|
649
|
+
c = cc[i + 0 + 2 * k];
|
|
650
|
+
d = cc[2 * (k + ido) - i + 0];
|
|
651
|
+
ch[i - 1 + k] = VADD(a, b);
|
|
652
|
+
tr2 = VSUB(a, b);
|
|
653
|
+
ch[i + 0 + k] = VSUB(c, d);
|
|
654
|
+
ti2 = VADD(c, d);
|
|
655
|
+
VCPLXMUL(tr2, ti2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
|
|
656
|
+
ch[i - 1 + k + l1ido] = tr2;
|
|
657
|
+
ch[i + 0 + k + l1ido] = ti2;
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
if (ido % 2 == 1)
|
|
661
|
+
return;
|
|
662
|
+
}
|
|
663
|
+
for (k = 0; k < l1ido; k += ido) {
|
|
664
|
+
a = cc[2 * k + ido - 1];
|
|
665
|
+
b = cc[2 * k + ido];
|
|
666
|
+
ch[k + ido - 1] = VADD(a, a);
|
|
667
|
+
ch[k + ido - 1 + l1ido] = SVMUL(minus_two, b);
|
|
668
|
+
}
|
|
669
|
+
} /* radb2 */
|
|
670
|
+
|
|
671
|
+
/*
 * Three-input stage; presumably the forward real-data radix-3 step in FFTPACK
 * naming -- confirm against the caller.
 *
 * Reads three planes of cc (l1*ido vectors apart) and writes blocks of 3*ido
 * vectors into ch. The first loop handles element 0 of each block; the second
 * handles the remaining pairs after multiplying planes 1 and 2 by the
 * conjugated twiddles wa1 / wa2.
 */
static void radf3_ps(int ido, int l1, const v4sf *RESTRICT cc,
                     v4sf *RESTRICT ch, const float *wa1, const float *wa2) {
  static const float taur = -0.5f;
  static const float taui = 0.866025403784439f;
  int j, k, jc;
  v4sf ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3, wr1, wi1, wr2, wi2;

  /* element 0 of every block */
  for (k = 0; k < l1; k++) {
    cr2 = VADD(cc[(k + l1) * ido], cc[(k + 2 * l1) * ido]);
    ch[3 * k * ido] = VADD(cc[k * ido], cr2);
    ch[(3 * k + 2) * ido] =
        SVMUL(taui, VSUB(cc[(k + l1 * 2) * ido], cc[(k + l1) * ido]));
    ch[ido - 1 + (3 * k + 1) * ido] = VADD(cc[k * ido], SVMUL(taur, cr2));
  }
  if (ido == 1)
    return;

  for (k = 0; k < l1; k++) {
    for (j = 2; j < ido; j += 2) {
      jc = ido - j; /* mirrored index inside the block */

      wr1 = LD_PS1(wa1[j - 2]);
      wi1 = LD_PS1(wa1[j - 1]);
      dr2 = cc[j - 1 + (k + l1) * ido];
      di2 = cc[j + (k + l1) * ido];
      VCPLXMULCONJ(dr2, di2, wr1, wi1);

      wr2 = LD_PS1(wa2[j - 2]);
      wi2 = LD_PS1(wa2[j - 1]);
      dr3 = cc[j - 1 + (k + l1 * 2) * ido];
      di3 = cc[j + (k + l1 * 2) * ido];
      VCPLXMULCONJ(dr3, di3, wr2, wi2);

      cr2 = VADD(dr2, dr3);
      ci2 = VADD(di2, di3);
      ch[j - 1 + 3 * k * ido] = VADD(cc[j - 1 + k * ido], cr2);
      ch[j + 3 * k * ido] = VADD(cc[j + k * ido], ci2);
      tr2 = VADD(cc[j - 1 + k * ido], SVMUL(taur, cr2));
      ti2 = VADD(cc[j + k * ido], SVMUL(taur, ci2));
      tr3 = SVMUL(taui, VSUB(di2, di3));
      ti3 = SVMUL(taui, VSUB(dr3, dr2));
      ch[j - 1 + (3 * k + 2) * ido] = VADD(tr2, tr3);
      ch[jc - 1 + (3 * k + 1) * ido] = VSUB(tr2, tr3);
      ch[j + (3 * k + 2) * ido] = VADD(ti2, ti3);
      ch[jc + (3 * k + 1) * ido] = VSUB(ti3, ti2);
    }
  }
} /* radf3 */
|
|
716
|
+
|
|
717
|
+
/*
 * Three-output stage; presumably the backward counterpart of radf3_ps in
 * FFTPACK naming -- confirm against the caller.
 *
 * Reads blocks of 3*ido vectors from cc and writes three planes of ch that
 * are l1*ido vectors apart. The first loop handles element 0 of each block;
 * the second handles the remaining pairs and multiplies planes 1 and 2 by the
 * twiddles wa1 / wa2.
 */
static void radb3_ps(int ido, int l1, const v4sf *RESTRICT cc,
                     v4sf *RESTRICT ch, const float *wa1, const float *wa2) {
  static const float taur = -0.5f;
  static const float taui = 0.866025403784439f;
  static const float taui_2 = 0.866025403784439f * 2;
  int j, k, jc;
  v4sf ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;

  /* element 0 of every block */
  for (k = 0; k < l1; k++) {
    tr2 = cc[ido - 1 + (3 * k + 1) * ido];
    tr2 = VADD(tr2, tr2);
    cr2 = VMADD(LD_PS1(taur), tr2, cc[3 * k * ido]);
    ch[k * ido] = VADD(cc[3 * k * ido], tr2);
    ci3 = SVMUL(taui_2, cc[(3 * k + 2) * ido]);
    ch[(k + l1) * ido] = VSUB(cr2, ci3);
    ch[(k + 2 * l1) * ido] = VADD(cr2, ci3);
  }
  if (ido == 1)
    return;

  for (k = 0; k < l1; k++) {
    for (j = 2; j < ido; j += 2) {
      jc = ido - j; /* mirrored index inside the block */

      tr2 = VADD(cc[j - 1 + (3 * k + 2) * ido], cc[jc - 1 + (3 * k + 1) * ido]);
      cr2 = VMADD(LD_PS1(taur), tr2, cc[j - 1 + 3 * k * ido]);
      ch[j - 1 + k * ido] = VADD(cc[j - 1 + 3 * k * ido], tr2);
      ti2 = VSUB(cc[j + (3 * k + 2) * ido], cc[jc + (3 * k + 1) * ido]);
      ci2 = VMADD(LD_PS1(taur), ti2, cc[j + 3 * k * ido]);
      ch[j + k * ido] = VADD(cc[j + 3 * k * ido], ti2);
      cr3 = SVMUL(taui, VSUB(cc[j - 1 + (3 * k + 2) * ido],
                             cc[jc - 1 + (3 * k + 1) * ido]));
      ci3 = SVMUL(taui,
                  VADD(cc[j + (3 * k + 2) * ido], cc[jc + (3 * k + 1) * ido]));

      dr2 = VSUB(cr2, ci3);
      dr3 = VADD(cr2, ci3);
      di2 = VADD(ci2, cr3);
      di3 = VSUB(ci2, cr3);

      VCPLXMUL(dr2, di2, LD_PS1(wa1[j - 2]), LD_PS1(wa1[j - 1]));
      ch[j - 1 + (k + l1) * ido] = dr2;
      ch[j + (k + l1) * ido] = di2;
      VCPLXMUL(dr3, di3, LD_PS1(wa2[j - 2]), LD_PS1(wa2[j - 1]));
      ch[j - 1 + (k + 2 * l1) * ido] = dr3;
      ch[j + (k + 2 * l1) * ido] = di3;
    }
  }
} /* radb3 */
|
|
761
|
+
|
|
762
|
+
static NEVER_INLINE(void)
|
|
763
|
+
radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch,
|
|
764
|
+
const float *RESTRICT wa1, const float *RESTRICT wa2,
|
|
765
|
+
const float *RESTRICT wa3) {
|
|
766
|
+
static const float minus_hsqt2 = (float)-0.7071067811865475;
|
|
767
|
+
int i, k, l1ido = l1 * ido;
|
|
768
|
+
{
|
|
769
|
+
const v4sf *RESTRICT cc_ = cc, *RESTRICT cc_end = cc + l1ido;
|
|
770
|
+
v4sf *RESTRICT ch_ = ch;
|
|
771
|
+
while (cc < cc_end) {
|
|
772
|
+
// this loop represents between 25% and 40% of total radf4_ps cost !
|
|
773
|
+
v4sf a0 = cc[0], a1 = cc[l1ido];
|
|
774
|
+
v4sf a2 = cc[2 * l1ido], a3 = cc[3 * l1ido];
|
|
775
|
+
v4sf tr1 = VADD(a1, a3);
|
|
776
|
+
v4sf tr2 = VADD(a0, a2);
|
|
777
|
+
ch[2 * ido - 1] = VSUB(a0, a2);
|
|
778
|
+
ch[2 * ido] = VSUB(a3, a1);
|
|
779
|
+
ch[0] = VADD(tr1, tr2);
|
|
780
|
+
ch[4 * ido - 1] = VSUB(tr2, tr1);
|
|
781
|
+
cc += ido;
|
|
782
|
+
ch += 4 * ido;
|
|
783
|
+
}
|
|
784
|
+
cc = cc_;
|
|
785
|
+
ch = ch_;
|
|
786
|
+
}
|
|
787
|
+
if (ido < 2)
|
|
788
|
+
return;
|
|
789
|
+
if (ido != 2) {
|
|
790
|
+
for (k = 0; k < l1ido; k += ido) {
|
|
791
|
+
const v4sf *RESTRICT pc = (v4sf *)(cc + 1 + k);
|
|
792
|
+
for (i = 2; i < ido; i += 2, pc += 2) {
|
|
793
|
+
int ic = ido - i;
|
|
794
|
+
v4sf wr, wi, cr2, ci2, cr3, ci3, cr4, ci4;
|
|
795
|
+
v4sf tr1, ti1, tr2, ti2, tr3, ti3, tr4, ti4;
|
|
796
|
+
|
|
797
|
+
cr2 = pc[1 * l1ido + 0];
|
|
798
|
+
ci2 = pc[1 * l1ido + 1];
|
|
799
|
+
wr = LD_PS1(wa1[i - 2]);
|
|
800
|
+
wi = LD_PS1(wa1[i - 1]);
|
|
801
|
+
VCPLXMULCONJ(cr2, ci2, wr, wi);
|
|
802
|
+
|
|
803
|
+
cr3 = pc[2 * l1ido + 0];
|
|
804
|
+
ci3 = pc[2 * l1ido + 1];
|
|
805
|
+
wr = LD_PS1(wa2[i - 2]);
|
|
806
|
+
wi = LD_PS1(wa2[i - 1]);
|
|
807
|
+
VCPLXMULCONJ(cr3, ci3, wr, wi);
|
|
808
|
+
|
|
809
|
+
cr4 = pc[3 * l1ido];
|
|
810
|
+
ci4 = pc[3 * l1ido + 1];
|
|
811
|
+
wr = LD_PS1(wa3[i - 2]);
|
|
812
|
+
wi = LD_PS1(wa3[i - 1]);
|
|
813
|
+
VCPLXMULCONJ(cr4, ci4, wr, wi);
|
|
814
|
+
|
|
815
|
+
/* at this point, on SSE, five of "cr2 cr3 cr4 ci2 ci3 ci4" should be
|
|
816
|
+
* loaded in registers */
|
|
817
|
+
|
|
818
|
+
tr1 = VADD(cr2, cr4);
|
|
819
|
+
tr4 = VSUB(cr4, cr2);
|
|
820
|
+
tr2 = VADD(pc[0], cr3);
|
|
821
|
+
tr3 = VSUB(pc[0], cr3);
|
|
822
|
+
ch[i - 1 + 4 * k] = VADD(tr1, tr2);
|
|
823
|
+
ch[ic - 1 + 4 * k + 3 * ido] =
|
|
824
|
+
VSUB(tr2, tr1); // at this point tr1 and tr2 can be disposed
|
|
825
|
+
ti1 = VADD(ci2, ci4);
|
|
826
|
+
ti4 = VSUB(ci2, ci4);
|
|
827
|
+
ch[i - 1 + 4 * k + 2 * ido] = VADD(ti4, tr3);
|
|
828
|
+
ch[ic - 1 + 4 * k + 1 * ido] = VSUB(tr3, ti4); // dispose tr3, ti4
|
|
829
|
+
ti2 = VADD(pc[1], ci3);
|
|
830
|
+
ti3 = VSUB(pc[1], ci3);
|
|
831
|
+
ch[i + 4 * k] = VADD(ti1, ti2);
|
|
832
|
+
ch[ic + 4 * k + 3 * ido] = VSUB(ti1, ti2);
|
|
833
|
+
ch[i + 4 * k + 2 * ido] = VADD(tr4, ti3);
|
|
834
|
+
ch[ic + 4 * k + 1 * ido] = VSUB(tr4, ti3);
|
|
835
|
+
}
|
|
836
|
+
}
|
|
837
|
+
if (ido % 2 == 1)
|
|
838
|
+
return;
|
|
839
|
+
}
|
|
840
|
+
for (k = 0; k < l1ido; k += ido) {
|
|
841
|
+
v4sf a = cc[ido - 1 + k + l1ido], b = cc[ido - 1 + k + 3 * l1ido];
|
|
842
|
+
v4sf c = cc[ido - 1 + k], d = cc[ido - 1 + k + 2 * l1ido];
|
|
843
|
+
v4sf ti1 = SVMUL(minus_hsqt2, VADD(a, b));
|
|
844
|
+
v4sf tr1 = SVMUL(minus_hsqt2, VSUB(b, a));
|
|
845
|
+
ch[ido - 1 + 4 * k] = VADD(tr1, c);
|
|
846
|
+
ch[ido - 1 + 4 * k + 2 * ido] = VSUB(c, tr1);
|
|
847
|
+
ch[4 * k + 1 * ido] = VSUB(ti1, d);
|
|
848
|
+
ch[4 * k + 3 * ido] = VADD(ti1, d);
|
|
849
|
+
}
|
|
850
|
+
} /* radf4 */
|
|
851
|
+
|
|
852
|
+
static NEVER_INLINE(void)
|
|
853
|
+
radb4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch,
|
|
854
|
+
const float *RESTRICT wa1, const float *RESTRICT wa2,
|
|
855
|
+
const float *RESTRICT wa3) {
|
|
856
|
+
static const float minus_sqrt2 = (float)-1.414213562373095;
|
|
857
|
+
static const float two = 2.f;
|
|
858
|
+
int i, k, l1ido = l1 * ido;
|
|
859
|
+
v4sf ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
|
|
860
|
+
{
|
|
861
|
+
const v4sf *RESTRICT cc_ = cc, *RESTRICT ch_end = ch + l1ido;
|
|
862
|
+
v4sf *ch_ = ch;
|
|
863
|
+
while (ch < ch_end) {
|
|
864
|
+
v4sf a = cc[0], b = cc[4 * ido - 1];
|
|
865
|
+
v4sf c = cc[2 * ido], d = cc[2 * ido - 1];
|
|
866
|
+
tr3 = SVMUL(two, d);
|
|
867
|
+
tr2 = VADD(a, b);
|
|
868
|
+
tr1 = VSUB(a, b);
|
|
869
|
+
tr4 = SVMUL(two, c);
|
|
870
|
+
ch[0 * l1ido] = VADD(tr2, tr3);
|
|
871
|
+
ch[2 * l1ido] = VSUB(tr2, tr3);
|
|
872
|
+
ch[1 * l1ido] = VSUB(tr1, tr4);
|
|
873
|
+
ch[3 * l1ido] = VADD(tr1, tr4);
|
|
874
|
+
|
|
875
|
+
cc += 4 * ido;
|
|
876
|
+
ch += ido;
|
|
877
|
+
}
|
|
878
|
+
cc = cc_;
|
|
879
|
+
ch = ch_;
|
|
880
|
+
}
|
|
881
|
+
if (ido < 2)
|
|
882
|
+
return;
|
|
883
|
+
if (ido != 2) {
|
|
884
|
+
for (k = 0; k < l1ido; k += ido) {
|
|
885
|
+
const v4sf *RESTRICT pc = (v4sf *)(cc - 1 + 4 * k);
|
|
886
|
+
v4sf *RESTRICT ph = (v4sf *)(ch + k + 1);
|
|
887
|
+
for (i = 2; i < ido; i += 2) {
|
|
888
|
+
|
|
889
|
+
tr1 = VSUB(pc[i], pc[4 * ido - i]);
|
|
890
|
+
tr2 = VADD(pc[i], pc[4 * ido - i]);
|
|
891
|
+
ti4 = VSUB(pc[2 * ido + i], pc[2 * ido - i]);
|
|
892
|
+
tr3 = VADD(pc[2 * ido + i], pc[2 * ido - i]);
|
|
893
|
+
ph[0] = VADD(tr2, tr3);
|
|
894
|
+
cr3 = VSUB(tr2, tr3);
|
|
895
|
+
|
|
896
|
+
ti3 = VSUB(pc[2 * ido + i + 1], pc[2 * ido - i + 1]);
|
|
897
|
+
tr4 = VADD(pc[2 * ido + i + 1], pc[2 * ido - i + 1]);
|
|
898
|
+
cr2 = VSUB(tr1, tr4);
|
|
899
|
+
cr4 = VADD(tr1, tr4);
|
|
900
|
+
|
|
901
|
+
ti1 = VADD(pc[i + 1], pc[4 * ido - i + 1]);
|
|
902
|
+
ti2 = VSUB(pc[i + 1], pc[4 * ido - i + 1]);
|
|
903
|
+
|
|
904
|
+
ph[1] = VADD(ti2, ti3);
|
|
905
|
+
ph += l1ido;
|
|
906
|
+
ci3 = VSUB(ti2, ti3);
|
|
907
|
+
ci2 = VADD(ti1, ti4);
|
|
908
|
+
ci4 = VSUB(ti1, ti4);
|
|
909
|
+
VCPLXMUL(cr2, ci2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
|
|
910
|
+
ph[0] = cr2;
|
|
911
|
+
ph[1] = ci2;
|
|
912
|
+
ph += l1ido;
|
|
913
|
+
VCPLXMUL(cr3, ci3, LD_PS1(wa2[i - 2]), LD_PS1(wa2[i - 1]));
|
|
914
|
+
ph[0] = cr3;
|
|
915
|
+
ph[1] = ci3;
|
|
916
|
+
ph += l1ido;
|
|
917
|
+
VCPLXMUL(cr4, ci4, LD_PS1(wa3[i - 2]), LD_PS1(wa3[i - 1]));
|
|
918
|
+
ph[0] = cr4;
|
|
919
|
+
ph[1] = ci4;
|
|
920
|
+
ph = ph - 3 * l1ido + 2;
|
|
921
|
+
}
|
|
922
|
+
}
|
|
923
|
+
if (ido % 2 == 1)
|
|
924
|
+
return;
|
|
925
|
+
}
|
|
926
|
+
for (k = 0; k < l1ido; k += ido) {
|
|
927
|
+
int i0 = 4 * k + ido;
|
|
928
|
+
v4sf c = cc[i0 - 1], d = cc[i0 + 2 * ido - 1];
|
|
929
|
+
v4sf a = cc[i0 + 0], b = cc[i0 + 2 * ido + 0];
|
|
930
|
+
tr1 = VSUB(c, d);
|
|
931
|
+
tr2 = VADD(c, d);
|
|
932
|
+
ti1 = VADD(b, a);
|
|
933
|
+
ti2 = VSUB(b, a);
|
|
934
|
+
ch[ido - 1 + k + 0 * l1ido] = VADD(tr2, tr2);
|
|
935
|
+
ch[ido - 1 + k + 1 * l1ido] = SVMUL(minus_sqrt2, VSUB(ti1, tr1));
|
|
936
|
+
ch[ido - 1 + k + 2 * l1ido] = VADD(ti2, ti2);
|
|
937
|
+
ch[ido - 1 + k + 3 * l1ido] = SVMUL(minus_sqrt2, VADD(ti1, tr1));
|
|
938
|
+
}
|
|
939
|
+
} /* radb4 */
|
|
940
|
+
|
|
941
|
+
/*
 * Radix-5 real butterfly pass; rfftf1_ps calls it for every factor 5 of the
 * transform length.
 *
 *   ido, l1   : inner and outer extents of this pass (cc is read as an
 *               ido x l1 x 5 array, ch is written as an ido x 5 x l1 array)
 *   cc        : input vectors (read only)
 *   ch        : output vectors, must not alias cc
 *   wa1..wa4  : twiddle tables, read at [i-3] (real) and [i-2] (imaginary)
 *               for odd i in 3..ido
 *
 * The indexing is 1-based (Fortran heritage).  The 1-based -> 0-based shift is
 * folded into the cc_ref/ch_ref index macros instead of decrementing the cc/ch
 * pointers: moving a pointer before the start of its array is undefined
 * behaviour in C even if it is never dereferenced there.
 */
static void radf5_ps(int ido, int l1, const v4sf *RESTRICT cc,
                     v4sf *RESTRICT ch, const float *wa1, const float *wa2,
                     const float *wa3, const float *wa4) {
  /* cos/sin of 2*pi/5 and 4*pi/5 */
  static const float tr11 = .309016994374947f;
  static const float ti11 = .951056516295154f;
  static const float tr12 = -.809016994374947f;
  static const float ti12 = .587785252292473f;

  /* Index of element (1, 1, 1) in the 1-based layouts below. */
  const int cc_offset = 1 + ido * (1 + l1);
  const int ch_offset = 1 + ido * 6;

  /* Local variables */
  int i, k, ic;
  v4sf ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3, dr4, dr5,
      cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
  int idp2;

#define cc_ref(a_1, a_2, a_3)                                                  \
  cc[((a_3) * l1 + (a_2)) * ido + (a_1) - cc_offset]
#define ch_ref(a_1, a_2, a_3)                                                  \
  ch[((a_3) * 5 + (a_2)) * ido + (a_1) - ch_offset]

  /* First element of every row: no twiddle multiplication needed. */
  for (k = 1; k <= l1; ++k) {
    cr2 = VADD(cc_ref(1, k, 5), cc_ref(1, k, 2));
    ci5 = VSUB(cc_ref(1, k, 5), cc_ref(1, k, 2));
    cr3 = VADD(cc_ref(1, k, 4), cc_ref(1, k, 3));
    ci4 = VSUB(cc_ref(1, k, 4), cc_ref(1, k, 3));
    ch_ref(1, 1, k) = VADD(cc_ref(1, k, 1), VADD(cr2, cr3));
    ch_ref(ido, 2, k) =
        VADD(cc_ref(1, k, 1), VADD(SVMUL(tr11, cr2), SVMUL(tr12, cr3)));
    ch_ref(1, 3, k) = VADD(SVMUL(ti11, ci5), SVMUL(ti12, ci4));
    ch_ref(ido, 4, k) =
        VADD(cc_ref(1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3)));
    ch_ref(1, 5, k) = VSUB(SVMUL(ti12, ci5), SVMUL(ti11, ci4));
  }
  if (ido == 1) {
    return;
  }

  /* Remaining elements, handled as (real, imaginary) pairs at (i-1, i);
     ic is the mirrored index the conjugate-symmetric outputs are written to. */
  idp2 = ido + 2;
  for (k = 1; k <= l1; ++k) {
    for (i = 3; i <= ido; i += 2) {
      ic = idp2 - i;
      dr2 = LD_PS1(wa1[i - 3]);
      di2 = LD_PS1(wa1[i - 2]);
      dr3 = LD_PS1(wa2[i - 3]);
      di3 = LD_PS1(wa2[i - 2]);
      dr4 = LD_PS1(wa3[i - 3]);
      di4 = LD_PS1(wa3[i - 2]);
      dr5 = LD_PS1(wa4[i - 3]);
      di5 = LD_PS1(wa4[i - 2]);
      /* (dr, di) are updated in place by the macro */
      VCPLXMULCONJ(dr2, di2, cc_ref(i - 1, k, 2), cc_ref(i, k, 2));
      VCPLXMULCONJ(dr3, di3, cc_ref(i - 1, k, 3), cc_ref(i, k, 3));
      VCPLXMULCONJ(dr4, di4, cc_ref(i - 1, k, 4), cc_ref(i, k, 4));
      VCPLXMULCONJ(dr5, di5, cc_ref(i - 1, k, 5), cc_ref(i, k, 5));
      cr2 = VADD(dr2, dr5);
      ci5 = VSUB(dr5, dr2);
      cr5 = VSUB(di2, di5);
      ci2 = VADD(di2, di5);
      cr3 = VADD(dr3, dr4);
      ci4 = VSUB(dr4, dr3);
      cr4 = VSUB(di3, di4);
      ci3 = VADD(di3, di4);
      ch_ref(i - 1, 1, k) = VADD(cc_ref(i - 1, k, 1), VADD(cr2, cr3));
      ch_ref(i, 1, k) = VSUB(cc_ref(i, k, 1), VADD(ci2, ci3));
      tr2 = VADD(cc_ref(i - 1, k, 1), VADD(SVMUL(tr11, cr2), SVMUL(tr12, cr3)));
      ti2 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr11, ci2), SVMUL(tr12, ci3)));
      tr3 = VADD(cc_ref(i - 1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3)));
      ti3 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr12, ci2), SVMUL(tr11, ci3)));
      tr5 = VADD(SVMUL(ti11, cr5), SVMUL(ti12, cr4));
      ti5 = VADD(SVMUL(ti11, ci5), SVMUL(ti12, ci4));
      tr4 = VSUB(SVMUL(ti12, cr5), SVMUL(ti11, cr4));
      ti4 = VSUB(SVMUL(ti12, ci5), SVMUL(ti11, ci4));
      ch_ref(i - 1, 3, k) = VSUB(tr2, tr5);
      ch_ref(ic - 1, 2, k) = VADD(tr2, tr5);
      ch_ref(i, 3, k) = VADD(ti2, ti5);
      ch_ref(ic, 2, k) = VSUB(ti5, ti2);
      ch_ref(i - 1, 5, k) = VSUB(tr3, tr4);
      ch_ref(ic - 1, 4, k) = VADD(tr3, tr4);
      ch_ref(i, 5, k) = VADD(ti3, ti4);
      ch_ref(ic, 4, k) = VSUB(ti4, ti3);
    }
  }
#undef cc_ref
#undef ch_ref
} /* radf5 */
|
|
1033
|
+
|
|
1034
|
+
/*
 * Radix-5 real butterfly pass for the backward direction; rfftb1_ps calls it
 * for every factor 5 of the transform length.
 *
 *   ido, l1   : inner and outer extents of this pass (cc is read as an
 *               ido x 5 x l1 array, ch is written as an ido x l1 x 5 array)
 *   cc        : input vectors (read only)
 *   ch        : output vectors, must not alias cc
 *   wa1..wa4  : twiddle tables, read at [i-3] (real) and [i-2] (imaginary)
 *               for odd i in 3..ido
 *
 * The indexing is 1-based (Fortran heritage).  The 1-based -> 0-based shift is
 * folded into the cc_ref/ch_ref index macros instead of decrementing the cc/ch
 * pointers: moving a pointer before the start of its array is undefined
 * behaviour in C even if it is never dereferenced there.
 */
static void radb5_ps(int ido, int l1, const v4sf *RESTRICT cc,
                     v4sf *RESTRICT ch, const float *wa1, const float *wa2,
                     const float *wa3, const float *wa4) {
  /* cos/sin of 2*pi/5 and 4*pi/5 */
  static const float tr11 = .309016994374947f;
  static const float ti11 = .951056516295154f;
  static const float tr12 = -.809016994374947f;
  static const float ti12 = .587785252292473f;

  /* Index of element (1, 1, 1) in the 1-based layouts below. */
  const int ch_offset = 1 + ido * (1 + l1);
  const int cc_offset = 1 + ido * 6;

  /* Local variables */
  int i, k, ic;
  v4sf ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
      ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
  int idp2;

#define cc_ref(a_1, a_2, a_3)                                                  \
  cc[((a_3) * 5 + (a_2)) * ido + (a_1) - cc_offset]
#define ch_ref(a_1, a_2, a_3)                                                  \
  ch[((a_3) * l1 + (a_2)) * ido + (a_1) - ch_offset]

  /* First element of every row: no twiddle multiplication needed. */
  for (k = 1; k <= l1; ++k) {
    ti5 = VADD(cc_ref(1, 3, k), cc_ref(1, 3, k));
    ti4 = VADD(cc_ref(1, 5, k), cc_ref(1, 5, k));
    tr2 = VADD(cc_ref(ido, 2, k), cc_ref(ido, 2, k));
    tr3 = VADD(cc_ref(ido, 4, k), cc_ref(ido, 4, k));
    ch_ref(1, k, 1) = VADD(cc_ref(1, 1, k), VADD(tr2, tr3));
    cr2 = VADD(cc_ref(1, 1, k), VADD(SVMUL(tr11, tr2), SVMUL(tr12, tr3)));
    cr3 = VADD(cc_ref(1, 1, k), VADD(SVMUL(tr12, tr2), SVMUL(tr11, tr3)));
    ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4));
    ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4));
    ch_ref(1, k, 2) = VSUB(cr2, ci5);
    ch_ref(1, k, 3) = VSUB(cr3, ci4);
    ch_ref(1, k, 4) = VADD(cr3, ci4);
    ch_ref(1, k, 5) = VADD(cr2, ci5);
  }
  if (ido == 1) {
    return;
  }

  /* Remaining elements, handled as (real, imaginary) pairs at (i-1, i);
     ic is the mirrored index the conjugate-symmetric inputs are read from. */
  idp2 = ido + 2;
  for (k = 1; k <= l1; ++k) {
    for (i = 3; i <= ido; i += 2) {
      ic = idp2 - i;
      ti5 = VADD(cc_ref(i, 3, k), cc_ref(ic, 2, k));
      ti2 = VSUB(cc_ref(i, 3, k), cc_ref(ic, 2, k));
      ti4 = VADD(cc_ref(i, 5, k), cc_ref(ic, 4, k));
      ti3 = VSUB(cc_ref(i, 5, k), cc_ref(ic, 4, k));
      tr5 = VSUB(cc_ref(i - 1, 3, k), cc_ref(ic - 1, 2, k));
      tr2 = VADD(cc_ref(i - 1, 3, k), cc_ref(ic - 1, 2, k));
      tr4 = VSUB(cc_ref(i - 1, 5, k), cc_ref(ic - 1, 4, k));
      tr3 = VADD(cc_ref(i - 1, 5, k), cc_ref(ic - 1, 4, k));
      ch_ref(i - 1, k, 1) = VADD(cc_ref(i - 1, 1, k), VADD(tr2, tr3));
      ch_ref(i, k, 1) = VADD(cc_ref(i, 1, k), VADD(ti2, ti3));
      cr2 = VADD(cc_ref(i - 1, 1, k), VADD(SVMUL(tr11, tr2), SVMUL(tr12, tr3)));
      ci2 = VADD(cc_ref(i, 1, k), VADD(SVMUL(tr11, ti2), SVMUL(tr12, ti3)));
      cr3 = VADD(cc_ref(i - 1, 1, k), VADD(SVMUL(tr12, tr2), SVMUL(tr11, tr3)));
      ci3 = VADD(cc_ref(i, 1, k), VADD(SVMUL(tr12, ti2), SVMUL(tr11, ti3)));
      cr5 = VADD(SVMUL(ti11, tr5), SVMUL(ti12, tr4));
      ci5 = VADD(SVMUL(ti11, ti5), SVMUL(ti12, ti4));
      cr4 = VSUB(SVMUL(ti12, tr5), SVMUL(ti11, tr4));
      ci4 = VSUB(SVMUL(ti12, ti5), SVMUL(ti11, ti4));
      dr3 = VSUB(cr3, ci4);
      dr4 = VADD(cr3, ci4);
      di3 = VADD(ci3, cr4);
      di4 = VSUB(ci3, cr4);
      dr5 = VADD(cr2, ci5);
      dr2 = VSUB(cr2, ci5);
      di5 = VSUB(ci2, cr5);
      di2 = VADD(ci2, cr5);
      /* (dr, di) are updated in place by the macro */
      VCPLXMUL(dr2, di2, LD_PS1(wa1[i - 3]), LD_PS1(wa1[i - 2]));
      VCPLXMUL(dr3, di3, LD_PS1(wa2[i - 3]), LD_PS1(wa2[i - 2]));
      VCPLXMUL(dr4, di4, LD_PS1(wa3[i - 3]), LD_PS1(wa3[i - 2]));
      VCPLXMUL(dr5, di5, LD_PS1(wa4[i - 3]), LD_PS1(wa4[i - 2]));

      ch_ref(i - 1, k, 2) = dr2;
      ch_ref(i, k, 2) = di2;
      ch_ref(i - 1, k, 3) = dr3;
      ch_ref(i, k, 3) = di3;
      ch_ref(i - 1, k, 4) = dr4;
      ch_ref(i, k, 4) = di4;
      ch_ref(i - 1, k, 5) = dr5;
      ch_ref(i, k, 5) = di5;
    }
  }
#undef cc_ref
#undef ch_ref
} /* radb5 */
|
|
1126
|
+
|
|
1127
|
+
/*
 * Runs the radix passes listed in ifac (ifac[1] factors stored from ifac[2]),
 * walking the factor list from last to first and dispatching to
 * radf2/3/4/5_ps.  Each pass reads one buffer and writes the other; the
 * buffers ping-pong between work1 and work2 after every pass.
 *
 * Returns the buffer holding the result of the last pass (either work1 or
 * work2, or the input itself when there are no factors).
 */
static NEVER_INLINE(v4sf *)
rfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
          const float *wa, const int *ifac) {
  const int nf = ifac[1];
  v4sf *in = (v4sf *)input_readonly;
  v4sf *out = (in == work2 ? work1 : work2);
  int l2 = n;
  int iw = n - 1; /* twiddle cursor, moved backwards before each pass */
  int stage;

  assert(in != out && work1 != work2);

  for (stage = nf - 1; stage >= 0; --stage) {
    const int ip = ifac[stage + 2];
    const int l1 = l2 / ip;
    const int ido = n / l2;

    iw -= (ip - 1) * ido;

    switch (ip) {
    case 5: {
      const float *tw = &wa[iw];
      radf5_ps(ido, l1, in, out, tw, tw + ido, tw + 2 * ido, tw + 3 * ido);
    } break;
    case 4: {
      const float *tw = &wa[iw];
      radf4_ps(ido, l1, in, out, tw, tw + ido, tw + 2 * ido);
    } break;
    case 3: {
      const float *tw = &wa[iw];
      radf3_ps(ido, l1, in, out, tw, tw + ido);
    } break;
    case 2:
      radf2_ps(ido, l1, in, out, &wa[iw]);
      break;
    default:
      assert(0);
      break;
    }

    l2 = l1;

    /* The buffer just written becomes the next input; 'out' is always one of
       work1/work2, so the next output is simply the other one. */
    in = out;
    out = (in == work2) ? work1 : work2;
  }
  return in; /* this is in fact the output .. */
} /* rfftf1 */
|
|
1176
|
+
|
|
1177
|
+
/*
 * Runs the radix passes listed in ifac (ifac[1] factors stored from ifac[2]),
 * walking the factor list from first to last and dispatching to
 * radb2/3/4/5_ps.  Each pass reads one buffer and writes the other; the
 * buffers ping-pong between work1 and work2 after every pass.
 *
 * Returns the buffer holding the result of the last pass (either work1 or
 * work2, or the input itself when there are no factors).
 */
static NEVER_INLINE(v4sf *)
rfftb1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
          const float *wa, const int *ifac) {
  const int nf = ifac[1];
  v4sf *in = (v4sf *)input_readonly;
  v4sf *out = (in == work2 ? work1 : work2);
  int l1 = 1;
  int iw = 0; /* twiddle cursor, advanced after each pass */
  int stage;

  assert(in != out);

  for (stage = 0; stage < nf; ++stage) {
    const int ip = ifac[stage + 2];
    const int l2 = ip * l1;
    const int ido = n / l2;

    switch (ip) {
    case 5: {
      const float *tw = &wa[iw];
      radb5_ps(ido, l1, in, out, tw, tw + ido, tw + 2 * ido, tw + 3 * ido);
    } break;
    case 4: {
      const float *tw = &wa[iw];
      radb4_ps(ido, l1, in, out, tw, tw + ido, tw + 2 * ido);
    } break;
    case 3: {
      const float *tw = &wa[iw];
      radb3_ps(ido, l1, in, out, tw, tw + ido);
    } break;
    case 2:
      radb2_ps(ido, l1, in, out, &wa[iw]);
      break;
    default:
      assert(0);
      break;
    }

    l1 = l2;
    iw += (ip - 1) * ido;

    /* The buffer just written becomes the next input; 'out' is always one of
       work1/work2, so the next output is simply the other one. */
    in = out;
    out = (in == work2) ? work1 : work2;
  }
  return in; /* this is in fact the output .. */
}
|
|
1226
|
+
|
|
1227
|
+
#define IFAC_MAX_SIZE                                                          \
  25 /* max number of integer factors for the decomposition, +2 */

/*
 * Factorises n using only the candidate factors listed in ntryh (a
 * zero-terminated array, tried in order, each one as many times as it
 * divides the remaining quotient).
 *
 * On return:
 *   ifac[0]      = n
 *   ifac[1]      = number of factors found (also the return value)
 *   ifac[2 ...]  = the factors; whenever a factor 2 is found after other
 *                  factors it is moved to the front of the list
 *
 * ifac must have room for IFAC_MAX_SIZE ints.  If n is not fully decomposable
 * with the given candidates, the product of the returned factors is smaller
 * than n; callers are expected to check for that.
 *
 * n <= 0 yields zero factors.  Without this guard n == 0 would never leave
 * the division loop (0 is divisible by everything) and would run past the end
 * of ifac in builds where assert() is compiled out.
 */
static int decompose(int n, int *ifac, const int *ntryh) {
  int nl = n, nf = 0, i, j;

  if (n > 0) {
    for (j = 0; ntryh[j]; ++j) {
      const int ntry = ntryh[j];
      while (nl != 1 && nl % ntry == 0) {
        assert(2 + nf < IFAC_MAX_SIZE);
        ifac[2 + nf++] = ntry;
        nl /= ntry;
        if (ntry == 2 && nf != 1) {
          /* shift the existing factors up by one and put the 2 first */
          for (i = nf + 1; i > 2; --i) {
            ifac[i] = ifac[i - 1];
          }
          ifac[2] = 2;
        }
      }
    }
  }
  ifac[0] = n;
  ifac[1] = nf;
  return nf;
}
|
|
1255
|
+
|
|
1256
|
+
static void rffti1_ps(int n, float *wa, int *ifac) {
|
|
1257
|
+
static const int ntryh[] = {4, 2, 3, 5, 0};
|
|
1258
|
+
int k1, j, ii;
|
|
1259
|
+
|
|
1260
|
+
int nf = decompose(n, ifac, ntryh);
|
|
1261
|
+
float argh = (2 * M_PI) / n;
|
|
1262
|
+
int is = 0;
|
|
1263
|
+
int nfm1 = nf - 1;
|
|
1264
|
+
int l1 = 1;
|
|
1265
|
+
for (k1 = 1; k1 <= nfm1; k1++) {
|
|
1266
|
+
int ip = ifac[k1 + 1];
|
|
1267
|
+
int ld = 0;
|
|
1268
|
+
int l2 = l1 * ip;
|
|
1269
|
+
int ido = n / l2;
|
|
1270
|
+
int ipm = ip - 1;
|
|
1271
|
+
for (j = 1; j <= ipm; ++j) {
|
|
1272
|
+
float argld;
|
|
1273
|
+
int i = is, fi = 0;
|
|
1274
|
+
ld += l1;
|
|
1275
|
+
argld = ld * argh;
|
|
1276
|
+
for (ii = 3; ii <= ido; ii += 2) {
|
|
1277
|
+
i += 2;
|
|
1278
|
+
fi += 1;
|
|
1279
|
+
wa[i - 2] = cos(fi * argld);
|
|
1280
|
+
wa[i - 1] = sin(fi * argld);
|
|
1281
|
+
}
|
|
1282
|
+
is += ido;
|
|
1283
|
+
}
|
|
1284
|
+
l1 = l2;
|
|
1285
|
+
}
|
|
1286
|
+
} /* rffti1 */
|
|
1287
|
+
|
|
1288
|
+
void cffti1_ps(int n, float *wa, int *ifac) {
|
|
1289
|
+
static const int ntryh[] = {5, 3, 4, 2, 0};
|
|
1290
|
+
int k1, j, ii;
|
|
1291
|
+
|
|
1292
|
+
int nf = decompose(n, ifac, ntryh);
|
|
1293
|
+
float argh = (2 * M_PI) / (float)n;
|
|
1294
|
+
int i = 1;
|
|
1295
|
+
int l1 = 1;
|
|
1296
|
+
for (k1 = 1; k1 <= nf; k1++) {
|
|
1297
|
+
int ip = ifac[k1 + 1];
|
|
1298
|
+
int ld = 0;
|
|
1299
|
+
int l2 = l1 * ip;
|
|
1300
|
+
int ido = n / l2;
|
|
1301
|
+
int idot = ido + ido + 2;
|
|
1302
|
+
int ipm = ip - 1;
|
|
1303
|
+
for (j = 1; j <= ipm; j++) {
|
|
1304
|
+
float argld;
|
|
1305
|
+
int i1 = i, fi = 0;
|
|
1306
|
+
wa[i - 1] = 1;
|
|
1307
|
+
wa[i] = 0;
|
|
1308
|
+
ld += l1;
|
|
1309
|
+
argld = ld * argh;
|
|
1310
|
+
for (ii = 4; ii <= idot; ii += 2) {
|
|
1311
|
+
i += 2;
|
|
1312
|
+
fi += 1;
|
|
1313
|
+
wa[i - 1] = cos(fi * argld);
|
|
1314
|
+
wa[i] = sin(fi * argld);
|
|
1315
|
+
}
|
|
1316
|
+
if (ip > 5) {
|
|
1317
|
+
wa[i1 - 1] = wa[i - 1];
|
|
1318
|
+
wa[i1] = wa[i];
|
|
1319
|
+
}
|
|
1320
|
+
}
|
|
1321
|
+
l1 = l2;
|
|
1322
|
+
}
|
|
1323
|
+
} /* cffti1 */
|
|
1324
|
+
|
|
1325
|
+
/*
 * Runs the complex radix passes listed in ifac (ifac[1] factors stored from
 * ifac[2]), in order, dispatching to passf2/3/4/5_ps; isign is forwarded
 * unchanged to every pass.  Each pass reads one buffer and writes the other;
 * the buffers ping-pong between work1 and work2 after every pass.
 *
 * Returns the buffer holding the result of the last pass (either work1 or
 * work2, or the input itself when there are no factors).
 */
v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
                const float *wa, const int *ifac, int isign) {
  const int nf = ifac[1];
  v4sf *in = (v4sf *)input_readonly;
  v4sf *out = (in == work2 ? work1 : work2);
  int l1 = 1;
  int iw = 0; /* twiddle cursor, advanced after each pass */
  int stage;

  assert(in != out && work1 != work2);

  for (stage = 0; stage < nf; ++stage) {
    const int ip = ifac[stage + 2];
    const int l2 = ip * l1;
    const int ido = n / l2;
    const int idot = ido + ido;

    switch (ip) {
    case 5: {
      const float *tw = &wa[iw];
      passf5_ps(idot, l1, in, out, tw, tw + idot, tw + 2 * idot, tw + 3 * idot,
                isign);
    } break;
    case 4: {
      const float *tw = &wa[iw];
      passf4_ps(idot, l1, in, out, tw, tw + idot, tw + 2 * idot, isign);
    } break;
    case 2: {
      passf2_ps(idot, l1, in, out, &wa[iw], isign);
    } break;
    case 3: {
      const float *tw = &wa[iw];
      passf3_ps(idot, l1, in, out, tw, tw + idot, isign);
    } break;
    default:
      assert(0);
    }

    l1 = l2;
    iw += (ip - 1) * idot;

    /* The buffer just written becomes the next input; 'out' is always one of
       work1/work2, so the next output is simply the other one. */
    in = out;
    out = (in == work2) ? work1 : work2;
  }

  return in; /* this is in fact the output .. */
}
|
|
1374
|
+
|
|
1375
|
+
struct PFFFT_Setup {
|
|
1376
|
+
int N;
|
|
1377
|
+
int Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if
|
|
1378
|
+
// PFFFT_REAL)
|
|
1379
|
+
// hold the decomposition into small integers of N
|
|
1380
|
+
int ifac[IFAC_MAX_SIZE]; // N , number of factors, factors (admitted values:
|
|
1381
|
+
// 2, 3, 4 ou 5)
|
|
1382
|
+
pffft_transform_t transform;
|
|
1383
|
+
v4sf *data; // allocated room for twiddle coefs
|
|
1384
|
+
float *e; // points into 'data' , N/4*3 elements
|
|
1385
|
+
float *twiddle; // points into 'data', N/4 elements
|
|
1386
|
+
};
|
|
1387
|
+
|
|
1388
|
+
/*
 * Allocates and initialises a setup for transforms of length N.
 *
 * N must be positive, at most 2^26, a multiple of SIMD_SZ*SIMD_SZ for
 * complex transforms (2*SIMD_SZ*SIMD_SZ for real ones), and decomposable
 * into the factors accepted by rffti1_ps / cffti1_ps.
 *
 * Returns the new setup, to be released with pffft_destroy_setup(), or NULL
 * when N is not supported or memory could not be allocated.  Debug builds
 * additionally assert on an oversized or misaligned N.
 */
PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) {
  PFFFT_Setup *s;
  int k, m;

  /* validate N for non-positive values or potential int overflow; N == 0
     must be rejected too, it cannot be factorised */
  if (N <= 0) {
    return NULL;
  }
  if (N > (1 << 26)) {
    // higher values of N will make you enter in the integer overflow world...
    assert(0);
    return NULL;
  }

  /* unfortunately, the fft size must be a multiple of 16 for complex FFTs
     and 32 for real FFTs -- a lot of stuff would need to be rewritten to
     handle other cases (or maybe just switch to a scalar fft, I don't know..)
   */
  if (transform == PFFFT_REAL) {
    assert((N % (2 * SIMD_SZ * SIMD_SZ)) == 0 && N > 0);
  }
  if (transform == PFFFT_COMPLEX) {
    assert((N % (SIMD_SZ * SIMD_SZ)) == 0 && N > 0);
  }
  /* Same requirement enforced at run time, so that builds with assert()
     compiled out refuse unsupported sizes instead of building a setup the
     transform code cannot handle. */
  if ((N % ((transform == PFFFT_REAL ? 2 : 1) * SIMD_SZ * SIMD_SZ)) != 0) {
    return NULL;
  }

  s = (PFFFT_Setup *)malloc(sizeof(PFFFT_Setup));
  if (s == NULL) {
    return NULL;
  }

  s->N = N;
  s->transform = transform;
  /* nb of complex simd vectors */
  s->Ncvec = (transform == PFFFT_REAL ? N / 2 : N) / SIMD_SZ;
  s->data = (v4sf *)pffft_aligned_malloc(2 * s->Ncvec * sizeof(v4sf));
  if (s->data == NULL) {
    free(s);
    return NULL;
  }
  s->e = (float *)s->data;
  s->twiddle = (float *)(s->data + (2 * s->Ncvec * (SIMD_SZ - 1)) / SIMD_SZ);

  /* 'e' table: for each k, SIMD_SZ-1 (cos, sin) pairs laid out so that
     SIMD_SZ consecutive k share one vector */
  for (k = 0; k < s->Ncvec; ++k) {
    int i = k / SIMD_SZ;
    int j = k % SIMD_SZ;
    for (m = 0; m < SIMD_SZ - 1; ++m) {
      float A = -2 * M_PI * (m + 1) * k / N;
      s->e[(2 * (i * 3 + m) + 0) * SIMD_SZ + j] = cos(A);
      s->e[(2 * (i * 3 + m) + 1) * SIMD_SZ + j] = sin(A);
    }
  }

  if (transform == PFFFT_REAL) {
    rffti1_ps(N / SIMD_SZ, s->twiddle, s->ifac);
  } else {
    cffti1_ps(N / SIMD_SZ, s->twiddle, s->ifac);
  }

  /* check that N is decomposable with allowed prime factors */
  for (k = 0, m = 1; k < s->ifac[1]; ++k) {
    m *= s->ifac[2 + k];
  }
  if (m != N / SIMD_SZ) {
    pffft_destroy_setup(s);
    s = NULL;
  }

  return s;
}
|
|
1446
|
+
|
|
1447
|
+
/*
 * Releases a setup created by pffft_new_setup(): the twiddle storage first,
 * then the structure itself.  Passing NULL is a no-op, like free(), so the
 * result of a failed pffft_new_setup() can be handed over without a check.
 */
void pffft_destroy_setup(PFFFT_Setup *s) {
  if (s == NULL) {
    return;
  }
  pffft_aligned_free(s->data);
  free(s);
}
|
|
1451
|
+
|
|
1452
|
+
#if !defined(PFFFT_SIMD_DISABLE)
|
|
1453
|
+
|
|
1454
|
+
/* [0 0 1 2 3 4 5 6 7 8] -> [0 8 7 6 5 4 3 2 1] */
|
|
1455
|
+
static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) {
|
|
1456
|
+
v4sf g0, g1;
|
|
1457
|
+
int k;
|
|
1458
|
+
INTERLEAVE2(in[0], in[1], g0, g1);
|
|
1459
|
+
in += in_stride;
|
|
1460
|
+
|
|
1461
|
+
*--out = VSWAPHL(g0, g1); // [g0l, g0h], [g1l g1h] -> [g1l, g0h]
|
|
1462
|
+
for (k = 1; k < N; ++k) {
|
|
1463
|
+
v4sf h0, h1;
|
|
1464
|
+
INTERLEAVE2(in[0], in[1], h0, h1);
|
|
1465
|
+
in += in_stride;
|
|
1466
|
+
*--out = VSWAPHL(g1, h0);
|
|
1467
|
+
*--out = VSWAPHL(h0, h1);
|
|
1468
|
+
g1 = h1;
|
|
1469
|
+
}
|
|
1470
|
+
*--out = VSWAPHL(g1, g0);
|
|
1471
|
+
}
|
|
1472
|
+
|
|
1473
|
+
/*
 * Counterpart of reversed_copy: reads 2*N consecutive vectors from 'in' and
 * writes N de-interleaved pairs (out[0], out[1]), 'out_stride' vectors apart.
 * The last pair is completed with the first input vector.
 */
static void unreversed_copy(int N, const v4sf *in, v4sf *out, int out_stride) {
  const v4sf first = in[0];
  v4sf carry = first;
  v4sf a, b, mixed_lo, mixed_hi;
  int remaining;

  in += 1;
  for (remaining = N - 1; remaining > 0; --remaining) {
    a = in[0];
    b = in[1];
    in += 2;
    mixed_hi = VSWAPHL(carry, a);
    mixed_lo = VSWAPHL(a, b);
    UNINTERLEAVE2(mixed_lo, mixed_hi, out[0], out[1]);
    out += out_stride;
    carry = b;
  }

  /* final pair: the missing half comes from the very first input vector */
  a = in[0];
  mixed_hi = VSWAPHL(carry, a);
  mixed_lo = VSWAPHL(a, first);
  UNINTERLEAVE2(mixed_lo, mixed_hi, out[0], out[1]);
}
|
|
1493
|
+
|
|
1494
|
+
void pffft_zreorder(PFFFT_Setup *setup, const float *in, float *out,
|
|
1495
|
+
pffft_direction_t direction) {
|
|
1496
|
+
int k, N = setup->N, Ncvec = setup->Ncvec;
|
|
1497
|
+
const v4sf *vin = (const v4sf *)in;
|
|
1498
|
+
v4sf *vout = (v4sf *)out;
|
|
1499
|
+
assert(in != out);
|
|
1500
|
+
if (setup->transform == PFFFT_REAL) {
|
|
1501
|
+
int k, dk = N / 32;
|
|
1502
|
+
if (direction == PFFFT_FORWARD) {
|
|
1503
|
+
for (k = 0; k < dk; ++k) {
|
|
1504
|
+
INTERLEAVE2(vin[k * 8 + 0], vin[k * 8 + 1], vout[2 * (0 * dk + k) + 0],
|
|
1505
|
+
vout[2 * (0 * dk + k) + 1]);
|
|
1506
|
+
INTERLEAVE2(vin[k * 8 + 4], vin[k * 8 + 5], vout[2 * (2 * dk + k) + 0],
|
|
1507
|
+
vout[2 * (2 * dk + k) + 1]);
|
|
1508
|
+
}
|
|
1509
|
+
reversed_copy(dk, vin + 2, 8, (v4sf *)(out + N / 2));
|
|
1510
|
+
reversed_copy(dk, vin + 6, 8, (v4sf *)(out + N));
|
|
1511
|
+
} else {
|
|
1512
|
+
for (k = 0; k < dk; ++k) {
|
|
1513
|
+
UNINTERLEAVE2(vin[2 * (0 * dk + k) + 0], vin[2 * (0 * dk + k) + 1],
|
|
1514
|
+
vout[k * 8 + 0], vout[k * 8 + 1]);
|
|
1515
|
+
UNINTERLEAVE2(vin[2 * (2 * dk + k) + 0], vin[2 * (2 * dk + k) + 1],
|
|
1516
|
+
vout[k * 8 + 4], vout[k * 8 + 5]);
|
|
1517
|
+
}
|
|
1518
|
+
unreversed_copy(dk, (v4sf *)(in + N / 4), (v4sf *)(out + N - 6 * SIMD_SZ),
|
|
1519
|
+
-8);
|
|
1520
|
+
unreversed_copy(dk, (v4sf *)(in + 3 * N / 4),
|
|
1521
|
+
(v4sf *)(out + N - 2 * SIMD_SZ), -8);
|
|
1522
|
+
}
|
|
1523
|
+
} else {
|
|
1524
|
+
if (direction == PFFFT_FORWARD) {
|
|
1525
|
+
for (k = 0; k < Ncvec; ++k) {
|
|
1526
|
+
int kk = (k / 4) + (k % 4) * (Ncvec / 4);
|
|
1527
|
+
INTERLEAVE2(vin[k * 2], vin[k * 2 + 1], vout[kk * 2], vout[kk * 2 + 1]);
|
|
1528
|
+
}
|
|
1529
|
+
} else {
|
|
1530
|
+
for (k = 0; k < Ncvec; ++k) {
|
|
1531
|
+
int kk = (k / 4) + (k % 4) * (Ncvec / 4);
|
|
1532
|
+
UNINTERLEAVE2(vin[kk * 2], vin[kk * 2 + 1], vout[k * 2],
|
|
1533
|
+
vout[k * 2 + 1]);
|
|
1534
|
+
}
|
|
1535
|
+
}
|
|
1536
|
+
}
|
|
1537
|
+
}
|
|
1538
|
+
|
|
1539
|
+
/*
 * Processes Ncvec/SIMD_SZ blocks of 8 vectors (4 real, 4 imaginary,
 * interleaved).  Each block is transposed, rows 1..3 are multiplied by the
 * matching entries of 'e' (6 vectors per block), and a 4-point butterfly is
 * applied.  'in' and 'out' must be different buffers.
 */
void pffft_cplx_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
  const int nblocks = Ncvec / SIMD_SZ; // number of 4x4 matrix blocks
  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
  int k;

  assert(in != out);

  for (k = 0; k < nblocks; ++k) {
    const v4sf *blk = in + 8 * k;
    const v4sf *tw = e + 6 * k;

    r0 = blk[0];
    i0 = blk[1];
    r1 = blk[2];
    i1 = blk[3];
    r2 = blk[4];
    i2 = blk[5];
    r3 = blk[6];
    i3 = blk[7];

    VTRANSPOSE4(r0, r1, r2, r3);
    VTRANSPOSE4(i0, i1, i2, i3);

    VCPLXMUL(r1, i1, tw[0], tw[1]);
    VCPLXMUL(r2, i2, tw[2], tw[3]);
    VCPLXMUL(r3, i3, tw[4], tw[5]);

    sr0 = VADD(r0, r2);
    dr0 = VSUB(r0, r2);
    sr1 = VADD(r1, r3);
    dr1 = VSUB(r1, r3);
    si0 = VADD(i0, i2);
    di0 = VSUB(i0, i2);
    si1 = VADD(i1, i3);
    di1 = VSUB(i1, i3);

    /*
      transformation for each column is:

      [1   1   1   1   0   0   0   0]   [r0]
      [1   0  -1   0   0  -1   0   1]   [r1]
      [1  -1   1  -1   0   0   0   0]   [r2]
      [1   0  -1   0   0   1   0  -1]   [r3]
      [0   0   0   0   1   1   1   1] * [i0]
      [0   1   0  -1   1   0  -1   0]   [i1]
      [0   0   0   0   1  -1   1  -1]   [i2]
      [0  -1   0   1   1   0  -1   0]   [i3]
    */

    out[0] = VADD(sr0, sr1); /* r0 */
    out[1] = VADD(si0, si1); /* i0 */
    out[2] = VADD(dr0, di1); /* r1 */
    out[3] = VSUB(di0, dr1); /* i1 */
    out[4] = VSUB(sr0, sr1); /* r2 */
    out[5] = VSUB(si0, si1); /* i2 */
    out[6] = VSUB(dr0, di1); /* r3 */
    out[7] = VADD(di0, dr1); /* i3 */
    out += 8;
  }
}
|
|
1600
|
+
|
|
1601
|
+
/*
 * Mirror image of pffft_cplx_finalize: for each of the Ncvec/SIMD_SZ blocks of
 * 8 vectors, applies a 4-point butterfly, multiplies rows 1..3 by the
 * conjugate of the matching entries of 'e' (6 vectors per block), then
 * transposes.  'in' and 'out' must be different buffers.
 */
void pffft_cplx_preprocess(int Ncvec, const v4sf *in, v4sf *out,
                           const v4sf *e) {
  const int nblocks = Ncvec / SIMD_SZ; // number of 4x4 matrix blocks
  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
  int k;

  assert(in != out);

  for (k = 0; k < nblocks; ++k) {
    const v4sf *blk = in + 8 * k;
    const v4sf *tw = e + 6 * k;

    r0 = blk[0];
    i0 = blk[1];
    r1 = blk[2];
    i1 = blk[3];
    r2 = blk[4];
    i2 = blk[5];
    r3 = blk[6];
    i3 = blk[7];

    sr0 = VADD(r0, r2);
    dr0 = VSUB(r0, r2);
    sr1 = VADD(r1, r3);
    dr1 = VSUB(r1, r3);
    si0 = VADD(i0, i2);
    di0 = VSUB(i0, i2);
    si1 = VADD(i1, i3);
    di1 = VSUB(i1, i3);

    r0 = VADD(sr0, sr1);
    i0 = VADD(si0, si1);
    r1 = VSUB(dr0, di1);
    i1 = VADD(di0, dr1);
    r2 = VSUB(sr0, sr1);
    i2 = VSUB(si0, si1);
    r3 = VADD(dr0, di1);
    i3 = VSUB(di0, dr1);

    VCPLXMULCONJ(r1, i1, tw[0], tw[1]);
    VCPLXMULCONJ(r2, i2, tw[2], tw[3]);
    VCPLXMULCONJ(r3, i3, tw[4], tw[5]);

    VTRANSPOSE4(r0, r1, r2, r3);
    VTRANSPOSE4(i0, i1, i2, i3);

    out[0] = r0;
    out[1] = i0;
    out[2] = r1;
    out[3] = i1;
    out[4] = r2;
    out[5] = i2;
    out[6] = r3;
    out[7] = i3;
    out += 8;
  }
}
|
|
1652
|
+
|
|
1653
|
+
static ALWAYS_INLINE(void)
|
|
1654
|
+
pffft_real_finalize_4x4(const v4sf *in0, const v4sf *in1, const v4sf *in,
|
|
1655
|
+
const v4sf *e, v4sf *out) {
|
|
1656
|
+
v4sf r0, i0, r1, i1, r2, i2, r3, i3;
|
|
1657
|
+
v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
|
|
1658
|
+
r0 = *in0;
|
|
1659
|
+
i0 = *in1;
|
|
1660
|
+
r1 = *in++;
|
|
1661
|
+
i1 = *in++;
|
|
1662
|
+
r2 = *in++;
|
|
1663
|
+
i2 = *in++;
|
|
1664
|
+
r3 = *in++;
|
|
1665
|
+
i3 = *in++;
|
|
1666
|
+
VTRANSPOSE4(r0, r1, r2, r3);
|
|
1667
|
+
VTRANSPOSE4(i0, i1, i2, i3);
|
|
1668
|
+
|
|
1669
|
+
/*
|
|
1670
|
+
transformation for each column is:
|
|
1671
|
+
|
|
1672
|
+
[1 1 1 1 0 0 0 0] [r0]
|
|
1673
|
+
[1 0 -1 0 0 -1 0 1] [r1]
|
|
1674
|
+
[1 0 -1 0 0 1 0 -1] [r2]
|
|
1675
|
+
[1 -1 1 -1 0 0 0 0] [r3]
|
|
1676
|
+
[0 0 0 0 1 1 1 1] * [i0]
|
|
1677
|
+
[0 -1 0 1 -1 0 1 0] [i1]
|
|
1678
|
+
[0 -1 0 1 1 0 -1 0] [i2]
|
|
1679
|
+
[0 0 0 0 -1 1 -1 1] [i3]
|
|
1680
|
+
*/
|
|
1681
|
+
|
|
1682
|
+
// cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 <<
|
|
1683
|
+
// "\n 1: " << r2 << "\n 1: " << r3 << "\n"; cerr << "matrix initial, before
|
|
1684
|
+
// e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " <<
|
|
1685
|
+
// i3 << "\n";
|
|
1686
|
+
|
|
1687
|
+
VCPLXMUL(r1, i1, e[0], e[1]);
|
|
1688
|
+
VCPLXMUL(r2, i2, e[2], e[3]);
|
|
1689
|
+
VCPLXMUL(r3, i3, e[4], e[5]);
|
|
1690
|
+
|
|
1691
|
+
// cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n
|
|
1692
|
+
// 1: " << r2 << "\n 1: " << r3 << "\n"; cerr << "matrix initial, imag part:\n
|
|
1693
|
+
// 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n";
|
|
1694
|
+
|
|
1695
|
+
sr0 = VADD(r0, r2);
|
|
1696
|
+
dr0 = VSUB(r0, r2);
|
|
1697
|
+
sr1 = VADD(r1, r3);
|
|
1698
|
+
dr1 = VSUB(r3, r1);
|
|
1699
|
+
si0 = VADD(i0, i2);
|
|
1700
|
+
di0 = VSUB(i0, i2);
|
|
1701
|
+
si1 = VADD(i1, i3);
|
|
1702
|
+
di1 = VSUB(i3, i1);
|
|
1703
|
+
|
|
1704
|
+
r0 = VADD(sr0, sr1);
|
|
1705
|
+
r3 = VSUB(sr0, sr1);
|
|
1706
|
+
i0 = VADD(si0, si1);
|
|
1707
|
+
i3 = VSUB(si1, si0);
|
|
1708
|
+
r1 = VADD(dr0, di1);
|
|
1709
|
+
r2 = VSUB(dr0, di1);
|
|
1710
|
+
i1 = VSUB(dr1, di0);
|
|
1711
|
+
i2 = VADD(dr1, di0);
|
|
1712
|
+
|
|
1713
|
+
*out++ = r0;
|
|
1714
|
+
*out++ = i0;
|
|
1715
|
+
*out++ = r1;
|
|
1716
|
+
*out++ = i1;
|
|
1717
|
+
*out++ = r2;
|
|
1718
|
+
*out++ = i2;
|
|
1719
|
+
*out++ = r3;
|
|
1720
|
+
*out++ = i3;
|
|
1721
|
+
}
|
|
1722
|
+
|
|
1723
|
+
static NEVER_INLINE(void)
|
|
1724
|
+
pffft_real_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
|
|
1725
|
+
int k, dk = Ncvec / SIMD_SZ; // number of 4x4 matrix blocks
|
|
1726
|
+
/* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
|
|
1727
|
+
|
|
1728
|
+
v4sf_union cr, ci, *uout = (v4sf_union *)out;
|
|
1729
|
+
v4sf save = in[7], zero = VZERO();
|
|
1730
|
+
float xr0, xi0, xr1, xi1, xr2, xi2, xr3, xi3;
|
|
1731
|
+
static const float s = (float)(M_SQRT2 / 2);
|
|
1732
|
+
|
|
1733
|
+
cr.v = in[0];
|
|
1734
|
+
ci.v = in[Ncvec * 2 - 1];
|
|
1735
|
+
assert(in != out);
|
|
1736
|
+
pffft_real_finalize_4x4(&zero, &zero, in + 1, e, out);
|
|
1737
|
+
|
|
1738
|
+
/*
|
|
1739
|
+
[cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3]
|
|
1740
|
+
|
|
1741
|
+
[Xr(1)] ] [1 1 1 1 0 0 0 0]
|
|
1742
|
+
[Xr(N/4) ] [0 0 0 0 1 s 0 -s]
|
|
1743
|
+
[Xr(N/2) ] [1 0 -1 0 0 0 0 0]
|
|
1744
|
+
[Xr(3N/4)] [0 0 0 0 1 -s 0 s]
|
|
1745
|
+
[Xi(1) ] [1 -1 1 -1 0 0 0 0]
|
|
1746
|
+
[Xi(N/4) ] [0 0 0 0 0 -s -1 -s]
|
|
1747
|
+
[Xi(N/2) ] [0 -1 0 1 0 0 0 0]
|
|
1748
|
+
[Xi(3N/4)] [0 0 0 0 0 -s 1 -s]
|
|
1749
|
+
*/
|
|
1750
|
+
|
|
1751
|
+
xr0 = (cr.f[0] + cr.f[2]) + (cr.f[1] + cr.f[3]);
|
|
1752
|
+
uout[0].f[0] = xr0;
|
|
1753
|
+
xi0 = (cr.f[0] + cr.f[2]) - (cr.f[1] + cr.f[3]);
|
|
1754
|
+
uout[1].f[0] = xi0;
|
|
1755
|
+
xr2 = (cr.f[0] - cr.f[2]);
|
|
1756
|
+
uout[4].f[0] = xr2;
|
|
1757
|
+
xi2 = (cr.f[3] - cr.f[1]);
|
|
1758
|
+
uout[5].f[0] = xi2;
|
|
1759
|
+
xr1 = ci.f[0] + s * (ci.f[1] - ci.f[3]);
|
|
1760
|
+
uout[2].f[0] = xr1;
|
|
1761
|
+
xi1 = -ci.f[2] - s * (ci.f[1] + ci.f[3]);
|
|
1762
|
+
uout[3].f[0] = xi1;
|
|
1763
|
+
xr3 = ci.f[0] - s * (ci.f[1] - ci.f[3]);
|
|
1764
|
+
uout[6].f[0] = xr3;
|
|
1765
|
+
xi3 = ci.f[2] - s * (ci.f[1] + ci.f[3]);
|
|
1766
|
+
uout[7].f[0] = xi3;
|
|
1767
|
+
|
|
1768
|
+
for (k = 1; k < dk; ++k) {
|
|
1769
|
+
v4sf save_next = in[8 * k + 7];
|
|
1770
|
+
pffft_real_finalize_4x4(&save, &in[8 * k + 0], in + 8 * k + 1, e + k * 6,
|
|
1771
|
+
out + k * 8);
|
|
1772
|
+
save = save_next;
|
|
1773
|
+
}
|
|
1774
|
+
}
|
|
1775
|
+
|
|
1776
|
+
static ALWAYS_INLINE(void)
|
|
1777
|
+
pffft_real_preprocess_4x4(const v4sf *in, const v4sf *e, v4sf *out,
|
|
1778
|
+
int first) {
|
|
1779
|
+
v4sf r0 = in[0], i0 = in[1], r1 = in[2], i1 = in[3], r2 = in[4], i2 = in[5],
|
|
1780
|
+
r3 = in[6], i3 = in[7];
|
|
1781
|
+
/*
|
|
1782
|
+
transformation for each column is:
|
|
1783
|
+
|
|
1784
|
+
[1 1 1 1 0 0 0 0] [r0]
|
|
1785
|
+
[1 0 0 -1 0 -1 -1 0] [r1]
|
|
1786
|
+
[1 -1 -1 1 0 0 0 0] [r2]
|
|
1787
|
+
[1 0 0 -1 0 1 1 0] [r3]
|
|
1788
|
+
[0 0 0 0 1 -1 1 -1] * [i0]
|
|
1789
|
+
[0 -1 1 0 1 0 0 1] [i1]
|
|
1790
|
+
[0 0 0 0 1 1 -1 -1] [i2]
|
|
1791
|
+
[0 1 -1 0 1 0 0 1] [i3]
|
|
1792
|
+
*/
|
|
1793
|
+
|
|
1794
|
+
v4sf sr0 = VADD(r0, r3), dr0 = VSUB(r0, r3);
|
|
1795
|
+
v4sf sr1 = VADD(r1, r2), dr1 = VSUB(r1, r2);
|
|
1796
|
+
v4sf si0 = VADD(i0, i3), di0 = VSUB(i0, i3);
|
|
1797
|
+
v4sf si1 = VADD(i1, i2), di1 = VSUB(i1, i2);
|
|
1798
|
+
|
|
1799
|
+
r0 = VADD(sr0, sr1);
|
|
1800
|
+
r2 = VSUB(sr0, sr1);
|
|
1801
|
+
r1 = VSUB(dr0, si1);
|
|
1802
|
+
r3 = VADD(dr0, si1);
|
|
1803
|
+
i0 = VSUB(di0, di1);
|
|
1804
|
+
i2 = VADD(di0, di1);
|
|
1805
|
+
i1 = VSUB(si0, dr1);
|
|
1806
|
+
i3 = VADD(si0, dr1);
|
|
1807
|
+
|
|
1808
|
+
VCPLXMULCONJ(r1, i1, e[0], e[1]);
|
|
1809
|
+
VCPLXMULCONJ(r2, i2, e[2], e[3]);
|
|
1810
|
+
VCPLXMULCONJ(r3, i3, e[4], e[5]);
|
|
1811
|
+
|
|
1812
|
+
VTRANSPOSE4(r0, r1, r2, r3);
|
|
1813
|
+
VTRANSPOSE4(i0, i1, i2, i3);
|
|
1814
|
+
|
|
1815
|
+
if (!first) {
|
|
1816
|
+
*out++ = r0;
|
|
1817
|
+
*out++ = i0;
|
|
1818
|
+
}
|
|
1819
|
+
*out++ = r1;
|
|
1820
|
+
*out++ = i1;
|
|
1821
|
+
*out++ = r2;
|
|
1822
|
+
*out++ = i2;
|
|
1823
|
+
*out++ = r3;
|
|
1824
|
+
*out++ = i3;
|
|
1825
|
+
}
|
|
1826
|
+
|
|
1827
|
+
static NEVER_INLINE(void)
|
|
1828
|
+
pffft_real_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
|
|
1829
|
+
int k, dk = Ncvec / SIMD_SZ; // number of 4x4 matrix blocks
|
|
1830
|
+
/* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
|
|
1831
|
+
|
|
1832
|
+
v4sf_union Xr, Xi, *uout = (v4sf_union *)out;
|
|
1833
|
+
float cr0, ci0, cr1, ci1, cr2, ci2, cr3, ci3;
|
|
1834
|
+
static const float s = (float)M_SQRT2;
|
|
1835
|
+
assert(in != out);
|
|
1836
|
+
for (k = 0; k < 4; ++k) {
|
|
1837
|
+
Xr.f[k] = ((float *)in)[8 * k];
|
|
1838
|
+
Xi.f[k] = ((float *)in)[8 * k + 4];
|
|
1839
|
+
}
|
|
1840
|
+
|
|
1841
|
+
pffft_real_preprocess_4x4(in, e, out + 1, 1); // will write only 6 values
|
|
1842
|
+
|
|
1843
|
+
/*
|
|
1844
|
+
[Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3]
|
|
1845
|
+
|
|
1846
|
+
[cr0] [1 0 2 0 1 0 0 0]
|
|
1847
|
+
[cr1] [1 0 0 0 -1 0 -2 0]
|
|
1848
|
+
[cr2] [1 0 -2 0 1 0 0 0]
|
|
1849
|
+
[cr3] [1 0 0 0 -1 0 2 0]
|
|
1850
|
+
[ci0] [0 2 0 2 0 0 0 0]
|
|
1851
|
+
[ci1] [0 s 0 -s 0 -s 0 -s]
|
|
1852
|
+
[ci2] [0 0 0 0 0 -2 0 2]
|
|
1853
|
+
[ci3] [0 -s 0 s 0 -s 0 -s]
|
|
1854
|
+
*/
|
|
1855
|
+
for (k = 1; k < dk; ++k) {
|
|
1856
|
+
pffft_real_preprocess_4x4(in + 8 * k, e + k * 6, out - 1 + k * 8, 0);
|
|
1857
|
+
}
|
|
1858
|
+
|
|
1859
|
+
cr0 = (Xr.f[0] + Xi.f[0]) + 2 * Xr.f[2];
|
|
1860
|
+
uout[0].f[0] = cr0;
|
|
1861
|
+
cr1 = (Xr.f[0] - Xi.f[0]) - 2 * Xi.f[2];
|
|
1862
|
+
uout[0].f[1] = cr1;
|
|
1863
|
+
cr2 = (Xr.f[0] + Xi.f[0]) - 2 * Xr.f[2];
|
|
1864
|
+
uout[0].f[2] = cr2;
|
|
1865
|
+
cr3 = (Xr.f[0] - Xi.f[0]) + 2 * Xi.f[2];
|
|
1866
|
+
uout[0].f[3] = cr3;
|
|
1867
|
+
ci0 = 2 * (Xr.f[1] + Xr.f[3]);
|
|
1868
|
+
uout[2 * Ncvec - 1].f[0] = ci0;
|
|
1869
|
+
ci1 = s * (Xr.f[1] - Xr.f[3]) - s * (Xi.f[1] + Xi.f[3]);
|
|
1870
|
+
uout[2 * Ncvec - 1].f[1] = ci1;
|
|
1871
|
+
ci2 = 2 * (Xi.f[3] - Xi.f[1]);
|
|
1872
|
+
uout[2 * Ncvec - 1].f[2] = ci2;
|
|
1873
|
+
ci3 = -s * (Xr.f[1] - Xr.f[3]) - s * (Xi.f[1] + Xi.f[3]);
|
|
1874
|
+
uout[2 * Ncvec - 1].f[3] = ci3;
|
|
1875
|
+
}
|
|
1876
|
+
|
|
1877
|
+
/*
 * Core SIMD transform driver shared by pffft_transform() and
 * pffft_transform_ordered().
 *
 * setup     : transform description (reads Ncvec, ifac, twiddle, e, transform).
 * finput    : input samples; must satisfy VALIGNED (asserted).
 * foutput   : output samples; must satisfy VALIGNED (asserted).
 * scratch   : work buffer; when NULL a buffer of 2*Ncvec vectors is
 *             allocated on the stack instead.
 * direction : PFFFT_FORWARD selects the forward path, anything else the
 *             backward path.
 * ordered   : non-zero adds a pffft_zreorder() pass (after the forward
 *             transform, before the backward one).
 *
 * Two buffers are used in ping-pong fashion: buff[0] is the output and
 * buff[1] is the scratch area. `ib` tracks which of the two is the current
 * target; on exit buff[ib] is asserted to be the output buffer.
 */
void pffft_transform_internal(PFFFT_Setup *setup, const float *finput,
                              float *foutput, v4sf *scratch,
                              pffft_direction_t direction, int ordered) {
  int k, Ncvec = setup->Ncvec;
  int nf_odd = (setup->ifac[1] & 1); // low bit of ifac[1]

  // temporary buffer is allocated on the stack if the scratch pointer is NULL
  int stack_allocate = (scratch == 0 ? Ncvec * 2 : 1);
  VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate);

  const v4sf *vinput = (const v4sf *)finput;
  v4sf *voutput = (v4sf *)foutput;
  v4sf *buff[2] = {voutput, scratch ? scratch : scratch_on_stack};
  // starting buffer index, derived from nf_odd and the ordered flag
  int ib = (nf_odd ^ ordered ? 1 : 0);

  assert(VALIGNED(finput) && VALIGNED(foutput));

  // assert(finput != foutput);
  if (direction == PFFFT_FORWARD) {
    ib = !ib;
    if (setup->transform == PFFFT_REAL) {
      // rfftf1_ps returns the buffer holding its result; turn that pointer
      // back into an index into buff[].
      ib = (rfftf1_ps(Ncvec * 2, vinput, buff[ib], buff[!ib], setup->twiddle,
                      &setup->ifac[0]) == buff[0]
                ? 0
                : 1);
      // finalize reads buff[ib] and writes buff[!ib]
      pffft_real_finalize(Ncvec, buff[ib], buff[!ib], (v4sf *)setup->e);
    } else {
      // complex input: uninterleave each pair of input vectors into tmp
      // before running the FFT passes
      v4sf *tmp = buff[ib];
      for (k = 0; k < Ncvec; ++k) {
        UNINTERLEAVE2(vinput[k * 2], vinput[k * 2 + 1], tmp[k * 2],
                      tmp[k * 2 + 1]);
      }
      ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib], setup->twiddle,
                      &setup->ifac[0], -1) == buff[0]
                ? 0
                : 1);
      pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], (v4sf *)setup->e);
    }
    if (ordered) {
      // reorder from buff[!ib] (finalize output) back into buff[ib]
      pffft_zreorder(setup, (float *)buff[!ib], (float *)buff[ib],
                     PFFFT_FORWARD);
    } else
      ib = !ib; // no reorder: the finalize output is the result
  } else {
    if (vinput == buff[ib]) {
      ib = !ib; // may happen when finput == foutput
    }
    if (ordered) {
      // undo the ordering first; the reordered data becomes the new input
      pffft_zreorder(setup, (float *)vinput, (float *)buff[ib], PFFFT_BACKWARD);
      vinput = buff[ib];
      ib = !ib;
    }
    if (setup->transform == PFFFT_REAL) {
      pffft_real_preprocess(Ncvec, vinput, buff[ib], (v4sf *)setup->e);
      ib = (rfftb1_ps(Ncvec * 2, buff[ib], buff[0], buff[1], setup->twiddle,
                      &setup->ifac[0]) == buff[0]
                ? 0
                : 1);
    } else {
      pffft_cplx_preprocess(Ncvec, vinput, buff[ib], (v4sf *)setup->e);
      // same cfftf1_ps routine as the forward path, called with +1 here
      ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1], setup->twiddle,
                      &setup->ifac[0], +1) == buff[0]
                ? 0
                : 1);
      // re-interleave the result in place
      for (k = 0; k < Ncvec; ++k) {
        INTERLEAVE2(buff[ib][k * 2], buff[ib][k * 2 + 1], buff[ib][k * 2],
                    buff[ib][k * 2 + 1]);
      }
    }
  }

  if (buff[ib] != voutput) {
    /* extra copy required -- this situation should only happen when finput ==
     * foutput */
    assert(finput == foutput);
    // copy 2*Ncvec vectors from the scratch buffer to the output
    for (k = 0; k < Ncvec; ++k) {
      v4sf a = buff[ib][2 * k], b = buff[ib][2 * k + 1];
      voutput[2 * k] = a;
      voutput[2 * k + 1] = b;
    }
    ib = !ib;
  }
  assert(buff[ib] == voutput);
}
|
|
1961
|
+
|
|
1962
|
+
/*
 * Multiply-accumulate two spectra (SIMD build):
 *
 *     ab += (a * b) * scaling
 *
 * s       : transform setup (reads Ncvec and transform).
 * a, b    : input spectra, 2*Ncvec vectors each; must satisfy VALIGNED.
 * ab      : accumulator, 2*Ncvec vectors; must satisfy VALIGNED.
 * scaling : scalar applied to every product before accumulation.
 *
 * The vectors are consumed as (real, imag) pairs and combined with
 * VCPLXMUL / VMADD (macros defined elsewhere in this file). Two pairs are
 * handled per loop iteration. For PFFFT_REAL setups, lane 0 of the first two
 * vectors is recomputed afterwards as two independent real products, using
 * the values saved before the main loop.
 *
 * a, b and ab are accessed through RESTRICT-qualified pointers.
 */
void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b,
                                float *ab, float scaling) {
  int Ncvec = s->Ncvec;
  const v4sf *RESTRICT va = (const v4sf *)a;
  const v4sf *RESTRICT vb = (const v4sf *)b;
  v4sf *RESTRICT vab = (v4sf *)ab;

#ifdef __arm__
  __builtin_prefetch(va);
  __builtin_prefetch(vb);
  __builtin_prefetch(vab);
  __builtin_prefetch(va + 2);
  __builtin_prefetch(vb + 2);
  __builtin_prefetch(vab + 2);
  __builtin_prefetch(va + 4);
  __builtin_prefetch(vb + 4);
  __builtin_prefetch(vab + 4);
  __builtin_prefetch(va + 6);
  __builtin_prefetch(vb + 6);
  __builtin_prefetch(vab + 6);
  /* NOTE(review): the macro defined here is ZCONVOLVE_USING_INLINE_NEON_ASM,
   * but the guards below test ZCONVOLVE_USING_INLINE_ASM. As written, this
   * define therefore never selects the asm path; the asm is compiled only if
   * ZCONVOLVE_USING_INLINE_ASM is defined by other means. Left unchanged on
   * purpose so that no build silently switches to the asm implementation. */
#ifndef __clang__
#define ZCONVOLVE_USING_INLINE_NEON_ASM
#endif
#endif

  float ar0, ai0, br0, bi0, abr0, abi0;
#ifndef ZCONVOLVE_USING_INLINE_ASM
  v4sf vscal = LD_PS1(scaling);
  int i;
#endif

  assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
  /* Save lane 0 of the first two vectors of every operand: the main loop
   * overwrites ab, and the PFFFT_REAL fix-up at the end needs the originals. */
  ar0 = ((v4sf_union *)va)[0].f[0];
  ai0 = ((v4sf_union *)va)[1].f[0];
  br0 = ((v4sf_union *)vb)[0].f[0];
  bi0 = ((v4sf_union *)vb)[1].f[0];
  abr0 = ((v4sf_union *)vab)[0].f[0];
  abi0 = ((v4sf_union *)vab)[1].f[0];

#ifdef ZCONVOLVE_USING_INLINE_ASM // inline asm version, unfortunately
                                  // miscompiled by clang 3.2, at least on
                                  // ubuntu.. so this will be restricted to gcc
  const float *a_ = a, *b_ = b;
  float *ab_ = ab;
  int N = Ncvec;
  /* r8 walks ab for the loads while %2 walks it for the stores; the counter
   * %3 drops by 2 per iteration. "cc" is listed as a clobber because `subs`
   * updates the condition flags. */
  asm volatile("mov r8, %2 \n"
               "vdup.f32 q15, %4 \n"
               "1: \n"
               "pld [%0,#64] \n"
               "pld [%1,#64] \n"
               "pld [%2,#64] \n"
               "pld [%0,#96] \n"
               "pld [%1,#96] \n"
               "pld [%2,#96] \n"
               "vld1.f32 {q0,q1}, [%0,:128]! \n"
               "vld1.f32 {q4,q5}, [%1,:128]! \n"
               "vld1.f32 {q2,q3}, [%0,:128]! \n"
               "vld1.f32 {q6,q7}, [%1,:128]! \n"
               "vld1.f32 {q8,q9}, [r8,:128]! \n"

               "vmul.f32 q10, q0, q4 \n"
               "vmul.f32 q11, q0, q5 \n"
               "vmul.f32 q12, q2, q6 \n"
               "vmul.f32 q13, q2, q7 \n"
               "vmls.f32 q10, q1, q5 \n"
               "vmla.f32 q11, q1, q4 \n"
               "vld1.f32 {q0,q1}, [r8,:128]! \n"
               "vmls.f32 q12, q3, q7 \n"
               "vmla.f32 q13, q3, q6 \n"
               "vmla.f32 q8, q10, q15 \n"
               "vmla.f32 q9, q11, q15 \n"
               "vmla.f32 q0, q12, q15 \n"
               "vmla.f32 q1, q13, q15 \n"
               "vst1.f32 {q8,q9},[%2,:128]! \n"
               "vst1.f32 {q0,q1},[%2,:128]! \n"
               "subs %3, #2 \n"
               "bne 1b \n"
               : "+r"(a_), "+r"(b_), "+r"(ab_), "+r"(N)
               : "r"(scaling)
               : "r8", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
                 "q9", "q10", "q11", "q12", "q13", "q15", "cc", "memory");
#else // default routine, works fine for non-arm cpus with current compilers
  /* Two (real, imag) vector pairs per iteration. */
  for (i = 0; i < Ncvec; i += 2) {
    v4sf ar, ai, br, bi;
    ar = va[2 * i + 0];
    ai = va[2 * i + 1];
    br = vb[2 * i + 0];
    bi = vb[2 * i + 1];
    VCPLXMUL(ar, ai, br, bi);
    vab[2 * i + 0] = VMADD(ar, vscal, vab[2 * i + 0]);
    vab[2 * i + 1] = VMADD(ai, vscal, vab[2 * i + 1]);
    ar = va[2 * i + 2];
    ai = va[2 * i + 3];
    br = vb[2 * i + 2];
    bi = vb[2 * i + 3];
    VCPLXMUL(ar, ai, br, bi);
    vab[2 * i + 2] = VMADD(ar, vscal, vab[2 * i + 2]);
    vab[2 * i + 3] = VMADD(ai, vscal, vab[2 * i + 3]);
  }
#endif
  if (s->transform == PFFFT_REAL) {
    /* Lane 0 of vectors 0 and 1 is handled as two separate real values
     * rather than as one complex pair, so redo it from the saved inputs. */
    ((v4sf_union *)vab)[0].f[0] = abr0 + ar0 * br0 * scaling;
    ((v4sf_union *)vab)[1].f[0] = abi0 + ai0 * bi0 * scaling;
  }
}
|
|
2067
|
+
|
|
2068
|
+
#else // defined(PFFFT_SIMD_DISABLE)
|
|
2069
|
+
|
|
2070
|
+
// standard routine using scalar floats, without SIMD stuff.
|
|
2071
|
+
|
|
2072
|
+
#define pffft_zreorder_nosimd pffft_zreorder
|
|
2073
|
+
void pffft_zreorder_nosimd(PFFFT_Setup *setup, const float *in, float *out,
|
|
2074
|
+
pffft_direction_t direction) {
|
|
2075
|
+
int k, N = setup->N;
|
|
2076
|
+
if (setup->transform == PFFFT_COMPLEX) {
|
|
2077
|
+
for (k = 0; k < 2 * N; ++k)
|
|
2078
|
+
out[k] = in[k];
|
|
2079
|
+
return;
|
|
2080
|
+
} else if (direction == PFFFT_FORWARD) {
|
|
2081
|
+
float x_N = in[N - 1];
|
|
2082
|
+
for (k = N - 1; k > 1; --k)
|
|
2083
|
+
out[k] = in[k - 1];
|
|
2084
|
+
out[0] = in[0];
|
|
2085
|
+
out[1] = x_N;
|
|
2086
|
+
} else {
|
|
2087
|
+
float x_N = in[1];
|
|
2088
|
+
for (k = 1; k < N - 1; ++k)
|
|
2089
|
+
out[k] = in[k + 1];
|
|
2090
|
+
out[0] = in[0];
|
|
2091
|
+
out[N - 1] = x_N;
|
|
2092
|
+
}
|
|
2093
|
+
}
|
|
2094
|
+
|
|
2095
|
+
#define pffft_transform_internal_nosimd pffft_transform_internal
/*
 * Scalar (non-SIMD) transform driver; the #define above makes it the
 * definition of pffft_transform_internal in this build.
 *
 * setup     : transform description (reads Ncvec, ifac, twiddle, transform).
 * input     : input samples.
 * output    : output samples.
 * scratch   : work buffer; when NULL a buffer of 2*Ncvec elements is
 *             allocated on the stack instead.
 * direction : PFFFT_FORWARD selects the forward path, anything else the
 *             backward path.
 * ordered   : non-zero adds a pffft_zreorder() pass for real transforms;
 *             forced to 0 for PFFFT_COMPLEX setups.
 *
 * buff[0] is the output and buff[1] the scratch area; `ib` tracks which one
 * currently holds (or will receive) the data. On exit buff[ib] is asserted
 * to be the output buffer.
 */
void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input,
                                     float *output, float *scratch,
                                     pffft_direction_t direction, int ordered) {
  int Ncvec = setup->Ncvec;
  int nf_odd = (setup->ifac[1] & 1); // low bit of ifac[1]

  // temporary buffer is allocated on the stack if the scratch pointer is NULL
  int stack_allocate = (scratch == 0 ? Ncvec * 2 : 1);
  VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate);
  float *buff[2];
  int ib;
  if (scratch == 0)
    scratch = scratch_on_stack;
  buff[0] = output;
  buff[1] = scratch;

  if (setup->transform == PFFFT_COMPLEX)
    ordered = 0; // it is always ordered.
  // starting buffer index, derived from nf_odd and the ordered flag
  ib = (nf_odd ^ ordered ? 1 : 0);

  if (direction == PFFFT_FORWARD) {
    // The FFT routines return the buffer holding their result; the
    // comparison with buff[0] turns that pointer back into an index.
    if (setup->transform == PFFFT_REAL) {
      ib = (rfftf1_ps(Ncvec * 2, input, buff[ib], buff[!ib], setup->twiddle,
                      &setup->ifac[0]) == buff[0]
                ? 0
                : 1);
    } else {
      ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib], setup->twiddle,
                      &setup->ifac[0], -1) == buff[0]
                ? 0
                : 1);
    }
    if (ordered) {
      // reorder from the result buffer into the other one, then switch to it
      pffft_zreorder(setup, buff[ib], buff[!ib], PFFFT_FORWARD);
      ib = !ib;
    }
  } else {
    if (input == buff[ib]) {
      ib = !ib; // may happen when finput == foutput
    }
    if (ordered) {
      // undo the ordering first; the reordered data becomes the new input
      pffft_zreorder(setup, input, buff[!ib], PFFFT_BACKWARD);
      input = buff[!ib];
    }
    if (setup->transform == PFFFT_REAL) {
      ib = (rfftb1_ps(Ncvec * 2, input, buff[ib], buff[!ib], setup->twiddle,
                      &setup->ifac[0]) == buff[0]
                ? 0
                : 1);
    } else {
      // same cfftf1_ps routine as the forward path, called with +1 here
      ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib], setup->twiddle,
                      &setup->ifac[0], +1) == buff[0]
                ? 0
                : 1);
    }
  }
  if (buff[ib] != output) {
    int k;
    // extra copy required -- this situation should happen only when finput ==
    // foutput
    assert(input == output);
    // copy 2*Ncvec floats from the scratch buffer to the output
    for (k = 0; k < Ncvec; ++k) {
      float a = buff[ib][2 * k], b = buff[ib][2 * k + 1];
      output[2 * k] = a;
      output[2 * k + 1] = b;
    }
    ib = !ib;
  }
  assert(buff[ib] == output);
}
|
|
2166
|
+
|
|
2167
|
+
#define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate
|
|
2168
|
+
void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup *s, const float *a,
|
|
2169
|
+
const float *b, float *ab,
|
|
2170
|
+
float scaling) {
|
|
2171
|
+
int i, Ncvec = s->Ncvec;
|
|
2172
|
+
|
|
2173
|
+
if (s->transform == PFFFT_REAL) {
|
|
2174
|
+
// take care of the fftpack ordering
|
|
2175
|
+
ab[0] += a[0] * b[0] * scaling;
|
|
2176
|
+
ab[2 * Ncvec - 1] += a[2 * Ncvec - 1] * b[2 * Ncvec - 1] * scaling;
|
|
2177
|
+
++ab;
|
|
2178
|
+
++a;
|
|
2179
|
+
++b;
|
|
2180
|
+
--Ncvec;
|
|
2181
|
+
}
|
|
2182
|
+
for (i = 0; i < Ncvec; ++i) {
|
|
2183
|
+
float ar, ai, br, bi;
|
|
2184
|
+
ar = a[2 * i + 0];
|
|
2185
|
+
ai = a[2 * i + 1];
|
|
2186
|
+
br = b[2 * i + 0];
|
|
2187
|
+
bi = b[2 * i + 1];
|
|
2188
|
+
VCPLXMUL(ar, ai, br, bi);
|
|
2189
|
+
ab[2 * i + 0] += ar * scaling;
|
|
2190
|
+
ab[2 * i + 1] += ai * scaling;
|
|
2191
|
+
}
|
|
2192
|
+
}
|
|
2193
|
+
|
|
2194
|
+
#endif // defined(PFFFT_SIMD_DISABLE)
|
|
2195
|
+
|
|
2196
|
+
/*
 * Public entry point: run the transform described by `setup` on `input`,
 * writing to `output`, with the `ordered` flag of
 * pffft_transform_internal() set to 0. `work` is forwarded as the scratch
 * buffer.
 */
void pffft_transform(PFFFT_Setup *setup, const float *input, float *output,
                     float *work, pffft_direction_t direction) {
  v4sf *const scratch = (v4sf *)work;

  pffft_transform_internal(setup, input, output, scratch, direction,
                           /* ordered = */ 0);
}
|
|
2200
|
+
|
|
2201
|
+
void pffft_transform_ordered(PFFFT_Setup *setup, const float *input,
|
|
2202
|
+
float *output, float *work,
|
|
2203
|
+
pffft_direction_t direction) {
|
|
2204
|
+
pffft_transform_internal(setup, input, output, (v4sf *)work, direction, 1);
|
|
2205
|
+
}
|