react-native-executorch 0.4.7 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1013):
  1. package/android/CMakeLists.txt +17 -0
  2. package/android/build.gradle +76 -13
  3. package/android/libs/classes.jar +0 -0
  4. package/android/src/main/cpp/CMakeLists.txt +73 -0
  5. package/android/src/main/cpp/ETInstallerModule.cpp +76 -0
  6. package/android/src/main/cpp/ETInstallerModule.h +43 -0
  7. package/android/src/main/java/com/swmansion/rnexecutorch/ETInstaller.kt +66 -0
  8. package/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +3 -3
  9. package/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +7 -113
  10. package/common/ada/ada.cpp +17406 -0
  11. package/common/ada/ada.h +10274 -0
  12. package/common/pfft/pfft.c +2205 -0
  13. package/common/pfft/pfft.h +185 -0
  14. package/common/rnexecutorch/Log.h +489 -0
  15. package/common/rnexecutorch/RnExecutorchInstaller.cpp +78 -0
  16. package/common/rnexecutorch/RnExecutorchInstaller.h +112 -0
  17. package/common/rnexecutorch/TokenizerModule.cpp +52 -0
  18. package/common/rnexecutorch/TokenizerModule.h +26 -0
  19. package/common/rnexecutorch/data_processing/FFT.cpp +21 -0
  20. package/common/rnexecutorch/data_processing/FFT.h +23 -0
  21. package/common/rnexecutorch/data_processing/FileUtils.h +30 -0
  22. package/common/rnexecutorch/data_processing/ImageProcessing.cpp +240 -0
  23. package/common/rnexecutorch/data_processing/ImageProcessing.h +55 -0
  24. package/common/rnexecutorch/data_processing/Numerical.cpp +82 -0
  25. package/common/rnexecutorch/data_processing/Numerical.h +23 -0
  26. package/common/rnexecutorch/data_processing/base64.cpp +110 -0
  27. package/common/rnexecutorch/data_processing/base64.h +46 -0
  28. package/common/rnexecutorch/data_processing/dsp.cpp +65 -0
  29. package/common/rnexecutorch/data_processing/dsp.h +12 -0
  30. package/common/rnexecutorch/host_objects/JSTensorViewIn.h +12 -0
  31. package/common/rnexecutorch/host_objects/JSTensorViewOut.h +22 -0
  32. package/common/rnexecutorch/host_objects/JsiConversions.h +410 -0
  33. package/common/rnexecutorch/host_objects/ModelHostObject.h +239 -0
  34. package/common/rnexecutorch/jsi/JsiHostObject.cpp +108 -0
  35. package/common/rnexecutorch/jsi/JsiHostObject.h +87 -0
  36. package/common/rnexecutorch/jsi/OwningArrayBuffer.h +40 -0
  37. package/common/rnexecutorch/jsi/Promise.cpp +20 -0
  38. package/common/rnexecutorch/jsi/Promise.h +69 -0
  39. package/common/rnexecutorch/jsi/RuntimeAwareCache.h +58 -0
  40. package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.cpp +53 -0
  41. package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.h +35 -0
  42. package/common/rnexecutorch/metaprogramming/ConstructorHelpers.h +131 -0
  43. package/common/rnexecutorch/metaprogramming/FunctionHelpers.h +50 -0
  44. package/common/rnexecutorch/metaprogramming/TypeConcepts.h +37 -0
  45. package/common/rnexecutorch/models/BaseModel.cpp +181 -0
  46. package/common/rnexecutorch/models/BaseModel.h +47 -0
  47. package/common/rnexecutorch/models/EncoderDecoderBase.cpp +21 -0
  48. package/common/rnexecutorch/models/EncoderDecoderBase.h +31 -0
  49. package/common/rnexecutorch/models/classification/Classification.cpp +72 -0
  50. package/common/rnexecutorch/models/classification/Classification.h +26 -0
  51. package/{ios/RnExecutorch/models/classification/Constants.mm → common/rnexecutorch/models/classification/Constants.h} +7 -2
  52. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +27 -0
  53. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +17 -0
  54. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +45 -0
  55. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +23 -0
  56. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +61 -0
  57. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +26 -0
  58. package/{ios/RnExecutorch/models/image_segmentation/Constants.mm → common/rnexecutorch/models/image_segmentation/Constants.h} +7 -2
  59. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +173 -0
  60. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h +43 -0
  61. package/{ios/RnExecutorch/utils/Constants.mm → common/rnexecutorch/models/object_detection/Constants.h} +9 -2
  62. package/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +82 -0
  63. package/common/rnexecutorch/models/object_detection/ObjectDetection.h +31 -0
  64. package/{ios/RnExecutorch/utils/ObjectDetectionUtils.mm → common/rnexecutorch/models/object_detection/Utils.cpp} +10 -30
  65. package/common/rnexecutorch/models/object_detection/Utils.h +17 -0
  66. package/common/rnexecutorch/models/ocr/CTCLabelConverter.cpp +88 -0
  67. package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +29 -0
  68. package/common/rnexecutorch/models/ocr/Constants.h +34 -0
  69. package/common/rnexecutorch/models/ocr/Detector.cpp +102 -0
  70. package/common/rnexecutorch/models/ocr/Detector.h +30 -0
  71. package/common/rnexecutorch/models/ocr/DetectorUtils.cpp +703 -0
  72. package/common/rnexecutorch/models/ocr/DetectorUtils.h +80 -0
  73. package/common/rnexecutorch/models/ocr/OCR.cpp +52 -0
  74. package/common/rnexecutorch/models/ocr/OCR.h +36 -0
  75. package/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +107 -0
  76. package/common/rnexecutorch/models/ocr/RecognitionHandler.h +40 -0
  77. package/common/rnexecutorch/models/ocr/RecognitionHandlerUtils.cpp +153 -0
  78. package/common/rnexecutorch/models/ocr/RecognitionHandlerUtils.h +72 -0
  79. package/common/rnexecutorch/models/ocr/Recognizer.cpp +80 -0
  80. package/common/rnexecutorch/models/ocr/Recognizer.h +36 -0
  81. package/common/rnexecutorch/models/ocr/RecognizerUtils.cpp +202 -0
  82. package/common/rnexecutorch/models/ocr/RecognizerUtils.h +70 -0
  83. package/common/rnexecutorch/models/ocr/Types.h +37 -0
  84. package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +64 -0
  85. package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +31 -0
  86. package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +27 -0
  87. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +50 -0
  88. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +25 -0
  89. package/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +55 -0
  90. package/common/rnexecutorch/models/style_transfer/StyleTransfer.h +29 -0
  91. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +92 -0
  92. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +49 -0
  93. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +180 -0
  94. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +78 -0
  95. package/common/rnexecutorch/tests/LogTest.cpp +530 -0
  96. package/common/rnexecutorch/tests/README.md +20 -0
  97. package/common/rnexecutorch/tests/run_all_tests.sh +14 -0
  98. package/common/rnexecutorch/tests/run_test.sh +18 -0
  99. package/ios/ExecutorchLib.xcframework/Info.plist +4 -4
  100. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
  101. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
  102. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
  103. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
  104. package/ios/RnExecutorch/ETInstaller.h +8 -0
  105. package/ios/RnExecutorch/ETInstaller.mm +56 -0
  106. package/ios/RnExecutorch/utils/Conversions.h +8 -9
  107. package/ios/RnExecutorch/utils/Numerical.h +2 -0
  108. package/ios/RnExecutorch.xcodeproj/project.pbxproj +73 -0
  109. package/lib/module/Error.js +8 -6
  110. package/lib/module/Error.js.map +1 -1
  111. package/lib/module/common/Logger.js +23 -0
  112. package/lib/module/common/Logger.js.map +1 -0
  113. package/lib/module/constants/llmDefaults.js +8 -0
  114. package/lib/module/constants/llmDefaults.js.map +1 -1
  115. package/lib/module/constants/modelUrls.js +328 -84
  116. package/lib/module/constants/modelUrls.js.map +1 -1
  117. package/lib/module/constants/ocr/models.js +181 -286
  118. package/lib/module/constants/ocr/models.js.map +1 -1
  119. package/lib/module/constants/ocr/symbols.js +63 -63
  120. package/lib/module/controllers/LLMController.js +17 -11
  121. package/lib/module/controllers/LLMController.js.map +1 -1
  122. package/lib/module/controllers/OCRController.js +16 -9
  123. package/lib/module/controllers/OCRController.js.map +1 -1
  124. package/lib/module/controllers/VerticalOCRController.js +16 -9
  125. package/lib/module/controllers/VerticalOCRController.js.map +1 -1
  126. package/lib/module/hooks/computer_vision/useClassification.js +5 -5
  127. package/lib/module/hooks/computer_vision/useClassification.js.map +1 -1
  128. package/lib/module/hooks/computer_vision/useImageEmbeddings.js +13 -0
  129. package/lib/module/hooks/computer_vision/useImageEmbeddings.js.map +1 -0
  130. package/lib/module/hooks/computer_vision/useImageSegmentation.js +4 -4
  131. package/lib/module/hooks/computer_vision/useImageSegmentation.js.map +1 -1
  132. package/lib/module/hooks/computer_vision/useOCR.js +14 -15
  133. package/lib/module/hooks/computer_vision/useOCR.js.map +1 -1
  134. package/lib/module/hooks/computer_vision/useObjectDetection.js +5 -5
  135. package/lib/module/hooks/computer_vision/useObjectDetection.js.map +1 -1
  136. package/lib/module/hooks/computer_vision/useStyleTransfer.js +5 -5
  137. package/lib/module/hooks/computer_vision/useStyleTransfer.js.map +1 -1
  138. package/lib/module/hooks/computer_vision/useVerticalOCR.js +16 -17
  139. package/lib/module/hooks/computer_vision/useVerticalOCR.js.map +1 -1
  140. package/lib/module/hooks/general/useExecutorchModule.js +5 -3
  141. package/lib/module/hooks/general/useExecutorchModule.js.map +1 -1
  142. package/lib/module/hooks/natural_language_processing/useLLM.js +22 -25
  143. package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
  144. package/lib/module/hooks/natural_language_processing/useSpeechToText.js +72 -33
  145. package/lib/module/hooks/natural_language_processing/useSpeechToText.js.map +1 -1
  146. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js +4 -5
  147. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js.map +1 -1
  148. package/lib/module/hooks/natural_language_processing/useTokenizer.js +20 -19
  149. package/lib/module/hooks/natural_language_processing/useTokenizer.js.map +1 -1
  150. package/lib/module/hooks/useNonStaticModule.js +52 -0
  151. package/lib/module/hooks/useNonStaticModule.js.map +1 -0
  152. package/lib/module/index.js +15 -4
  153. package/lib/module/index.js.map +1 -1
  154. package/lib/module/modules/BaseModule.js +6 -3
  155. package/lib/module/modules/BaseModule.js.map +1 -1
  156. package/lib/module/modules/BaseNonStaticModule.js +17 -0
  157. package/lib/module/modules/BaseNonStaticModule.js.map +1 -0
  158. package/lib/module/modules/computer_vision/ClassificationModule.js +13 -8
  159. package/lib/module/modules/computer_vision/ClassificationModule.js.map +1 -1
  160. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js +19 -0
  161. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js.map +1 -0
  162. package/lib/module/modules/computer_vision/ImageSegmentationModule.js +21 -19
  163. package/lib/module/modules/computer_vision/ImageSegmentationModule.js.map +1 -1
  164. package/lib/module/modules/computer_vision/OCRModule.js +13 -10
  165. package/lib/module/modules/computer_vision/OCRModule.js.map +1 -1
  166. package/lib/module/modules/computer_vision/ObjectDetectionModule.js +13 -8
  167. package/lib/module/modules/computer_vision/ObjectDetectionModule.js.map +1 -1
  168. package/lib/module/modules/computer_vision/StyleTransferModule.js +13 -8
  169. package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
  170. package/lib/module/modules/computer_vision/VerticalOCRModule.js +15 -10
  171. package/lib/module/modules/computer_vision/VerticalOCRModule.js.map +1 -1
  172. package/lib/module/modules/general/ExecutorchModule.js +10 -36
  173. package/lib/module/modules/general/ExecutorchModule.js.map +1 -1
  174. package/lib/module/modules/natural_language_processing/LLMModule.js +18 -22
  175. package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
  176. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +79 -27
  177. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
  178. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js +15 -8
  179. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js.map +1 -1
  180. package/lib/module/modules/natural_language_processing/TokenizerModule.js +20 -14
  181. package/lib/module/modules/natural_language_processing/TokenizerModule.js.map +1 -1
  182. package/lib/module/native/NativeETInstaller.js +5 -0
  183. package/lib/module/native/NativeETInstaller.js.map +1 -0
  184. package/lib/module/native/RnExecutorchModules.js +2 -11
  185. package/lib/module/native/RnExecutorchModules.js.map +1 -1
  186. package/lib/module/types/common.js +25 -8
  187. package/lib/module/types/common.js.map +1 -1
  188. package/lib/module/types/stt.js +1 -79
  189. package/lib/module/types/stt.js.map +1 -1
  190. package/lib/module/utils/ResourceFetcher.js +276 -114
  191. package/lib/module/utils/ResourceFetcher.js.map +1 -1
  192. package/lib/module/utils/ResourceFetcherUtils.js +155 -0
  193. package/lib/module/utils/ResourceFetcherUtils.js.map +1 -0
  194. package/lib/module/utils/SpeechToTextModule/ASR.js +191 -0
  195. package/lib/module/utils/SpeechToTextModule/ASR.js.map +1 -0
  196. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js +73 -0
  197. package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js.map +1 -0
  198. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js +56 -0
  199. package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js.map +1 -0
  200. package/lib/module/utils/llm.js +41 -1
  201. package/lib/module/utils/llm.js.map +1 -1
  202. package/lib/typescript/Error.d.ts +2 -0
  203. package/lib/typescript/Error.d.ts.map +1 -1
  204. package/lib/typescript/common/Logger.d.ts +9 -0
  205. package/lib/typescript/common/Logger.d.ts.map +1 -0
  206. package/lib/typescript/constants/llmDefaults.d.ts +1 -0
  207. package/lib/typescript/constants/llmDefaults.d.ts.map +1 -1
  208. package/lib/typescript/constants/modelUrls.d.ts +240 -79
  209. package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
  210. package/lib/typescript/constants/ocr/models.d.ts +882 -284
  211. package/lib/typescript/constants/ocr/models.d.ts.map +1 -1
  212. package/lib/typescript/controllers/LLMController.d.ts +3 -4
  213. package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
  214. package/lib/typescript/controllers/OCRController.d.ts +5 -6
  215. package/lib/typescript/controllers/OCRController.d.ts.map +1 -1
  216. package/lib/typescript/controllers/VerticalOCRController.d.ts +5 -6
  217. package/lib/typescript/controllers/VerticalOCRController.d.ts.map +1 -1
  218. package/lib/typescript/hooks/computer_vision/useClassification.d.ts +8 -6
  219. package/lib/typescript/hooks/computer_vision/useClassification.d.ts.map +1 -1
  220. package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts +16 -0
  221. package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts.map +1 -0
  222. package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts +5 -3
  223. package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts.map +1 -1
  224. package/lib/typescript/hooks/computer_vision/useOCR.d.ts +4 -4
  225. package/lib/typescript/hooks/computer_vision/useOCR.d.ts.map +1 -1
  226. package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts +5 -3
  227. package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts.map +1 -1
  228. package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts +5 -3
  229. package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts.map +1 -1
  230. package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts +3 -5
  231. package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts.map +1 -1
  232. package/lib/typescript/hooks/general/useExecutorchModule.d.ts +1 -1
  233. package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts +6 -4
  234. package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
  235. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +15 -22
  236. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts.map +1 -1
  237. package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts +9 -5
  238. package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts.map +1 -1
  239. package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts +6 -4
  240. package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts.map +1 -1
  241. package/lib/typescript/hooks/useNonStaticModule.d.ts +21 -0
  242. package/lib/typescript/hooks/useNonStaticModule.d.ts.map +1 -0
  243. package/lib/typescript/index.d.ts +17 -4
  244. package/lib/typescript/index.d.ts.map +1 -1
  245. package/lib/typescript/modules/BaseModule.d.ts +1 -1
  246. package/lib/typescript/modules/BaseModule.d.ts.map +1 -1
  247. package/lib/typescript/modules/BaseNonStaticModule.d.ts +10 -0
  248. package/lib/typescript/modules/BaseNonStaticModule.d.ts.map +1 -0
  249. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts +6 -6
  250. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts.map +1 -1
  251. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts +9 -0
  252. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts.map +1 -0
  253. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts +8 -28
  254. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts.map +1 -1
  255. package/lib/typescript/modules/computer_vision/OCRModule.d.ts +8 -7
  256. package/lib/typescript/modules/computer_vision/OCRModule.d.ts.map +1 -1
  257. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts +7 -5
  258. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts.map +1 -1
  259. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +6 -5
  260. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
  261. package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts +7 -8
  262. package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts.map +1 -1
  263. package/lib/typescript/modules/general/ExecutorchModule.d.ts +5 -8
  264. package/lib/typescript/modules/general/ExecutorchModule.d.ts.map +1 -1
  265. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +16 -16
  266. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
  267. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +20 -13
  268. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
  269. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts +7 -5
  270. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts.map +1 -1
  271. package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts +10 -9
  272. package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts.map +1 -1
  273. package/lib/typescript/native/{NativeStyleTransfer.d.ts → NativeETInstaller.d.ts} +2 -3
  274. package/lib/typescript/native/NativeETInstaller.d.ts.map +1 -0
  275. package/lib/typescript/native/RnExecutorchModules.d.ts +3 -21
  276. package/lib/typescript/native/RnExecutorchModules.d.ts.map +1 -1
  277. package/lib/typescript/types/common.d.ts +30 -2
  278. package/lib/typescript/types/common.d.ts.map +1 -1
  279. package/lib/typescript/types/stt.d.ts +18 -88
  280. package/lib/typescript/types/stt.d.ts.map +1 -1
  281. package/lib/typescript/utils/ResourceFetcher.d.ts +18 -10
  282. package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
  283. package/lib/typescript/utils/ResourceFetcherUtils.d.ts +55 -0
  284. package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -0
  285. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts +27 -0
  286. package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts.map +1 -0
  287. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts +23 -0
  288. package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts.map +1 -0
  289. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts +13 -0
  290. package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts.map +1 -0
  291. package/lib/typescript/utils/llm.d.ts +4 -0
  292. package/lib/typescript/utils/llm.d.ts.map +1 -1
  293. package/package.json +23 -65
  294. package/react-native-executorch.podspec +75 -3
  295. package/src/Error.ts +8 -10
  296. package/src/common/Logger.ts +25 -0
  297. package/src/constants/llmDefaults.ts +11 -0
  298. package/src/constants/modelUrls.ts +401 -168
  299. package/src/constants/ocr/models.ts +826 -395
  300. package/src/constants/ocr/symbols.ts +63 -63
  301. package/src/controllers/LLMController.ts +28 -18
  302. package/src/controllers/OCRController.ts +24 -15
  303. package/src/controllers/VerticalOCRController.ts +24 -14
  304. package/src/hooks/computer_vision/useClassification.ts +10 -11
  305. package/src/hooks/computer_vision/useImageEmbeddings.ts +15 -0
  306. package/src/hooks/computer_vision/useImageSegmentation.ts +5 -8
  307. package/src/hooks/computer_vision/useOCR.ts +29 -21
  308. package/src/hooks/computer_vision/useObjectDetection.ts +6 -9
  309. package/src/hooks/computer_vision/useStyleTransfer.ts +6 -6
  310. package/src/hooks/computer_vision/useVerticalOCR.ts +30 -27
  311. package/src/hooks/general/useExecutorchModule.ts +3 -3
  312. package/src/hooks/natural_language_processing/useLLM.ts +38 -28
  313. package/src/hooks/natural_language_processing/useSpeechToText.ts +91 -88
  314. package/src/hooks/natural_language_processing/useTextEmbeddings.ts +11 -11
  315. package/src/hooks/natural_language_processing/useTokenizer.ts +22 -22
  316. package/src/hooks/useNonStaticModule.ts +74 -0
  317. package/src/index.ts +100 -0
  318. package/src/modules/BaseModule.ts +9 -3
  319. package/src/modules/BaseNonStaticModule.ts +26 -0
  320. package/src/modules/computer_vision/ClassificationModule.ts +20 -11
  321. package/src/modules/computer_vision/ImageEmbeddingsModule.ts +26 -0
  322. package/src/modules/computer_vision/ImageSegmentationModule.ts +35 -27
  323. package/src/modules/computer_vision/OCRModule.ts +23 -15
  324. package/src/modules/computer_vision/ObjectDetectionModule.ts +24 -11
  325. package/src/modules/computer_vision/StyleTransferModule.ts +20 -11
  326. package/src/modules/computer_vision/VerticalOCRModule.ts +25 -21
  327. package/src/modules/general/ExecutorchModule.ts +18 -48
  328. package/src/modules/natural_language_processing/LLMModule.ts +27 -30
  329. package/src/modules/natural_language_processing/SpeechToTextModule.ts +85 -68
  330. package/src/modules/natural_language_processing/TextEmbeddingsModule.ts +27 -12
  331. package/src/modules/natural_language_processing/TokenizerModule.ts +27 -17
  332. package/src/native/NativeETInstaller.ts +8 -0
  333. package/src/native/RnExecutorchModules.ts +4 -46
  334. package/src/types/common.ts +40 -12
  335. package/src/types/stt.ts +98 -89
  336. package/src/utils/ResourceFetcher.ts +338 -119
  337. package/src/utils/ResourceFetcherUtils.ts +186 -0
  338. package/src/utils/SpeechToTextModule/ASR.ts +303 -0
  339. package/src/utils/SpeechToTextModule/OnlineProcessor.ts +87 -0
  340. package/src/utils/SpeechToTextModule/hypothesisBuffer.ts +79 -0
  341. package/src/utils/llm.ts +65 -1
  342. package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
  343. package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
  344. package/third-party/android/libs/opencv/arm64-v8a/libopencv_core.a +0 -0
  345. package/third-party/android/libs/opencv/arm64-v8a/libopencv_features2d.a +0 -0
  346. package/third-party/android/libs/opencv/arm64-v8a/libopencv_highgui.a +0 -0
  347. package/third-party/android/libs/opencv/arm64-v8a/libopencv_imgproc.a +0 -0
  348. package/third-party/android/libs/opencv/arm64-v8a/libopencv_photo.a +0 -0
  349. package/third-party/android/libs/opencv/arm64-v8a/libopencv_video.a +0 -0
  350. package/third-party/android/libs/opencv/x86_64/libopencv_core.a +0 -0
  351. package/third-party/android/libs/opencv/x86_64/libopencv_features2d.a +0 -0
  352. package/third-party/android/libs/opencv/x86_64/libopencv_highgui.a +0 -0
  353. package/third-party/android/libs/opencv/x86_64/libopencv_imgproc.a +0 -0
  354. package/third-party/android/libs/opencv/x86_64/libopencv_photo.a +0 -0
  355. package/third-party/android/libs/opencv/x86_64/libopencv_video.a +0 -0
  356. package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv.a +0 -0
  357. package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_hal.a +0 -0
  358. package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_thread.a +0 -0
  359. package/third-party/include/c10/macros/Export.h +163 -0
  360. package/third-party/include/c10/macros/Macros.h +497 -0
  361. package/third-party/include/c10/util/BFloat16-inl.h +342 -0
  362. package/third-party/include/c10/util/BFloat16-math.h +266 -0
  363. package/third-party/include/c10/util/BFloat16.h +125 -0
  364. package/third-party/include/c10/util/Half-inl.h +347 -0
  365. package/third-party/include/c10/util/Half.h +416 -0
  366. package/third-party/include/c10/util/TypeSafeSignMath.h +133 -0
  367. package/third-party/include/c10/util/bit_cast.h +43 -0
  368. package/third-party/include/c10/util/floating_point_utils.h +33 -0
  369. package/third-party/include/c10/util/irange.h +107 -0
  370. package/third-party/include/executorch/ExecuTorch.h +13 -0
  371. package/third-party/include/executorch/ExecuTorchError.h +16 -0
  372. package/third-party/include/executorch/ExecuTorchLog.h +76 -0
  373. package/third-party/include/executorch/ExecuTorchModule.h +286 -0
  374. package/third-party/include/executorch/ExecuTorchTensor.h +742 -0
  375. package/third-party/include/executorch/ExecuTorchValue.h +219 -0
  376. package/third-party/include/executorch/extension/module/module.h +492 -0
  377. package/third-party/include/executorch/extension/tensor/tensor.h +13 -0
  378. package/third-party/include/executorch/extension/tensor/tensor_accessor.h +190 -0
  379. package/third-party/include/executorch/extension/tensor/tensor_ptr.h +347 -0
  380. package/third-party/include/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
  381. package/third-party/include/executorch/runtime/backend/backend_execution_context.h +71 -0
  382. package/third-party/include/executorch/runtime/backend/backend_init_context.h +72 -0
  383. package/third-party/include/executorch/runtime/backend/interface.h +166 -0
  384. package/third-party/include/executorch/runtime/core/array_ref.h +235 -0
  385. package/third-party/include/executorch/runtime/core/data_loader.h +136 -0
  386. package/third-party/include/executorch/runtime/core/defines.h +20 -0
  387. package/third-party/include/executorch/runtime/core/error.h +229 -0
  388. package/third-party/include/executorch/runtime/core/evalue.h +521 -0
  389. package/third-party/include/executorch/runtime/core/event_tracer.h +565 -0
  390. package/third-party/include/executorch/runtime/core/event_tracer_hooks.h +323 -0
  391. package/third-party/include/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
  392. package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
  393. package/third-party/include/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
  394. package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
  395. package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
  396. package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
  397. package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
  398. package/third-party/include/executorch/runtime/core/freeable_buffer.h +107 -0
  399. package/third-party/include/executorch/runtime/core/hierarchical_allocator.h +107 -0
  400. package/third-party/include/executorch/runtime/core/memory_allocator.h +198 -0
  401. package/third-party/include/executorch/runtime/core/named_data_map.h +86 -0
  402. package/third-party/include/executorch/runtime/core/portable_type/bfloat16.h +27 -0
  403. package/third-party/include/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
  404. package/third-party/include/executorch/runtime/core/portable_type/bits_types.h +83 -0
  405. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
  406. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
  407. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
  408. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
  409. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
  410. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
  411. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
  412. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
  413. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
  414. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
  415. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
  416. package/third-party/include/executorch/runtime/core/portable_type/complex.h +44 -0
  417. package/third-party/include/executorch/runtime/core/portable_type/device.h +70 -0
  418. package/third-party/include/executorch/runtime/core/portable_type/half.h +27 -0
  419. package/third-party/include/executorch/runtime/core/portable_type/optional.h +36 -0
  420. package/third-party/include/executorch/runtime/core/portable_type/qint_types.h +83 -0
  421. package/third-party/include/executorch/runtime/core/portable_type/scalar.h +110 -0
  422. package/third-party/include/executorch/runtime/core/portable_type/scalar_type.h +154 -0
  423. package/third-party/include/executorch/runtime/core/portable_type/string_view.h +29 -0
  424. package/third-party/include/executorch/runtime/core/portable_type/tensor.h +142 -0
  425. package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
  426. package/third-party/include/executorch/runtime/core/portable_type/tensor_options.h +60 -0
  427. package/third-party/include/executorch/runtime/core/result.h +258 -0
  428. package/third-party/include/executorch/runtime/core/span.h +93 -0
  429. package/third-party/include/executorch/runtime/core/tag.h +71 -0
  430. package/third-party/include/executorch/runtime/core/tensor_layout.h +79 -0
  431. package/third-party/include/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
  432. package/third-party/include/executorch/runtime/executor/memory_manager.h +113 -0
  433. package/third-party/include/executorch/runtime/executor/method.h +387 -0
  434. package/third-party/include/executorch/runtime/executor/method_meta.h +251 -0
  435. package/third-party/include/executorch/runtime/executor/program.h +320 -0
  436. package/third-party/include/executorch/runtime/executor/pte_data_map.h +144 -0
  437. package/third-party/include/executorch/runtime/executor/tensor_parser.h +156 -0
  438. package/third-party/include/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
  439. package/third-party/include/executorch/runtime/kernel/operator_registry.h +278 -0
  440. package/third-party/include/executorch/runtime/platform/abort.h +36 -0
  441. package/third-party/include/executorch/runtime/platform/assert.h +119 -0
  442. package/third-party/include/executorch/runtime/platform/clock.h +43 -0
  443. package/third-party/include/executorch/runtime/platform/compat_unistd.h +75 -0
  444. package/third-party/include/executorch/runtime/platform/compiler.h +191 -0
  445. package/third-party/include/executorch/runtime/platform/log.h +177 -0
  446. package/third-party/include/executorch/runtime/platform/platform.h +133 -0
  447. package/third-party/include/executorch/runtime/platform/profiler.h +292 -0
  448. package/third-party/include/executorch/runtime/platform/runtime.h +35 -0
  449. package/third-party/include/executorch/runtime/platform/system.h +49 -0
  450. package/third-party/include/executorch/runtime/platform/types.h +24 -0
  451. package/third-party/include/executorch/schema/extended_header.h +76 -0
  452. package/third-party/include/opencv2/core/affine.hpp +676 -0
  453. package/third-party/include/opencv2/core/async.hpp +107 -0
  454. package/third-party/include/opencv2/core/base.hpp +735 -0
  455. package/third-party/include/opencv2/core/bindings_utils.hpp +279 -0
  456. package/third-party/include/opencv2/core/bufferpool.hpp +39 -0
  457. package/third-party/include/opencv2/core/check.hpp +231 -0
  458. package/third-party/include/opencv2/core/core.hpp +55 -0
  459. package/third-party/include/opencv2/core/core_c.h +3261 -0
  460. package/third-party/include/opencv2/core/cv_cpu_dispatch.h +404 -0
  461. package/third-party/include/opencv2/core/cv_cpu_helper.h +856 -0
  462. package/third-party/include/opencv2/core/cvdef.h +1003 -0
  463. package/third-party/include/opencv2/core/cvstd.hpp +196 -0
  464. package/third-party/include/opencv2/core/cvstd.inl.hpp +188 -0
  465. package/third-party/include/opencv2/core/cvstd_wrapper.hpp +187 -0
  466. package/third-party/include/opencv2/core/detail/async_promise.hpp +73 -0
  467. package/third-party/include/opencv2/core/detail/dispatch_helper.impl.hpp +48 -0
  468. package/third-party/include/opencv2/core/detail/exception_ptr.hpp +24 -0
  469. package/third-party/include/opencv2/core/dualquaternion.hpp +1054 -0
  470. package/third-party/include/opencv2/core/dualquaternion.inl.hpp +464 -0
  471. package/third-party/include/opencv2/core/eigen.hpp +405 -0
  472. package/third-party/include/opencv2/core/fast_math.hpp +433 -0
  473. package/third-party/include/opencv2/core/hal/hal.hpp +451 -0
  474. package/third-party/include/opencv2/core/hal/interface.h +191 -0
  475. package/third-party/include/opencv2/core/hal/intrin.hpp +1222 -0
  476. package/third-party/include/opencv2/core/hal/intrin_avx.hpp +3378 -0
  477. package/third-party/include/opencv2/core/hal/intrin_avx512.hpp +3688 -0
  478. package/third-party/include/opencv2/core/hal/intrin_cpp.hpp +3446 -0
  479. package/third-party/include/opencv2/core/hal/intrin_forward.hpp +195 -0
  480. package/third-party/include/opencv2/core/hal/intrin_lasx.hpp +3243 -0
  481. package/third-party/include/opencv2/core/hal/intrin_lsx.hpp +2671 -0
  482. package/third-party/include/opencv2/core/hal/intrin_math.hpp +772 -0
  483. package/third-party/include/opencv2/core/hal/intrin_msa.hpp +1973 -0
  484. package/third-party/include/opencv2/core/hal/intrin_neon.hpp +2710 -0
  485. package/third-party/include/opencv2/core/hal/intrin_rvv071.hpp +3452 -0
  486. package/third-party/include/opencv2/core/hal/intrin_rvv_scalable.hpp +2559 -0
  487. package/third-party/include/opencv2/core/hal/intrin_sse.hpp +3528 -0
  488. package/third-party/include/opencv2/core/hal/intrin_sse_em.hpp +175 -0
  489. package/third-party/include/opencv2/core/hal/intrin_vsx.hpp +1756 -0
  490. package/third-party/include/opencv2/core/hal/intrin_wasm.hpp +2911 -0
  491. package/third-party/include/opencv2/core/hal/msa_macros.h +2079 -0
  492. package/third-party/include/opencv2/core/hal/simd_utils.impl.hpp +313 -0
  493. package/third-party/include/opencv2/core/mat.hpp +3842 -0
  494. package/third-party/include/opencv2/core/mat.inl.hpp +2753 -0
  495. package/third-party/include/opencv2/core/matx.hpp +603 -0
  496. package/third-party/include/opencv2/core/matx.inl.hpp +1132 -0
  497. package/third-party/include/opencv2/core/neon_utils.hpp +127 -0
  498. package/third-party/include/opencv2/core/operations.hpp +610 -0
  499. package/third-party/include/opencv2/core/optim.hpp +362 -0
  500. package/third-party/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp +66 -0
  501. package/third-party/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp +148 -0
  502. package/third-party/include/opencv2/core/parallel/parallel_backend.hpp +108 -0
  503. package/third-party/include/opencv2/core/persistence.hpp +1321 -0
  504. package/third-party/include/opencv2/core/quaternion.hpp +1889 -0
  505. package/third-party/include/opencv2/core/quaternion.inl.hpp +907 -0
  506. package/third-party/include/opencv2/core/saturate.hpp +347 -0
  507. package/third-party/include/opencv2/core/simd_intrinsics.hpp +90 -0
  508. package/third-party/include/opencv2/core/softfloat.hpp +657 -0
  509. package/third-party/include/opencv2/core/sse_utils.hpp +861 -0
  510. package/third-party/include/opencv2/core/traits.hpp +417 -0
  511. package/third-party/include/opencv2/core/types.hpp +2368 -0
  512. package/third-party/include/opencv2/core/types_c.h +2064 -0
  513. package/third-party/include/opencv2/core/utility.hpp +1296 -0
  514. package/third-party/include/opencv2/core/utils/allocator_stats.hpp +31 -0
  515. package/third-party/include/opencv2/core/utils/allocator_stats.impl.hpp +111 -0
  516. package/third-party/include/opencv2/core/utils/filesystem.hpp +91 -0
  517. package/third-party/include/opencv2/core/utils/fp_control_utils.hpp +70 -0
  518. package/third-party/include/opencv2/core/utils/instrumentation.hpp +127 -0
  519. package/third-party/include/opencv2/core/utils/logger.defines.hpp +50 -0
  520. package/third-party/include/opencv2/core/utils/logger.hpp +258 -0
  521. package/third-party/include/opencv2/core/utils/logtag.hpp +27 -0
  522. package/third-party/include/opencv2/core/utils/tls.hpp +230 -0
  523. package/third-party/include/opencv2/core/utils/trace.hpp +281 -0
  524. package/third-party/include/opencv2/core/version.hpp +29 -0
  525. package/third-party/include/opencv2/core/vsx_utils.hpp +1115 -0
  526. package/third-party/include/opencv2/core.hpp +3699 -0
  527. package/third-party/include/opencv2/cvconfig.h +155 -0
  528. package/third-party/include/opencv2/dnn/dnn.hpp +51 -0
  529. package/third-party/include/opencv2/dnn.hpp +17 -0
  530. package/third-party/include/opencv2/features2d/features2d.hpp +55 -0
  531. package/third-party/include/opencv2/features2d/hal/interface.h +32 -0
  532. package/third-party/include/opencv2/features2d.hpp +1756 -0
  533. package/third-party/include/opencv2/highgui/highgui.hpp +113 -0
  534. package/third-party/include/opencv2/highgui.hpp +17 -0
  535. package/third-party/include/opencv2/imgproc/bindings.hpp +34 -0
  536. package/third-party/include/opencv2/imgproc/detail/gcgraph.hpp +355 -0
  537. package/third-party/include/opencv2/imgproc/detail/legacy.hpp +35 -0
  538. package/third-party/include/opencv2/imgproc/hal/hal.hpp +246 -0
  539. package/third-party/include/opencv2/imgproc/hal/interface.h +52 -0
  540. package/third-party/include/opencv2/imgproc/imgproc.hpp +55 -0
  541. package/third-party/include/opencv2/imgproc/imgproc_c.h +1261 -0
  542. package/third-party/include/opencv2/imgproc/segmentation.hpp +168 -0
  543. package/third-party/include/opencv2/imgproc/types_c.h +632 -0
  544. package/third-party/include/opencv2/imgproc.hpp +5956 -0
  545. package/third-party/include/opencv2/opencv.hpp +102 -0
  546. package/third-party/include/opencv2/opencv_modules.hpp +19 -0
  547. package/third-party/include/opencv2/photo/legacy/constants_c.h +10 -0
  548. package/third-party/include/opencv2/photo/photo.hpp +55 -0
  549. package/third-party/include/opencv2/photo.hpp +975 -0
  550. package/third-party/include/opencv2/video/background_segm.hpp +341 -0
  551. package/third-party/include/opencv2/video/detail/tracking.detail.hpp +435 -0
  552. package/third-party/include/opencv2/video/legacy/constants_c.h +15 -0
  553. package/third-party/include/opencv2/video/tracking.hpp +1014 -0
  554. package/third-party/include/opencv2/video/video.hpp +55 -0
  555. package/third-party/include/opencv2/video.hpp +65 -0
  556. package/third-party/include/tokenizers-cpp/tokenizers_c.h +61 -0
  557. package/third-party/include/tokenizers-cpp/tokenizers_cpp.h +118 -0
  558. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.h +27 -0
  559. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.mm +249 -0
  560. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h +14 -0
  561. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm +80 -0
  562. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h +32 -0
  563. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm +95 -0
  564. package/third-party/ios/ExecutorchLib/ExecutorchLib/InputType.h +12 -0
  565. package/third-party/ios/ExecutorchLib/ExecutorchLib/Utils.hpp +217 -0
  566. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.cpp +11 -0
  567. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.h +11 -0
  568. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/irunner.h +48 -0
  569. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.cpp +278 -0
  570. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.h +67 -0
  571. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/stats.h +164 -0
  572. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.cpp +65 -0
  573. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.h +105 -0
  574. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.cpp +91 -0
  575. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.h +51 -0
  576. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_token_generator.h +162 -0
  577. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/util.h +108 -0
  578. package/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp +193 -0
  579. package/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h +64 -0
  580. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h +202 -0
  581. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp +313 -0
  582. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h +57 -0
  583. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp +78 -0
  584. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h +23 -0
  585. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp +427 -0
  586. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h +87 -0
  587. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h +76 -0
  588. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj +683 -0
  589. package/third-party/ios/ExecutorchLib/build.sh +44 -0
  590. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/Info.plist +43 -0
  591. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64/libbackend_coreml_ios.a +0 -0
  592. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64-simulator/libbackend_coreml_simulator.a +0 -0
  593. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/Info.plist +43 -0
  594. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64/libbackend_mps_ios.a +0 -0
  595. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64-simulator/libbackend_mps_simulator.a +0 -0
  596. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/Info.plist +43 -0
  597. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64/libbackend_xnnpack_ios.a +0 -0
  598. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64-simulator/libbackend_xnnpack_simulator.a +0 -0
  599. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/Info.plist +47 -0
  600. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Export.h +163 -0
  601. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Macros.h +497 -0
  602. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-inl.h +342 -0
  603. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-math.h +266 -0
  604. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16.h +125 -0
  605. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half-inl.h +347 -0
  606. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half.h +416 -0
  607. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/TypeSafeSignMath.h +133 -0
  608. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/bit_cast.h +43 -0
  609. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/floating_point_utils.h +33 -0
  610. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/irange.h +107 -0
  611. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorch.h +13 -0
  612. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchError.h +16 -0
  613. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchLog.h +76 -0
  614. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchModule.h +286 -0
  615. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchTensor.h +742 -0
  616. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchValue.h +219 -0
  617. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/module/module.h +492 -0
  618. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor.h +13 -0
  619. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_accessor.h +190 -0
  620. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr.h +347 -0
  621. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
  622. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_execution_context.h +71 -0
  623. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_init_context.h +72 -0
  624. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/interface.h +166 -0
  625. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/array_ref.h +235 -0
  626. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/data_loader.h +136 -0
  627. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/defines.h +20 -0
  628. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/error.h +229 -0
  629. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/evalue.h +521 -0
  630. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer.h +565 -0
  631. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks.h +323 -0
  632. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
  633. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
  634. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
  635. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
  636. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
  637. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
  638. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
  639. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/freeable_buffer.h +107 -0
  640. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/hierarchical_allocator.h +107 -0
  641. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/memory_allocator.h +198 -0
  642. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/named_data_map.h +86 -0
  643. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16.h +27 -0
  644. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
  645. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bits_types.h +83 -0
  646. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
  647. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
  648. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
  649. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
  650. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
  651. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
  652. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
  653. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
  654. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
  655. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
  656. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
  657. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/complex.h +44 -0
  658. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/device.h +70 -0
  659. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/half.h +27 -0
  660. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/optional.h +36 -0
  661. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/qint_types.h +83 -0
  662. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar.h +110 -0
  663. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar_type.h +154 -0
  664. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/string_view.h +29 -0
  665. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor.h +142 -0
  666. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
  667. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_options.h +60 -0
  668. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/result.h +258 -0
  669. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/span.h +93 -0
  670. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tag.h +71 -0
  671. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_layout.h +79 -0
  672. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
  673. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/memory_manager.h +113 -0
  674. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method.h +387 -0
  675. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method_meta.h +251 -0
  676. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/program.h +320 -0
  677. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/pte_data_map.h +144 -0
  678. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/tensor_parser.h +156 -0
  679. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
  680. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/operator_registry.h +278 -0
  681. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/abort.h +36 -0
  682. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/assert.h +119 -0
  683. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/clock.h +43 -0
  684. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compat_unistd.h +75 -0
  685. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compiler.h +191 -0
  686. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/log.h +177 -0
  687. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/platform.h +133 -0
  688. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/profiler.h +292 -0
  689. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/runtime.h +35 -0
  690. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/system.h +49 -0
  691. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/types.h +24 -0
  692. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/schema/extended_header.h +76 -0
  693. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/module.modulemap +5 -0
  694. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/libexecutorch_ios.a +0 -0
  695. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Export.h +163 -0
  696. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Macros.h +497 -0
  697. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-inl.h +342 -0
  698. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-math.h +266 -0
  699. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16.h +125 -0
  700. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half-inl.h +347 -0
  701. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half.h +416 -0
  702. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/TypeSafeSignMath.h +133 -0
  703. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/bit_cast.h +43 -0
  704. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/floating_point_utils.h +33 -0
  705. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/irange.h +107 -0
  706. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorch.h +13 -0
  707. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchError.h +16 -0
  708. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchLog.h +76 -0
  709. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchModule.h +286 -0
  710. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchTensor.h +742 -0
  711. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchValue.h +219 -0
  712. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/module/module.h +492 -0
  713. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor.h +13 -0
  714. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_accessor.h +190 -0
  715. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr.h +347 -0
  716. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
  717. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_execution_context.h +71 -0
  718. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_init_context.h +72 -0
  719. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/interface.h +166 -0
  720. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/array_ref.h +235 -0
  721. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/data_loader.h +136 -0
  722. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/defines.h +20 -0
  723. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/error.h +229 -0
  724. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/evalue.h +521 -0
  725. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer.h +565 -0
  726. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks.h +323 -0
  727. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
  728. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
  729. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
  730. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
  731. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
  732. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
  733. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
  734. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/freeable_buffer.h +107 -0
  735. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/hierarchical_allocator.h +107 -0
  736. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/memory_allocator.h +198 -0
  737. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/named_data_map.h +86 -0
  738. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16.h +27 -0
  739. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
  740. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bits_types.h +83 -0
  741. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
  742. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
  743. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
  744. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
  745. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
  746. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
  747. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
  748. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
  749. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
  750. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
  751. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
  752. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/complex.h +44 -0
  753. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/device.h +70 -0
  754. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/half.h +27 -0
  755. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/optional.h +36 -0
  756. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/qint_types.h +83 -0
  757. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar.h +110 -0
  758. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar_type.h +154 -0
  759. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/string_view.h +29 -0
  760. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor.h +142 -0
  761. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
  762. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_options.h +60 -0
  763. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/result.h +258 -0
  764. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/span.h +93 -0
  765. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tag.h +71 -0
  766. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_layout.h +79 -0
  767. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
  768. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/memory_manager.h +113 -0
  769. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method.h +387 -0
  770. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method_meta.h +251 -0
  771. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/program.h +320 -0
  772. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/pte_data_map.h +144 -0
  773. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/tensor_parser.h +156 -0
  774. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
  775. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/operator_registry.h +278 -0
  776. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/abort.h +36 -0
  777. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/assert.h +119 -0
  778. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/clock.h +43 -0
  779. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compat_unistd.h +75 -0
  780. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compiler.h +191 -0
  781. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/log.h +177 -0
  782. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/platform.h +133 -0
  783. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/profiler.h +292 -0
  784. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/runtime.h +35 -0
  785. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/system.h +49 -0
  786. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/types.h +24 -0
  787. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/schema/extended_header.h +76 -0
  788. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/module.modulemap +5 -0
  789. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/libexecutorch_simulator.a +0 -0
  790. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/Info.plist +43 -0
  791. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64/libkernels_custom_ios.a +0 -0
  792. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64-simulator/libkernels_custom_simulator.a +0 -0
  793. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/Info.plist +43 -0
  794. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64/libkernels_optimized_ios.a +0 -0
  795. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64-simulator/libkernels_optimized_simulator.a +0 -0
  796. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/Info.plist +43 -0
  797. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64/libkernels_portable_ios.a +0 -0
  798. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64-simulator/libkernels_portable_simulator.a +0 -0
  799. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/Info.plist +43 -0
  800. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64/libkernels_quantized_ios.a +0 -0
  801. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64-simulator/libkernels_quantized_simulator.a +0 -0
  802. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/Info.plist +43 -0
  803. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/bitmap256.h +82 -0
  804. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/filtered_re2.h +111 -0
  805. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/pod_array.h +43 -0
  806. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter.h +130 -0
  807. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter_tree.h +139 -0
  808. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prog.h +483 -0
  809. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/re2.h +994 -0
  810. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/regexp.h +692 -0
  811. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/set.h +85 -0
  812. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_array.h +367 -0
  813. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_set.h +241 -0
  814. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/stringpiece.h +205 -0
  815. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_casefold.h +78 -0
  816. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_groups.h +64 -0
  817. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/walker-inl.h +235 -0
  818. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Info.plist +26 -0
  819. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/re2 +0 -0
  820. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/bitmap256.h +82 -0
  821. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/filtered_re2.h +111 -0
  822. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/pod_array.h +43 -0
  823. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter.h +130 -0
  824. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter_tree.h +139 -0
  825. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prog.h +483 -0
  826. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/re2.h +994 -0
  827. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/regexp.h +692 -0
  828. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/set.h +85 -0
  829. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_array.h +367 -0
  830. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_set.h +241 -0
  831. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/stringpiece.h +205 -0
  832. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_casefold.h +78 -0
  833. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_groups.h +64 -0
  834. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/walker-inl.h +235 -0
  835. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Info.plist +26 -0
  836. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/re2 +0 -0
  837. package/third-party/ios/ios.toolchain.cmake +1122 -0
  838. package/LICENSE +0 -79
  839. package/README.md +0 -148
  840. package/android/src/main/java/com/swmansion/rnexecutorch/Classification.kt +0 -64
  841. package/android/src/main/java/com/swmansion/rnexecutorch/ETModule.kt +0 -90
  842. package/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt +0 -58
  843. package/android/src/main/java/com/swmansion/rnexecutorch/OCR.kt +0 -90
  844. package/android/src/main/java/com/swmansion/rnexecutorch/ObjectDetection.kt +0 -64
  845. package/android/src/main/java/com/swmansion/rnexecutorch/SpeechToText.kt +0 -91
  846. package/android/src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt +0 -54
  847. package/android/src/main/java/com/swmansion/rnexecutorch/TextEmbeddings.kt +0 -51
  848. package/android/src/main/java/com/swmansion/rnexecutorch/Tokenizer.kt +0 -86
  849. package/android/src/main/java/com/swmansion/rnexecutorch/VerticalOCR.kt +0 -179
  850. package/android/src/main/java/com/swmansion/rnexecutorch/models/BaseModel.kt +0 -54
  851. package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsModel.kt +0 -48
  852. package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsUtils.kt +0 -37
  853. package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt +0 -46
  854. package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/Constants.kt +0 -1005
  855. package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt +0 -26
  856. package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt +0 -142
  857. package/android/src/main/java/com/swmansion/rnexecutorch/models/objectDetection/SSDLiteLargeModel.kt +0 -74
  858. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Detector.kt +0 -82
  859. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/RecognitionHandler.kt +0 -117
  860. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Recognizer.kt +0 -51
  861. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/VerticalDetector.kt +0 -89
  862. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/CTCLabelConverter.kt +0 -58
  863. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/Constants.kt +0 -31
  864. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/DetectorUtils.kt +0 -608
  865. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/RecognizerUtils.kt +0 -430
  866. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TDecoder.kt +0 -39
  867. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TModule.kt +0 -43
  868. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Moonshine.kt +0 -16
  869. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineDecoder.kt +0 -23
  870. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineEncoder.kt +0 -20
  871. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Whisper.kt +0 -16
  872. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperDecoder.kt +0 -22
  873. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperEncoder.kt +0 -29
  874. package/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt +0 -43
  875. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ArrayUtils.kt +0 -87
  876. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ETError.kt +0 -34
  877. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ImageProcessor.kt +0 -237
  878. package/android/src/main/java/com/swmansion/rnexecutorch/utils/Numerical.kt +0 -8
  879. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ObjectDetectionUtils.kt +0 -201
  880. package/android/src/main/java/com/swmansion/rnexecutorch/utils/STFT.kt +0 -50
  881. package/android/src/main/java/com/swmansion/rnexecutorch/utils/TensorUtils.kt +0 -103
  882. package/ios/RnExecutorch/Classification.h +0 -5
  883. package/ios/RnExecutorch/Classification.mm +0 -54
  884. package/ios/RnExecutorch/ETModule.h +0 -5
  885. package/ios/RnExecutorch/ETModule.mm +0 -75
  886. package/ios/RnExecutorch/ImageSegmentation.h +0 -5
  887. package/ios/RnExecutorch/ImageSegmentation.mm +0 -60
  888. package/ios/RnExecutorch/OCR.h +0 -5
  889. package/ios/RnExecutorch/OCR.mm +0 -96
  890. package/ios/RnExecutorch/ObjectDetection.h +0 -5
  891. package/ios/RnExecutorch/ObjectDetection.mm +0 -56
  892. package/ios/RnExecutorch/SpeechToText.h +0 -5
  893. package/ios/RnExecutorch/SpeechToText.mm +0 -125
  894. package/ios/RnExecutorch/StyleTransfer.h +0 -5
  895. package/ios/RnExecutorch/StyleTransfer.mm +0 -55
  896. package/ios/RnExecutorch/TextEmbeddings.h +0 -5
  897. package/ios/RnExecutorch/TextEmbeddings.mm +0 -62
  898. package/ios/RnExecutorch/Tokenizer.h +0 -5
  899. package/ios/RnExecutorch/Tokenizer.mm +0 -83
  900. package/ios/RnExecutorch/VerticalOCR.h +0 -5
  901. package/ios/RnExecutorch/VerticalOCR.mm +0 -183
  902. package/ios/RnExecutorch/models/BaseModel.h +0 -21
  903. package/ios/RnExecutorch/models/BaseModel.mm +0 -43
  904. package/ios/RnExecutorch/models/classification/ClassificationModel.h +0 -10
  905. package/ios/RnExecutorch/models/classification/ClassificationModel.mm +0 -53
  906. package/ios/RnExecutorch/models/classification/Constants.h +0 -3
  907. package/ios/RnExecutorch/models/image_segmentation/Constants.h +0 -4
  908. package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +0 -10
  909. package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +0 -146
  910. package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.hpp +0 -11
  911. package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.mm +0 -64
  912. package/ios/RnExecutorch/models/ocr/Detector.h +0 -9
  913. package/ios/RnExecutorch/models/ocr/Detector.mm +0 -101
  914. package/ios/RnExecutorch/models/ocr/RecognitionHandler.h +0 -16
  915. package/ios/RnExecutorch/models/ocr/RecognitionHandler.mm +0 -135
  916. package/ios/RnExecutorch/models/ocr/Recognizer.h +0 -8
  917. package/ios/RnExecutorch/models/ocr/Recognizer.mm +0 -77
  918. package/ios/RnExecutorch/models/ocr/VerticalDetector.h +0 -10
  919. package/ios/RnExecutorch/models/ocr/VerticalDetector.mm +0 -118
  920. package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.h +0 -16
  921. package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.mm +0 -80
  922. package/ios/RnExecutorch/models/ocr/utils/Constants.h +0 -26
  923. package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.h +0 -31
  924. package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.mm +0 -754
  925. package/ios/RnExecutorch/models/ocr/utils/OCRUtils.h +0 -10
  926. package/ios/RnExecutorch/models/ocr/utils/OCRUtils.mm +0 -67
  927. package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.h +0 -35
  928. package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.mm +0 -331
  929. package/ios/RnExecutorch/models/stt/Moonshine.hpp +0 -13
  930. package/ios/RnExecutorch/models/stt/Moonshine.mm +0 -64
  931. package/ios/RnExecutorch/models/stt/MoonshineDecoder.hpp +0 -16
  932. package/ios/RnExecutorch/models/stt/MoonshineDecoder.mm +0 -24
  933. package/ios/RnExecutorch/models/stt/MoonshineEncoder.hpp +0 -15
  934. package/ios/RnExecutorch/models/stt/MoonshineEncoder.mm +0 -18
  935. package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.hpp +0 -26
  936. package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.mm +0 -19
  937. package/ios/RnExecutorch/models/stt/Whisper.hpp +0 -12
  938. package/ios/RnExecutorch/models/stt/Whisper.mm +0 -68
  939. package/ios/RnExecutorch/models/stt/WhisperDecoder.hpp +0 -16
  940. package/ios/RnExecutorch/models/stt/WhisperDecoder.mm +0 -22
  941. package/ios/RnExecutorch/models/stt/WhisperEncoder.hpp +0 -15
  942. package/ios/RnExecutorch/models/stt/WhisperEncoder.mm +0 -21
  943. package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h +0 -11
  944. package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm +0 -50
  945. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.h +0 -15
  946. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.mm +0 -45
  947. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.h +0 -8
  948. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.mm +0 -49
  949. package/ios/RnExecutorch/utils/Constants.h +0 -8
  950. package/ios/RnExecutorch/utils/ObjectDetectionUtils.hpp +0 -23
  951. package/ios/RnExecutorch/utils/SFFT.hpp +0 -13
  952. package/ios/RnExecutorch/utils/SFFT.mm +0 -71
  953. package/lib/module/constants/sttDefaults.js +0 -72
  954. package/lib/module/constants/sttDefaults.js.map +0 -1
  955. package/lib/module/controllers/SpeechToTextController.js +0 -307
  956. package/lib/module/controllers/SpeechToTextController.js.map +0 -1
  957. package/lib/module/native/NativeClassification.js +0 -5
  958. package/lib/module/native/NativeClassification.js.map +0 -1
  959. package/lib/module/native/NativeETModule.js +0 -5
  960. package/lib/module/native/NativeETModule.js.map +0 -1
  961. package/lib/module/native/NativeImageSegmentation.js +0 -5
  962. package/lib/module/native/NativeImageSegmentation.js.map +0 -1
  963. package/lib/module/native/NativeOCR.js +0 -5
  964. package/lib/module/native/NativeOCR.js.map +0 -1
  965. package/lib/module/native/NativeObjectDetection.js +0 -5
  966. package/lib/module/native/NativeObjectDetection.js.map +0 -1
  967. package/lib/module/native/NativeSpeechToText.js +0 -5
  968. package/lib/module/native/NativeSpeechToText.js.map +0 -1
  969. package/lib/module/native/NativeStyleTransfer.js +0 -5
  970. package/lib/module/native/NativeStyleTransfer.js.map +0 -1
  971. package/lib/module/native/NativeTextEmbeddings.js +0 -5
  972. package/lib/module/native/NativeTextEmbeddings.js.map +0 -1
  973. package/lib/module/native/NativeTokenizer.js +0 -5
  974. package/lib/module/native/NativeTokenizer.js.map +0 -1
  975. package/lib/module/native/NativeVerticalOCR.js +0 -5
  976. package/lib/module/native/NativeVerticalOCR.js.map +0 -1
  977. package/lib/module/package.json +0 -1
  978. package/lib/typescript/constants/sttDefaults.d.ts +0 -28
  979. package/lib/typescript/constants/sttDefaults.d.ts.map +0 -1
  980. package/lib/typescript/controllers/SpeechToTextController.d.ts +0 -52
  981. package/lib/typescript/controllers/SpeechToTextController.d.ts.map +0 -1
  982. package/lib/typescript/native/NativeClassification.d.ts +0 -10
  983. package/lib/typescript/native/NativeClassification.d.ts.map +0 -1
  984. package/lib/typescript/native/NativeETModule.d.ts +0 -9
  985. package/lib/typescript/native/NativeETModule.d.ts.map +0 -1
  986. package/lib/typescript/native/NativeImageSegmentation.d.ts +0 -10
  987. package/lib/typescript/native/NativeImageSegmentation.d.ts.map +0 -1
  988. package/lib/typescript/native/NativeOCR.d.ts +0 -9
  989. package/lib/typescript/native/NativeOCR.d.ts.map +0 -1
  990. package/lib/typescript/native/NativeObjectDetection.d.ts +0 -9
  991. package/lib/typescript/native/NativeObjectDetection.d.ts.map +0 -1
  992. package/lib/typescript/native/NativeSpeechToText.d.ts +0 -12
  993. package/lib/typescript/native/NativeSpeechToText.d.ts.map +0 -1
  994. package/lib/typescript/native/NativeStyleTransfer.d.ts.map +0 -1
  995. package/lib/typescript/native/NativeTextEmbeddings.d.ts +0 -8
  996. package/lib/typescript/native/NativeTextEmbeddings.d.ts.map +0 -1
  997. package/lib/typescript/native/NativeTokenizer.d.ts +0 -12
  998. package/lib/typescript/native/NativeTokenizer.d.ts.map +0 -1
  999. package/lib/typescript/native/NativeVerticalOCR.d.ts +0 -9
  1000. package/lib/typescript/native/NativeVerticalOCR.d.ts.map +0 -1
  1001. package/src/constants/sttDefaults.ts +0 -86
  1002. package/src/controllers/SpeechToTextController.ts +0 -458
  1003. package/src/index.tsx +0 -47
  1004. package/src/native/NativeClassification.ts +0 -9
  1005. package/src/native/NativeETModule.ts +0 -14
  1006. package/src/native/NativeImageSegmentation.ts +0 -14
  1007. package/src/native/NativeOCR.ts +0 -16
  1008. package/src/native/NativeObjectDetection.ts +0 -10
  1009. package/src/native/NativeSpeechToText.ts +0 -17
  1010. package/src/native/NativeStyleTransfer.ts +0 -10
  1011. package/src/native/NativeTextEmbeddings.ts +0 -9
  1012. package/src/native/NativeTokenizer.ts +0 -13
  1013. package/src/native/NativeVerticalOCR.ts +0 -16
@@ -0,0 +1,3446 @@
1
+ /*M///////////////////////////////////////////////////////////////////////////////////////
2
+ //
3
+ // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
+ //
5
+ // By downloading, copying, installing or using the software you agree to this
6
+ license.
7
+ // If you do not agree to this license, do not download, install,
8
+ // copy or use the software.
9
+ //
10
+ //
11
+ // License Agreement
12
+ // For Open Source Computer Vision Library
13
+ //
14
+ // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
15
+ // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
16
+ // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
17
+ // Copyright (C) 2015, Itseez Inc., all rights reserved.
18
+ // Third party copyrights are property of their respective owners.
19
+ //
20
+ // Redistribution and use in source and binary forms, with or without
21
+ modification,
22
+ // are permitted provided that the following conditions are met:
23
+ //
24
+ // * Redistribution's of source code must retain the above copyright notice,
25
+ // this list of conditions and the following disclaimer.
26
+ //
27
+ // * Redistribution's in binary form must reproduce the above copyright
28
+ notice,
29
+ // this list of conditions and the following disclaimer in the documentation
30
+ // and/or other materials provided with the distribution.
31
+ //
32
+ // * The name of the copyright holders may not be used to endorse or promote
33
+ products
34
+ // derived from this software without specific prior written permission.
35
+ //
36
+ // This software is provided by the copyright holders and contributors "as is"
37
+ and
38
+ // any express or implied warranties, including, but not limited to, the implied
39
+ // warranties of merchantability and fitness for a particular purpose are
40
+ disclaimed.
41
+ // In no event shall the Intel Corporation or contributors be liable for any
42
+ direct,
43
+ // indirect, incidental, special, exemplary, or consequential damages
44
+ // (including, but not limited to, procurement of substitute goods or services;
45
+ // loss of use, data, or profits; or business interruption) however caused
46
+ // and on any theory of liability, whether in contract, strict liability,
47
+ // or tort (including negligence or otherwise) arising in any way out of
48
+ // the use of this software, even if advised of the possibility of such damage.
49
+ //
50
+ //M*/
51
+
52
+ #ifndef OPENCV_HAL_INTRIN_CPP_HPP
53
+ #define OPENCV_HAL_INTRIN_CPP_HPP
54
+
55
+ #include "opencv2/core/saturate.hpp"
56
+ #include "opencv2/core/utility.hpp"
57
+ #include <algorithm>
58
+ #include <cstring>
59
+ #include <limits>
60
+
61
+ //! @cond IGNORED
62
+ #define CV_SIMD128_CPP 1
63
+ #if defined(CV_FORCE_SIMD128_CPP)
64
+ #define CV_SIMD128 1
65
+ #define CV_SIMD128_64F 1
66
+ #endif
67
+ #if defined(CV_DOXYGEN)
68
+ #define CV_SIMD128 1
69
+ #define CV_SIMD128_64F 1
70
+ #define CV_SIMD256 1
71
+ #define CV_SIMD256_64F 1
72
+ #define CV_SIMD512 1
73
+ #define CV_SIMD512_64F 1
74
+ #else
75
+ #define CV_SIMD256 \
76
+ 0 // Explicitly disable SIMD256 and SIMD512 support for scalar intrinsic
77
+ // implementation
78
+ #define CV_SIMD512 0 // to avoid warnings during compilation
79
+ #endif
80
+ //! @endcond
81
+
82
+ namespace cv {
83
+
84
+ #ifndef CV_DOXYGEN
85
+ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
86
+ #endif
87
+
88
+ /** @addtogroup core_hal_intrin
89
+
90
+ "Universal intrinsics" are a set of types and functions intended to simplify
91
+ vectorization of code on different platforms. Currently a few different SIMD
92
+ extensions on different architectures are supported. 128 bit registers of
93
+ various types support is implemented for a wide range of architectures including
94
+ x86(__SSE/SSE2/SSE4.2__), ARM(__NEON__), PowerPC(__VSX__), MIPS(__MSA__). 256
95
+ bit long registers are supported on x86(__AVX2__) and 512 bit long registers are
96
+ supported on x86(__AVX512__). In case when there is no SIMD extension available
97
+ during compilation, fallback C++ implementation of intrinsics will be chosen and
98
+ code will work as expected although it could be slower.
99
+
100
+ ### Types
101
+
102
+ There are several types representing packed values vector registers, each type
103
+ is implemented as a structure based on one SIMD register.
104
+
105
+ - cv::v_uint8 and cv::v_int8: 8-bit integer values (unsigned/signed) - char
106
+ - cv::v_uint16 and cv::v_int16: 16-bit integer values (unsigned/signed) - short
107
+ - cv::v_uint32 and cv::v_int32: 32-bit integer values (unsigned/signed) - int
108
+ - cv::v_uint64 and cv::v_int64: 64-bit integer values (unsigned/signed) - int64
109
+ - cv::v_float32: 32-bit floating point values (signed) - float
110
+ - cv::v_float64: 64-bit floating point values (signed) - double
111
+
112
+ Exact bit length(and value quantity) of listed types is compile time deduced and
113
+ depends on architecture SIMD capabilities chosen as available during compilation
114
+ of the library. All the types contain the __nlanes__ enumeration to check for the exact
115
+ value quantity of the type.
116
+
117
+ In case the exact bit length of the type is important it is possible to use
118
+ specific fixed length register types.
119
+
120
+ There are several types representing 128-bit registers.
121
+
122
+ - cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values
123
+ (unsigned/signed) - char
124
+ - cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values
125
+ (unsigned/signed) - short
126
+ - cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed)
127
+ - int
128
+ - cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed)
129
+ - int64
130
+ - cv::v_float32x4: four 32-bit floating point values (signed) - float
131
+ - cv::v_float64x2: two 64-bit floating point values (signed) - double
132
+
133
+ There are several types representing 256-bit registers.
134
+
135
+ - cv::v_uint8x32 and cv::v_int8x32: thirty two 8-bit integer values
136
+ (unsigned/signed) - char
137
+ - cv::v_uint16x16 and cv::v_int16x16: sixteen 16-bit integer values
138
+ (unsigned/signed) - short
139
+ - cv::v_uint32x8 and cv::v_int32x8: eight 32-bit integer values
140
+ (unsigned/signed) - int
141
+ - cv::v_uint64x4 and cv::v_int64x4: four 64-bit integer values (unsigned/signed)
142
+ - int64
143
+ - cv::v_float32x8: eight 32-bit floating point values (signed) - float
144
+ - cv::v_float64x4: four 64-bit floating point values (signed) - double
145
+
146
+ @note
147
+ 256 bit registers are at the moment implemented for the AVX2 SIMD extension only; if you
148
+ want to use this type directly, don't forget to check the CV_SIMD256
149
+ preprocessor definition:
150
+ @code
151
+ #if CV_SIMD256
152
+ //...
153
+ #endif
154
+ @endcode
155
+
156
+ There are several types representing 512-bit registers.
157
+
158
+ - cv::v_uint8x64 and cv::v_int8x64: sixty four 8-bit integer values
159
+ (unsigned/signed) - char
160
+ - cv::v_uint16x32 and cv::v_int16x32: thirty two 16-bit integer values
161
+ (unsigned/signed) - short
162
+ - cv::v_uint32x16 and cv::v_int32x16: sixteen 32-bit integer values
163
+ (unsigned/signed) - int
164
+ - cv::v_uint64x8 and cv::v_int64x8: eight 64-bit integer values
165
+ (unsigned/signed) - int64
166
+ - cv::v_float32x16: sixteen 32-bit floating point values (signed) - float
167
+ - cv::v_float64x8: eight 64-bit floating point values (signed) - double
168
+ @note
169
+ 512 bit registers are at the moment implemented for the AVX512 SIMD extension only; if
170
+ you want to use this type directly, don't forget to check the CV_SIMD512
171
+ preprocessor definition.
172
+
173
+ @note
174
+ cv::v_float64x2 is not implemented in the NEON variant; if you want to use this
175
+ type, don't forget to check the CV_SIMD128_64F preprocessor definition.
176
+
177
+ ### Load and store operations
178
+
179
+ These operations allow setting the contents of a register explicitly or by loading
180
+ it from some memory block and to save contents of the register to memory block.
181
+
182
+ There are variable size register load operations that provide result of maximum
183
+ available size depending on chosen platform capabilities.
184
+ - Constructors:
185
+ @ref v_reg::v_reg(const _Tp *ptr) "from memory",
186
+ - Other create methods:
187
+ vx_setall_s8, vx_setall_u8, ...,
188
+ vx_setzero_u8, vx_setzero_s8, ...
189
+ - Memory load operations:
190
+ vx_load, vx_load_aligned, vx_load_low, vx_load_halves,
191
+ - Memory operations with expansion of values:
192
+ vx_load_expand, vx_load_expand_q
193
+
194
+ Also there are fixed size register load/store operations.
195
+
196
+ For 128 bit registers
197
+ - Constructors:
198
+ @ref v_reg::v_reg(const _Tp *ptr) "from memory",
199
+ @ref v_reg::v_reg(_Tp s0, _Tp s1) "from two values", ...
200
+ - Other create methods:
201
+ @ref v_setall_s8, @ref v_setall_u8, ...,
202
+ @ref v_setzero_u8, @ref v_setzero_s8, ...
203
+ - Memory load operations:
204
+ @ref v_load, @ref v_load_aligned, @ref v_load_low, @ref v_load_halves,
205
+ - Memory operations with expansion of values:
206
+ @ref v_load_expand, @ref v_load_expand_q
207
+
208
+ For 256 bit registers(check CV_SIMD256 preprocessor definition)
209
+ - Constructors:
210
+ @ref v_reg::v_reg(const _Tp *ptr) "from memory",
211
+ @ref v_reg::v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) "from four values", ...
212
+ - Other create methods:
213
+ @ref v256_setall_s8, @ref v256_setall_u8, ...,
214
+ @ref v256_setzero_u8, @ref v256_setzero_s8, ...
215
+ - Memory load operations:
216
+ @ref v256_load, @ref v256_load_aligned, @ref v256_load_low, @ref
217
+ v256_load_halves,
218
+ - Memory operations with expansion of values:
219
+ @ref v256_load_expand, @ref v256_load_expand_q
220
+
221
+ For 512 bit registers(check CV_SIMD512 preprocessor definition)
222
+ - Constructors:
223
+ @ref v_reg::v_reg(const _Tp *ptr) "from memory",
224
+ @ref v_reg::v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp
225
+ s7) "from eight values", ...
226
+ - Other create methods:
227
+ @ref v512_setall_s8, @ref v512_setall_u8, ...,
228
+ @ref v512_setzero_u8, @ref v512_setzero_s8, ...
229
+ - Memory load operations:
230
+ @ref v512_load, @ref v512_load_aligned, @ref v512_load_low, @ref
231
+ v512_load_halves,
232
+ - Memory operations with expansion of values:
233
+ @ref v512_load_expand, @ref v512_load_expand_q
234
+
235
+ Store to memory operations are similar across different platform capabilities:
236
+ @ref v_store, @ref v_store_aligned,
237
+ @ref v_store_high, @ref v_store_low
238
+
239
+ ### Value reordering
240
+
241
+ These operations allow reordering or recombining elements in one or multiple
242
+ vectors.
243
+
244
+ - Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref
245
+ v_store_interleave
246
+ - Expand: @ref v_expand, @ref v_expand_low, @ref v_expand_high
247
+ - Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref
248
+ v_rshr_pack_u,
249
+ @ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref
250
+ v_rshr_pack_u_store
251
+ - Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref
252
+ v_combine_high
253
+ - Reverse: @ref v_reverse
254
+ - Extract: @ref v_extract
255
+
256
+
257
+ ### Arithmetic, bitwise and comparison operations
258
+
259
+ Element-wise binary and unary operations.
260
+
261
+ - Arithmetics:
262
+ @ref v_add(const v_reg &a, const v_reg &b) "+",
263
+ @ref v_sub(const v_reg &a, const v_reg &b) "-",
264
+ @ref v_mul(const v_reg &a, const v_reg &b) "*",
265
+ @ref v_div(const v_reg &a, const v_reg &b) "/",
266
+ @ref v_mul_expand
267
+
268
+ - Non-saturating arithmetics: @ref v_add_wrap, @ref v_sub_wrap
269
+
270
+ - Bitwise shifts:
271
+ @ref v_shl(const v_reg &a, int s) "<<",
272
+ @ref v_shr(const v_reg &a, int s) ">>",
273
+ @ref v_shl, @ref v_shr
274
+
275
+ - Bitwise logic:
276
+ @ref v_and(const v_reg &a, const v_reg &b) "&",
277
+ @ref v_or(const v_reg &a, const v_reg &b) "|",
278
+ @ref v_xor(const v_reg &a, const v_reg &b) "^",
279
+ @ref v_not(const v_reg &a) "~"
280
+
281
+ - Comparison:
282
+ @ref v_gt(const v_reg &a, const v_reg &b) ">",
283
+ @ref v_ge(const v_reg &a, const v_reg &b) ">=",
284
+ @ref v_lt(const v_reg &a, const v_reg &b) "<",
285
+ @ref v_le(const v_reg &a, const v_reg &b) "<=",
286
+ @ref v_eq(const v_reg &a, const v_reg &b) "==",
287
+ @ref v_ne(const v_reg &a, const v_reg &b) "!="
288
+
289
+ - min/max: @ref v_min, @ref v_max
290
+
291
+ ### Reduce and mask
292
+
293
+ Most of these operations return only one value.
294
+
295
+ - Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum, @ref
296
+ v_popcount
297
+ - Mask: @ref v_signmask, @ref v_check_all, @ref v_check_any, @ref v_select
298
+
299
+ ### Other math
300
+
301
+ - Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref
302
+ v_sqr_magnitude, @ref v_exp, @ref v_log,
303
+ @ref v_erf, @ref v_sin, @ref v_cos
304
+ - Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
305
+
306
+ ### Conversions
307
+
308
+ Different type conversions and casts:
309
+
310
+ - Rounding: @ref v_round, @ref v_floor, @ref v_ceil, @ref v_trunc,
311
+ - To float: @ref v_cvt_f32, @ref v_cvt_f64
312
+ - Reinterpret: @ref v_reinterpret_as_u8, @ref v_reinterpret_as_s8, ...
313
+
314
+ ### Matrix operations
315
+
316
+ In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref
317
+ v_dotprod_fast,
318
+ @ref v_dotprod_expand, @ref v_dotprod_expand_fast, @ref v_matmul, @ref
319
+ v_transpose4x4
320
+
321
+ ### Usability
322
+
323
+ Most operations are implemented only for some subset of the available types,
324
+ the following matrices show the applicability of different operations to the types.
325
+
326
+ Regular integers:
327
+
328
+ | Operations\\Types | uint 8 | int 8 | uint 16 | int 16 | uint 32 | int 32 |
329
+ |-------------------|:-:|:-:|:-:|:-:|:-:|:-:|
330
+ |load, store | x | x | x | x | x | x |
331
+ |interleave | x | x | x | x | x | x |
332
+ |expand | x | x | x | x | x | x |
333
+ |expand_low | x | x | x | x | x | x |
334
+ |expand_high | x | x | x | x | x | x |
335
+ |expand_q | x | x | | | | |
336
+ |add, sub | x | x | x | x | x | x |
337
+ |add_wrap, sub_wrap | x | x | x | x | | |
338
+ |mul_wrap | x | x | x | x | | |
339
+ |mul | x | x | x | x | x | x |
340
+ |mul_expand | x | x | x | x | x | |
341
+ |compare | x | x | x | x | x | x |
342
+ |shift | | | x | x | x | x |
343
+ |dotprod | | | | x | | x |
344
+ |dotprod_fast | | | | x | | x |
345
+ |dotprod_expand | x | x | x | x | | x |
346
+ |dotprod_expand_fast| x | x | x | x | | x |
347
+ |logical | x | x | x | x | x | x |
348
+ |min, max | x | x | x | x | x | x |
349
+ |absdiff | x | x | x | x | x | x |
350
+ |absdiffs | | x | | x | | |
351
+ |reduce | x | x | x | x | x | x |
352
+ |mask | x | x | x | x | x | x |
353
+ |pack | x | x | x | x | x | x |
354
+ |pack_u | x | | x | | | |
355
+ |pack_b | x | | | | | |
356
+ |unpack | x | x | x | x | x | x |
357
+ |extract | x | x | x | x | x | x |
358
+ |rotate (lanes) | x | x | x | x | x | x |
359
+ |cvt_flt32 | | | | | | x |
360
+ |cvt_flt64 | | | | | | x |
361
+ |transpose4x4 | | | | | x | x |
362
+ |reverse | x | x | x | x | x | x |
363
+ |extract_n | x | x | x | x | x | x |
364
+ |broadcast_element | | | | | x | x |
365
+
366
+ Big integers:
367
+
368
+ | Operations\\Types | uint 64 | int 64 |
369
+ |-------------------|:-:|:-:|
370
+ |load, store | x | x |
371
+ |add, sub | x | x |
372
+ |shift | x | x |
373
+ |logical | x | x |
374
+ |reverse | x | x |
375
+ |extract | x | x |
376
+ |rotate (lanes) | x | x |
377
+ |cvt_flt64 | | x |
378
+ |extract_n | x | x |
379
+
380
+ Floating point:
381
+
382
+ | Operations\\Types | float 32 | float 64 |
383
+ |-------------------|:-:|:-:|
384
+ |load, store | x | x |
385
+ |interleave | x | |
386
+ |add, sub | x | x |
387
+ |mul | x | x |
388
+ |div | x | x |
389
+ |compare | x | x |
390
+ |min, max | x | x |
391
+ |absdiff | x | x |
392
+ |reduce | x | |
393
+ |mask | x | x |
394
+ |unpack | x | x |
395
+ |cvt_flt32 | | x |
396
+ |cvt_flt64 | x | |
397
+ |sqrt, abs | x | x |
398
+ |float math | x | x |
399
+ |transpose4x4 | x | |
400
+ |extract | x | x |
401
+ |rotate (lanes) | x | x |
402
+ |reverse | x | x |
403
+ |extract_n | x | x |
404
+ |broadcast_element | x | |
405
+ |exp | x | x |
406
+ |log | x | x |
407
+ |sin, cos | x | x |
408
+
409
+ @{ */
410
+
411
+ template <typename _Tp, int n> struct v_reg {
412
+ //! @cond IGNORED
413
+ typedef _Tp lane_type;
414
+ enum { nlanes = n };
415
+ //! @endcond
416
+
417
+ /** @brief Constructor
418
+
419
+ Initializes all n lanes by copying n consecutive values starting at ptr
420
+ @param ptr pointer to a memory block holding at least n values */
421
+ explicit v_reg(const _Tp *ptr) {
422
+ for (int i = 0; i < n; i++)
423
+ s[i] = ptr[i];
424
+ }
425
+
426
+ /** @brief Constructor
427
+
428
+ Initializes the first two lanes; intended for registers of two 64-bit values */
429
+ v_reg(_Tp s0, _Tp s1) {
430
+ s[0] = s0;
431
+ s[1] = s1;
432
+ }
433
+
434
+ /** @brief Constructor
435
+
436
+ Initializes the first four lanes; intended for registers of four 32-bit values */
437
+ v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) {
438
+ s[0] = s0;
439
+ s[1] = s1;
440
+ s[2] = s2;
441
+ s[3] = s3;
442
+ }
443
+
444
+ /** @brief Constructor
445
+
446
+ Initializes the first eight lanes; intended for registers of eight 16-bit values */
447
+ v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7) {
448
+ s[0] = s0;
449
+ s[1] = s1;
450
+ s[2] = s2;
451
+ s[3] = s3;
452
+ s[4] = s4;
453
+ s[5] = s5;
454
+ s[6] = s6;
455
+ s[7] = s7;
456
+ }
457
+
458
+ /** @brief Constructor
459
+
460
+ Initializes the first sixteen lanes; intended for registers of sixteen 8-bit values */
461
+ v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7, _Tp s8,
462
+ _Tp s9, _Tp s10, _Tp s11, _Tp s12, _Tp s13, _Tp s14, _Tp s15) {
463
+ s[0] = s0;
464
+ s[1] = s1;
465
+ s[2] = s2;
466
+ s[3] = s3;
467
+ s[4] = s4;
468
+ s[5] = s5;
469
+ s[6] = s6;
470
+ s[7] = s7;
471
+ s[8] = s8;
472
+ s[9] = s9;
473
+ s[10] = s10;
474
+ s[11] = s11;
475
+ s[12] = s12;
476
+ s[13] = s13;
477
+ s[14] = s14;
478
+ s[15] = s15;
479
+ }
480
+
481
+ /** @brief Default constructor
482
+
483
+ Leaves all lanes uninitialized */
484
+ v_reg() {}
485
+
486
+ /** @brief Copy constructor: copies all n lanes of r */
487
+ v_reg(const v_reg<_Tp, n> &r) {
488
+ for (int i = 0; i < n; i++)
489
+ s[i] = r.s[i];
490
+ }
491
+ /** @brief Access first value
492
+
493
+ Returns value of the first lane according to register type, for example:
494
+ @code{.cpp}
495
+ v_int32x4 r(1, 2, 3, 4);
496
+ int v = r.get0(); // returns 1
497
+ v_uint64x2 r(1, 2);
498
+ uint64_t v = r.get0(); // returns 1
499
+ @endcode
500
+ */
501
+ _Tp get0() const { return s[0]; }
502
+
503
+ //! @cond IGNORED
504
+ _Tp get(const int i) const { return s[i]; } // lane i, no bounds check
505
+ v_reg<_Tp, n> high() const { // for even n: upper half moved to the lower lanes, upper lanes zeroed
506
+ v_reg<_Tp, n> c;
507
+ int i;
508
+ for (i = 0; i < n / 2; i++) {
509
+ c.s[i] = s[i + (n / 2)];
510
+ c.s[i + (n / 2)] = 0;
511
+ }
512
+ return c;
513
+ }
514
+
515
+ static v_reg<_Tp, n> zero() {
516
+ v_reg<_Tp, n> c;
517
+ for (int i = 0; i < n; i++)
518
+ c.s[i] = (_Tp)0;
519
+ return c;
520
+ }
521
+
522
+ static v_reg<_Tp, n> all(_Tp s) {
523
+ v_reg<_Tp, n> c;
524
+ for (int i = 0; i < n; i++)
525
+ c.s[i] = s;
526
+ return c;
527
+ }
528
+
529
+ template <typename _Tp2, int n2> v_reg<_Tp2, n2> reinterpret_as() const {
530
+ size_t bytes = std::min(sizeof(_Tp2) * n2, sizeof(_Tp) * n); // copy only what fits in both registers
531
+ v_reg<_Tp2, n2> c;
532
+ std::memcpy(&c.s[0], &s[0], bytes); // raw byte copy; lanes of c beyond `bytes` stay uninitialized
533
+ return c;
534
+ }
535
+
536
+ v_reg &operator=(const v_reg<_Tp, n> &r) {
537
+ for (int i = 0; i < n; i++)
538
+ s[i] = r.s[i];
539
+ return *this;
540
+ }
541
+
542
+ _Tp s[n]; // lane storage
543
+ //! @endcond
544
+ };
545
+
546
+ /** @brief Sixteen 8-bit unsigned integer values */
547
+ typedef v_reg<uchar, 16> v_uint8x16;
548
+ /** @brief Sixteen 8-bit signed integer values */
549
+ typedef v_reg<schar, 16> v_int8x16;
550
+ /** @brief Eight 16-bit unsigned integer values */
551
+ typedef v_reg<ushort, 8> v_uint16x8;
552
+ /** @brief Eight 16-bit signed integer values */
553
+ typedef v_reg<short, 8> v_int16x8;
554
+ /** @brief Four 32-bit unsigned integer values */
555
+ typedef v_reg<unsigned, 4> v_uint32x4;
556
+ /** @brief Four 32-bit signed integer values */
557
+ typedef v_reg<int, 4> v_int32x4;
558
+ /** @brief Four 32-bit floating point values (single precision) */
559
+ typedef v_reg<float, 4> v_float32x4;
560
+ /** @brief Two 64-bit floating point values (double precision) */
561
+ typedef v_reg<double, 2> v_float64x2;
562
+ /** @brief Two 64-bit unsigned integer values */
563
+ typedef v_reg<uint64, 2> v_uint64x2;
564
+ /** @brief Two 64-bit signed integer values */
565
+ typedef v_reg<int64, 2> v_int64x2;
566
+
567
+ #if CV_SIMD256
568
+ /** @brief Thirty two 8-bit unsigned integer values */
569
+ typedef v_reg<uchar, 32> v_uint8x32;
570
+ /** @brief Thirty two 8-bit signed integer values */
571
+ typedef v_reg<schar, 32> v_int8x32;
572
+ /** @brief Sixteen 16-bit unsigned integer values */
573
+ typedef v_reg<ushort, 16> v_uint16x16;
574
+ /** @brief Sixteen 16-bit signed integer values */
575
+ typedef v_reg<short, 16> v_int16x16;
576
+ /** @brief Eight 32-bit unsigned integer values */
577
+ typedef v_reg<unsigned, 8> v_uint32x8;
578
+ /** @brief Eight 32-bit signed integer values */
579
+ typedef v_reg<int, 8> v_int32x8;
580
+ /** @brief Eight 32-bit floating point values (single precision) */
581
+ typedef v_reg<float, 8> v_float32x8;
582
+ /** @brief Four 64-bit floating point values (double precision) */
583
+ typedef v_reg<double, 4> v_float64x4;
584
+ /** @brief Four 64-bit unsigned integer values */
585
+ typedef v_reg<uint64, 4> v_uint64x4;
586
+ /** @brief Four 64-bit signed integer values */
587
+ typedef v_reg<int64, 4> v_int64x4;
588
+ #endif
589
+
590
+ #if CV_SIMD512
591
+ /** @brief Sixty four 8-bit unsigned integer values */
592
+ typedef v_reg<uchar, 64> v_uint8x64;
593
+ /** @brief Sixty four 8-bit signed integer values */
594
+ typedef v_reg<schar, 64> v_int8x64;
595
+ /** @brief Thirty two 16-bit unsigned integer values */
596
+ typedef v_reg<ushort, 32> v_uint16x32;
597
+ /** @brief Thirty two 16-bit signed integer values */
598
+ typedef v_reg<short, 32> v_int16x32;
599
+ /** @brief Sixteen 32-bit unsigned integer values */
600
+ typedef v_reg<unsigned, 16> v_uint32x16;
601
+ /** @brief Sixteen 32-bit signed integer values */
602
+ typedef v_reg<int, 16> v_int32x16;
603
+ /** @brief Sixteen 32-bit floating point values (single precision) */
604
+ typedef v_reg<float, 16> v_float32x16;
605
+ /** @brief Eight 64-bit floating point values (double precision) */
606
+ typedef v_reg<double, 8> v_float64x8;
607
+ /** @brief Eight 64-bit unsigned integer values */
608
+ typedef v_reg<uint64, 8> v_uint64x8;
609
+ /** @brief Eight 64-bit signed integer values */
610
+ typedef v_reg<int64, 8> v_int64x8;
611
+ #endif
612
+
613
+ enum {
614
+ simd128_width = 16, // size of a 128-bit register in bytes
615
+ #if CV_SIMD256
616
+ simd256_width = 32, // size of a 256-bit register in bytes
617
+ #endif
618
+ #if CV_SIMD512
619
+ simd512_width = 64, // size of a 512-bit register in bytes
620
+ simdmax_width = simd512_width // widest register width enabled at compile time
621
+ #elif CV_SIMD256
622
+ simdmax_width = simd256_width // widest register width enabled at compile time
623
+ #else
624
+ simdmax_width = simd128_width // widest register width enabled at compile time
625
+ #endif
626
+ };
627
+
628
+ /** @brief Add values
629
+
630
+ For all types. */
631
+ template <typename _Tp, int n>
632
+ CV_INLINE v_reg<_Tp, n> v_add(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b);
633
+
634
+ /** @brief Subtract values
635
+
636
+ For all types. */
637
+ template <typename _Tp, int n>
638
+ CV_INLINE v_reg<_Tp, n> v_sub(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b);
639
+
640
+ /** @brief Multiply values
641
+
642
+ For 16- and 32-bit integer types and floating types. */
643
+ template <typename _Tp, int n>
644
+ CV_INLINE v_reg<_Tp, n> v_mul(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b);
645
+
646
+ /** @brief Divide values
647
+
648
+ For floating types only. */
649
+ template <typename _Tp, int n>
650
+ CV_INLINE v_reg<_Tp, n> v_div(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b);
651
+
652
+ /** @brief Bitwise AND
653
+
654
+ Only for integer types. */
655
+ template <typename _Tp, int n>
656
+ CV_INLINE v_reg<_Tp, n> v_and(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b);
657
+
658
+ /** @brief Bitwise OR
659
+
660
+ Only for integer types. */
661
+ template <typename _Tp, int n>
662
+ CV_INLINE v_reg<_Tp, n> v_or(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b);
663
+
664
+ /** @brief Bitwise XOR
665
+
666
+ Only for integer types.*/
667
+ template <typename _Tp, int n>
668
+ CV_INLINE v_reg<_Tp, n> v_xor(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b);
669
+
670
+ /** @brief Bitwise NOT
671
+
672
+ Only for integer types.*/
673
+ template <typename _Tp, int n>
674
+ CV_INLINE v_reg<_Tp, n> v_not(const v_reg<_Tp, n> &a);
675
+
676
+ #ifndef CV_DOXYGEN
677
+
678
+ #define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \
679
+ __CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \
680
+ __CV_EXPAND(macro_name(schar, __VA_ARGS__)) \
681
+ __CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \
682
+ __CV_EXPAND(macro_name(short, __VA_ARGS__)) \
683
+ __CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \
684
+ __CV_EXPAND(macro_name(int, __VA_ARGS__)) \
685
+ __CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \
686
+ __CV_EXPAND(macro_name(int64, __VA_ARGS__))
687
+
688
+ #define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \
689
+ __CV_EXPAND(macro_name(float, __VA_ARGS__)) \
690
+ __CV_EXPAND(macro_name(double, __VA_ARGS__))
691
+
692
+ #define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \
693
+ CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \
694
+ CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__)
695
+
696
+ #define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op, func) \
697
+ template <int n> \
698
+ inline v_reg<_Tp, n> func(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) { \
699
+ v_reg<_Tp, n> c; \
700
+ for (int i = 0; i < n; i++) \
701
+ c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
702
+ return c; \
703
+ }
704
+
705
+ #define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op, func) \
706
+ CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op, \
707
+ func)
708
+
709
+ CV__HAL_INTRIN_IMPL_BIN_OP(+, v_add)
710
+ CV__HAL_INTRIN_IMPL_BIN_OP(-, v_sub)
711
+ CV__HAL_INTRIN_IMPL_BIN_OP(*, v_mul)
712
+ CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, /, v_div)
713
+
714
+ #define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op, func) \
715
+ template <int n> \
716
+ CV_INLINE v_reg<_Tp, n> func(const v_reg<_Tp, n> &a, \
717
+ const v_reg<_Tp, n> &b) { \
718
+ v_reg<_Tp, n> c; \
719
+ typedef typename V_TypeTraits<_Tp>::int_type itype; \
720
+ for (int i = 0; i < n; i++) \
721
+ c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int( \
722
+ (itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) \
723
+ bit_op V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
724
+ return c; \
725
+ }
726
+
727
+ #define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op, func) \
728
+ CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, \
729
+ bit_op, func) \
730
+ CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES( \
731
+ CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op, \
732
+ func) /* TODO: FIXIT remove this after masks refactoring */
733
+
734
+ CV__HAL_INTRIN_IMPL_BIT_OP(&, v_and)
735
+ CV__HAL_INTRIN_IMPL_BIT_OP(|, v_or)
736
+ CV__HAL_INTRIN_IMPL_BIT_OP(^, v_xor)
737
+
738
+ #define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy, dummy2) \
739
+ template <int n> CV_INLINE v_reg<_Tp, n> v_not(const v_reg<_Tp, n> &a) { \
740
+ v_reg<_Tp, n> c; \
741
+ for (int i = 0; i < n; i++) \
742
+ c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int( \
743
+ ~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \
744
+ return c; \
745
+ }
746
+
747
+ CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~,
748
+ v_not)
749
+
750
+ #endif // !CV_DOXYGEN
751
+
752
+ //! @brief Helper macro: defines func(a), which applies cfunc to every lane of a and returns the results as v_reg<_Tp2, n>
753
+ //! @ingroup core_hal_intrin_impl
754
+ #define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
755
+ template <typename _Tp, int n> \
756
+ inline v_reg<_Tp2, n> func(const v_reg<_Tp, n> &a) { \
757
+ v_reg<_Tp2, n> c; \
758
+ for (int i = 0; i < n; i++) \
759
+ c.s[i] = cfunc(a.s[i]); \
760
+ return c; \
761
+ }
762
+
763
+ /** @brief Square root of elements
764
+
765
+ Only for floating point types.*/
766
+ OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp)
767
+
768
+ /**
769
+ * @brief Exponential \f$ e^x \f$ of elements
770
+ *
771
+ * Only for floating point types. Core implementation steps:
772
+ * 1. Decompose Input: Convert the input to \f$ 2^{x \cdot \log_2e} \f$ and
773
+ * split its exponential into integer and fractional parts:
774
+ * \f$ x \cdot \log_2e = n + f \f$, where \f$ n \f$ is the integer part and
775
+ * \f$ f \f$ is the fractional part.
776
+ * 2. Compute \f$ 2^n \f$: Calculated by shifting the bits.
777
+ * 3. Adjust Fractional Part: Compute \f$ f \cdot \ln2 \f$ to convert the
778
+ * fractional part to base \f$ e \f$.
779
+ * \f$ C1 \f$ and \f$ C2 \f$ are used to adjust the fractional part.
780
+ * 4. Polynomial Approximation for \f$ e^{f \cdot \ln2} \f$: The closer the
781
+ * fractional part is to 0, the more accurate the result.
782
+ * - For float16 and float32, use a Taylor Series with 6 terms.
783
+ * - For float64, use Pade Polynomials Approximation with 4 terms.
784
+ * 5. Combine Results: Multiply the two parts together to get the final result:
785
+ * \f$ e^x = 2^n \cdot e^{f \cdot \ln2} \f$.
786
+ *
787
+ * @note The precision of the calculation depends on the implementation and the
788
+ * data type of the input vector. This scalar fallback calls std::exp on each lane.
789
+ */
790
+ OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
791
+ #define OPENCV_HAL_MATH_HAVE_EXP 1
792
+
793
+ /**
794
+ * @brief Natural logarithm \f$ \log(x) \f$ of elements
795
+ *
796
+ * Only for floating point types. Core implementation steps:
797
+ * 1. Decompose Input: Use binary representation to decompose the input into
798
+ * mantissa part \f$ m \f$ and exponent part \f$ e \f$. Such that \f$ \log(x) =
799
+ * \log(m \cdot 2^e) = \log(m) + e \cdot \ln(2) \f$.
800
+ * 2. Adjust Mantissa and Exponent Parts: If the mantissa is less than \f$
801
+ * \sqrt{0.5} \f$, adjust the exponent and mantissa to ensure the mantissa is in
802
+ * the range \f$ (\sqrt{0.5}, \sqrt{2}) \f$ for better approximation.
803
+ * 3. Polynomial Approximation for \f$ \log(m) \f$: The closer the \f$ m \f$ is
804
+ * to 1, the more accurate the result.
805
+ * - For float16 and float32, use a Taylor Series with 9 terms.
806
+ * - For float64, use Pade Polynomials Approximation with 6 terms.
807
+ * 4. Combine Results: Add the two parts together to get the final result.
808
+ *
809
+ * @note The precision of the calculation depends on the implementation and the
810
+ * data type of the input. This scalar fallback calls std::log on each lane.
811
+ *
812
+ * @note Similar to the behavior of std::log(), \f$ \ln(0) = -\infty \f$.
813
+ */
814
+ OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
815
+
816
+ /**
817
+ * @brief Error function.
818
+ *
819
+ * @note Only FP32 precision is supported for now.
820
+ */
821
+ OPENCV_HAL_IMPL_MATH_FUNC(v_erf, std::erf, _Tp)
822
+
823
+ /**
823
+ * @brief Compute sine \f$ sin(x) \f$ and cosine \f$ cos(x) \f$ of elements at
824
+ * the same time, writing them to the outputs s and c respectively
825
+ *
826
+ * Only for floating point types. Core implementation steps:
827
+ * 1. Input Normalization: Scale the periodicity from 2π to 4 and reduce the
828
+ * angle to the range \f$ [0, \frac{\pi}{4}] \f$ using periodicity and
829
+ * trigonometric identities.
830
+ * 2. Polynomial Approximation for \f$ sin(x) \f$ and \f$ cos(x) \f$:
831
+ * - For float16 and float32, use a Taylor series with 4 terms for sine and 5
832
+ * terms for cosine.
833
+ * - For float64, use a Taylor series with 7 terms for sine and 8 terms for
834
+ * cosine.
835
+ * 3. Select Results: select and convert the final sine and cosine values for
836
+ * the original input angle.
837
+ *
838
+ * @note The precision of the calculation depends on the implementation and the
839
+ * data type of the input vector. This scalar fallback calls std::sin and std::cos on each lane.
840
+ */
841
+ template <typename _Tp, int n>
842
+ inline void v_sincos(const v_reg<_Tp, n> &x, v_reg<_Tp, n> &s,
843
+ v_reg<_Tp, n> &c) {
844
+ for (int i = 0; i < n; i++) {
845
+ s.s[i] = std::sin(x.s[i]);
846
+ c.s[i] = std::cos(x.s[i]);
847
+ }
848
+ }
850
+
851
+ /**
852
+ * @brief Sine \f$ sin(x) \f$ of elements
853
+ *
854
+ * Only for floating point types. Core implementation the same as @ref v_sincos.
+ * Instantiated through OPENCV_HAL_IMPL_MATH_FUNC with std::sin as the scalar
+ * function (macro defined outside this excerpt; presumably applied per lane).
855
+ */
856
+ OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
857
+
858
+ /**
859
+ * @brief Cosine \f$ cos(x) \f$ of elements
860
+ *
861
+ * Only for floating point types. Core implementation the same as @ref v_sincos.
+ * Instantiated through OPENCV_HAL_IMPL_MATH_FUNC with std::cos as the scalar
+ * function (macro defined outside this excerpt; presumably applied per lane).
862
+ */
863
+ OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
864
+
865
+ /** @brief Absolute value of elements
866
+
867
+ Only for floating point types.
+ The scalar function handed to OPENCV_HAL_IMPL_MATH_FUNC is std::abs with its
+ result cast to V_TypeTraits<_Tp>::abs_type, and abs_type is also passed as the
+ macro's third argument (presumably the lane type of the returned register;
+ the macro is defined outside this excerpt).*/
868
+ OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs,
869
+ typename V_TypeTraits<_Tp>::abs_type)
870
+
871
+ //! @brief Helper macro
872
+ //! @ingroup core_hal_intrin_impl
873
+ #define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
874
+ template <typename _Tp, int n> \
875
+ inline v_reg<_Tp, n> func(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) { \
876
+ v_reg<_Tp, n> c; \
877
+ for (int i = 0; i < n; i++) \
878
+ c.s[i] = cfunc(a.s[i], b.s[i]); \
879
+ return c; \
880
+ }
881
+
882
+ //! @brief Helper macro
883
+ //! @ingroup core_hal_intrin_impl
884
+ #define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
885
+ template <typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n> &a) { \
886
+ _Tp c = a.s[0]; \
887
+ for (int i = 1; i < n; i++) \
888
+ c = cfunc(c, a.s[i]); \
889
+ return c; \
890
+ }
891
+
892
+ /** @brief Choose min values for each pair
893
+
894
+ Scheme:
895
+ @code
896
+ {A1 A2 ...}
897
+ {B1 B2 ...}
898
+ --------------
899
+ {min(A1,B1) min(A2,B2) ...}
900
+ @endcode
901
+ For all types except 64-bit integer. */
902
+ OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min)
903
+
904
+ /** @brief Choose max values for each pair
905
+
906
+ Scheme:
907
+ @code
908
+ {A1 A2 ...}
909
+ {B1 B2 ...}
910
+ --------------
911
+ {max(A1,B1) max(A2,B2) ...}
912
+ @endcode
913
+ For all types except 64-bit integer. */
914
+ OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max)
915
+
916
+ /** @brief Find one min value
917
+
918
+ Scheme:
919
+ @code
920
+ {A1 A2 A3 ...} => min(A1,A2,A3,...)
921
+ @endcode
922
+ For all types except 64-bit integer and 64-bit floating point types. */
923
+ OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
924
+
925
+ /** @brief Find one max value
926
+
927
+ Scheme:
928
+ @code
929
+ {A1 A2 A3 ...} => max(A1,A2,A3,...)
930
+ @endcode
931
+ For all types except 64-bit integer and 64-bit floating point types. */
932
+ OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
933
+
934
+ static const unsigned char popCountTable[] = {
935
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
936
+ 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
937
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
938
+ 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
939
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
940
+ 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
941
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
942
+ 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
943
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
944
+ 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
945
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
946
+ };
947
+ /** @brief Count the 1 bits in the vector lanes and return result as
948
+ corresponding unsigned type
949
+
950
+ Scheme:
951
+ @code
952
+ {A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...}
953
+ @endcode
954
+ For all integer types. */
955
+ template <typename _Tp, int n>
956
+ inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n>
957
+ v_popcount(const v_reg<_Tp, n> &a) {
958
+ v_reg<typename V_TypeTraits<_Tp>::abs_type, n> b =
959
+ v_reg<typename V_TypeTraits<_Tp>::abs_type, n>::zero();
960
+ for (int i = 0; i < n * (int)sizeof(_Tp); i++)
961
+ b.s[i / sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
962
+ return b;
963
+ }
964
+
965
+ //! @cond IGNORED
966
+ template <typename _Tp, int n>
967
+ inline void v_minmax(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b,
968
+ v_reg<_Tp, n> &minval, v_reg<_Tp, n> &maxval) {
969
+ for (int i = 0; i < n; i++) {
970
+ minval.s[i] = std::min(a.s[i], b.s[i]);
971
+ maxval.s[i] = std::max(a.s[i], b.s[i]);
972
+ }
973
+ }
974
+ //! @endcond
975
+
976
+ //! @brief Helper macro
977
+ //! @ingroup core_hal_intrin_impl
978
+ #define OPENCV_HAL_IMPL_CMP_OP(cmp_op, func) \
979
+ template <typename _Tp, int n> \
980
+ inline v_reg<_Tp, n> func(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) { \
981
+ typedef typename V_TypeTraits<_Tp>::int_type itype; \
982
+ v_reg<_Tp, n> c; \
983
+ for (int i = 0; i < n; i++) \
984
+ c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int( \
985
+ (itype) - (int)(a.s[i] cmp_op b.s[i])); \
986
+ return c; \
987
+ }
988
+
989
+ /** @brief Less-than comparison
990
+
991
+ For all types except 64-bit integer values. */
992
+ OPENCV_HAL_IMPL_CMP_OP(<, v_lt)
993
+
994
+ /** @brief Greater-than comparison
995
+
996
+ For all types except 64-bit integer values. */
997
+ OPENCV_HAL_IMPL_CMP_OP(>, v_gt)
998
+
999
+ /** @brief Less-than or equal comparison
1000
+
1001
+ For all types except 64-bit integer values. */
1002
+ OPENCV_HAL_IMPL_CMP_OP(<=, v_le)
1003
+
1004
+ /** @brief Greater-than or equal comparison
1005
+
1006
+ For all types except 64-bit integer values. */
1007
+ OPENCV_HAL_IMPL_CMP_OP(>=, v_ge)
1008
+
1009
+ /** @brief Equal comparison */
1010
+ OPENCV_HAL_IMPL_CMP_OP(==, v_eq)
1011
+
1012
+ /** @brief Not equal comparison */
1013
+ OPENCV_HAL_IMPL_CMP_OP(!=, v_ne)
1014
+
1015
+ template <int n> inline v_reg<float, n> v_not_nan(const v_reg<float, n> &a) {
1016
+ typedef typename V_TypeTraits<float>::int_type itype;
1017
+ v_reg<float, n> c;
1018
+ for (int i = 0; i < n; i++)
1019
+ c.s[i] = V_TypeTraits<float>::reinterpret_from_int((itype) -
1020
+ (int)(a.s[i] == a.s[i]));
1021
+ return c;
1022
+ }
1023
+ template <int n> inline v_reg<double, n> v_not_nan(const v_reg<double, n> &a) {
1024
+ typedef typename V_TypeTraits<double>::int_type itype;
1025
+ v_reg<double, n> c;
1026
+ for (int i = 0; i < n; i++)
1027
+ c.s[i] = V_TypeTraits<double>::reinterpret_from_int(
1028
+ (itype) - (int)(a.s[i] == a.s[i]));
1029
+ return c;
1030
+ }
1031
+
1032
+ //! @brief Helper macro
1033
+ //! @ingroup core_hal_intrin_impl
1034
+ #define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
1035
+ template <typename _Tp, int n> \
1036
+ inline v_reg<_Tp2, n> func(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) { \
1037
+ typedef _Tp2 rtype; \
1038
+ v_reg<rtype, n> c; \
1039
+ for (int i = 0; i < n; i++) \
1040
+ c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
1041
+ return c; \
1042
+ }
1043
+
1044
+ /** @brief Add values without saturation
1045
+
1046
+ For 8- and 16-bit integer values. */
1047
+ OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp)
1048
+
1049
+ /** @brief Subtract values without saturation
1050
+
1051
+ For 8- and 16-bit integer values. */
1052
+ OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp)
1053
+
1054
+ /** @brief Multiply values without saturation
1055
+
1056
+ For 8- and 16-bit integer values. */
1057
+ OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp)
1058
+
1059
+ //! @cond IGNORED
1060
// Scalar |a - b| computed as "larger minus smaller", so the subtraction never
// goes below zero for unsigned T.
template <typename T> inline T _absdiff(T a, T b) {
  if (a > b) {
    return a - b;
  }
  return b - a;
}
1063
+ //! @endcond
1064
+
1065
+ /** @brief Absolute difference
1066
+
1067
+ Returns \f$ |a - b| \f$ converted to corresponding unsigned type.
1068
+ Example:
1069
+ @code{.cpp}
1070
+ v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1}
1071
+ v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}
1072
+ @endcode
1073
+ For 8-, 16-, 32-bit integer source types. */
1074
+ template <typename _Tp, int n>
1075
+ inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n>
1076
+ v_absdiff(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) {
1077
+ typedef typename V_TypeTraits<_Tp>::abs_type rtype;
1078
+ v_reg<rtype, n> c;
1079
+ const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed
1080
+ ? (1 << (sizeof(rtype) * 8 - 1))
1081
+ : 0);
1082
+ for (int i = 0; i < n; i++) {
1083
+ rtype ua = a.s[i] ^ mask;
1084
+ rtype ub = b.s[i] ^ mask;
1085
+ c.s[i] = _absdiff(ua, ub);
1086
+ }
1087
+ return c;
1088
+ }
1089
+
1090
+ /** @overload
1091
+
1092
+ For 32-bit floating point values */
1093
+ template <int n>
1094
+ inline v_reg<float, n> v_absdiff(const v_reg<float, n> &a,
1095
+ const v_reg<float, n> &b) {
1096
+ v_reg<float, n> c;
1097
+ for (int i = 0; i < c.nlanes; i++)
1098
+ c.s[i] = _absdiff(a.s[i], b.s[i]);
1099
+ return c;
1100
+ }
1101
+
1102
+ /** @overload
1103
+
1104
+ For 64-bit floating point values */
1105
+ template <int n>
1106
+ inline v_reg<double, n> v_absdiff(const v_reg<double, n> &a,
1107
+ const v_reg<double, n> &b) {
1108
+ v_reg<double, n> c;
1109
+ for (int i = 0; i < c.nlanes; i++)
1110
+ c.s[i] = _absdiff(a.s[i], b.s[i]);
1111
+ return c;
1112
+ }
1113
+
1114
+ /** @brief Saturating absolute difference
1115
+
1116
+ Returns \f$ saturate(|a - b|) \f$ .
1117
+ For 8-, 16-bit signed integer source types. */
1118
+ template <typename _Tp, int n>
1119
+ inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n> &a,
1120
+ const v_reg<_Tp, n> &b) {
1121
+ v_reg<_Tp, n> c;
1122
+ for (int i = 0; i < n; i++)
1123
+ c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i]));
1124
+ return c;
1125
+ }
1126
+
1127
+ /** @brief Inversed square root
1128
+
1129
+ Returns \f$ 1/sqrt(a) \f$
1130
+ For floating point types only. */
1131
+ template <typename _Tp, int n>
1132
+ inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n> &a) {
1133
+ v_reg<_Tp, n> c;
1134
+ for (int i = 0; i < n; i++)
1135
+ c.s[i] = 1.f / std::sqrt(a.s[i]);
1136
+ return c;
1137
+ }
1138
+
1139
+ /** @brief Magnitude
1140
+
1141
+ Returns \f$ sqrt(a^2 + b^2) \f$
1142
+ For floating point types only. */
1143
+ template <typename _Tp, int n>
1144
+ inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n> &a,
1145
+ const v_reg<_Tp, n> &b) {
1146
+ v_reg<_Tp, n> c;
1147
+ for (int i = 0; i < n; i++)
1148
+ c.s[i] = std::sqrt(a.s[i] * a.s[i] + b.s[i] * b.s[i]);
1149
+ return c;
1150
+ }
1151
+
1152
+ /** @brief Square of the magnitude
1153
+
1154
+ Returns \f$ a^2 + b^2 \f$
1155
+ For floating point types only. */
1156
+ template <typename _Tp, int n>
1157
+ inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n> &a,
1158
+ const v_reg<_Tp, n> &b) {
1159
+ v_reg<_Tp, n> c;
1160
+ for (int i = 0; i < n; i++)
1161
+ c.s[i] = a.s[i] * a.s[i] + b.s[i] * b.s[i];
1162
+ return c;
1163
+ }
1164
+
1165
+ /** @brief Multiply and add
1166
+
1167
+ Returns \f$ a*b + c \f$
1168
+ For floating point types and signed 32bit int only. */
1169
+ template <typename _Tp, int n>
1170
+ inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b,
1171
+ const v_reg<_Tp, n> &c) {
1172
+ v_reg<_Tp, n> d;
1173
+ for (int i = 0; i < n; i++)
1174
+ d.s[i] = a.s[i] * b.s[i] + c.s[i];
1175
+ return d;
1176
+ }
1177
+
1178
+ /** @brief A synonym for v_fma */
1179
+ template <typename _Tp, int n>
1180
+ inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b,
1181
+ const v_reg<_Tp, n> &c) {
1182
+ return v_fma(a, b, c);
1183
+ }
1184
+
1185
+ /** @brief Dot product of elements
1186
+
1187
+ Multiply values in two registers and sum adjacent result pairs.
1188
+
1189
+ Scheme:
1190
+ @code
1191
+ {A1 A2 ...} // 16-bit
1192
+ x {B1 B2 ...} // 16-bit
1193
+ -------------
1194
+ {A1B1+A2B2 ...} // 32-bit
1195
+
1196
+ @endcode
1197
+ */
1198
+ template <typename _Tp, int n>
1199
+ inline v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>
1200
+ v_dotprod(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) {
1201
+ typedef typename V_TypeTraits<_Tp>::w_type w_type;
1202
+ v_reg<w_type, n / 2> c;
1203
+ for (int i = 0; i < (n / 2); i++)
1204
+ c.s[i] = (w_type)a.s[i * 2] * b.s[i * 2] +
1205
+ (w_type)a.s[i * 2 + 1] * b.s[i * 2 + 1];
1206
+ return c;
1207
+ }
1208
+
1209
+ /** @brief Dot product of elements
1210
+
1211
+ Same as cv::v_dotprod, but add a third element to the sum of adjacent pairs.
1212
+ Scheme:
1213
+ @code
1214
+ {A1 A2 ...} // 16-bit
1215
+ x {B1 B2 ...} // 16-bit
1216
+ -------------
1217
+ {A1B1+A2B2+C1 ...} // 32-bit
1218
+ @endcode
1219
+ */
1220
+ template <typename _Tp, int n>
1221
+ inline v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>
1222
+ v_dotprod(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b,
1223
+ const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2> &c) {
1224
+ typedef typename V_TypeTraits<_Tp>::w_type w_type;
1225
+ v_reg<w_type, n / 2> s;
1226
+ for (int i = 0; i < (n / 2); i++)
1227
+ s.s[i] = (w_type)a.s[i * 2] * b.s[i * 2] +
1228
+ (w_type)a.s[i * 2 + 1] * b.s[i * 2 + 1] + c.s[i];
1229
+ return s;
1230
+ }
1231
+
1232
+ /** @brief Fast Dot product of elements
1233
+
1234
+ Same as cv::v_dotprod, but it may perform unorder sum between result pairs in
1235
+ some platforms, this intrinsic can be used if the sum among all lanes is only
1236
+ matters and also it should be yielding better performance on the affected
1237
+ platforms.
1238
+
1239
+ */
1240
+ template <typename _Tp, int n>
1241
+ inline v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>
1242
+ v_dotprod_fast(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) {
1243
+ return v_dotprod(a, b);
1244
+ }
1245
+
1246
+ /** @brief Fast Dot product of elements
1247
+
1248
+ Same as cv::v_dotprod_fast, but add a third element to the sum of adjacent
1249
+ pairs.
1250
+ */
1251
+ template <typename _Tp, int n>
1252
+ inline v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>
1253
+ v_dotprod_fast(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b,
1254
+ const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2> &c) {
1255
+ return v_dotprod(a, b, c);
1256
+ }
1257
+
1258
+ /** @brief Dot product of elements and expand
1259
+
1260
+ Multiply values in two registers and expand the sum of adjacent result pairs.
1261
+
1262
+ Scheme:
1263
+ @code
1264
+ {A1 A2 A3 A4 ...} // 8-bit
1265
+ x {B1 B2 B3 B4 ...} // 8-bit
1266
+ -------------
1267
+ {A1B1+A2B2+A3B3+A4B4 ...} // 32-bit
1268
+
1269
+ @endcode
1270
+ */
1271
+ template <typename _Tp, int n>
1272
+ inline v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>
1273
+ v_dotprod_expand(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) {
1274
+ typedef typename V_TypeTraits<_Tp>::q_type q_type;
1275
+ v_reg<q_type, n / 4> s;
1276
+ for (int i = 0; i < (n / 4); i++)
1277
+ s.s[i] = (q_type)a.s[i * 4] * b.s[i * 4] +
1278
+ (q_type)a.s[i * 4 + 1] * b.s[i * 4 + 1] +
1279
+ (q_type)a.s[i * 4 + 2] * b.s[i * 4 + 2] +
1280
+ (q_type)a.s[i * 4 + 3] * b.s[i * 4 + 3];
1281
+ return s;
1282
+ }
1283
+
1284
+ /** @brief Dot product of elements
1285
+
1286
+ Same as cv::v_dotprod_expand, but add a third element to the sum of adjacent
1287
+ pairs. Scheme:
1288
+ @code
1289
+ {A1 A2 A3 A4 ...} // 8-bit
1290
+ x {B1 B2 B3 B4 ...} // 8-bit
1291
+ -------------
1292
+ {A1B1+A2B2+A3B3+A4B4+C1 ...} // 32-bit
1293
+ @endcode
1294
+ */
1295
+ template <typename _Tp, int n>
1296
+ inline v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>
1297
+ v_dotprod_expand(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b,
1298
+ const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4> &c) {
1299
+ typedef typename V_TypeTraits<_Tp>::q_type q_type;
1300
+ v_reg<q_type, n / 4> s;
1301
+ for (int i = 0; i < (n / 4); i++)
1302
+ s.s[i] = (q_type)a.s[i * 4] * b.s[i * 4] +
1303
+ (q_type)a.s[i * 4 + 1] * b.s[i * 4 + 1] +
1304
+ (q_type)a.s[i * 4 + 2] * b.s[i * 4 + 2] +
1305
+ (q_type)a.s[i * 4 + 3] * b.s[i * 4 + 3] + c.s[i];
1306
+ return s;
1307
+ }
1308
+
1309
+ /** @brief Fast Dot product of elements and expand
1310
+
1311
+ Multiply values in two registers and expand the sum of adjacent result pairs.
1312
+
1313
+ Same as cv::v_dotprod_expand, but it may perform unorder sum between result
1314
+ pairs in some platforms, this intrinsic can be used if the sum among all lanes
1315
+ is only matters and also it should be yielding better performance on the
1316
+ affected platforms.
1317
+
1318
+ */
1319
+ template <typename _Tp, int n>
1320
+ inline v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>
1321
+ v_dotprod_expand_fast(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) {
1322
+ return v_dotprod_expand(a, b);
1323
+ }
1324
+
1325
+ /** @brief Fast Dot product of elements
1326
+
1327
+ Same as cv::v_dotprod_expand_fast, but add a third element to the sum of
1328
+ adjacent pairs.
1329
+ */
1330
+ template <typename _Tp, int n>
1331
+ inline v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4> v_dotprod_expand_fast(
1332
+ const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b,
1333
+ const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4> &c) {
1334
+ return v_dotprod_expand(a, b, c);
1335
+ }
1336
+
1337
+ /** @brief Multiply and expand
1338
+
1339
+ Multiply values two registers and store results in two registers with wider pack
1340
+ type. Scheme:
1341
+ @code
1342
+ {A B C D} // 32-bit
1343
+ x {E F G H} // 32-bit
1344
+ ---------------
1345
+ {AE BF} // 64-bit
1346
+ {CG DH} // 64-bit
1347
+ @endcode
1348
+ Example:
1349
+ @code{.cpp}
1350
+ v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2}
1351
+ v_uint64x2 c, d; // results
1352
+ v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8}
1353
+ @endcode
1354
+ Implemented only for 16- and unsigned 32-bit source types (v_int16x8,
1355
+ v_uint16x8, v_uint32x4).
1356
+ */
1357
+ template <typename _Tp, int n>
1358
+ inline void v_mul_expand(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b,
1359
+ v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2> &c,
1360
+ v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2> &d) {
1361
+ typedef typename V_TypeTraits<_Tp>::w_type w_type;
1362
+ for (int i = 0; i < (n / 2); i++) {
1363
+ c.s[i] = (w_type)a.s[i] * b.s[i];
1364
+ d.s[i] = (w_type)a.s[i + (n / 2)] * b.s[i + (n / 2)];
1365
+ }
1366
+ }
1367
+
1368
+ /** @brief Multiply and extract high part
1369
+
1370
+ Multiply values two registers and store high part of the results.
1371
+ Implemented only for 16-bit source types (v_int16x8, v_uint16x8). Returns \f$
1372
+ a*b >> 16 \f$
1373
+ */
1374
+ template <typename _Tp, int n>
1375
+ inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) {
1376
+ typedef typename V_TypeTraits<_Tp>::w_type w_type;
1377
+ v_reg<_Tp, n> c;
1378
+ for (int i = 0; i < n; i++)
1379
+ c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp) * 8);
1380
+ return c;
1381
+ }
1382
+
1383
+ //! @cond IGNORED
1384
+ template <typename _Tp, int n>
1385
+ inline void v_hsum(const v_reg<_Tp, n> &a,
1386
+ v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2> &c) {
1387
+ typedef typename V_TypeTraits<_Tp>::w_type w_type;
1388
+ for (int i = 0; i < (n / 2); i++) {
1389
+ c.s[i] = (w_type)a.s[i * 2] + a.s[i * 2 + 1];
1390
+ }
1391
+ }
1392
+ //! @endcond
1393
+
1394
+ //! @brief Helper macro
1395
+ //! @ingroup core_hal_intrin_impl
1396
+ #define OPENCV_HAL_IMPL_SHIFT_OP(shift_op, func) \
1397
+ template <typename _Tp, int n> \
1398
+ inline v_reg<_Tp, n> func(const v_reg<_Tp, n> &a, int imm) { \
1399
+ v_reg<_Tp, n> c; \
1400
+ for (int i = 0; i < n; i++) \
1401
+ c.s[i] = (_Tp)(a.s[i] shift_op imm); \
1402
+ return c; \
1403
+ }
1404
+
1405
+ /** @brief Bitwise shift left
1406
+
1407
+ For 16-, 32- and 64-bit integer values. */
1408
+ OPENCV_HAL_IMPL_SHIFT_OP(<<, v_shl)
1409
+
1410
+ /** @brief Bitwise shift right
1411
+
1412
+ For 16-, 32- and 64-bit integer values. */
1413
+ OPENCV_HAL_IMPL_SHIFT_OP(>>, v_shr)
1414
+
1415
+ //! @brief Helper macro
1416
+ //! @ingroup core_hal_intrin_impl
1417
+ #define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix, opA, opB) \
1418
+ template <int imm, typename _Tp, int n> \
1419
+ inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n> &a) { \
1420
+ v_reg<_Tp, n> b; \
1421
+ for (int i = 0; i < n; i++) { \
1422
+ int sIndex = i opA imm; \
1423
+ if (0 <= sIndex && sIndex < n) { \
1424
+ b.s[i] = a.s[sIndex]; \
1425
+ } else { \
1426
+ b.s[i] = 0; \
1427
+ } \
1428
+ } \
1429
+ return b; \
1430
+ } \
1431
+ template <int imm, typename _Tp, int n> \
1432
+ inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n> &a, \
1433
+ const v_reg<_Tp, n> &b) { \
1434
+ v_reg<_Tp, n> c; \
1435
+ for (int i = 0; i < n; i++) { \
1436
+ int aIndex = i opA imm; \
1437
+ int bIndex = i opA imm opB n; \
1438
+ if (0 <= bIndex && bIndex < n) { \
1439
+ c.s[i] = b.s[bIndex]; \
1440
+ } else if (0 <= aIndex && aIndex < n) { \
1441
+ c.s[i] = a.s[aIndex]; \
1442
+ } else { \
1443
+ c.s[i] = 0; \
1444
+ } \
1445
+ } \
1446
+ return c; \
1447
+ }
1448
+
1449
+ /** @brief Element shift left among vector
1450
+
1451
+ For all type */
1452
+ OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +)
1453
+
1454
+ /** @brief Element shift right among vector
1455
+
1456
+ For all type */
1457
+ OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -)
1458
+
1459
+ /** @brief Sum packed values
1460
+
1461
+ Scheme:
1462
+ @code
1463
+ {A1 A2 A3 ...} => sum{A1,A2,A3,...}
1464
+ @endcode
1465
+ */
1466
+ template <typename _Tp, int n>
1467
+ inline typename V_TypeTraits<_Tp>::sum_type
1468
+ v_reduce_sum(const v_reg<_Tp, n> &a) {
1469
+ typename V_TypeTraits<_Tp>::sum_type c = a.s[0];
1470
+ for (int i = 1; i < n; i++)
1471
+ c += a.s[i];
1472
+ return c;
1473
+ }
1474
+
1475
+ /** @brief Sums all elements of each input vector, returns the vector of sums
1476
+
1477
+ Scheme:
1478
+ @code
1479
+ result[0] = a[0] + a[1] + a[2] + a[3]
1480
+ result[1] = b[0] + b[1] + b[2] + b[3]
1481
+ result[2] = c[0] + c[1] + c[2] + c[3]
1482
+ result[3] = d[0] + d[1] + d[2] + d[3]
1483
+ @endcode
1484
+ */
1485
+ template <int n>
1486
+ inline v_reg<float, n>
1487
+ v_reduce_sum4(const v_reg<float, n> &a, const v_reg<float, n> &b,
1488
+ const v_reg<float, n> &c, const v_reg<float, n> &d) {
1489
+ v_reg<float, n> r;
1490
+ for (int i = 0; i < (n / 4); i++) {
1491
+ r.s[i * 4 + 0] =
1492
+ a.s[i * 4 + 0] + a.s[i * 4 + 1] + a.s[i * 4 + 2] + a.s[i * 4 + 3];
1493
+ r.s[i * 4 + 1] =
1494
+ b.s[i * 4 + 0] + b.s[i * 4 + 1] + b.s[i * 4 + 2] + b.s[i * 4 + 3];
1495
+ r.s[i * 4 + 2] =
1496
+ c.s[i * 4 + 0] + c.s[i * 4 + 1] + c.s[i * 4 + 2] + c.s[i * 4 + 3];
1497
+ r.s[i * 4 + 3] =
1498
+ d.s[i * 4 + 0] + d.s[i * 4 + 1] + d.s[i * 4 + 2] + d.s[i * 4 + 3];
1499
+ }
1500
+ return r;
1501
+ }
1502
+
1503
+ /** @brief Sum absolute differences of values
1504
+
1505
+ Scheme:
1506
+ @code
1507
+ {A1 A2 A3 ...} {B1 B2 B3 ...} => sum{ABS(A1-B1),abs(A2-B2),abs(A3-B3),...}
1508
+ @endcode
1509
+ For all types except 64-bit types.*/
1510
+ template <typename _Tp, int n>
1511
+ inline typename V_TypeTraits<typename V_TypeTraits<_Tp>::abs_type>::sum_type
1512
+ v_reduce_sad(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) {
1513
+ typename V_TypeTraits<typename V_TypeTraits<_Tp>::abs_type>::sum_type c =
1514
+ _absdiff(a.s[0], b.s[0]);
1515
+ for (int i = 1; i < n; i++)
1516
+ c += _absdiff(a.s[i], b.s[i]);
1517
+ return c;
1518
+ }
1519
+
1520
+ /** @brief Get negative values mask
1521
+ @deprecated v_signmask depends on a lane count heavily and therefore isn't
1522
+ universal enough
1523
+
1524
+ Returned value is a bit mask with bits set to 1 on places corresponding to
1525
+ negative packed values indexes. Example:
1526
+ @code{.cpp}
1527
+ v_int32x4 r; // set to {-1, -1, 1, 1}
1528
+ int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011
1529
+ @endcode
1530
+ */
1531
+ template <typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n> &a) {
1532
+ int mask = 0;
1533
+ for (int i = 0; i < n; i++)
1534
+ mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i;
1535
+ return mask;
1536
+ }
1537
+
1538
+ /** @brief Get first negative lane index
1539
+
1540
+ Returned value is an index of first negative lane (undefined for input of all
1541
+ positive values) Example:
1542
+ @code{.cpp}
1543
+ v_int32x4 r; // set to {0, 0, -1, -1}
1544
+ int idx = v_heading_zeros(r); // idx = 2
1545
+ @endcode
1546
+ */
1547
+ template <typename _Tp, int n>
1548
+ inline int v_scan_forward(const v_reg<_Tp, n> &a) {
1549
+ for (int i = 0; i < n; i++)
1550
+ if (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0)
1551
+ return i;
1552
+ return 0;
1553
+ }
1554
+
1555
+ /** @brief Check if all packed values are less than zero
1556
+
1557
+ Unsigned values will be casted to signed: `uchar 254 => char -2`.
1558
+ */
1559
+ template <typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n> &a) {
1560
+ for (int i = 0; i < n; i++)
1561
+ if (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0)
1562
+ return false;
1563
+ return true;
1564
+ }
1565
+
1566
+ /** @brief Check if any of packed values is less than zero
1567
+
1568
+ Unsigned values will be casted to signed: `uchar 254 => char -2`.
1569
+ */
1570
+ template <typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n> &a) {
1571
+ for (int i = 0; i < n; i++)
1572
+ if (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0)
1573
+ return true;
1574
+ return false;
1575
+ }
1576
+
1577
+ /** @brief Per-element select (blend operation)
1578
+
1579
+ Return value will be built by combining values _a_ and _b_ using the following
1580
+ scheme: result[i] = mask[i] ? a[i] : b[i];
1581
+
1582
+ @note: _mask_ element values are restricted to these values:
1583
+ - 0: select element from _b_
1584
+ - 0xff/0xffff/etc: select element from _a_
1585
+ (fully compatible with bitwise-based operator)
1586
+ */
1587
+ template <typename _Tp, int n>
1588
+ inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n> &mask, const v_reg<_Tp, n> &a,
1589
+ const v_reg<_Tp, n> &b) {
1590
+ typedef V_TypeTraits<_Tp> Traits;
1591
+ typedef typename Traits::int_type int_type;
1592
+ v_reg<_Tp, n> c;
1593
+ for (int i = 0; i < n; i++) {
1594
+ int_type m = Traits::reinterpret_int(mask.s[i]);
1595
+ CV_DbgAssert(
1596
+ m == 0 ||
1597
+ m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc
1598
+ c.s[i] = m ? a.s[i] : b.s[i];
1599
+ }
1600
+ return c;
1601
+ }
1602
+
1603
+ /** @brief Expand values to the wider pack type
1604
+
1605
+ Copy contents of register to two registers with 2x wider pack type.
1606
+ Scheme:
1607
+ @code
1608
+ int32x4 int64x2 int64x2
1609
+ {A B C D} ==> {A B} , {C D}
1610
+ @endcode */
1611
+ template <typename _Tp, int n>
1612
+ inline void v_expand(const v_reg<_Tp, n> &a,
1613
+ v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2> &b0,
1614
+ v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2> &b1) {
1615
+ for (int i = 0; i < (n / 2); i++) {
1616
+ b0.s[i] = a.s[i];
1617
+ b1.s[i] = a.s[i + (n / 2)];
1618
+ }
1619
+ }
1620
+
1621
+ /** @brief Expand lower values to the wider pack type
1622
+
1623
+ Same as cv::v_expand, but return lower half of the vector.
1624
+
1625
+ Scheme:
1626
+ @code
1627
+ int32x4 int64x2
1628
+ {A B C D} ==> {A B}
1629
+ @endcode */
1630
+ template <typename _Tp, int n>
1631
+ inline v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>
1632
+ v_expand_low(const v_reg<_Tp, n> &a) {
1633
+ v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2> b;
1634
+ for (int i = 0; i < (n / 2); i++)
1635
+ b.s[i] = a.s[i];
1636
+ return b;
1637
+ }
1638
+
1639
+ /** @brief Expand higher values to the wider pack type
1640
+
1641
+ Same as cv::v_expand_low, but expand higher half of the vector instead.
1642
+
1643
+ Scheme:
1644
+ @code
1645
+ int32x4 int64x2
1646
+ {A B C D} ==> {C D}
1647
+ @endcode */
1648
+ template <typename _Tp, int n>
1649
+ inline v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>
1650
+ v_expand_high(const v_reg<_Tp, n> &a) {
1651
+ v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2> b;
1652
+ for (int i = 0; i < (n / 2); i++)
1653
+ b.s[i] = a.s[i + (n / 2)];
1654
+ return b;
1655
+ }
1656
+
1657
+ //! @cond IGNORED
1658
+ template <typename _Tp, int n>
1659
+ inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
1660
+ v_reinterpret_as_int(const v_reg<_Tp, n> &a) {
1661
+ v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
1662
+ for (int i = 0; i < n; i++)
1663
+ c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
1664
+ return c;
1665
+ }
1666
+
1667
+ template <typename _Tp, int n>
1668
+ inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
1669
+ v_reinterpret_as_uint(const v_reg<_Tp, n> &a) {
1670
+ v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
1671
+ for (int i = 0; i < n; i++)
1672
+ c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
1673
+ return c;
1674
+ }
1675
+ //! @endcond
1676
+
1677
+ /** @brief Interleave two vectors
1678
+
1679
+ Scheme:
1680
+ @code
1681
+ {A1 A2 A3 A4}
1682
+ {B1 B2 B3 B4}
1683
+ ---------------
1684
+ {A1 B1 A2 B2} and {A3 B3 A4 B4}
1685
+ @endcode
1686
+ For all types except 64-bit.
1687
+ */
1688
+ template <typename _Tp, int n>
1689
+ inline void v_zip(const v_reg<_Tp, n> &a0, const v_reg<_Tp, n> &a1,
1690
+ v_reg<_Tp, n> &b0, v_reg<_Tp, n> &b1) {
1691
+ int i;
1692
+ for (i = 0; i < n / 2; i++) {
1693
+ b0.s[i * 2] = a0.s[i];
1694
+ b0.s[i * 2 + 1] = a1.s[i];
1695
+ }
1696
+ for (; i < n; i++) {
1697
+ b1.s[i * 2 - n] = a0.s[i];
1698
+ b1.s[i * 2 - n + 1] = a1.s[i];
1699
+ }
1700
+ }
1701
+
1702
+ /** @brief Load register contents from memory
1703
+
1704
+ @param ptr pointer to memory block with data
1705
+ @return register object
1706
+
1707
+ @note Returned type will be detected from passed pointer type, for example uchar
1708
+ ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.
1709
+
1710
+ @note Use vx_load version to get maximum available register length result
1711
+
1712
+ @note Alignment requirement:
1713
+ if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane
1714
+ type)` should be enough). Do not cast pointer types without runtime check for
1715
+ pointer alignment (like `uchar*` => `int*`).
1716
+ */
1717
+ template <typename _Tp>
1718
+ inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load(const _Tp *ptr) {
1719
+ #if CV_STRONG_ALIGNMENT
1720
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1721
+ #endif
1722
+ return v_reg<_Tp, simd128_width / sizeof(_Tp)>(ptr);
1723
+ }
1724
+
1725
#if CV_SIMD256
/** @brief Load 256-bit length register contents from memory

@param ptr pointer to memory block with data
@return register object constructed from @p ptr

@note The returned type is deduced from the pointer type, for example uchar
==> cv::v_uint8x32, int ==> cv::v_int32x8, etc.

@note Check CV_SIMD256 preprocessor definition prior to use.
Use vx_load version to get maximum available register length result

@note Alignment requirement:
if CV_STRONG_ALIGNMENT=1 then the pointer is asserted to be aligned to
`sizeof(lane type)`. Do not cast pointer types without runtime check for
pointer alignment (like `uchar*` => `int*`).
*/
template <typename _Tp>
inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load(const _Tp *ptr) {
  typedef v_reg<_Tp, simd256_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
  return reg_type(ptr);
}
#endif
1750
+
1751
#if CV_SIMD512
/** @brief Load 512-bit length register contents from memory

@param ptr pointer to memory block with data
@return register object constructed from @p ptr

@note The returned type is deduced from the pointer type, for example uchar
==> cv::v_uint8x64, int ==> cv::v_int32x16, etc.

@note Check CV_SIMD512 preprocessor definition prior to use.
Use vx_load version to get maximum available register length result

@note Alignment requirement:
if CV_STRONG_ALIGNMENT=1 then the pointer is asserted to be aligned to
`sizeof(lane type)`. Do not cast pointer types without runtime check for
pointer alignment (like `uchar*` => `int*`).
*/
template <typename _Tp>
inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load(const _Tp *ptr) {
  typedef v_reg<_Tp, simd512_width / sizeof(_Tp)> reg_type;
#if CV_STRONG_ALIGNMENT
  CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
  return reg_type(ptr);
}
#endif
1776
+
1777
+ /** @brief Load register contents from memory (aligned)
1778
+
1779
+ similar to cv::v_load, but source memory block should be aligned (to 16-byte
1780
+ boundary in case of SIMD128, 32-byte - SIMD256, etc)
1781
+
1782
+ @note Use vx_load_aligned version to get maximum available register length
1783
+ result
1784
+ */
1785
+ template <typename _Tp>
1786
+ inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_aligned(const _Tp *ptr) {
1787
+ CV_Assert(isAligned<sizeof(v_reg<_Tp, simd128_width / sizeof(_Tp)>)>(ptr));
1788
+ return v_reg<_Tp, simd128_width / sizeof(_Tp)>(ptr);
1789
+ }
1790
+
1791
+ #if CV_SIMD256
1792
+ /** @brief Load register contents from memory (aligned)
1793
+
1794
+ similar to cv::v256_load, but source memory block should be aligned (to 32-byte
1795
+ boundary in case of SIMD256, 64-byte - SIMD512, etc)
1796
+
1797
+ @note Check CV_SIMD256 preprocessor definition prior to use.
1798
+ Use vx_load_aligned version to get maximum available register length result
1799
+ */
1800
+ template <typename _Tp>
1801
+ inline v_reg<_Tp, simd256_width / sizeof(_Tp)>
1802
+ v256_load_aligned(const _Tp *ptr) {
1803
+ CV_Assert(isAligned<sizeof(v_reg<_Tp, simd256_width / sizeof(_Tp)>)>(ptr));
1804
+ return v_reg<_Tp, simd256_width / sizeof(_Tp)>(ptr);
1805
+ }
1806
+ #endif
1807
+
1808
+ #if CV_SIMD512
1809
+ /** @brief Load register contents from memory (aligned)
1810
+
1811
+ similar to cv::v512_load, but source memory block should be aligned (to 64-byte
1812
+ boundary in case of SIMD512, etc)
1813
+
1814
+ @note Check CV_SIMD512 preprocessor definition prior to use.
1815
+ Use vx_load_aligned version to get maximum available register length result
1816
+ */
1817
+ template <typename _Tp>
1818
+ inline v_reg<_Tp, simd512_width / sizeof(_Tp)>
1819
+ v512_load_aligned(const _Tp *ptr) {
1820
+ CV_Assert(isAligned<sizeof(v_reg<_Tp, simd512_width / sizeof(_Tp)>)>(ptr));
1821
+ return v_reg<_Tp, simd512_width / sizeof(_Tp)>(ptr);
1822
+ }
1823
+ #endif
1824
+
1825
+ /** @brief Load 64-bits of data to lower part (high part is undefined).
1826
+
1827
+ @param ptr memory block containing data for first half (0..n/2)
1828
+
1829
+ @code{.cpp}
1830
+ int lo[2] = { 1, 2 };
1831
+ v_int32x4 r = v_load_low(lo);
1832
+ @endcode
1833
+
1834
+ @note Use vx_load_low version to get maximum available register length result
1835
+ */
1836
+ template <typename _Tp>
1837
+ inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_low(const _Tp *ptr) {
1838
+ #if CV_STRONG_ALIGNMENT
1839
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1840
+ #endif
1841
+ v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
1842
+ for (int i = 0; i < c.nlanes / 2; i++) {
1843
+ c.s[i] = ptr[i];
1844
+ }
1845
+ return c;
1846
+ }
1847
+
1848
+ #if CV_SIMD256
1849
+ /** @brief Load 128-bits of data to lower part (high part is undefined).
1850
+
1851
+ @param ptr memory block containing data for first half (0..n/2)
1852
+
1853
+ @code{.cpp}
1854
+ int lo[4] = { 1, 2, 3, 4 };
1855
+ v_int32x8 r = v256_load_low(lo);
1856
+ @endcode
1857
+
1858
+ @note Check CV_SIMD256 preprocessor definition prior to use.
1859
+ Use vx_load_low version to get maximum available register length result
1860
+ */
1861
+ template <typename _Tp>
1862
+ inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_low(const _Tp *ptr) {
1863
+ #if CV_STRONG_ALIGNMENT
1864
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1865
+ #endif
1866
+ v_reg<_Tp, simd256_width / sizeof(_Tp)> c;
1867
+ for (int i = 0; i < c.nlanes / 2; i++) {
1868
+ c.s[i] = ptr[i];
1869
+ }
1870
+ return c;
1871
+ }
1872
+ #endif
1873
+
1874
+ #if CV_SIMD512
1875
+ /** @brief Load 256-bits of data to lower part (high part is undefined).
1876
+
1877
+ @param ptr memory block containing data for first half (0..n/2)
1878
+
1879
+ @code{.cpp}
1880
+ int lo[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
1881
+ v_int32x16 r = v512_load_low(lo);
1882
+ @endcode
1883
+
1884
+ @note Check CV_SIMD512 preprocessor definition prior to use.
1885
+ Use vx_load_low version to get maximum available register length result
1886
+ */
1887
+ template <typename _Tp>
1888
+ inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_low(const _Tp *ptr) {
1889
+ #if CV_STRONG_ALIGNMENT
1890
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
1891
+ #endif
1892
+ v_reg<_Tp, simd512_width / sizeof(_Tp)> c;
1893
+ for (int i = 0; i < c.nlanes / 2; i++) {
1894
+ c.s[i] = ptr[i];
1895
+ }
1896
+ return c;
1897
+ }
1898
+ #endif
1899
+
1900
+ /** @brief Load register contents from two memory blocks
1901
+
1902
+ @param loptr memory block containing data for first half (0..n/2)
1903
+ @param hiptr memory block containing data for second half (n/2..n)
1904
+
1905
+ @code{.cpp}
1906
+ int lo[2] = { 1, 2 }, hi[2] = { 3, 4 };
1907
+ v_int32x4 r = v_load_halves(lo, hi);
1908
+ @endcode
1909
+
1910
+ @note Use vx_load_halves version to get maximum available register length result
1911
+ */
1912
+ template <typename _Tp>
1913
+ inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_halves(const _Tp *loptr,
1914
+ const _Tp *hiptr) {
1915
+ #if CV_STRONG_ALIGNMENT
1916
+ CV_Assert(isAligned<sizeof(_Tp)>(loptr));
1917
+ CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
1918
+ #endif
1919
+ v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
1920
+ for (int i = 0; i < c.nlanes / 2; i++) {
1921
+ c.s[i] = loptr[i];
1922
+ c.s[i + c.nlanes / 2] = hiptr[i];
1923
+ }
1924
+ return c;
1925
+ }
1926
+
1927
+ #if CV_SIMD256
1928
+ /** @brief Load register contents from two memory blocks
1929
+
1930
+ @param loptr memory block containing data for first half (0..n/2)
1931
+ @param hiptr memory block containing data for second half (n/2..n)
1932
+
1933
+ @code{.cpp}
1934
+ int lo[4] = { 1, 2, 3, 4 }, hi[4] = { 5, 6, 7, 8 };
1935
+ v_int32x8 r = v256_load_halves(lo, hi);
1936
+ @endcode
1937
+
1938
+ @note Check CV_SIMD256 preprocessor definition prior to use.
1939
+ Use vx_load_halves version to get maximum available register length result
1940
+ */
1941
+ template <typename _Tp>
1942
+ inline v_reg<_Tp, simd256_width / sizeof(_Tp)>
1943
+ v256_load_halves(const _Tp *loptr, const _Tp *hiptr) {
1944
+ #if CV_STRONG_ALIGNMENT
1945
+ CV_Assert(isAligned<sizeof(_Tp)>(loptr));
1946
+ CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
1947
+ #endif
1948
+ v_reg<_Tp, simd256_width / sizeof(_Tp)> c;
1949
+ for (int i = 0; i < c.nlanes / 2; i++) {
1950
+ c.s[i] = loptr[i];
1951
+ c.s[i + c.nlanes / 2] = hiptr[i];
1952
+ }
1953
+ return c;
1954
+ }
1955
+ #endif
1956
+
1957
+ #if CV_SIMD512
1958
+ /** @brief Load register contents from two memory blocks
1959
+
1960
+ @param loptr memory block containing data for first half (0..n/2)
1961
+ @param hiptr memory block containing data for second half (n/2..n)
1962
+
1963
+ @code{.cpp}
1964
+ int lo[4] = { 1, 2, 3, 4, 5, 6, 7, 8 }, hi[4] = { 9, 10, 11, 12, 13, 14, 15, 16
1965
+ }; v_int32x16 r = v512_load_halves(lo, hi);
1966
+ @endcode
1967
+
1968
+ @note Check CV_SIMD512 preprocessor definition prior to use.
1969
+ Use vx_load_halves version to get maximum available register length result
1970
+ */
1971
+ template <typename _Tp>
1972
+ inline v_reg<_Tp, simd512_width / sizeof(_Tp)>
1973
+ v512_load_halves(const _Tp *loptr, const _Tp *hiptr) {
1974
+ #if CV_STRONG_ALIGNMENT
1975
+ CV_Assert(isAligned<sizeof(_Tp)>(loptr));
1976
+ CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
1977
+ #endif
1978
+ v_reg<_Tp, simd512_width / sizeof(_Tp)> c;
1979
+ for (int i = 0; i < c.nlanes / 2; i++) {
1980
+ c.s[i] = loptr[i];
1981
+ c.s[i + c.nlanes / 2] = hiptr[i];
1982
+ }
1983
+ return c;
1984
+ }
1985
+ #endif
1986
+
1987
+ /** @brief Load register contents from memory with double expand
1988
+
1989
+ Same as cv::v_load, but result pack type will be 2x wider than memory type.
1990
+
1991
+ @code{.cpp}
1992
+ short buf[4] = {1, 2, 3, 4}; // type is int16
1993
+ v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32
1994
+ @endcode
1995
+ For 8-, 16-, 32-bit integer source types.
1996
+
1997
+ @note Use vx_load_expand version to get maximum available register length result
1998
+ */
1999
+ template <typename _Tp>
2000
+ inline v_reg<typename V_TypeTraits<_Tp>::w_type,
2001
+ simd128_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
2002
+ v_load_expand(const _Tp *ptr) {
2003
+ #if CV_STRONG_ALIGNMENT
2004
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2005
+ #endif
2006
+ typedef typename V_TypeTraits<_Tp>::w_type w_type;
2007
+ v_reg<w_type, simd128_width / sizeof(w_type)> c;
2008
+ for (int i = 0; i < c.nlanes; i++) {
2009
+ c.s[i] = ptr[i];
2010
+ }
2011
+ return c;
2012
+ }
2013
+
2014
+ #if CV_SIMD256
2015
+ /** @brief Load register contents from memory with double expand
2016
+
2017
+ Same as cv::v256_load, but result pack type will be 2x wider than memory type.
2018
+
2019
+ @code{.cpp}
2020
+ short buf[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // type is int16
2021
+ v_int32x8 r = v256_load_expand(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8} - type is
2022
+ int32
2023
+ @endcode
2024
+ For 8-, 16-, 32-bit integer source types.
2025
+
2026
+ @note Check CV_SIMD256 preprocessor definition prior to use.
2027
+ Use vx_load_expand version to get maximum available register length result
2028
+ */
2029
+ template <typename _Tp>
2030
+ inline v_reg<typename V_TypeTraits<_Tp>::w_type,
2031
+ simd256_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
2032
+ v256_load_expand(const _Tp *ptr) {
2033
+ #if CV_STRONG_ALIGNMENT
2034
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2035
+ #endif
2036
+ typedef typename V_TypeTraits<_Tp>::w_type w_type;
2037
+ v_reg<w_type, simd256_width / sizeof(w_type)> c;
2038
+ for (int i = 0; i < c.nlanes; i++) {
2039
+ c.s[i] = ptr[i];
2040
+ }
2041
+ return c;
2042
+ }
2043
+ #endif
2044
+
2045
+ #if CV_SIMD512
2046
+ /** @brief Load register contents from memory with double expand
2047
+
2048
+ Same as cv::v512_load, but result pack type will be 2x wider than memory type.
2049
+
2050
+ @code{.cpp}
2051
+ short buf[8] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; // type
2052
+ is int16 v_int32x16 r = v512_load_expand(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8,
2053
+ 9, 10, 11, 12, 13, 14, 15, 16} - type is int32
2054
+ @endcode
2055
+ For 8-, 16-, 32-bit integer source types.
2056
+
2057
+ @note Check CV_SIMD512 preprocessor definition prior to use.
2058
+ Use vx_load_expand version to get maximum available register length result
2059
+ */
2060
+ template <typename _Tp>
2061
+ inline v_reg<typename V_TypeTraits<_Tp>::w_type,
2062
+ simd512_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
2063
+ v512_load_expand(const _Tp *ptr) {
2064
+ #if CV_STRONG_ALIGNMENT
2065
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2066
+ #endif
2067
+ typedef typename V_TypeTraits<_Tp>::w_type w_type;
2068
+ v_reg<w_type, simd512_width / sizeof(w_type)> c;
2069
+ for (int i = 0; i < c.nlanes; i++) {
2070
+ c.s[i] = ptr[i];
2071
+ }
2072
+ return c;
2073
+ }
2074
+ #endif
2075
+
2076
+ /** @brief Load register contents from memory with quad expand
2077
+
2078
+ Same as cv::v_load_expand, but result type is 4 times wider than source.
2079
+ @code{.cpp}
2080
+ char buf[4] = {1, 2, 3, 4}; // type is int8
2081
+ v_int32x4 r = v_load_expand_q(buf); // r = {1, 2, 3, 4} - type is int32
2082
+ @endcode
2083
+ For 8-bit integer source types.
2084
+
2085
+ @note Use vx_load_expand_q version to get maximum available register length
2086
+ result
2087
+ */
2088
+ template <typename _Tp>
2089
+ inline v_reg<typename V_TypeTraits<_Tp>::q_type,
2090
+ simd128_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
2091
+ v_load_expand_q(const _Tp *ptr) {
2092
+ #if CV_STRONG_ALIGNMENT
2093
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2094
+ #endif
2095
+ typedef typename V_TypeTraits<_Tp>::q_type q_type;
2096
+ v_reg<q_type, simd128_width / sizeof(q_type)> c;
2097
+ for (int i = 0; i < c.nlanes; i++) {
2098
+ c.s[i] = ptr[i];
2099
+ }
2100
+ return c;
2101
+ }
2102
+
2103
+ #if CV_SIMD256
2104
+ /** @brief Load register contents from memory with quad expand
2105
+
2106
+ Same as cv::v256_load_expand, but result type is 4 times wider than source.
2107
+ @code{.cpp}
2108
+ char buf[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // type is int8
2109
+ v_int32x8 r = v256_load_expand_q(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8} - type is
2110
+ int32
2111
+ @endcode
2112
+ For 8-bit integer source types.
2113
+
2114
+ @note Check CV_SIMD256 preprocessor definition prior to use.
2115
+ Use vx_load_expand_q version to get maximum available register length result
2116
+ */
2117
+ template <typename _Tp>
2118
+ inline v_reg<typename V_TypeTraits<_Tp>::q_type,
2119
+ simd256_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
2120
+ v256_load_expand_q(const _Tp *ptr) {
2121
+ #if CV_STRONG_ALIGNMENT
2122
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2123
+ #endif
2124
+ typedef typename V_TypeTraits<_Tp>::q_type q_type;
2125
+ v_reg<q_type, simd256_width / sizeof(q_type)> c;
2126
+ for (int i = 0; i < c.nlanes; i++) {
2127
+ c.s[i] = ptr[i];
2128
+ }
2129
+ return c;
2130
+ }
2131
+ #endif
2132
+
2133
+ #if CV_SIMD512
2134
+ /** @brief Load register contents from memory with quad expand
2135
+
2136
+ Same as cv::v512_load_expand, but result type is 4 times wider than source.
2137
+ @code{.cpp}
2138
+ char buf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; // type
2139
+ is int8 v_int32x16 r = v512_load_expand_q(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8,
2140
+ 9, 10, 11, 12, 13, 14, 15, 16} - type is int32
2141
+ @endcode
2142
+ For 8-bit integer source types.
2143
+
2144
+ @note Check CV_SIMD512 preprocessor definition prior to use.
2145
+ Use vx_load_expand_q version to get maximum available register length result
2146
+ */
2147
+ template <typename _Tp>
2148
+ inline v_reg<typename V_TypeTraits<_Tp>::q_type,
2149
+ simd512_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
2150
+ v512_load_expand_q(const _Tp *ptr) {
2151
+ #if CV_STRONG_ALIGNMENT
2152
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2153
+ #endif
2154
+ typedef typename V_TypeTraits<_Tp>::q_type q_type;
2155
+ v_reg<q_type, simd512_width / sizeof(q_type)> c;
2156
+ for (int i = 0; i < c.nlanes; i++) {
2157
+ c.s[i] = ptr[i];
2158
+ }
2159
+ return c;
2160
+ }
2161
+ #endif
2162
+
2163
+ /** @brief Load and deinterleave (2 channels)
2164
+
2165
+ Load data from memory deinterleave and store to 2 registers.
2166
+ Scheme:
2167
+ @code
2168
+ {A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...}
2169
+ @endcode
2170
+ For all types except 64-bit. */
2171
+ template <typename _Tp, int n>
2172
+ inline void v_load_deinterleave(const _Tp *ptr, v_reg<_Tp, n> &a,
2173
+ v_reg<_Tp, n> &b) {
2174
+ #if CV_STRONG_ALIGNMENT
2175
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2176
+ #endif
2177
+ int i, i2;
2178
+ for (i = i2 = 0; i < n; i++, i2 += 2) {
2179
+ a.s[i] = ptr[i2];
2180
+ b.s[i] = ptr[i2 + 1];
2181
+ }
2182
+ }
2183
+
2184
+ /** @brief Load and deinterleave (3 channels)
2185
+
2186
+ Load data from memory deinterleave and store to 3 registers.
2187
+ Scheme:
2188
+ @code
2189
+ {A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}
2190
+ @endcode
2191
+ For all types except 64-bit. */
2192
+ template <typename _Tp, int n>
2193
+ inline void v_load_deinterleave(const _Tp *ptr, v_reg<_Tp, n> &a,
2194
+ v_reg<_Tp, n> &b, v_reg<_Tp, n> &c) {
2195
+ #if CV_STRONG_ALIGNMENT
2196
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2197
+ #endif
2198
+ int i, i3;
2199
+ for (i = i3 = 0; i < n; i++, i3 += 3) {
2200
+ a.s[i] = ptr[i3];
2201
+ b.s[i] = ptr[i3 + 1];
2202
+ c.s[i] = ptr[i3 + 2];
2203
+ }
2204
+ }
2205
+
2206
+ /** @brief Load and deinterleave (4 channels)
2207
+
2208
+ Load data from memory deinterleave and store to 4 registers.
2209
+ Scheme:
2210
+ @code
2211
+ {A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2
2212
+ ...}
2213
+ @endcode
2214
+ For all types except 64-bit. */
2215
+ template <typename _Tp, int n>
2216
+ inline void v_load_deinterleave(const _Tp *ptr, v_reg<_Tp, n> &a,
2217
+ v_reg<_Tp, n> &b, v_reg<_Tp, n> &c,
2218
+ v_reg<_Tp, n> &d) {
2219
+ #if CV_STRONG_ALIGNMENT
2220
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2221
+ #endif
2222
+ int i, i4;
2223
+ for (i = i4 = 0; i < n; i++, i4 += 4) {
2224
+ a.s[i] = ptr[i4];
2225
+ b.s[i] = ptr[i4 + 1];
2226
+ c.s[i] = ptr[i4 + 2];
2227
+ d.s[i] = ptr[i4 + 3];
2228
+ }
2229
+ }
2230
+
2231
+ /** @brief Interleave and store (2 channels)
2232
+
2233
+ Interleave and store data from 2 registers to memory.
2234
+ Scheme:
2235
+ @code
2236
+ {A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...}
2237
+ @endcode
2238
+ For all types except 64-bit. */
2239
+ template <typename _Tp, int n>
2240
+ inline void v_store_interleave(_Tp *ptr, const v_reg<_Tp, n> &a,
2241
+ const v_reg<_Tp, n> &b,
2242
+ hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) {
2243
+ #if CV_STRONG_ALIGNMENT
2244
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2245
+ #endif
2246
+ int i, i2;
2247
+ for (i = i2 = 0; i < n; i++, i2 += 2) {
2248
+ ptr[i2] = a.s[i];
2249
+ ptr[i2 + 1] = b.s[i];
2250
+ }
2251
+ }
2252
+
2253
+ /** @brief Interleave and store (3 channels)
2254
+
2255
+ Interleave and store data from 3 registers to memory.
2256
+ Scheme:
2257
+ @code
2258
+ {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}
2259
+ @endcode
2260
+ For all types except 64-bit. */
2261
+ template <typename _Tp, int n>
2262
+ inline void v_store_interleave(_Tp *ptr, const v_reg<_Tp, n> &a,
2263
+ const v_reg<_Tp, n> &b, const v_reg<_Tp, n> &c,
2264
+ hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) {
2265
+ #if CV_STRONG_ALIGNMENT
2266
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2267
+ #endif
2268
+ int i, i3;
2269
+ for (i = i3 = 0; i < n; i++, i3 += 3) {
2270
+ ptr[i3] = a.s[i];
2271
+ ptr[i3 + 1] = b.s[i];
2272
+ ptr[i3 + 2] = c.s[i];
2273
+ }
2274
+ }
2275
+
2276
+ /** @brief Interleave and store (4 channels)
2277
+
2278
+ Interleave and store data from 4 registers to memory.
2279
+ Scheme:
2280
+ @code
2281
+ {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2
2282
+ ...}
2283
+ @endcode
2284
+ For all types except 64-bit. */
2285
+ template <typename _Tp, int n>
2286
+ inline void v_store_interleave(_Tp *ptr, const v_reg<_Tp, n> &a,
2287
+ const v_reg<_Tp, n> &b, const v_reg<_Tp, n> &c,
2288
+ const v_reg<_Tp, n> &d,
2289
+ hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) {
2290
+ #if CV_STRONG_ALIGNMENT
2291
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2292
+ #endif
2293
+ int i, i4;
2294
+ for (i = i4 = 0; i < n; i++, i4 += 4) {
2295
+ ptr[i4] = a.s[i];
2296
+ ptr[i4 + 1] = b.s[i];
2297
+ ptr[i4 + 2] = c.s[i];
2298
+ ptr[i4 + 3] = d.s[i];
2299
+ }
2300
+ }
2301
+
2302
+ /** @brief Store data to memory
2303
+
2304
+ Store register contents to memory.
2305
+ Scheme:
2306
+ @code
2307
+ REG {A B C D} ==> MEM {A B C D}
2308
+ @endcode
2309
+ Pointer can be unaligned. */
2310
+ template <typename _Tp, int n>
2311
+ inline void v_store(_Tp *ptr, const v_reg<_Tp, n> &a) {
2312
+ #if CV_STRONG_ALIGNMENT
2313
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2314
+ #endif
2315
+ for (int i = 0; i < n; i++)
2316
+ ptr[i] = a.s[i];
2317
+ }
2318
+
2319
+ template <typename _Tp, int n>
2320
+ inline void v_store(_Tp *ptr, const v_reg<_Tp, n> &a, hal::StoreMode /*mode*/) {
2321
+ #if CV_STRONG_ALIGNMENT
2322
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2323
+ #endif
2324
+ v_store(ptr, a);
2325
+ }
2326
+
2327
+ /** @brief Store data to memory (lower half)
2328
+
2329
+ Store lower half of register contents to memory.
2330
+ Scheme:
2331
+ @code
2332
+ REG {A B C D} ==> MEM {A B}
2333
+ @endcode */
2334
+ template <typename _Tp, int n>
2335
+ inline void v_store_low(_Tp *ptr, const v_reg<_Tp, n> &a) {
2336
+ #if CV_STRONG_ALIGNMENT
2337
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2338
+ #endif
2339
+ for (int i = 0; i < (n / 2); i++)
2340
+ ptr[i] = a.s[i];
2341
+ }
2342
+
2343
+ /** @brief Store data to memory (higher half)
2344
+
2345
+ Store higher half of register contents to memory.
2346
+ Scheme:
2347
+ @code
2348
+ REG {A B C D} ==> MEM {C D}
2349
+ @endcode */
2350
+ template <typename _Tp, int n>
2351
+ inline void v_store_high(_Tp *ptr, const v_reg<_Tp, n> &a) {
2352
+ #if CV_STRONG_ALIGNMENT
2353
+ CV_Assert(isAligned<sizeof(_Tp)>(ptr));
2354
+ #endif
2355
+ for (int i = 0; i < (n / 2); i++)
2356
+ ptr[i] = a.s[i + (n / 2)];
2357
+ }
2358
+
2359
+ /** @brief Store data to memory (aligned)
2360
+
2361
+ Store register contents to memory.
2362
+ Scheme:
2363
+ @code
2364
+ REG {A B C D} ==> MEM {A B C D}
2365
+ @endcode
2366
+ Pointer __should__ be aligned by 16-byte boundary. */
2367
+ template <typename _Tp, int n>
2368
+ inline void v_store_aligned(_Tp *ptr, const v_reg<_Tp, n> &a) {
2369
+ CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
2370
+ v_store(ptr, a);
2371
+ }
2372
+
2373
+ template <typename _Tp, int n>
2374
+ inline void v_store_aligned_nocache(_Tp *ptr, const v_reg<_Tp, n> &a) {
2375
+ CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
2376
+ v_store(ptr, a);
2377
+ }
2378
+
2379
+ template <typename _Tp, int n>
2380
+ inline void v_store_aligned(_Tp *ptr, const v_reg<_Tp, n> &a,
2381
+ hal::StoreMode /*mode*/) {
2382
+ CV_Assert(isAligned<sizeof(v_reg<_Tp, n>)>(ptr));
2383
+ v_store(ptr, a);
2384
+ }
2385
+
2386
+ /** @brief Combine vector from first elements of two vectors
2387
+
2388
+ Scheme:
2389
+ @code
2390
+ {A1 A2 A3 A4}
2391
+ {B1 B2 B3 B4}
2392
+ ---------------
2393
+ {A1 A2 B1 B2}
2394
+ @endcode
2395
+ For all types except 64-bit. */
2396
+ template <typename _Tp, int n>
2397
+ inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n> &a,
2398
+ const v_reg<_Tp, n> &b) {
2399
+ v_reg<_Tp, n> c;
2400
+ for (int i = 0; i < (n / 2); i++) {
2401
+ c.s[i] = a.s[i];
2402
+ c.s[i + (n / 2)] = b.s[i];
2403
+ }
2404
+ return c;
2405
+ }
2406
+
2407
+ /** @brief Combine vector from last elements of two vectors
2408
+
2409
+ Scheme:
2410
+ @code
2411
+ {A1 A2 A3 A4}
2412
+ {B1 B2 B3 B4}
2413
+ ---------------
2414
+ {A3 A4 B3 B4}
2415
+ @endcode
2416
+ For all types except 64-bit. */
2417
+ template <typename _Tp, int n>
2418
+ inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n> &a,
2419
+ const v_reg<_Tp, n> &b) {
2420
+ v_reg<_Tp, n> c;
2421
+ for (int i = 0; i < (n / 2); i++) {
2422
+ c.s[i] = a.s[i + (n / 2)];
2423
+ c.s[i + (n / 2)] = b.s[i + (n / 2)];
2424
+ }
2425
+ return c;
2426
+ }
2427
+
2428
+ /** @brief Combine two vectors from lower and higher parts of two other vectors
2429
+
2430
+ @code{.cpp}
2431
+ low = cv::v_combine_low(a, b);
2432
+ high = cv::v_combine_high(a, b);
2433
+ @endcode */
2434
+ template <typename _Tp, int n>
2435
+ inline void v_recombine(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b,
2436
+ v_reg<_Tp, n> &low, v_reg<_Tp, n> &high) {
2437
+ for (int i = 0; i < (n / 2); i++) {
2438
+ low.s[i] = a.s[i];
2439
+ low.s[i + (n / 2)] = b.s[i];
2440
+ high.s[i] = a.s[i + (n / 2)];
2441
+ high.s[i + (n / 2)] = b.s[i + (n / 2)];
2442
+ }
2443
+ }
2444
+
2445
+ /** @brief Vector reverse order
2446
+
2447
+ Reverse the order of the vector
2448
+ Scheme:
2449
+ @code
2450
+ REG {A1 ... An} ==> REG {An ... A1}
2451
+ @endcode
2452
+ For all types. */
2453
+ template <typename _Tp, int n>
2454
+ inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n> &a) {
2455
+ v_reg<_Tp, n> c;
2456
+ for (int i = 0; i < n; i++)
2457
+ c.s[i] = a.s[n - i - 1];
2458
+ return c;
2459
+ }
2460
+
2461
+ /** @brief Vector extract
2462
+
2463
+ Scheme:
2464
+ @code
2465
+ {A1 A2 A3 A4}
2466
+ {B1 B2 B3 B4}
2467
+ ========================
2468
+ shift = 1 {A2 A3 A4 B1}
2469
+ shift = 2 {A3 A4 B1 B2}
2470
+ shift = 3 {A4 B1 B2 B3}
2471
+ @endcode
2472
+ Restriction: 0 <= shift < nlanes
2473
+
2474
+ Usage:
2475
+ @code
2476
+ v_int32x4 a, b, c;
2477
+ c = v_extract<2>(a, b);
2478
+ @endcode
2479
+ For all types. */
2480
+ template <int s, typename _Tp, int n>
2481
+ inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n> &a, const v_reg<_Tp, n> &b) {
2482
+ v_reg<_Tp, n> r;
2483
+ const int shift = n - s;
2484
+ int i = 0;
2485
+ for (; i < shift; ++i)
2486
+ r.s[i] = a.s[i + s];
2487
+ for (; i < n; ++i)
2488
+ r.s[i] = b.s[i - shift];
2489
+ return r;
2490
+ }
2491
+
2492
+ /** @brief Vector extract
2493
+
2494
+ Scheme:
2495
+ Return the s-th element of v.
2496
+ Restriction: 0 <= s < nlanes
2497
+
2498
+ Usage:
2499
+ @code
2500
+ v_int32x4 a;
2501
+ int r;
2502
+ r = v_extract_n<2>(a);
2503
+ @endcode
2504
+ For all types. */
2505
+ template <int s, typename _Tp, int n>
2506
+ inline _Tp v_extract_n(const v_reg<_Tp, n> &v) {
2507
+ CV_DbgAssert(s >= 0 && s < n);
2508
+ return v.s[s];
2509
+ }
2510
+
2511
+ /** @brief Broadcast i-th element of vector
2512
+
2513
+ Scheme:
2514
+ @code
2515
+ { v[0] v[1] v[2] ... v[SZ] } => { v[i], v[i], v[i] ... v[i] }
2516
+ @endcode
2517
+ Restriction: 0 <= i < nlanes
2518
+ Supported types: 32-bit integers and floats (s32/u32/f32)
2519
+ */
2520
+ template <int i, typename _Tp, int n>
2521
+ inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n> &a) {
2522
+ CV_DbgAssert(i >= 0 && i < n);
2523
+ return v_reg<_Tp, n>::all(a.s[i]);
2524
+ }
2525
+
2526
+ /** @brief Round elements
2527
+
2528
+ Rounds each value. Input type is float vector ==> output type is int vector.
2529
+ @note Only for floating point types.
2530
+ */
2531
+ template <int n> inline v_reg<int, n> v_round(const v_reg<float, n> &a) {
2532
+ v_reg<int, n> c;
2533
+ for (int i = 0; i < n; i++)
2534
+ c.s[i] = cvRound(a.s[i]);
2535
+ return c;
2536
+ }
2537
+
2538
+ /** @overload */
2539
+ template <int n>
2540
+ inline v_reg<int, n * 2> v_round(const v_reg<double, n> &a,
2541
+ const v_reg<double, n> &b) {
2542
+ v_reg<int, n * 2> c;
2543
+ for (int i = 0; i < n; i++) {
2544
+ c.s[i] = cvRound(a.s[i]);
2545
+ c.s[i + n] = cvRound(b.s[i]);
2546
+ }
2547
+ return c;
2548
+ }
2549
+
2550
+ /** @brief Floor elements
2551
+
2552
+ Floor each value. Input type is float vector ==> output type is int vector.
2553
+ @note Only for floating point types.
2554
+ */
2555
+ template <int n> inline v_reg<int, n> v_floor(const v_reg<float, n> &a) {
2556
+ v_reg<int, n> c;
2557
+ for (int i = 0; i < n; i++)
2558
+ c.s[i] = cvFloor(a.s[i]);
2559
+ return c;
2560
+ }
2561
+
2562
+ /** @brief Ceil elements
2563
+
2564
+ Ceil each value. Input type is float vector ==> output type is int vector.
2565
+ @note Only for floating point types.
2566
+ */
2567
+ template <int n> inline v_reg<int, n> v_ceil(const v_reg<float, n> &a) {
2568
+ v_reg<int, n> c;
2569
+ for (int i = 0; i < n; i++)
2570
+ c.s[i] = cvCeil(a.s[i]);
2571
+ return c;
2572
+ }
2573
+
2574
+ /** @brief Truncate elements
2575
+
2576
+ Truncate each value. Input type is float vector ==> output type is int vector.
2577
+ @note Only for floating point types.
2578
+ */
2579
+ template <int n> inline v_reg<int, n> v_trunc(const v_reg<float, n> &a) {
2580
+ v_reg<int, n> c;
2581
+ for (int i = 0; i < n; i++)
2582
+ c.s[i] = (int)(a.s[i]);
2583
+ return c;
2584
+ }
2585
+
2586
+ /** @overload */
2587
+ template <int n> inline v_reg<int, n * 2> v_round(const v_reg<double, n> &a) {
2588
+ v_reg<int, n * 2> c;
2589
+ for (int i = 0; i < n; i++) {
2590
+ c.s[i] = cvRound(a.s[i]);
2591
+ c.s[i + n] = 0;
2592
+ }
2593
+ return c;
2594
+ }
2595
+
2596
+ /** @overload */
2597
+ template <int n> inline v_reg<int, n * 2> v_floor(const v_reg<double, n> &a) {
2598
+ v_reg<int, n * 2> c;
2599
+ for (int i = 0; i < n; i++) {
2600
+ c.s[i] = cvFloor(a.s[i]);
2601
+ c.s[i + n] = 0;
2602
+ }
2603
+ return c;
2604
+ }
2605
+
2606
+ /** @overload */
2607
+ template <int n> inline v_reg<int, n * 2> v_ceil(const v_reg<double, n> &a) {
2608
+ v_reg<int, n * 2> c;
2609
+ for (int i = 0; i < n; i++) {
2610
+ c.s[i] = cvCeil(a.s[i]);
2611
+ c.s[i + n] = 0;
2612
+ }
2613
+ return c;
2614
+ }
2615
+
2616
+ /** @overload */
2617
+ template <int n> inline v_reg<int, n * 2> v_trunc(const v_reg<double, n> &a) {
2618
+ v_reg<int, n * 2> c;
2619
+ for (int i = 0; i < n; i++) {
2620
+ c.s[i] = (int)(a.s[i]);
2621
+ c.s[i + n] = 0;
2622
+ }
2623
+ return c;
2624
+ }
2625
+
2626
+ /** @brief Convert to float
2627
+
2628
+ Supported input type is cv::v_int32. */
2629
+ template <int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n> &a) {
2630
+ v_reg<float, n> c;
2631
+ for (int i = 0; i < n; i++)
2632
+ c.s[i] = (float)a.s[i];
2633
+ return c;
2634
+ }
2635
+
2636
+ /** @brief Convert lower half to float
2637
+
2638
+ Supported input type is cv::v_float64. */
2639
+ template <int n>
2640
+ inline v_reg<float, n * 2> v_cvt_f32(const v_reg<double, n> &a) {
2641
+ v_reg<float, n * 2> c;
2642
+ for (int i = 0; i < n; i++) {
2643
+ c.s[i] = (float)a.s[i];
2644
+ c.s[i + n] = 0;
2645
+ }
2646
+ return c;
2647
+ }
2648
+
2649
+ /** @brief Convert to float
2650
+
2651
+ Supported input type is cv::v_float64. */
2652
+ template <int n>
2653
+ inline v_reg<float, n * 2> v_cvt_f32(const v_reg<double, n> &a,
2654
+ const v_reg<double, n> &b) {
2655
+ v_reg<float, n * 2> c;
2656
+ for (int i = 0; i < n; i++) {
2657
+ c.s[i] = (float)a.s[i];
2658
+ c.s[i + n] = (float)b.s[i];
2659
+ }
2660
+ return c;
2661
+ }
2662
+
2663
+ /** @brief Convert lower half to double
2664
+
2665
+ Supported input type is cv::v_int32. */
2666
+ template <int n>
2667
+ CV_INLINE v_reg<double, n / 2> v_cvt_f64(const v_reg<int, n> &a) {
2668
+ v_reg<double, (n / 2)> c;
2669
+ for (int i = 0; i < (n / 2); i++)
2670
+ c.s[i] = (double)a.s[i];
2671
+ return c;
2672
+ }
2673
+
2674
+ /** @brief Convert to double high part of vector
2675
+
2676
+ Supported input type is cv::v_int32. */
2677
+ template <int n>
2678
+ CV_INLINE v_reg<double, (n / 2)> v_cvt_f64_high(const v_reg<int, n> &a) {
2679
+ v_reg<double, (n / 2)> c;
2680
+ for (int i = 0; i < (n / 2); i++)
2681
+ c.s[i] = (double)a.s[i + (n / 2)];
2682
+ return c;
2683
+ }
2684
+
2685
+ /** @brief Convert lower half to double
2686
+
2687
+ Supported input type is cv::v_float32. */
2688
+ template <int n>
2689
+ CV_INLINE v_reg<double, (n / 2)> v_cvt_f64(const v_reg<float, n> &a) {
2690
+ v_reg<double, (n / 2)> c;
2691
+ for (int i = 0; i < (n / 2); i++)
2692
+ c.s[i] = (double)a.s[i];
2693
+ return c;
2694
+ }
2695
+
2696
+ /** @brief Convert to double high part of vector
2697
+
2698
+ Supported input type is cv::v_float32. */
2699
+ template <int n>
2700
+ CV_INLINE v_reg<double, (n / 2)> v_cvt_f64_high(const v_reg<float, n> &a) {
2701
+ v_reg<double, (n / 2)> c;
2702
+ for (int i = 0; i < (n / 2); i++)
2703
+ c.s[i] = (double)a.s[i + (n / 2)];
2704
+ return c;
2705
+ }
2706
+
2707
+ /** @brief Convert to double
2708
+
2709
+ Supported input type is cv::v_int64. */
2710
+ template <int n>
2711
+ CV_INLINE v_reg<double, n> v_cvt_f64(const v_reg<int64, n> &a) {
2712
+ v_reg<double, n> c;
2713
+ for (int i = 0; i < n; i++)
2714
+ c.s[i] = (double)a.s[i];
2715
+ return c;
2716
+ }
2717
+
2718
+ template <typename _Tp>
2719
+ inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut(const _Tp *tab,
2720
+ const int *idx) {
2721
+ v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
2722
+ for (int i = 0; i < c.nlanes; i++)
2723
+ c.s[i] = tab[idx[i]];
2724
+ return c;
2725
+ }
2726
+ template <typename _Tp>
2727
+ inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_pairs(const _Tp *tab,
2728
+ const int *idx) {
2729
+ v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
2730
+ for (int i = 0; i < c.nlanes; i++)
2731
+ c.s[i] = tab[idx[i / 2] + i % 2];
2732
+ return c;
2733
+ }
2734
+ template <typename _Tp>
2735
+ inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_quads(const _Tp *tab,
2736
+ const int *idx) {
2737
+ v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
2738
+ for (int i = 0; i < c.nlanes; i++)
2739
+ c.s[i] = tab[idx[i / 4] + i % 4];
2740
+ return c;
2741
+ }
2742
+
2743
+ template <int n>
2744
+ inline v_reg<int, n> v_lut(const int *tab, const v_reg<int, n> &idx) {
2745
+ v_reg<int, n> c;
2746
+ for (int i = 0; i < n; i++)
2747
+ c.s[i] = tab[idx.s[i]];
2748
+ return c;
2749
+ }
2750
+
2751
+ template <int n>
2752
+ inline v_reg<unsigned, n> v_lut(const unsigned *tab, const v_reg<int, n> &idx) {
2753
+ v_reg<int, n> c;
2754
+ for (int i = 0; i < n; i++)
2755
+ c.s[i] = tab[idx.s[i]];
2756
+ return c;
2757
+ }
2758
+
2759
+ template <int n>
2760
+ inline v_reg<float, n> v_lut(const float *tab, const v_reg<int, n> &idx) {
2761
+ v_reg<float, n> c;
2762
+ for (int i = 0; i < n; i++)
2763
+ c.s[i] = tab[idx.s[i]];
2764
+ return c;
2765
+ }
2766
+
2767
+ template <int n>
2768
+ inline v_reg<double, n / 2> v_lut(const double *tab, const v_reg<int, n> &idx) {
2769
+ v_reg<double, n / 2> c;
2770
+ for (int i = 0; i < n / 2; i++)
2771
+ c.s[i] = tab[idx.s[i]];
2772
+ return c;
2773
+ }
2774
+
2775
+ template <int n>
2776
+ inline void v_lut_deinterleave(const float *tab, const v_reg<int, n> &idx,
2777
+ v_reg<float, n> &x, v_reg<float, n> &y) {
2778
+ for (int i = 0; i < n; i++) {
2779
+ int j = idx.s[i];
2780
+ x.s[i] = tab[j];
2781
+ y.s[i] = tab[j + 1];
2782
+ }
2783
+ }
2784
+
2785
+ template <int n>
2786
+ inline void v_lut_deinterleave(const double *tab, const v_reg<int, n * 2> &idx,
2787
+ v_reg<double, n> &x, v_reg<double, n> &y) {
2788
+ for (int i = 0; i < n; i++) {
2789
+ int j = idx.s[i];
2790
+ x.s[i] = tab[j];
2791
+ y.s[i] = tab[j + 1];
2792
+ }
2793
+ }
2794
+
2795
+ template <typename _Tp, int n>
2796
+ inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n> &vec) {
2797
+ v_reg<_Tp, n> c;
2798
+ for (int i = 0; i < n / 4; i++) {
2799
+ c.s[4 * i] = vec.s[4 * i];
2800
+ c.s[4 * i + 1] = vec.s[4 * i + 2];
2801
+ c.s[4 * i + 2] = vec.s[4 * i + 1];
2802
+ c.s[4 * i + 3] = vec.s[4 * i + 3];
2803
+ }
2804
+ return c;
2805
+ }
2806
+
2807
+ template <typename _Tp, int n>
2808
+ inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n> &vec) {
2809
+ v_reg<_Tp, n> c;
2810
+ for (int i = 0; i < n / 8; i++) {
2811
+ c.s[8 * i] = vec.s[8 * i];
2812
+ c.s[8 * i + 1] = vec.s[8 * i + 4];
2813
+ c.s[8 * i + 2] = vec.s[8 * i + 1];
2814
+ c.s[8 * i + 3] = vec.s[8 * i + 5];
2815
+ c.s[8 * i + 4] = vec.s[8 * i + 2];
2816
+ c.s[8 * i + 5] = vec.s[8 * i + 6];
2817
+ c.s[8 * i + 6] = vec.s[8 * i + 3];
2818
+ c.s[8 * i + 7] = vec.s[8 * i + 7];
2819
+ }
2820
+ return c;
2821
+ }
2822
+
2823
+ template <typename _Tp, int n>
2824
+ inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n> &vec) {
2825
+ v_reg<_Tp, n> c;
2826
+ for (int i = 0; i < n / 4; i++) {
2827
+ c.s[3 * i] = vec.s[4 * i];
2828
+ c.s[3 * i + 1] = vec.s[4 * i + 1];
2829
+ c.s[3 * i + 2] = vec.s[4 * i + 2];
2830
+ }
2831
+ return c;
2832
+ }
2833
+
2834
+ /** @brief Transpose 4x4 matrix
2835
+
2836
+ Scheme:
2837
+ @code
2838
+ a0 {A1 A2 A3 A4}
2839
+ a1 {B1 B2 B3 B4}
2840
+ a2 {C1 C2 C3 C4}
2841
+ a3 {D1 D2 D3 D4}
2842
+ ===============
2843
+ b0 {A1 B1 C1 D1}
2844
+ b1 {A2 B2 C2 D2}
2845
+ b2 {A3 B3 C3 D3}
2846
+ b3 {A4 B4 C4 D4}
2847
+ @endcode
2848
+ */
2849
+ template <typename _Tp, int n>
2850
+ inline void v_transpose4x4(v_reg<_Tp, n> &a0, const v_reg<_Tp, n> &a1,
2851
+ const v_reg<_Tp, n> &a2, const v_reg<_Tp, n> &a3,
2852
+ v_reg<_Tp, n> &b0, v_reg<_Tp, n> &b1,
2853
+ v_reg<_Tp, n> &b2, v_reg<_Tp, n> &b3) {
2854
+ for (int i = 0; i < n / 4; i++) {
2855
+ b0.s[0 + i * 4] = a0.s[0 + i * 4];
2856
+ b0.s[1 + i * 4] = a1.s[0 + i * 4];
2857
+ b0.s[2 + i * 4] = a2.s[0 + i * 4];
2858
+ b0.s[3 + i * 4] = a3.s[0 + i * 4];
2859
+ b1.s[0 + i * 4] = a0.s[1 + i * 4];
2860
+ b1.s[1 + i * 4] = a1.s[1 + i * 4];
2861
+ b1.s[2 + i * 4] = a2.s[1 + i * 4];
2862
+ b1.s[3 + i * 4] = a3.s[1 + i * 4];
2863
+ b2.s[0 + i * 4] = a0.s[2 + i * 4];
2864
+ b2.s[1 + i * 4] = a1.s[2 + i * 4];
2865
+ b2.s[2 + i * 4] = a2.s[2 + i * 4];
2866
+ b2.s[3 + i * 4] = a3.s[2 + i * 4];
2867
+ b3.s[0 + i * 4] = a0.s[3 + i * 4];
2868
+ b3.s[1 + i * 4] = a1.s[3 + i * 4];
2869
+ b3.s[2 + i * 4] = a2.s[3 + i * 4];
2870
+ b3.s[3 + i * 4] = a3.s[3 + i * 4];
2871
+ }
2872
+ }
2873
+
2874
+ //! @brief Helper macro
2875
+ //! @ingroup core_hal_intrin_impl
2876
+ #define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \
2877
+ inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); } \
2878
+ template <> inline _Tpvec v_setzero_() { return _Tpvec::zero(); }
2879
+
2880
+ //! @name Init with zero
2881
+ //! @{
2882
+ //! @brief Create new vector with zero elements
2883
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, v, u8)
2884
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, v, s8)
2885
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, v, u16)
2886
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, v, s16)
2887
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, v, u32)
2888
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, v, s32)
2889
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, v, f32)
2890
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, v, f64)
2891
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, v, u64)
2892
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, v, s64)
2893
+
2894
+ #if CV_SIMD256
2895
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x32, v256, u8)
2896
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x32, v256, s8)
2897
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x16, v256, u16)
2898
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x16, v256, s16)
2899
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x8, v256, u32)
2900
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x8, v256, s32)
2901
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x8, v256, f32)
2902
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x4, v256, f64)
2903
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x4, v256, u64)
2904
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x4, v256, s64)
2905
+ #endif
2906
+
2907
+ #if CV_SIMD512
2908
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x64, v512, u8)
2909
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x64, v512, s8)
2910
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x32, v512, u16)
2911
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x32, v512, s16)
2912
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x16, v512, u32)
2913
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x16, v512, s32)
2914
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x16, v512, f32)
2915
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x8, v512, f64)
2916
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x8, v512, u64)
2917
+ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x8, v512, s64)
2918
+ #endif
2919
+ //! @}
2920
+
2921
+ //! @brief Helper macro
2922
+ //! @ingroup core_hal_intrin_impl
2923
+ #define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \
2924
+ inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); } \
2925
+ template <> inline _Tpvec v_setall_(_Tp val) { return _Tpvec::all(val); }
2926
+
2927
+ //! @name Init with value
2928
+ //! @{
2929
+ //! @brief Create new vector with elements set to a specific value
2930
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, v, u8)
2931
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, v, s8)
2932
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, v, u16)
2933
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, v, s16)
2934
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, v, u32)
2935
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, v, s32)
2936
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, v, f32)
2937
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, v, f64)
2938
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, v, u64)
2939
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, v, s64)
2940
+
2941
+ #if CV_SIMD256
2942
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x32, uchar, v256, u8)
2943
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x32, schar, v256, s8)
2944
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x16, ushort, v256, u16)
2945
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x16, short, v256, s16)
2946
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x8, unsigned, v256, u32)
2947
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x8, int, v256, s32)
2948
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x8, float, v256, f32)
2949
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x4, double, v256, f64)
2950
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x4, uint64, v256, u64)
2951
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x4, int64, v256, s64)
2952
+ #endif
2953
+
2954
+ #if CV_SIMD512
2955
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x64, uchar, v512, u8)
2956
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x64, schar, v512, s8)
2957
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x32, ushort, v512, u16)
2958
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x32, short, v512, s16)
2959
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x16, unsigned, v512, u32)
2960
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x16, int, v512, s32)
2961
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x16, float, v512, f32)
2962
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x8, double, v512, f64)
2963
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x8, uint64, v512, u64)
2964
+ OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x8, int64, v512, s64)
2965
+ #endif
2966
+ //! @}
2967
+
2968
+ //! @brief Helper macro
2969
+ //! @ingroup core_hal_intrin_impl
2970
+ #define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \
2971
+ template <typename _Tp0, int n0> \
2972
+ inline v_reg<_Tp, n0 * sizeof(_Tp0) / sizeof(_Tp)> \
2973
+ v_reinterpret_as_##suffix(const v_reg<_Tp0, n0> &a) { \
2974
+ return a.template reinterpret_as<_Tp, n0 * sizeof(_Tp0) / sizeof(_Tp)>(); \
2975
+ }
2976
+
2977
+ //! @name Reinterpret
2978
+ //! @{
2979
+ //! @brief Convert vector to different type without modifying underlying data.
2980
+ OPENCV_HAL_IMPL_C_REINTERPRET(uchar, u8)
2981
+ OPENCV_HAL_IMPL_C_REINTERPRET(schar, s8)
2982
+ OPENCV_HAL_IMPL_C_REINTERPRET(ushort, u16)
2983
+ OPENCV_HAL_IMPL_C_REINTERPRET(short, s16)
2984
+ OPENCV_HAL_IMPL_C_REINTERPRET(unsigned, u32)
2985
+ OPENCV_HAL_IMPL_C_REINTERPRET(int, s32)
2986
+ OPENCV_HAL_IMPL_C_REINTERPRET(float, f32)
2987
+ OPENCV_HAL_IMPL_C_REINTERPRET(double, f64)
2988
+ OPENCV_HAL_IMPL_C_REINTERPRET(uint64, u64)
2989
+ OPENCV_HAL_IMPL_C_REINTERPRET(int64, s64)
2990
+ //! @}
2991
+
2992
+ //! @brief Helper macro
2993
+ //! @ingroup core_hal_intrin_impl
2994
+ #define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \
2995
+ template <int shift, int n> \
2996
+ inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n> &a) { \
2997
+ return v_shl(a, shift); \
2998
+ }
2999
+
3000
+ //! @name Left shift
3001
+ //! @{
3002
+ //! @brief Shift left
3003
+ OPENCV_HAL_IMPL_C_SHIFTL(ushort)
3004
+ OPENCV_HAL_IMPL_C_SHIFTL(short)
3005
+ OPENCV_HAL_IMPL_C_SHIFTL(unsigned)
3006
+ OPENCV_HAL_IMPL_C_SHIFTL(int)
3007
+ OPENCV_HAL_IMPL_C_SHIFTL(uint64)
3008
+ OPENCV_HAL_IMPL_C_SHIFTL(int64)
3009
+ //! @}
3010
+
3011
+ //! @brief Helper macro
3012
+ //! @ingroup core_hal_intrin_impl
3013
+ #define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \
3014
+ template <int shift, int n> \
3015
+ inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n> &a) { \
3016
+ return v_shr(a, shift); \
3017
+ }
3018
+
3019
+ //! @name Right shift
3020
+ //! @{
3021
+ //! @brief Shift right
3022
+ OPENCV_HAL_IMPL_C_SHIFTR(ushort)
3023
+ OPENCV_HAL_IMPL_C_SHIFTR(short)
3024
+ OPENCV_HAL_IMPL_C_SHIFTR(unsigned)
3025
+ OPENCV_HAL_IMPL_C_SHIFTR(int)
3026
+ OPENCV_HAL_IMPL_C_SHIFTR(uint64)
3027
+ OPENCV_HAL_IMPL_C_SHIFTR(int64)
3028
+ //! @}
3029
+
3030
+ //! @brief Helper macro
3031
+ //! @ingroup core_hal_intrin_impl
3032
+ #define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \
3033
+ template <int shift, int n> \
3034
+ inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n> &a) { \
3035
+ v_reg<_Tp, n> c; \
3036
+ for (int i = 0; i < n; i++) \
3037
+ c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3038
+ return c; \
3039
+ }
3040
+
3041
+ //! @name Rounding shift
3042
+ //! @{
3043
+ //! @brief Rounding shift right
3044
+ OPENCV_HAL_IMPL_C_RSHIFTR(ushort)
3045
+ OPENCV_HAL_IMPL_C_RSHIFTR(short)
3046
+ OPENCV_HAL_IMPL_C_RSHIFTR(unsigned)
3047
+ OPENCV_HAL_IMPL_C_RSHIFTR(int)
3048
+ OPENCV_HAL_IMPL_C_RSHIFTR(uint64)
3049
+ OPENCV_HAL_IMPL_C_RSHIFTR(int64)
3050
+ //! @}
3051
+
3052
+ //! @brief Helper macro
3053
+ //! @ingroup core_hal_intrin_impl
3054
+ #define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \
3055
+ template <int n> \
3056
+ inline v_reg<_Tpn, 2 * n> v_##pack_suffix(const v_reg<_Tp, n> &a, \
3057
+ const v_reg<_Tp, n> &b) { \
3058
+ v_reg<_Tpn, 2 * n> c; \
3059
+ for (int i = 0; i < n; i++) { \
3060
+ c.s[i] = cast<_Tpn>(a.s[i]); \
3061
+ c.s[i + n] = cast<_Tpn>(b.s[i]); \
3062
+ } \
3063
+ return c; \
3064
+ }
3065
+
3066
+ //! @name Pack
3067
+ //! @{
3068
+ //! @brief Pack values from two vectors to one
3069
+ //!
3070
+ //! Return vector type have twice more elements than input vector types. Variant
3071
+ //! with _u_ suffix also converts to corresponding unsigned type.
3072
+ //!
3073
+ //! - pack: for 16-, 32- and 64-bit integer input types
3074
+ //! - pack_u: for 16- and 32-bit signed integer input types
3075
+ //!
3076
+ //! @note All variants except 64-bit use saturation.
3077
+ OPENCV_HAL_IMPL_C_PACK(ushort, uchar, pack, saturate_cast)
3078
+ OPENCV_HAL_IMPL_C_PACK(short, schar, pack, saturate_cast)
3079
+ OPENCV_HAL_IMPL_C_PACK(unsigned, ushort, pack, saturate_cast)
3080
+ OPENCV_HAL_IMPL_C_PACK(int, short, pack, saturate_cast)
3081
+ OPENCV_HAL_IMPL_C_PACK(uint64, unsigned, pack, static_cast)
3082
+ OPENCV_HAL_IMPL_C_PACK(int64, int, pack, static_cast)
3083
+ OPENCV_HAL_IMPL_C_PACK(short, uchar, pack_u, saturate_cast)
3084
+ OPENCV_HAL_IMPL_C_PACK(int, ushort, pack_u, saturate_cast)
3085
+ //! @}
3086
+
3087
+ //! @brief Helper macro
3088
+ //! @ingroup core_hal_intrin_impl
3089
+ #define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \
3090
+ template <int shift, int n> \
3091
+ inline v_reg<_Tpn, 2 * n> v_rshr_##pack_suffix(const v_reg<_Tp, n> &a, \
3092
+ const v_reg<_Tp, n> &b) { \
3093
+ v_reg<_Tpn, 2 * n> c; \
3094
+ for (int i = 0; i < n; i++) { \
3095
+ c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3096
+ c.s[i + n] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3097
+ } \
3098
+ return c; \
3099
+ }
3100
+
3101
+ //! @name Pack with rounding shift
3102
+ //! @{
3103
+ //! @brief Pack values from two vectors to one with rounding shift
3104
+ //!
3105
+ //! Values from the input vectors will be shifted right by _n_ bits with
3106
+ //! rounding, converted to narrower type and returned in the result vector.
3107
+ //! Variant with _u_ suffix converts to unsigned type.
3108
+ //!
3109
+ //! - pack: for 16-, 32- and 64-bit integer input types
3110
+ //! - pack_u: for 16- and 32-bit signed integer input types
3111
+ //!
3112
+ //! @note All variants except 64-bit use saturation.
3113
+ OPENCV_HAL_IMPL_C_RSHR_PACK(ushort, uchar, pack, saturate_cast)
3114
+ OPENCV_HAL_IMPL_C_RSHR_PACK(short, schar, pack, saturate_cast)
3115
+ OPENCV_HAL_IMPL_C_RSHR_PACK(unsigned, ushort, pack, saturate_cast)
3116
+ OPENCV_HAL_IMPL_C_RSHR_PACK(int, short, pack, saturate_cast)
3117
+ OPENCV_HAL_IMPL_C_RSHR_PACK(uint64, unsigned, pack, static_cast)
3118
+ OPENCV_HAL_IMPL_C_RSHR_PACK(int64, int, pack, static_cast)
3119
+ OPENCV_HAL_IMPL_C_RSHR_PACK(short, uchar, pack_u, saturate_cast)
3120
+ OPENCV_HAL_IMPL_C_RSHR_PACK(int, ushort, pack_u, saturate_cast)
3121
+ //! @}
3122
+
3123
+ //! @brief Helper macro
3124
+ //! @ingroup core_hal_intrin_impl
3125
+ #define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3126
+ template <int n> \
3127
+ inline void v_##pack_suffix##_store(_Tpn *ptr, const v_reg<_Tp, n> &a) { \
3128
+ for (int i = 0; i < n; i++) \
3129
+ ptr[i] = cast<_Tpn>(a.s[i]); \
3130
+ }
3131
+
3132
+ //! @name Pack and store
3133
+ //! @{
3134
+ //! @brief Store values from the input vector into memory with pack
3135
+ //!
3136
+ //! Values will be stored into memory with conversion to narrower type.
3137
+ //! Variant with _u_ suffix converts to corresponding unsigned type.
3138
+ //!
3139
+ //! - pack: for 16-, 32- and 64-bit integer input types
3140
+ //! - pack_u: for 16- and 32-bit signed integer input types
3141
+ //!
3142
+ //! @note All variants except 64-bit use saturation.
3143
+ OPENCV_HAL_IMPL_C_PACK_STORE(ushort, uchar, pack, saturate_cast)
3144
+ OPENCV_HAL_IMPL_C_PACK_STORE(short, schar, pack, saturate_cast)
3145
+ OPENCV_HAL_IMPL_C_PACK_STORE(unsigned, ushort, pack, saturate_cast)
3146
+ OPENCV_HAL_IMPL_C_PACK_STORE(int, short, pack, saturate_cast)
3147
+ OPENCV_HAL_IMPL_C_PACK_STORE(uint64, unsigned, pack, static_cast)
3148
+ OPENCV_HAL_IMPL_C_PACK_STORE(int64, int, pack, static_cast)
3149
+ OPENCV_HAL_IMPL_C_PACK_STORE(short, uchar, pack_u, saturate_cast)
3150
+ OPENCV_HAL_IMPL_C_PACK_STORE(int, ushort, pack_u, saturate_cast)
3151
+ //! @}
3152
+
3153
+ //! @brief Helper macro
3154
+ //! @ingroup core_hal_intrin_impl
3155
+ #define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \
3156
+ template <int shift, int n> \
3157
+ inline void v_rshr_##pack_suffix##_store(_Tpn *ptr, \
3158
+ const v_reg<_Tp, n> &a) { \
3159
+ for (int i = 0; i < n; i++) \
3160
+ ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \
3161
+ }
3162
+
3163
+ //! @name Pack and store with rounding shift
3164
+ //! @{
3165
+ //! @brief Store values from the input vector into memory with pack
3166
+ //!
3167
+ //! Values will be shifted _n_ bits right with rounding, converted to narrower
3168
+ //! type and stored into memory. Variant with _u_ suffix converts to unsigned
3169
+ //! type.
3170
+ //!
3171
+ //! - pack: for 16-, 32- and 64-bit integer input types
3172
+ //! - pack_u: for 16- and 32-bit signed integer input types
3173
+ //!
3174
+ //! @note All variants except 64-bit use saturation.
3175
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(ushort, uchar, pack, saturate_cast)
3176
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(short, schar, pack, saturate_cast)
3177
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(unsigned, ushort, pack, saturate_cast)
3178
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int, short, pack, saturate_cast)
3179
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(uint64, unsigned, pack, static_cast)
3180
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int64, int, pack, static_cast)
3181
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(short, uchar, pack_u, saturate_cast)
3182
+ OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int, ushort, pack_u, saturate_cast)
3183
+ //! @}
3184
+
3185
+ //! @cond IGNORED
3186
+ template <typename _Tpm, typename _Tp, int n>
3187
+ inline void _pack_b(_Tpm *mptr, const v_reg<_Tp, n> &a,
3188
+ const v_reg<_Tp, n> &b) {
3189
+ for (int i = 0; i < n; ++i) {
3190
+ mptr[i] = (_Tpm)a.s[i];
3191
+ mptr[i + n] = (_Tpm)b.s[i];
3192
+ }
3193
+ }
3194
+ //! @endcond
3195
+
3196
+ //! @name Pack boolean values
3197
+ //! @{
3198
+ //! @brief Pack boolean values from multiple vectors to one unsigned 8-bit
3199
+ //! integer vector
3200
+ //!
3201
+ //! @note Must provide valid boolean values to guarantee same result for all
3202
+ //! architectures.
3203
+
3204
+ /** @brief
3205
+ //! For 16-bit boolean values
3206
+
3207
+ Scheme:
3208
+ @code
3209
+ a {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0}
3210
+ b {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF}
3211
+ ===============
3212
+ {
3213
+ 0xFF 0 0 0xFF 0 0xFF 0xFF 0
3214
+ 0xFF 0 0xFF 0 0 0xFF 0 0xFF
3215
+ }
3216
+ @endcode */
3217
+
3218
+ template <int n>
3219
+ inline v_reg<uchar, 2 * n> v_pack_b(const v_reg<ushort, n> &a,
3220
+ const v_reg<ushort, n> &b) {
3221
+ v_reg<uchar, 2 * n> mask;
3222
+ _pack_b(mask.s, a, b);
3223
+ return mask;
3224
+ }
3225
+
3226
+ /** @overload
3227
+ For 32-bit boolean values
3228
+
3229
+ Scheme:
3230
+ @code
3231
+ a {0xFFFF.. 0 0 0xFFFF..}
3232
+ b {0 0xFFFF.. 0xFFFF.. 0}
3233
+ c {0xFFFF.. 0 0xFFFF.. 0}
3234
+ d {0 0xFFFF.. 0 0xFFFF..}
3235
+ ===============
3236
+ {
3237
+ 0xFF 0 0 0xFF 0 0xFF 0xFF 0
3238
+ 0xFF 0 0xFF 0 0 0xFF 0 0xFF
3239
+ }
3240
+ @endcode */
3241
+
3242
+ template <int n>
3243
+ inline v_reg<uchar, 4 * n>
3244
+ v_pack_b(const v_reg<unsigned, n> &a, const v_reg<unsigned, n> &b,
3245
+ const v_reg<unsigned, n> &c, const v_reg<unsigned, n> &d) {
3246
+ v_reg<uchar, 4 * n> mask;
3247
+ _pack_b(mask.s, a, b);
3248
+ _pack_b(mask.s + 2 * n, c, d);
3249
+ return mask;
3250
+ }
3251
+
3252
+ /** @overload
3253
+ For 64-bit boolean values
3254
+
3255
+ Scheme:
3256
+ @code
3257
+ a {0xFFFF.. 0}
3258
+ b {0 0xFFFF..}
3259
+ c {0xFFFF.. 0}
3260
+ d {0 0xFFFF..}
3261
+
3262
+ e {0xFFFF.. 0}
3263
+ f {0xFFFF.. 0}
3264
+ g {0 0xFFFF..}
3265
+ h {0 0xFFFF..}
3266
+ ===============
3267
+ {
3268
+ 0xFF 0 0 0xFF 0xFF 0 0 0xFF
3269
+ 0xFF 0 0xFF 0 0 0xFF 0 0xFF
3270
+ }
3271
+ @endcode */
3272
+ template <int n>
3273
+ inline v_reg<uchar, 8 * n>
3274
+ v_pack_b(const v_reg<uint64, n> &a, const v_reg<uint64, n> &b,
3275
+ const v_reg<uint64, n> &c, const v_reg<uint64, n> &d,
3276
+ const v_reg<uint64, n> &e, const v_reg<uint64, n> &f,
3277
+ const v_reg<uint64, n> &g, const v_reg<uint64, n> &h) {
3278
+ v_reg<uchar, 8 * n> mask;
3279
+ _pack_b(mask.s, a, b);
3280
+ _pack_b(mask.s + 2 * n, c, d);
3281
+ _pack_b(mask.s + 4 * n, e, f);
3282
+ _pack_b(mask.s + 6 * n, g, h);
3283
+ return mask;
3284
+ }
3285
+ //! @}
3286
+
3287
+ /** @brief Matrix multiplication
3288
+
3289
+ Scheme:
3290
+ @code
3291
+ {A0 A1 A2 A3} |V0|
3292
+ {B0 B1 B2 B3} |V1|
3293
+ {C0 C1 C2 C3} |V2|
3294
+ {D0 D1 D2 D3} x |V3|
3295
+ ====================
3296
+ {R0 R1 R2 R3}, where:
3297
+ R0 = A0V0 + B0V1 + C0V2 + D0V3,
3298
+ R1 = A1V0 + B1V1 + C1V2 + D1V3
3299
+ ...
3300
+ @endcode
3301
+ */
3302
+ template <int n>
3303
+ inline v_reg<float, n>
3304
+ v_matmul(const v_reg<float, n> &v, const v_reg<float, n> &a,
3305
+ const v_reg<float, n> &b, const v_reg<float, n> &c,
3306
+ const v_reg<float, n> &d) {
3307
+ v_reg<float, n> res;
3308
+ for (int i = 0; i < n / 4; i++) {
3309
+ res.s[0 + i * 4] =
3310
+ v.s[0 + i * 4] * a.s[0 + i * 4] + v.s[1 + i * 4] * b.s[0 + i * 4] +
3311
+ v.s[2 + i * 4] * c.s[0 + i * 4] + v.s[3 + i * 4] * d.s[0 + i * 4];
3312
+ res.s[1 + i * 4] =
3313
+ v.s[0 + i * 4] * a.s[1 + i * 4] + v.s[1 + i * 4] * b.s[1 + i * 4] +
3314
+ v.s[2 + i * 4] * c.s[1 + i * 4] + v.s[3 + i * 4] * d.s[1 + i * 4];
3315
+ res.s[2 + i * 4] =
3316
+ v.s[0 + i * 4] * a.s[2 + i * 4] + v.s[1 + i * 4] * b.s[2 + i * 4] +
3317
+ v.s[2 + i * 4] * c.s[2 + i * 4] + v.s[3 + i * 4] * d.s[2 + i * 4];
3318
+ res.s[3 + i * 4] =
3319
+ v.s[0 + i * 4] * a.s[3 + i * 4] + v.s[1 + i * 4] * b.s[3 + i * 4] +
3320
+ v.s[2 + i * 4] * c.s[3 + i * 4] + v.s[3 + i * 4] * d.s[3 + i * 4];
3321
+ }
3322
+ return res;
3323
+ }
3324
+
3325
+ /** @brief Matrix multiplication and add
3326
+
3327
+ Scheme:
3328
+ @code
3329
+ {A0 A1 A2 A3} |V0| |D0|
3330
+ {B0 B1 B2 B3} |V1| |D1|
3331
+ {C0 C1 C2 C3} x |V2| + |D2|
3332
+ ==================== |D3|
3333
+ {R0 R1 R2 R3}, where:
3334
+ R0 = A0V0 + B0V1 + C0V2 + D0,
3335
+ R1 = A1V0 + B1V1 + C1V2 + D1
3336
+ ...
3337
+ @endcode
3338
+ */
3339
+ template <int n>
3340
+ inline v_reg<float, n>
3341
+ v_matmuladd(const v_reg<float, n> &v, const v_reg<float, n> &a,
3342
+ const v_reg<float, n> &b, const v_reg<float, n> &c,
3343
+ const v_reg<float, n> &d) {
3344
+ v_reg<float, n> res;
3345
+ for (int i = 0; i < n / 4; i++) {
3346
+ res.s[0 + i * 4] = v.s[0 + i * 4] * a.s[0 + i * 4] +
3347
+ v.s[1 + i * 4] * b.s[0 + i * 4] +
3348
+ v.s[2 + i * 4] * c.s[0 + i * 4] + d.s[0 + i * 4];
3349
+ res.s[1 + i * 4] = v.s[0 + i * 4] * a.s[1 + i * 4] +
3350
+ v.s[1 + i * 4] * b.s[1 + i * 4] +
3351
+ v.s[2 + i * 4] * c.s[1 + i * 4] + d.s[1 + i * 4];
3352
+ res.s[2 + i * 4] = v.s[0 + i * 4] * a.s[2 + i * 4] +
3353
+ v.s[1 + i * 4] * b.s[2 + i * 4] +
3354
+ v.s[2 + i * 4] * c.s[2 + i * 4] + d.s[2 + i * 4];
3355
+ res.s[3 + i * 4] = v.s[0 + i * 4] * a.s[3 + i * 4] +
3356
+ v.s[1 + i * 4] * b.s[3 + i * 4] +
3357
+ v.s[2 + i * 4] * c.s[3 + i * 4] + d.s[3 + i * 4];
3358
+ }
3359
+ return res;
3360
+ }
3361
+
3362
+ template <int n>
3363
+ inline v_reg<double, n / 2> v_dotprod_expand(const v_reg<int, n> &a,
3364
+ const v_reg<int, n> &b) {
3365
+ return v_fma(v_cvt_f64(a), v_cvt_f64(b),
3366
+ v_mul(v_cvt_f64_high(a), v_cvt_f64_high(b)));
3367
+ }
3368
+ template <int n>
3369
+ inline v_reg<double, n / 2> v_dotprod_expand(const v_reg<int, n> &a,
3370
+ const v_reg<int, n> &b,
3371
+ const v_reg<double, n / 2> &c) {
3372
+ return v_fma(v_cvt_f64(a), v_cvt_f64(b),
3373
+ v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c));
3374
+ }
3375
+
3376
+ template <int n>
3377
+ inline v_reg<double, n / 2> v_dotprod_expand_fast(const v_reg<int, n> &a,
3378
+ const v_reg<int, n> &b) {
3379
+ return v_dotprod_expand(a, b);
3380
+ }
3381
+ template <int n>
3382
+ inline v_reg<double, n / 2>
3383
+ v_dotprod_expand_fast(const v_reg<int, n> &a, const v_reg<int, n> &b,
3384
+ const v_reg<double, n / 2> &c) {
3385
+ return v_dotprod_expand(a, b, c);
3386
+ }
3387
+
3388
+ ////// FP16 support ///////
3389
+
3390
+ inline v_reg<float, simd128_width / sizeof(float)>
3391
+ v_load_expand(const hfloat *ptr) {
3392
+ v_reg<float, simd128_width / sizeof(float)> v;
3393
+ for (int i = 0; i < v.nlanes; i++) {
3394
+ v.s[i] = ptr[i];
3395
+ }
3396
+ return v;
3397
+ }
3398
#if CV_SIMD256
/** Load hfloat values from @p ptr and widen them to a float vector of
    simd256_width / sizeof(float) lanes; one element is read per lane. */
inline v_reg<float, simd256_width / sizeof(float)>
v256_load_expand(const hfloat *ptr) {
  v_reg<float, simd256_width / sizeof(float)> loaded;
  const hfloat *src = ptr;
  for (int lane = 0; lane < loaded.nlanes; ++lane, ++src) {
    loaded.s[lane] = *src;
  }
  return loaded;
}
#endif
3408
#if CV_SIMD512
/** Load hfloat values from @p ptr and widen them to a float vector of
    simd512_width / sizeof(float) lanes; one element is read per lane. */
inline v_reg<float, simd512_width / sizeof(float)>
v512_load_expand(const hfloat *ptr) {
  v_reg<float, simd512_width / sizeof(float)> loaded;
  const hfloat *src = ptr;
  for (int lane = 0; lane < loaded.nlanes; ++lane, ++src) {
    loaded.s[lane] = *src;
  }
  return loaded;
}
#endif
3418
+
3419
+ template <int n>
3420
+ inline void v_pack_store(hfloat *ptr, const v_reg<float, n> &v) {
3421
+ for (int i = 0; i < v.nlanes; i++) {
3422
+ ptr[i] = hfloat(v.s[i]);
3423
+ }
3424
+ }
3425
+
3426
// Cleanup hooks: intentionally empty in this implementation, which has no
// state to reset.
inline void v_cleanup() {
}
#if CV_SIMD256
inline void v256_cleanup() {
}
#endif
#if CV_SIMD512
inline void v512_cleanup() {
}
#endif
3433
+
3434
+ //! @}
3435
+
3436
+ #ifndef CV_DOXYGEN
3437
+ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
3438
+ #endif
3439
+ } // namespace cv
3440
+
3441
+ #if !defined(CV_DOXYGEN)
3442
+ #undef CV_SIMD256
3443
+ #undef CV_SIMD512
3444
+ #endif
3445
+
3446
+ #endif