react-native-executorch 0.4.8 → 0.5.1-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1021):
  1. package/android/CMakeLists.txt +17 -0
  2. package/android/build.gradle +76 -13
  3. package/android/libs/classes.jar +0 -0
  4. package/android/src/main/cpp/CMakeLists.txt +73 -0
  5. package/android/src/main/cpp/ETInstallerModule.cpp +76 -0
  6. package/android/src/main/cpp/ETInstallerModule.h +43 -0
  7. package/android/src/main/java/com/swmansion/rnexecutorch/ETInstaller.kt +66 -0
  8. package/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +3 -3
  9. package/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +7 -113
  10. package/common/ada/ada.cpp +17406 -0
  11. package/common/ada/ada.h +10274 -0
  12. package/common/pfft/pfft.c +2205 -0
  13. package/common/pfft/pfft.h +185 -0
  14. package/common/rnexecutorch/Log.h +489 -0
  15. package/common/rnexecutorch/RnExecutorchInstaller.cpp +78 -0
  16. package/common/rnexecutorch/RnExecutorchInstaller.h +112 -0
  17. package/common/rnexecutorch/TokenizerModule.cpp +52 -0
  18. package/common/rnexecutorch/TokenizerModule.h +26 -0
  19. package/common/rnexecutorch/data_processing/FFT.cpp +21 -0
  20. package/common/rnexecutorch/data_processing/FFT.h +23 -0
  21. package/common/rnexecutorch/data_processing/FileUtils.h +30 -0
  22. package/common/rnexecutorch/data_processing/ImageProcessing.cpp +240 -0
  23. package/common/rnexecutorch/data_processing/ImageProcessing.h +55 -0
  24. package/common/rnexecutorch/data_processing/Numerical.cpp +82 -0
  25. package/common/rnexecutorch/data_processing/Numerical.h +23 -0
  26. package/common/rnexecutorch/data_processing/base64.cpp +110 -0
  27. package/common/rnexecutorch/data_processing/base64.h +46 -0
  28. package/common/rnexecutorch/data_processing/dsp.cpp +65 -0
  29. package/common/rnexecutorch/data_processing/dsp.h +12 -0
  30. package/common/rnexecutorch/host_objects/JSTensorViewIn.h +12 -0
  31. package/common/rnexecutorch/host_objects/JSTensorViewOut.h +22 -0
  32. package/common/rnexecutorch/host_objects/JsiConversions.h +410 -0
  33. package/common/rnexecutorch/host_objects/ModelHostObject.h +239 -0
  34. package/common/rnexecutorch/jsi/JsiHostObject.cpp +108 -0
  35. package/common/rnexecutorch/jsi/JsiHostObject.h +87 -0
  36. package/common/rnexecutorch/jsi/OwningArrayBuffer.h +40 -0
  37. package/common/rnexecutorch/jsi/Promise.cpp +20 -0
  38. package/common/rnexecutorch/jsi/Promise.h +69 -0
  39. package/common/rnexecutorch/jsi/RuntimeAwareCache.h +58 -0
  40. package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.cpp +53 -0
  41. package/common/rnexecutorch/jsi/RuntimeLifecycleMonitor.h +35 -0
  42. package/common/rnexecutorch/metaprogramming/ConstructorHelpers.h +131 -0
  43. package/common/rnexecutorch/metaprogramming/FunctionHelpers.h +50 -0
  44. package/common/rnexecutorch/metaprogramming/TypeConcepts.h +37 -0
  45. package/common/rnexecutorch/models/BaseModel.cpp +181 -0
  46. package/common/rnexecutorch/models/BaseModel.h +47 -0
  47. package/common/rnexecutorch/models/EncoderDecoderBase.cpp +21 -0
  48. package/common/rnexecutorch/models/EncoderDecoderBase.h +31 -0
  49. package/common/rnexecutorch/models/classification/Classification.cpp +72 -0
  50. package/common/rnexecutorch/models/classification/Classification.h +26 -0
  51. package/{ios/RnExecutorch/models/classification/Constants.mm → common/rnexecutorch/models/classification/Constants.h} +7 -2
  52. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +27 -0
  53. package/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +17 -0
  54. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +45 -0
  55. package/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +23 -0
  56. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +61 -0
  57. package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +26 -0
  58. package/{ios/RnExecutorch/models/image_segmentation/Constants.mm → common/rnexecutorch/models/image_segmentation/Constants.h} +7 -2
  59. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +173 -0
  60. package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h +43 -0
  61. package/{ios/RnExecutorch/utils/Constants.mm → common/rnexecutorch/models/object_detection/Constants.h} +9 -2
  62. package/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +82 -0
  63. package/common/rnexecutorch/models/object_detection/ObjectDetection.h +31 -0
  64. package/{ios/RnExecutorch/utils/ObjectDetectionUtils.mm → common/rnexecutorch/models/object_detection/Utils.cpp} +10 -30
  65. package/common/rnexecutorch/models/object_detection/Utils.h +17 -0
  66. package/common/rnexecutorch/models/ocr/CTCLabelConverter.cpp +88 -0
  67. package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +29 -0
  68. package/common/rnexecutorch/models/ocr/Constants.h +34 -0
  69. package/common/rnexecutorch/models/ocr/Detector.cpp +102 -0
  70. package/common/rnexecutorch/models/ocr/Detector.h +30 -0
  71. package/common/rnexecutorch/models/ocr/DetectorUtils.cpp +703 -0
  72. package/common/rnexecutorch/models/ocr/DetectorUtils.h +80 -0
  73. package/common/rnexecutorch/models/ocr/OCR.cpp +52 -0
  74. package/common/rnexecutorch/models/ocr/OCR.h +36 -0
  75. package/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +107 -0
  76. package/common/rnexecutorch/models/ocr/RecognitionHandler.h +40 -0
  77. package/common/rnexecutorch/models/ocr/RecognitionHandlerUtils.cpp +153 -0
  78. package/common/rnexecutorch/models/ocr/RecognitionHandlerUtils.h +72 -0
  79. package/common/rnexecutorch/models/ocr/Recognizer.cpp +80 -0
  80. package/common/rnexecutorch/models/ocr/Recognizer.h +36 -0
  81. package/common/rnexecutorch/models/ocr/RecognizerUtils.cpp +202 -0
  82. package/common/rnexecutorch/models/ocr/RecognizerUtils.h +70 -0
  83. package/common/rnexecutorch/models/ocr/Types.h +37 -0
  84. package/common/rnexecutorch/models/speech_to_text/MoonshineStrategy.cpp +31 -0
  85. package/common/rnexecutorch/models/speech_to_text/MoonshineStrategy.h +21 -0
  86. package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +70 -0
  87. package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +31 -0
  88. package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +26 -0
  89. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +38 -0
  90. package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +25 -0
  91. package/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +55 -0
  92. package/common/rnexecutorch/models/style_transfer/StyleTransfer.h +29 -0
  93. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.cpp +92 -0
  94. package/common/rnexecutorch/models/vertical_ocr/VerticalDetector.h +49 -0
  95. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +180 -0
  96. package/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +78 -0
  97. package/common/rnexecutorch/tests/LogTest.cpp +530 -0
  98. package/common/rnexecutorch/tests/README.md +20 -0
  99. package/common/rnexecutorch/tests/run_all_tests.sh +14 -0
  100. package/common/rnexecutorch/tests/run_test.sh +18 -0
  101. package/ios/ExecutorchLib.xcframework/Info.plist +4 -4
  102. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
  103. package/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
  104. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
  105. package/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
  106. package/ios/RnExecutorch/ETInstaller.h +8 -0
  107. package/ios/RnExecutorch/ETInstaller.mm +56 -0
  108. package/ios/RnExecutorch/utils/Conversions.h +8 -9
  109. package/ios/RnExecutorch/utils/Numerical.h +2 -0
  110. package/ios/RnExecutorch.xcodeproj/project.pbxproj +73 -0
  111. package/lib/common/Logger.d.ts +8 -0
  112. package/lib/common/Logger.js +19 -0
  113. package/lib/constants/modelUrls.d.ts +89 -0
  114. package/lib/constants/modelUrls.js +116 -0
  115. package/lib/constants/sttDefaults.js +66 -0
  116. package/lib/controllers/LLMController.js +210 -0
  117. package/lib/controllers/OCRController.js +65 -0
  118. package/lib/controllers/SpeechToTextController.d.ts +52 -0
  119. package/lib/controllers/SpeechToTextController.js +343 -0
  120. package/lib/hooks/natural_language_processing/useSpeechToText.js +44 -0
  121. package/lib/index.d.ts +50 -0
  122. package/{src/index.tsx → lib/index.js} +22 -10
  123. package/lib/module/Error.js +2 -0
  124. package/lib/module/Error.js.map +1 -1
  125. package/lib/module/common/Logger.js +23 -0
  126. package/lib/module/common/Logger.js.map +1 -0
  127. package/lib/module/constants/llmDefaults.js +8 -0
  128. package/lib/module/constants/llmDefaults.js.map +1 -1
  129. package/lib/module/constants/modelUrls.js +300 -84
  130. package/lib/module/constants/modelUrls.js.map +1 -1
  131. package/lib/module/constants/ocr/models.js +181 -286
  132. package/lib/module/constants/ocr/models.js.map +1 -1
  133. package/lib/module/constants/ocr/symbols.js +63 -63
  134. package/lib/module/constants/sttDefaults.js +12 -10
  135. package/lib/module/constants/sttDefaults.js.map +1 -1
  136. package/lib/module/controllers/LLMController.js +17 -11
  137. package/lib/module/controllers/LLMController.js.map +1 -1
  138. package/lib/module/controllers/OCRController.js +16 -9
  139. package/lib/module/controllers/OCRController.js.map +1 -1
  140. package/lib/module/controllers/SpeechToTextController.js +32 -19
  141. package/lib/module/controllers/SpeechToTextController.js.map +1 -1
  142. package/lib/module/controllers/VerticalOCRController.js +16 -9
  143. package/lib/module/controllers/VerticalOCRController.js.map +1 -1
  144. package/lib/module/hooks/computer_vision/useClassification.js +5 -5
  145. package/lib/module/hooks/computer_vision/useClassification.js.map +1 -1
  146. package/lib/module/hooks/computer_vision/useImageEmbeddings.js +13 -0
  147. package/lib/module/hooks/computer_vision/useImageEmbeddings.js.map +1 -0
  148. package/lib/module/hooks/computer_vision/useImageSegmentation.js +4 -4
  149. package/lib/module/hooks/computer_vision/useImageSegmentation.js.map +1 -1
  150. package/lib/module/hooks/computer_vision/useOCR.js +14 -15
  151. package/lib/module/hooks/computer_vision/useOCR.js.map +1 -1
  152. package/lib/module/hooks/computer_vision/useObjectDetection.js +5 -5
  153. package/lib/module/hooks/computer_vision/useObjectDetection.js.map +1 -1
  154. package/lib/module/hooks/computer_vision/useStyleTransfer.js +5 -5
  155. package/lib/module/hooks/computer_vision/useStyleTransfer.js.map +1 -1
  156. package/lib/module/hooks/computer_vision/useVerticalOCR.js +16 -17
  157. package/lib/module/hooks/computer_vision/useVerticalOCR.js.map +1 -1
  158. package/lib/module/hooks/general/useExecutorchModule.js +5 -3
  159. package/lib/module/hooks/general/useExecutorchModule.js.map +1 -1
  160. package/lib/module/hooks/natural_language_processing/useLLM.js +22 -25
  161. package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
  162. package/lib/module/hooks/natural_language_processing/useSpeechToText.js +16 -14
  163. package/lib/module/hooks/natural_language_processing/useSpeechToText.js.map +1 -1
  164. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js +4 -5
  165. package/lib/module/hooks/natural_language_processing/useTextEmbeddings.js.map +1 -1
  166. package/lib/module/hooks/natural_language_processing/useTokenizer.js +20 -19
  167. package/lib/module/hooks/natural_language_processing/useTokenizer.js.map +1 -1
  168. package/lib/module/hooks/useNonStaticModule.js +52 -0
  169. package/lib/module/hooks/useNonStaticModule.js.map +1 -0
  170. package/lib/module/index.js +16 -2
  171. package/lib/module/index.js.map +1 -1
  172. package/lib/module/modules/BaseModule.js +6 -3
  173. package/lib/module/modules/BaseModule.js.map +1 -1
  174. package/lib/module/modules/BaseNonStaticModule.js +17 -0
  175. package/lib/module/modules/BaseNonStaticModule.js.map +1 -0
  176. package/lib/module/modules/computer_vision/ClassificationModule.js +13 -8
  177. package/lib/module/modules/computer_vision/ClassificationModule.js.map +1 -1
  178. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js +19 -0
  179. package/lib/module/modules/computer_vision/ImageEmbeddingsModule.js.map +1 -0
  180. package/lib/module/modules/computer_vision/ImageSegmentationModule.js +21 -19
  181. package/lib/module/modules/computer_vision/ImageSegmentationModule.js.map +1 -1
  182. package/lib/module/modules/computer_vision/OCRModule.js +13 -10
  183. package/lib/module/modules/computer_vision/OCRModule.js.map +1 -1
  184. package/lib/module/modules/computer_vision/ObjectDetectionModule.js +13 -8
  185. package/lib/module/modules/computer_vision/ObjectDetectionModule.js.map +1 -1
  186. package/lib/module/modules/computer_vision/StyleTransferModule.js +13 -8
  187. package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
  188. package/lib/module/modules/computer_vision/VerticalOCRModule.js +15 -10
  189. package/lib/module/modules/computer_vision/VerticalOCRModule.js.map +1 -1
  190. package/lib/module/modules/general/ExecutorchModule.js +10 -36
  191. package/lib/module/modules/general/ExecutorchModule.js.map +1 -1
  192. package/lib/module/modules/natural_language_processing/LLMModule.js +18 -22
  193. package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
  194. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +27 -16
  195. package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
  196. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js +15 -8
  197. package/lib/module/modules/natural_language_processing/TextEmbeddingsModule.js.map +1 -1
  198. package/lib/module/modules/natural_language_processing/TokenizerModule.js +20 -14
  199. package/lib/module/modules/natural_language_processing/TokenizerModule.js.map +1 -1
  200. package/lib/module/native/NativeETInstaller.js +5 -0
  201. package/lib/module/native/NativeETInstaller.js.map +1 -0
  202. package/lib/module/native/RnExecutorchModules.js +2 -11
  203. package/lib/module/native/RnExecutorchModules.js.map +1 -1
  204. package/lib/module/types/common.js +25 -8
  205. package/lib/module/types/common.js.map +1 -1
  206. package/lib/module/types/stt.js +6 -0
  207. package/lib/module/types/stt.js.map +1 -1
  208. package/lib/module/utils/ResourceFetcher.js +276 -114
  209. package/lib/module/utils/ResourceFetcher.js.map +1 -1
  210. package/lib/module/utils/ResourceFetcherUtils.js +155 -0
  211. package/lib/module/utils/ResourceFetcherUtils.js.map +1 -0
  212. package/lib/module/utils/llm.js +41 -1
  213. package/lib/module/utils/llm.js.map +1 -1
  214. package/lib/modules/natural_language_processing/SpeechToTextModule.d.ts +14 -0
  215. package/lib/modules/natural_language_processing/SpeechToTextModule.js +30 -0
  216. package/lib/modules/natural_language_processing/TokenizerModule.js +29 -0
  217. package/lib/native/RnExecutorchModules.d.ts +3 -0
  218. package/lib/native/RnExecutorchModules.js +16 -0
  219. package/lib/typescript/Error.d.ts +2 -0
  220. package/lib/typescript/Error.d.ts.map +1 -1
  221. package/lib/typescript/common/Logger.d.ts +9 -0
  222. package/lib/typescript/common/Logger.d.ts.map +1 -0
  223. package/lib/typescript/constants/llmDefaults.d.ts +1 -0
  224. package/lib/typescript/constants/llmDefaults.d.ts.map +1 -1
  225. package/lib/typescript/constants/modelUrls.d.ts +223 -79
  226. package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
  227. package/lib/typescript/constants/ocr/models.d.ts +882 -284
  228. package/lib/typescript/constants/ocr/models.d.ts.map +1 -1
  229. package/lib/typescript/constants/sttDefaults.d.ts +1 -0
  230. package/lib/typescript/constants/sttDefaults.d.ts.map +1 -1
  231. package/lib/typescript/controllers/LLMController.d.ts +3 -4
  232. package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
  233. package/lib/typescript/controllers/OCRController.d.ts +5 -6
  234. package/lib/typescript/controllers/OCRController.d.ts.map +1 -1
  235. package/lib/typescript/controllers/SpeechToTextController.d.ts +11 -6
  236. package/lib/typescript/controllers/SpeechToTextController.d.ts.map +1 -1
  237. package/lib/typescript/controllers/VerticalOCRController.d.ts +5 -6
  238. package/lib/typescript/controllers/VerticalOCRController.d.ts.map +1 -1
  239. package/lib/typescript/hooks/computer_vision/useClassification.d.ts +8 -6
  240. package/lib/typescript/hooks/computer_vision/useClassification.d.ts.map +1 -1
  241. package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts +16 -0
  242. package/lib/typescript/hooks/computer_vision/useImageEmbeddings.d.ts.map +1 -0
  243. package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts +5 -3
  244. package/lib/typescript/hooks/computer_vision/useImageSegmentation.d.ts.map +1 -1
  245. package/lib/typescript/hooks/computer_vision/useOCR.d.ts +4 -4
  246. package/lib/typescript/hooks/computer_vision/useOCR.d.ts.map +1 -1
  247. package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts +5 -3
  248. package/lib/typescript/hooks/computer_vision/useObjectDetection.d.ts.map +1 -1
  249. package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts +5 -3
  250. package/lib/typescript/hooks/computer_vision/useStyleTransfer.d.ts.map +1 -1
  251. package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts +3 -5
  252. package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts.map +1 -1
  253. package/lib/typescript/hooks/general/useExecutorchModule.d.ts +1 -1
  254. package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts +6 -4
  255. package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
  256. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +7 -5
  257. package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts.map +1 -1
  258. package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts +9 -5
  259. package/lib/typescript/hooks/natural_language_processing/useTextEmbeddings.d.ts.map +1 -1
  260. package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts +6 -4
  261. package/lib/typescript/hooks/natural_language_processing/useTokenizer.d.ts.map +1 -1
  262. package/lib/typescript/hooks/useNonStaticModule.d.ts +21 -0
  263. package/lib/typescript/hooks/useNonStaticModule.d.ts.map +1 -0
  264. package/lib/typescript/index.d.ts +18 -2
  265. package/lib/typescript/index.d.ts.map +1 -1
  266. package/lib/typescript/modules/BaseModule.d.ts +1 -1
  267. package/lib/typescript/modules/BaseModule.d.ts.map +1 -1
  268. package/lib/typescript/modules/BaseNonStaticModule.d.ts +10 -0
  269. package/lib/typescript/modules/BaseNonStaticModule.d.ts.map +1 -0
  270. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts +6 -6
  271. package/lib/typescript/modules/computer_vision/ClassificationModule.d.ts.map +1 -1
  272. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts +9 -0
  273. package/lib/typescript/modules/computer_vision/ImageEmbeddingsModule.d.ts.map +1 -0
  274. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts +8 -28
  275. package/lib/typescript/modules/computer_vision/ImageSegmentationModule.d.ts.map +1 -1
  276. package/lib/typescript/modules/computer_vision/OCRModule.d.ts +8 -7
  277. package/lib/typescript/modules/computer_vision/OCRModule.d.ts.map +1 -1
  278. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts +7 -5
  279. package/lib/typescript/modules/computer_vision/ObjectDetectionModule.d.ts.map +1 -1
  280. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +6 -5
  281. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
  282. package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts +7 -8
  283. package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts.map +1 -1
  284. package/lib/typescript/modules/general/ExecutorchModule.d.ts +5 -8
  285. package/lib/typescript/modules/general/ExecutorchModule.d.ts.map +1 -1
  286. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +16 -16
  287. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
  288. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +19 -9
  289. package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
  290. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts +7 -5
  291. package/lib/typescript/modules/natural_language_processing/TextEmbeddingsModule.d.ts.map +1 -1
  292. package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts +10 -9
  293. package/lib/typescript/modules/natural_language_processing/TokenizerModule.d.ts.map +1 -1
  294. package/lib/typescript/native/{NativeStyleTransfer.d.ts → NativeETInstaller.d.ts} +2 -3
  295. package/lib/typescript/native/NativeETInstaller.d.ts.map +1 -0
  296. package/lib/typescript/native/RnExecutorchModules.d.ts +3 -21
  297. package/lib/typescript/native/RnExecutorchModules.d.ts.map +1 -1
  298. package/lib/typescript/types/common.d.ts +30 -2
  299. package/lib/typescript/types/common.d.ts.map +1 -1
  300. package/lib/typescript/types/stt.d.ts +5 -1
  301. package/lib/typescript/types/stt.d.ts.map +1 -1
  302. package/lib/typescript/utils/ResourceFetcher.d.ts +18 -10
  303. package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
  304. package/lib/typescript/utils/ResourceFetcherUtils.d.ts +55 -0
  305. package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -0
  306. package/lib/typescript/utils/llm.d.ts +4 -0
  307. package/lib/typescript/utils/llm.d.ts.map +1 -1
  308. package/lib/utils/ResourceFetcherUtils.js +119 -0
  309. package/lib/utils/llm.js +72 -0
  310. package/package.json +22 -64
  311. package/react-native-executorch.podspec +75 -3
  312. package/src/Error.ts +2 -0
  313. package/src/common/Logger.ts +25 -0
  314. package/src/constants/llmDefaults.ts +11 -0
  315. package/src/constants/modelUrls.ts +365 -168
  316. package/src/constants/ocr/models.ts +826 -395
  317. package/src/constants/ocr/symbols.ts +63 -63
  318. package/src/constants/sttDefaults.ts +14 -18
  319. package/src/controllers/LLMController.ts +28 -18
  320. package/src/controllers/OCRController.ts +24 -15
  321. package/src/controllers/SpeechToTextController.ts +53 -40
  322. package/src/controllers/VerticalOCRController.ts +24 -14
  323. package/src/hooks/computer_vision/useClassification.ts +10 -11
  324. package/src/hooks/computer_vision/useImageEmbeddings.ts +15 -0
  325. package/src/hooks/computer_vision/useImageSegmentation.ts +5 -8
  326. package/src/hooks/computer_vision/useOCR.ts +29 -21
  327. package/src/hooks/computer_vision/useObjectDetection.ts +6 -9
  328. package/src/hooks/computer_vision/useStyleTransfer.ts +6 -6
  329. package/src/hooks/computer_vision/useVerticalOCR.ts +30 -27
  330. package/src/hooks/general/useExecutorchModule.ts +3 -3
  331. package/src/hooks/natural_language_processing/useLLM.ts +38 -28
  332. package/src/hooks/natural_language_processing/useSpeechToText.ts +34 -26
  333. package/src/hooks/natural_language_processing/useTextEmbeddings.ts +11 -11
  334. package/src/hooks/natural_language_processing/useTokenizer.ts +22 -22
  335. package/src/hooks/useNonStaticModule.ts +74 -0
  336. package/src/index.ts +108 -0
  337. package/src/modules/BaseModule.ts +9 -3
  338. package/src/modules/BaseNonStaticModule.ts +26 -0
  339. package/src/modules/computer_vision/ClassificationModule.ts +20 -11
  340. package/src/modules/computer_vision/ImageEmbeddingsModule.ts +26 -0
  341. package/src/modules/computer_vision/ImageSegmentationModule.ts +35 -27
  342. package/src/modules/computer_vision/OCRModule.ts +23 -15
  343. package/src/modules/computer_vision/ObjectDetectionModule.ts +24 -11
  344. package/src/modules/computer_vision/StyleTransferModule.ts +20 -11
  345. package/src/modules/computer_vision/VerticalOCRModule.ts +25 -21
  346. package/src/modules/general/ExecutorchModule.ts +18 -48
  347. package/src/modules/natural_language_processing/LLMModule.ts +27 -30
  348. package/src/modules/natural_language_processing/SpeechToTextModule.ts +42 -37
  349. package/src/modules/natural_language_processing/TextEmbeddingsModule.ts +27 -12
  350. package/src/modules/natural_language_processing/TokenizerModule.ts +27 -17
  351. package/src/native/NativeETInstaller.ts +8 -0
  352. package/src/native/RnExecutorchModules.ts +4 -46
  353. package/src/types/common.ts +40 -12
  354. package/src/types/stt.ts +5 -1
  355. package/src/utils/ResourceFetcher.ts +338 -119
  356. package/src/utils/ResourceFetcherUtils.ts +186 -0
  357. package/src/utils/llm.ts +65 -1
  358. package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
  359. package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
  360. package/third-party/android/libs/opencv/arm64-v8a/libopencv_core.a +0 -0
  361. package/third-party/android/libs/opencv/arm64-v8a/libopencv_features2d.a +0 -0
  362. package/third-party/android/libs/opencv/arm64-v8a/libopencv_highgui.a +0 -0
  363. package/third-party/android/libs/opencv/arm64-v8a/libopencv_imgproc.a +0 -0
  364. package/third-party/android/libs/opencv/arm64-v8a/libopencv_photo.a +0 -0
  365. package/third-party/android/libs/opencv/arm64-v8a/libopencv_video.a +0 -0
  366. package/third-party/android/libs/opencv/x86_64/libopencv_core.a +0 -0
  367. package/third-party/android/libs/opencv/x86_64/libopencv_features2d.a +0 -0
  368. package/third-party/android/libs/opencv/x86_64/libopencv_highgui.a +0 -0
  369. package/third-party/android/libs/opencv/x86_64/libopencv_imgproc.a +0 -0
  370. package/third-party/android/libs/opencv/x86_64/libopencv_photo.a +0 -0
  371. package/third-party/android/libs/opencv/x86_64/libopencv_video.a +0 -0
  372. package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv.a +0 -0
  373. package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_hal.a +0 -0
  374. package/third-party/android/libs/opencv-third-party/arm64-v8a/libkleidicv_thread.a +0 -0
  375. package/third-party/include/c10/macros/Export.h +163 -0
  376. package/third-party/include/c10/macros/Macros.h +497 -0
  377. package/third-party/include/c10/util/BFloat16-inl.h +342 -0
  378. package/third-party/include/c10/util/BFloat16-math.h +266 -0
  379. package/third-party/include/c10/util/BFloat16.h +125 -0
  380. package/third-party/include/c10/util/Half-inl.h +347 -0
  381. package/third-party/include/c10/util/Half.h +416 -0
  382. package/third-party/include/c10/util/TypeSafeSignMath.h +133 -0
  383. package/third-party/include/c10/util/bit_cast.h +43 -0
  384. package/third-party/include/c10/util/floating_point_utils.h +33 -0
  385. package/third-party/include/c10/util/irange.h +107 -0
  386. package/third-party/include/executorch/ExecuTorch.h +13 -0
  387. package/third-party/include/executorch/ExecuTorchError.h +16 -0
  388. package/third-party/include/executorch/ExecuTorchLog.h +76 -0
  389. package/third-party/include/executorch/ExecuTorchModule.h +286 -0
  390. package/third-party/include/executorch/ExecuTorchTensor.h +742 -0
  391. package/third-party/include/executorch/ExecuTorchValue.h +219 -0
  392. package/third-party/include/executorch/extension/module/module.h +492 -0
  393. package/third-party/include/executorch/extension/tensor/tensor.h +13 -0
  394. package/third-party/include/executorch/extension/tensor/tensor_accessor.h +190 -0
  395. package/third-party/include/executorch/extension/tensor/tensor_ptr.h +347 -0
  396. package/third-party/include/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
  397. package/third-party/include/executorch/runtime/backend/backend_execution_context.h +71 -0
  398. package/third-party/include/executorch/runtime/backend/backend_init_context.h +72 -0
  399. package/third-party/include/executorch/runtime/backend/interface.h +166 -0
  400. package/third-party/include/executorch/runtime/core/array_ref.h +235 -0
  401. package/third-party/include/executorch/runtime/core/data_loader.h +136 -0
  402. package/third-party/include/executorch/runtime/core/defines.h +20 -0
  403. package/third-party/include/executorch/runtime/core/error.h +229 -0
  404. package/third-party/include/executorch/runtime/core/evalue.h +521 -0
  405. package/third-party/include/executorch/runtime/core/event_tracer.h +565 -0
  406. package/third-party/include/executorch/runtime/core/event_tracer_hooks.h +323 -0
  407. package/third-party/include/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
  408. package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
  409. package/third-party/include/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
  410. package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
  411. package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
  412. package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
  413. package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
  414. package/third-party/include/executorch/runtime/core/freeable_buffer.h +107 -0
  415. package/third-party/include/executorch/runtime/core/hierarchical_allocator.h +107 -0
  416. package/third-party/include/executorch/runtime/core/memory_allocator.h +198 -0
  417. package/third-party/include/executorch/runtime/core/named_data_map.h +86 -0
  418. package/third-party/include/executorch/runtime/core/portable_type/bfloat16.h +27 -0
  419. package/third-party/include/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
  420. package/third-party/include/executorch/runtime/core/portable_type/bits_types.h +83 -0
  421. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
  422. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
  423. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
  424. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
  425. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
  426. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
  427. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
  428. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
  429. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
  430. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
  431. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
  432. package/third-party/include/executorch/runtime/core/portable_type/complex.h +44 -0
  433. package/third-party/include/executorch/runtime/core/portable_type/device.h +70 -0
  434. package/third-party/include/executorch/runtime/core/portable_type/half.h +27 -0
  435. package/third-party/include/executorch/runtime/core/portable_type/optional.h +36 -0
  436. package/third-party/include/executorch/runtime/core/portable_type/qint_types.h +83 -0
  437. package/third-party/include/executorch/runtime/core/portable_type/scalar.h +110 -0
  438. package/third-party/include/executorch/runtime/core/portable_type/scalar_type.h +154 -0
  439. package/third-party/include/executorch/runtime/core/portable_type/string_view.h +29 -0
  440. package/third-party/include/executorch/runtime/core/portable_type/tensor.h +142 -0
  441. package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
  442. package/third-party/include/executorch/runtime/core/portable_type/tensor_options.h +60 -0
  443. package/third-party/include/executorch/runtime/core/result.h +258 -0
  444. package/third-party/include/executorch/runtime/core/span.h +93 -0
  445. package/third-party/include/executorch/runtime/core/tag.h +71 -0
  446. package/third-party/include/executorch/runtime/core/tensor_layout.h +79 -0
  447. package/third-party/include/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
  448. package/third-party/include/executorch/runtime/executor/memory_manager.h +113 -0
  449. package/third-party/include/executorch/runtime/executor/method.h +387 -0
  450. package/third-party/include/executorch/runtime/executor/method_meta.h +251 -0
  451. package/third-party/include/executorch/runtime/executor/program.h +320 -0
  452. package/third-party/include/executorch/runtime/executor/pte_data_map.h +144 -0
  453. package/third-party/include/executorch/runtime/executor/tensor_parser.h +156 -0
  454. package/third-party/include/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
  455. package/third-party/include/executorch/runtime/kernel/operator_registry.h +278 -0
  456. package/third-party/include/executorch/runtime/platform/abort.h +36 -0
  457. package/third-party/include/executorch/runtime/platform/assert.h +119 -0
  458. package/third-party/include/executorch/runtime/platform/clock.h +43 -0
  459. package/third-party/include/executorch/runtime/platform/compat_unistd.h +75 -0
  460. package/third-party/include/executorch/runtime/platform/compiler.h +191 -0
  461. package/third-party/include/executorch/runtime/platform/log.h +177 -0
  462. package/third-party/include/executorch/runtime/platform/platform.h +133 -0
  463. package/third-party/include/executorch/runtime/platform/profiler.h +292 -0
  464. package/third-party/include/executorch/runtime/platform/runtime.h +35 -0
  465. package/third-party/include/executorch/runtime/platform/system.h +49 -0
  466. package/third-party/include/executorch/runtime/platform/types.h +24 -0
  467. package/third-party/include/executorch/schema/extended_header.h +76 -0
  468. package/third-party/include/opencv2/core/affine.hpp +676 -0
  469. package/third-party/include/opencv2/core/async.hpp +107 -0
  470. package/third-party/include/opencv2/core/base.hpp +735 -0
  471. package/third-party/include/opencv2/core/bindings_utils.hpp +279 -0
  472. package/third-party/include/opencv2/core/bufferpool.hpp +39 -0
  473. package/third-party/include/opencv2/core/check.hpp +231 -0
  474. package/third-party/include/opencv2/core/core.hpp +55 -0
  475. package/third-party/include/opencv2/core/core_c.h +3261 -0
  476. package/third-party/include/opencv2/core/cv_cpu_dispatch.h +404 -0
  477. package/third-party/include/opencv2/core/cv_cpu_helper.h +856 -0
  478. package/third-party/include/opencv2/core/cvdef.h +1003 -0
  479. package/third-party/include/opencv2/core/cvstd.hpp +196 -0
  480. package/third-party/include/opencv2/core/cvstd.inl.hpp +188 -0
  481. package/third-party/include/opencv2/core/cvstd_wrapper.hpp +187 -0
  482. package/third-party/include/opencv2/core/detail/async_promise.hpp +73 -0
  483. package/third-party/include/opencv2/core/detail/dispatch_helper.impl.hpp +48 -0
  484. package/third-party/include/opencv2/core/detail/exception_ptr.hpp +24 -0
  485. package/third-party/include/opencv2/core/dualquaternion.hpp +1054 -0
  486. package/third-party/include/opencv2/core/dualquaternion.inl.hpp +464 -0
  487. package/third-party/include/opencv2/core/eigen.hpp +405 -0
  488. package/third-party/include/opencv2/core/fast_math.hpp +433 -0
  489. package/third-party/include/opencv2/core/hal/hal.hpp +451 -0
  490. package/third-party/include/opencv2/core/hal/interface.h +191 -0
  491. package/third-party/include/opencv2/core/hal/intrin.hpp +1222 -0
  492. package/third-party/include/opencv2/core/hal/intrin_avx.hpp +3378 -0
  493. package/third-party/include/opencv2/core/hal/intrin_avx512.hpp +3688 -0
  494. package/third-party/include/opencv2/core/hal/intrin_cpp.hpp +3446 -0
  495. package/third-party/include/opencv2/core/hal/intrin_forward.hpp +195 -0
  496. package/third-party/include/opencv2/core/hal/intrin_lasx.hpp +3243 -0
  497. package/third-party/include/opencv2/core/hal/intrin_lsx.hpp +2671 -0
  498. package/third-party/include/opencv2/core/hal/intrin_math.hpp +772 -0
  499. package/third-party/include/opencv2/core/hal/intrin_msa.hpp +1973 -0
  500. package/third-party/include/opencv2/core/hal/intrin_neon.hpp +2710 -0
  501. package/third-party/include/opencv2/core/hal/intrin_rvv071.hpp +3452 -0
  502. package/third-party/include/opencv2/core/hal/intrin_rvv_scalable.hpp +2559 -0
  503. package/third-party/include/opencv2/core/hal/intrin_sse.hpp +3528 -0
  504. package/third-party/include/opencv2/core/hal/intrin_sse_em.hpp +175 -0
  505. package/third-party/include/opencv2/core/hal/intrin_vsx.hpp +1756 -0
  506. package/third-party/include/opencv2/core/hal/intrin_wasm.hpp +2911 -0
  507. package/third-party/include/opencv2/core/hal/msa_macros.h +2079 -0
  508. package/third-party/include/opencv2/core/hal/simd_utils.impl.hpp +313 -0
  509. package/third-party/include/opencv2/core/mat.hpp +3842 -0
  510. package/third-party/include/opencv2/core/mat.inl.hpp +2753 -0
  511. package/third-party/include/opencv2/core/matx.hpp +603 -0
  512. package/third-party/include/opencv2/core/matx.inl.hpp +1132 -0
  513. package/third-party/include/opencv2/core/neon_utils.hpp +127 -0
  514. package/third-party/include/opencv2/core/operations.hpp +610 -0
  515. package/third-party/include/opencv2/core/optim.hpp +362 -0
  516. package/third-party/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp +66 -0
  517. package/third-party/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp +148 -0
  518. package/third-party/include/opencv2/core/parallel/parallel_backend.hpp +108 -0
  519. package/third-party/include/opencv2/core/persistence.hpp +1321 -0
  520. package/third-party/include/opencv2/core/quaternion.hpp +1889 -0
  521. package/third-party/include/opencv2/core/quaternion.inl.hpp +907 -0
  522. package/third-party/include/opencv2/core/saturate.hpp +347 -0
  523. package/third-party/include/opencv2/core/simd_intrinsics.hpp +90 -0
  524. package/third-party/include/opencv2/core/softfloat.hpp +657 -0
  525. package/third-party/include/opencv2/core/sse_utils.hpp +861 -0
  526. package/third-party/include/opencv2/core/traits.hpp +417 -0
  527. package/third-party/include/opencv2/core/types.hpp +2368 -0
  528. package/third-party/include/opencv2/core/types_c.h +2064 -0
  529. package/third-party/include/opencv2/core/utility.hpp +1296 -0
  530. package/third-party/include/opencv2/core/utils/allocator_stats.hpp +31 -0
  531. package/third-party/include/opencv2/core/utils/allocator_stats.impl.hpp +111 -0
  532. package/third-party/include/opencv2/core/utils/filesystem.hpp +91 -0
  533. package/third-party/include/opencv2/core/utils/fp_control_utils.hpp +70 -0
  534. package/third-party/include/opencv2/core/utils/instrumentation.hpp +127 -0
  535. package/third-party/include/opencv2/core/utils/logger.defines.hpp +50 -0
  536. package/third-party/include/opencv2/core/utils/logger.hpp +258 -0
  537. package/third-party/include/opencv2/core/utils/logtag.hpp +27 -0
  538. package/third-party/include/opencv2/core/utils/tls.hpp +230 -0
  539. package/third-party/include/opencv2/core/utils/trace.hpp +281 -0
  540. package/third-party/include/opencv2/core/version.hpp +29 -0
  541. package/third-party/include/opencv2/core/vsx_utils.hpp +1115 -0
  542. package/third-party/include/opencv2/core.hpp +3699 -0
  543. package/third-party/include/opencv2/cvconfig.h +155 -0
  544. package/third-party/include/opencv2/dnn/dnn.hpp +51 -0
  545. package/third-party/include/opencv2/dnn.hpp +17 -0
  546. package/third-party/include/opencv2/features2d/features2d.hpp +55 -0
  547. package/third-party/include/opencv2/features2d/hal/interface.h +32 -0
  548. package/third-party/include/opencv2/features2d.hpp +1756 -0
  549. package/third-party/include/opencv2/highgui/highgui.hpp +113 -0
  550. package/third-party/include/opencv2/highgui.hpp +17 -0
  551. package/third-party/include/opencv2/imgproc/bindings.hpp +34 -0
  552. package/third-party/include/opencv2/imgproc/detail/gcgraph.hpp +355 -0
  553. package/third-party/include/opencv2/imgproc/detail/legacy.hpp +35 -0
  554. package/third-party/include/opencv2/imgproc/hal/hal.hpp +246 -0
  555. package/third-party/include/opencv2/imgproc/hal/interface.h +52 -0
  556. package/third-party/include/opencv2/imgproc/imgproc.hpp +55 -0
  557. package/third-party/include/opencv2/imgproc/imgproc_c.h +1261 -0
  558. package/third-party/include/opencv2/imgproc/segmentation.hpp +168 -0
  559. package/third-party/include/opencv2/imgproc/types_c.h +632 -0
  560. package/third-party/include/opencv2/imgproc.hpp +5956 -0
  561. package/third-party/include/opencv2/opencv.hpp +102 -0
  562. package/third-party/include/opencv2/opencv_modules.hpp +19 -0
  563. package/third-party/include/opencv2/photo/legacy/constants_c.h +10 -0
  564. package/third-party/include/opencv2/photo/photo.hpp +55 -0
  565. package/third-party/include/opencv2/photo.hpp +975 -0
  566. package/third-party/include/opencv2/video/background_segm.hpp +341 -0
  567. package/third-party/include/opencv2/video/detail/tracking.detail.hpp +435 -0
  568. package/third-party/include/opencv2/video/legacy/constants_c.h +15 -0
  569. package/third-party/include/opencv2/video/tracking.hpp +1014 -0
  570. package/third-party/include/opencv2/video/video.hpp +55 -0
  571. package/third-party/include/opencv2/video.hpp +65 -0
  572. package/third-party/include/tokenizers-cpp/tokenizers_c.h +61 -0
  573. package/third-party/include/tokenizers-cpp/tokenizers_cpp.h +118 -0
  574. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.h +27 -0
  575. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/ETModel.mm +249 -0
  576. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h +14 -0
  577. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm +80 -0
  578. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h +32 -0
  579. package/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm +95 -0
  580. package/third-party/ios/ExecutorchLib/ExecutorchLib/InputType.h +12 -0
  581. package/third-party/ios/ExecutorchLib/ExecutorchLib/Utils.hpp +217 -0
  582. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.cpp +11 -0
  583. package/third-party/ios/ExecutorchLib/ExecutorchLib/model/Model.h +11 -0
  584. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/irunner.h +48 -0
  585. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.cpp +278 -0
  586. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.h +67 -0
  587. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/stats.h +164 -0
  588. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.cpp +65 -0
  589. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.h +105 -0
  590. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.cpp +91 -0
  591. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.h +51 -0
  592. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_token_generator.h +162 -0
  593. package/third-party/ios/ExecutorchLib/ExecutorchLib/runner/util.h +108 -0
  594. package/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp +193 -0
  595. package/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h +64 -0
  596. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h +202 -0
  597. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp +313 -0
  598. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h +57 -0
  599. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp +78 -0
  600. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h +23 -0
  601. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp +427 -0
  602. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h +87 -0
  603. package/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h +76 -0
  604. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj +683 -0
  605. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  606. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/xcuserdata/norbertklockiewicz.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  607. package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/xcuserdata/norbertklockiewicz.xcuserdatad/xcschemes/xcschememanagement.plist +14 -0
  608. package/third-party/ios/ExecutorchLib/build.sh +44 -0
  609. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/Info.plist +43 -0
  610. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64/libbackend_coreml_ios.a +0 -0
  611. package/third-party/ios/ExecutorchLib/frameworks/backend_coreml.xcframework/ios-arm64-simulator/libbackend_coreml_simulator.a +0 -0
  612. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/Info.plist +43 -0
  613. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64/libbackend_mps_ios.a +0 -0
  614. package/third-party/ios/ExecutorchLib/frameworks/backend_mps.xcframework/ios-arm64-simulator/libbackend_mps_simulator.a +0 -0
  615. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/Info.plist +43 -0
  616. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64/libbackend_xnnpack_ios.a +0 -0
  617. package/third-party/ios/ExecutorchLib/frameworks/backend_xnnpack.xcframework/ios-arm64-simulator/libbackend_xnnpack_simulator.a +0 -0
  618. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/Info.plist +47 -0
  619. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Export.h +163 -0
  620. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/macros/Macros.h +497 -0
  621. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-inl.h +342 -0
  622. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16-math.h +266 -0
  623. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/BFloat16.h +125 -0
  624. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half-inl.h +347 -0
  625. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/Half.h +416 -0
  626. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/TypeSafeSignMath.h +133 -0
  627. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/bit_cast.h +43 -0
  628. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/floating_point_utils.h +33 -0
  629. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/c10/util/irange.h +107 -0
  630. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorch.h +13 -0
  631. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchError.h +16 -0
  632. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchLog.h +76 -0
  633. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchModule.h +286 -0
  634. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchTensor.h +742 -0
  635. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/ExecuTorchValue.h +219 -0
  636. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/module/module.h +492 -0
  637. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor.h +13 -0
  638. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_accessor.h +190 -0
  639. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr.h +347 -0
  640. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
  641. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_execution_context.h +71 -0
  642. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/backend_init_context.h +72 -0
  643. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/backend/interface.h +166 -0
  644. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/array_ref.h +235 -0
  645. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/data_loader.h +136 -0
  646. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/defines.h +20 -0
  647. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/error.h +229 -0
  648. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/evalue.h +521 -0
  649. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer.h +565 -0
  650. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks.h +323 -0
  651. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
  652. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
  653. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
  654. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
  655. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
  656. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
  657. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
  658. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/freeable_buffer.h +107 -0
  659. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/hierarchical_allocator.h +107 -0
  660. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/memory_allocator.h +198 -0
  661. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/named_data_map.h +86 -0
  662. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16.h +27 -0
  663. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
  664. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/bits_types.h +83 -0
  665. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
  666. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
  667. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
  668. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
  669. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
  670. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
  671. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
  672. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
  673. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
  674. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
  675. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
  676. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/complex.h +44 -0
  677. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/device.h +70 -0
  678. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/half.h +27 -0
  679. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/optional.h +36 -0
  680. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/qint_types.h +83 -0
  681. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar.h +110 -0
  682. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/scalar_type.h +154 -0
  683. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/string_view.h +29 -0
  684. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor.h +142 -0
  685. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
  686. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/portable_type/tensor_options.h +60 -0
  687. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/result.h +258 -0
  688. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/span.h +93 -0
  689. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tag.h +71 -0
  690. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_layout.h +79 -0
  691. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
  692. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/memory_manager.h +113 -0
  693. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method.h +387 -0
  694. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/method_meta.h +251 -0
  695. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/program.h +320 -0
  696. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/pte_data_map.h +144 -0
  697. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/executor/tensor_parser.h +156 -0
  698. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
  699. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/kernel/operator_registry.h +278 -0
  700. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/abort.h +36 -0
  701. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/assert.h +119 -0
  702. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/clock.h +43 -0
  703. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compat_unistd.h +75 -0
  704. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/compiler.h +191 -0
  705. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/log.h +177 -0
  706. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/platform.h +133 -0
  707. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/profiler.h +292 -0
  708. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/runtime.h +35 -0
  709. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/system.h +49 -0
  710. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/runtime/platform/types.h +24 -0
  711. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/executorch/schema/extended_header.h +76 -0
  712. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/Headers/module.modulemap +5 -0
  713. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64/libexecutorch_ios.a +0 -0
  714. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Export.h +163 -0
  715. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/macros/Macros.h +497 -0
  716. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-inl.h +342 -0
  717. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16-math.h +266 -0
  718. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/BFloat16.h +125 -0
  719. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half-inl.h +347 -0
  720. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/Half.h +416 -0
  721. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/TypeSafeSignMath.h +133 -0
  722. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/bit_cast.h +43 -0
  723. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/floating_point_utils.h +33 -0
  724. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/c10/util/irange.h +107 -0
  725. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorch.h +13 -0
  726. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchError.h +16 -0
  727. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchLog.h +76 -0
  728. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchModule.h +286 -0
  729. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchTensor.h +742 -0
  730. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/ExecuTorchValue.h +219 -0
  731. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/module/module.h +492 -0
  732. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor.h +13 -0
  733. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_accessor.h +190 -0
  734. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr.h +347 -0
  735. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/extension/tensor/tensor_ptr_maker.h +653 -0
  736. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_execution_context.h +71 -0
  737. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/backend_init_context.h +72 -0
  738. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/backend/interface.h +166 -0
  739. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/array_ref.h +235 -0
  740. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/data_loader.h +136 -0
  741. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/defines.h +20 -0
  742. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/error.h +229 -0
  743. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/evalue.h +521 -0
  744. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer.h +565 -0
  745. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks.h +323 -0
  746. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/event_tracer_hooks_delegate.h +197 -0
  747. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/exec_aten.h +147 -0
  748. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/dim_order_util.h +263 -0
  749. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/scalar_type_util.h +1331 -0
  750. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_dimension_limit.h +21 -0
  751. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_shape_to_c_string.h +69 -0
  752. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/exec_aten/util/tensor_util.h +1250 -0
  753. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/freeable_buffer.h +107 -0
  754. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/hierarchical_allocator.h +107 -0
  755. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/memory_allocator.h +198 -0
  756. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/named_data_map.h +86 -0
  757. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16.h +27 -0
  758. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bfloat16_math.h +14 -0
  759. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/bits_types.h +83 -0
  760. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +163 -0
  761. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +497 -0
  762. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +342 -0
  763. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-math.h +266 -0
  764. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +125 -0
  765. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +347 -0
  766. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/Half.h +416 -0
  767. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +133 -0
  768. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +43 -0
  769. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +33 -0
  770. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/c10/c10/util/irange.h +107 -0
  771. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/complex.h +44 -0
  772. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/device.h +70 -0
  773. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/half.h +27 -0
  774. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/optional.h +36 -0
  775. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/qint_types.h +83 -0
  776. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar.h +110 -0
  777. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/scalar_type.h +154 -0
  778. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/string_view.h +29 -0
  779. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor.h +142 -0
  780. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_impl.h +261 -0
  781. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/portable_type/tensor_options.h +60 -0
  782. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/result.h +258 -0
  783. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/span.h +93 -0
  784. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tag.h +71 -0
  785. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_layout.h +79 -0
  786. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/core/tensor_shape_dynamism.h +39 -0
  787. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/memory_manager.h +113 -0
  788. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method.h +387 -0
  789. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/method_meta.h +251 -0
  790. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/program.h +320 -0
  791. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/pte_data_map.h +144 -0
  792. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/executor/tensor_parser.h +156 -0
  793. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/kernel_runtime_context.h +122 -0
  794. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/kernel/operator_registry.h +278 -0
  795. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/abort.h +36 -0
  796. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/assert.h +119 -0
  797. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/clock.h +43 -0
  798. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compat_unistd.h +75 -0
  799. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/compiler.h +191 -0
  800. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/log.h +177 -0
  801. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/platform.h +133 -0
  802. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/profiler.h +292 -0
  803. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/runtime.h +35 -0
  804. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/system.h +49 -0
  805. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/runtime/platform/types.h +24 -0
  806. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/executorch/schema/extended_header.h +76 -0
  807. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/Headers/module.modulemap +5 -0
  808. package/third-party/ios/ExecutorchLib/frameworks/executorch.xcframework/ios-arm64-simulator/libexecutorch_simulator.a +0 -0
  809. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/Info.plist +43 -0
  810. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64/libkernels_custom_ios.a +0 -0
  811. package/third-party/ios/ExecutorchLib/frameworks/kernels_custom.xcframework/ios-arm64-simulator/libkernels_custom_simulator.a +0 -0
  812. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/Info.plist +43 -0
  813. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64/libkernels_optimized_ios.a +0 -0
  814. package/third-party/ios/ExecutorchLib/frameworks/kernels_optimized.xcframework/ios-arm64-simulator/libkernels_optimized_simulator.a +0 -0
  815. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/Info.plist +43 -0
  816. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64/libkernels_portable_ios.a +0 -0
  817. package/third-party/ios/ExecutorchLib/frameworks/kernels_portable.xcframework/ios-arm64-simulator/libkernels_portable_simulator.a +0 -0
  818. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/Info.plist +43 -0
  819. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64/libkernels_quantized_ios.a +0 -0
  820. package/third-party/ios/ExecutorchLib/frameworks/kernels_quantized.xcframework/ios-arm64-simulator/libkernels_quantized_simulator.a +0 -0
  821. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/Info.plist +43 -0
  822. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/bitmap256.h +82 -0
  823. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/filtered_re2.h +111 -0
  824. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/pod_array.h +43 -0
  825. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter.h +130 -0
  826. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prefilter_tree.h +139 -0
  827. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/prog.h +483 -0
  828. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/re2.h +994 -0
  829. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/regexp.h +692 -0
  830. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/set.h +85 -0
  831. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_array.h +367 -0
  832. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/sparse_set.h +241 -0
  833. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/stringpiece.h +205 -0
  834. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_casefold.h +78 -0
  835. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/unicode_groups.h +64 -0
  836. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Headers/walker-inl.h +235 -0
  837. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/Info.plist +26 -0
  838. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64/re2.framework/re2 +0 -0
  839. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/bitmap256.h +82 -0
  840. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/filtered_re2.h +111 -0
  841. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/pod_array.h +43 -0
  842. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter.h +130 -0
  843. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prefilter_tree.h +139 -0
  844. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/prog.h +483 -0
  845. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/re2.h +994 -0
  846. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/regexp.h +692 -0
  847. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/set.h +85 -0
  848. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_array.h +367 -0
  849. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/sparse_set.h +241 -0
  850. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/stringpiece.h +205 -0
  851. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_casefold.h +78 -0
  852. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/unicode_groups.h +64 -0
  853. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Headers/walker-inl.h +235 -0
  854. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/Info.plist +26 -0
  855. package/third-party/ios/ExecutorchLib/frameworks/re2.xcframework/ios-arm64-simulator/re2.framework/re2 +0 -0
  856. package/third-party/ios/ios.toolchain.cmake +1122 -0
  857. package/LICENSE +0 -79
  858. package/README.md +0 -148
  859. package/android/src/main/java/com/swmansion/rnexecutorch/Classification.kt +0 -64
  860. package/android/src/main/java/com/swmansion/rnexecutorch/ETModule.kt +0 -90
  861. package/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt +0 -58
  862. package/android/src/main/java/com/swmansion/rnexecutorch/OCR.kt +0 -90
  863. package/android/src/main/java/com/swmansion/rnexecutorch/ObjectDetection.kt +0 -64
  864. package/android/src/main/java/com/swmansion/rnexecutorch/SpeechToText.kt +0 -91
  865. package/android/src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt +0 -54
  866. package/android/src/main/java/com/swmansion/rnexecutorch/TextEmbeddings.kt +0 -51
  867. package/android/src/main/java/com/swmansion/rnexecutorch/Tokenizer.kt +0 -86
  868. package/android/src/main/java/com/swmansion/rnexecutorch/VerticalOCR.kt +0 -179
  869. package/android/src/main/java/com/swmansion/rnexecutorch/models/BaseModel.kt +0 -54
  870. package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsModel.kt +0 -48
  871. package/android/src/main/java/com/swmansion/rnexecutorch/models/TextEmbeddings/TextEmbeddingsUtils.kt +0 -37
  872. package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt +0 -46
  873. package/android/src/main/java/com/swmansion/rnexecutorch/models/classification/Constants.kt +0 -1005
  874. package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt +0 -26
  875. package/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt +0 -142
  876. package/android/src/main/java/com/swmansion/rnexecutorch/models/objectDetection/SSDLiteLargeModel.kt +0 -74
  877. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Detector.kt +0 -82
  878. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/RecognitionHandler.kt +0 -117
  879. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/Recognizer.kt +0 -51
  880. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/VerticalDetector.kt +0 -89
  881. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/CTCLabelConverter.kt +0 -58
  882. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/Constants.kt +0 -31
  883. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/DetectorUtils.kt +0 -608
  884. package/android/src/main/java/com/swmansion/rnexecutorch/models/ocr/utils/RecognizerUtils.kt +0 -430
  885. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TDecoder.kt +0 -39
  886. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/BaseS2TModule.kt +0 -43
  887. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Moonshine.kt +0 -16
  888. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineDecoder.kt +0 -23
  889. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/MoonshineEncoder.kt +0 -20
  890. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/Whisper.kt +0 -16
  891. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperDecoder.kt +0 -22
  892. package/android/src/main/java/com/swmansion/rnexecutorch/models/speechToText/WhisperEncoder.kt +0 -29
  893. package/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt +0 -43
  894. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ArrayUtils.kt +0 -87
  895. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ETError.kt +0 -34
  896. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ImageProcessor.kt +0 -237
  897. package/android/src/main/java/com/swmansion/rnexecutorch/utils/Numerical.kt +0 -8
  898. package/android/src/main/java/com/swmansion/rnexecutorch/utils/ObjectDetectionUtils.kt +0 -201
  899. package/android/src/main/java/com/swmansion/rnexecutorch/utils/STFT.kt +0 -50
  900. package/android/src/main/java/com/swmansion/rnexecutorch/utils/TensorUtils.kt +0 -103
  901. package/ios/RnExecutorch/Classification.h +0 -5
  902. package/ios/RnExecutorch/Classification.mm +0 -54
  903. package/ios/RnExecutorch/ETModule.h +0 -5
  904. package/ios/RnExecutorch/ETModule.mm +0 -75
  905. package/ios/RnExecutorch/ImageSegmentation.h +0 -5
  906. package/ios/RnExecutorch/ImageSegmentation.mm +0 -60
  907. package/ios/RnExecutorch/OCR.h +0 -5
  908. package/ios/RnExecutorch/OCR.mm +0 -96
  909. package/ios/RnExecutorch/ObjectDetection.h +0 -5
  910. package/ios/RnExecutorch/ObjectDetection.mm +0 -56
  911. package/ios/RnExecutorch/SpeechToText.h +0 -5
  912. package/ios/RnExecutorch/SpeechToText.mm +0 -125
  913. package/ios/RnExecutorch/StyleTransfer.h +0 -5
  914. package/ios/RnExecutorch/StyleTransfer.mm +0 -55
  915. package/ios/RnExecutorch/TextEmbeddings.h +0 -5
  916. package/ios/RnExecutorch/TextEmbeddings.mm +0 -62
  917. package/ios/RnExecutorch/Tokenizer.h +0 -5
  918. package/ios/RnExecutorch/Tokenizer.mm +0 -83
  919. package/ios/RnExecutorch/VerticalOCR.h +0 -5
  920. package/ios/RnExecutorch/VerticalOCR.mm +0 -183
  921. package/ios/RnExecutorch/models/BaseModel.h +0 -21
  922. package/ios/RnExecutorch/models/BaseModel.mm +0 -43
  923. package/ios/RnExecutorch/models/classification/ClassificationModel.h +0 -10
  924. package/ios/RnExecutorch/models/classification/ClassificationModel.mm +0 -53
  925. package/ios/RnExecutorch/models/classification/Constants.h +0 -3
  926. package/ios/RnExecutorch/models/image_segmentation/Constants.h +0 -4
  927. package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +0 -10
  928. package/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +0 -146
  929. package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.hpp +0 -11
  930. package/ios/RnExecutorch/models/object_detection/SSDLiteLargeModel.mm +0 -64
  931. package/ios/RnExecutorch/models/ocr/Detector.h +0 -9
  932. package/ios/RnExecutorch/models/ocr/Detector.mm +0 -101
  933. package/ios/RnExecutorch/models/ocr/RecognitionHandler.h +0 -16
  934. package/ios/RnExecutorch/models/ocr/RecognitionHandler.mm +0 -135
  935. package/ios/RnExecutorch/models/ocr/Recognizer.h +0 -8
  936. package/ios/RnExecutorch/models/ocr/Recognizer.mm +0 -77
  937. package/ios/RnExecutorch/models/ocr/VerticalDetector.h +0 -10
  938. package/ios/RnExecutorch/models/ocr/VerticalDetector.mm +0 -118
  939. package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.h +0 -16
  940. package/ios/RnExecutorch/models/ocr/utils/CTCLabelConverter.mm +0 -80
  941. package/ios/RnExecutorch/models/ocr/utils/Constants.h +0 -26
  942. package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.h +0 -31
  943. package/ios/RnExecutorch/models/ocr/utils/DetectorUtils.mm +0 -754
  944. package/ios/RnExecutorch/models/ocr/utils/OCRUtils.h +0 -10
  945. package/ios/RnExecutorch/models/ocr/utils/OCRUtils.mm +0 -67
  946. package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.h +0 -35
  947. package/ios/RnExecutorch/models/ocr/utils/RecognizerUtils.mm +0 -331
  948. package/ios/RnExecutorch/models/stt/Moonshine.hpp +0 -13
  949. package/ios/RnExecutorch/models/stt/Moonshine.mm +0 -64
  950. package/ios/RnExecutorch/models/stt/MoonshineDecoder.hpp +0 -16
  951. package/ios/RnExecutorch/models/stt/MoonshineDecoder.mm +0 -24
  952. package/ios/RnExecutorch/models/stt/MoonshineEncoder.hpp +0 -15
  953. package/ios/RnExecutorch/models/stt/MoonshineEncoder.mm +0 -18
  954. package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.hpp +0 -26
  955. package/ios/RnExecutorch/models/stt/SpeechToTextBaseModel.mm +0 -19
  956. package/ios/RnExecutorch/models/stt/Whisper.hpp +0 -12
  957. package/ios/RnExecutorch/models/stt/Whisper.mm +0 -68
  958. package/ios/RnExecutorch/models/stt/WhisperDecoder.hpp +0 -16
  959. package/ios/RnExecutorch/models/stt/WhisperDecoder.mm +0 -22
  960. package/ios/RnExecutorch/models/stt/WhisperEncoder.hpp +0 -15
  961. package/ios/RnExecutorch/models/stt/WhisperEncoder.mm +0 -21
  962. package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h +0 -11
  963. package/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm +0 -50
  964. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.h +0 -15
  965. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsModel.mm +0 -45
  966. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.h +0 -8
  967. package/ios/RnExecutorch/models/text_embeddings/TextEmbeddingsUtils.mm +0 -49
  968. package/ios/RnExecutorch/utils/Constants.h +0 -8
  969. package/ios/RnExecutorch/utils/ObjectDetectionUtils.hpp +0 -23
  970. package/ios/RnExecutorch/utils/SFFT.hpp +0 -13
  971. package/ios/RnExecutorch/utils/SFFT.mm +0 -71
  972. package/lib/module/native/NativeClassification.js +0 -5
  973. package/lib/module/native/NativeClassification.js.map +0 -1
  974. package/lib/module/native/NativeETModule.js +0 -5
  975. package/lib/module/native/NativeETModule.js.map +0 -1
  976. package/lib/module/native/NativeImageSegmentation.js +0 -5
  977. package/lib/module/native/NativeImageSegmentation.js.map +0 -1
  978. package/lib/module/native/NativeOCR.js +0 -5
  979. package/lib/module/native/NativeOCR.js.map +0 -1
  980. package/lib/module/native/NativeObjectDetection.js +0 -5
  981. package/lib/module/native/NativeObjectDetection.js.map +0 -1
  982. package/lib/module/native/NativeSpeechToText.js +0 -5
  983. package/lib/module/native/NativeSpeechToText.js.map +0 -1
  984. package/lib/module/native/NativeStyleTransfer.js +0 -5
  985. package/lib/module/native/NativeStyleTransfer.js.map +0 -1
  986. package/lib/module/native/NativeTextEmbeddings.js +0 -5
  987. package/lib/module/native/NativeTextEmbeddings.js.map +0 -1
  988. package/lib/module/native/NativeTokenizer.js +0 -5
  989. package/lib/module/native/NativeTokenizer.js.map +0 -1
  990. package/lib/module/native/NativeVerticalOCR.js +0 -5
  991. package/lib/module/native/NativeVerticalOCR.js.map +0 -1
  992. package/lib/module/package.json +0 -1
  993. package/lib/typescript/native/NativeClassification.d.ts +0 -10
  994. package/lib/typescript/native/NativeClassification.d.ts.map +0 -1
  995. package/lib/typescript/native/NativeETModule.d.ts +0 -9
  996. package/lib/typescript/native/NativeETModule.d.ts.map +0 -1
  997. package/lib/typescript/native/NativeImageSegmentation.d.ts +0 -10
  998. package/lib/typescript/native/NativeImageSegmentation.d.ts.map +0 -1
  999. package/lib/typescript/native/NativeOCR.d.ts +0 -9
  1000. package/lib/typescript/native/NativeOCR.d.ts.map +0 -1
  1001. package/lib/typescript/native/NativeObjectDetection.d.ts +0 -9
  1002. package/lib/typescript/native/NativeObjectDetection.d.ts.map +0 -1
  1003. package/lib/typescript/native/NativeSpeechToText.d.ts +0 -12
  1004. package/lib/typescript/native/NativeSpeechToText.d.ts.map +0 -1
  1005. package/lib/typescript/native/NativeStyleTransfer.d.ts.map +0 -1
  1006. package/lib/typescript/native/NativeTextEmbeddings.d.ts +0 -8
  1007. package/lib/typescript/native/NativeTextEmbeddings.d.ts.map +0 -1
  1008. package/lib/typescript/native/NativeTokenizer.d.ts +0 -12
  1009. package/lib/typescript/native/NativeTokenizer.d.ts.map +0 -1
  1010. package/lib/typescript/native/NativeVerticalOCR.d.ts +0 -9
  1011. package/lib/typescript/native/NativeVerticalOCR.d.ts.map +0 -1
  1012. package/src/native/NativeClassification.ts +0 -9
  1013. package/src/native/NativeETModule.ts +0 -14
  1014. package/src/native/NativeImageSegmentation.ts +0 -14
  1015. package/src/native/NativeOCR.ts +0 -16
  1016. package/src/native/NativeObjectDetection.ts +0 -10
  1017. package/src/native/NativeSpeechToText.ts +0 -17
  1018. package/src/native/NativeStyleTransfer.ts +0 -10
  1019. package/src/native/NativeTextEmbeddings.ts +0 -9
  1020. package/src/native/NativeTokenizer.ts +0 -13
  1021. package/src/native/NativeVerticalOCR.ts +0 -16
@@ -0,0 +1,2559 @@
1
+ // This file is part of OpenCV project.
2
+ // It is subject to the license terms in the LICENSE file found in the top-level
3
+ // directory of this distribution and at http://opencv.org/license.html.
4
+
5
+ // The original implementation is contributed by HAN Liutong.
6
+ // Copyright (C) 2022, Institute of Software, Chinese Academy of Sciences.
7
+
8
+ #ifndef OPENCV_HAL_INTRIN_RVV_SCALABLE_HPP
9
+ #define OPENCV_HAL_INTRIN_RVV_SCALABLE_HPP
10
+
11
+ #include <opencv2/core/check.hpp>
12
+
13
+ #if defined(__GNUC__) && !defined(__clang__)
14
+ // FIXIT: eliminate massive warnings from templates
15
+ // GCC from 'rvv-next': riscv64-unknown-linux-gnu-g++ (g42df3464463) 12.0.1
16
+ // 20220505 (prerelease) doesn't work: #pragma GCC diagnostic push
17
+ #pragma GCC diagnostic ignored "-Wignored-attributes"
18
+ #endif
19
+
20
+ #ifndef CV_RVV_MAX_VLEN
21
+ #define CV_RVV_MAX_VLEN 1024
22
+ #endif
23
+
24
+ namespace cv {
25
+
26
+ //! @cond IGNORED
27
+
28
+ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
29
+
30
+ #define CV_SIMD_SCALABLE 1
31
+ #define CV_SIMD_SCALABLE_64F 1
32
+
33
+ using v_uint8 = vuint8m2_t;
34
+ using v_int8 = vint8m2_t;
35
+ using v_uint16 = vuint16m2_t;
36
+ using v_int16 = vint16m2_t;
37
+ using v_uint32 = vuint32m2_t;
38
+ using v_int32 = vint32m2_t;
39
+ using v_uint64 = vuint64m2_t;
40
+ using v_int64 = vint64m2_t;
41
+
42
+ using v_float32 = vfloat32m2_t;
43
+ #if CV_SIMD_SCALABLE_64F
44
+ using v_float64 = vfloat64m2_t;
45
+ #endif
46
+
47
+ using uchar = unsigned char;
48
+ using schar = signed char;
49
+ using ushort = unsigned short;
50
+ using uint = unsigned int;
51
+ using uint64 = unsigned long int;
52
+ using int64 = long int;
53
+
54
+ template <class T> struct VTraits;
55
+
56
+ #define OPENCV_HAL_IMPL_RVV_TRAITS(REG, TYP, SUF, SZ) \
57
+ template <> struct VTraits<REG> { \
58
+ static inline int vlanes() { return __riscv_vsetvlmax_##SUF(); } \
59
+ using lane_type = TYP; \
60
+ static const int max_nlanes = CV_RVV_MAX_VLEN / SZ; \
61
+ };
62
+
63
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint8m1_t, int8_t, e8m1, 8)
64
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint8m2_t, int8_t, e8m2, 8)
65
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint8m4_t, int8_t, e8m4, 8)
66
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint8m8_t, int8_t, e8m8, 8)
67
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m1_t, uint8_t, e8m1, 8)
68
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m2_t, uint8_t, e8m2, 8)
69
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m4_t, uint8_t, e8m4, 8)
70
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m8_t, uint8_t, e8m8, 8)
71
+
72
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint16m1_t, int16_t, e16m1, 16)
73
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint16m2_t, int16_t, e16m2, 16)
74
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint16m4_t, int16_t, e16m4, 16)
75
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint16m8_t, int16_t, e16m8, 16)
76
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m1_t, uint16_t, e16m1, 16)
77
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m2_t, uint16_t, e16m2, 16)
78
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m4_t, uint16_t, e16m4, 16)
79
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m8_t, uint16_t, e16m8, 16)
80
+
81
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint32m1_t, int32_t, e32m1, 32)
82
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint32m2_t, int32_t, e32m2, 32)
83
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint32m4_t, int32_t, e32m4, 32)
84
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint32m8_t, int32_t, e32m8, 32)
85
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m1_t, uint32_t, e32m1, 32)
86
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m2_t, uint32_t, e32m2, 32)
87
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m4_t, uint32_t, e32m4, 32)
88
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m8_t, uint32_t, e32m8, 32)
89
+
90
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint64m1_t, int64_t, e64m1, 64)
91
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint64m2_t, int64_t, e64m2, 64)
92
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint64m4_t, int64_t, e64m4, 64)
93
+ OPENCV_HAL_IMPL_RVV_TRAITS(vint64m8_t, int64_t, e64m8, 64)
94
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m1_t, uint64_t, e64m1, 64)
95
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m2_t, uint64_t, e64m2, 64)
96
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m4_t, uint64_t, e64m4, 64)
97
+ OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m8_t, uint64_t, e64m8, 64)
98
+
99
+ OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m1_t, float, e32m1, 32)
100
+ OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m2_t, float, e32m2, 32)
101
+ OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m4_t, float, e32m4, 32)
102
+ OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m8_t, float, e32m8, 32)
103
+
104
+ #if CV_SIMD_SCALABLE_64F
105
+ OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m1_t, double, e64m1, 64)
106
+ OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m2_t, double, e64m2, 64)
107
+ OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m4_t, double, e64m4, 64)
108
+ OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m8_t, double, e64m8, 64)
109
+ #endif
110
+
111
+ // LLVM/Clang defines "overloaded intrinsics" e.g. 'vand(op1, op2)'
112
+ // GCC does not have these functions, so we need to implement them manually
113
+ // We implement only a selected subset required to build the current state of the code
114
+ // Included inside namespace cv::
115
+ // #ifndef __riscv_v_intrinsic_overloading
116
+ // #include "intrin_rvv_compat_overloaded.hpp"
117
+ // #endif // __riscv_v_intrinsic_overloading
118
+
119
+ //////////// get0 ////////////
120
+ #define OPENCV_HAL_IMPL_RVV_GRT0_INT(_Tpvec, _Tp) \
121
+ inline _Tp v_get0(const v_##_Tpvec &v) { return __riscv_vmv_x(v); }
122
+
123
+ OPENCV_HAL_IMPL_RVV_GRT0_INT(uint8, uchar)
124
+ OPENCV_HAL_IMPL_RVV_GRT0_INT(int8, schar)
125
+ OPENCV_HAL_IMPL_RVV_GRT0_INT(uint16, ushort)
126
+ OPENCV_HAL_IMPL_RVV_GRT0_INT(int16, short)
127
+ OPENCV_HAL_IMPL_RVV_GRT0_INT(uint32, unsigned)
128
+ OPENCV_HAL_IMPL_RVV_GRT0_INT(int32, int)
129
+ OPENCV_HAL_IMPL_RVV_GRT0_INT(uint64, uint64)
130
+ OPENCV_HAL_IMPL_RVV_GRT0_INT(int64, int64)
131
+
132
+ inline float v_get0(const v_float32 &v) { return __riscv_vfmv_f(v); }
133
+ #if CV_SIMD_SCALABLE_64F
134
+ inline double v_get0(const v_float64 &v) { return __riscv_vfmv_f(v); }
135
+ #endif
136
+
137
+ //////////// Initial ////////////
138
+
139
+ #define OPENCV_HAL_IMPL_RVV_INIT_INTEGER(_Tpvec, _Tp, suffix1, suffix2, vl) \
140
+ inline v_##_Tpvec v_setzero_##suffix1() { \
141
+ return __riscv_vmv_v_x_##suffix2##m2(0, vl); \
142
+ } \
143
+ inline v_##_Tpvec v_setall_##suffix1(_Tp v) { \
144
+ return __riscv_vmv_v_x_##suffix2##m2(v, vl); \
145
+ } \
146
+ template <> inline v_##_Tpvec v_setzero_() { return v_setzero_##suffix1(); } \
147
+ template <> inline v_##_Tpvec v_setall_(_Tp v) { \
148
+ return v_setall_##suffix1(v); \
149
+ }
150
+
151
+ OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8, uchar, u8, u8,
152
+ VTraits<v_int8>::vlanes())
153
+ OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int8, schar, s8, i8, VTraits<v_int8>::vlanes())
154
+ OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint16, ushort, u16, u16,
155
+ VTraits<v_uint16>::vlanes())
156
+ OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int16, short, s16, i16,
157
+ VTraits<v_int16>::vlanes())
158
+ OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint32, uint, u32, u32,
159
+ VTraits<v_uint32>::vlanes())
160
+ OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int32, int, s32, i32,
161
+ VTraits<v_int32>::vlanes())
162
+ OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint64, uint64, u64, u64,
163
+ VTraits<v_uint64>::vlanes())
164
+ OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int64, int64, s64, i64,
165
+ VTraits<v_int64>::vlanes())
166
+
167
+ #define OPENCV_HAL_IMPL_RVV_INIT_FP(_Tpv, _Tp, suffix, vl) \
168
+ inline v_##_Tpv v_setzero_##suffix() { \
169
+ return __riscv_vfmv_v_f_##suffix##m2(0, vl); \
170
+ } \
171
+ inline v_##_Tpv v_setall_##suffix(_Tp v) { \
172
+ return __riscv_vfmv_v_f_##suffix##m2(v, vl); \
173
+ } \
174
+ template <> inline v_##_Tpv v_setzero_() { return v_setzero_##suffix(); } \
175
+ template <> inline v_##_Tpv v_setall_(_Tp v) { return v_setall_##suffix(v); }
176
+
177
+ OPENCV_HAL_IMPL_RVV_INIT_FP(float32, float, f32, VTraits<v_float32>::vlanes())
178
+ #if CV_SIMD_SCALABLE_64F
179
+ OPENCV_HAL_IMPL_RVV_INIT_FP(float64, double, f64, VTraits<v_float64>::vlanes())
180
+ #endif
181
+
182
+ //////////// Reinterpret ////////////
183
+ #define OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(_Tpvec1, suffix1) \
184
+ inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec1 &v) { \
185
+ return v; \
186
+ }
187
+ OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint8, u8)
188
+ OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint16, u16)
189
+ OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint32, u32)
190
+ OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(uint64, u64)
191
+ OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int8, s8)
192
+ OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int16, s16)
193
+ OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int32, s32)
194
+ OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(int64, s64)
195
+ OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(float32, f32)
196
+ #if CV_SIMD_SCALABLE_64F
197
+ OPENCV_HAL_IMPL_RVV_NOTHING_REINTERPRET(float64, f64)
198
+ #endif
199
+ // TODO: can be simplified by using overloaded RV intrinsic
200
+ #define OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(_Tpvec1, _Tpvec2, suffix1, \
201
+ suffix2, nsuffix1, nsuffix2) \
202
+ inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2 &v) { \
203
+ return v_##_Tpvec1( \
204
+ __riscv_vreinterpret_v_##nsuffix2##m2_##nsuffix1##m2(v)); \
205
+ } \
206
+ inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1 &v) { \
207
+ return v_##_Tpvec2( \
208
+ __riscv_vreinterpret_v_##nsuffix1##m2_##nsuffix2##m2(v)); \
209
+ }
210
+
211
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, int8, u8, s8, u8, i8)
212
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16, int16, u16, s16, u16, i16)
213
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32, int32, u32, s32, u32, i32)
214
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32, float32, u32, f32, u32, f32)
215
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int32, float32, s32, f32, i32, f32)
216
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint64, int64, u64, s64, u64, i64)
217
+ #if CV_SIMD_SCALABLE_64F
218
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint64, float64, u64, f64, u64, f64)
219
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int64, float64, s64, f64, i64, f64)
220
+ #endif
221
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, uint16, u8, u16, u8, u16)
222
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, uint32, u8, u32, u8, u32)
223
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint8, uint64, u8, u64, u8, u64)
224
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16, uint32, u16, u32, u16, u32)
225
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint16, uint64, u16, u64, u16, u64)
226
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(uint32, uint64, u32, u64, u32, u64)
227
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8, int16, s8, s16, i8, i16)
228
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8, int32, s8, s32, i8, i32)
229
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int8, int64, s8, s64, i8, i64)
230
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int16, int32, s16, s32, i16, i32)
231
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int16, int64, s16, s64, i16, i64)
232
+ OPENCV_HAL_IMPL_RVV_NATIVE_REINTERPRET(int32, int64, s32, s64, i32, i64)
233
+
234
+ #define OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET( \
235
+ _Tpvec1, _Tpvec2, suffix1, suffix2, nsuffix1, nsuffix2, width1, width2) \
236
+ inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2 &v) { \
237
+ return __riscv_vreinterpret_v_##nsuffix1##width2##m2_##nsuffix1##width1##m2( \
238
+ __riscv_vreinterpret_v_##nsuffix2##width2##m2_##nsuffix1##width2##m2( \
239
+ v)); \
240
+ } \
241
+ inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1 &v) { \
242
+ return __riscv_vreinterpret_v_##nsuffix1##width2##m2_##nsuffix2##width2##m2( \
243
+ __riscv_vreinterpret_v_##nsuffix1##width1##m2_##nsuffix1##width2##m2( \
244
+ v)); \
245
+ }
246
+
247
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, int16, u8, s16, u, i, 8, 16)
248
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, int32, u8, s32, u, i, 8, 32)
249
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, int64, u8, s64, u, i, 8, 64)
250
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, int8, u16, s8, u, i, 16, 8)
251
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, int32, u16, s32, u, i, 16, 32)
252
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, int64, u16, s64, u, i, 16, 64)
253
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, int8, u32, s8, u, i, 32, 8)
254
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, int16, u32, s16, u, i, 32, 16)
255
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, int64, u32, s64, u, i, 32, 64)
256
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64, int8, u64, s8, u, i, 64, 8)
257
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64, int16, u64, s16, u, i, 64, 16)
258
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64, int32, u64, s32, u, i, 64, 32)
259
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, float32, u8, f32, u, f, 8, 32)
260
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, float32, u16, f32, u, f, 16,
261
+ 32)
262
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64, float32, u64, f32, u, f, 64,
263
+ 32)
264
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int8, float32, s8, f32, i, f, 8, 32)
265
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int16, float32, s16, f32, i, f, 16,
266
+ 32)
267
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int64, float32, s64, f32, i, f, 64,
268
+ 32)
269
+ #if CV_SIMD_SCALABLE_64F
270
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8, float64, u8, f64, u, f, 8, 64)
271
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16, float64, u16, f64, u, f, 16,
272
+ 64)
273
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32, float64, u32, f64, u, f, 32,
274
+ 64)
275
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int8, float64, s8, f64, i, f, 8, 64)
276
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int16, float64, s16, f64, i, f, 16,
277
+ 64)
278
+ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int32, float64, s32, f64, i, f, 32,
279
+ 64)
280
+ // Three times reinterpret
281
+ inline v_float32 v_reinterpret_as_f32(const v_float64 &v) {
282
+ return __riscv_vreinterpret_v_u32m2_f32m2(__riscv_vreinterpret_v_u64m2_u32m2(
283
+ __riscv_vreinterpret_v_f64m2_u64m2(v)));
284
+ }
285
+
286
+ inline v_float64 v_reinterpret_as_f64(const v_float32 &v) {
287
+ return __riscv_vreinterpret_v_u64m2_f64m2(__riscv_vreinterpret_v_u32m2_u64m2(
288
+ __riscv_vreinterpret_v_f32m2_u32m2(v)));
289
+ }
290
+ #endif
291
+
292
+ //////////// Extract //////////////
293
+
294
+ #define OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(_Tpvec, _Tp, vl) \
295
+ template <int s = 0> \
296
+ inline _Tpvec v_extract(const _Tpvec &a, const _Tpvec &b, int i = s) { \
297
+ return __riscv_vslideup(__riscv_vslidedown(a, i, vl), b, \
298
+ VTraits<_Tpvec>::vlanes() - i, vl); \
299
+ } \
300
+ template <int s = 0> inline _Tp v_extract_n(_Tpvec v, int i = s) { \
301
+ return __riscv_vmv_x(__riscv_vslidedown(v, i, vl)); \
302
+ }
303
+
304
+ OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint8, uchar, VTraits<v_uint8>::vlanes())
305
+ OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int8, schar, VTraits<v_int8>::vlanes())
306
+ OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint16, ushort,
307
+ VTraits<v_uint16>::vlanes())
308
+ OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int16, short, VTraits<v_int16>::vlanes())
309
+ OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint32, unsigned int,
310
+ VTraits<v_uint32>::vlanes())
311
+ OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int32, int, VTraits<v_int32>::vlanes())
312
+ OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint64, uint64,
313
+ VTraits<v_uint64>::vlanes())
314
+ OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int64, int64, VTraits<v_int64>::vlanes())
315
+
316
+ #define OPENCV_HAL_IMPL_RVV_EXTRACT_FP(_Tpvec, _Tp, vl) \
317
+ template <int s = 0> \
318
+ inline _Tpvec v_extract(const _Tpvec &a, const _Tpvec &b, int i = s) { \
319
+ return __riscv_vslideup(__riscv_vslidedown(a, i, vl), b, \
320
+ VTraits<_Tpvec>::vlanes() - i, vl); \
321
+ } \
322
+ template <int s = 0> inline _Tp v_extract_n(_Tpvec v, int i = s) { \
323
+ return __riscv_vfmv_f(__riscv_vslidedown(v, i, vl)); \
324
+ }
325
+
326
+ OPENCV_HAL_IMPL_RVV_EXTRACT_FP(v_float32, float, VTraits<v_float32>::vlanes())
327
+ #if CV_SIMD_SCALABLE_64F
328
+ OPENCV_HAL_IMPL_RVV_EXTRACT_FP(v_float64, double, VTraits<v_float64>::vlanes())
329
+ #endif
330
+
331
+ #define OPENCV_HAL_IMPL_RVV_EXTRACT(_Tpvec, _Tp, vl) \
332
+ inline _Tp v_extract_highest(_Tpvec v) { return v_extract_n(v, vl - 1); }
333
+
334
+ OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint8, uchar, VTraits<v_uint8>::vlanes())
335
+ OPENCV_HAL_IMPL_RVV_EXTRACT(v_int8, schar, VTraits<v_int8>::vlanes())
336
+ OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint16, ushort, VTraits<v_uint16>::vlanes())
337
+ OPENCV_HAL_IMPL_RVV_EXTRACT(v_int16, short, VTraits<v_int16>::vlanes())
338
+ OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint32, unsigned int, VTraits<v_uint32>::vlanes())
339
+ OPENCV_HAL_IMPL_RVV_EXTRACT(v_int32, int, VTraits<v_int32>::vlanes())
340
+ OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint64, uint64, VTraits<v_uint64>::vlanes())
341
+ OPENCV_HAL_IMPL_RVV_EXTRACT(v_int64, int64, VTraits<v_int64>::vlanes())
342
+ OPENCV_HAL_IMPL_RVV_EXTRACT(v_float32, float, VTraits<v_float32>::vlanes())
343
+ #if CV_SIMD_SCALABLE_64F
344
+ OPENCV_HAL_IMPL_RVV_EXTRACT(v_float64, double, VTraits<v_float64>::vlanes())
345
+ #endif
346
+
347
+ ////////////// Load/Store //////////////
348
+ #define OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(_Tpvec, _nTpvec, _Tp, hvl, vl, width, \
349
+ suffix) \
350
+ inline _Tpvec v_load(const _Tp *ptr) { \
351
+ return __riscv_vle##width##_v_##suffix##m2(ptr, vl); \
352
+ } \
353
+ inline _Tpvec v_load_aligned(const _Tp *ptr) { \
354
+ return __riscv_vle##width##_v_##suffix##m2(ptr, vl); \
355
+ } \
356
+ inline void v_store(_Tp *ptr, const _Tpvec &a, hal::StoreMode /*mode*/) { \
357
+ __riscv_vse##width##_v_##suffix##m2(ptr, a, vl); \
358
+ } \
359
+ inline _Tpvec v_load_low(const _Tp *ptr) { \
360
+ return __riscv_vle##width##_v_##suffix##m2(ptr, hvl); \
361
+ } \
362
+ inline _Tpvec v_load_halves(const _Tp *ptr0, const _Tp *ptr1) { \
363
+ return __riscv_vslideup(__riscv_vle##width##_v_##suffix##m2(ptr0, hvl), \
364
+ __riscv_vle##width##_v_##suffix##m2(ptr1, hvl), \
365
+ hvl, vl); \
366
+ } \
367
+ inline void v_store(_Tp *ptr, const _Tpvec &a) { \
368
+ __riscv_vse##width(ptr, a, vl); \
369
+ } \
370
+ inline void v_store_aligned(_Tp *ptr, const _Tpvec &a) { \
371
+ __riscv_vse##width(ptr, a, vl); \
372
+ } \
373
+ inline void v_store_aligned_nocache(_Tp *ptr, const _Tpvec &a) { \
374
+ __riscv_vse##width(ptr, a, vl); \
375
+ } \
376
+ inline void v_store_low(_Tp *ptr, const _Tpvec &a) { \
377
+ __riscv_vse##width(ptr, a, hvl); \
378
+ } \
379
+ inline void v_store_high(_Tp *ptr, const _Tpvec &a) { \
380
+ __riscv_vse##width(ptr, __riscv_vslidedown_vx_##suffix##m2(a, hvl, vl), \
381
+ hvl); \
382
+ } \
383
+ template <typename... Targs> _Tpvec v_load_##suffix(Targs... nScalars) { \
384
+ return v_load({nScalars...}); \
385
+ }
386
+
387
+ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint8, vuint8m2_t, uchar,
388
+ VTraits<v_uint8>::vlanes() / 2,
389
+ VTraits<v_uint8>::vlanes(), 8, u8)
390
+ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int8, vint8m2_t, schar,
391
+ VTraits<v_int8>::vlanes() / 2,
392
+ VTraits<v_int8>::vlanes(), 8, i8)
393
+ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint16, vuint16m2_t, ushort,
394
+ VTraits<v_uint16>::vlanes() / 2,
395
+ VTraits<v_uint16>::vlanes(), 16, u16)
396
+ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int16, vint16m2_t, short,
397
+ VTraits<v_int16>::vlanes() / 2,
398
+ VTraits<v_int16>::vlanes(), 16, i16)
399
+ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint32, vuint32m2_t, unsigned int,
400
+ VTraits<v_uint32>::vlanes() / 2,
401
+ VTraits<v_uint32>::vlanes(), 32, u32)
402
+ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int32, vint32m2_t, int,
403
+ VTraits<v_int32>::vlanes() / 2,
404
+ VTraits<v_int32>::vlanes(), 32, i32)
405
+ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint64, vuint64m2_t, uint64,
406
+ VTraits<v_uint64>::vlanes() / 2,
407
+ VTraits<v_uint64>::vlanes(), 64, u64)
408
+ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int64, vint64m2_t, int64,
409
+ VTraits<v_int64>::vlanes() / 2,
410
+ VTraits<v_int64>::vlanes(), 64, i64)
411
+ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float32, vfloat32m2_t, float,
412
+ VTraits<v_float32>::vlanes() / 2,
413
+ VTraits<v_float32>::vlanes(), 32, f32)
414
+
415
+ #if CV_SIMD_SCALABLE_64F
416
+ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64, vfloat64m2_t, double,
417
+ VTraits<v_float64>::vlanes() / 2,
418
+ VTraits<v_float64>::vlanes(), 64, f64)
419
+ #endif
420
+
421
+ ////////////// Lookup table access ////////////////////
422
+ #define OPENCV_HAL_IMPL_RVV_LUT(_Tpvec, _Tp, suffix) \
423
+ inline _Tpvec v_lut(const _Tp *tab, const int *idx) { \
424
+ auto vidx = __riscv_vmul( \
425
+ __riscv_vreinterpret_u32##suffix( \
426
+ __riscv_vle32_v_i32##suffix(idx, VTraits<_Tpvec>::vlanes())), \
427
+ sizeof(_Tp), VTraits<_Tpvec>::vlanes()); \
428
+ return __riscv_vloxei32(tab, vidx, VTraits<_Tpvec>::vlanes()); \
429
+ }
430
+ OPENCV_HAL_IMPL_RVV_LUT(v_int8, schar, m8)
431
+ OPENCV_HAL_IMPL_RVV_LUT(v_int16, short, m4)
432
+ OPENCV_HAL_IMPL_RVV_LUT(v_int32, int, m2)
433
+ OPENCV_HAL_IMPL_RVV_LUT(v_int64, int64_t, m1)
434
+ OPENCV_HAL_IMPL_RVV_LUT(v_float32, float, m2)
435
+ #if CV_SIMD_SCALABLE_64F
436
+ OPENCV_HAL_IMPL_RVV_LUT(v_float64, double, m1)
437
+ #endif
438
+
439
+ #define OPENCV_HAL_IMPL_RVV_LUT_PAIRS(_Tpvec, _Tp, suffix1, suffix2, v_trunc) \
440
+ inline _Tpvec v_lut_pairs(const _Tp *tab, const int *idx) { \
441
+ auto v0 = __riscv_vle32_v_u32##suffix1((unsigned *)idx, \
442
+ VTraits<_Tpvec>::vlanes() / 2); \
443
+ auto v1 = __riscv_vadd(v0, 1, VTraits<_Tpvec>::vlanes() / 2); \
444
+ auto w0 = __riscv_vwcvtu_x(v0, VTraits<_Tpvec>::vlanes() / 2); \
445
+ auto w1 = __riscv_vwcvtu_x(v1, VTraits<_Tpvec>::vlanes() / 2); \
446
+ auto sh1 = \
447
+ __riscv_vslide1up(v_trunc(__riscv_vreinterpret_u32##suffix2(w1)), 0, \
448
+ VTraits<_Tpvec>::vlanes()); \
449
+ auto vid = \
450
+ __riscv_vor(sh1, v_trunc(__riscv_vreinterpret_u32##suffix2(w0)), \
451
+ VTraits<_Tpvec>::vlanes()); \
452
+ auto vidx = __riscv_vmul(vid, sizeof(_Tp), VTraits<_Tpvec>::vlanes()); \
453
+ return __riscv_vloxei32(tab, vidx, VTraits<_Tpvec>::vlanes()); \
454
+ }
455
+ OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int8, schar, m4, m8, OPENCV_HAL_NOP)
456
+ OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int16, short, m2, m4, OPENCV_HAL_NOP)
457
+ OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int32, int, m1, m2, OPENCV_HAL_NOP)
458
+ OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_float32, float, m1, m2, OPENCV_HAL_NOP)
459
+ OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_int64, int64_t, m1, m2,
460
+ __riscv_vlmul_trunc_u32m1)
461
+ #if CV_SIMD_SCALABLE_64F
462
+ OPENCV_HAL_IMPL_RVV_LUT_PAIRS(v_float64, double, m1, m2,
463
+ __riscv_vlmul_trunc_u32m1)
464
+ #endif
465
+
466
+ #define OPENCV_HAL_IMPL_RVV_LUT_QUADS(_Tpvec, _Tp, suffix0, suffix1, suffix2, \
467
+ v_trunc) \
468
+ inline _Tpvec v_lut_quads(const _Tp *tab, const int *idx) { \
469
+ auto v0 = __riscv_vle32_v_u32##suffix0((unsigned *)idx, \
470
+ VTraits<_Tpvec>::vlanes() / 4); \
471
+ auto v1 = __riscv_vadd(v0, 1, VTraits<_Tpvec>::vlanes() / 4); \
472
+ auto v2 = __riscv_vadd(v0, 2, VTraits<_Tpvec>::vlanes() / 4); \
473
+ auto v3 = __riscv_vadd(v0, 3, VTraits<_Tpvec>::vlanes() / 4); \
474
+ auto w0 = __riscv_vwcvtu_x(v0, VTraits<_Tpvec>::vlanes() / 4); \
475
+ auto w1 = __riscv_vwcvtu_x(v1, VTraits<_Tpvec>::vlanes() / 4); \
476
+ auto w2 = __riscv_vwcvtu_x(v2, VTraits<_Tpvec>::vlanes() / 4); \
477
+ auto w3 = __riscv_vwcvtu_x(v3, VTraits<_Tpvec>::vlanes() / 4); \
478
+ auto sh2 = __riscv_vslide1up(__riscv_vreinterpret_u32##suffix1(w2), 0, \
479
+ VTraits<_Tpvec>::vlanes() / 2); \
480
+ auto sh3 = __riscv_vslide1up(__riscv_vreinterpret_u32##suffix1(w3), 0, \
481
+ VTraits<_Tpvec>::vlanes() / 2); \
482
+ auto vid0 = __riscv_vor(sh2, __riscv_vreinterpret_u32##suffix1(w0), \
483
+ VTraits<_Tpvec>::vlanes() / 2); \
484
+ auto vid1 = __riscv_vor(sh3, __riscv_vreinterpret_u32##suffix1(w1), \
485
+ VTraits<_Tpvec>::vlanes() / 2); \
486
+ auto wid0 = \
487
+ __riscv_vwcvtu_x(v_trunc(vid0), VTraits<_Tpvec>::vlanes() / 2); \
488
+ auto wid1 = \
489
+ __riscv_vwcvtu_x(v_trunc(vid1), VTraits<_Tpvec>::vlanes() / 2); \
490
+ auto shwid1 = __riscv_vslide1up(__riscv_vreinterpret_u32##suffix2(wid1), \
491
+ 0, VTraits<_Tpvec>::vlanes()); \
492
+ auto vid = __riscv_vor(shwid1, __riscv_vreinterpret_u32##suffix2(wid0), \
493
+ VTraits<_Tpvec>::vlanes()); \
494
+ auto vidx = __riscv_vmul(vid, sizeof(_Tp), VTraits<_Tpvec>::vlanes()); \
495
+ return __riscv_vloxei32(tab, vidx, VTraits<_Tpvec>::vlanes()); \
496
+ }
497
+ OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_int8, schar, m2, m4, m8, OPENCV_HAL_NOP)
498
+ OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_int16, short, m1, m2, m4, OPENCV_HAL_NOP)
499
+ OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_int32, int, m1, m2, m2,
500
+ __riscv_vlmul_trunc_u32m1)
501
+ OPENCV_HAL_IMPL_RVV_LUT_QUADS(v_float32, float, m1, m2, m2,
502
+ __riscv_vlmul_trunc_u32m1)
503
+
504
+ #define OPENCV_HAL_IMPL_RVV_LUT_VEC(_Tpvec, _Tp) \
505
+ inline _Tpvec v_lut(const _Tp *tab, const v_int32 &vidx) { \
506
+ v_uint32 vidx_ = __riscv_vmul(__riscv_vreinterpret_u32m2(vidx), \
507
+ sizeof(_Tp), VTraits<v_int32>::vlanes()); \
508
+ return __riscv_vloxei32(tab, vidx_, VTraits<_Tpvec>::vlanes()); \
509
+ }
510
+ OPENCV_HAL_IMPL_RVV_LUT_VEC(v_float32, float)
511
+ OPENCV_HAL_IMPL_RVV_LUT_VEC(v_int32, int)
512
+ OPENCV_HAL_IMPL_RVV_LUT_VEC(v_uint32, unsigned)
513
+
514
+ #if CV_SIMD_SCALABLE_64F
515
+ inline v_float64 v_lut(const double *tab, const v_int32 &vidx) {
516
+ vuint32m1_t vidx_ =
517
+ __riscv_vmul(__riscv_vlmul_trunc_u32m1(__riscv_vreinterpret_u32m2(vidx)),
518
+ sizeof(double), VTraits<v_float64>::vlanes());
519
+ return __riscv_vloxei32(tab, vidx_, VTraits<v_float64>::vlanes());
520
+ }
521
+ #endif
522
+
523
+ inline v_uint8 v_lut(const uchar *tab, const int *idx) {
524
+ return v_reinterpret_as_u8(v_lut((schar *)tab, idx));
525
+ }
526
+ inline v_uint8 v_lut_pairs(const uchar *tab, const int *idx) {
527
+ return v_reinterpret_as_u8(v_lut_pairs((schar *)tab, idx));
528
+ }
529
+ inline v_uint8 v_lut_quads(const uchar *tab, const int *idx) {
530
+ return v_reinterpret_as_u8(v_lut_quads((schar *)tab, idx));
531
+ }
532
+ inline v_uint16 v_lut(const ushort *tab, const int *idx) {
533
+ return v_reinterpret_as_u16(v_lut((short *)tab, idx));
534
+ }
535
+ inline v_uint16 v_lut_pairs(const ushort *tab, const int *idx) {
536
+ return v_reinterpret_as_u16(v_lut_pairs((short *)tab, idx));
537
+ }
538
+ inline v_uint16 v_lut_quads(const ushort *tab, const int *idx) {
539
+ return v_reinterpret_as_u16(v_lut_quads((short *)tab, idx));
540
+ }
541
+ inline v_uint32 v_lut(const unsigned *tab, const int *idx) {
542
+ return v_reinterpret_as_u32(v_lut((int *)tab, idx));
543
+ }
544
+ inline v_uint32 v_lut_pairs(const unsigned *tab, const int *idx) {
545
+ return v_reinterpret_as_u32(v_lut_pairs((int *)tab, idx));
546
+ }
547
+ inline v_uint32 v_lut_quads(const unsigned *tab, const int *idx) {
548
+ return v_reinterpret_as_u32(v_lut_quads((int *)tab, idx));
549
+ }
550
+ inline v_uint64 v_lut(const uint64 *tab, const int *idx) {
551
+ return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx));
552
+ }
553
+ inline v_uint64 v_lut_pairs(const uint64 *tab, const int *idx) {
554
+ return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx));
555
+ }
556
+
557
+ ////////////// Pack boolean ////////////////////
558
+ inline v_uint8 v_pack_b(const v_uint16 &a, const v_uint16 &b) {
559
+ return __riscv_vnsrl(__riscv_vset(__riscv_vlmul_ext_v_u16m2_u16m4(a), 1, b),
560
+ 0, VTraits<v_uint8>::vlanes());
561
+ }
562
+
563
+ inline v_uint8 v_pack_b(const v_uint32 &a, const v_uint32 &b, const v_uint32 &c,
564
+ const v_uint32 &d) {
565
+
566
+ return __riscv_vnsrl(
567
+ __riscv_vnsrl(
568
+ __riscv_vset(
569
+ __riscv_vset(__riscv_vset(__riscv_vlmul_ext_u32m8(a), 1, b), 2,
570
+ c),
571
+ 3, d),
572
+ 0, VTraits<v_uint8>::vlanes()),
573
+ 0, VTraits<v_uint8>::vlanes());
574
+ }
575
+
576
+ inline v_uint8 v_pack_b(const v_uint64 &a, const v_uint64 &b, const v_uint64 &c,
577
+ const v_uint64 &d, const v_uint64 &e, const v_uint64 &f,
578
+ const v_uint64 &g, const v_uint64 &h) {
579
+ vuint8m1_t t0 = __riscv_vnsrl(
580
+ __riscv_vnsrl(
581
+ __riscv_vnsrl(
582
+ __riscv_vset(
583
+ __riscv_vset(__riscv_vset(__riscv_vlmul_ext_u64m8(a), 1, b),
584
+ 2, c),
585
+ 3, d),
586
+ 0, VTraits<v_uint8>::vlanes()),
587
+ 0, VTraits<v_uint8>::vlanes()),
588
+ 0, VTraits<v_uint8>::vlanes());
589
+ vuint8m1_t t1 = __riscv_vnsrl(
590
+ __riscv_vnsrl(
591
+ __riscv_vnsrl(
592
+ __riscv_vset(
593
+ __riscv_vset(__riscv_vset(__riscv_vlmul_ext_u64m8(e), 1, f),
594
+ 2, g),
595
+ 3, h),
596
+ 0, VTraits<v_uint8>::vlanes()),
597
+ 0, VTraits<v_uint8>::vlanes()),
598
+ 0, VTraits<v_uint8>::vlanes());
599
+
600
+ return __riscv_vset(__riscv_vlmul_ext_u8m2(t0), 1, t1);
601
+ }
602
+
603
+ ////////////// Arithmetics //////////////
604
+ #define OPENCV_HAL_IMPL_RVV_BIN_OP(_Tpvec, ocv_intrin, rvv_intrin) \
605
+ inline _Tpvec v_##ocv_intrin(const _Tpvec &a, const _Tpvec &b) { \
606
+ return rvv_intrin(a, b, VTraits<_Tpvec>::vlanes()); \
607
+ }
608
+
609
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, add, __riscv_vsaddu)
610
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, sub, __riscv_vssubu)
611
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, add, __riscv_vsadd)
612
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, sub, __riscv_vssub)
613
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, add, __riscv_vsaddu)
614
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, sub, __riscv_vssubu)
615
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, add, __riscv_vsadd)
616
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, sub, __riscv_vssub)
617
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint32, add, __riscv_vadd)
618
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint32, sub, __riscv_vsub)
619
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint32, mul, __riscv_vmul)
620
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int32, add, __riscv_vadd)
621
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int32, sub, __riscv_vsub)
622
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int32, mul, __riscv_vmul)
623
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, add, __riscv_vfadd)
624
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, sub, __riscv_vfsub)
625
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, mul, __riscv_vfmul)
626
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_float32, div, __riscv_vfdiv)
627
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint64, add, __riscv_vadd)
628
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint64, sub, __riscv_vsub)
629
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int64, add, __riscv_vadd)
630
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int64, sub, __riscv_vsub)
631
+
632
+ #if CV_SIMD_SCALABLE_64F
633
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, add, __riscv_vfadd)
634
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, sub, __riscv_vfsub)
635
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, mul, __riscv_vfmul)
636
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_float64, div, __riscv_vfdiv)
637
+ #endif
638
+
639
+ #define OPENCV_HAL_IMPL_RVV_BIN_MADD(_Tpvec, rvv_add) \
640
+ template <typename... Args> \
641
+ inline _Tpvec v_add(const _Tpvec &f1, const _Tpvec &f2, const Args &...vf) { \
642
+ return v_add(rvv_add(f1, f2, VTraits<_Tpvec>::vlanes()), vf...); \
643
+ }
644
+ #define OPENCV_HAL_IMPL_RVV_BIN_MMUL(_Tpvec, rvv_mul) \
645
+ template <typename... Args> \
646
+ inline _Tpvec v_mul(const _Tpvec &f1, const _Tpvec &f2, const Args &...vf) { \
647
+ return v_mul(rvv_mul(f1, f2, VTraits<_Tpvec>::vlanes()), vf...); \
648
+ }
649
+ OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint8, __riscv_vsaddu)
650
+ OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int8, __riscv_vsadd)
651
+ OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint16, __riscv_vsaddu)
652
+ OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int16, __riscv_vsadd)
653
+ OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint32, __riscv_vadd)
654
+ OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int32, __riscv_vadd)
655
+ OPENCV_HAL_IMPL_RVV_BIN_MADD(v_float32, __riscv_vfadd)
656
+ OPENCV_HAL_IMPL_RVV_BIN_MADD(v_uint64, __riscv_vadd)
657
+ OPENCV_HAL_IMPL_RVV_BIN_MADD(v_int64, __riscv_vadd)
658
+
659
+ OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_uint32, __riscv_vmul)
660
+ OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_int32, __riscv_vmul)
661
+ OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_float32, __riscv_vfmul)
662
+ #if CV_SIMD_SCALABLE_64F
663
+ OPENCV_HAL_IMPL_RVV_BIN_MADD(v_float64, __riscv_vfadd)
664
+ OPENCV_HAL_IMPL_RVV_BIN_MMUL(v_float64, __riscv_vfmul)
665
+ #endif
666
+
667
+ #define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, _Tpwvec, _TpwvecM2, suffix, \
668
+ wmul) \
669
+ inline void v_mul_expand(const _Tpvec &a, const _Tpvec &b, _Tpwvec &c, \
670
+ _Tpwvec &d) { \
671
+ _TpwvecM2 temp = wmul(a, b, VTraits<_Tpvec>::vlanes()); \
672
+ c = __riscv_vget_##suffix##m2(temp, 0); \
673
+ d = __riscv_vget_##suffix##m2(temp, 1); \
674
+ }
675
+
676
+ OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint8, v_uint16, vuint16m4_t, u16,
677
+ __riscv_vwmulu)
678
+ OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int8, v_int16, vint16m4_t, i16, __riscv_vwmul)
679
+ OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint16, v_uint32, vuint32m4_t, u32,
680
+ __riscv_vwmulu)
681
+ OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int16, v_int32, vint32m4_t, i32, __riscv_vwmul)
682
+ OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint32, v_uint64, vuint64m4_t, u64,
683
+ __riscv_vwmulu)
684
+
685
+ inline v_int16 v_mul_hi(const v_int16 &a, const v_int16 &b) {
686
+ return __riscv_vmulh(a, b, VTraits<v_int16>::vlanes());
687
+ }
688
+ inline v_uint16 v_mul_hi(const v_uint16 &a, const v_uint16 &b) {
689
+ return __riscv_vmulhu(a, b, VTraits<v_uint16>::vlanes());
690
+ }
691
+
692
+ ////////////// Arithmetics (wrap) //////////////
693
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, add_wrap, __riscv_vadd) // *_wrap variants exist for 8- and 16-bit lanes only and use plain vadd/vsub/vmul
694
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, add_wrap, __riscv_vadd)
695
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, add_wrap, __riscv_vadd)
696
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, add_wrap, __riscv_vadd)
697
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, sub_wrap, __riscv_vsub)
698
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, sub_wrap, __riscv_vsub)
699
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, sub_wrap, __riscv_vsub)
700
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, sub_wrap, __riscv_vsub)
701
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint8, mul_wrap, __riscv_vmul)
702
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int8, mul_wrap, __riscv_vmul)
703
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_uint16, mul_wrap, __riscv_vmul)
704
+ OPENCV_HAL_IMPL_RVV_BIN_OP(v_int16, mul_wrap, __riscv_vmul)
705
+
706
+ //////// Saturating Multiply ////////
707
+ #define OPENCV_HAL_IMPL_RVV_MUL_SAT(_Tpvec, _clip, _wmul) \
708
+ inline _Tpvec v_mul(const _Tpvec &a, const _Tpvec &b) { \
709
+ return _clip(_wmul(a, b, VTraits<_Tpvec>::vlanes()), 0, 0, \
710
+ VTraits<_Tpvec>::vlanes()); \
711
+ } \
712
+ template <typename... Args> \
713
+ inline _Tpvec v_mul(const _Tpvec &a1, const _Tpvec &a2, const Args &...va) { \
714
+ return v_mul(_clip(_wmul(a1, a2, VTraits<_Tpvec>::vlanes()), 0, 0, \
715
+ VTraits<_Tpvec>::vlanes()), \
716
+ va...); \
717
+ }
718
+
719
+ OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint8, __riscv_vnclipu, __riscv_vwmulu)
720
+ OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int8, __riscv_vnclip, __riscv_vwmul)
721
+ OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint16, __riscv_vnclipu, __riscv_vwmulu)
722
+ OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int16, __riscv_vnclip, __riscv_vwmul)
723
+
724
+ ////////////// Bitwise logic //////////////
725
+
726
+ #define OPENCV_HAL_IMPL_RVV_LOGIC_OP(_Tpvec, vl) \
727
+ inline _Tpvec v_and(const _Tpvec &a, const _Tpvec &b) { \
728
+ return __riscv_vand(a, b, vl); \
729
+ } \
730
+ inline _Tpvec v_or(const _Tpvec &a, const _Tpvec &b) { \
731
+ return __riscv_vor(a, b, vl); \
732
+ } \
733
+ inline _Tpvec v_xor(const _Tpvec &a, const _Tpvec &b) { \
734
+ return __riscv_vxor(a, b, vl); \
735
+ } \
736
+ inline _Tpvec v_not(const _Tpvec &a) { return __riscv_vnot(a, vl); }
737
+
738
+ OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint8, VTraits<v_uint8>::vlanes())
739
+ OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int8, VTraits<v_int8>::vlanes())
740
+ OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint16, VTraits<v_uint16>::vlanes())
741
+ OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int16, VTraits<v_int16>::vlanes())
742
+ OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint32, VTraits<v_uint32>::vlanes())
743
+ OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int32, VTraits<v_int32>::vlanes())
744
+ OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint64, VTraits<v_uint64>::vlanes())
745
+ OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int64, VTraits<v_int64>::vlanes())
746
+
747
+ #define OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(intrin) \
748
+ inline v_float32 intrin(const v_float32 &a, const v_float32 &b) { \
749
+ return __riscv_vreinterpret_f32m2( \
750
+ intrin(__riscv_vreinterpret_i32m2(a), __riscv_vreinterpret_i32m2(b))); \
751
+ }
752
+ OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(v_and)
753
+ OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(v_or)
754
+ OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(v_xor)
755
+
756
+ inline v_float32 v_not(const v_float32 &a) {
757
+ return __riscv_vreinterpret_f32m2(v_not(__riscv_vreinterpret_i32m2(a)));
758
+ }
759
+
760
#if CV_SIMD_SCALABLE_64F
// Bitwise operations on float64 vectors: both operands are reinterpreted as
// int64 lanes, the integer overload of the same name is applied, and the
// result is reinterpreted back to float64.
#define OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(bit_fn) \
  inline v_float64 bit_fn(const v_float64 &a, const v_float64 &b) { \
    auto lhs = __riscv_vreinterpret_i64m2(a); \
    auto rhs = __riscv_vreinterpret_i64m2(b); \
    return __riscv_vreinterpret_f64m2(bit_fn(lhs, rhs)); \
  }
OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(v_and)
OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(v_or)
OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(v_xor)

// Bitwise NOT of a float64 vector, computed on its int64 reinterpretation.
inline v_float64 v_not(const v_float64 &a) {
  auto bits = __riscv_vreinterpret_i64m2(a);
  return __riscv_vreinterpret_f64m2(v_not(bits));
}
#endif
774
+
775
+ ////////////// Bitwise shifts //////////////
776
+ /* Usage
777
+ 1. v_shl<N>(vec);
778
+ 2. v_shl(vec, N); // instead of vec << N, when N is non-constant.
779
+ */
780
+
781
+ #define OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(_Tpvec, vl) \
782
+ template <int s = 0> inline _Tpvec v_shl(const _Tpvec &a, int n = s) { \
783
+ return _Tpvec(__riscv_vsll(a, uint8_t(n), vl)); \
784
+ } \
785
+ template <int s = 0> inline _Tpvec v_shr(const _Tpvec &a, int n = s) { \
786
+ return _Tpvec(__riscv_vsrl(a, uint8_t(n), vl)); \
787
+ }
788
+
789
+ #define OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(_Tpvec, vl) \
790
+ template <int s = 0> inline _Tpvec v_shl(const _Tpvec &a, int n = s) { \
791
+ return _Tpvec(__riscv_vsll(a, uint8_t(n), vl)); \
792
+ } \
793
+ template <int s = 0> inline _Tpvec v_shr(const _Tpvec &a, int n = s) { \
794
+ return _Tpvec(__riscv_vsra(a, uint8_t(n), vl)); \
795
+ }
796
+
797
+ OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint16, VTraits<v_uint16>::vlanes())
798
+ OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint32, VTraits<v_uint32>::vlanes())
799
+ OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint64, VTraits<v_uint64>::vlanes())
800
+ OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int16, VTraits<v_int16>::vlanes())
801
+ OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int32, VTraits<v_int32>::vlanes())
802
+ OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64, VTraits<v_int64>::vlanes())
803
+
804
+ ////////////// Comparison //////////////
805
// Integer comparison v_<op>: runs the mask-producing intrinsic `intrin` and
// turns its mask into a vector by merging the scalar -1 (as uint64_t) over a
// zero vector, so selected lanes receive that value and the rest stay 0.
#define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(vec_t, op, intrin, suffix) \
  inline vec_t v_##op(const vec_t &a, const vec_t &b) { \
    const size_t lanes = VTraits<vec_t>::vlanes(); \
    const uint64_t all_set = -1; \
    auto zeros = __riscv_vmv_v_x_##suffix##m2(0, lanes); \
    auto hits = intrin(a, b, lanes); \
    return __riscv_vmerge(zeros, all_set, hits, lanes); \
  }
812
+
813
// Floating-point comparison v_<op>: runs the mask-producing intrinsic `intrin`
// and merges an "all bits set" lane value over a zero vector under that mask.
// The lane value is obtained by storing -1 into the 64-bit member of a union
// and reading the lane-typed member back.
// TODO: carried over from the original, which does not say what remains.
#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(vec_t, op, intrin, suffix) \
  inline vec_t v_##op(const vec_t &a, const vec_t &b) { \
    const size_t lanes = VTraits<vec_t>::vlanes(); \
    union { \
      uint64_t bits; \
      VTraits<vec_t>::lane_type lane; \
    } all_set; \
    all_set.bits = -1; \
    auto hits = intrin(a, b, lanes); \
    auto zeros = __riscv_vfmv_v_f_##suffix##m2(0, lanes); \
    return vec_t(__riscv_vfmerge(zeros, all_set.lane, hits, lanes)); \
  }
826
+
827
+ #define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix) \
828
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, eq, __riscv_vmseq, suffix) \
829
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ne, __riscv_vmsne, suffix) \
830
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, lt, __riscv_vmsltu, suffix) \
831
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, gt, __riscv_vmsgtu, suffix) \
832
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, le, __riscv_vmsleu, suffix) \
833
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ge, __riscv_vmsgeu, suffix)
834
+
835
+ #define OPENCV_HAL_IMPL_RVV_SIGNED_CMP(_Tpvec, suffix) \
836
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, eq, __riscv_vmseq, suffix) \
837
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ne, __riscv_vmsne, suffix) \
838
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, lt, __riscv_vmslt, suffix) \
839
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, gt, __riscv_vmsgt, suffix) \
840
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, le, __riscv_vmsle, suffix) \
841
+ OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ge, __riscv_vmsge, suffix)
842
+
843
+ #define OPENCV_HAL_IMPL_RVV_FLOAT_CMP(_Tpvec, suffix) \
844
+ OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, eq, __riscv_vmfeq, suffix) \
845
+ OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ne, __riscv_vmfne, suffix) \
846
+ OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, lt, __riscv_vmflt, suffix) \
847
+ OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, gt, __riscv_vmfgt, suffix) \
848
+ OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, le, __riscv_vmfle, suffix) \
849
+ OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ge, __riscv_vmfge, suffix)
850
+
851
+ OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint8, u8)
852
+ OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint16, u16)
853
+ OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint32, u32)
854
+ OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint64, u64)
855
+ OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int8, i8)
856
+ OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int16, i16)
857
+ OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int32, i32)
858
+ OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int64, i64)
859
+ OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float32, f32)
860
+ #if CV_SIMD_SCALABLE_64F
861
+ OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float64, f64)
862
+ #endif
863
+
864
+ inline v_float32 v_not_nan(const v_float32 &a) { return v_eq(a, a); }
865
+
866
+ #if CV_SIMD_SCALABLE_64F
867
+ inline v_float64 v_not_nan(const v_float64 &a) { return v_eq(a, a); }
868
+ #endif
869
+
870
+ ////////////// Min/Max //////////////
871
+
872
+ #define OPENCV_HAL_IMPL_RVV_BIN_FUNC(_Tpvec, func, intrin, vl) \
873
+ inline _Tpvec func(const _Tpvec &a, const _Tpvec &b) { \
874
+ return intrin(a, b, vl); \
875
+ }
876
+
877
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8, v_min, __riscv_vminu,
878
+ VTraits<v_uint8>::vlanes())
879
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8, v_max, __riscv_vmaxu,
880
+ VTraits<v_uint8>::vlanes())
881
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8, v_min, __riscv_vmin,
882
+ VTraits<v_int8>::vlanes())
883
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8, v_max, __riscv_vmax,
884
+ VTraits<v_int8>::vlanes())
885
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16, v_min, __riscv_vminu,
886
+ VTraits<v_uint16>::vlanes())
887
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16, v_max, __riscv_vmaxu,
888
+ VTraits<v_uint16>::vlanes())
889
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16, v_min, __riscv_vmin,
890
+ VTraits<v_int16>::vlanes())
891
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16, v_max, __riscv_vmax,
892
+ VTraits<v_int16>::vlanes())
893
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32, v_min, __riscv_vminu,
894
+ VTraits<v_uint32>::vlanes())
895
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32, v_max, __riscv_vmaxu,
896
+ VTraits<v_uint32>::vlanes())
897
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32, v_min, __riscv_vmin,
898
+ VTraits<v_int32>::vlanes())
899
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32, v_max, __riscv_vmax,
900
+ VTraits<v_int32>::vlanes())
901
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32, v_min, __riscv_vfmin,
902
+ VTraits<v_float32>::vlanes())
903
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32, v_max, __riscv_vfmax,
904
+ VTraits<v_float32>::vlanes())
905
+ #if CV_SIMD_SCALABLE_64F
906
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64, v_min, __riscv_vfmin,
907
+ VTraits<v_float64>::vlanes())
908
+ OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64, v_max, __riscv_vfmax,
909
+ VTraits<v_float64>::vlanes())
910
+ #endif
911
+
912
+ ////////////// Transpose4x4 //////////////
913
+ #define OPENCV_HAL_IMPL_RVV_ZIP4(_Tpvec, _wTpvec, suffix, convert2u, convert) \
914
+ inline void v_zip4(const _Tpvec &a0, const _Tpvec &a1, _Tpvec &b0, \
915
+ _Tpvec &b1) { \
916
+ int vl = 4; \
917
+ _wTpvec temp = __riscv_vreinterpret_##suffix##m4(convert2u(__riscv_vor( \
918
+ __riscv_vzext_vf2(convert(a0), vl), \
919
+ __riscv_vreinterpret_u64m4(__riscv_vslide1up( \
920
+ __riscv_vreinterpret_u32m4(__riscv_vzext_vf2(convert(a1), vl)), 0, \
921
+ vl * 2)), \
922
+ vl))); \
923
+ b0 = __riscv_vget_##suffix##m2(temp, 0); \
924
+ b1 = __riscv_vget_##suffix##m2( \
925
+ __riscv_vrgather( \
926
+ temp, __riscv_vadd(__riscv_vid_v_u32m4(vl), 4, vl) /*{4,5,6,7} */, \
927
+ vl), \
928
+ 0); \
929
+ }
930
+
931
+ OPENCV_HAL_IMPL_RVV_ZIP4(v_uint32, vuint32m4_t, u32, OPENCV_HAL_NOP,
932
+ OPENCV_HAL_NOP)
933
+ OPENCV_HAL_IMPL_RVV_ZIP4(v_int32, vint32m4_t, i32, __riscv_vreinterpret_u32m4,
934
+ __riscv_vreinterpret_u32m2)
935
+ OPENCV_HAL_IMPL_RVV_ZIP4(v_float32, vfloat32m4_t, f32,
936
+ __riscv_vreinterpret_u32m4, __riscv_vreinterpret_u32m2)
937
+
938
+ #define OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(_Tpvec, suffix) \
939
+ inline void v_transpose4x4(const _Tpvec &a0, const _Tpvec &a1, \
940
+ const _Tpvec &a2, const _Tpvec &a3, _Tpvec &b0, \
941
+ _Tpvec &b1, _Tpvec &b2, _Tpvec &b3) { \
942
+ _Tpvec t0, t1, t2, t3; \
943
+ v_zip4(a0, a2, t0, t2); \
944
+ v_zip4(a1, a3, t1, t3); \
945
+ v_zip4(t0, t1, b0, b1); \
946
+ v_zip4(t2, t3, b2, b3); \
947
+ }
948
+
949
+ OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(v_uint32, u32) OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(
950
+ v_int32, i32) OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(v_float32, f32)
951
+
952
+ ////////////// Reduce //////////////
953
+
954
+ #define OPENCV_HAL_IMPL_RVV_REDUCE_SUM(_Tpvec, _wTpvec, _nwTpvec, scalartype, \
955
+ wsuffix, vl, red) \
956
+ inline scalartype v_reduce_sum(const _Tpvec &a) { \
957
+ _nwTpvec zero = __riscv_vmv_v_x_##wsuffix##m1(0, vl); \
958
+ _nwTpvec res = __riscv_vmv_v_x_##wsuffix##m1(0, vl); \
959
+ res = __riscv_v##red(a, zero, vl); \
960
+ return (scalartype)__riscv_vmv_x(res); \
961
+ }
962
+ OPENCV_HAL_IMPL_RVV_REDUCE_SUM(
963
+ v_uint8, v_uint16, vuint16m1_t, unsigned, u16,
964
+ VTraits<v_uint8>::vlanes(),
965
+ wredsumu) OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int8, v_int16, vint16m1_t,
966
+ int, i16,
967
+ VTraits<v_int8>::vlanes(),
968
+ wredsum)
969
+ OPENCV_HAL_IMPL_RVV_REDUCE_SUM(
970
+ v_uint16, v_uint32, vuint32m1_t, unsigned, u32,
971
+ VTraits<v_uint16>::vlanes(),
972
+ wredsumu) OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int16, v_int32,
973
+ vint32m1_t, int, i32,
974
+ VTraits<v_int16>::vlanes(),
975
+ wredsum)
976
+ OPENCV_HAL_IMPL_RVV_REDUCE_SUM(
977
+ v_uint32, v_uint64, vuint64m1_t, unsigned, u64,
978
+ VTraits<v_uint32>::vlanes(),
979
+ wredsumu) OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int32, v_int64,
980
+ vint64m1_t, int, i64,
981
+ VTraits<
982
+ v_int32>::vlanes(),
983
+ wredsum)
984
+ OPENCV_HAL_IMPL_RVV_REDUCE_SUM(
985
+ v_uint64, v_uint64, vuint64m1_t, uint64, u64,
986
+ VTraits<v_uint64>::vlanes(),
987
+ redsum) OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int64, v_int64,
988
+ vint64m1_t, int64,
989
+ i64,
990
+ VTraits<v_int64>::
991
+ vlanes(),
992
+ redsum)
993
+
994
+ #define OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(_Tpvec, _wTpvec, _nwTpvec, \
995
+ scalartype, wsuffix, vl) \
996
+ inline scalartype v_reduce_sum(const _Tpvec &a) { \
997
+ _nwTpvec zero = __riscv_vfmv_v_f_##wsuffix##m1(0, vl); \
998
+ _nwTpvec res = __riscv_vfmv_v_f_##wsuffix##m1(0, vl); \
999
+ res = __riscv_vfredusum(a, zero, vl); \
1000
+ return (scalartype)__riscv_vfmv_f(res); \
1001
+ }
1002
+ OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(
1003
+ v_float32, v_float32, vfloat32m1_t, float, f32,
1004
+ VTraits<v_float32>::vlanes())
1005
+ #if CV_SIMD_SCALABLE_64F
1006
+ OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(
1007
+ v_float64, v_float64, vfloat64m1_t, float, f64,
1008
+ VTraits<v_float64>::vlanes())
1009
+ #endif
1010
+
1011
+ #define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, _nTpvec, func, scalartype, suffix, \
1012
+ vl, red) \
1013
+ inline scalartype v_reduce_##func(const _Tpvec &a) { \
1014
+ _nTpvec narrowM1 = __riscv_vlmul_trunc_##suffix##m1(a); \
1015
+ return (scalartype)__riscv_vmv_x(__riscv_v##red(a, narrowM1, vl)); \
1016
+ }
1017
+
1018
+ #define OPENCV_HAL_IMPL_RVV_REDUCE_FP(_Tpvec, _nTpvec, func, scalartype, \
1019
+ suffix, vl, red) \
1020
+ inline scalartype v_reduce_##func(const _Tpvec &a) { \
1021
+ _nTpvec narrowM1 = __riscv_vlmul_trunc_##suffix##m1(a); \
1022
+ return (scalartype)__riscv_vfmv_f(__riscv_v##red(a, narrowM1, vl)); \
1023
+ }
1024
+
1025
+ OPENCV_HAL_IMPL_RVV_REDUCE(
1026
+ v_uint8, vuint8m1_t, min, uchar, u8,
1027
+ VTraits<v_uint8>::vlanes(),
1028
+ redminu) OPENCV_HAL_IMPL_RVV_REDUCE(v_int8,
1029
+ vint8m1_t,
1030
+ min, schar,
1031
+ i8,
1032
+ VTraits<
1033
+ v_int8>::
1034
+ vlanes(),
1035
+ redmin)
1036
+ OPENCV_HAL_IMPL_RVV_REDUCE(
1037
+ v_uint16, vuint16m1_t, min, ushort, u16,
1038
+ VTraits<v_uint16>::vlanes(),
1039
+ redminu) OPENCV_HAL_IMPL_RVV_REDUCE(v_int16,
1040
+ vint16m1_t,
1041
+ min,
1042
+ short,
1043
+ i16,
1044
+ VTraits<
1045
+ v_int16>::
1046
+ vlanes(),
1047
+ redmin)
1048
+ OPENCV_HAL_IMPL_RVV_REDUCE(
1049
+ v_uint32, vuint32m1_t, min, unsigned,
1050
+ u32, VTraits<v_uint32>::vlanes(),
1051
+ redminu) OPENCV_HAL_IMPL_RVV_REDUCE(v_int32,
1052
+ vint32m1_t,
1053
+ min,
1054
+ int,
1055
+ i32,
1056
+ VTraits<
1057
+ v_int32>::
1058
+ vlanes(),
1059
+ redmin)
1060
+ OPENCV_HAL_IMPL_RVV_REDUCE_FP(
1061
+ v_float32, vfloat32m1_t, min, float,
1062
+ f32, VTraits<v_float32>::vlanes(),
1063
+ fredmin)
1064
+ OPENCV_HAL_IMPL_RVV_REDUCE(
1065
+ v_uint8, vuint8m1_t, max, uchar,
1066
+ u8, VTraits<v_uint8>::vlanes(),
1067
+ redmaxu)
1068
+ OPENCV_HAL_IMPL_RVV_REDUCE(
1069
+ v_int8, vint8m1_t, max,
1070
+ schar, i8,
1071
+ VTraits<v_int8>::vlanes(),
1072
+ redmax)
1073
+ OPENCV_HAL_IMPL_RVV_REDUCE(
1074
+ v_uint16, vuint16m1_t,
1075
+ max, ushort, u16,
1076
+ VTraits<
1077
+ v_uint16>::vlanes(),
1078
+ redmaxu)
1079
+ OPENCV_HAL_IMPL_RVV_REDUCE(
1080
+ v_int16, vint16m1_t,
1081
+ max, short, i16,
1082
+ VTraits<v_int16>::
1083
+ vlanes(),
1084
+ redmax)
1085
+ OPENCV_HAL_IMPL_RVV_REDUCE(
1086
+ v_uint32,
1087
+ vuint32m1_t,
1088
+ max, unsigned,
1089
+ u32,
1090
+ VTraits<
1091
+ v_uint32>::
1092
+ vlanes(),
1093
+ redmaxu)
1094
+ OPENCV_HAL_IMPL_RVV_REDUCE(
1095
+ v_int32,
1096
+ vint32m1_t,
1097
+ max, int,
1098
+ i32,
1099
+ VTraits<
1100
+ v_int32>::
1101
+ vlanes(),
1102
+ redmax)
1103
+ OPENCV_HAL_IMPL_RVV_REDUCE_FP(
1104
+ v_float32,
1105
+ vfloat32m1_t,
1106
+ max,
1107
+ float,
1108
+ f32,
1109
+ VTraits<
1110
+ v_float32>::
1111
+ vlanes(),
1112
+ fredmax)
1113
+
1114
+ inline v_float32
1115
+ v_reduce_sum4(const v_float32 &a, const v_float32 &b, const v_float32 &c,
1116
+ const v_float32 &d) {
1117
+ // 0000 1111 2222 3333 ....
1118
+ vuint64m4_t vid1 = __riscv_vid_v_u64m4(VTraits<vuint32m2_t>::vlanes());
1119
+ vuint16m4_t t1 = __riscv_vreinterpret_u16m4(vid1);
1120
+ vuint16m4_t t2 = __riscv_vslide1up(t1, 0, VTraits<vuint8m2_t>::vlanes());
1121
+ vuint16m4_t t3 = __riscv_vslide1up(t2, 0, VTraits<vuint8m2_t>::vlanes());
1122
+ vuint16m4_t t4 = __riscv_vslide1up(t3, 0, VTraits<vuint8m2_t>::vlanes());
1123
+ t1 = __riscv_vor(__riscv_vor(t1, t2, VTraits<vuint8m2_t>::vlanes()),
1124
+ __riscv_vor(t3, t4, VTraits<vuint8m2_t>::vlanes()),
1125
+ VTraits<vuint8m2_t>::vlanes());
1126
+
1127
+ // index for transpose4X4
1128
+ vuint16m4_t vidx0 = __riscv_vmul(t1, 12, VTraits<vuint8m2_t>::vlanes());
1129
+ vidx0 =
1130
+ __riscv_vadd(vidx0, __riscv_vid_v_u16m4(VTraits<vuint8m2_t>::vlanes()),
1131
+ VTraits<vuint8m2_t>::vlanes());
1132
+ vuint16m4_t vidx1 = __riscv_vadd(vidx0, 4, VTraits<vuint8m2_t>::vlanes());
1133
+ vuint16m4_t vidx2 = __riscv_vadd(vidx0, 8, VTraits<vuint8m2_t>::vlanes());
1134
+ vuint16m4_t vidx3 = __riscv_vadd(vidx0, 12, VTraits<vuint8m2_t>::vlanes());
1135
+
1136
+ // zip
1137
+ vuint32m4_t tempA = __riscv_vreinterpret_u32m4(__riscv_vor(
1138
+ __riscv_vzext_vf2(__riscv_vreinterpret_u32m2(a),
1139
+ VTraits<vuint16m2_t>::vlanes()),
1140
+ __riscv_vreinterpret_u64m4(__riscv_vslide1up(
1141
+ __riscv_vreinterpret_u32m4(__riscv_vzext_vf2(
1142
+ __riscv_vreinterpret_u32m2(c), VTraits<vuint16m2_t>::vlanes())),
1143
+ 0, VTraits<vuint16m2_t>::vlanes())),
1144
+ VTraits<vuint32m2_t>::vlanes()));
1145
+ vuint32m4_t tempB = __riscv_vreinterpret_u32m4(__riscv_vor(
1146
+ __riscv_vzext_vf2(__riscv_vreinterpret_u32m2(b),
1147
+ VTraits<vuint16m2_t>::vlanes()),
1148
+ __riscv_vreinterpret_u64m4(__riscv_vslide1up(
1149
+ __riscv_vreinterpret_u32m4(__riscv_vzext_vf2(
1150
+ __riscv_vreinterpret_u32m2(d), VTraits<vuint16m2_t>::vlanes())),
1151
+ 0, VTraits<vuint16m2_t>::vlanes())),
1152
+ VTraits<vuint32m2_t>::vlanes()));
1153
+ vfloat32m8_t temp = __riscv_vreinterpret_f32m8(__riscv_vreinterpret_u32m8(
1154
+ __riscv_vor(__riscv_vzext_vf2(tempA, VTraits<vuint8m2_t>::vlanes()),
1155
+ __riscv_vreinterpret_u64m8(__riscv_vslide1up(
1156
+ __riscv_vreinterpret_u32m8(__riscv_vzext_vf2(
1157
+ tempB, VTraits<vuint8m2_t>::vlanes())),
1158
+ 0, VTraits<vuint8m2_t>::vlanes())),
1159
+ VTraits<vuint16m2_t>::vlanes())));
1160
+
1161
+ // transpose
1162
+ vfloat32m2_t b0 = __riscv_vlmul_trunc_f32m2(
1163
+ __riscv_vrgatherei16(temp, vidx0, VTraits<vuint8m2_t>::vlanes()));
1164
+ vfloat32m2_t b1 = __riscv_vlmul_trunc_f32m2(
1165
+ __riscv_vrgatherei16(temp, vidx1, VTraits<vuint8m2_t>::vlanes()));
1166
+ vfloat32m2_t b2 = __riscv_vlmul_trunc_f32m2(
1167
+ __riscv_vrgatherei16(temp, vidx2, VTraits<vuint8m2_t>::vlanes()));
1168
+ vfloat32m2_t b3 = __riscv_vlmul_trunc_f32m2(
1169
+ __riscv_vrgatherei16(temp, vidx3, VTraits<vuint8m2_t>::vlanes()));
1170
+
1171
+ // vector add
1172
+ v_float32 res =
1173
+ __riscv_vfadd(__riscv_vfadd(b0, b1, VTraits<vfloat32m2_t>::vlanes()),
1174
+ __riscv_vfadd(b2, b3, VTraits<vfloat32m2_t>::vlanes()),
1175
+ VTraits<vfloat32m2_t>::vlanes());
1176
+ return res;
1177
+ }
1178
+
1179
+ ////////////// Square-Root //////////////
1180
+
1181
+ inline v_float32 v_sqrt(const v_float32 &x) {
1182
+ return __riscv_vfsqrt(x, VTraits<v_float32>::vlanes());
1183
+ }
1184
+
1185
+ inline v_float32 v_invsqrt(const v_float32 &x) {
1186
+ v_float32 one = v_setall_f32(1.0f);
1187
+ return v_div(one, v_sqrt(x));
1188
+ }
1189
+
1190
#if CV_SIMD_SCALABLE_64F
// Lane-wise square root of a float64 vector (vfsqrt over all lanes).
inline v_float64 v_sqrt(const v_float64 &x) {
  const auto lanes = VTraits<v_float64>::vlanes();
  return __riscv_vfsqrt(x, lanes);
}

// Inverse square root, computed as a full division 1 / sqrt(x).
inline v_float64 v_invsqrt(const v_float64 &x) {
  return v_div(v_setall_f64(1.0f), v_sqrt(x));
}
#endif
1200
+
1201
+ inline v_float32 v_magnitude(const v_float32 &a, const v_float32 &b) {
1202
+ v_float32 x =
1203
+ __riscv_vfmacc(__riscv_vfmul(a, a, VTraits<v_float32>::vlanes()), b, b,
1204
+ VTraits<v_float32>::vlanes());
1205
+ return v_sqrt(x);
1206
+ }
1207
+
1208
+ inline v_float32 v_sqr_magnitude(const v_float32 &a, const v_float32 &b) {
1209
+ return v_float32(
1210
+ __riscv_vfmacc(__riscv_vfmul(a, a, VTraits<v_float32>::vlanes()), b, b,
1211
+ VTraits<v_float32>::vlanes()));
1212
+ }
1213
+
1214
#if CV_SIMD_SCALABLE_64F
// a*a + b*b per lane: vfmul forms a*a, vfmacc then accumulates b*b onto it.
inline v_float64 v_sqr_magnitude(const v_float64 &a, const v_float64 &b) {
  const auto lanes = VTraits<v_float64>::vlanes();
  v_float64 a_squared = __riscv_vfmul(a, a, lanes);
  return __riscv_vfmacc(a_squared, b, b, lanes);
}

// sqrt(a*a + b*b) per lane.
inline v_float64 v_magnitude(const v_float64 &a, const v_float64 &b) {
  return v_sqrt(v_sqr_magnitude(a, b));
}
#endif
1227
+
1228
+ ////////////// Multiply-Add //////////////
1229
+
1230
+ inline v_float32 v_fma(const v_float32 &a, const v_float32 &b,
1231
+ const v_float32 &c) {
1232
+ return __riscv_vfmacc(c, a, b, VTraits<v_float32>::vlanes());
1233
+ }
1234
+ inline v_int32 v_fma(const v_int32 &a, const v_int32 &b, const v_int32 &c) {
1235
+ return __riscv_vmacc(c, a, b, VTraits<v_float32>::vlanes());
1236
+ }
1237
+
1238
+ inline v_float32 v_muladd(const v_float32 &a, const v_float32 &b,
1239
+ const v_float32 &c) {
1240
+ return v_fma(a, b, c);
1241
+ }
1242
+
1243
+ inline v_int32 v_muladd(const v_int32 &a, const v_int32 &b, const v_int32 &c) {
1244
+ return v_fma(a, b, c);
1245
+ }
1246
+
1247
#if CV_SIMD_SCALABLE_64F
// v_fma(a, b, c) for float64: passes `c` as the accumulator operand and `a`,
// `b` as the two factors of the explicitly typed vfmacc intrinsic.
inline v_float64 v_fma(const v_float64 &a, const v_float64 &b,
                       const v_float64 &c) {
  const auto lanes = VTraits<v_float64>::vlanes();
  return __riscv_vfmacc_vv_f64m2(c, a, b, lanes);
}

// v_muladd forwards to v_fma.
inline v_float64 v_muladd(const v_float64 &a, const v_float64 &b,
                          const v_float64 &c) {
  return v_fma(a, b, c);
}
#endif
1258
+
1259
+ ////////////// Check all/any //////////////
1260
+
1261
+ #define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, vl) \
1262
+ inline bool v_check_all(const _Tpvec &a) { \
1263
+ return (int)__riscv_vcpop(__riscv_vmslt(a, 0, vl), vl) == vl; \
1264
+ } \
1265
+ inline bool v_check_any(const _Tpvec &a) { \
1266
+ return (int)__riscv_vcpop(__riscv_vmslt(a, 0, vl), vl) != 0; \
1267
+ }
1268
+
1269
+ OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int8, VTraits<v_int8>::vlanes())
1270
+ OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int16, VTraits<v_int16>::vlanes())
1271
+ OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int32, VTraits<v_int32>::vlanes())
1272
+ OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int64, VTraits<v_int64>::vlanes())
1273
+
1274
+ inline bool v_check_all(const v_uint8 &a) {
1275
+ return v_check_all(v_reinterpret_as_s8(a));
1276
+ }
1277
+ inline bool v_check_any(const v_uint8 &a) {
1278
+ return v_check_any(v_reinterpret_as_s8(a));
1279
+ }
1280
+
1281
+ inline bool v_check_all(const v_uint16 &a) {
1282
+ return v_check_all(v_reinterpret_as_s16(a));
1283
+ }
1284
+ inline bool v_check_any(const v_uint16 &a) {
1285
+ return v_check_any(v_reinterpret_as_s16(a));
1286
+ }
1287
+
1288
+ inline bool v_check_all(const v_uint32 &a) {
1289
+ return v_check_all(v_reinterpret_as_s32(a));
1290
+ }
1291
+ inline bool v_check_any(const v_uint32 &a) {
1292
+ return v_check_any(v_reinterpret_as_s32(a));
1293
+ }
1294
+
1295
+ inline bool v_check_all(const v_float32 &a) {
1296
+ return v_check_all(v_reinterpret_as_s32(a));
1297
+ }
1298
+ inline bool v_check_any(const v_float32 &a) {
1299
+ return v_check_any(v_reinterpret_as_s32(a));
1300
+ }
1301
+
1302
+ inline bool v_check_all(const v_uint64 &a) {
1303
+ return v_check_all(v_reinterpret_as_s64(a));
1304
+ }
1305
+ inline bool v_check_any(const v_uint64 &a) {
1306
+ return v_check_any(v_reinterpret_as_s64(a));
1307
+ }
1308
+
1309
#if CV_SIMD_SCALABLE_64F
// float64 vectors are checked through their int64 reinterpretation.
inline bool v_check_all(const v_float64 &a) {
  v_int64 as_signed = v_reinterpret_as_s64(a);
  return v_check_all(as_signed);
}
inline bool v_check_any(const v_float64 &a) {
  v_int64 as_signed = v_reinterpret_as_s64(a);
  return v_check_any(as_signed);
}
#endif
1317
+
1318
+ ////////////// abs //////////////
1319
+
1320
+ #define OPENCV_HAL_IMPL_RVV_ABSDIFF(_Tpvec, abs) \
1321
+ inline _Tpvec v_##abs(const _Tpvec &a, const _Tpvec &b) { \
1322
+ return v_sub(v_max(a, b), v_min(a, b)); \
1323
+ }
1324
+
1325
+ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint8, absdiff)
1326
+ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint16, absdiff)
1327
+ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint32, absdiff)
1328
+ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float32, absdiff)
1329
+ #if CV_SIMD_SCALABLE_64F
1330
+ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float64, absdiff)
1331
+ #endif
1332
+ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int8, absdiffs)
1333
+ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16, absdiffs)
1334
+
1335
+ #define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, width) \
1336
+ inline _rTpvec v_absdiff(const _Tpvec &a, const _Tpvec &b) { \
1337
+ return __riscv_vnclipu( \
1338
+ __riscv_vreinterpret_u##width##m4(__riscv_vwsub_vv( \
1339
+ v_max(a, b), v_min(a, b), VTraits<_Tpvec>::vlanes())), \
1340
+ 0, 0, VTraits<_Tpvec>::vlanes()); \
1341
+ }
1342
+
1343
+ OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8, v_uint8, 16)
1344
+ OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16, v_uint16, 32)
1345
+ OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32, v_uint32, 64)
1346
+
1347
+ #define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \
1348
+ inline _Tprvec v_abs(const _Tpvec &a) { \
1349
+ return v_absdiff(a, v_setzero_##suffix()); \
1350
+ }
1351
+
1352
+ OPENCV_HAL_IMPL_RVV_ABS(v_uint8, v_int8, s8)
1353
+ OPENCV_HAL_IMPL_RVV_ABS(v_uint16, v_int16, s16)
1354
+ OPENCV_HAL_IMPL_RVV_ABS(v_uint32, v_int32, s32)
1355
+ OPENCV_HAL_IMPL_RVV_ABS(v_float32, v_float32, f32)
1356
+ #if CV_SIMD_SCALABLE_64F
1357
+ OPENCV_HAL_IMPL_RVV_ABS(v_float64, v_float64, f64)
1358
+ #endif
1359
+
1360
+ #define OPENCV_HAL_IMPL_RVV_REDUCE_SAD(_Tpvec, scalartype) \
1361
+ inline scalartype v_reduce_sad(const _Tpvec &a, const _Tpvec &b) { \
1362
+ return v_reduce_sum(v_absdiff(a, b)); \
1363
+ }
1364
+
1365
+ OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint8, unsigned)
1366
+ OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int8, unsigned)
1367
+ OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint16, unsigned)
1368
+ OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int16, unsigned)
1369
+ OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint32, unsigned)
1370
+ OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int32, unsigned)
1371
+ OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_float32, float)
1372
+
1373
+ ////////////// Select //////////////
1374
+
1375
+ #define OPENCV_HAL_IMPL_RVV_SELECT(_Tpvec, vl) \
1376
+ inline _Tpvec v_select(const _Tpvec &mask, const _Tpvec &a, \
1377
+ const _Tpvec &b) { \
1378
+ return __riscv_vmerge(b, a, __riscv_vmsne(mask, 0, vl), vl); \
1379
+ }
1380
+
1381
+ OPENCV_HAL_IMPL_RVV_SELECT(v_uint8, VTraits<v_uint8>::vlanes())
1382
+ OPENCV_HAL_IMPL_RVV_SELECT(v_uint16, VTraits<v_uint16>::vlanes())
1383
+ OPENCV_HAL_IMPL_RVV_SELECT(v_uint32, VTraits<v_uint32>::vlanes())
1384
+ OPENCV_HAL_IMPL_RVV_SELECT(v_int8, VTraits<v_int8>::vlanes())
1385
+ OPENCV_HAL_IMPL_RVV_SELECT(v_int16, VTraits<v_int16>::vlanes())
1386
+ OPENCV_HAL_IMPL_RVV_SELECT(v_int32, VTraits<v_int32>::vlanes())
1387
+
1388
+ inline v_float32 v_select(const v_float32 &mask, const v_float32 &a,
1389
+ const v_float32 &b) {
1390
+ return __riscv_vmerge(b, a,
1391
+ __riscv_vmfne(mask, 0, VTraits<v_float32>::vlanes()),
1392
+ VTraits<v_float32>::vlanes());
1393
+ }
1394
+
1395
#if CV_SIMD_SCALABLE_64F
// v_select for float64: the mask is tested with the floating-point
// "not equal to 0" comparison vmfne; selected lanes come from `a`, the rest
// from `b`.
inline v_float64 v_select(const v_float64 &mask, const v_float64 &a,
                          const v_float64 &b) {
  const auto lanes = VTraits<v_float64>::vlanes();
  auto take_a = __riscv_vmfne(mask, 0, lanes);
  return __riscv_vmerge(b, a, take_a, lanes);
}
#endif
1403
+
1404
+ ////////////// Rotate shift //////////////
1405
+
1406
+ #define OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(_Tpvec, suffix, vl) \
1407
+ template <int n> inline _Tpvec v_rotate_right(const _Tpvec &a) { \
1408
+ return __riscv_vslidedown(a, n, vl); \
1409
+ } \
1410
+ template <int n> inline _Tpvec v_rotate_left(const _Tpvec &a) { \
1411
+ return __riscv_vslideup(__riscv_vmv_v_x_##suffix##m2(0, vl), a, n, vl); \
1412
+ } \
1413
+ template <> inline _Tpvec v_rotate_left<0>(const _Tpvec &a) { return a; } \
1414
+ template <int n> \
1415
+ inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b) { \
1416
+ return __riscv_vslideup(__riscv_vslidedown(a, n, vl), b, \
1417
+ VTraits<_Tpvec>::vlanes() - n, vl); \
1418
+ } \
1419
+ template <int n> \
1420
+ inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b) { \
1421
+ return __riscv_vslideup( \
1422
+ __riscv_vslidedown(b, VTraits<_Tpvec>::vlanes() - n, vl), a, n, vl); \
1423
+ } \
1424
+ template <> \
1425
+ inline _Tpvec v_rotate_left<0>(const _Tpvec &a, const _Tpvec &b) { \
1426
+ CV_UNUSED(b); \
1427
+ return a; \
1428
+ }
1429
+
1430
+ OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint8, u8, VTraits<v_uint8>::vlanes())
1431
+ OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int8, i8, VTraits<v_int8>::vlanes())
1432
+ OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint16, u16, VTraits<v_uint16>::vlanes())
1433
+ OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int16, i16, VTraits<v_int16>::vlanes())
1434
+ OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint32, u32, VTraits<v_uint32>::vlanes())
1435
+ OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int32, i32, VTraits<v_int32>::vlanes())
1436
+ OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint64, u64, VTraits<v_uint64>::vlanes())
1437
+ OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int64, i64, VTraits<v_int64>::vlanes())
1438
+
1439
+ #define OPENCV_HAL_IMPL_RVV_ROTATE_FP(_Tpvec, suffix, vl) \
1440
+ template <int n> inline _Tpvec v_rotate_right(const _Tpvec &a) { \
1441
+ return __riscv_vslidedown(a, n, vl); \
1442
+ } \
1443
+ template <int n> inline _Tpvec v_rotate_left(const _Tpvec &a) { \
1444
+ return __riscv_vslideup(__riscv_vfmv_v_f_##suffix##m2(0, vl), a, n, vl); \
1445
+ } \
1446
+ template <> inline _Tpvec v_rotate_left<0>(const _Tpvec &a) { return a; } \
1447
+ template <int n> \
1448
+ inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b) { \
1449
+ return __riscv_vslideup(__riscv_vslidedown(a, n, vl), b, \
1450
+ VTraits<_Tpvec>::vlanes() - n, vl); \
1451
+ } \
1452
+ template <int n> \
1453
+ inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b) { \
1454
+ return __riscv_vslideup( \
1455
+ __riscv_vslidedown(b, VTraits<_Tpvec>::vlanes() - n, vl), a, n, vl); \
1456
+ } \
1457
+ template <> \
1458
+ inline _Tpvec v_rotate_left<0>(const _Tpvec &a, const _Tpvec &b) { \
1459
+ CV_UNUSED(b); \
1460
+ return a; \
1461
+ }
1462
+
1463
+ OPENCV_HAL_IMPL_RVV_ROTATE_FP(v_float32, f32, VTraits<v_float32>::vlanes())
1464
+ #if CV_SIMD_SCALABLE_64F
1465
+ OPENCV_HAL_IMPL_RVV_ROTATE_FP(v_float64, f64, VTraits<v_float64>::vlanes())
1466
+ #endif
1467
+
1468
+ ////////////// Convert to float //////////////
1469
+ inline v_float32 v_cvt_f32(const v_int32 &a) {
1470
+ return __riscv_vfcvt_f_x_v_f32m2(a, VTraits<v_float32>::vlanes());
1471
+ }
1472
+
1473
+ #if CV_SIMD_SCALABLE_64F
1474
+ inline v_float32 v_cvt_f32(const v_float64 &a) {
1475
+ return __riscv_vfncvt_f(__riscv_vlmul_ext_f64m4(a),
1476
+ VTraits<v_float64>::vlanes());
1477
+ }
1478
+
1479
+ inline v_float32 v_cvt_f32(const v_float64 &a, const v_float64 &b) {
1480
+ return __riscv_vfncvt_f(__riscv_vset(__riscv_vlmul_ext_f64m4(a), 1, b),
1481
+ VTraits<v_float32>::vlanes());
1482
+ }
1483
+
1484
+ inline v_float64 v_cvt_f64(const v_int32 &a) {
1485
+ return __riscv_vget_f64m2(__riscv_vfwcvt_f(a, VTraits<v_int32>::vlanes()), 0);
1486
+ }
1487
+
1488
+ inline v_float64 v_cvt_f64_high(const v_int32 &a) {
1489
+ return __riscv_vget_f64m2(__riscv_vfwcvt_f(a, VTraits<v_int32>::vlanes()), 1);
1490
+ }
1491
+
1492
+ inline v_float64 v_cvt_f64(const v_float32 &a) {
1493
+ return __riscv_vget_f64m2(__riscv_vfwcvt_f(a, VTraits<v_float32>::vlanes()),
1494
+ 0);
1495
+ }
1496
+
1497
+ inline v_float64 v_cvt_f64_high(const v_float32 &a) {
1498
+ return __riscv_vget_f64m2(__riscv_vfwcvt_f(a, VTraits<v_float32>::vlanes()),
1499
+ 1);
1500
+ }
1501
+
1502
+ inline v_float64 v_cvt_f64(const v_int64 &a) {
1503
+ return __riscv_vfcvt_f(a, VTraits<v_int64>::vlanes());
1504
+ }
1505
+ #endif
1506
+
1507
+ //////////// Broadcast //////////////
1508
+
1509
+ #define OPENCV_HAL_IMPL_RVV_BROADCAST(_Tpvec, suffix) \
1510
+ template <int s = 0> \
1511
+ inline _Tpvec v_broadcast_element(_Tpvec v, int i = s) { \
1512
+ return v_setall_##suffix(v_extract_n(v, i)); \
1513
+ } \
1514
+ inline _Tpvec v_broadcast_highest(_Tpvec v) { \
1515
+ return v_setall_##suffix(v_extract_n(v, VTraits<_Tpvec>::vlanes() - 1)); \
1516
+ }
1517
+
1518
+ OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint32, u32)
1519
+ OPENCV_HAL_IMPL_RVV_BROADCAST(v_int32, s32)
1520
+ OPENCV_HAL_IMPL_RVV_BROADCAST(v_float32, f32)
1521
+
1522
+ ////////////// Reverse //////////////
1523
+ #define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, width) \
1524
+ inline _Tpvec v_reverse(const _Tpvec &a) { \
1525
+ vuint##width##m2_t vidx = __riscv_vrsub( \
1526
+ __riscv_vid_v_u##width##m2(VTraits<_Tpvec>::vlanes()), \
1527
+ VTraits<_Tpvec>::vlanes() - 1, VTraits<_Tpvec>::vlanes()); \
1528
+ return __riscv_vrgather(a, vidx, VTraits<_Tpvec>::vlanes()); \
1529
+ }
1530
+ OPENCV_HAL_IMPL_RVV_REVERSE(v_uint8, 8)
1531
+ OPENCV_HAL_IMPL_RVV_REVERSE(v_int8, 8)
1532
+ OPENCV_HAL_IMPL_RVV_REVERSE(v_uint16, 16)
1533
+ OPENCV_HAL_IMPL_RVV_REVERSE(v_int16, 16)
1534
+ OPENCV_HAL_IMPL_RVV_REVERSE(v_uint32, 32)
1535
+ OPENCV_HAL_IMPL_RVV_REVERSE(v_int32, 32)
1536
+ OPENCV_HAL_IMPL_RVV_REVERSE(v_float32, 32)
1537
+ OPENCV_HAL_IMPL_RVV_REVERSE(v_uint64, 64)
1538
+ OPENCV_HAL_IMPL_RVV_REVERSE(v_int64, 64)
1539
+ #if CV_SIMD_SCALABLE_64F
1540
+ OPENCV_HAL_IMPL_RVV_REVERSE(v_float64, 64)
1541
+ #endif
1542
+
1543
+ //////////// Value reordering ////////////
1544
+
1545
+ #define OPENCV_HAL_IMPL_RVV_EXPAND(_Tp, _Tpwvec, _Tpwvec_m2, _Tpvec, width, \
1546
+ suffix, suffix2, cvt) \
1547
+ inline void v_expand(const _Tpvec &a, _Tpwvec &b0, _Tpwvec &b1) { \
1548
+ _Tpwvec_m2 temp = cvt(a, VTraits<_Tpvec>::vlanes()); \
1549
+ b0 = __riscv_vget_##suffix##m2(temp, 0); \
1550
+ b1 = __riscv_vget_##suffix##m2(temp, 1); \
1551
+ } \
1552
+ inline _Tpwvec v_expand_low(const _Tpvec &a) { \
1553
+ _Tpwvec_m2 temp = cvt(a, VTraits<_Tpvec>::vlanes()); \
1554
+ return __riscv_vget_##suffix##m2(temp, 0); \
1555
+ } \
1556
+ inline _Tpwvec v_expand_high(const _Tpvec &a) { \
1557
+ _Tpwvec_m2 temp = cvt(a, VTraits<_Tpvec>::vlanes()); \
1558
+ return __riscv_vget_##suffix##m2(temp, 1); \
1559
+ } \
1560
+ inline _Tpwvec v_load_expand(const _Tp *ptr) { \
1561
+ return cvt( \
1562
+ __riscv_vle##width##_v_##suffix2##m1(ptr, VTraits<_Tpvec>::vlanes()), \
1563
+ VTraits<_Tpvec>::vlanes()); \
1564
+ }
1565
+
1566
+ OPENCV_HAL_IMPL_RVV_EXPAND(uchar, v_uint16, vuint16m4_t, v_uint8, 8, u16, u8,
1567
+ __riscv_vwcvtu_x)
1568
+ OPENCV_HAL_IMPL_RVV_EXPAND(schar, v_int16, vint16m4_t, v_int8, 8, i16, i8,
1569
+ __riscv_vwcvt_x)
1570
+ OPENCV_HAL_IMPL_RVV_EXPAND(ushort, v_uint32, vuint32m4_t, v_uint16, 16, u32,
1571
+ u16, __riscv_vwcvtu_x)
1572
+ OPENCV_HAL_IMPL_RVV_EXPAND(short, v_int32, vint32m4_t, v_int16, 16, i32, i16,
1573
+ __riscv_vwcvt_x)
1574
+ OPENCV_HAL_IMPL_RVV_EXPAND(uint, v_uint64, vuint64m4_t, v_uint32, 32, u64, u32,
1575
+ __riscv_vwcvtu_x)
1576
+ OPENCV_HAL_IMPL_RVV_EXPAND(int, v_int64, vint64m4_t, v_int32, 32, i64, i32,
1577
+ __riscv_vwcvt_x)
1578
+
1579
+ inline v_uint32 v_load_expand_q(const uchar *ptr) {
1580
+ return __riscv_vwcvtu_x(
1581
+ __riscv_vwcvtu_x(__riscv_vle8_v_u8mf2(ptr, VTraits<v_uint32>::vlanes()),
1582
+ VTraits<v_uint32>::vlanes()),
1583
+ VTraits<v_uint32>::vlanes());
1584
+ }
1585
+
1586
+ inline v_int32 v_load_expand_q(const schar *ptr) {
1587
+ return __riscv_vwcvt_x(
1588
+ __riscv_vwcvt_x(__riscv_vle8_v_i8mf2(ptr, VTraits<v_int32>::vlanes()),
1589
+ VTraits<v_int32>::vlanes()),
1590
+ VTraits<v_int32>::vlanes());
1591
+ }
1592
+
1593
+ #define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, hwidth, hsuffix, \
1594
+ suffix, rshr, shr) \
1595
+ inline _Tpvec v_pack(const _wTpvec &a, const _wTpvec &b) { \
1596
+ return shr(__riscv_vset(__riscv_vlmul_ext_##suffix##m4(a), 1, b), 0, 0, \
1597
+ VTraits<_Tpvec>::vlanes()); \
1598
+ } \
1599
+ inline void v_pack_store(_Tp *ptr, const _wTpvec &a) { \
1600
+ __riscv_vse##hwidth##_v_##hsuffix##m1( \
1601
+ ptr, shr(a, 0, 0, VTraits<_Tpvec>::vlanes()), \
1602
+ VTraits<_wTpvec>::vlanes()); \
1603
+ } \
1604
+ template <int n = 0> \
1605
+ inline _Tpvec v_rshr_pack(const _wTpvec &a, const _wTpvec &b, int N = n) { \
1606
+ return rshr(__riscv_vset(__riscv_vlmul_ext_##suffix##m4(a), 1, b), N, 0, \
1607
+ VTraits<_Tpvec>::vlanes()); \
1608
+ } \
1609
+ template <int n = 0> \
1610
+ inline void v_rshr_pack_store(_Tp *ptr, const _wTpvec &a, int N = n) { \
1611
+ __riscv_vse##hwidth##_v_##hsuffix##m1( \
1612
+ ptr, rshr(a, N, 0, VTraits<_Tpvec>::vlanes()), \
1613
+ VTraits<_wTpvec>::vlanes()); \
1614
+ }
1615
+
1616
+ #define OPENCV_HAL_IMPL_RVV_PACK_32(_Tpvec, _Tp, _wTpvec, hwidth, hsuffix, \
1617
+ suffix, rshr, shr) \
1618
+ inline _Tpvec v_pack(const _wTpvec &a, const _wTpvec &b) { \
1619
+ return shr(__riscv_vset(__riscv_vlmul_ext_##suffix##m4(a), 1, b), 0, \
1620
+ VTraits<_Tpvec>::vlanes()); \
1621
+ } \
1622
+ inline void v_pack_store(_Tp *ptr, const _wTpvec &a) { \
1623
+ __riscv_vse##hwidth##_v_##hsuffix##m1( \
1624
+ ptr, shr(a, 0, VTraits<_Tpvec>::vlanes()), \
1625
+ VTraits<_wTpvec>::vlanes()); \
1626
+ } \
1627
+ template <int n = 0> \
1628
+ inline _Tpvec v_rshr_pack(const _wTpvec &a, const _wTpvec &b, int N = n) { \
1629
+ return rshr(__riscv_vset(__riscv_vlmul_ext_##suffix##m4(a), 1, b), N, 0, \
1630
+ VTraits<_Tpvec>::vlanes()); \
1631
+ } \
1632
+ template <int n = 0> \
1633
+ inline void v_rshr_pack_store(_Tp *ptr, const _wTpvec &a, int N = n) { \
1634
+ __riscv_vse##hwidth##_v_##hsuffix##m1( \
1635
+ ptr, rshr(a, N, 0, VTraits<_Tpvec>::vlanes()), \
1636
+ VTraits<_wTpvec>::vlanes()); \
1637
+ }
1638
+
1639
+ OPENCV_HAL_IMPL_RVV_PACK(v_uint8, uchar, v_uint16, 8, u8, u16, __riscv_vnclipu,
1640
+ __riscv_vnclipu)
1641
+ OPENCV_HAL_IMPL_RVV_PACK(v_int8, schar, v_int16, 8, i8, i16, __riscv_vnclip,
1642
+ __riscv_vnclip)
1643
+ OPENCV_HAL_IMPL_RVV_PACK(v_uint16, ushort, v_uint32, 16, u16, u32,
1644
+ __riscv_vnclipu, __riscv_vnclipu)
1645
+ OPENCV_HAL_IMPL_RVV_PACK(v_int16, short, v_int32, 16, i16, i32, __riscv_vnclip,
1646
+ __riscv_vnclip)
1647
+ OPENCV_HAL_IMPL_RVV_PACK_32(v_uint32, unsigned, v_uint64, 32, u32, u64,
1648
+ __riscv_vnclipu, __riscv_vnsrl)
1649
+ OPENCV_HAL_IMPL_RVV_PACK_32(v_int32, int, v_int64, 32, i32, i64, __riscv_vnclip,
1650
+ __riscv_vnsra)
1651
+
1652
+ #define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, hwidth, width, \
1653
+ hsuffix, suffix, cast, hvl, vl) \
1654
+ inline _Tpvec v_pack_u(const _wTpvec &a, const _wTpvec &b) { \
1655
+ return __riscv_vnclipu( \
1656
+ cast(__riscv_vmax( \
1657
+ __riscv_vset(__riscv_vlmul_ext_##suffix##m4(a), 1, b), 0, vl)), \
1658
+ 0, 0, vl); \
1659
+ } \
1660
+ inline void v_pack_u_store(_Tp *ptr, const _wTpvec &a) { \
1661
+ __riscv_vse##hwidth##_v_##hsuffix##m1( \
1662
+ ptr, \
1663
+ __riscv_vnclipu( \
1664
+ __riscv_vreinterpret_u##width##m2(__riscv_vmax(a, 0, vl)), 0, 0, \
1665
+ vl), \
1666
+ hvl); \
1667
+ } \
1668
+ template <int N = 0> \
1669
+ inline _Tpvec v_rshr_pack_u(const _wTpvec &a, const _wTpvec &b, int n = N) { \
1670
+ return __riscv_vnclipu( \
1671
+ cast(__riscv_vmax( \
1672
+ __riscv_vset(__riscv_vlmul_ext_##suffix##m4(a), 1, b), 0, vl)), \
1673
+ n, 0, vl); \
1674
+ } \
1675
+ template <int N = 0> \
1676
+ inline void v_rshr_pack_u_store(_Tp *ptr, const _wTpvec &a, int n = N) { \
1677
+ __riscv_vse##hwidth##_v_##hsuffix##m1( \
1678
+ ptr, \
1679
+ __riscv_vnclipu( \
1680
+ __riscv_vreinterpret_u##width##m2(__riscv_vmax(a, 0, vl)), n, 0, \
1681
+ vl), \
1682
+ hvl); \
1683
+ }
1684
+
1685
+ OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8, uchar, v_int16, short, 8, 16, u8, i16,
1686
+ __riscv_vreinterpret_v_i16m4_u16m4,
1687
+ VTraits<v_int16>::vlanes(),
1688
+ VTraits<v_uint8>::vlanes())
1689
+ OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16, ushort, v_int32, int, 16, 32, u16, i32,
1690
+ __riscv_vreinterpret_v_i32m4_u32m4,
1691
+ VTraits<v_int32>::vlanes(),
1692
+ VTraits<v_uint16>::vlanes())
1693
+
1694
+ /* void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
1695
+ a0 = {A1 A2 A3 A4}
1696
+ a1 = {B1 B2 B3 B4}
1697
+ ---------------
1698
+ {A1 B1 A2 B2} and {A3 B3 A4 B4}
1699
+ */
1700
+
1701
+ #define OPENCV_HAL_IMPL_RVV_ZIP(_Tpvec, _wTpvec, suffix, width, width2, \
1702
+ convert2um2, convert2um1) \
1703
+ inline void v_zip(const _Tpvec &a0, const _Tpvec &a1, _Tpvec &b0, \
1704
+ _Tpvec &b1) { \
1705
+ _wTpvec temp = __riscv_vreinterpret_##suffix##m4(convert2um2(__riscv_vor( \
1706
+ __riscv_vzext_vf2(convert2um1(a0), VTraits<_Tpvec>::vlanes() * 2), \
1707
+ __riscv_vreinterpret_u##width2##m4(__riscv_vslide1up( \
1708
+ __riscv_vreinterpret_u##width##m4(__riscv_vzext_vf2( \
1709
+ convert2um1(a1), VTraits<_Tpvec>::vlanes() * 2)), \
1710
+ 0, VTraits<_Tpvec>::vlanes() * 2)), \
1711
+ VTraits<_Tpvec>::vlanes()))); \
1712
+ b0 = __riscv_vget_##suffix##m2(temp, 0); \
1713
+ b1 = __riscv_vget_##suffix##m2(temp, 1); \
1714
+ }
1715
+ OPENCV_HAL_IMPL_RVV_ZIP(v_uint8, vuint8m4_t, u8, 8, 16, OPENCV_HAL_NOP,
1716
+ OPENCV_HAL_NOP)
1717
+ OPENCV_HAL_IMPL_RVV_ZIP(v_int8, vint8m4_t, i8, 8, 16, __riscv_vreinterpret_u8m4,
1718
+ __riscv_vreinterpret_u8m2)
1719
+ OPENCV_HAL_IMPL_RVV_ZIP(v_uint16, vuint16m4_t, u16, 16, 32, OPENCV_HAL_NOP,
1720
+ OPENCV_HAL_NOP)
1721
+ OPENCV_HAL_IMPL_RVV_ZIP(v_int16, vint16m4_t, i16, 16, 32,
1722
+ __riscv_vreinterpret_u16m4, __riscv_vreinterpret_u16m2)
1723
+ OPENCV_HAL_IMPL_RVV_ZIP(v_uint32, vuint32m4_t, u32, 32, 64, OPENCV_HAL_NOP,
1724
+ OPENCV_HAL_NOP)
1725
+ OPENCV_HAL_IMPL_RVV_ZIP(v_int32, vint32m4_t, i32, 32, 64,
1726
+ __riscv_vreinterpret_u32m4, __riscv_vreinterpret_u32m2)
1727
+ OPENCV_HAL_IMPL_RVV_ZIP(v_float32, vfloat32m4_t, f32, 32, 64,
1728
+ __riscv_vreinterpret_u32m4, __riscv_vreinterpret_u32m2)
1729
+
1730
+ #if CV_SIMD_SCALABLE_64F
1731
+ inline void v_zip(const v_float64 &a0, const v_float64 &a1, v_float64 &b0,
1732
+ v_float64 &b1) {
1733
+ vuint16mf2_t idx0 = __riscv_vid_v_u16mf2(VTraits<v_float64>::vlanes());
1734
+ vuint16mf2_t idx1 = __riscv_vadd(idx0, VTraits<v_float64>::vlanes(),
1735
+ VTraits<v_float64>::vlanes());
1736
+ vuint16m1_t idx = __riscv_vreinterpret_u16m1(
1737
+ (__riscv_vor(__riscv_vzext_vf2(idx0, VTraits<v_float64>::vlanes()),
1738
+ __riscv_vreinterpret_u32m1(__riscv_vslide1up(
1739
+ __riscv_vreinterpret_u16m1(__riscv_vzext_vf2(
1740
+ idx1, VTraits<v_float64>::vlanes())),
1741
+ 0, VTraits<v_uint32>::vlanes())),
1742
+ VTraits<v_uint32>::vlanes())));
1743
+ #if 0
1744
+ vfloat64m4_t temp = __riscv_vcreate_v_f64m2_f64m4(a0, a1);
1745
+ #else // TODO: clean up when RVV Intrinsic is frozen.
1746
+ vfloat64m4_t temp = __riscv_vlmul_ext_f64m4(a0);
1747
+ temp = __riscv_vset(temp, 1, a1);
1748
+ #endif
1749
+ temp = __riscv_vrgatherei16(temp, idx, VTraits<v_float64>::vlanes() * 2);
1750
+ b0 = __riscv_vget_f64m2(temp, 0);
1751
+ b1 = __riscv_vget_f64m2(temp, 1);
1752
+ }
1753
+ #endif
1754
+
1755
+ #define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, width) \
1756
+ inline _Tpvec v_combine_low(const _Tpvec &a, const _Tpvec &b) { \
1757
+ return __riscv_vslideup(a, b, VTraits<_Tpvec>::vlanes() / 2, \
1758
+ VTraits<_Tpvec>::vlanes()); \
1759
+ } \
1760
+ inline _Tpvec v_combine_high(const _Tpvec &a, const _Tpvec &b) { \
1761
+ return __riscv_vslideup( \
1762
+ __riscv_vslidedown(a, VTraits<_Tpvec>::vlanes() / 2, \
1763
+ VTraits<_Tpvec>::vlanes()), \
1764
+ __riscv_vslidedown(b, VTraits<_Tpvec>::vlanes() / 2, \
1765
+ VTraits<_Tpvec>::vlanes()), \
1766
+ VTraits<_Tpvec>::vlanes() / 2, VTraits<_Tpvec>::vlanes()); \
1767
+ } \
1768
+ inline void v_recombine(const _Tpvec &a, const _Tpvec &b, _Tpvec &c, \
1769
+ _Tpvec &d) { \
1770
+ c = v_combine_low(a, b); \
1771
+ d = v_combine_high(a, b); \
1772
+ }
1773
+
1774
+ OPENCV_HAL_IMPL_RVV_UNPACKS(v_uint8, 8)
1775
+ OPENCV_HAL_IMPL_RVV_UNPACKS(v_int8, 8)
1776
+ OPENCV_HAL_IMPL_RVV_UNPACKS(v_uint16, 16)
1777
+ OPENCV_HAL_IMPL_RVV_UNPACKS(v_int16, 16)
1778
+ OPENCV_HAL_IMPL_RVV_UNPACKS(v_uint32, 32)
1779
+ OPENCV_HAL_IMPL_RVV_UNPACKS(v_int32, 32)
1780
+ OPENCV_HAL_IMPL_RVV_UNPACKS(v_float32, 32)
1781
+ #if CV_SIMD_SCALABLE_64F
1782
+ OPENCV_HAL_IMPL_RVV_UNPACKS(v_float64, 64)
1783
+ #endif
1784
+
1785
+ #define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp, suffix, width, hwidth, \
1786
+ vl) \
1787
+ inline void v_load_deinterleave(const _Tp *ptr, v_##_Tpvec &a, \
1788
+ v_##_Tpvec &b) { \
1789
+ a = __riscv_vlse##width##_v_##suffix##m2(ptr, sizeof(_Tp) * 2, \
1790
+ VTraits<v_##_Tpvec>::vlanes()); \
1791
+ b = __riscv_vlse##width##_v_##suffix##m2(ptr + 1, sizeof(_Tp) * 2, \
1792
+ VTraits<v_##_Tpvec>::vlanes()); \
1793
+ } \
1794
+ inline void v_load_deinterleave(const _Tp *ptr, v_##_Tpvec &a, \
1795
+ v_##_Tpvec &b, v_##_Tpvec &c) { \
1796
+ a = __riscv_vlse##width##_v_##suffix##m2(ptr, sizeof(_Tp) * 3, \
1797
+ VTraits<v_##_Tpvec>::vlanes()); \
1798
+ b = __riscv_vlse##width##_v_##suffix##m2(ptr + 1, sizeof(_Tp) * 3, \
1799
+ VTraits<v_##_Tpvec>::vlanes()); \
1800
+ c = __riscv_vlse##width##_v_##suffix##m2(ptr + 2, sizeof(_Tp) * 3, \
1801
+ VTraits<v_##_Tpvec>::vlanes()); \
1802
+ } \
1803
+ inline void v_load_deinterleave(const _Tp *ptr, v_##_Tpvec &a, \
1804
+ v_##_Tpvec &b, v_##_Tpvec &c, \
1805
+ v_##_Tpvec &d) { \
1806
+ \
1807
+ a = __riscv_vlse##width##_v_##suffix##m2(ptr, sizeof(_Tp) * 4, \
1808
+ VTraits<v_##_Tpvec>::vlanes()); \
1809
+ b = __riscv_vlse##width##_v_##suffix##m2(ptr + 1, sizeof(_Tp) * 4, \
1810
+ VTraits<v_##_Tpvec>::vlanes()); \
1811
+ c = __riscv_vlse##width##_v_##suffix##m2(ptr + 2, sizeof(_Tp) * 4, \
1812
+ VTraits<v_##_Tpvec>::vlanes()); \
1813
+ d = __riscv_vlse##width##_v_##suffix##m2(ptr + 3, sizeof(_Tp) * 4, \
1814
+ VTraits<v_##_Tpvec>::vlanes()); \
1815
+ } \
1816
+ inline void v_store_interleave( \
1817
+ _Tp *ptr, const v_##_Tpvec &a, const v_##_Tpvec &b, \
1818
+ hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) { \
1819
+ __riscv_vsse##width(ptr, sizeof(_Tp) * 2, a, \
1820
+ VTraits<v_##_Tpvec>::vlanes()); \
1821
+ __riscv_vsse##width(ptr + 1, sizeof(_Tp) * 2, b, \
1822
+ VTraits<v_##_Tpvec>::vlanes()); \
1823
+ } \
1824
+ inline void v_store_interleave( \
1825
+ _Tp *ptr, const v_##_Tpvec &a, const v_##_Tpvec &b, const v_##_Tpvec &c, \
1826
+ hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) { \
1827
+ __riscv_vsse##width(ptr, sizeof(_Tp) * 3, a, \
1828
+ VTraits<v_##_Tpvec>::vlanes()); \
1829
+ __riscv_vsse##width(ptr + 1, sizeof(_Tp) * 3, b, \
1830
+ VTraits<v_##_Tpvec>::vlanes()); \
1831
+ __riscv_vsse##width(ptr + 2, sizeof(_Tp) * 3, c, \
1832
+ VTraits<v_##_Tpvec>::vlanes()); \
1833
+ } \
1834
+ inline void v_store_interleave( \
1835
+ _Tp *ptr, const v_##_Tpvec &a, const v_##_Tpvec &b, const v_##_Tpvec &c, \
1836
+ const v_##_Tpvec &d, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) { \
1837
+ __riscv_vsse##width(ptr, sizeof(_Tp) * 4, a, \
1838
+ VTraits<v_##_Tpvec>::vlanes()); \
1839
+ __riscv_vsse##width(ptr + 1, sizeof(_Tp) * 4, b, \
1840
+ VTraits<v_##_Tpvec>::vlanes()); \
1841
+ __riscv_vsse##width(ptr + 2, sizeof(_Tp) * 4, c, \
1842
+ VTraits<v_##_Tpvec>::vlanes()); \
1843
+ __riscv_vsse##width(ptr + 3, sizeof(_Tp) * 4, d, \
1844
+ VTraits<v_##_Tpvec>::vlanes()); \
1845
+ }
1846
+
1847
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint8, uchar, u8, 8, 4,
1848
+ VTraits<v_uint8>::vlanes())
1849
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED(int8, schar, i8, 8, 4,
1850
+ VTraits<v_int8>::vlanes())
1851
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint16, ushort, u16, 16, 8,
1852
+ VTraits<v_uint16>::vlanes())
1853
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED(int16, short, i16, 16, 8,
1854
+ VTraits<v_int16>::vlanes())
1855
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint32, unsigned, u32, 32, 16,
1856
+ VTraits<v_uint32>::vlanes())
1857
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED(int32, int, i32, 32, 16,
1858
+ VTraits<v_int32>::vlanes())
1859
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED(float32, float, f32, 32, 16,
1860
+ VTraits<v_float32>::vlanes())
1861
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint64, uint64, u64, 64, 32,
1862
+ VTraits<v_uint64>::vlanes())
1863
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED(int64, int64, i64, 64, 32,
1864
+ VTraits<v_int64>::vlanes())
1865
+ #if CV_SIMD_SCALABLE_64F
1866
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED(float64, double, f64, 64, 32,
1867
+ VTraits<v_float64>::vlanes())
1868
+ #endif
1869
+
1870
// Byte-index table for v_interleave_pairs, stored as 16 packed 64-bit words
// (128 one-byte indices, least-significant byte first). Within every group of
// four indices the order is {0, 2, 1, 3} relative to the group base.
static uint64_t idx_interleave_pairs[] = {
    0x0705060403010200, 0x0f0d0e0c0b090a08, // indices 0x00..0x0f
    0x1715161413111210, 0x1f1d1e1c1b191a18, // indices 0x10..0x1f
    0x2725262423212220, 0x2f2d2e2c2b292a28, // indices 0x20..0x2f
    0x3735363433313230, 0x3f3d3e3c3b393a38, // indices 0x30..0x3f
    0x4745464443414240, 0x4f4d4e4c4b494a48, // indices 0x40..0x4f
    0x5755565453515250, 0x5f5d5e5c5b595a58, // indices 0x50..0x5f
    0x6765666463616260, 0x6f6d6e6c6b696a68, // indices 0x60..0x6f
    0x7775767473717270, 0x7f7d7e7c7b797a78, // indices 0x70..0x7f
};
1877
+
1878
// Byte-index table for v_interleave_quads, stored as 16 packed 64-bit words
// (128 one-byte indices, least-significant byte first). Within every group of
// eight indices the order is {0, 4, 1, 5, 2, 6, 3, 7} relative to the group
// base.
static uint64_t idx_interleave_quads[] = {
    0x0703060205010400, 0x0f0b0e0a0d090c08, // indices 0x00..0x0f
    0x1713161215111410, 0x1f1b1e1a1d191c18, // indices 0x10..0x1f
    0x2723262225212420, 0x2f2b2e2a2d292c28, // indices 0x20..0x2f
    0x3733363235313430, 0x3f3b3e3a3d393c38, // indices 0x30..0x3f
    0x4743464245414440, 0x4f4b4e4a4d494c48, // indices 0x40..0x4f
    0x5753565255515450, 0x5f5b5e5a5d595c58, // indices 0x50..0x5f
    0x6763666265616460, 0x6f6b6e6a6d696c68, // indices 0x60..0x6f
    0x7773767275717470, 0x7f7b7e7a7d797c78, // indices 0x70..0x7f
};
1885
+
1886
+ #define OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(_Tpvec, func) \
1887
+ inline _Tpvec v_interleave_##func(const _Tpvec &vec) { \
1888
+ CV_CheckLE( \
1889
+ VTraits<_Tpvec>::vlanes(), VTraits<_Tpvec>::max_nlanes, \
1890
+ "RVV implementation only supports VLEN in the range [128, 1024]"); \
1891
+ vuint8m2_t vidx = __riscv_vundefined_u8m2(); \
1892
+ vidx = __riscv_vreinterpret_u8m2( \
1893
+ __riscv_vle64_v_u64m2(idx_interleave_##func, 16)); \
1894
+ return __riscv_vrgather(vec, vidx, VTraits<v_uint8>::vlanes()); \
1895
+ }
1896
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_uint8, pairs)
1897
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_int8, pairs)
1898
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_uint8, quads)
1899
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ_NOEXPEND(v_int8, quads)
1900
+
1901
+ #define OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(_Tpvec, width, vzext_vfx, func) \
1902
+ inline _Tpvec v_interleave_##func(const _Tpvec &vec) { \
1903
+ CV_CheckLE( \
1904
+ VTraits<_Tpvec>::vlanes(), VTraits<_Tpvec>::max_nlanes, \
1905
+ "RVV implementation only supports VLEN in the range [128, 1024]"); \
1906
+ vuint##width##m2_t vidx = __riscv_vundefined_u##width##m2(); \
1907
+ vidx = __riscv_vget_u##width##m2( \
1908
+ vzext_vfx(__riscv_vreinterpret_u8m2( \
1909
+ __riscv_vle64_v_u64m2(idx_interleave_##func, 16)), \
1910
+ VTraits<v_uint8>::vlanes()), \
1911
+ 0); \
1912
+ return __riscv_vrgather(vec, vidx, VTraits<_Tpvec>::vlanes()); \
1913
+ }
1914
+
1915
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint16, 16, __riscv_vzext_vf2, pairs)
1916
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int16, 16, __riscv_vzext_vf2, pairs)
1917
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint32, 32, __riscv_vzext_vf4, pairs)
1918
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int32, 32, __riscv_vzext_vf4, pairs)
1919
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_float32, 32, __riscv_vzext_vf4, pairs)
1920
+
1921
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint16, 16, __riscv_vzext_vf2, quads)
1922
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int16, 16, __riscv_vzext_vf2, quads)
1923
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_uint32, 32, __riscv_vzext_vf4, quads)
1924
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_int32, 32, __riscv_vzext_vf4, quads)
1925
+ OPENCV_HAL_IMPL_RVV_INTERLEAVED_PQ(v_float32, 32, __riscv_vzext_vf4, quads)
1926
+
1927
+ //////////// PopCount //////////
1928
// popCountTable[b] is the number of set bits in the byte value b.
// Laid out 16 entries per row: row r covers byte values 16*r .. 16*r + 15.
static const unsigned char popCountTable[256] = {
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, // 0x00
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x10
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x20
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x30
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x40
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x50
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x60
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0x70
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x80
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x90
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xa0
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xb0
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xc0
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xd0
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xe0
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, // 0xf0
};
1941
+ #define OPENCV_HAL_IMPL_RVV_HADD(_Tpvec, _Tpvec2, _Tm2, width, width2, suffix, \
1942
+ add) \
1943
+ static inline _Tpvec2 v_hadd(_Tpvec a) { \
1944
+ vuint##width2##m2_t oneX2 = \
1945
+ __riscv_vmv_v_x_u##width2##m2(1, VTraits<v_uint##width2>::vlanes()); \
1946
+ vuint##width##m2_t one = __riscv_vreinterpret_u##width##m2(oneX2); \
1947
+ _Tm2 res = \
1948
+ add(a, __riscv_vslide1down(a, 0, VTraits<v_uint##width>::vlanes()), \
1949
+ VTraits<v_uint##width>::vlanes()); \
1950
+ return __riscv_vget_##suffix##m2( \
1951
+ __riscv_vcompress( \
1952
+ res, __riscv_vmseq(one, 1, VTraits<v_uint##width>::vlanes()), \
1953
+ VTraits<v_uint##width>::vlanes()), \
1954
+ 0); \
1955
+ }
1956
+ OPENCV_HAL_IMPL_RVV_HADD(v_uint8, v_uint16, vuint16m4_t, 8, 16, u16,
1957
+ __riscv_vwaddu_vv)
1958
+ OPENCV_HAL_IMPL_RVV_HADD(v_uint16, v_uint32, vuint32m4_t, 16, 32, u32,
1959
+ __riscv_vwaddu_vv)
1960
+ OPENCV_HAL_IMPL_RVV_HADD(v_uint32, v_uint64, vuint64m4_t, 32, 64, u64,
1961
+ __riscv_vwaddu_vv)
1962
+ OPENCV_HAL_IMPL_RVV_HADD(v_int8, v_int16, vint16m4_t, 8, 16, i16,
1963
+ __riscv_vwadd_vv)
1964
+ OPENCV_HAL_IMPL_RVV_HADD(v_int16, v_int32, vint32m4_t, 16, 32, i32,
1965
+ __riscv_vwadd_vv)
1966
+ OPENCV_HAL_IMPL_RVV_HADD(v_int32, v_int64, vint64m4_t, 32, 64, i64,
1967
+ __riscv_vwadd_vv)
1968
+
1969
+ OPENCV_HAL_IMPL_RVV_HADD(vint32m4_t, v_int32, vint32m4_t, 16, 32, i32,
1970
+ __riscv_vadd)
1971
+ OPENCV_HAL_IMPL_RVV_HADD(vint64m4_t, v_int64, vint64m4_t, 32, 64, i64,
1972
+ __riscv_vadd)
1973
+
1974
+ inline v_uint8 v_popcount(const v_uint8 &a) {
1975
+ return __riscv_vloxei8(popCountTable, a, VTraits<v_uint8>::vlanes());
1976
+ }
1977
+ inline v_uint16 v_popcount(const v_uint16 &a) {
1978
+ return v_hadd(v_popcount(__riscv_vreinterpret_u8m2(a)));
1979
+ }
1980
+ inline v_uint32 v_popcount(const v_uint32 &a) {
1981
+ return v_hadd(v_hadd(v_popcount(__riscv_vreinterpret_u8m2(a))));
1982
+ }
1983
+ inline v_uint64 v_popcount(const v_uint64 &a) {
1984
+ return v_hadd(v_hadd(v_hadd(v_popcount(__riscv_vreinterpret_u8m2(a)))));
1985
+ }
1986
+
1987
+ inline v_uint8 v_popcount(const v_int8 &a) { return v_popcount(v_abs(a)); }
1988
+ inline v_uint16 v_popcount(const v_int16 &a) { return v_popcount(v_abs(a)); }
1989
+ inline v_uint32 v_popcount(const v_int32 &a) { return v_popcount(v_abs(a)); }
1990
+ inline v_uint64 v_popcount(const v_int64 &a) {
1991
+ // max(0 - a) is used, since v_abs does not support 64-bit integers.
1992
+ return v_popcount(v_reinterpret_as_u64(
1993
+ __riscv_vmax(a, v_sub(v_setzero_s64(), a), VTraits<v_int64>::vlanes())));
1994
+ }
1995
+
1996
+ //////////// SignMask ////////////
1997
+ #define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec) \
1998
+ inline int v_signmask(const _Tpvec &a) { \
1999
+ uint8_t ans[4] = {0}; \
2000
+ __riscv_vsm(ans, __riscv_vmslt(a, 0, VTraits<_Tpvec>::vlanes()), \
2001
+ VTraits<_Tpvec>::vlanes()); \
2002
+ return *(reinterpret_cast<int *>(ans)) & \
2003
+ (((__int128_t)1 << VTraits<_Tpvec>::vlanes()) - 1); \
2004
+ } \
2005
+ inline int v_scan_forward(const _Tpvec &a) { \
2006
+ return (int)__riscv_vfirst(__riscv_vmslt(a, 0, VTraits<_Tpvec>::vlanes()), \
2007
+ VTraits<_Tpvec>::vlanes()); \
2008
+ }
2009
+
2010
+ OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int8)
2011
+ OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int16)
2012
+ OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int32)
2013
+ OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int64)
2014
+
2015
+ inline int64 v_signmask(const v_uint8 &a) {
2016
+ return v_signmask(v_reinterpret_as_s8(a));
2017
+ }
2018
+ inline int64 v_signmask(const v_uint16 &a) {
2019
+ return v_signmask(v_reinterpret_as_s16(a));
2020
+ }
2021
+ inline int v_signmask(const v_uint32 &a) {
2022
+ return v_signmask(v_reinterpret_as_s32(a));
2023
+ }
2024
+ inline int v_signmask(const v_float32 &a) {
2025
+ return v_signmask(v_reinterpret_as_s32(a));
2026
+ }
2027
+ inline int v_signmask(const v_uint64 &a) {
2028
+ return v_signmask(v_reinterpret_as_s64(a));
2029
+ }
2030
+ #if CV_SIMD_SCALABLE_64F
2031
+ inline int v_signmask(const v_float64 &a) {
2032
+ return v_signmask(v_reinterpret_as_s64(a));
2033
+ }
2034
+ #endif
2035
+
2036
+ //////////// Scan forward ////////////
2037
+ inline int v_scan_forward(const v_uint8 &a) {
2038
+ return v_scan_forward(v_reinterpret_as_s8(a));
2039
+ }
2040
+ inline int v_scan_forward(const v_uint16 &a) {
2041
+ return v_scan_forward(v_reinterpret_as_s16(a));
2042
+ }
2043
+ inline int v_scan_forward(const v_uint32 &a) {
2044
+ return v_scan_forward(v_reinterpret_as_s32(a));
2045
+ }
2046
+ inline int v_scan_forward(const v_float32 &a) {
2047
+ return v_scan_forward(v_reinterpret_as_s32(a));
2048
+ }
2049
+ inline int v_scan_forward(const v_uint64 &a) {
2050
+ return v_scan_forward(v_reinterpret_as_s64(a));
2051
+ }
2052
+ #if CV_SIMD_SCALABLE_64F
2053
+ inline int v_scan_forward(const v_float64 &a) {
2054
+ return v_scan_forward(v_reinterpret_as_s64(a));
2055
+ }
2056
+ #endif
2057
+
2058
+ //////////// Pack triplets ////////////
2059
+ // {A0, A1, A2, A3, B0, B1, B2, B3, C0 ...} --> {A0, A1, A2, B0, B1, B2, C0 ...}
2060
+ // mask: {0,0,0,1, ...} -> {T,T,T,F, ...}
2061
+ #define OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(_Tpvec, v_trunc) \
2062
+ inline _Tpvec v_pack_triplets(const _Tpvec &vec) { \
2063
+ size_t vl = VTraits<v_uint8>::vlanes(); \
2064
+ vuint32m2_t one = __riscv_vmv_v_x_u32m2(1, VTraits<v_uint32>::vlanes()); \
2065
+ vuint8m2_t zero = __riscv_vmv_v_x_u8m2(0, vl); \
2066
+ vuint8m2_t mask = __riscv_vreinterpret_u8m2(one); \
2067
+ return __riscv_vcompress( \
2068
+ vec, \
2069
+ __riscv_vmseq(v_trunc(__riscv_vslideup(zero, mask, 3, vl)), 0, vl), \
2070
+ VTraits<_Tpvec>::vlanes()); \
2071
+ }
2072
+
2073
+ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint8, OPENCV_HAL_NOP)
2074
+ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int8, OPENCV_HAL_NOP)
2075
+ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint16, __riscv_vlmul_trunc_u8m1)
2076
+ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int16, __riscv_vlmul_trunc_u8m1)
2077
+ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint32, __riscv_vlmul_trunc_u8mf2)
2078
+ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int32, __riscv_vlmul_trunc_u8mf2)
2079
+ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float32, __riscv_vlmul_trunc_u8mf2)
2080
+ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint64, __riscv_vlmul_trunc_u8mf4)
2081
+ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int64, __riscv_vlmul_trunc_u8mf4)
2082
+ #if CV_SIMD_SCALABLE_64F
2083
+ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float64, __riscv_vlmul_trunc_u8mf4)
2084
+ #endif
2085
+
2086
+ ////// FP16 support ///////
2087
+
2088
+ #if defined(__riscv_zfh) && __riscv_zfh
2089
+ inline v_float32 v_load_expand(const hfloat *ptr) {
2090
+ return __riscv_vfwcvt_f(
2091
+ __riscv_vle16_v_f16m1((_Float16 *)ptr, VTraits<v_float32>::vlanes()),
2092
+ VTraits<v_float32>::vlanes());
2093
+ ;
2094
+ }
2095
+
2096
+ inline void v_pack_store(hfloat *ptr, const v_float32 &v) {
2097
+ __riscv_vse16_v_f16m1(
2098
+ (_Float16 *)ptr,
2099
+ __riscv_vfncvt_f_f_w_f16m1(v, VTraits<v_float32>::vlanes()),
2100
+ VTraits<v_float32>::vlanes());
2101
+ }
2102
+ #else
2103
+ inline v_float32 v_load_expand(const hfloat *ptr) {
2104
+ float buf[32];
2105
+ for (int i = 0; i < VTraits<v_float32>::vlanes(); i++)
2106
+ buf[i] = (float)ptr[i];
2107
+ return v_load(buf);
2108
+ }
2109
+
2110
+ inline void v_pack_store(hfloat *ptr, const v_float32 &v) {
2111
+ float buf[32];
2112
+ v_store(buf, v);
2113
+ for (int i = 0; i < VTraits<v_float32>::vlanes(); i++)
2114
+ ptr[i] = hfloat(buf[i]);
2115
+ }
2116
+ #endif
2117
+ ////////////// Rounding //////////////
2118
+ inline v_int32 v_round(const v_float32 &a) {
2119
+ // return vfcvt_x(vfadd(a, 1e-6, VTraits<v_float32>::vlanes()),
2120
+ // VTraits<v_float32>::vlanes());
2121
+ return __riscv_vfcvt_x(a, VTraits<v_float32>::vlanes());
2122
+ }
2123
+
2124
+ inline v_int32 v_floor(const v_float32 &a) {
2125
+ return __riscv_vfcvt_x(
2126
+ __riscv_vfsub(a, 0.5f - 1e-5, VTraits<v_float32>::vlanes()),
2127
+ VTraits<v_float32>::vlanes());
2128
+ // return vfcvt_x(a, VTraits<v_float32>::vlanes());
2129
+ }
2130
+
2131
+ inline v_int32 v_ceil(const v_float32 &a) {
2132
+ return __riscv_vfcvt_x(
2133
+ __riscv_vfadd(a, 0.5f - 1e-5, VTraits<v_float32>::vlanes()),
2134
+ VTraits<v_float32>::vlanes());
2135
+ }
2136
+
2137
+ inline v_int32 v_trunc(const v_float32 &a) {
2138
+ return __riscv_vfcvt_rtz_x(a, VTraits<v_float32>::vlanes());
2139
+ }
2140
+ #if CV_SIMD_SCALABLE_64F
2141
+ inline v_int32 v_round(const v_float64 &a) {
2142
+ return __riscv_vfncvt_x(__riscv_vlmul_ext_f64m4(a),
2143
+ VTraits<v_float32>::vlanes());
2144
+ }
2145
+
2146
+ inline v_int32 v_round(const v_float64 &a, const v_float64 &b) {
2147
+ // return vfncvt_x(vset(vlmul_ext_f64m2(vfadd(a, 1e-6,
2148
+ // VTraits<v_float64>::vlanes())), 1, b), VTraits<v_float32>::vlanes()); Fix
2149
+ // https://github.com/opencv/opencv/issues/24746
2150
+ return __riscv_vfncvt_x(__riscv_vset(__riscv_vlmul_ext_f64m4(a), 1, b),
2151
+ VTraits<v_float32>::vlanes());
2152
+ }
2153
+
2154
+ inline v_int32 v_floor(const v_float64 &a) {
2155
+ return __riscv_vfncvt_x(__riscv_vlmul_ext_f64m4(__riscv_vfsub(
2156
+ a, 0.5f - 1e-6, VTraits<v_float64>::vlanes())),
2157
+ VTraits<v_float32>::vlanes());
2158
+ }
2159
+
2160
+ inline v_int32 v_ceil(const v_float64 &a) {
2161
+ return __riscv_vfncvt_x(__riscv_vlmul_ext_f64m4(__riscv_vfadd(
2162
+ a, 0.5f - 1e-6, VTraits<v_float64>::vlanes())),
2163
+ VTraits<v_float32>::vlanes());
2164
+ }
2165
+
2166
+ inline v_int32 v_trunc(const v_float64 &a) {
2167
+ return __riscv_vfncvt_rtz_x(__riscv_vlmul_ext_f64m4(a),
2168
+ VTraits<v_float32>::vlanes());
2169
+ }
2170
+ #endif
2171
+
2172
+ //////// Dot Product ////////
2173
+
2174
+ // 16 >> 32
2175
+ inline v_int32 v_dotprod(const v_int16 &a, const v_int16 &b) {
2176
+ vint32m4_t temp1 = __riscv_vwmul(a, b, VTraits<v_int16>::vlanes());
2177
+ return v_hadd(temp1);
2178
+ }
2179
+
2180
+ inline v_int32 v_dotprod(const v_int16 &a, const v_int16 &b, const v_int32 &c) {
2181
+ vint32m4_t temp1 = __riscv_vwmul(a, b, VTraits<v_int16>::vlanes());
2182
+ return __riscv_vadd(v_hadd(temp1), c, VTraits<v_int32>::vlanes());
2183
+ }
2184
+
2185
+ // 32 >> 64
2186
+ inline v_int64 v_dotprod(const v_int32 &a, const v_int32 &b) {
2187
+ vuint64m2_t one64 = __riscv_vmv_v_x_u64m2(1, VTraits<v_uint64>::vlanes());
2188
+ vuint32m2_t one32 = __riscv_vreinterpret_u32m2(one64);
2189
+ vbool16_t mask = __riscv_vmseq(one32, 1, VTraits<v_uint32>::vlanes());
2190
+ vint64m4_t temp1 = __riscv_vwmul(a, b, VTraits<v_int32>::vlanes());
2191
+ vint64m4_t temp2 = __riscv_vslide1down(temp1, 0, VTraits<v_int32>::vlanes());
2192
+ vint64m4_t res = __riscv_vadd(temp1, temp2, VTraits<v_int32>::vlanes());
2193
+ res = __riscv_vcompress(res, mask, VTraits<v_int32>::vlanes());
2194
+ return __riscv_vlmul_trunc_i64m2(res);
2195
+ }
2196
+ inline v_int64 v_dotprod(const v_int32 &a, const v_int32 &b, const v_int64 &c) {
2197
+ vuint64m2_t one64 = __riscv_vmv_v_x_u64m2(1, VTraits<v_uint64>::vlanes());
2198
+ vuint32m2_t one32 = __riscv_vreinterpret_u32m2(one64);
2199
+ vbool16_t mask = __riscv_vmseq(one32, 1, VTraits<v_uint32>::vlanes());
2200
+ vint64m4_t temp1 = __riscv_vwmul(a, b, VTraits<v_int32>::vlanes());
2201
+ vint64m4_t temp2 = __riscv_vslide1down(temp1, 0, VTraits<v_int32>::vlanes());
2202
+ vint64m4_t res = __riscv_vadd(temp1, temp2, VTraits<v_int32>::vlanes());
2203
+ res = __riscv_vcompress(res, mask, VTraits<v_int32>::vlanes());
2204
+ return __riscv_vadd(__riscv_vlmul_trunc_i64m2(res), c,
2205
+ VTraits<v_int64>::vlanes());
2206
+ }
2207
+
2208
+ // 8 >> 32
2209
+ inline v_uint32 v_dotprod_expand(const v_uint8 &a, const v_uint8 &b) {
2210
+ vuint32m2_t one32 = __riscv_vmv_v_x_u32m2(1, VTraits<v_uint32>::vlanes());
2211
+ vuint8m2_t one8 = __riscv_vreinterpret_u8m2(one32);
2212
+ vbool4_t mask = __riscv_vmseq(one8, 1, VTraits<v_uint8>::vlanes());
2213
+ vuint16m4_t t0 = __riscv_vwmulu(a, b, VTraits<v_uint8>::vlanes());
2214
+ vuint16m4_t t1 = __riscv_vslide1down(t0, 0, VTraits<v_uint8>::vlanes());
2215
+ vuint16m4_t t2 = __riscv_vslide1down(t1, 0, VTraits<v_uint8>::vlanes());
2216
+ vuint16m4_t t3 = __riscv_vslide1down(t2, 0, VTraits<v_uint8>::vlanes());
2217
+ vuint32m8_t res =
2218
+ __riscv_vadd(__riscv_vwaddu_vv(t2, t3, VTraits<v_uint8>::vlanes()),
2219
+ __riscv_vwaddu_vv(t0, t1, VTraits<v_uint8>::vlanes()),
2220
+ VTraits<v_uint8>::vlanes());
2221
+ res = __riscv_vcompress(res, mask, VTraits<v_uint8>::vlanes());
2222
+ return __riscv_vlmul_trunc_u32m2(res);
2223
+ }
2224
+
2225
+ inline v_uint32 v_dotprod_expand(const v_uint8 &a, const v_uint8 &b,
2226
+ const v_uint32 &c) {
2227
+ vuint32m2_t one32 = __riscv_vmv_v_x_u32m2(1, VTraits<v_uint32>::vlanes());
2228
+ vuint8m2_t one8 = __riscv_vreinterpret_u8m2(one32);
2229
+ vbool4_t mask = __riscv_vmseq(one8, 1, VTraits<v_uint8>::vlanes());
2230
+ vuint16m4_t t0 = __riscv_vwmulu(a, b, VTraits<v_uint8>::vlanes());
2231
+ vuint16m4_t t1 = __riscv_vslide1down(t0, 0, VTraits<v_uint8>::vlanes());
2232
+ vuint16m4_t t2 = __riscv_vslide1down(t1, 0, VTraits<v_uint8>::vlanes());
2233
+ vuint16m4_t t3 = __riscv_vslide1down(t2, 0, VTraits<v_uint8>::vlanes());
2234
+ vuint32m8_t res =
2235
+ __riscv_vadd(__riscv_vwaddu_vv(t2, t3, VTraits<v_uint8>::vlanes()),
2236
+ __riscv_vwaddu_vv(t0, t1, VTraits<v_uint8>::vlanes()),
2237
+ VTraits<v_uint8>::vlanes());
2238
+ res = __riscv_vcompress(res, mask, VTraits<v_uint8>::vlanes());
2239
+ return __riscv_vadd(__riscv_vlmul_trunc_u32m2(res), c,
2240
+ VTraits<v_uint8>::vlanes());
2241
+ }
2242
+
2243
+ inline v_int32 v_dotprod_expand(const v_int8 &a, const v_int8 &b) {
2244
+ vuint32m2_t one32 = __riscv_vmv_v_x_u32m2(1, VTraits<v_uint32>::vlanes());
2245
+ vuint8m2_t one8 = __riscv_vreinterpret_u8m2(one32);
2246
+ vbool4_t mask = __riscv_vmseq(one8, 1, VTraits<v_uint8>::vlanes());
2247
+ vint16m4_t t0 = __riscv_vwmul(a, b, VTraits<v_int8>::vlanes());
2248
+ vint16m4_t t1 = __riscv_vslide1down(t0, 0, VTraits<v_int8>::vlanes());
2249
+ vint16m4_t t2 = __riscv_vslide1down(t1, 0, VTraits<v_int8>::vlanes());
2250
+ vint16m4_t t3 = __riscv_vslide1down(t2, 0, VTraits<v_int8>::vlanes());
2251
+ vint32m8_t res =
2252
+ __riscv_vadd(__riscv_vwadd_vv(t2, t3, VTraits<v_int8>::vlanes()),
2253
+ __riscv_vwadd_vv(t0, t1, VTraits<v_int8>::vlanes()),
2254
+ VTraits<v_int8>::vlanes());
2255
+ res = __riscv_vcompress(res, mask, VTraits<v_int8>::vlanes());
2256
+ return __riscv_vlmul_trunc_i32m2(res);
2257
+ }
2258
+
2259
+ inline v_int32 v_dotprod_expand(const v_int8 &a, const v_int8 &b,
2260
+ const v_int32 &c) {
2261
+ vuint32m2_t one32 = __riscv_vmv_v_x_u32m2(1, VTraits<v_uint32>::vlanes());
2262
+ vuint8m2_t one8 = __riscv_vreinterpret_u8m2(one32);
2263
+ vbool4_t mask = __riscv_vmseq(one8, 1, VTraits<v_uint8>::vlanes());
2264
+ vint16m4_t t0 = __riscv_vwmul(a, b, VTraits<v_int8>::vlanes());
2265
+ vint16m4_t t1 = __riscv_vslide1down(t0, 0, VTraits<v_int8>::vlanes());
2266
+ vint16m4_t t2 = __riscv_vslide1down(t1, 0, VTraits<v_int8>::vlanes());
2267
+ vint16m4_t t3 = __riscv_vslide1down(t2, 0, VTraits<v_int8>::vlanes());
2268
+ vint32m8_t res =
2269
+ __riscv_vadd(__riscv_vwadd_vv(t2, t3, VTraits<v_int8>::vlanes()),
2270
+ __riscv_vwadd_vv(t0, t1, VTraits<v_int8>::vlanes()),
2271
+ VTraits<v_int8>::vlanes());
2272
+ res = __riscv_vcompress(res, mask, VTraits<v_int8>::vlanes());
2273
+ return __riscv_vadd(__riscv_vlmul_trunc_i32m2(res), c,
2274
+ VTraits<v_int8>::vlanes());
2275
+ }
2276
+
2277
+ // 16 >> 64
2278
+ inline v_uint64 v_dotprod_expand(const v_uint16 &a, const v_uint16 &b) {
2279
+ vuint64m2_t one64 = __riscv_vmv_v_x_u64m2(1, VTraits<v_uint64>::vlanes());
2280
+ vuint16m2_t one16 = __riscv_vreinterpret_u16m2(one64);
2281
+ vbool8_t mask = __riscv_vmseq(one16, 1, VTraits<v_uint16>::vlanes());
2282
+ vuint32m4_t t0 = __riscv_vwmulu(a, b, VTraits<v_uint16>::vlanes());
2283
+ vuint32m4_t t1 = __riscv_vslide1down(t0, 0, VTraits<v_uint16>::vlanes());
2284
+ vuint32m4_t t2 = __riscv_vslide1down(t1, 0, VTraits<v_uint16>::vlanes());
2285
+ vuint32m4_t t3 = __riscv_vslide1down(t2, 0, VTraits<v_uint16>::vlanes());
2286
+ vuint64m8_t res =
2287
+ __riscv_vadd(__riscv_vwaddu_vv(t2, t3, VTraits<v_uint16>::vlanes()),
2288
+ __riscv_vwaddu_vv(t0, t1, VTraits<v_uint16>::vlanes()),
2289
+ VTraits<v_uint16>::vlanes());
2290
+ res = __riscv_vcompress(res, mask, VTraits<v_uint16>::vlanes());
2291
+ return __riscv_vlmul_trunc_u64m2(res);
2292
+ }
2293
+ inline v_uint64 v_dotprod_expand(const v_uint16 &a, const v_uint16 &b,
2294
+ const v_uint64 &c) {
2295
+ vuint64m2_t one64 = __riscv_vmv_v_x_u64m2(1, VTraits<v_uint64>::vlanes());
2296
+ vuint16m2_t one16 = __riscv_vreinterpret_u16m2(one64);
2297
+ vbool8_t mask = __riscv_vmseq(one16, 1, VTraits<v_uint16>::vlanes());
2298
+ vuint32m4_t t0 = __riscv_vwmulu(a, b, VTraits<v_uint16>::vlanes());
2299
+ vuint32m4_t t1 = __riscv_vslide1down(t0, 0, VTraits<v_uint16>::vlanes());
2300
+ vuint32m4_t t2 = __riscv_vslide1down(t1, 0, VTraits<v_uint16>::vlanes());
2301
+ vuint32m4_t t3 = __riscv_vslide1down(t2, 0, VTraits<v_uint16>::vlanes());
2302
+ vuint64m8_t res =
2303
+ __riscv_vadd(__riscv_vwaddu_vv(t2, t3, VTraits<v_uint16>::vlanes()),
2304
+ __riscv_vwaddu_vv(t0, t1, VTraits<v_uint16>::vlanes()),
2305
+ VTraits<v_uint16>::vlanes());
2306
+ res = __riscv_vcompress(res, mask, VTraits<v_uint16>::vlanes());
2307
+ return __riscv_vadd(__riscv_vlmul_trunc_u64m2(res), c,
2308
+ VTraits<v_uint16>::vlanes());
2309
+ }
2310
+
2311
+ inline v_int64 v_dotprod_expand(const v_int16 &a, const v_int16 &b) {
2312
+ vuint64m2_t one64 = __riscv_vmv_v_x_u64m2(1, VTraits<v_uint64>::vlanes());
2313
+ vuint16m2_t one16 = __riscv_vreinterpret_u16m2(one64);
2314
+ vbool8_t mask = __riscv_vmseq(one16, 1, VTraits<v_uint16>::vlanes());
2315
+ vint32m4_t t0 = __riscv_vwmul(a, b, VTraits<v_int16>::vlanes());
2316
+ vint32m4_t t1 = __riscv_vslide1down(t0, 0, VTraits<v_int16>::vlanes());
2317
+ vint32m4_t t2 = __riscv_vslide1down(t1, 0, VTraits<v_int16>::vlanes());
2318
+ vint32m4_t t3 = __riscv_vslide1down(t2, 0, VTraits<v_int16>::vlanes());
2319
+ vint64m8_t res =
2320
+ __riscv_vadd(__riscv_vwadd_vv(t2, t3, VTraits<v_int16>::vlanes()),
2321
+ __riscv_vwadd_vv(t0, t1, VTraits<v_int16>::vlanes()),
2322
+ VTraits<v_int16>::vlanes());
2323
+ res = __riscv_vcompress(res, mask, VTraits<v_int16>::vlanes());
2324
+ return __riscv_vlmul_trunc_i64m2(res);
2325
+ }
2326
+ inline v_int64 v_dotprod_expand(const v_int16 &a, const v_int16 &b,
2327
+ const v_int64 &c) {
2328
+ vuint64m2_t one64 = __riscv_vmv_v_x_u64m2(1, VTraits<v_uint64>::vlanes());
2329
+ vuint16m2_t one16 = __riscv_vreinterpret_u16m2(one64);
2330
+ vbool8_t mask = __riscv_vmseq(one16, 1, VTraits<v_uint16>::vlanes());
2331
+ vint32m4_t t0 = __riscv_vwmul(a, b, VTraits<v_int16>::vlanes());
2332
+ vint32m4_t t1 = __riscv_vslide1down(t0, 0, VTraits<v_int16>::vlanes());
2333
+ vint32m4_t t2 = __riscv_vslide1down(t1, 0, VTraits<v_int16>::vlanes());
2334
+ vint32m4_t t3 = __riscv_vslide1down(t2, 0, VTraits<v_int16>::vlanes());
2335
+ vint64m8_t res =
2336
+ __riscv_vadd(__riscv_vwadd_vv(t2, t3, VTraits<v_int16>::vlanes()),
2337
+ __riscv_vwadd_vv(t0, t1, VTraits<v_int16>::vlanes()),
2338
+ VTraits<v_int16>::vlanes());
2339
+ res = __riscv_vcompress(res, mask, VTraits<v_int16>::vlanes());
2340
+ return __riscv_vadd(__riscv_vlmul_trunc_i64m2(res), c,
2341
+ VTraits<v_int16>::vlanes());
2342
+ }
2343
+
2344
+ // 32 >> 64f
2345
+ #if CV_SIMD_SCALABLE_64F
2346
+ inline v_float64 v_dotprod_expand(const v_int32 &a, const v_int32 &b) {
2347
+ return v_cvt_f64(v_dotprod(a, b));
2348
+ }
2349
+ inline v_float64 v_dotprod_expand(const v_int32 &a, const v_int32 &b,
2350
+ const v_float64 &c) {
2351
+ return v_add(v_dotprod_expand(a, b), c);
2352
+ }
2353
+ #endif
2354
+
2355
+ //////// Fast Dot Product ////////
2356
+ // 16 >> 32
2357
+ inline v_int32 v_dotprod_fast(const v_int16 &a, const v_int16 &b) {
2358
+ vint32m1_t zero = __riscv_vmv_v_x_i32m1(0, VTraits<vint32m1_t>::vlanes());
2359
+ return __riscv_vset(
2360
+ __riscv_vmv_v_x_i32m2(0, VTraits<v_int32>::vlanes()), 0,
2361
+ __riscv_vredsum_tu(zero, __riscv_vwmul(a, b, VTraits<v_int16>::vlanes()),
2362
+ zero, VTraits<v_int16>::vlanes()));
2363
+ }
2364
+ inline v_int32 v_dotprod_fast(const v_int16 &a, const v_int16 &b,
2365
+ const v_int32 &c) {
2366
+ vint32m1_t zero = __riscv_vmv_v_x_i32m1(0, VTraits<vint32m1_t>::vlanes());
2367
+ return __riscv_vadd(
2368
+ c,
2369
+ __riscv_vset(__riscv_vmv_v_x_i32m2(0, VTraits<v_int32>::vlanes()), 0,
2370
+ __riscv_vredsum_tu(
2371
+ zero, __riscv_vwmul(a, b, VTraits<v_int16>::vlanes()),
2372
+ zero, VTraits<v_int16>::vlanes())),
2373
+ VTraits<v_int32>::vlanes());
2374
+ }
2375
+
2376
+ // 32 >> 64
2377
+ inline v_int64 v_dotprod_fast(const v_int32 &a, const v_int32 &b) {
2378
+ vint64m1_t zero = __riscv_vmv_v_x_i64m1(0, VTraits<vint64m1_t>::vlanes());
2379
+ return __riscv_vset(
2380
+ __riscv_vmv_v_x_i64m2(0, VTraits<v_int64>::vlanes()), 0,
2381
+ __riscv_vredsum_tu(zero, __riscv_vwmul(a, b, VTraits<v_int32>::vlanes()),
2382
+ zero, VTraits<v_int32>::vlanes()));
2383
+ }
2384
+ inline v_int64 v_dotprod_fast(const v_int32 &a, const v_int32 &b,
2385
+ const v_int64 &c) {
2386
+ vint64m1_t zero = __riscv_vmv_v_x_i64m1(0, VTraits<vint64m1_t>::vlanes());
2387
+ return __riscv_vadd(
2388
+ c,
2389
+ __riscv_vset(__riscv_vmv_v_x_i64m2(0, VTraits<v_int64>::vlanes()), 0,
2390
+ __riscv_vredsum_tu(
2391
+ zero, __riscv_vwmul(a, b, VTraits<v_int32>::vlanes()),
2392
+ zero, VTraits<v_int32>::vlanes())),
2393
+ VTraits<v_int64>::vlanes());
2394
+ }
2395
+
2396
+ // 8 >> 32
2397
+ inline v_uint32 v_dotprod_expand_fast(const v_uint8 &a, const v_uint8 &b) {
2398
+ vuint32m1_t zero = __riscv_vmv_v_x_u32m1(0, VTraits<vuint32m1_t>::vlanes());
2399
+ auto res = __riscv_vwredsumu_tu(
2400
+ zero, __riscv_vwmulu(a, b, VTraits<v_uint8>::vlanes()), zero,
2401
+ VTraits<v_uint8>::vlanes());
2402
+ return __riscv_vset(__riscv_vmv_v_x_u32m2(0, VTraits<v_uint32>::vlanes()), 0,
2403
+ res);
2404
+ }
2405
+ inline v_uint32 v_dotprod_expand_fast(const v_uint8 &a, const v_uint8 &b,
2406
+ const v_uint32 &c) {
2407
+ vuint32m1_t zero = __riscv_vmv_v_x_u32m1(0, VTraits<vuint32m1_t>::vlanes());
2408
+ auto res = __riscv_vwredsumu_tu(
2409
+ zero, __riscv_vwmulu(a, b, VTraits<v_uint8>::vlanes()), zero,
2410
+ VTraits<v_uint8>::vlanes());
2411
+ return __riscv_vadd(
2412
+ c,
2413
+ __riscv_vset(__riscv_vmv_v_x_u32m2(0, VTraits<v_uint32>::vlanes()), 0,
2414
+ res),
2415
+ VTraits<v_uint32>::vlanes());
2416
+ }
2417
+ inline v_int32 v_dotprod_expand_fast(const v_int8 &a, const v_int8 &b) {
2418
+ vint32m1_t zero = __riscv_vmv_v_x_i32m1(0, VTraits<vint32m1_t>::vlanes());
2419
+ return __riscv_vset(
2420
+ __riscv_vmv_v_x_i32m2(0, VTraits<v_uint32>::vlanes()), 0,
2421
+ __riscv_vwredsum_tu(zero, __riscv_vwmul(a, b, VTraits<v_int8>::vlanes()),
2422
+ zero, VTraits<v_int8>::vlanes()));
2423
+ }
2424
+ inline v_int32 v_dotprod_expand_fast(const v_int8 &a, const v_int8 &b,
2425
+ const v_int32 &c) {
2426
+ vint32m1_t zero = __riscv_vmv_v_x_i32m1(0, VTraits<vint32m1_t>::vlanes());
2427
+ return __riscv_vadd(
2428
+ c,
2429
+ __riscv_vset(__riscv_vmv_v_x_i32m2(0, VTraits<v_uint32>::vlanes()), 0,
2430
+ __riscv_vwredsum_tu(
2431
+ zero, __riscv_vwmul(a, b, VTraits<v_int8>::vlanes()),
2432
+ zero, VTraits<v_int8>::vlanes())),
2433
+ VTraits<v_int32>::vlanes());
2434
+ }
2435
+
2436
+ // 16 >> 64
2437
+ inline v_uint64 v_dotprod_expand_fast(const v_uint16 &a, const v_uint16 &b) {
2438
+ vuint64m1_t zero = __riscv_vmv_v_x_u64m1(0, VTraits<vuint64m1_t>::vlanes());
2439
+ return __riscv_vset(
2440
+ __riscv_vmv_v_x_u64m2(0, VTraits<v_uint64>::vlanes()), 0,
2441
+ __riscv_vwredsumu_tu(zero,
2442
+ __riscv_vwmulu(a, b, VTraits<v_uint16>::vlanes()),
2443
+ zero, VTraits<v_uint16>::vlanes()));
2444
+ }
2445
+ inline v_uint64 v_dotprod_expand_fast(const v_uint16 &a, const v_uint16 &b,
2446
+ const v_uint64 &c) {
2447
+ vuint64m1_t zero = __riscv_vmv_v_x_u64m1(0, VTraits<vuint64m1_t>::vlanes());
2448
+ return __riscv_vadd(
2449
+ c,
2450
+ __riscv_vset(__riscv_vmv_v_x_u64m2(0, VTraits<v_uint64>::vlanes()), 0,
2451
+ __riscv_vwredsumu_tu(
2452
+ zero, __riscv_vwmulu(a, b, VTraits<v_uint16>::vlanes()),
2453
+ zero, VTraits<v_uint16>::vlanes())),
2454
+ VTraits<v_uint64>::vlanes());
2455
+ }
2456
+ inline v_int64 v_dotprod_expand_fast(const v_int16 &a, const v_int16 &b) {
2457
+ vint64m1_t zero = __riscv_vmv_v_x_i64m1(0, VTraits<vint64m1_t>::vlanes());
2458
+ return __riscv_vset(
2459
+ __riscv_vmv_v_x_i64m2(0, VTraits<v_int64>::vlanes()), 0,
2460
+ __riscv_vwredsum_tu(zero, __riscv_vwmul(a, b, VTraits<v_int16>::vlanes()),
2461
+ zero, VTraits<v_int16>::vlanes()));
2462
+ }
2463
+ inline v_int64 v_dotprod_expand_fast(const v_int16 &a, const v_int16 &b,
2464
+ const v_int64 &c) {
2465
+ vint64m1_t zero = __riscv_vmv_v_x_i64m1(0, VTraits<vint64m1_t>::vlanes());
2466
+ return __riscv_vadd(
2467
+ c,
2468
+ __riscv_vset(__riscv_vmv_v_x_i64m2(0, VTraits<v_int64>::vlanes()), 0,
2469
+ __riscv_vwredsum_tu(
2470
+ zero, __riscv_vwmul(a, b, VTraits<v_int16>::vlanes()),
2471
+ zero, VTraits<v_int16>::vlanes())),
2472
+ VTraits<v_int64>::vlanes());
2473
+ }
2474
+
2475
+ // 32 >> 64f
2476
+ #if CV_SIMD_SCALABLE_64F
2477
+ inline v_float64 v_dotprod_expand_fast(const v_int32 &a, const v_int32 &b) {
2478
+ return v_cvt_f64(v_dotprod_fast(a, b));
2479
+ }
2480
+ inline v_float64 v_dotprod_expand_fast(const v_int32 &a, const v_int32 &b,
2481
+ const v_float64 &c) {
2482
+ return v_add(v_dotprod_expand_fast(a, b), c);
2483
+ }
2484
+ #endif
2485
+
2486
+ // TODO: only 128 bit now.
2487
+ inline v_float32 v_matmul(const v_float32 &v, const v_float32 &mat0,
2488
+ const v_float32 &mat1, const v_float32 &mat2,
2489
+ const v_float32 &mat3) {
2490
+ vfloat32m2_t res;
2491
+ res = __riscv_vfmul_vf_f32m2(mat0, v_extract_n(v, 0),
2492
+ VTraits<v_float32>::vlanes());
2493
+ res = __riscv_vfmacc_vf_f32m2(res, v_extract_n(v, 1), mat1,
2494
+ VTraits<v_float32>::vlanes());
2495
+ res = __riscv_vfmacc_vf_f32m2(res, v_extract_n(v, 2), mat2,
2496
+ VTraits<v_float32>::vlanes());
2497
+ res = __riscv_vfmacc_vf_f32m2(res, v_extract_n(v, 3), mat3,
2498
+ VTraits<v_float32>::vlanes());
2499
+ return res;
2500
+ }
2501
+
2502
+ // TODO: only 128 bit now.
2503
+ inline v_float32 v_matmuladd(const v_float32 &v, const v_float32 &mat0,
2504
+ const v_float32 &mat1, const v_float32 &mat2,
2505
+ const v_float32 &a) {
2506
+ vfloat32m2_t res = __riscv_vfmul_vf_f32m2(mat0, v_extract_n(v, 0),
2507
+ VTraits<v_float32>::vlanes());
2508
+ res = __riscv_vfmacc_vf_f32m2(res, v_extract_n(v, 1), mat1,
2509
+ VTraits<v_float32>::vlanes());
2510
+ res = __riscv_vfmacc_vf_f32m2(res, v_extract_n(v, 2), mat2,
2511
+ VTraits<v_float32>::vlanes());
2512
+ return __riscv_vfadd(res, a, VTraits<v_float32>::vlanes());
2513
+ }
2514
+
2515
// No-op: this backend performs no work in v_cleanup().
inline void v_cleanup() {
}
2516
+
2517
+ #include "intrin_math.hpp"
2518
+ inline v_float32 v_exp(const v_float32 &x) {
2519
+ return v_exp_default_32f<v_float32, v_int32>(x);
2520
+ }
2521
+ inline v_float32 v_log(const v_float32 &x) {
2522
+ return v_log_default_32f<v_float32, v_int32>(x);
2523
+ }
2524
+ inline void v_sincos(const v_float32 &x, v_float32 &s, v_float32 &c) {
2525
+ v_sincos_default_32f<v_float32, v_int32>(x, s, c);
2526
+ }
2527
+ inline v_float32 v_sin(const v_float32 &x) {
2528
+ return v_sin_default_32f<v_float32, v_int32>(x);
2529
+ }
2530
+ inline v_float32 v_cos(const v_float32 &x) {
2531
+ return v_cos_default_32f<v_float32, v_int32>(x);
2532
+ }
2533
+ inline v_float32 v_erf(const v_float32 &x) {
2534
+ return v_erf_default_32f<v_float32, v_int32>(x);
2535
+ }
2536
+
2537
+ inline v_float64 v_exp(const v_float64 &x) {
2538
+ return v_exp_default_64f<v_float64, v_int64>(x);
2539
+ }
2540
+ inline v_float64 v_log(const v_float64 &x) {
2541
+ return v_log_default_64f<v_float64, v_int64>(x);
2542
+ }
2543
+ inline void v_sincos(const v_float64 &x, v_float64 &s, v_float64 &c) {
2544
+ v_sincos_default_64f<v_float64, v_int64>(x, s, c);
2545
+ }
2546
+ inline v_float64 v_sin(const v_float64 &x) {
2547
+ return v_sin_default_64f<v_float64, v_int64>(x);
2548
+ }
2549
+ inline v_float64 v_cos(const v_float64 &x) {
2550
+ return v_cos_default_64f<v_float64, v_int64>(x);
2551
+ }
2552
+
2553
+ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
2554
+
2555
+ //! @endcond
2556
+
2557
+ } // namespace cv
2558
+
2559
+ #endif // OPENCV_HAL_INTRIN_RVV_SCALABLE_HPP