numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (487)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2865 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import sys
5
+ import operator
6
+
7
+ import numpy as np
8
+ from llvmlite.ir import IntType, Constant
9
+
10
+ from numba.cuda.cgutils import is_nonelike
11
+ from numba.cuda.extending import (
12
+ NativeValue,
13
+ overload,
14
+ overload_method,
15
+ register_jitable,
16
+ )
17
+ from numba.cuda.extending import models
18
+ from numba.cuda.core.pythonapi import box, unbox
19
+ from numba.cuda.extending import make_attribute_wrapper, intrinsic
20
+ from numba.cuda.models import register_model
21
+ from numba.cuda.core.imputils import (
22
+ iternext_impl,
23
+ impl_ret_new_ref,
24
+ RefType,
25
+ Registry,
26
+ )
27
+ from numba.cuda.datamodel import register_default, StructModel
28
+ from numba.cuda import types
29
+ from numba.cuda import cgutils
30
+ from numba.cuda.utils import PYVERSION
31
+ from numba.cuda.core.pythonapi import (
32
+ PY_UNICODE_1BYTE_KIND,
33
+ PY_UNICODE_2BYTE_KIND,
34
+ PY_UNICODE_4BYTE_KIND,
35
+ )
36
+ from numba.cuda.cext._helperlib import c_helpers
37
+ from numba.cuda.core.unsafe.bytes import memcpy_region
38
+ from numba.cuda.core.errors import TypingError
39
+ from numba.cuda.cpython.unicode_support import (
40
+ _Py_TOUPPER,
41
+ _Py_TOLOWER,
42
+ _Py_UCS4,
43
+ _Py_ISALNUM,
44
+ _PyUnicode_ToUpperFull,
45
+ _PyUnicode_ToLowerFull,
46
+ _PyUnicode_ToFoldedFull,
47
+ _PyUnicode_ToTitleFull,
48
+ _PyUnicode_IsPrintable,
49
+ _PyUnicode_IsSpace,
50
+ _Py_ISSPACE,
51
+ _PyUnicode_IsXidStart,
52
+ _PyUnicode_IsXidContinue,
53
+ _PyUnicode_IsCased,
54
+ _PyUnicode_IsCaseIgnorable,
55
+ _PyUnicode_IsUppercase,
56
+ _PyUnicode_IsLowercase,
57
+ _PyUnicode_IsLineBreak,
58
+ _Py_ISLINEBREAK,
59
+ _Py_ISLINEFEED,
60
+ _Py_ISCARRIAGERETURN,
61
+ _PyUnicode_IsTitlecase,
62
+ _Py_ISLOWER,
63
+ _Py_ISUPPER,
64
+ _Py_TAB,
65
+ _Py_LINEFEED,
66
+ _Py_CARRIAGE_RETURN,
67
+ _Py_SPACE,
68
+ _PyUnicode_IsAlpha,
69
+ _PyUnicode_IsNumeric,
70
+ _Py_ISALPHA,
71
+ _PyUnicode_IsDigit,
72
+ _PyUnicode_IsDecimalDigit,
73
+ )
74
+ from numba.cuda.cpython import slicing
75
+
76
# Bit width of the interpreter's hash values; used to look up the numba
# integer type named "int<width>" for the struct's hash member.
_hash_width = sys.hash_info.width
_Py_hash_t = getattr(types, "int%s" % _hash_width)

# Lowering registry for this module, plus shorthands for its decorators.
registry = Registry("unicode")
lower, lower_cast, lower_constant, lower_getattr = (
    registry.lower,
    registry.lower_cast,
    registry.lower_constant,
    registry.lower_getattr,
)

# The wchar kind constant is only imported on these interpreter versions.
if PYVERSION in ((3, 9), (3, 10), (3, 11)):
    from numba.cuda.core.pythonapi import PY_UNICODE_WCHAR_KIND

# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L84-L85 # noqa: E501
_MAX_UNICODE = 0x10FFFF

# https://github.com/python/cpython/blob/1960eb005e04b7ad8a91018088cfdb0646bc1ca0/Objects/stringlib/fastsearch.h#L31 # noqa: E501
_BLOOM_WIDTH = types.intp.bitwidth
93
+
94
+ # DATA MODEL
95
+
96
+
97
@register_model(types.UnicodeType)
class UnicodeModel(models.StructModel):
    """Struct data model registered for ``types.UnicodeType``."""

    def __init__(self, dmm, fe_type):
        # (member name, member type) pairs, in struct order.
        fields = [
            ("data", types.voidptr),
            ("length", types.intp),
            ("kind", types.int32),
            ("is_ascii", types.uint32),
            ("hash", _Py_hash_t),
            ("meminfo", types.MemInfoPointer(types.voidptr)),
            # A pointer to the owner python str/unicode object
            ("parent", types.pyobject),
        ]
        super().__init__(dmm, fe_type, fields)
111
+
112
+
113
# Expose selected struct members as underscore-prefixed attributes on
# unicode values: (struct member, attribute name).
for _member, _attr in (
    ("data", "_data"),
    ("length", "_length"),
    ("kind", "_kind"),
    ("is_ascii", "_is_ascii"),
    ("hash", "_hash"),
):
    make_attribute_wrapper(types.UnicodeType, _member, _attr)
del _member, _attr
118
+
119
+
120
@register_default(types.UnicodeIteratorType)
class UnicodeIteratorModel(StructModel):
    """Struct data model registered for ``types.UnicodeIteratorType``."""

    def __init__(self, dmm, fe_type):
        # The iterator holds a pointer to its current index and the
        # iterated value (typed as ``fe_type.data``).
        fields = [
            ("index", types.EphemeralPointer(types.uintp)),
            ("data", fe_type.data),
        ]
        super().__init__(dmm, fe_type, fields)
128
+
129
+
130
+ # CAST
131
+
132
+
133
def compile_time_get_string_data(obj):
    """Extract the raw contents of a Python string at compile time.

    Calls the ``extract_unicode`` C helper through ctypes so that the
    string data can be embedded into the LLVM module.

    Returns a tuple ``(databytes, length, kind, is_ascii, hash)`` where
    ``databytes`` holds ``length + 1`` characters of the width implied by
    ``kind`` (one character beyond the reported length is copied as well).

    Raises ``ValueError`` when the helper returns a NULL data pointer.
    """
    import ctypes

    ssize_ptr = ctypes.POINTER(ctypes.c_ssize_t)
    helper_proto = ctypes.CFUNCTYPE(
        ctypes.c_void_p,
        ctypes.py_object,
        ssize_ptr,
        ctypes.POINTER(ctypes.c_int),
        ctypes.POINTER(ctypes.c_uint),
        ssize_ptr,
    )
    extract = helper_proto(c_helpers["extract_unicode"])

    # Output slots filled in by the helper.
    out_length = ctypes.c_ssize_t()
    out_kind = ctypes.c_int()
    out_ascii = ctypes.c_uint()
    out_hash = ctypes.c_ssize_t()
    address = extract(
        obj,
        ctypes.byref(out_length),
        ctypes.byref(out_kind),
        ctypes.byref(out_ascii),
        ctypes.byref(out_hash),
    )
    if address is None:
        raise ValueError("cannot extract unicode data from the given string")

    n_chars = out_length.value
    kind_value = out_kind.value
    # Copy one character more than the reported length.
    nbytes = (n_chars + 1) * _kind_to_byte_width(kind_value)
    raw = (ctypes.c_ubyte * nbytes).from_address(address)
    return bytes(raw), n_chars, kind_value, out_ascii.value, out_hash.value
172
+
173
+
174
def make_string_from_constant(context, builder, typ, literal_string):
    """
    Build a unicode_type LLVM value for a compile-time constant string.

    The string contents come from `compile_time_get_string_data()` and are
    inserted into the module as constant bytes.
    """
    payload, n_chars, kind, ascii_flag, _hash_unused = (
        compile_time_get_string_data(literal_string)
    )
    const_data = context.insert_const_bytes(builder.module, payload)
    proxy = cgutils.create_struct_proxy(typ)(context, builder)
    proxy.data = const_data
    proxy.length = proxy.length.type(n_chars)
    proxy.kind = proxy.kind.type(kind)
    proxy.is_ascii = proxy.is_ascii.type(ascii_flag)
    # Set hash to -1 to indicate that it should be computed.
    # We cannot bake in the hash value because of hashseed randomization.
    proxy.hash = proxy.hash.type(-1)
    return proxy._getvalue()
193
+
194
+
195
@lower_cast(types.StringLiteral, types.unicode_type)
def cast_from_literal(context, builder, fromty, toty, val):
    """Lower a StringLiteral -> unicode_type cast from the literal's value."""
    literal = fromty.literal_value
    return make_string_from_constant(context, builder, toty, literal)
203
+
204
+
205
+ # CONSTANT
206
+
207
+
208
@lower_constant(types.unicode_type)
def constant_unicode(context, builder, typ, pyval):
    """Lower a constant Python string as a unicode_type value."""
    native_value = make_string_from_constant(context, builder, typ, pyval)
    return native_value
211
+
212
+
213
+ # BOXING
214
+
215
+
216
@unbox(types.UnicodeType)
def unbox_unicode_str(typ, obj, c):
    """
    Convert a unicode str object to a native unicode structure.

    The native struct borrows the data pointer of ``obj``; the meminfo
    created here keeps ``obj`` as the owner of that data.
    """
    extracted = c.pyapi.string_as_string_size_and_kind(obj)
    _ok, data_ptr, n_chars, kind, ascii_flag, hash_value = extracted

    native = cgutils.create_struct_proxy(typ)(c.context, c.builder)
    native.data = data_ptr
    native.length = n_chars
    native.kind = kind
    native.is_ascii = ascii_flag
    native.hash = hash_value
    native.meminfo = c.pyapi.nrt_meminfo_new_from_pyobject(
        data_ptr,  # the borrowed data pointer
        obj,  # the owner pyobject; the call will incref it.
    )
    native.parent = obj

    # Report failure if any of the C-API calls above set an error.
    failed = cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
    return NativeValue(native._getvalue(), is_error=failed)
238
+
239
+
240
@box(types.UnicodeType)
def box_unicode_str(typ, val, c):
    """
    Convert a native unicode structure to a unicode string
    """
    native = cgutils.create_struct_proxy(typ)(c.context, c.builder, value=val)
    kind, data, length = native.kind, native.data, native.length
    py_str = c.pyapi.string_from_kind_and_data(kind, data, length)
    # The hash is not needed here. It is computed anyway so that it ends up
    # in the unicode object's hash cache; CPython does not always do this,
    # depending on how the string was created. This is safe and only costs
    # the cycles needed to hash the string when boxing.
    c.pyapi.object_hash(py_str)
    c.context.nrt.decref(c.builder, typ, val)
    return py_str
255
+
256
+
257
+ # HELPER FUNCTIONS
258
+
259
+
260
def make_deref_codegen(bitsize):
    """Return a codegen function that loads a ``bitsize``-bit integer.

    The generated code treats ``data`` as a pointer to ``bitsize``-bit
    integers, loads element ``idx`` and zero-extends it to 32 bits.
    """

    def codegen(context, builder, signature, args):
        base, index = args
        typed_base = builder.bitcast(base, IntType(bitsize).as_pointer())
        element_ptr = builder.gep(typed_base, [index])
        loaded = builder.load(element_ptr)
        return builder.zext(loaded, IntType(32))

    return codegen
268
+
269
+
270
@intrinsic
def deref_uint8(typingctx, data, offset):
    """Intrinsic: read the 8-bit element at ``offset`` of ``data`` as uint32."""
    return types.uint32(types.voidptr, types.intp), make_deref_codegen(8)
274
+
275
+
276
@intrinsic
def deref_uint16(typingctx, data, offset):
    """Intrinsic: read the 16-bit element at ``offset`` of ``data`` as uint32."""
    return types.uint32(types.voidptr, types.intp), make_deref_codegen(16)
280
+
281
+
282
@intrinsic
def deref_uint32(typingctx, data, offset):
    """Intrinsic: read the 32-bit element at ``offset`` of ``data`` as uint32."""
    return types.uint32(types.voidptr, types.intp), make_deref_codegen(32)
286
+
287
+
288
@intrinsic
def _malloc_string(typingctx, kind, char_bytes, length, is_ascii):
    """Allocate a new unicode struct with an uninitialised data buffer.

    The buffer holds ``length + 1`` characters of ``char_bytes`` bytes each
    (the extra one is room for the null padding character). ``kind``,
    ``length`` and ``is_ascii`` are stored in the struct, the hash is set to
    -1 and the parent pointer to NULL. The buffer contents are not written
    here.
    """

    def codegen(context, builder, signature, args):
        kind_val, char_bytes_val, length_val, is_ascii_val = args

        # fill the struct
        new_str = cgutils.create_struct_proxy(types.unicode_type)(
            context, builder
        )
        # add null padding character
        one = Constant(length_val.type, 1)
        padded_length = builder.add(length_val, one)
        nbytes_val = builder.mul(char_bytes_val, padded_length)
        new_str.meminfo = context.nrt.meminfo_alloc(builder, nbytes_val)
        new_str.kind = kind_val
        new_str.is_ascii = is_ascii_val
        new_str.length = length_val
        # empty string has hash value -1 to indicate "need to compute hash"
        new_str.hash = context.get_constant(_Py_hash_t, -1)
        new_str.data = context.nrt.meminfo_data(builder, new_str.meminfo)
        # Set parent to NULL
        new_str.parent = cgutils.get_null_value(new_str.parent.type)
        return new_str._getvalue()

    signature = types.unicode_type(
        types.int32, types.intp, types.intp, types.uint32
    )
    return signature, codegen
319
+
320
+
321
@register_jitable
def _empty_string(kind, length, is_ascii=0):
    """Allocate a string of ``length`` characters and null-terminate it.

    Only the character at index ``length`` is written; the rest of the
    buffer is left for the caller to fill.
    """
    new_str = _malloc_string(
        kind, _kind_to_byte_width(kind), length, is_ascii
    )
    _set_code_point(new_str, length, np.uint32(0))  # Write NULL character
    return new_str
327
+
328
+
329
+ # Disable RefCt for performance.
330
@register_jitable(_nrt=False)
def _get_code_point(a, i):
    """Return the code point at index ``i`` of ``a``.

    The read width follows the string's kind. No bounds check is done.
    Returns 0 for any kind other than the 1-, 2- and 4-byte kinds.
    """
    if a._kind == PY_UNICODE_1BYTE_KIND:
        return deref_uint8(a._data, i)
    if a._kind == PY_UNICODE_2BYTE_KIND:
        return deref_uint16(a._data, i)
    if a._kind == PY_UNICODE_4BYTE_KIND:
        return deref_uint32(a._data, i)
    # there's also a wchar kind, but that's one of the above,
    # so skipping for this example
    return 0
342
+
343
+
344
+ ####
345
+
346
+
347
def make_set_codegen(bitsize):
    """Return a codegen function that stores a ``bitsize``-bit integer.

    The generated code treats ``data`` as a pointer to ``bitsize``-bit
    integers and stores ``ch`` at element ``idx``, truncating ``ch`` first
    when ``bitsize`` is below 32.
    """

    def codegen(context, builder, signature, args):
        base, index, value = args
        if bitsize < 32:
            value = builder.trunc(value, IntType(bitsize))
        typed_base = builder.bitcast(base, IntType(bitsize).as_pointer())
        slot = builder.gep(typed_base, [index])
        builder.store(value, slot)
        return context.get_dummy_value()

    return codegen
357
+
358
+
359
@intrinsic
def set_uint8(typingctx, data, idx, ch):
    """Intrinsic: store ``ch`` as the 8-bit element ``idx`` of ``data``."""
    store_sig = types.void(types.voidptr, types.int64, types.uint32)
    return store_sig, make_set_codegen(8)
363
+
364
+
365
@intrinsic
def set_uint16(typingctx, data, idx, ch):
    """Intrinsic: store ``ch`` as the 16-bit element ``idx`` of ``data``."""
    store_sig = types.void(types.voidptr, types.int64, types.uint32)
    return store_sig, make_set_codegen(16)
369
+
370
+
371
@intrinsic
def set_uint32(typingctx, data, idx, ch):
    """Intrinsic: store ``ch`` as the 32-bit element ``idx`` of ``data``."""
    store_sig = types.void(types.voidptr, types.int64, types.uint32)
    return store_sig, make_set_codegen(32)
375
+
376
+
377
@register_jitable(_nrt=False)
def _set_code_point(a, i, ch):
    """Write code point ``ch`` at index ``i`` of ``a``, in place."""
    # WARNING: This method is very dangerous:
    #   * Assumes that data contents can be changed (only allowed for new
    #     strings)
    #   * Assumes that the kind of unicode string is sufficiently wide to
    #     accept ch.  Will truncate ch to make it fit.
    #   * Assumes that i is within the valid boundaries of the function
    kind = a._kind
    if kind == PY_UNICODE_4BYTE_KIND:
        set_uint32(a._data, i, ch)
    elif kind == PY_UNICODE_2BYTE_KIND:
        set_uint16(a._data, i, ch)
    elif kind == PY_UNICODE_1BYTE_KIND:
        set_uint8(a._data, i, ch)
    else:
        raise AssertionError(
            "Unexpected unicode representation in _set_code_point"
        )
395
+
396
+
397
if PYVERSION in ((3, 12), (3, 13)):

    @register_jitable
    def _pick_kind(kind1, kind2):
        """Return the wider of two kinds (1-byte < 2-byte < 4-byte).

        ``kind2`` is only inspected when ``kind1`` is the 2-byte kind and is
        returned unchecked when ``kind1`` is the 1-byte kind.
        """
        if kind1 == PY_UNICODE_4BYTE_KIND:
            return kind1
        if kind1 == PY_UNICODE_2BYTE_KIND:
            if kind2 == PY_UNICODE_4BYTE_KIND:
                return kind2
            return kind1
        if kind1 == PY_UNICODE_1BYTE_KIND:
            return kind2
        raise AssertionError(
            "Unexpected unicode representation in _pick_kind"
        )
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):

    @register_jitable
    def _pick_kind(kind1, kind2):
        """Return the wider of two kinds (1-byte < 2-byte < 4-byte).

        The wchar kind is rejected for either argument. Otherwise ``kind2``
        is only inspected when ``kind1`` is the 2-byte kind and is returned
        unchecked when ``kind1`` is the 1-byte kind.
        """
        if kind1 == PY_UNICODE_WCHAR_KIND or kind2 == PY_UNICODE_WCHAR_KIND:
            raise AssertionError("PY_UNICODE_WCHAR_KIND unsupported")

        if kind1 == PY_UNICODE_4BYTE_KIND:
            return kind1
        if kind1 == PY_UNICODE_2BYTE_KIND:
            if kind2 == PY_UNICODE_4BYTE_KIND:
                return kind2
            return kind1
        if kind1 == PY_UNICODE_1BYTE_KIND:
            return kind2
        raise AssertionError(
            "Unexpected unicode representation in _pick_kind"
        )
else:
    raise NotImplementedError(PYVERSION)
436
+
437
+
438
@register_jitable
def _pick_ascii(is_ascii1, is_ascii2):
    """Return uint32 1 only when both flags equal 1, otherwise uint32 0."""
    if is_ascii1 != 1 or is_ascii2 != 1:
        return types.uint32(0)
    return types.uint32(1)
443
+
444
+
445
if PYVERSION in ((3, 12), (3, 13)):

    @register_jitable
    def _kind_to_byte_width(kind):
        """Return the number of bytes per character for ``kind`` (1, 2 or 4)."""
        if kind == PY_UNICODE_1BYTE_KIND:
            return 1
        if kind == PY_UNICODE_2BYTE_KIND:
            return 2
        if kind == PY_UNICODE_4BYTE_KIND:
            return 4
        raise AssertionError("Unexpected unicode encoding encountered")
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):

    @register_jitable
    def _kind_to_byte_width(kind):
        """Return the number of bytes per character for ``kind`` (1, 2 or 4).

        The wchar kind is rejected with its own error message.
        """
        if kind == PY_UNICODE_1BYTE_KIND:
            return 1
        if kind == PY_UNICODE_2BYTE_KIND:
            return 2
        if kind == PY_UNICODE_4BYTE_KIND:
            return 4
        if kind == PY_UNICODE_WCHAR_KIND:
            raise AssertionError("PY_UNICODE_WCHAR_KIND unsupported")
        raise AssertionError("Unexpected unicode encoding encountered")
else:
    raise NotImplementedError(PYVERSION)
473
+
474
+
475
@register_jitable(_nrt=False)
def _cmp_region(a, a_offset, b, b_offset, n):
    """Compare ``n`` code points of ``a`` and ``b`` from the given offsets.

    Returns -1, 0 or 1. An empty region compares equal. If the region runs
    past the end of ``a`` the result is -1; if it runs past the end of ``b``
    the result is 1 (``a`` is checked first).
    """
    if n == 0:
        return 0
    if a_offset + n > a._length:
        return -1
    if b_offset + n > b._length:
        return 1

    for k in range(n):
        left = _get_code_point(a, a_offset + k)
        right = _get_code_point(b, b_offset + k)
        if left != right:
            # First difference decides the ordering.
            return -1 if left < right else 1

    return 0
493
+
494
+
495
@register_jitable
def _codepoint_to_kind(cp):
    """
    Return the narrowest unicode kind able to hold codepoint ``cp``.

    Raises ValueError when ``cp`` exceeds 0x10FFFF.
    """
    if cp < 256:
        return PY_UNICODE_1BYTE_KIND
    if cp < 65536:
        return PY_UNICODE_2BYTE_KIND
    # Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
    if cp > 0x10FFFF:
        raise ValueError(
            "Invalid codepoint. Found value greater than Unicode maximum"
        )
    return PY_UNICODE_4BYTE_KIND
511
+
512
+
513
@register_jitable
def _codepoint_is_ascii(ch):
    """
    Tell whether codepoint ``ch`` lies below 128, i.e. in the ASCII range
    """
    return ch < 0x80
519
+
520
+
521
+ # PUBLIC API
522
+
523
+
524
@overload(len)
def unicode_len(s):
    """Overload ``len()`` for unicode values: return the stored length."""
    if not isinstance(s, types.UnicodeType):
        return None

    def len_impl(s):
        return s._length

    return len_impl
532
+
533
+
534
@overload(operator.eq)
def unicode_eq(a, b):
    """Overload ``==`` where at least one operand is string-like.

    Optional operands are unwrapped for the type check. When both operands
    are string-like the contents are compared; when exactly one is, the
    result is always False. No implementation is provided otherwise, or
    when either type's ``is_internal`` flag is false.
    """
    if not (a.is_internal and b.is_internal):
        return
    inner_a = a.type if isinstance(a, types.Optional) else a
    inner_b = b.type if isinstance(b, types.Optional) else b
    string_like = (
        types.UnicodeType,
        types.StringLiteral,
        types.UnicodeCharSeq,
    )
    a_unicode = isinstance(inner_a, string_like)
    b_unicode = isinstance(inner_b, string_like)

    if a_unicode and b_unicode:

        def eq_impl(a, b):
            # handle Optionals at runtime
            a_none = a is None
            b_none = b is None
            if a_none or b_none:
                # Equal only when both sides are None.
                if a_none and b_none:
                    return True
                return False
            # the str() is for UnicodeCharSeq, it's a nop else
            a = str(a)
            b = str(b)
            if len(a) != len(b):
                return False
            return _cmp_region(a, 0, b, 0, len(a)) == 0

        return eq_impl

    if a_unicode or b_unicode:
        # one of the things is unicode, everything compares False
        def eq_impl(a, b):
            return False

        return eq_impl
574
+
575
+
576
@overload(operator.ne)
def unicode_ne(a, b):
    """Overload ``!=`` where at least one operand is string-like.

    When both operands are string-like the result is the negation of
    ``==``; when exactly one is, the result is always True. No
    implementation is provided otherwise, or when either type's
    ``is_internal`` flag is false.
    """
    if not (a.is_internal and b.is_internal):
        return
    string_like = (
        types.UnicodeType,
        types.StringLiteral,
        types.UnicodeCharSeq,
    )
    a_unicode = isinstance(a, string_like)
    b_unicode = isinstance(b, string_like)

    if a_unicode and b_unicode:

        def ne_impl(a, b):
            return not (a == b)

        return ne_impl

    if a_unicode or b_unicode:
        # one of the things is unicode, everything compares True
        def ne_impl(a, b):
            return True

        return ne_impl
595
+
596
+
597
@overload(operator.lt)
def unicode_lt(a, b):
    """Overload ``<`` for two strings (UnicodeType or StringLiteral).

    The common-length prefix is compared first; on a tie the shorter
    string is the smaller one.
    """
    string_types = (types.UnicodeType, types.StringLiteral)
    if isinstance(a, string_types) and isinstance(b, string_types):

        def lt_impl(a, b):
            len_a, len_b = len(a), len(b)
            order = _cmp_region(a, 0, b, 0, min(len_a, len_b))
            if order == 0:
                return len_a < len_b
            return order == -1

        return lt_impl
613
+
614
+
615
@overload(operator.gt)
def unicode_gt(a, b):
    """Overload ``>`` for two strings (UnicodeType or StringLiteral).

    The common-length prefix is compared first; on a tie the longer
    string is the greater one.
    """
    string_types = (types.UnicodeType, types.StringLiteral)
    if isinstance(a, string_types) and isinstance(b, string_types):

        def gt_impl(a, b):
            len_a, len_b = len(a), len(b)
            order = _cmp_region(a, 0, b, 0, min(len_a, len_b))
            if order == 0:
                return len_a > len_b
            return order == 1

        return gt_impl
631
+
632
+
633
@overload(operator.le)
def unicode_le(a, b):
    """Overload ``<=`` for two strings as the negation of ``>``."""
    string_types = (types.UnicodeType, types.StringLiteral)
    if isinstance(a, string_types) and isinstance(b, string_types):

        def le_impl(a, b):
            if a > b:
                return False
            return True

        return le_impl
643
+
644
+
645
@overload(operator.ge)
def unicode_ge(a, b):
    """Overload ``>=`` for two strings as the negation of ``<``."""
    string_types = (types.UnicodeType, types.StringLiteral)
    if isinstance(a, string_types) and isinstance(b, string_types):

        def ge_impl(a, b):
            if a < b:
                return False
            return True

        return ge_impl
655
+
656
+
657
@overload(operator.contains)
def unicode_contains(a, b):
    """Overload ``b in a`` for two unicode values via a substring search."""
    both_unicode = isinstance(a, types.UnicodeType) and isinstance(
        b, types.UnicodeType
    )
    if both_unicode:

        def contains_impl(a, b):
            # note parameter swap: contains(a, b) == b in a
            position = _find(a, b)
            return position > -1

        return contains_impl
666
+
667
+
668
def unicode_idx_check_type(ty, name):
    """Validate that an index argument is an integer or None.

    ty: type
        Type of the object
    name: str
        Name of the object (used in the error message)

    Raises TypingError when the (unwrapped) type is neither an Integer
    nor NoneType.
    """
    if isinstance(ty, types.Omitted):
        # if the type is omitted, the concrete type is the value
        concrete = ty.value
    elif isinstance(ty, types.Optional):
        # if the type is optional, the concrete type is the captured type
        concrete = ty.type
    else:
        concrete = ty

    accepted = (types.Integer, types.NoneType)
    if concrete is None or isinstance(concrete, accepted):
        return
    raise TypingError('"{}" must be {}, not {}'.format(name, accepted, ty))
686
+
687
+
688
def unicode_sub_check_type(ty, name):
    """Raise TypingError unless ``ty`` is a unicode type.

    ``name`` is the argument name used in the error message.
    """
    if isinstance(ty, types.UnicodeType):
        return
    raise TypingError(
        '"{}" must be {}, not {}'.format(name, types.UnicodeType, ty)
    )
693
+
694
+
695
+ # FAST SEARCH algorithm implementation from cpython
696
+
697
+
698
@register_jitable
def _bloom_add(mask, ch):
    """Return ``mask`` with the bit for ``ch`` set.

    The bit index is ``ch`` modulo the bloom width (taken via a bit mask).
    """
    bit = 1 << (ch & (_BLOOM_WIDTH - 1))
    return mask | bit
702
+
703
+
704
@register_jitable
def _bloom_check(mask, ch):
    """Return a non-zero value when the bit for ``ch`` is set in ``mask``."""
    bit = 1 << (ch & (_BLOOM_WIDTH - 1))
    return mask & bit
707
+
708
+
709
+ # https://github.com/python/cpython/blob/1960eb005e04b7ad8a91018088cfdb0646bc1ca0/Objects/stringlib/fastsearch.h#L550 # noqa: E501
710
@register_jitable
def _default_find(data, substr, start, end):
    """Left finder.

    Return the lowest index in ``[start, end - len(substr)]`` at which
    ``substr`` occurs in ``data``, or -1. An empty ``substr`` matches at
    ``start``. A bloom mask of the needle's characters is used to skip
    ahead when the character following the current window cannot be part
    of a match.
    """
    m = len(substr)
    if m == 0:
        return start

    gap = mlast = m - 1
    last = _get_code_point(substr, mlast)

    # Collect the needle's characters in the mask and compute the shift to
    # apply after a window whose last character matched.
    zero = types.intp(0)
    mask = _bloom_add(zero, last)
    for k in range(mlast):
        needle_ch = _get_code_point(substr, k)
        mask = _bloom_add(mask, needle_ch)
        if needle_ch == last:
            gap = mlast - k - 1

    i = start
    while i <= end - m:
        tail_matches = _get_code_point(data, mlast + i) == last
        if tail_matches:
            # Candidate window: verify the remaining characters.
            j = 0
            while j < mlast:
                haystack_ch = _get_code_point(data, i + j)
                needle_ch = _get_code_point(substr, j)
                if haystack_ch != needle_ch:
                    break
                j += 1
            if j == mlast:
                # got a match
                return i

        # Look at the character just past the window to decide the shift.
        following = _get_code_point(data, mlast + i + 1)
        if _bloom_check(mask, following) == 0:
            i += m
        elif tail_matches:
            i += gap
        i += 1

    return -1
755
+
756
+
757
@register_jitable
def _default_rfind(data, substr, start, end):
    """Right finder.

    Return the highest index in ``[start, end - len(substr)]`` at which
    ``substr`` occurs in ``data``, or -1. An empty ``substr`` matches at
    ``end``. A bloom mask of the needle's characters is used to skip
    backwards when the character preceding the current window cannot be
    part of a match.
    """
    m = len(substr)
    if m == 0:
        return end

    skip = mlast = m - 1
    mfirst = _get_code_point(substr, 0)
    mask = _bloom_add(0, mfirst)
    i = mlast
    while i > 0:
        ch = _get_code_point(substr, i)
        mask = _bloom_add(mask, ch)
        if ch == mfirst:
            skip = i - 1
        i -= 1

    i = end - m
    while i >= start:
        ch = _get_code_point(data, i)
        if ch == mfirst:
            j = mlast
            while j > 0:
                haystack_ch = _get_code_point(data, i + j)
                needle_ch = _get_code_point(substr, j)
                if haystack_ch != needle_ch:
                    break
                j -= 1

            if j == 0:
                # got a match
                return i

            # Only read data[i - 1] when it lies inside the search range;
            # reading it unconditionally would access index -1 when
            # i == start == 0.
            if (
                i > start
                and _bloom_check(mask, _get_code_point(data, i - 1)) == 0
            ):
                i -= m
            else:
                i -= skip

        else:
            # Same guard as above: no read before the start of the range.
            if (
                i > start
                and _bloom_check(mask, _get_code_point(data, i - 1)) == 0
            ):
                i -= m
        i -= 1

    return -1
804
+
805
+
806
def generate_finder(find_func):
    """Generate finder either left or right.

    The returned function fills in default bounds, normalises them with
    ``_adjust_indices`` and delegates to ``find_func``; it returns -1
    straight away when the range is shorter than the substring.
    """

    def impl(data, substr, start=None, end=None):
        length = len(data)
        if start is None:
            start = 0
        if end is None:
            end = length

        start, end = _adjust_indices(length, start, end)
        # The needle cannot fit in the requested range.
        if end - start < len(substr):
            return -1

        return find_func(data, substr, start, end)

    return impl
824
+
825
+
826
# Jit-callable left and right finders built from the two search routines.
_find, _rfind = (
    register_jitable(generate_finder(_default_find)),
    register_jitable(generate_finder(_default_rfind)),
)
828
+
829
+
830
@overload_method(types.UnicodeType, "find")
def unicode_find(data, substr, start=None, end=None):
    """Implements str.find()

    ``substr`` may be a unicode value or a UnicodeCharSeq; the latter is
    converted with ``str()`` and searched for with the same bounds.
    ``start`` and ``end`` must be integers or None, otherwise a
    TypingError is raised.
    """
    if isinstance(substr, types.UnicodeCharSeq):

        def find_impl(data, substr, start=None, end=None):
            # Forward the bounds: dropping them would always search the
            # whole string regardless of the caller's start/end.
            return data.find(str(substr), start, end)

        return find_impl

    unicode_idx_check_type(start, "start")
    unicode_idx_check_type(end, "end")
    unicode_sub_check_type(substr, "substr")

    return _find
845
+
846
+
847
@overload_method(types.UnicodeType, "rfind")
def unicode_rfind(data, substr, start=None, end=None):
    """Implements str.rfind()

    ``substr`` may be a unicode value or a UnicodeCharSeq; the latter is
    converted with ``str()`` and searched for with the same bounds.
    ``start`` and ``end`` must be integers or None, otherwise a
    TypingError is raised.
    """
    if isinstance(substr, types.UnicodeCharSeq):

        def rfind_impl(data, substr, start=None, end=None):
            # Forward the bounds: dropping them would always search the
            # whole string regardless of the caller's start/end.
            return data.rfind(str(substr), start, end)

        return rfind_impl

    unicode_idx_check_type(start, "start")
    unicode_idx_check_type(end, "end")
    unicode_sub_check_type(substr, "substr")

    return _rfind
862
+
863
+
864
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12831-L12857 # noqa: E501
865
@overload_method(types.UnicodeType, "rindex")
def unicode_rindex(s, sub, start=None, end=None):
    """Implements str.rindex()

    Same as ``rfind`` but raises ValueError when ``sub`` is not found.
    """
    unicode_idx_check_type(start, "start")
    unicode_idx_check_type(end, "end")
    unicode_sub_check_type(sub, "sub")

    def rindex_impl(s, sub, start=None, end=None):
        position = s.rfind(sub, start, end)
        if position >= 0:
            return position
        raise ValueError("substring not found")

    return rindex_impl
880
+
881
+
882
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11692-L11718 # noqa: E501
883
@overload_method(types.UnicodeType, "index")
def unicode_index(s, sub, start=None, end=None):
    """Implements str.index()

    Same as ``find`` but raises ValueError when ``sub`` is not found.
    """
    unicode_idx_check_type(start, "start")
    unicode_idx_check_type(end, "end")
    unicode_sub_check_type(sub, "sub")

    def index_impl(s, sub, start=None, end=None):
        position = s.find(sub, start, end)
        if position >= 0:
            return position
        raise ValueError("substring not found")

    return index_impl
898
+
899
+
900
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12922-L12976 # noqa: E501
901
@overload_method(types.UnicodeType, "partition")
def unicode_partition(data, sep):
    """Implements str.partition()

    ``sep`` must be a unicode value or a UnicodeCharSeq, otherwise a
    TypingError is raised. The implementation returns
    ``(head, sep, tail)`` split at the first occurrence of ``sep``, or
    ``(data, "", "")`` when it does not occur, and raises ValueError for
    an empty separator.
    """
    if isinstance(sep, types.Omitted):
        # if the type is omitted, the concrete type is the value
        concrete = sep.value
    elif isinstance(sep, types.Optional):
        # if the type is optional, the concrete type is the captured type
        concrete = sep.type
    else:
        concrete = sep

    accepted = (types.UnicodeType, types.UnicodeCharSeq)
    if not (concrete is None or isinstance(concrete, accepted)):
        raise TypingError(
            '"{}" must be {}, not {}'.format("sep", accepted, sep)
        )

    def impl(data, sep):
        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/partition.h#L7-L60 # noqa: E501
        sep = str(sep)
        blank = _empty_string(data._kind, 0, data._is_ascii)
        sep_length = len(sep)
        # A wider-kind or longer separator cannot occur in data.
        if data._kind < sep._kind or len(data) < sep_length:
            return data, blank, blank

        if sep_length == 0:
            raise ValueError("empty separator")

        cut = data.find(sep)
        if cut < 0:
            return data, blank, blank

        return data[0:cut], sep, data[cut + sep_length : len(data)]

    return impl
935
+
936
+
937
@overload_method(types.UnicodeType, "count")
def unicode_count(src, sub, start=None, end=None):
    """Implements str.count()

    Counts non-overlapping occurrences of ``sub`` in ``src[start:end]``.
    An empty ``sub`` yields the slice length plus one. Raises TypingError
    when ``sub`` is not a unicode value.
    """
    _count_args_types_check(start)
    _count_args_types_check(end)

    if not isinstance(sub, types.UnicodeType):
        error_msg = "The substring must be a UnicodeType, not {}"
        raise TypingError(error_msg.format(type(sub)))

    def count_impl(src, sub, start=None, end=None):
        hits = 0
        src_len = len(src)
        sub_len = len(sub)

        start = _normalize_slice_idx_count(start, src_len, 0)
        end = _normalize_slice_idx_count(end, src_len, src_len)

        if end - start < 0 or start > src_len:
            return 0

        # Work on the requested slice only, scanning from its beginning.
        window = src[start:end]
        window_len = len(window)
        if sub_len == 0:
            return window_len + 1

        pos = 0
        while pos + sub_len <= window_len:
            if window[pos : pos + sub_len] == sub:
                hits += 1
                # Matches do not overlap: jump past this one.
                pos += sub_len
            else:
                pos += 1
        return hits

    return count_impl
972
+
973
+
974
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12979-L13033 # noqa: E501
975
@overload_method(types.UnicodeType, "rpartition")
def unicode_rpartition(data, sep):
    """Implements str.rpartition()

    ``sep`` must be a unicode value or a UnicodeCharSeq, otherwise a
    TypingError is raised. The implementation returns
    ``(head, sep, tail)`` split at the last occurrence of ``sep``, or
    ``("", "", data)`` when it does not occur, and raises ValueError for
    an empty separator.
    """
    if isinstance(sep, types.Omitted):
        # if the type is omitted, the concrete type is the value
        concrete = sep.value
    elif isinstance(sep, types.Optional):
        # if the type is optional, the concrete type is the captured type
        concrete = sep.type
    else:
        concrete = sep

    accepted = (types.UnicodeType, types.UnicodeCharSeq)
    if not (concrete is None or isinstance(concrete, accepted)):
        raise TypingError(
            '"{}" must be {}, not {}'.format("sep", accepted, sep)
        )

    def impl(data, sep):
        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/partition.h#L62-L115 # noqa: E501
        sep = str(sep)
        blank = _empty_string(data._kind, 0, data._is_ascii)
        sep_length = len(sep)
        # A wider-kind or longer separator cannot occur in data.
        if data._kind < sep._kind or len(data) < sep_length:
            return blank, blank, data

        if sep_length == 0:
            raise ValueError("empty separator")

        cut = data.rfind(sep)
        if cut < 0:
            return blank, blank, data

        return data[0:cut], sep, data[cut + sep_length : len(data)]

    return impl
1009
+
1010
+
1011
+ # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9342-L9354 # noqa: E501
1012
@register_jitable
def _adjust_indices(length, start, end):
    """Normalise slice-style bounds against ``length``.

    ``end`` is capped at ``length``; negative ``start``/``end`` are offset
    by ``length`` and then floored at 0. Returns ``(start, end)``.
    """
    if end > length:
        end = length
    if end < 0:
        end += length
        if end < 0:
            end = 0

    if start < 0:
        start += length
        if start < 0:
            start = 0

    return start, end
1026
+
1027
+
1028
@overload_method(types.UnicodeType, "startswith")
def unicode_startswith(s, prefix, start=None, end=None):
    """Implements str.startswith()

    ``prefix`` may be a unicode value, a UnicodeCharSeq, or a homogeneous
    tuple of unicode values. ``start`` and ``end`` must be integers or
    None-like. Raises TypingError for any other argument types.
    """
    if not (is_nonelike(start) or isinstance(start, types.Integer)):
        raise TypingError(
            "When specified, the arg 'start' must be an Integer or None"
        )

    if not (is_nonelike(end) or isinstance(end, types.Integer)):
        raise TypingError(
            "When specified, the arg 'end' must be an Integer or None"
        )

    is_unicode_tuple = isinstance(prefix, types.UniTuple) and isinstance(
        prefix.dtype, types.UnicodeType
    )
    if is_unicode_tuple:

        def startswith_tuple_impl(s, prefix, start=None, end=None):
            # True as soon as any candidate matches.
            for item in prefix:
                if s.startswith(item, start, end):
                    return True
            return False

        return startswith_tuple_impl

    if isinstance(prefix, types.UnicodeCharSeq):

        def startswith_char_seq_impl(s, prefix, start=None, end=None):
            return s.startswith(str(prefix), start, end)

        return startswith_char_seq_impl

    if isinstance(prefix, types.UnicodeType):

        def startswith_unicode_impl(s, prefix, start=None, end=None):
            length = len(s)
            prefix_length = len(prefix)
            if start is None:
                start = 0
            if end is None:
                end = length

            start, end = _adjust_indices(length, start, end)
            # The prefix cannot fit in the requested range.
            if end - start < prefix_length:
                return False

            if prefix_length == 0:
                return True

            return (
                _cmp_region(s[start:end], 0, prefix, 0, prefix_length) == 0
            )

        return startswith_unicode_impl

    raise TypingError(
        "The arg 'prefix' should be a string or a tuple of strings"
    )
1085
+
1086
+
1087
@overload_method(types.UnicodeType, "endswith")
def unicode_endswith(s, substr, start=None, end=None):
    """Select the ``str.endswith`` implementation for the type of ``substr``.

    ``start`` and ``end`` must each be ``None``, omitted, an integer type or
    the none type, otherwise ``TypingError`` is raised.  Tuples, unicode
    strings and unicode char sequences are handled; for any other ``substr``
    type no implementation is returned.
    """
    index_types = (types.Omitted, types.Integer, types.NoneType)
    for bound in (start, end):
        if not (bound is None or isinstance(bound, index_types)):
            raise TypingError("The arg must be a Integer or None")

    if isinstance(substr, (types.Tuple, types.UniTuple)):
        # True as soon as any candidate suffix matches.
        def endswith_tuple_impl(s, substr, start=None, end=None):
            for candidate in substr:
                if s.endswith(candidate, start, end) is True:
                    return True

            return False

        return endswith_tuple_impl

    if isinstance(substr, types.UnicodeType):

        def endswith_unicode_impl(s, substr, start=None, end=None):
            s_len = len(s)
            suffix_len = len(substr)
            if start is None:
                start = 0
            if end is None:
                end = s_len

            start, end = _adjust_indices(s_len, start, end)
            # The window [start, end) is too short to hold the suffix.
            if end - start < suffix_len:
                return False

            if suffix_len == 0:
                return True

            window = s[start:end]
            tail_start = len(window) - suffix_len
            return _cmp_region(window, tail_start, substr, 0, suffix_len) == 0

        return endswith_unicode_impl

    if isinstance(substr, types.UnicodeCharSeq):
        # Convert to a unicode string and re-dispatch.
        def endswith_char_seq_impl(s, substr, start=None, end=None):
            return s.endswith(str(substr), start, end)

        return endswith_char_seq_impl
1142
+
1143
+
1144
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11519-L11595 # noqa: E501
1145
@overload_method(types.UnicodeType, "expandtabs")
def unicode_expandtabs(data, tabsize=8):
    """Implements str.expandtabs()

    At typing time ``tabsize`` must resolve to an integer (or ``None``),
    otherwise ``TypingError`` is raised.  The returned implementation makes
    two passes over ``data``: the first computes the length of the result,
    the second fills a freshly allocated string.
    """
    thety = tabsize
    # if the type is omitted, the concrete type is the value
    if isinstance(tabsize, types.Omitted):
        thety = tabsize.value
    # if the type is optional, the concrete type is the captured type
    elif isinstance(tabsize, types.Optional):
        thety = tabsize.type

    accepted = (types.Integer, int)
    if thety is not None and not isinstance(thety, accepted):
        raise TypingError(
            '"tabsize" must be {}, not {}'.format(accepted, tabsize)
        )

    def expandtabs_impl(data, tabsize=8):
        length = len(data)
        # Pass 1: `j` accumulates the output length, `line_pos` the column
        # within the current line.
        j = line_pos = 0
        found = False
        for i in range(length):
            code_point = _get_code_point(data, i)
            if code_point == _Py_TAB:
                found = True
                if tabsize > 0:
                    # cannot overflow
                    incr = tabsize - (line_pos % tabsize)
                    if j > sys.maxsize - incr:
                        raise OverflowError("new string is too long")
                    line_pos += incr
                    j += incr
            else:
                if j > sys.maxsize - 1:
                    raise OverflowError("new string is too long")
                line_pos += 1
                j += 1
                # A line terminator resets the column counter.
                if code_point in (_Py_LINEFEED, _Py_CARRIAGE_RETURN):
                    line_pos = 0

        # No tab present: the input is returned as-is, without a copy.
        if not found:
            return data

        # Pass 2: write the expanded text into a string of the computed size.
        res = _empty_string(data._kind, j, data._is_ascii)
        j = line_pos = 0
        for i in range(length):
            code_point = _get_code_point(data, i)
            if code_point == _Py_TAB:
                # With tabsize <= 0 a tab produces no output at all.
                if tabsize > 0:
                    incr = tabsize - (line_pos % tabsize)
                    line_pos += incr
                    for idx in range(j, j + incr):
                        _set_code_point(res, idx, _Py_SPACE)
                    j += incr
            else:
                line_pos += 1
                _set_code_point(res, j, code_point)
                j += 1
                if code_point in (_Py_LINEFEED, _Py_CARRIAGE_RETURN):
                    line_pos = 0

        return res

    return expandtabs_impl
1209
+
1210
+
1211
@overload_method(types.UnicodeType, "split")
def unicode_split(a, sep=None, maxsplit=-1):
    """Select the ``str.split`` implementation for the type of ``sep``.

    * ``sep`` is a unicode char sequence: convert to ``str`` and re-dispatch.
    * ``sep`` is a unicode string: split on that separator; an empty
      separator raises ``ValueError`` at run time.
    * ``sep`` is None / omitted: split on runs of whitespace.

    As in CPython, any negative ``maxsplit`` means "no limit", and when the
    limit is reached the unsplit remainder is returned as the last element.
    Typing fails (``None`` is returned) if ``maxsplit`` is not an integer.
    """
    if not (
        maxsplit == -1
        or isinstance(
            maxsplit, (types.Omitted, types.Integer, types.IntegerLiteral)
        )
    ):
        return None  # fail typing if maxsplit is not an integer

    if isinstance(sep, types.UnicodeCharSeq):

        def split_impl(a, sep=None, maxsplit=-1):
            return a.split(str(sep), maxsplit=maxsplit)

        return split_impl

    if isinstance(sep, types.UnicodeType):

        def split_impl(a, sep=None, maxsplit=-1):
            a_len = len(a)
            sep_len = len(sep)

            if sep_len == 0:
                raise ValueError("empty separator")

            parts = []
            last = 0
            idx = 0

            # Every negative maxsplit is "unlimited", not only -1.
            if sep_len == 1 and maxsplit < 0:
                # Fast path: single code point separator, no split limit.
                sep_code_point = _get_code_point(sep, 0)
                for idx in range(a_len):
                    if _get_code_point(a, idx) == sep_code_point:
                        parts.append(a[last:idx])
                        last = idx + 1
            else:
                split_count = 0

                while idx < a_len and (
                    maxsplit < 0 or split_count < maxsplit
                ):
                    if _cmp_region(a, idx, sep, 0, sep_len) == 0:
                        parts.append(a[last:idx])
                        idx += sep_len
                        last = idx
                        split_count += 1
                    else:
                        idx += 1

            # Whatever follows the last separator (possibly empty).
            if last <= a_len:
                parts.append(a[last:])

            return parts

        return split_impl
    elif (
        sep is None
        or isinstance(sep, types.NoneType)
        or getattr(sep, "value", False) is None
    ):

        def split_whitespace_impl(a, sep=None, maxsplit=-1):
            a_len = len(a)

            parts = []
            idx = 0
            split_count = 0

            while idx < a_len:
                # Skip the whitespace run in front of the next word.
                while idx < a_len and _PyUnicode_IsSpace(
                    _get_code_point(a, idx)
                ):
                    idx += 1
                if idx == a_len:
                    break

                if maxsplit >= 0 and split_count >= maxsplit:
                    # Limit reached: the rest of the string, trailing
                    # whitespace included, is the final element.
                    parts.append(a[idx:])
                    break

                word_start = idx
                while idx < a_len and not _PyUnicode_IsSpace(
                    _get_code_point(a, idx)
                ):
                    idx += 1
                parts.append(a[word_start:idx])
                split_count += 1

            return parts

        return split_whitespace_impl
1307
+
1308
+
1309
def generate_rsplit_whitespace_impl(isspace_func):
    """Generate whitespace rsplit func based on either ascii or unicode

    ``isspace_func`` is called with a code point and decides whether it is
    whitespace.  The returned function scans ``data`` from the end towards
    the start, collects words in reverse order and returns them reversed.
    Its ``sep`` parameter is accepted but not used.
    """

    def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L192-L240 # noqa: E501
        # Any negative maxsplit means "no limit".
        if maxsplit < 0:
            maxsplit = sys.maxsize

        result = []
        i = len(data) - 1
        while maxsplit > 0:
            # Skip trailing whitespace before the next word.
            while i >= 0:
                code_point = _get_code_point(data, i)
                if not isspace_func(code_point):
                    break
                i -= 1
            if i < 0:
                break
            # `j` is the last character of the word, `i` walks to its start.
            j = i
            i -= 1
            while i >= 0:
                code_point = _get_code_point(data, i)
                if isspace_func(code_point):
                    break
                i -= 1
            result.append(data[i + 1 : j + 1])
            maxsplit -= 1

        if i >= 0:
            # Only occurs when maxsplit was reached
            # Skip any remaining whitespace and copy to beginning of string
            while i >= 0:
                code_point = _get_code_point(data, i)
                if not isspace_func(code_point):
                    break
                i -= 1
            if i >= 0:
                result.append(data[0 : i + 1])

        # Words were collected right-to-left.
        return result[::-1]

    return rsplit_whitespace_impl
1351
+
1352
+
1353
# Two variants of the whitespace rsplit helper, differing only in the
# whitespace predicate handed to the generator.
unicode_rsplit_whitespace_impl = register_jitable(
    generate_rsplit_whitespace_impl(_PyUnicode_IsSpace)
)
ascii_rsplit_whitespace_impl = register_jitable(
    generate_rsplit_whitespace_impl(_Py_ISSPACE)
)
1359
+
1360
+
1361
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13095-L13108 # noqa: E501
1362
@overload_method(types.UnicodeType, "rsplit")
def unicode_rsplit(data, sep=None, maxsplit=-1):
    """Implements str.rsplit()

    ``sep`` must be a unicode string, a unicode char sequence or None, and
    ``maxsplit`` an integer; otherwise ``TypingError`` is raised.  With no
    separator the string is split on whitespace runs; otherwise on ``sep``,
    scanning from the right.  An empty separator raises ``ValueError`` at
    run time.
    """

    def _unicode_rsplit_check_type(ty, name, accepted):
        """Check object belongs to one of specified types"""
        thety = ty
        # if the type is omitted, the concrete type is the value
        if isinstance(ty, types.Omitted):
            thety = ty.value
        # if the type is optional, the concrete type is the captured type
        elif isinstance(ty, types.Optional):
            thety = ty.type

        if thety is not None and not isinstance(thety, accepted):
            raise TypingError(
                '"{}" must be {}, not {}'.format(name, accepted, ty)
            )

    _unicode_rsplit_check_type(
        sep, "sep", (types.UnicodeType, types.UnicodeCharSeq, types.NoneType)
    )
    _unicode_rsplit_check_type(maxsplit, "maxsplit", (types.Integer, int))

    if sep is None or isinstance(sep, (types.NoneType, types.Omitted)):

        def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
            # Pick the helper matching the string's ASCII flag.
            if data._is_ascii:
                return ascii_rsplit_whitespace_impl(data, sep, maxsplit)
            return unicode_rsplit_whitespace_impl(data, sep, maxsplit)

        return rsplit_whitespace_impl

    def rsplit_impl(data, sep=None, maxsplit=-1):
        sep = str(sep)
        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L286-L333 # noqa: E501
        # A wider-kind or longer separator cannot occur in `data`.
        if data._kind < sep._kind or len(data) < len(sep):
            return [data]

        def _rsplit_char(data, ch, maxsplit):
            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L242-L284 # noqa: E501
            result = []
            ch_code_point = _get_code_point(ch, 0)
            i = j = len(data) - 1
            while i >= 0 and maxsplit > 0:
                data_code_point = _get_code_point(data, i)
                if data_code_point == ch_code_point:
                    result.append(data[i + 1 : j + 1])
                    # Step past the separator exactly once; a second
                    # decrement here would skip the character to its left
                    # and miss adjacent separators.
                    j = i = i - 1
                    maxsplit -= 1
                else:
                    i -= 1
            if j >= -1:
                result.append(data[0 : j + 1])

            return result[::-1]

        # Any negative maxsplit means "no limit".
        if maxsplit < 0:
            maxsplit = sys.maxsize

        sep_length = len(sep)

        if sep_length == 0:
            raise ValueError("empty separator")
        if sep_length == 1:
            return _rsplit_char(data, sep, maxsplit)

        result = []
        j = len(data)
        while maxsplit > 0:
            # Right-most occurrence of the separator inside data[0:j].
            pos = data.rfind(sep, start=0, end=j)
            if pos < 0:
                break
            result.append(data[pos + sep_length : j])
            j = pos
            maxsplit -= 1

        result.append(data[0:j])

        # Pieces were collected right-to-left.
        return result[::-1]

    return rsplit_impl
1443
+
1444
+
1445
@overload_method(types.UnicodeType, "center")
def unicode_center(string, width, fillchar=" "):
    """Typing-time entry point for ``str.center``.

    ``width`` must be an integer type.  A unicode char sequence ``fillchar``
    is converted to ``str`` and re-dispatched; otherwise ``fillchar`` must be
    the default, omitted, or a unicode string.  At run time a fill string
    whose length is not 1 raises ``ValueError``.
    """
    if not isinstance(width, types.Integer):
        raise TypingError("The width must be an Integer")

    if isinstance(fillchar, types.UnicodeCharSeq):

        def center_charseq_impl(string, width, fillchar=" "):
            return string.center(width, str(fillchar))

        return center_charseq_impl

    fillchar_ok = fillchar == " " or isinstance(
        fillchar, (types.Omitted, types.UnicodeType)
    )
    if not fillchar_ok:
        raise TypingError("The fillchar must be a UnicodeType")

    def center_impl(string, width, fillchar=" "):
        if len(fillchar) != 1:
            raise ValueError(
                "The fill character must be exactly one character long"
            )

        str_len = len(string)
        if width <= str_len:
            return string

        # Split the padding; an odd remainder is placed by the parity rule
        # encoded in the bit expression below.
        total_pad = width - str_len
        left_pad = (total_pad // 2) + (total_pad & width & 1)
        right_pad = total_pad - left_pad

        left_fill = fillchar * left_pad
        if left_pad == right_pad:
            # Reuse the already-built padding on both sides.
            return left_fill + string + left_fill
        return left_fill + string + (fillchar * right_pad)

    return center_impl
1486
+
1487
+
1488
def gen_unicode_Xjust(STRING_FIRST):
    """Build the typing function shared by ``str.ljust`` and ``str.rjust``.

    With ``STRING_FIRST`` true the padding is appended after the string
    (ljust); otherwise it is placed in front of it (rjust).
    """

    def unicode_Xjust(string, width, fillchar=" "):
        if not isinstance(width, types.Integer):
            raise TypingError("The width must be an Integer")

        if isinstance(fillchar, types.UnicodeCharSeq):
            # Convert the char sequence to str and re-dispatch to the
            # matching method.
            if STRING_FIRST:

                def ljust_impl(string, width, fillchar=" "):
                    return string.ljust(width, str(fillchar))

                return ljust_impl
            else:

                def rjust_impl(string, width, fillchar=" "):
                    return string.rjust(width, str(fillchar))

                return rjust_impl

        fillchar_ok = fillchar == " " or isinstance(
            fillchar, (types.Omitted, types.UnicodeType)
        )
        if not fillchar_ok:
            raise TypingError("The fillchar must be a UnicodeType")

        def impl(string, width, fillchar=" "):
            if len(fillchar) != 1:
                raise ValueError(
                    "The fill character must be exactly one character long"
                )

            str_len = len(string)
            if width <= str_len:
                return string

            padding = fillchar * (width - str_len)
            if STRING_FIRST:
                return string + padding
            else:
                return padding + string

        return impl

    return unicode_Xjust
1534
+
1535
+
1536
+ overload_method(types.UnicodeType, "rjust")(gen_unicode_Xjust(False))
1537
+ overload_method(types.UnicodeType, "ljust")(gen_unicode_Xjust(True))
1538
+
1539
+
1540
def generate_splitlines_func(is_line_break_func):
    """Generate splitlines performer based on ascii or unicode line breaks.

    ``is_line_break_func`` is called with a code point and decides whether
    it ends a line.  The returned function takes ``data`` and ``keepends``
    and returns the list of lines; the line terminator is included in each
    line only when ``keepends`` is true.
    """

    def impl(data, keepends):
        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L335-L389 # noqa: E501
        length = len(data)
        result = []
        # `j` marks the start of the current line, `i` is the scan position.
        i = j = 0
        while i < length:
            # find a line and append it
            while i < length:
                code_point = _get_code_point(data, i)
                if is_line_break_func(code_point):
                    break
                i += 1

            # skip the line break reading CRLF as one line break
            eol = i
            if i < length:
                if i + 1 < length:
                    cur_cp = _get_code_point(data, i)
                    next_cp = _get_code_point(data, i + 1)
                    if _Py_ISCARRIAGERETURN(cur_cp) and _Py_ISLINEFEED(next_cp):
                        i += 1
                i += 1
                # With keepends the slice extends past the terminator.
                if keepends:
                    eol = i

            result.append(data[j:eol])
            j = i

        return result

    return impl
1574
+
1575
+
1576
# Two variants of the splitlines helper, differing only in the line-break
# predicate handed to the generator.
_ascii_splitlines = register_jitable(generate_splitlines_func(_Py_ISLINEBREAK))
_unicode_splitlines = register_jitable(
    generate_splitlines_func(_PyUnicode_IsLineBreak)
)
1580
+
1581
+
1582
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10196-L10229 # noqa: E501
1583
@overload_method(types.UnicodeType, "splitlines")
def unicode_splitlines(data, keepends=False):
    """Typing-time entry point for ``str.splitlines``.

    ``keepends`` must resolve to an integer or boolean (or ``None``),
    otherwise ``TypingError`` is raised.  The implementation picks the ASCII
    or the unicode line splitter from the string's ASCII flag.
    """
    # Unwrap Omitted (-> default value) and Optional (-> wrapped type).
    if isinstance(keepends, types.Omitted):
        resolved = keepends.value
    elif isinstance(keepends, types.Optional):
        resolved = keepends.type
    else:
        resolved = keepends

    accepted = (types.Integer, int, types.Boolean, bool)
    if resolved is not None and not isinstance(resolved, accepted):
        raise TypingError(
            '"{}" must be {}, not {}'.format("keepends", accepted, keepends)
        )

    def splitlines_impl(data, keepends=False):
        if data._is_ascii:
            return _ascii_splitlines(data, keepends)
        else:
            return _unicode_splitlines(data, keepends)

    return splitlines_impl
1607
+
1608
+
1609
@register_jitable
def join_list(sep, parts):
    """Join the strings in ``parts`` with ``sep`` between consecutive items.

    Returns ``""`` for an empty ``parts``.  Otherwise the result length,
    kind and ASCII flag are computed up front from ``sep`` and every part,
    a result string of that size is allocated once, and the pieces are
    copied into it.
    """
    parts_len = len(parts)
    if parts_len == 0:
        return ""

    # Precompute size and char_width of result
    sep_len = len(sep)
    length = (parts_len - 1) * sep_len
    kind = sep._kind
    is_ascii = sep._is_ascii
    for p in parts:
        length += len(p)
        kind = _pick_kind(kind, p._kind)
        is_ascii = _pick_ascii(is_ascii, p._is_ascii)

    result = _empty_string(kind, length, is_ascii)

    # populate string
    # The first part is copied without a leading separator; each later part
    # is preceded by one copy of `sep`.
    part = parts[0]
    _strncpy(result, 0, part, 0, len(part))
    dst_offset = len(part)
    for idx in range(1, parts_len):
        _strncpy(result, dst_offset, sep, 0, sep_len)
        dst_offset += sep_len
        part = parts[idx]
        _strncpy(result, dst_offset, part, 0, len(part))
        dst_offset += len(part)

    return result
1639
+
1640
+
1641
@overload_method(types.UnicodeType, "join")
def unicode_join(sep, parts):
    """Select the ``str.join`` implementation for the type of ``parts``.

    Lists of unicode strings or unicode char sequences, other iterables and
    plain unicode strings are handled; for lists of any other element type
    no implementation is returned.
    """
    if isinstance(parts, types.List):
        elem_ty = parts.dtype

        if isinstance(elem_ty, types.UnicodeType):

            def join_list_impl(sep, parts):
                return join_list(sep, parts)

            return join_list_impl

        if isinstance(elem_ty, types.UnicodeCharSeq):
            # Convert every element to str before joining.
            def join_charseq_list_impl(sep, parts):
                _parts = [str(p) for p in parts]
                return join_list(sep, _parts)

            return join_charseq_list_impl

        # lists of any other type not supported
        return None

    if isinstance(parts, types.IterableType):
        # Materialise the iterable, then re-dispatch to the list overload.
        def join_iter_impl(sep, parts):
            parts_list = [p for p in parts]
            return sep.join(parts_list)

        return join_iter_impl

    if isinstance(parts, types.UnicodeType):
        # Temporary workaround until UnicodeType is iterable
        def join_str_impl(sep, parts):
            parts_list = [parts[i] for i in range(len(parts))]
            return join_list(sep, parts_list)

        return join_str_impl
1673
+
1674
+
1675
@overload_method(types.UnicodeType, "zfill")
def unicode_zfill(string, width):
    """Typing-time entry point for ``str.zfill``.

    ``width`` must be an integer type.  The implementation left-pads with
    ``"0"`` up to ``width``; a leading ``"+"`` or ``"-"`` stays in front of
    the padding.
    """
    if not isinstance(width, types.Integer):
        raise TypingError("<width> must be an Integer")

    def zfill_impl(string, width):
        str_len = len(string)

        if width <= str_len:
            return string

        if str_len:
            lead = string[0]
        else:
            lead = ""
        zeros = "0" * (width - str_len)

        if lead == "+" or lead == "-":
            # Keep the sign first and pad between it and the digits.
            return lead + zeros + string[1:]
        return zeros + string

    return zfill_impl
1697
+
1698
+
1699
+ # ------------------------------------------------------------------------------
1700
+ # Strip functions
1701
+ # ------------------------------------------------------------------------------
1702
@register_jitable
def unicode_strip_left_bound(string, chars):
    """Return the index of the first character of ``string`` to keep.

    With ``chars`` given, characters contained in ``chars`` are skipped;
    with ``chars`` of None, characters for which ``_PyUnicode_IsSpace`` is
    true are skipped.  If every character is skipped, ``len(string)`` is
    returned.
    """
    str_len = len(string)

    i = 0
    if chars is not None:
        for i in range(str_len):
            if string[i] not in chars:
                return i
    else:
        for i in range(str_len):
            if not _PyUnicode_IsSpace(string[i]):
                return i

    return str_len
1717
+
1718
+
1719
@register_jitable
def unicode_strip_right_bound(string, chars):
    """Return the exclusive end index of the part of ``string`` to keep.

    Scans from the last character backwards, skipping characters contained
    in ``chars`` (or, with ``chars`` of None, characters for which
    ``_PyUnicode_IsSpace`` is true).  Returns 0 when the string is empty or
    every character is skipped.
    """
    str_len = len(string)
    i = 0
    if chars is not None:
        for i in range(str_len - 1, -1, -1):
            if string[i] not in chars:
                # Convert the index of the kept character to an end bound.
                i += 1
                break
    else:
        for i in range(str_len - 1, -1, -1):
            if not _PyUnicode_IsSpace(string[i]):
                i += 1
                break

    return i
1735
+
1736
+
1737
def unicode_strip_types_check(chars):
    """Validate the ``chars`` argument type of the strip family.

    Raises ``TypingError`` unless ``chars`` is ``None``, omitted, a unicode
    string or the none type.  An Optional is checked by its wrapped type.
    """
    # catch optional type with invalid non-None type
    chars_ty = chars.type if isinstance(chars, types.Optional) else chars

    if chars_ty is None:
        return
    if isinstance(
        chars_ty, (types.Omitted, types.UnicodeType, types.NoneType)
    ):
        return
    raise TypingError("The arg must be a UnicodeType or None")
1745
+
1746
+
1747
def _count_args_types_check(arg):
    """Validate the type of a slice-index argument.

    Raises ``TypingError`` unless ``arg`` is ``None``, omitted, an integer
    type or the none type.  An Optional is checked by its wrapped type.
    """
    arg_ty = arg.type if isinstance(arg, types.Optional) else arg

    if arg_ty is None:
        return
    if isinstance(arg_ty, (types.Omitted, types.Integer, types.NoneType)):
        return
    raise TypingError("The slice indices must be an Integer or None")
1755
+
1756
+
1757
@overload_method(types.UnicodeType, "lstrip")
def unicode_lstrip(string, chars=None):
    """Typing-time entry point for ``str.lstrip``.

    A unicode char sequence ``chars`` is converted to ``str`` and
    re-dispatched; any other type goes through
    ``unicode_strip_types_check`` first.
    """
    if isinstance(chars, types.UnicodeCharSeq):

        def lstrip_charseq_impl(string, chars=None):
            return string.lstrip(str(chars))

        return lstrip_charseq_impl

    unicode_strip_types_check(chars)

    def lstrip_impl(string, chars=None):
        left = unicode_strip_left_bound(string, chars)
        return string[left:]

    return lstrip_impl
1772
+
1773
+
1774
@overload_method(types.UnicodeType, "rstrip")
def unicode_rstrip(string, chars=None):
    """Typing-time entry point for ``str.rstrip``.

    A unicode char sequence ``chars`` is converted to ``str`` and
    re-dispatched; any other type goes through
    ``unicode_strip_types_check`` first.
    """
    if isinstance(chars, types.UnicodeCharSeq):

        def rstrip_charseq_impl(string, chars=None):
            return string.rstrip(str(chars))

        return rstrip_charseq_impl

    unicode_strip_types_check(chars)

    def rstrip_impl(string, chars=None):
        right = unicode_strip_right_bound(string, chars)
        return string[:right]

    return rstrip_impl
1789
+
1790
+
1791
@overload_method(types.UnicodeType, "strip")
def unicode_strip(string, chars=None):
    """Typing-time entry point for ``str.strip``.

    A unicode char sequence ``chars`` is converted to ``str`` and
    re-dispatched; any other type goes through
    ``unicode_strip_types_check`` first.
    """
    if isinstance(chars, types.UnicodeCharSeq):

        def strip_charseq_impl(string, chars=None):
            return string.strip(str(chars))

        return strip_charseq_impl

    unicode_strip_types_check(chars)

    def strip_impl(string, chars=None):
        left = unicode_strip_left_bound(string, chars)
        right = unicode_strip_right_bound(string, chars)
        return string[left:right]

    return strip_impl
1808
+
1809
+
1810
+ # ------------------------------------------------------------------------------
1811
+ # Slice functions
1812
+ # ------------------------------------------------------------------------------
1813
+
1814
+
1815
@register_jitable
def normalize_str_idx(idx, length, is_start=True):
    """
    Turn a possibly negative or missing string index into a plain offset.

    Parameters
    ----------
    idx : int or None
        the index; None selects the default for the requested slice end
    length : int
        the string length
    is_start : bool; optional with defaults to True
        whether a None ``idx`` stands for the *start* (0) or the *stop*
        (``length``) of a slice

    Returns
    -------
    norm_idx : int
        normalized index

    Raises
    ------
    IndexError
        if the index, after adding ``length`` to a negative value, is not in
        ``[0, length)``
    """
    if idx is None:
        if is_start:
            return 0
        return length

    if idx < 0:
        idx += length

    if not (0 <= idx < length):
        raise IndexError("string index out of range")

    return idx
1844
+
1845
+
1846
@register_jitable
def _normalize_slice_idx_count(arg, slice_len, default):
    """
    Used for unicode_count

    * ``arg`` is None: ``default`` is returned.
    * ``arg < -slice_len``: 0 is returned, so the index does not wrap.
    * ``-slice_len <= arg < slice_len``: the real index ``arg % slice_len``
      is returned.
    * ``arg >= slice_len``: ``arg`` is returned unchanged (count must then
      return 0 if this is the start index).
    """
    if arg is None:
        return default
    if arg < -slice_len:
        return 0
    if arg >= slice_len:
        return arg
    return arg % slice_len
1865
+
1866
+
1867
@intrinsic
def _normalize_slice(typingctx, sliceobj, length):
    """Fix slice object.

    Takes a slice and a length and returns a slice of the same type; the
    generated code runs ``slicing.guard_invalid_slice`` and then
    ``slicing.fix_slice`` with that length.
    """
    sig = sliceobj(sliceobj, length)

    def codegen(context, builder, sig, args):
        [slicetype, lengthtype] = sig.args
        [sliceobj, length] = args
        # Wrap the raw value so its fields can be adjusted in place.
        slice = context.make_helper(builder, slicetype, sliceobj)
        slicing.guard_invalid_slice(context, builder, slicetype, slice)
        slicing.fix_slice(builder, slice, length)
        return slice._getvalue()

    return sig, codegen
1881
+
1882
+
1883
@intrinsic
def _slice_span(typingctx, sliceobj):
    """Compute the span from the given slice object.

    Returns an ``intp`` holding the result of ``slicing.get_slice_length``
    for the slice.
    """
    sig = types.intp(sliceobj)

    def codegen(context, builder, sig, args):
        [slicetype] = sig.args
        [sliceobj] = args
        slice = context.make_helper(builder, slicetype, sliceobj)
        result_size = slicing.get_slice_length(builder, slice)
        return result_size

    return sig, codegen
1896
+
1897
+
1898
@register_jitable(_nrt=False)
def _strncpy(dst, dst_offset, src, src_offset, n):
    """Copy ``n`` code points from ``src`` into ``dst``.

    Offsets are given in code points.  When both strings have the same kind
    the data is copied as one block of bytes; otherwise each code point is
    read from ``src`` and written to ``dst`` individually.
    """
    if src._kind == dst._kind:
        # Same storage width: translate code point offsets to byte offsets.
        byte_width = _kind_to_byte_width(src._kind)
        src_byte_offset = byte_width * src_offset
        dst_byte_offset = byte_width * dst_offset
        nbytes = n * byte_width
        memcpy_region(
            dst._data,
            dst_byte_offset,
            src._data,
            src_byte_offset,
            nbytes,
            align=1,
        )
    else:
        # Different widths: convert one code point at a time.
        for i in range(n):
            _set_code_point(
                dst, dst_offset + i, _get_code_point(src, src_offset + i)
            )
1918
+
1919
+
1920
@intrinsic
def _get_str_slice_view(typingctx, src_t, start_t, length_t):
    """Create a slice of a unicode string using a view of its data to avoid
    extra allocation.

    The view reuses the source string's meminfo, kind and ASCII flag, takes
    the given length, and points its data at ``start`` code points into the
    source data.  Signature: ``unicode_type(unicode_type, intp, intp)``.
    """
    assert src_t == types.unicode_type

    def codegen(context, builder, sig, args):
        src, start, length = args
        in_str = cgutils.create_struct_proxy(types.unicode_type)(
            context, builder, value=src
        )
        view_str = cgutils.create_struct_proxy(types.unicode_type)(
            context, builder
        )
        view_str.meminfo = in_str.meminfo
        view_str.kind = in_str.kind
        view_str.is_ascii = in_str.is_ascii
        view_str.length = length
        # hash value -1 to indicate "need to compute hash"
        view_str.hash = context.get_constant(_Py_hash_t, -1)
        # get a pointer to start of slice data
        # (resolve and call `_kind_to_byte_width` to convert the code point
        # offset into a byte offset)
        bw_typ = context.typing_context.resolve_value_type(_kind_to_byte_width)
        bw_sig = bw_typ.get_call_type(
            context.typing_context, (types.int32,), {}
        )
        bw_impl = context.get_function(bw_typ, bw_sig)
        byte_width = bw_impl(builder, (in_str.kind,))
        offset = builder.mul(start, byte_width)
        view_str.data = builder.gep(in_str.data, [offset])
        # Set parent pyobject to NULL
        view_str.parent = cgutils.get_null_value(view_str.parent.type)
        # incref original string
        if context.enable_nrt:
            context.nrt.incref(builder, sig.args[0], src)
        return view_str._getvalue()

    sig = types.unicode_type(types.unicode_type, types.intp, types.intp)
    return sig, codegen
1959
+
1960
+
1961
@overload(operator.getitem)
def unicode_getitem(s, idx):
    """Overload ``s[idx]`` for unicode strings.

    An integer index returns a one-character string; a slice returns the
    selected characters.  In both cases a view of the original data is
    returned when the result has the same kind as ``s`` (and, for slices,
    the step is 1); otherwise a new string of the required kind is built.
    """
    if isinstance(s, types.UnicodeType):
        if isinstance(idx, types.Integer):

            def getitem_char(s, idx):
                # Raises IndexError for an out-of-range index.
                idx = normalize_str_idx(idx, len(s))
                cp = _get_code_point(s, idx)
                kind = _codepoint_to_kind(cp)
                if kind == s._kind:
                    return _get_str_slice_view(s, idx, 1)
                else:
                    # The character needs a different kind than `s`, so it
                    # is stored in a fresh one-character string.
                    is_ascii = _codepoint_is_ascii(cp)
                    ret = _empty_string(kind, 1, is_ascii)
                    _set_code_point(ret, 0, cp)
                    return ret

            return getitem_char
        elif isinstance(idx, types.SliceType):

            def getitem_slice(s, idx):
                slice_idx = _normalize_slice(idx, len(s))
                span = _slice_span(slice_idx)

                # Kind and ASCII flag start from the first selected code
                # point and are widened by the loop below.
                cp = _get_code_point(s, slice_idx.start)
                kind = _codepoint_to_kind(cp)
                is_ascii = _codepoint_is_ascii(cp)

                # Check slice to see if it's homogeneous in kind
                for i in range(
                    slice_idx.start + slice_idx.step,
                    slice_idx.stop,
                    slice_idx.step,
                ):
                    cp = _get_code_point(s, i)
                    is_ascii &= _codepoint_is_ascii(cp)
                    new_kind = _codepoint_to_kind(cp)
                    if kind != new_kind:
                        kind = _pick_kind(kind, new_kind)
                    # TODO: it might be possible to break here if the kind
                    # is PY_UNICODE_4BYTE_KIND but there are potentially
                    # strings coming from other internal functions that are
                    # this wide and also actually ASCII (i.e. kind is larger
                    # than actually required for storing the code point), so
                    # it's necessary to continue.

                if slice_idx.step == 1 and kind == s._kind:
                    # Can return a view, the slice has the same kind as the
                    # string itself and it's a stride slice 1.
                    return _get_str_slice_view(s, slice_idx.start, span)
                else:
                    # It's heterogeneous in kind OR stride != 1
                    ret = _empty_string(kind, span, is_ascii)
                    cur = slice_idx.start
                    for i in range(span):
                        _set_code_point(ret, i, _get_code_point(s, cur))
                        cur += slice_idx.step
                    return ret

            return getitem_slice
2021
+
2022
+
2023
+ # ------------------------------------------------------------------------------
2024
+ # String operations
2025
+ # ------------------------------------------------------------------------------
2026
+
2027
+
2028
@overload(operator.add)
@overload(operator.iadd)
def unicode_concat(a, b):
    """Overload ``a + b`` and ``a += b`` for a unicode left operand.

    ``b`` may be a unicode string or a unicode char sequence; the latter is
    converted to ``str`` and the addition re-dispatched.  For any other
    combination no implementation is returned.
    """
    if not isinstance(a, types.UnicodeType):
        return None

    if isinstance(b, types.UnicodeType):

        def concat_impl(a, b):
            # The result must be wide enough for both operands.
            out_kind = _pick_kind(a._kind, b._kind)
            out_ascii = _pick_ascii(a._is_ascii, b._is_ascii)
            out = _empty_string(out_kind, a._length + b._length, out_ascii)

            a_len = len(a)
            for pos in range(a_len):
                _set_code_point(out, pos, _get_code_point(a, pos))
            for pos in range(len(b)):
                _set_code_point(out, a_len + pos, _get_code_point(b, pos))
            return out

        return concat_impl

    if isinstance(b, types.UnicodeCharSeq):

        def concat_charseq_impl(a, b):
            return a + str(b)

        return concat_charseq_impl
2052
+
2053
+
2054
@register_jitable
def _repeat_impl(str_arg, mult_arg):
    """Return ``str_arg`` repeated ``mult_arg`` times.

    An empty string or a multiplier below 1 gives ``""``; a multiplier of 1
    returns ``str_arg`` itself.  Otherwise the result is allocated once and
    filled by repeatedly doubling the already-written prefix.
    """
    if str_arg == "" or mult_arg < 1:
        return ""
    elif mult_arg == 1:
        return str_arg
    else:
        new_length = str_arg._length * mult_arg
        new_kind = str_arg._kind
        result = _empty_string(new_kind, new_length, str_arg._is_ascii)
        # make initial copy into result
        len_a = len(str_arg)
        _strncpy(result, 0, str_arg, 0, len_a)
        # loop through powers of 2 for efficient copying
        copy_size = len_a
        while 2 * copy_size <= new_length:
            _strncpy(result, copy_size, result, 0, copy_size)
            copy_size *= 2

        # After the loop 2 * copy_size > new_length always holds, so the
        # only meaningful question is whether anything is left to fill.
        rest = new_length - copy_size
        if rest > 0:
            # Both copy_size and rest are multiples of len_a, so the source
            # window starts on a repetition boundary.
            _strncpy(result, copy_size, result, copy_size - rest, rest)
        return result
2079
+
2080
+
2081
@overload(operator.mul)
def unicode_repeat(a, b):
    """Overload ``str * int`` and ``int * str``.

    Both operand orders delegate to ``_repeat_impl``; for any other operand
    types no implementation is returned.
    """
    a_is_str = isinstance(a, types.UnicodeType)
    b_is_str = isinstance(b, types.UnicodeType)

    if a_is_str and isinstance(b, types.Integer):

        def repeat_str_int(a, b):
            return _repeat_impl(a, b)

        return repeat_str_int

    if isinstance(a, types.Integer) and b_is_str:

        def repeat_int_str(a, b):
            return _repeat_impl(b, a)

        return repeat_int_str
2095
+
2096
+
2097
@overload(operator.not_)
def unicode_not(a):
    """Overload ``not s`` for unicode strings: true for the empty string."""
    if not isinstance(a, types.UnicodeType):
        return None

    def impl(a):
        is_empty = len(a) == 0
        return is_empty

    return impl
2105
+
2106
+
2107
@overload_method(types.UnicodeType, "replace")
def unicode_replace(s, old_str, new_str, count=-1):
    """Implements UnicodeType.replace(old_str, new_str, count=-1).

    Typing rules enforced here:

    * ``count`` must be an integer (a Python ``int`` default, an integer
      type, or an ``Optional`` wrapping one);
    * ``old_str`` must be a unicode string or ``None`` type;
    * ``new_str`` must be a unicode string.

    Raises ``TypingError`` when any of these is violated.

    At run time any negative ``count`` means "replace every occurrence",
    matching CPython; ``count == 0`` returns ``s`` unchanged.
    """
    thety = count
    if isinstance(count, types.Omitted):
        thety = count.value
    elif isinstance(count, types.Optional):
        thety = count.type

    if not isinstance(thety, (int, types.Integer)):
        raise TypingError(
            "Unsupported parameters. The parameters "
            "must be Integer. Given count: {}".format(count)
        )

    if not isinstance(old_str, (types.UnicodeType, types.NoneType)):
        raise TypingError(
            "The object must be a UnicodeType. Given: {}".format(old_str)
        )

    if not isinstance(new_str, types.UnicodeType):
        raise TypingError(
            "The object must be a UnicodeType. Given: {}".format(new_str)
        )

    def impl(s, old_str, new_str, count=-1):
        # CPython treats every negative count as "no limit".  The logic
        # below only recognises -1 as that sentinel, so fold all negative
        # values onto it; otherwise e.g. count=-2 with an empty old_str
        # would return just new_str and drop the input.
        limit = -1 if count < 0 else count

        if limit == 0:
            return s
        if old_str == "":
            # An empty pattern matches before every character and once at
            # the very end of the string.
            schars = list(s)
            if limit == -1:
                return new_str + new_str.join(schars) + new_str
            split_result = [new_str]
            min_count = min(len(schars), limit)
            for i in range(min_count):
                split_result.append(schars[i])
                if i + 1 != min_count:
                    split_result.append(new_str)
                else:
                    # Replacement budget used up (or string exhausted):
                    # append whatever is left untouched.
                    split_result.append("".join(schars[(i + 1) :]))
            if limit > len(schars):
                # Enough budget left for the match at the end of the string.
                split_result.append(new_str)
            return "".join(split_result)
        schars = s.split(old_str, limit)
        result = new_str.join(schars)
        return result

    return impl
2154
+
2155
+
2156
+ # ------------------------------------------------------------------------------
2157
+ # String `is*()` methods
2158
+ # ------------------------------------------------------------------------------
2159
+
2160
+
2161
+ # generates isalpha/isalnum
2162
def gen_isAlX(ascii_func, unicode_func):
    """Build the overload body shared by ``isalpha`` and ``isalnum``.

    ``ascii_func`` is the per-code-point predicate applied to strings whose
    ``_is_ascii`` flag is set; ``unicode_func`` is applied to all other
    strings.  The generated implementation returns ``False`` for an empty
    string and otherwise ``True`` only if every code point satisfies the
    selected predicate.
    """

    def unicode_isAlX(data):
        def impl(data):
            length = len(data)
            if length == 0:
                return False

            if length == 1:
                code_point = _get_code_point(data, 0)
                if data._is_ascii:
                    return ascii_func(code_point)
                else:
                    return unicode_func(code_point)

            if data._is_ascii:
                for i in range(length):
                    code_point = _get_code_point(data, i)
                    if not ascii_func(code_point):
                        return False
            else:
                # Only non-ASCII strings need the Unicode predicate; an
                # ASCII string that passed the loop above must not be
                # scanned a second time.
                for i in range(length):
                    code_point = _get_code_point(data, i)
                    if not unicode_func(code_point):
                        return False

            return True

        return impl

    return unicode_isAlX
2192
+
2193
+
2194
# Register str.isalpha using the ASCII and Unicode alphabetic predicates.
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11928-L11964    # noqa: E501
overload_method(types.UnicodeType, "isalpha")(
    gen_isAlX(_Py_ISALPHA, _PyUnicode_IsAlpha)
)

# Alphanumeric test for non-ASCII strings: a code point qualifies when it is
# numeric or alphabetic.
_unicode_is_alnum = register_jitable(
    lambda x: (_PyUnicode_IsNumeric(x) or _PyUnicode_IsAlpha(x))
)

# Register str.isalnum using the ASCII predicate and the helper above.
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11975-L12006    # noqa: E501
overload_method(types.UnicodeType, "isalnum")(
    gen_isAlX(_Py_ISALNUM, _unicode_is_alnum)
)
2207
+
2208
+
2209
def _is_upper(is_lower, is_upper, is_title):
    """Build an ``isupper`` checker from three per-code-point predicates.

    The returned function reports ``True`` when the string holds at least
    one code point accepted by ``is_upper`` and none accepted by
    ``is_lower`` or ``is_title``.  An empty string gives ``False``.
    """

    # impl is an approximate translation of:
    # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11794-L11827    # noqa: E501
    # mixed with:
    # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L218-L242    # noqa: E501
    def impl(a):
        n_chars = len(a)
        if n_chars == 0:
            return False
        if n_chars == 1:
            # Single character: the upper-case predicate decides alone.
            return is_upper(_get_code_point(a, 0)) != 0

        seen_upper = False
        for pos in range(n_chars):
            cp = _get_code_point(a, pos)
            if is_lower(cp) or is_title(cp):
                return False
            if not seen_upper and is_upper(cp):
                seen_upper = True
        return seen_upper

    return impl
2230
+
2231
+
2232
# Predicate that rejects every code point; it fills the title-case slot of
# the ASCII checker built below.
_always_false = register_jitable(lambda x: False)
# isupper checker assembled from the ASCII predicates.
_ascii_is_upper = register_jitable(
    _is_upper(_Py_ISLOWER, _Py_ISUPPER, _always_false)
)
# isupper checker assembled from the Unicode predicates.
_unicode_is_upper = register_jitable(
    _is_upper(
        _PyUnicode_IsLowercase, _PyUnicode_IsUppercase, _PyUnicode_IsTitlecase
    )
)
2241
+
2242
+
2243
@overload_method(types.UnicodeType, "isupper")
def unicode_isupper(a):
    """
    Implements .isupper()

    Dispatches on the string's ``_is_ascii`` flag to the ASCII or the
    Unicode checker.
    """

    def impl(a):
        if not a._is_ascii:
            return _unicode_is_upper(a)
        return _ascii_is_upper(a)

    return impl
2256
+
2257
+
2258
@overload_method(types.UnicodeType, "isascii")
def unicode_isascii(data):
    """Implements UnicodeType.isascii() by returning the ``_is_ascii`` flag."""

    def read_ascii_flag(data):
        return data._is_ascii

    return read_ascii_flag
2266
+
2267
+
2268
@overload_method(types.UnicodeType, "istitle")
def unicode_istitle(data):
    """
    Implements UnicodeType.istitle()
    The algorithm is an approximate translation from CPython:
    https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11829-L11885    # noqa: E501
    """

    def impl(data):
        n_chars = len(data)
        if n_chars == 0:
            return False
        if n_chars == 1:
            only = _get_code_point(data, 0)
            return _PyUnicode_IsUppercase(only) or _PyUnicode_IsTitlecase(only)

        found_cased = False
        after_cased = False
        for pos in range(n_chars):
            cp = _get_code_point(data, pos)
            if _PyUnicode_IsUppercase(cp) or _PyUnicode_IsTitlecase(cp):
                # An upper/title character must not follow a cased one.
                if after_cased:
                    return False
                after_cased = True
                found_cased = True
            elif _PyUnicode_IsLowercase(cp):
                # A lower-case character must follow a cased one.
                if not after_cased:
                    return False
                after_cased = True
                found_cased = True
            else:
                after_cased = False

        return found_cased

    return impl
2305
+
2306
+
2307
@overload_method(types.UnicodeType, "islower")
def unicode_islower(data):
    """
    Implements UnicodeType.islower()

    impl is an approximate translation of:
    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L11900-L11933    # noqa: E501
    mixed with:
    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/bytes_methods.c#L131-L156    # noqa: E501
    """

    def impl(data):
        n_chars = len(data)
        if n_chars == 0:
            return False
        if n_chars == 1:
            return _PyUnicode_IsLowercase(_get_code_point(data, 0))

        seen_lower = False
        for pos in range(n_chars):
            ch = _get_code_point(data, pos)
            # Any upper- or title-case character disqualifies the string.
            if _PyUnicode_IsUppercase(ch) or _PyUnicode_IsTitlecase(ch):
                return False
            if not seen_lower and _PyUnicode_IsLowercase(ch):
                seen_lower = True
        return seen_lower

    return impl
2333
+
2334
+
2335
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12126-L12161 # noqa: E501
2336
@overload_method(types.UnicodeType, "isidentifier")
def unicode_isidentifier(data):
    """Implements UnicodeType.isidentifier()

    The first code point must be an XID_Start character or an underscore
    (0x5F); every later code point must be an XID_Continue character.  The
    empty string is not an identifier.
    """

    def impl(data):
        n_chars = len(data)
        if n_chars == 0:
            return False

        head = _get_code_point(data, 0)
        if not (_PyUnicode_IsXidStart(head) or head == 0x5F):
            return False

        pos = 1
        while pos < n_chars:
            if not _PyUnicode_IsXidContinue(_get_code_point(data, pos)):
                return False
            pos += 1

        return True

    return impl
2357
+
2358
+
2359
+ # generator for simple unicode "isX" methods
2360
def gen_isX(_PyUnicode_IS_func, empty_is_false=True):
    """Build an overload body for a simple per-character ``isX`` method.

    The generated implementation returns ``True`` when every code point
    satisfies ``_PyUnicode_IS_func``.  ``empty_is_false`` selects the answer
    for the empty string: ``False`` when set, ``True`` otherwise.
    """

    def unicode_isX(data):
        def impl(data):
            n_chars = len(data)
            if empty_is_false and n_chars == 0:
                return False

            if n_chars == 1:
                return _PyUnicode_IS_func(_get_code_point(data, 0))

            for pos in range(n_chars):
                if not _PyUnicode_IS_func(_get_code_point(data, pos)):
                    return False

            return True

        return impl

    return unicode_isX
2380
+
2381
+
2382
# Each registration below attaches a gen_isX-generated method to unicode
# strings; the linked CPython source is the reference implementation.

# str.isspace
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11896-L11925    # noqa: E501
overload_method(types.UnicodeType, "isspace")(gen_isX(_PyUnicode_IsSpace))

# str.isnumeric
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12096-L12124    # noqa: E501
overload_method(types.UnicodeType, "isnumeric")(gen_isX(_PyUnicode_IsNumeric))

# str.isdigit
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12056-L12085    # noqa: E501
overload_method(types.UnicodeType, "isdigit")(gen_isX(_PyUnicode_IsDigit))

# str.isdecimal
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12017-L12045    # noqa: E501
overload_method(types.UnicodeType, "isdecimal")(
    gen_isX(_PyUnicode_IsDecimalDigit)
)

# str.isprintable: passes empty_is_false=False, so the empty string is
# reported as printable.
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12188-L12213    # noqa: E501
overload_method(types.UnicodeType, "isprintable")(
    gen_isX(_PyUnicode_IsPrintable, False)
)
2400
+
2401
+ # ------------------------------------------------------------------------------
2402
+ # String methods that apply a transformation to the characters themselves
2403
+ # ------------------------------------------------------------------------------
2404
+
2405
+
2406
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908 # noqa: E501
2407
def case_operation(ascii_func, unicode_func):
    """Generate common case operation performer.

    ``ascii_func(data, res)`` fills a same-length result for strings flagged
    as ASCII.  ``unicode_func(data, length, tmp, maxchars)`` writes the
    converted code points into ``tmp``, records the largest one in
    ``maxchars[0]`` and returns how many were written.
    """

    def impl(data):
        n_in = len(data)
        if n_in == 0:
            return _empty_string(data._kind, n_in, data._is_ascii)

        if data._is_ascii:
            out = _empty_string(data._kind, n_in, 1)
            ascii_func(data, out)
            return out

        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
        # Scratch buffer of the widest kind with room for three output code
        # points per input character.
        scratch = _empty_string(PY_UNICODE_4BYTE_KIND, 3 * n_in, data._is_ascii)
        # maxchar should be inside of a list to be pass as argument by reference
        max_seen = [0]
        n_out = unicode_func(data, n_in, scratch, max_seen)
        top = max_seen[0]
        # Size the final string by the largest code point actually produced.
        out = _empty_string(
            _codepoint_to_kind(top), n_out, _codepoint_is_ascii(top)
        )
        for pos in range(n_out):
            _set_code_point(out, pos, _get_code_point(scratch, pos))

        return out

    return impl
2434
+
2435
+
2436
+ # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9856-L9883 # noqa: E501
2437
@register_jitable
def _handle_capital_sigma(data, length, idx):
    """This is a translation of the function that handles the capital sigma.

    Returns 0x3C2 when the sigma at ``idx`` is preceded by a cased character
    and not followed by one (skipping case-ignorable characters in both
    directions); otherwise returns 0x3C3.
    """
    ch = 0
    # Walk backwards to the nearest character that is not case-ignorable.
    pos = idx - 1
    while pos >= 0:
        ch = _get_code_point(data, pos)
        if not _PyUnicode_IsCaseIgnorable(ch):
            break
        pos -= 1
    is_final = pos >= 0 and _PyUnicode_IsCased(ch)

    if is_final:
        # Walk forwards in the same way; a following cased character means
        # the sigma is not final.
        pos = idx + 1
        while pos < length:
            ch = _get_code_point(data, pos)
            if not _PyUnicode_IsCaseIgnorable(ch):
                break
            pos += 1
        is_final = pos == length or (not _PyUnicode_IsCased(ch))

    if is_final:
        return 0x3C2
    else:
        return 0x3C3
2458
+
2459
+
2460
+ # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9885-L9895 # noqa: E501
2461
@register_jitable
def _lower_ucs4(code_point, data, length, idx, mapped):
    """This is a translation of the function that lowers a character.

    Writes the lower-case mapping of ``code_point`` into ``mapped`` and
    returns the number of entries written.  Code point 0x3A3 is resolved
    through ``_handle_capital_sigma`` because its mapping depends on context.
    """
    if code_point != 0x3A3:
        return _PyUnicode_ToLowerFull(code_point, mapped)
    mapped[0] = _handle_capital_sigma(data, length, idx)
    return 1
2468
+
2469
+
2470
+ # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9946-L9965 # noqa: E501
2471
def _gen_unicode_upper_or_lower(lower):
    """Build the Unicode worker for ``lower()`` (``lower`` true) or ``upper()``.

    The returned function converts ``length`` code points of ``data``,
    writes the results into ``res``, keeps the largest produced code point
    in ``maxchars[0]`` and returns the number of code points written.
    """

    def _do_upper_or_lower(data, length, res, maxchars):
        k = 0
        # One three-entry scratch array is reused for every character
        # instead of allocating a new one per iteration.
        mapped = np.zeros(3, dtype=_Py_UCS4)
        for idx in range(length):
            mapped.fill(0)
            code_point = _get_code_point(data, idx)
            if lower:
                n_res = _lower_ucs4(code_point, data, length, idx, mapped)
            else:
                # might be needed if call _do_upper_or_lower in unicode_upper
                n_res = _PyUnicode_ToUpperFull(code_point, mapped)
            for m in mapped[:n_res]:
                maxchars[0] = max(maxchars[0], m)
                _set_code_point(res, k, m)
                k += 1
        return k

    return _do_upper_or_lower
2489
+
2490
+
2491
# Jittable Unicode workers for upper() (lower=False) and lower() (lower=True).
_unicode_upper = register_jitable(_gen_unicode_upper_or_lower(False))
_unicode_lower = register_jitable(_gen_unicode_upper_or_lower(True))
2493
+
2494
+
2495
def _gen_ascii_upper_or_lower(func):
    """Build an ASCII worker that maps every code point through ``func``.

    The returned function writes ``func(code_point)`` into ``res`` at the
    same index for each code point of ``data``.
    """

    def _ascii_upper_or_lower(data, res):
        n_chars = len(data)
        for pos in range(n_chars):
            _set_code_point(res, pos, func(_get_code_point(data, pos)))

    return _ascii_upper_or_lower
2502
+
2503
+
2504
# Jittable ASCII workers for upper() and lower().
_ascii_upper = register_jitable(_gen_ascii_upper_or_lower(_Py_TOUPPER))
_ascii_lower = register_jitable(_gen_ascii_upper_or_lower(_Py_TOLOWER))
2506
+
2507
+
2508
@overload_method(types.UnicodeType, "lower")
def unicode_lower(data):
    """Implements .lower()"""
    lower_impl = case_operation(_ascii_lower, _unicode_lower)
    return lower_impl
2512
+
2513
+
2514
@overload_method(types.UnicodeType, "upper")
def unicode_upper(data):
    """Implements .upper()"""
    upper_impl = case_operation(_ascii_upper, _unicode_upper)
    return upper_impl
2518
+
2519
+
2520
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834 # noqa: E501
2521
@register_jitable
def _unicode_casefold(data, length, res, maxchars):
    """Case-fold ``length`` code points of ``data`` into ``res``.

    Keeps the largest produced code point in ``maxchars[0]`` and returns the
    number of code points written.
    """
    written = 0
    folded = np.zeros(3, dtype=_Py_UCS4)
    for pos in range(length):
        folded.fill(0)
        n_folded = _PyUnicode_ToFoldedFull(_get_code_point(data, pos), folded)
        for cp in folded[:n_folded]:
            maxchars[0] = max(maxchars[0], cp)
            _set_code_point(res, written, cp)
            written += 1

    return written
2536
+
2537
+
2538
@register_jitable
def _ascii_casefold(data, res):
    """Case-fold an ASCII string by lower-casing every code point into ``res``."""
    n_chars = len(data)
    for pos in range(n_chars):
        _set_code_point(res, pos, _Py_TOLOWER(_get_code_point(data, pos)))
2543
+
2544
+
2545
@overload_method(types.UnicodeType, "casefold")
def unicode_casefold(data):
    """Implements str.casefold()"""
    casefold_impl = case_operation(_ascii_casefold, _unicode_casefold)
    return casefold_impl
2549
+
2550
+
2551
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759 # noqa: E501
2552
@register_jitable
def _unicode_capitalize(data, length, res, maxchars):
    """Capitalize ``data`` into ``res``.

    The first code point goes through the title-case mapping and every
    later one through the lower-case mapping.  The largest produced code
    point is stored in ``maxchars[0]``; the number of code points written is
    returned.
    """
    written = 0
    largest = 0
    buf = np.zeros(3, dtype=_Py_UCS4)

    # First character: title-case mapping.
    n_mapped = _PyUnicode_ToTitleFull(_get_code_point(data, 0), buf)
    for cp in buf[:n_mapped]:
        largest = max(largest, cp)
        _set_code_point(res, written, cp)
        written += 1

    # Remaining characters: lower-case mapping.
    for pos in range(1, length):
        buf.fill(0)
        n_mapped = _lower_ucs4(
            _get_code_point(data, pos), data, length, pos, buf
        )
        for cp in buf[:n_mapped]:
            largest = max(largest, cp)
            _set_code_point(res, written, cp)
            written += 1

    maxchars[0] = largest
    return written
2575
+
2576
+
2577
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L361-L382 # noqa: E501
2578
@register_jitable
def _ascii_capitalize(data, res):
    """Capitalize an ASCII string: upper-case index 0, lower-case the rest."""
    _set_code_point(res, 0, _Py_TOUPPER(_get_code_point(data, 0)))
    n_chars = len(data)
    for pos in range(1, n_chars):
        _set_code_point(res, pos, _Py_TOLOWER(_get_code_point(data, pos)))
2585
+
2586
+
2587
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10765-L10774 # noqa: E501
2588
@overload_method(types.UnicodeType, "capitalize")
def unicode_capitalize(data):
    """Implements str.capitalize()"""
    capitalize_impl = case_operation(_ascii_capitalize, _unicode_capitalize)
    return capitalize_impl
2591
+
2592
+
2593
+ # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9996-L10021 # noqa: E501
2594
@register_jitable
def _unicode_title(data, length, res, maxchars):
    """This is a translation of the function that titles a unicode string.

    A code point that follows a cased character is lower-cased; any other
    code point is title-cased.  The largest produced code point is kept in
    ``maxchars[0]`` and the number of code points written is returned.
    """
    written = 0
    after_cased = False
    buf = np.empty(3, dtype=_Py_UCS4)
    for pos in range(length):
        buf.fill(0)
        cp_in = _get_code_point(data, pos)
        if not after_cased:
            n_mapped = _PyUnicode_ToTitleFull(_Py_UCS4(cp_in), buf)
        else:
            n_mapped = _lower_ucs4(cp_in, data, length, pos, buf)
        for cp_out in buf[:n_mapped]:
            maxchars[0] = max(maxchars[0], cp_out)
            _set_code_point(res, written, cp_out)
            written += 1
        after_cased = _PyUnicode_IsCased(_Py_UCS4(cp_in))
    return written
2614
+
2615
+
2616
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L332-L352 # noqa: E501
2617
@register_jitable
def _ascii_title(data, res):
    """Does .title() on an ASCII string

    A lower-case letter that does not follow a cased character is
    upper-cased; an upper-case letter that does follow one is lower-cased.
    Everything else is copied unchanged.
    """
    after_cased = False
    n_chars = len(data)
    for pos in range(n_chars):
        ch = _get_code_point(data, pos)
        if _Py_ISLOWER(ch):
            if not after_cased:
                ch = _Py_TOUPPER(ch)
            after_cased = True
        elif _Py_ISUPPER(ch):
            if after_cased:
                ch = _Py_TOLOWER(ch)
            after_cased = True
        else:
            after_cased = False
        _set_code_point(res, pos, ch)
2634
+
2635
+
2636
+ # https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L10023-L10069 # noqa: E501
2637
@overload_method(types.UnicodeType, "title")
def unicode_title(data):
    """Implements str.title()"""
    # https://docs.python.org/3/library/stdtypes.html#str.title
    title_impl = case_operation(_ascii_title, _unicode_title)
    return title_impl
2642
+
2643
+
2644
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L391-L408 # noqa: E501
2645
@register_jitable
def _ascii_swapcase(data, res):
    """Swap the case of every ASCII letter of ``data`` into ``res``."""
    n_chars = len(data)
    for pos in range(n_chars):
        ch = _get_code_point(data, pos)
        if _Py_ISUPPER(ch):
            ch = _Py_TOLOWER(ch)
        elif _Py_ISLOWER(ch):
            ch = _Py_TOUPPER(ch)
        _set_code_point(res, pos, ch)
2654
+
2655
+
2656
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9761-L9784 # noqa: E501
2657
@register_jitable
def _unicode_swapcase(data, length, res, maxchars):
    """Swap the case of ``length`` code points of ``data`` into ``res``.

    Upper-case code points are lower-cased, lower-case ones upper-cased and
    all others copied.  The largest produced code point is stored in
    ``maxchars[0]``; the number of code points written is returned.
    """
    written = 0
    largest = 0
    buf = np.empty(3, dtype=_Py_UCS4)
    for pos in range(length):
        buf.fill(0)
        cp_in = _get_code_point(data, pos)
        if _PyUnicode_IsUppercase(cp_in):
            n_mapped = _lower_ucs4(cp_in, data, length, pos, buf)
        elif _PyUnicode_IsLowercase(cp_in):
            n_mapped = _PyUnicode_ToUpperFull(cp_in, buf)
        else:
            # Uncased: pass the code point through unchanged.
            n_mapped = 1
            buf[0] = cp_in
        for cp_out in buf[:n_mapped]:
            largest = max(largest, cp_out)
            _set_code_point(res, written, cp_out)
            written += 1
    maxchars[0] = largest
    return written
2678
+
2679
+
2680
@overload_method(types.UnicodeType, "swapcase")
def unicode_swapcase(data):
    """Implements str.swapcase()"""
    swapcase_impl = case_operation(_ascii_swapcase, _unicode_swapcase)
    return swapcase_impl
2683
+
2684
+
2685
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Python/bltinmodule.c#L1781-L1824 # noqa: E501
2686
@overload(ord)
def ol_ord(c):
    """Overload ``ord`` for unicode strings.

    The implementation raises ``TypeError`` unless the string has exactly
    one character, and otherwise returns that character's code point.
    """
    if not isinstance(c, types.UnicodeType):
        return None

    def impl(c):
        if len(c) != 1:
            # CPython does TypeError
            raise TypeError("ord() expected a character")
        return _get_code_point(c, 0)

    return impl
2698
+
2699
+
2700
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L2005-L2028 # noqa: E501
2701
+ # This looks a bit different to the cpython implementation but, with the
2702
+ # exception of a latin1 fast path is logically the same. It finds the "kind" of
2703
+ # the codepoint `ch`, creates a length 1 string of that kind and then injects
2704
+ # the code point into the zero position of that string. Cpython does similar but
2705
+ # branches for each kind (this is encapsulated in Numba's _set_code_point).
2706
@register_jitable
def _unicode_char(ch):
    """Return a one-character string holding code point ``ch``.

    ``ch`` must not exceed ``_MAX_UNICODE``.  The string kind is the
    narrowest that fits ``ch``.
    """
    assert ch <= _MAX_UNICODE
    kind = _codepoint_to_kind(ch)
    # The ASCII flag must come from the code point itself: the one-byte kind
    # also holds code points 128..255, which are not ASCII, so testing the
    # kind would mark those strings as ASCII.
    ret = _empty_string(kind, 1, _codepoint_is_ascii(ch))
    _set_code_point(ret, 0, ch)
    return ret
2713
+
2714
+
2715
# Error text for chr() with an out-of-range argument.  _MAX_UNICODE itself is
# a valid argument, so the exclusive upper bound shown in ``range(...)`` is
# _MAX_UNICODE + 1 (this matches CPython's wording).
_out_of_range_msg = "chr() arg not in range(0x%x)" % (_MAX_UNICODE + 1)
2716
+
2717
+
2718
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L3045-L3055 # noqa: E501
2719
@register_jitable
def _PyUnicode_FromOrdinal(ordinal):
    """Return the one-character string for ``ordinal``.

    Raises ``ValueError`` when ``ordinal`` is negative or greater than
    ``_MAX_UNICODE``.
    """
    if not (0 <= ordinal <= _MAX_UNICODE):
        raise ValueError(_out_of_range_msg)

    return _unicode_char(_Py_UCS4(ordinal))
2725
+
2726
+
2727
+ # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Python/bltinmodule.c#L715-L720 # noqa: E501
2728
@overload(chr)
def ol_chr(i):
    """Overload ``chr`` for integer arguments via ``_PyUnicode_FromOrdinal``."""
    if not isinstance(i, types.Integer):
        return None

    def impl(i):
        return _PyUnicode_FromOrdinal(i)

    return impl
2736
+
2737
+
2738
@overload_method(types.UnicodeType, "__str__")
def unicode_str(s):
    """Implements ``str.__str__``: a string is its own string form."""

    def impl(s):
        return s

    return impl
2741
+
2742
+
2743
@overload_method(types.UnicodeType, "__repr__")
def unicode_repr(s):
    """Implements ``str.__repr__`` by wrapping the string in single quotes."""

    # Can't use f-string as the impl ends up calling str and then repr, which
    # then recurses somewhere in imports.
    def impl(s):
        return "'" + s + "'"

    return impl
2748
+
2749
+
2750
@overload_method(types.Integer, "__str__")
def integer_str(n):
    """Implements ``__str__`` for integers as a base-10 ASCII string.

    Negative values get a leading ``-``; zero is returned as ``"0"``.
    """
    # The constant 10 expressed in the integer type being formatted.
    ten = n(10)

    def impl(n):
        flag = False
        if n < 0:
            n = -n
            flag = True
        if n == 0:
            return "0"
        # Count the digits with integer arithmetic.  Going through
        # floor(log10(n)) in floating point over-counts for large values
        # (e.g. 9999999999999999 rounds to 1e16), which would leave an
        # unwritten leading character in the result.
        ndigits = 0
        remaining = n
        while remaining > 0:
            remaining = remaining // ten
            ndigits += 1
        # ``flag`` adds one slot for the minus sign.
        length = flag + ndigits
        kind = PY_UNICODE_1BYTE_KIND
        char_width = _kind_to_byte_width(kind)
        s = _malloc_string(kind, char_width, length, True)
        if flag:
            _set_code_point(s, 0, ord("-"))
        # Fill the digits from the least significant end.
        idx = length - 1
        while n > 0:
            n, digit = divmod(n, ten)
            c = ord("0") + digit
            _set_code_point(s, idx, c)
            idx -= 1
        return s

    return impl
2776
+
2777
+
2778
@overload_method(types.Integer, "__repr__")
def integer_repr(n):
    """Implements ``__repr__`` for integers by delegating to ``__str__``."""

    def impl(n):
        return n.__str__()

    return impl
2781
+
2782
+
2783
@overload_method(types.Boolean, "__repr__")
@overload_method(types.Boolean, "__str__")
def boolean_str(b):
    """Implements ``__str__`` and ``__repr__`` for booleans."""

    def impl(b):
        if b:
            return "True"
        else:
            return "False"

    return impl
2787
+
2788
+
2789
+ # ------------------------------------------------------------------------------
2790
+ # iteration
2791
+ # ------------------------------------------------------------------------------
2792
+
2793
+
2794
@lower("getiter", types.UnicodeType)
def getiter_unicode(context, builder, sig, args):
    """Lower ``getiter`` for unicode strings.

    Builds the iterator struct for ``sig.return_type``, points its index
    slot at a zero-initialised ``uintp`` and stores the string in its
    ``data`` field.  The string is incref'd when NRT is enabled, and the
    struct is returned as a new reference.
    """
    (str_ty,) = sig.args
    (str_val,) = args

    it = context.make_helper(builder, sig.return_type)

    # set the index to zero
    start = context.get_constant(types.uintp, 0)
    it.index = cgutils.alloca_once_value(builder, start)

    # wire in the unicode type data
    it.data = str_val

    # incref as needed
    if context.enable_nrt:
        context.nrt.incref(builder, str_ty, str_val)

    return impl_ret_new_ref(context, builder, sig.return_type, it._getvalue())
2816
+
2817
+
2818
+ @lower("iternext", types.UnicodeIteratorType)
2819
+ # a new ref counted object is put into result._yield so set the new_ref to True!
2820
+ @iternext_impl(RefType.NEW)
2821
+ def iternext_unicode(context, builder, sig, args, result):
2822
+ [iterty] = sig.args
2823
+ [iter] = args
2824
+
2825
+ tyctx = context.typing_context
2826
+
2827
+ # get ref to unicode.__getitem__
2828
+ fnty = tyctx.resolve_value_type(operator.getitem)
2829
+ getitem_sig = fnty.get_call_type(
2830
+ tyctx, (types.unicode_type, types.uintp), {}
2831
+ )
2832
+ getitem_impl = context.get_function(fnty, getitem_sig)
2833
+
2834
+ # get ref to unicode.__len__
2835
+ fnty = tyctx.resolve_value_type(len)
2836
+ len_sig = fnty.get_call_type(tyctx, (types.unicode_type,), {})
2837
+ len_impl = context.get_function(fnty, len_sig)
2838
+
2839
+ # grab unicode iterator struct
2840
+ iterobj = context.make_helper(builder, iterty, value=iter)
2841
+
2842
+ # find the length of the string
2843
+ strlen = len_impl(builder, (iterobj.data,))
2844
+
2845
+ # find the current index
2846
+ index = builder.load(iterobj.index)
2847
+
2848
+ # see if the index is in range
2849
+ is_valid = builder.icmp_unsigned("<", index, strlen)
2850
+ result.set_valid(is_valid)
2851
+
2852
+ with builder.if_then(is_valid):
2853
+ # return value at index
2854
+ gotitem = getitem_impl(
2855
+ builder,
2856
+ (
2857
+ iterobj.data,
2858
+ index,
2859
+ ),
2860
+ )
2861
+ result.yield_(gotitem)
2862
+
2863
+ # bump index for next cycle
2864
+ nindex = cgutils.increment_index(builder, index)
2865
+ builder.store(nindex, iterobj.index)