numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,2865 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import sys
|
|
5
|
+
import operator
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from llvmlite.ir import IntType, Constant
|
|
9
|
+
|
|
10
|
+
from numba.cuda.cgutils import is_nonelike
|
|
11
|
+
from numba.cuda.extending import (
|
|
12
|
+
NativeValue,
|
|
13
|
+
overload,
|
|
14
|
+
overload_method,
|
|
15
|
+
register_jitable,
|
|
16
|
+
)
|
|
17
|
+
from numba.cuda.extending import models
|
|
18
|
+
from numba.cuda.core.pythonapi import box, unbox
|
|
19
|
+
from numba.cuda.extending import make_attribute_wrapper, intrinsic
|
|
20
|
+
from numba.cuda.models import register_model
|
|
21
|
+
from numba.cuda.core.imputils import (
|
|
22
|
+
iternext_impl,
|
|
23
|
+
impl_ret_new_ref,
|
|
24
|
+
RefType,
|
|
25
|
+
Registry,
|
|
26
|
+
)
|
|
27
|
+
from numba.cuda.datamodel import register_default, StructModel
|
|
28
|
+
from numba.cuda import types
|
|
29
|
+
from numba.cuda import cgutils
|
|
30
|
+
from numba.cuda.utils import PYVERSION
|
|
31
|
+
from numba.cuda.core.pythonapi import (
|
|
32
|
+
PY_UNICODE_1BYTE_KIND,
|
|
33
|
+
PY_UNICODE_2BYTE_KIND,
|
|
34
|
+
PY_UNICODE_4BYTE_KIND,
|
|
35
|
+
)
|
|
36
|
+
from numba.cuda.cext._helperlib import c_helpers
|
|
37
|
+
from numba.cuda.core.unsafe.bytes import memcpy_region
|
|
38
|
+
from numba.cuda.core.errors import TypingError
|
|
39
|
+
from numba.cuda.cpython.unicode_support import (
|
|
40
|
+
_Py_TOUPPER,
|
|
41
|
+
_Py_TOLOWER,
|
|
42
|
+
_Py_UCS4,
|
|
43
|
+
_Py_ISALNUM,
|
|
44
|
+
_PyUnicode_ToUpperFull,
|
|
45
|
+
_PyUnicode_ToLowerFull,
|
|
46
|
+
_PyUnicode_ToFoldedFull,
|
|
47
|
+
_PyUnicode_ToTitleFull,
|
|
48
|
+
_PyUnicode_IsPrintable,
|
|
49
|
+
_PyUnicode_IsSpace,
|
|
50
|
+
_Py_ISSPACE,
|
|
51
|
+
_PyUnicode_IsXidStart,
|
|
52
|
+
_PyUnicode_IsXidContinue,
|
|
53
|
+
_PyUnicode_IsCased,
|
|
54
|
+
_PyUnicode_IsCaseIgnorable,
|
|
55
|
+
_PyUnicode_IsUppercase,
|
|
56
|
+
_PyUnicode_IsLowercase,
|
|
57
|
+
_PyUnicode_IsLineBreak,
|
|
58
|
+
_Py_ISLINEBREAK,
|
|
59
|
+
_Py_ISLINEFEED,
|
|
60
|
+
_Py_ISCARRIAGERETURN,
|
|
61
|
+
_PyUnicode_IsTitlecase,
|
|
62
|
+
_Py_ISLOWER,
|
|
63
|
+
_Py_ISUPPER,
|
|
64
|
+
_Py_TAB,
|
|
65
|
+
_Py_LINEFEED,
|
|
66
|
+
_Py_CARRIAGE_RETURN,
|
|
67
|
+
_Py_SPACE,
|
|
68
|
+
_PyUnicode_IsAlpha,
|
|
69
|
+
_PyUnicode_IsNumeric,
|
|
70
|
+
_Py_ISALPHA,
|
|
71
|
+
_PyUnicode_IsDigit,
|
|
72
|
+
_PyUnicode_IsDecimalDigit,
|
|
73
|
+
)
|
|
74
|
+
from numba.cuda.cpython import slicing
|
|
75
|
+
|
|
76
|
+
# Bit width of hash values on the running interpreter, and the matching
# fixed-width integer type looked up by name on ``types``.
_hash_width = sys.hash_info.width
_Py_hash_t = getattr(types, "int%s" % _hash_width)

# Lowering registry for this module, plus short aliases for its decorators.
registry = Registry("unicode")
lower = registry.lower
lower_cast = registry.lower_cast
lower_constant = registry.lower_constant
lower_getattr = registry.lower_getattr

# PY_UNICODE_WCHAR_KIND is only imported on Python 3.9 through 3.11.
# NOTE(review): presumably the constant does not exist for newer
# interpreters -- confirm against numba.cuda.core.pythonapi.
if PYVERSION in ((3, 9), (3, 10), (3, 11)):
    from numba.cuda.core.pythonapi import PY_UNICODE_WCHAR_KIND

# Largest code point value; mirrors the definition in CPython:
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L84-L85 # noqa: E501
_MAX_UNICODE = 0x10FFFF

# Bloom mask width, taken from the bit width of ``types.intp``; see:
# https://github.com/python/cpython/blob/1960eb005e04b7ad8a91018088cfdb0646bc1ca0/Objects/stringlib/fastsearch.h#L31 # noqa: E501
_BLOOM_WIDTH = types.intp.bitwidth
|
|
93
|
+
|
|
94
|
+
# DATA MODEL
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@register_model(types.UnicodeType)
class UnicodeModel(models.StructModel):
    """Struct data model registered for ``types.UnicodeType``.

    The struct carries seven members: ``data``, ``length``, ``kind``,
    ``is_ascii``, ``hash``, ``meminfo`` and ``parent``.
    """

    def __init__(self, dmm, fe_type):
        # Member order defines the struct layout; keep it unchanged.
        layout = [
            ("data", types.voidptr),
            ("length", types.intp),
            ("kind", types.int32),
            ("is_ascii", types.uint32),
            ("hash", _Py_hash_t),
            ("meminfo", types.MemInfoPointer(types.voidptr)),
            # Pointer to the Python str/unicode object that owns the data.
            ("parent", types.pyobject),
        ]
        super().__init__(dmm, fe_type, layout)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# Expose selected struct members of ``types.UnicodeType`` as underscore-
# prefixed attributes (struct member name -> attribute name).
make_attribute_wrapper(types.UnicodeType, "data", "_data")
make_attribute_wrapper(types.UnicodeType, "length", "_length")
make_attribute_wrapper(types.UnicodeType, "kind", "_kind")
make_attribute_wrapper(types.UnicodeType, "is_ascii", "_is_ascii")
make_attribute_wrapper(types.UnicodeType, "hash", "_hash")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@register_default(types.UnicodeIteratorType)
class UnicodeIteratorModel(StructModel):
    """Struct data model for iterators over unicode strings."""

    def __init__(self, dmm, fe_type):
        fields = [
            # Pointer to the current iteration index
            ("index", types.EphemeralPointer(types.uintp)),
            # The value being iterated, typed as the iterator's `data` type
            ("data", fe_type.data),
        ]
        super().__init__(dmm, fe_type, fields)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# CAST
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def compile_time_get_string_data(obj):
    """Extract the raw contents of a Python string at compile time.

    The ``extract_unicode`` C helper is called through ctypes and the
    character buffer it reports is copied out, so that the bytes can be
    embedded into the LLVM module.

    Returns a tuple ``(databytes, length, kind, is_ascii, hash)``.

    Raises ValueError if the helper returns a NULL data pointer.
    """
    import ctypes as ct

    ssize_ptr = ct.POINTER(ct.c_ssize_t)
    prototype = ct.CFUNCTYPE(
        ct.c_void_p,  # return: address of the character data
        ct.py_object,  # the string object
        ssize_ptr,  # out: length
        ct.POINTER(ct.c_int),  # out: kind
        ct.POINTER(ct.c_uint),  # out: is_ascii
        ssize_ptr,  # out: hash
    )
    extract = prototype(c_helpers["extract_unicode"])

    length_out = ct.c_ssize_t()
    kind_out = ct.c_int()
    ascii_out = ct.c_uint()
    hash_out = ct.c_ssize_t()
    address = extract(
        obj,
        ct.byref(length_out),
        ct.byref(kind_out),
        ct.byref(ascii_out),
        ct.byref(hash_out),
    )
    # A NULL c_void_p result is surfaced by ctypes as None.
    if address is None:
        raise ValueError("cannot extract unicode data from the given string")

    n_chars = length_out.value
    str_kind = kind_out.value
    # Copy one character more than `length` (presumably the terminator).
    n_bytes = (n_chars + 1) * _kind_to_byte_width(str_kind)
    raw = (ct.c_ubyte * n_bytes).from_address(address)
    return bytes(raw), n_chars, str_kind, ascii_out.value, hash_out.value
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def make_string_from_constant(context, builder, typ, literal_string):
    """
    Build a unicode_type LLVM value for a compile-time constant string.

    The character data comes from `compile_time_get_string_data()` and is
    stored as constant bytes in the builder's module.
    """
    raw, n_chars, str_kind, ascii_flag, _ = compile_time_get_string_data(
        literal_string
    )
    const_data = context.insert_const_bytes(builder.module, raw)
    proxy = cgutils.create_struct_proxy(typ)(context, builder)
    proxy.data = const_data
    proxy.length = proxy.length.type(n_chars)
    proxy.kind = proxy.kind.type(str_kind)
    proxy.is_ascii = proxy.is_ascii.type(ascii_flag)
    # Set hash to -1 to indicate that it should be computed.
    # We cannot bake in the hash value because of hashseed randomization.
    proxy.hash = proxy.hash.type(-1)
    return proxy._getvalue()
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@lower_cast(types.StringLiteral, types.unicode_type)
def cast_from_literal(context, builder, fromty, toty, val):
    """Lower a StringLiteral -> unicode_type cast from the literal's value."""
    text = fromty.literal_value
    return make_string_from_constant(context, builder, toty, text)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# CONSTANT
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@lower_constant(types.unicode_type)
def constant_unicode(context, builder, typ, pyval):
    """Lower a Python string constant to a unicode_type value."""
    const_value = make_string_from_constant(context, builder, typ, pyval)
    return const_value
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# BOXING
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
@unbox(types.UnicodeType)
def unbox_unicode_str(typ, obj, c):
    """
    Convert a unicode str object to a native unicode structure.

    The struct members are filled from what
    ``string_as_string_size_and_kind`` reports for ``obj``, and a meminfo is
    created that ties the data pointer to ``obj``.

    Returns a NativeValue whose ``is_error`` flag is set when a Python error
    is pending after the calls above.
    """
    # `ok` is not consulted here; errors are detected via err_occurred()
    # at the end of the function.
    ok, data, length, kind, is_ascii, hashv = (
        c.pyapi.string_as_string_size_and_kind(obj)
    )
    uni_str = cgutils.create_struct_proxy(typ)(c.context, c.builder)
    uni_str.data = data
    uni_str.length = length
    uni_str.kind = kind
    uni_str.is_ascii = is_ascii
    uni_str.hash = hashv
    uni_str.meminfo = c.pyapi.nrt_meminfo_new_from_pyobject(
        data,  # the borrowed data pointer
        obj,  # the owner pyobject; the call will incref it.
    )
    # Remember the originating Python object.
    uni_str.parent = obj

    is_error = cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
    return NativeValue(uni_str._getvalue(), is_error=is_error)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@box(types.UnicodeType)
def box_unicode_str(typ, val, c):
    """
    Convert a native unicode structure to a unicode string

    A new Python string is created from the struct's kind, data pointer and
    length; the native value is then decref'ed and the new object returned.
    """
    uni_str = cgutils.create_struct_proxy(typ)(c.context, c.builder, value=val)
    res = c.pyapi.string_from_kind_and_data(
        uni_str.kind, uni_str.data, uni_str.length
    )
    # The hash is not needed here; it is computed only so that it ends up in
    # the unicode object's hash cache. CPython does not always do this (it
    # depends on how the string was created). Doing so is safe; it merely
    # burns the cycles required to hash at @box time.
    c.pyapi.object_hash(res)
    # Drop the native value's reference; `res` has already been created.
    c.context.nrt.decref(c.builder, typ, val)
    return res
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
# HELPER FUNCTIONS
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def make_deref_codegen(bitsize):
    """Return a codegen callback that loads one ``bitsize``-bit element.

    The generated code treats the first argument as a pointer to
    ``bitsize``-bit integers, loads the element at the given index and
    zero-extends it to 32 bits.
    """

    def codegen(context, builder, signature, args):
        base, index = args
        typed_base = builder.bitcast(base, IntType(bitsize).as_pointer())
        slot = builder.gep(typed_base, [index])
        element = builder.load(slot)
        return builder.zext(element, IntType(32))

    return codegen
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@intrinsic
def deref_uint8(typingctx, data, offset):
    """Intrinsic: read the 8-bit element at ``offset`` from ``data``."""
    return types.uint32(types.voidptr, types.intp), make_deref_codegen(8)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
@intrinsic
def deref_uint16(typingctx, data, offset):
    """Intrinsic: read the 16-bit element at ``offset`` from ``data``."""
    return types.uint32(types.voidptr, types.intp), make_deref_codegen(16)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
@intrinsic
def deref_uint32(typingctx, data, offset):
    """Intrinsic: read the 32-bit element at ``offset`` from ``data``."""
    return types.uint32(types.voidptr, types.intp), make_deref_codegen(32)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
@intrinsic
def _malloc_string(typingctx, kind, char_bytes, length, is_ascii):
    """Make a new string struct with a freshly allocated data buffer.

    The buffer is sized for ``length + 1`` characters of ``char_bytes``
    bytes each; the extra character is the null padding. ``kind``,
    ``is_ascii`` and ``length`` are stored in the struct as given, the hash
    is set to -1 and the parent pointer to NULL. The character data itself
    is not written here.
    """

    def details(context, builder, signature, args):
        [kind_val, char_bytes_val, length_val, is_ascii_val] = args

        # fill the struct
        uni_str_ctor = cgutils.create_struct_proxy(types.unicode_type)
        uni_str = uni_str_ctor(context, builder)
        # add null padding character
        nbytes_val = builder.mul(
            char_bytes_val,
            builder.add(length_val, Constant(length_val.type, 1)),
        )
        uni_str.meminfo = context.nrt.meminfo_alloc(builder, nbytes_val)
        uni_str.kind = kind_val
        uni_str.is_ascii = is_ascii_val
        uni_str.length = length_val
        # empty string has hash value -1 to indicate "need to compute hash"
        uni_str.hash = context.get_constant(_Py_hash_t, -1)
        # The data pointer is the payload of the meminfo allocated above.
        uni_str.data = context.nrt.meminfo_data(builder, uni_str.meminfo)
        # Set parent to NULL
        uni_str.parent = cgutils.get_null_value(uni_str.parent.type)
        return uni_str._getvalue()

    # Signature: (kind, char_bytes, length, is_ascii) -> unicode_type
    sig = types.unicode_type(types.int32, types.intp, types.intp, types.uint32)
    return sig, details
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
@register_jitable
def _empty_string(kind, length, is_ascii=0):
    """Allocate a string of the given kind and length.

    Only the character at index ``length`` is written (set to 0); the other
    characters are left for the caller to fill in.
    """
    out = _malloc_string(kind, _kind_to_byte_width(kind), length, is_ascii)
    _set_code_point(out, length, np.uint32(0))  # Write NULL character
    return out
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
# Disable RefCt for performance.
|
|
330
|
+
@register_jitable(_nrt=False)
def _get_code_point(a, i):
    """Return the code point stored at index ``i`` of string ``a``.

    The element width is chosen from ``a._kind``. No bounds checking is
    performed. An unrecognised kind yields 0.
    """
    if a._kind == PY_UNICODE_1BYTE_KIND:
        return deref_uint8(a._data, i)
    if a._kind == PY_UNICODE_2BYTE_KIND:
        return deref_uint16(a._data, i)
    if a._kind == PY_UNICODE_4BYTE_KIND:
        return deref_uint32(a._data, i)
    # there's also a wchar kind, but that's one of the above,
    # so skipping for this example
    return 0
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
####
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def make_set_codegen(bitsize):
    """Return a codegen callback that stores one ``bitsize``-bit element.

    The generated code treats the first argument as a pointer to
    ``bitsize``-bit integers and stores the given value at the given index,
    truncating it first when ``bitsize`` is below 32.
    """

    def codegen(context, builder, signature, args):
        base, index, value = args
        if bitsize < 32:
            value = builder.trunc(value, IntType(bitsize))
        typed_base = builder.bitcast(base, IntType(bitsize).as_pointer())
        slot = builder.gep(typed_base, [index])
        builder.store(value, slot)
        return context.get_dummy_value()

    return codegen
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
@intrinsic
def set_uint8(typingctx, data, idx, ch):
    """Intrinsic: store ``ch`` as an 8-bit element at ``idx`` of ``data``."""
    signature = types.void(types.voidptr, types.int64, types.uint32)
    return signature, make_set_codegen(8)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
@intrinsic
def set_uint16(typingctx, data, idx, ch):
    """Intrinsic: store ``ch`` as a 16-bit element at ``idx`` of ``data``."""
    signature = types.void(types.voidptr, types.int64, types.uint32)
    return signature, make_set_codegen(16)
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
@intrinsic
def set_uint32(typingctx, data, idx, ch):
    """Intrinsic: store ``ch`` as a 32-bit element at ``idx`` of ``data``."""
    signature = types.void(types.voidptr, types.int64, types.uint32)
    return signature, make_set_codegen(32)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
@register_jitable(_nrt=False)
def _set_code_point(a, i, ch):
    """Write code point ``ch`` at index ``i`` of string ``a`` in place.

    WARNING: This method is very dangerous:
      * It assumes the data contents may be changed (only allowed for new
        strings).
      * It assumes the kind of the string is wide enough to hold ``ch``;
        ``ch`` is truncated to make it fit.
      * It assumes ``i`` is within valid bounds.

    Raises AssertionError for an unrecognised kind.
    """
    str_kind = a._kind
    if str_kind == PY_UNICODE_1BYTE_KIND:
        set_uint8(a._data, i, ch)
    elif str_kind == PY_UNICODE_2BYTE_KIND:
        set_uint16(a._data, i, ch)
    elif str_kind == PY_UNICODE_4BYTE_KIND:
        set_uint32(a._data, i, ch)
    else:
        raise AssertionError(
            "Unexpected unicode representation in _set_code_point"
        )
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
if PYVERSION in ((3, 12), (3, 13)):

    @register_jitable
    def _pick_kind(kind1, kind2):
        """Pick the kind to use when combining strings of two kinds."""
        if kind1 == PY_UNICODE_1BYTE_KIND:
            return kind2
        if kind1 == PY_UNICODE_2BYTE_KIND:
            if kind2 == PY_UNICODE_4BYTE_KIND:
                return kind2
            return kind1
        if kind1 == PY_UNICODE_4BYTE_KIND:
            return kind1
        raise AssertionError("Unexpected unicode representation in _pick_kind")

elif PYVERSION in ((3, 9), (3, 10), (3, 11)):

    @register_jitable
    def _pick_kind(kind1, kind2):
        """Pick the kind to use when combining strings of two kinds.

        The wchar kind is rejected up front for either argument.
        """
        if kind1 == PY_UNICODE_WCHAR_KIND or kind2 == PY_UNICODE_WCHAR_KIND:
            raise AssertionError("PY_UNICODE_WCHAR_KIND unsupported")

        if kind1 == PY_UNICODE_1BYTE_KIND:
            return kind2
        if kind1 == PY_UNICODE_2BYTE_KIND:
            if kind2 == PY_UNICODE_4BYTE_KIND:
                return kind2
            return kind1
        if kind1 == PY_UNICODE_4BYTE_KIND:
            return kind1
        raise AssertionError("Unexpected unicode representation in _pick_kind")

else:
    raise NotImplementedError(PYVERSION)
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
@register_jitable
def _pick_ascii(is_ascii1, is_ascii2):
    """Return uint32(1) only when both flags equal 1, otherwise uint32(0)."""
    if is_ascii1 != 1 or is_ascii2 != 1:
        return types.uint32(0)
    return types.uint32(1)
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
if PYVERSION in ((3, 12), (3, 13)):

    @register_jitable
    def _kind_to_byte_width(kind):
        """Return the number of bytes per character for ``kind``."""
        if kind == PY_UNICODE_1BYTE_KIND:
            return 1
        if kind == PY_UNICODE_2BYTE_KIND:
            return 2
        if kind == PY_UNICODE_4BYTE_KIND:
            return 4
        raise AssertionError("Unexpected unicode encoding encountered")

elif PYVERSION in ((3, 9), (3, 10), (3, 11)):

    @register_jitable
    def _kind_to_byte_width(kind):
        """Return the number of bytes per character for ``kind``.

        The wchar kind is explicitly reported as unsupported.
        """
        if kind == PY_UNICODE_1BYTE_KIND:
            return 1
        if kind == PY_UNICODE_2BYTE_KIND:
            return 2
        if kind == PY_UNICODE_4BYTE_KIND:
            return 4
        if kind == PY_UNICODE_WCHAR_KIND:
            raise AssertionError("PY_UNICODE_WCHAR_KIND unsupported")
        raise AssertionError("Unexpected unicode encoding encountered")

else:
    raise NotImplementedError(PYVERSION)
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
@register_jitable(_nrt=False)
def _cmp_region(a, a_offset, b, b_offset, n):
    """Compare ``n`` code points of ``a`` and ``b`` from the given offsets.

    Returns -1, 0 or 1. A region that would run past the end of ``a``
    yields -1 and one that would run past the end of ``b`` yields 1 (checked
    in that order); ``n == 0`` always yields 0.
    """
    if n == 0:
        return 0
    if a_offset + n > a._length:
        return -1
    if b_offset + n > b._length:
        return 1

    for k in range(n):
        left = _get_code_point(a, a_offset + k)
        right = _get_code_point(b, b_offset + k)
        if left < right:
            return -1
        if left > right:
            return 1

    return 0
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
@register_jitable
def _codepoint_to_kind(cp):
    """
    Return the narrowest unicode kind able to store code point ``cp``.

    Raises ValueError when ``cp`` exceeds the Unicode maximum.
    """
    if cp < 0x100:
        return PY_UNICODE_1BYTE_KIND
    if cp < 0x10000:
        return PY_UNICODE_2BYTE_KIND

    # Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
    upper_limit = 0x10FFFF
    if cp > upper_limit:
        msg = "Invalid codepoint. Found value greater than Unicode maximum"
        raise ValueError(msg)
    return PY_UNICODE_4BYTE_KIND
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
@register_jitable
def _codepoint_is_ascii(ch):
    """
    Tell whether code point ``ch`` lies below 128, i.e. in the ASCII range.
    """
    return ch < 0x80
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
# PUBLIC API
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
@overload(len)
def unicode_len(s):
    """Overload of len() for unicode strings: returns the stored length."""
    if not isinstance(s, types.UnicodeType):
        return None

    def len_impl(s):
        return s._length

    return len_impl
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
@overload(operator.eq)
def unicode_eq(a, b):
    """Overload of ``==`` for unicode-like operands.

    Optional wrappers are looked through at typing time. When both operands
    are unicode-like the contents are compared; when exactly one is, the
    comparison is always False. Otherwise no implementation is provided.
    """
    if not a.is_internal or not b.is_internal:
        return

    # Look through Optional to the wrapped type.
    check_a = a.type if isinstance(a, types.Optional) else a
    check_b = b.type if isinstance(b, types.Optional) else b

    accept = (types.UnicodeType, types.StringLiteral, types.UnicodeCharSeq)
    a_unicode = isinstance(check_a, accept)
    b_unicode = isinstance(check_b, accept)

    if a_unicode and b_unicode:

        def eq_impl(a, b):
            # handle Optionals at runtime
            a_none = a is None
            b_none = b is None
            if a_none or b_none:
                if a_none and b_none:
                    return True
                else:
                    return False
            # the str() is for UnicodeCharSeq, it's a nop else
            a = str(a)
            b = str(b)
            if len(a) != len(b):
                return False
            return _cmp_region(a, 0, b, 0, len(a)) == 0

        return eq_impl

    if a_unicode ^ b_unicode:
        # one of the things is unicode, everything compares False
        def eq_impl(a, b):
            return False

        return eq_impl
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
@overload(operator.ne)
def unicode_ne(a, b):
    """Overload of ``!=`` for unicode-like operands.

    When both operands are unicode-like the result is the negation of
    ``==``; when exactly one is, the result is always True. Otherwise no
    implementation is provided.
    """
    if not a.is_internal or not b.is_internal:
        return

    accept = (types.UnicodeType, types.StringLiteral, types.UnicodeCharSeq)
    a_unicode = isinstance(a, accept)
    b_unicode = isinstance(b, accept)

    if a_unicode and b_unicode:

        def ne_impl(a, b):
            return not (a == b)

        return ne_impl

    if a_unicode ^ b_unicode:
        # one of the things is unicode, everything compares True
        def ne_mixed_impl(a, b):
            return True

        return ne_mixed_impl
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
@overload(operator.lt)
def unicode_lt(a, b):
    """Overload of ``<`` for two unicode strings (or string literals)."""
    string_like = (types.UnicodeType, types.StringLiteral)
    if not (isinstance(a, string_like) and isinstance(b, string_like)):
        return None

    def lt_impl(a, b):
        len_a, len_b = len(a), len(b)
        # Compare the common prefix; on a tie the shorter string is smaller.
        order = _cmp_region(a, 0, b, 0, min(len_a, len_b))
        if order == 0:
            return len_a < len_b
        return order == -1

    return lt_impl
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
@overload(operator.gt)
def unicode_gt(a, b):
    """Overload of ``>`` for two unicode strings (or string literals)."""
    string_like = (types.UnicodeType, types.StringLiteral)
    if not (isinstance(a, string_like) and isinstance(b, string_like)):
        return None

    def gt_impl(a, b):
        len_a, len_b = len(a), len(b)
        # Compare the common prefix; on a tie the longer string is greater.
        order = _cmp_region(a, 0, b, 0, min(len_a, len_b))
        if order == 0:
            return len_a > len_b
        return order == 1

    return gt_impl
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
@overload(operator.le)
def unicode_le(a, b):
    """Overload of ``<=`` for unicode strings, defined as ``not (a > b)``."""
    string_like = (types.UnicodeType, types.StringLiteral)
    if not (isinstance(a, string_like) and isinstance(b, string_like)):
        return None

    def le_impl(a, b):
        return not (a > b)

    return le_impl
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
@overload(operator.ge)
def unicode_ge(a, b):
    """Overload of ``>=`` for unicode strings, defined as ``not (a < b)``."""
    string_like = (types.UnicodeType, types.StringLiteral)
    if not (isinstance(a, string_like) and isinstance(b, string_like)):
        return None

    def ge_impl(a, b):
        return not (a < b)

    return ge_impl
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
@overload(operator.contains)
def unicode_contains(a, b):
    """Overload of ``b in a`` for two unicode strings."""
    both_unicode = isinstance(a, types.UnicodeType) and isinstance(
        b, types.UnicodeType
    )
    if not both_unicode:
        return None

    def contains_impl(a, b):
        # note parameter swap: contains(a, b) == b in a
        position = _find(a, b)
        return position > -1

    return contains_impl
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
def unicode_idx_check_type(ty, name):
    """Validate that an index argument is typed as an integer or None.

    ty: type
        Type of the object
    name: str
        Name of the object (used in the error message)

    Raises TypingError when the type is not accepted.
    """
    if isinstance(ty, types.Omitted):
        # if the type is omitted, the concrete type is the value
        concrete = ty.value
    elif isinstance(ty, types.Optional):
        # if the type is optional, the concrete type is the captured type
        concrete = ty.type
    else:
        concrete = ty

    accepted = (types.Integer, types.NoneType)
    if concrete is None or isinstance(concrete, accepted):
        return
    raise TypingError('"{}" must be {}, not {}'.format(name, accepted, ty))
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
def unicode_sub_check_type(ty, name):
    """Raise TypingError unless ``ty`` is a unicode type.

    ``name`` is the argument name reported in the error message.
    """
    if isinstance(ty, types.UnicodeType):
        return
    raise TypingError(
        '"{}" must be {}, not {}'.format(name, types.UnicodeType, ty)
    )
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
# FAST SEARCH algorithm implementation from cpython
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
@register_jitable
def _bloom_add(mask, ch):
    """Return ``mask`` with the bloom bit for code point ``ch`` set.

    The bit index is ``ch`` reduced to the low bits selected by
    ``_BLOOM_WIDTH - 1``.
    """
    bit = 1 << (ch & (_BLOOM_WIDTH - 1))
    mask |= bit
    return mask
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
@register_jitable
def _bloom_check(mask, ch):
    """Return a non-zero value when the bloom bit for ``ch`` is set in
    ``mask``, and 0 otherwise."""
    bit = 1 << (ch & (_BLOOM_WIDTH - 1))
    return mask & bit
|
|
707
|
+
|
|
708
|
+
|
|
709
|
+
# https://github.com/python/cpython/blob/1960eb005e04b7ad8a91018088cfdb0646bc1ca0/Objects/stringlib/fastsearch.h#L550 # noqa: E501
|
|
710
|
+
@register_jitable
def _default_find(data, substr, start, end):
    """Left finder.

    Search ``data`` for the first occurrence of ``substr`` whose start index
    ``i`` satisfies ``start <= i <= end - len(substr)``. Returns that index,
    or -1 when there is none. An empty ``substr`` returns ``start``.
    """
    m = len(substr)
    if m == 0:
        return start

    # `mlast` is the index of the last needle character; `gap` is how far to
    # advance after a failed candidate whose last character matched.
    gap = mlast = m - 1
    last = _get_code_point(substr, mlast)

    # Build the bloom mask over all needle characters and shrink `gap` when
    # the last character also occurs earlier in the needle.
    zero = types.intp(0)
    mask = _bloom_add(zero, last)
    for i in range(mlast):
        ch = _get_code_point(substr, i)
        mask = _bloom_add(mask, ch)
        if ch == last:
            gap = mlast - i - 1

    i = start
    while i <= end - m:
        # Compare the last character of the current window first.
        ch = _get_code_point(data, mlast + i)
        if ch == last:
            j = 0
            while j < mlast:
                haystack_ch = _get_code_point(data, i + j)
                needle_ch = _get_code_point(substr, j)
                if haystack_ch != needle_ch:
                    break
                j += 1
            if j == mlast:
                # got a match
                return i

            # Miss: look at the character just past the window. If it is
            # not in the needle at all, skip the whole window.
            ch = _get_code_point(data, mlast + i + 1)
            if _bloom_check(mask, ch) == 0:
                i += m
            else:
                i += gap
        else:
            # Same look-ahead when the last character did not match.
            ch = _get_code_point(data, mlast + i + 1)
            if _bloom_check(mask, ch) == 0:
                i += m
        i += 1

    return -1
|
|
755
|
+
|
|
756
|
+
|
|
757
|
+
@register_jitable
def _default_rfind(data, substr, start, end):
    """Right finder.

    Search ``data`` for the last occurrence of ``substr`` whose start index
    ``i`` satisfies ``start <= i <= end - len(substr)``. Returns that index,
    or -1 when there is none. An empty ``substr`` returns ``end``.
    """
    m = len(substr)
    if m == 0:
        return end

    # `mlast` is the index of the last needle character; `skip` is how far
    # to step back after a failed candidate whose first character matched.
    skip = mlast = m - 1
    mfirst = _get_code_point(substr, 0)

    # Build the bloom mask over all needle characters and shrink `skip` when
    # the first character also occurs later in the needle.
    mask = _bloom_add(0, mfirst)
    i = mlast
    while i > 0:
        ch = _get_code_point(substr, i)
        mask = _bloom_add(mask, ch)
        if ch == mfirst:
            skip = i - 1
        i -= 1

    i = end - m
    while i >= start:
        ch = _get_code_point(data, i)
        if ch == mfirst:
            j = mlast
            while j > 0:
                haystack_ch = _get_code_point(data, i + j)
                needle_ch = _get_code_point(substr, j)
                if haystack_ch != needle_ch:
                    break
                j -= 1

            if j == 0:
                # got a match
                return i

            # Miss: look at the character just before the window. The read
            # is guarded by `i > start` so that index i - 1 is never
            # dereferenced at the lower bound (it would be -1 when i == 0).
            if (
                i > start
                and _bloom_check(mask, _get_code_point(data, i - 1)) == 0
            ):
                i -= m
            else:
                i -= skip

        else:
            # Same guarded look-behind when the first character did not
            # match.
            if (
                i > start
                and _bloom_check(mask, _get_code_point(data, i - 1)) == 0
            ):
                i -= m
        i -= 1

    return -1
|
|
804
|
+
|
|
805
|
+
|
|
806
|
+
def generate_finder(find_func):
    """Wrap a low-level finder (left or right) with index normalisation.

    The returned function fills in ``start``/``end`` defaults, adjusts them
    with `_adjust_indices`, returns -1 when the range is shorter than the
    substring, and otherwise delegates to ``find_func``.
    """

    def impl(data, substr, start=None, end=None):
        haystack_len = len(data)
        needle_len = len(substr)
        if start is None:
            start = 0
        if end is None:
            end = haystack_len

        start, end = _adjust_indices(haystack_len, start, end)
        if end - start < needle_len:
            # the search range cannot hold the substring
            return -1

        return find_func(data, substr, start, end)

    return impl
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
# Jittable left/right finders: the index-normalising wrapper from
# generate_finder() around the corresponding low-level search routine.
_find = register_jitable(generate_finder(_default_find))
_rfind = register_jitable(generate_finder(_default_rfind))
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
@overload_method(types.UnicodeType, "find")
def unicode_find(data, substr, start=None, end=None):
    """Implements str.find()

    A UnicodeCharSeq ``substr`` is converted with ``str()`` and the call is
    re-dispatched, forwarding ``start`` and ``end``. Otherwise ``start`` and
    ``end`` must be typed as integers or None and ``substr`` as a unicode
    string; a TypingError is raised if not.
    """
    if isinstance(substr, types.UnicodeCharSeq):

        def find_impl(data, substr, start=None, end=None):
            # Forward the search bounds; dropping them would make the
            # search ignore the caller's start/end.
            return data.find(str(substr), start, end)

        return find_impl

    unicode_idx_check_type(start, "start")
    unicode_idx_check_type(end, "end")
    unicode_sub_check_type(substr, "substr")

    return _find
|
|
845
|
+
|
|
846
|
+
|
|
847
|
+
@overload_method(types.UnicodeType, "rfind")
def unicode_rfind(data, substr, start=None, end=None):
    """Implements str.rfind()

    A UnicodeCharSeq ``substr`` is converted with ``str()`` and the call is
    re-dispatched, forwarding ``start`` and ``end``. Otherwise ``start`` and
    ``end`` must be typed as integers or None and ``substr`` as a unicode
    string; a TypingError is raised if not.
    """
    if isinstance(substr, types.UnicodeCharSeq):

        def rfind_impl(data, substr, start=None, end=None):
            # Forward the search bounds; dropping them would make the
            # search ignore the caller's start/end.
            return data.rfind(str(substr), start, end)

        return rfind_impl

    unicode_idx_check_type(start, "start")
    unicode_idx_check_type(end, "end")
    unicode_sub_check_type(substr, "substr")

    return _rfind
|
|
862
|
+
|
|
863
|
+
|
|
864
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12831-L12857 # noqa: E501
|
|
865
|
+
@overload_method(types.UnicodeType, "rindex")
def unicode_rindex(s, sub, start=None, end=None):
    """Implements str.rindex()

    Behaves like ``rfind`` but raises ValueError when the substring is not
    found.
    """
    unicode_idx_check_type(start, "start")
    unicode_idx_check_type(end, "end")
    unicode_sub_check_type(sub, "sub")

    def rindex_impl(s, sub, start=None, end=None):
        position = s.rfind(sub, start, end)
        if position >= 0:
            return position
        raise ValueError("substring not found")

    return rindex_impl
|
|
880
|
+
|
|
881
|
+
|
|
882
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11692-L11718 # noqa: E501
|
|
883
|
+
@overload_method(types.UnicodeType, "index")
def unicode_index(s, sub, start=None, end=None):
    """Implements str.index()

    Behaves like ``find`` but raises ValueError when the substring is not
    found.
    """
    unicode_idx_check_type(start, "start")
    unicode_idx_check_type(end, "end")
    unicode_sub_check_type(sub, "sub")

    def index_impl(s, sub, start=None, end=None):
        position = s.find(sub, start, end)
        if position >= 0:
            return position
        raise ValueError("substring not found")

    return index_impl
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12922-L12976 # noqa: E501
|
|
901
|
+
@overload_method(types.UnicodeType, "partition")
def unicode_partition(data, sep):
    """Implements str.partition()

    ``sep`` must be typed as a unicode string or UnicodeCharSeq, otherwise a
    TypingError is raised. At runtime an empty separator raises ValueError.
    """
    if isinstance(sep, types.Omitted):
        # if the type is omitted, the concrete type is the value
        concrete = sep.value
    elif isinstance(sep, types.Optional):
        # if the type is optional, the concrete type is the captured type
        concrete = sep.type
    else:
        concrete = sep

    accepted = (types.UnicodeType, types.UnicodeCharSeq)
    if not (concrete is None or isinstance(concrete, accepted)):
        msg = '"{}" must be {}, not {}'.format("sep", accepted, sep)
        raise TypingError(msg)

    def impl(data, sep):
        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/partition.h#L7-L60    # noqa: E501
        sep = str(sep)
        empty = _empty_string(data._kind, 0, data._is_ascii)
        sep_len = len(sep)
        # A wider-kind or longer separator can never occur in `data`.
        if data._kind < sep._kind or len(data) < sep_len:
            return data, empty, empty

        if sep_len == 0:
            raise ValueError("empty separator")

        cut = data.find(sep)
        if cut < 0:
            return data, empty, empty

        return data[0:cut], sep, data[cut + sep_len : len(data)]

    return impl
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
@overload_method(types.UnicodeType, "count")
def unicode_count(src, sub, start=None, end=None):
    """Implements str.count()

    Counts non-overlapping occurrences of ``sub`` within ``src[start:end]``.
    Raises TypingError when ``sub`` is not typed as a unicode string.
    """
    _count_args_types_check(start)
    _count_args_types_check(end)

    if not isinstance(sub, types.UnicodeType):
        error_msg = "The substring must be a UnicodeType, not {}"
        raise TypingError(error_msg.format(type(sub)))

    def count_impl(src, sub, start=None, end=None):
        total_len = len(src)
        sub_len = len(sub)

        lo = _normalize_slice_idx_count(start, total_len, 0)
        hi = _normalize_slice_idx_count(end, total_len, total_len)

        if hi - lo < 0 or lo > total_len:
            return 0

        # Restrict the search to the requested window.
        window = src[lo:hi]
        window_len = len(window)
        if sub_len == 0:
            # an empty substring matches at every position, ends included
            return window_len + 1

        hits = 0
        pos = 0
        while pos + sub_len <= window_len:
            if window[pos : pos + sub_len] == sub:
                hits += 1
                pos += sub_len
            else:
                pos += 1
        return hits

    return count_impl
|
|
972
|
+
|
|
973
|
+
|
|
974
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12979-L13033 # noqa: E501
|
|
975
|
+
@overload_method(types.UnicodeType, "rpartition")
def unicode_rpartition(data, sep):
    """Implements str.rpartition()

    ``sep`` must be typed as a unicode string or UnicodeCharSeq, otherwise a
    TypingError is raised. At runtime an empty separator raises ValueError.
    """
    if isinstance(sep, types.Omitted):
        # if the type is omitted, the concrete type is the value
        concrete = sep.value
    elif isinstance(sep, types.Optional):
        # if the type is optional, the concrete type is the captured type
        concrete = sep.type
    else:
        concrete = sep

    accepted = (types.UnicodeType, types.UnicodeCharSeq)
    if not (concrete is None or isinstance(concrete, accepted)):
        msg = '"{}" must be {}, not {}'.format("sep", accepted, sep)
        raise TypingError(msg)

    def impl(data, sep):
        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/partition.h#L62-L115    # noqa: E501
        sep = str(sep)
        empty = _empty_string(data._kind, 0, data._is_ascii)
        sep_len = len(sep)
        # A wider-kind or longer separator can never occur in `data`.
        if data._kind < sep._kind or len(data) < sep_len:
            return empty, empty, data

        if sep_len == 0:
            raise ValueError("empty separator")

        cut = data.rfind(sep)
        if cut < 0:
            return empty, empty, data

        return data[0:cut], sep, data[cut + sep_len : len(data)]

    return impl
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9342-L9354 # noqa: E501
|
|
1012
|
+
@register_jitable
def _adjust_indices(length, start, end):
    """Resolve negative ``start``/``end`` indices against ``length``.

    ``end`` is capped at ``length``. A negative ``start`` or ``end`` has
    ``length`` added to it and is then floored at 0. ``start`` is not capped
    at ``length``.

    Returns the adjusted ``(start, end)`` pair.
    """
    hi = end
    if hi > length:
        hi = length
    if hi < 0:
        hi += length
        if hi < 0:
            hi = 0

    lo = start
    if lo < 0:
        lo += length
        if lo < 0:
            lo = 0

    return lo, hi
|
|
1026
|
+
|
|
1027
|
+
|
|
1028
|
+
@overload_method(types.UnicodeType, "startswith")
def unicode_startswith(s, prefix, start=None, end=None):
    """Implements str.startswith()

    ``start`` and ``end`` must each be none-like or an ``Integer``.
    ``prefix`` may be a unicode string, a ``UnicodeCharSeq`` (converted with
    ``str`` and re-dispatched), or a homogeneous tuple of unicode strings
    (true if any item matches). Any other ``prefix`` type raises
    ``TypingError``.
    """
    if not (is_nonelike(start) or isinstance(start, types.Integer)):
        raise TypingError(
            "When specified, the arg 'start' must be an Integer or None"
        )

    if not (is_nonelike(end) or isinstance(end, types.Integer)):
        raise TypingError(
            "When specified, the arg 'end' must be an Integer or None"
        )

    prefix_is_str_tuple = isinstance(prefix, types.UniTuple) and isinstance(
        prefix.dtype, types.UnicodeType
    )

    if prefix_is_str_tuple:

        def startswith_tuple_impl(s, prefix, start=None, end=None):
            for item in prefix:
                if s.startswith(item, start, end):
                    return True
            return False

        return startswith_tuple_impl

    if isinstance(prefix, types.UnicodeCharSeq):

        def startswith_char_seq_impl(s, prefix, start=None, end=None):
            return s.startswith(str(prefix), start, end)

        return startswith_char_seq_impl

    if isinstance(prefix, types.UnicodeType):

        def startswith_unicode_impl(s, prefix, start=None, end=None):
            length = len(s)
            prefix_length = len(prefix)
            if start is None:
                start = 0
            if end is None:
                end = length

            start, end = _adjust_indices(length, start, end)
            # The window is too narrow to hold the prefix at all.
            if end - start < prefix_length:
                return False

            # An empty prefix matches any window that survived the check
            # above.
            if prefix_length == 0:
                return True

            window = s[start:end]
            return _cmp_region(window, 0, prefix, 0, prefix_length) == 0

        return startswith_unicode_impl

    raise TypingError(
        "The arg 'prefix' should be a string or a tuple of strings"
    )
|
|
1085
|
+
|
|
1086
|
+
|
|
1087
|
+
@overload_method(types.UnicodeType, "endswith")
def unicode_endswith(s, substr, start=None, end=None):
    """Implements str.endswith()

    ``start`` and ``end`` must each be ``None`` or typed as ``Omitted``,
    ``Integer`` or ``NoneType``; otherwise ``TypingError`` is raised.
    ``substr`` may be a tuple (true if any item matches), a unicode string,
    or a ``UnicodeCharSeq`` (converted with ``str`` and re-dispatched). For
    any other ``substr`` type no implementation is returned.
    """
    index_types = (types.Omitted, types.Integer, types.NoneType)
    # ``start`` is validated before ``end``; both share one message.
    for bound in (start, end):
        if bound is not None and not isinstance(bound, index_types):
            raise TypingError("The arg must be a Integer or None")

    if isinstance(substr, (types.Tuple, types.UniTuple)):

        def endswith_impl(s, substr, start=None, end=None):
            for item in substr:
                if s.endswith(item, start, end):
                    return True

            return False

        return endswith_impl

    if isinstance(substr, types.UnicodeType):

        def endswith_impl(s, substr, start=None, end=None):
            length = len(s)
            sub_length = len(substr)
            if start is None:
                start = 0
            if end is None:
                end = length

            start, end = _adjust_indices(length, start, end)
            # The window is too narrow to hold the suffix at all.
            if end - start < sub_length:
                return False

            if sub_length == 0:
                return True

            # Compare the last ``sub_length`` characters of the window.
            window = s[start:end]
            offset = len(window) - sub_length

            return _cmp_region(window, offset, substr, 0, sub_length) == 0

        return endswith_impl

    if isinstance(substr, types.UnicodeCharSeq):

        def endswith_impl(s, substr, start=None, end=None):
            return s.endswith(str(substr), start, end)

        return endswith_impl
|
|
1142
|
+
|
|
1143
|
+
|
|
1144
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11519-L11595 # noqa: E501
|
|
1145
|
+
@overload_method(types.UnicodeType, "expandtabs")
def unicode_expandtabs(data, tabsize=8):
    """Implements str.expandtabs()

    ``tabsize`` must resolve to an integer type (``Omitted`` and
    ``Optional`` wrappers are unwrapped before the check); otherwise
    ``TypingError`` is raised.

    The implementation makes two passes over ``data``: the first measures
    the expanded length, the second writes the result. If ``data`` contains
    no tab it is returned unchanged. With ``tabsize <= 0`` tabs contribute
    no characters to the output.
    """
    if isinstance(tabsize, types.Omitted):
        # an omitted argument carries its concrete value
        tabsize_ty = tabsize.value
    elif isinstance(tabsize, types.Optional):
        # an optional wraps the type it may hold
        tabsize_ty = tabsize.type
    else:
        tabsize_ty = tabsize

    accepted = (types.Integer, int)
    if not (tabsize_ty is None or isinstance(tabsize_ty, accepted)):
        raise TypingError(
            '"tabsize" must be {}, not {}'.format(accepted, tabsize)
        )

    def expandtabs_impl(data, tabsize=8):
        length = len(data)

        # Pass 1: measure the output and note whether any tab is present.
        out_len = 0
        column = 0
        saw_tab = False
        for i in range(length):
            code_point = _get_code_point(data, i)
            if code_point != _Py_TAB:
                if out_len > sys.maxsize - 1:
                    raise OverflowError("new string is too long")
                column += 1
                out_len += 1
                # a line break resets the column used for tab stops
                if (
                    code_point == _Py_LINEFEED
                    or code_point == _Py_CARRIAGE_RETURN
                ):
                    column = 0
            else:
                saw_tab = True
                if tabsize > 0:
                    # cannot overflow
                    incr = tabsize - (column % tabsize)
                    if out_len > sys.maxsize - incr:
                        raise OverflowError("new string is too long")
                    column += incr
                    out_len += incr

        if not saw_tab:
            return data

        # Pass 2: fill the pre-sized result.
        res = _empty_string(data._kind, out_len, data._is_ascii)
        write_at = 0
        column = 0
        for i in range(length):
            code_point = _get_code_point(data, i)
            if code_point != _Py_TAB:
                column += 1
                _set_code_point(res, write_at, code_point)
                write_at += 1
                if (
                    code_point == _Py_LINEFEED
                    or code_point == _Py_CARRIAGE_RETURN
                ):
                    column = 0
            elif tabsize > 0:
                # pad with spaces up to the next tab stop
                incr = tabsize - (column % tabsize)
                column += incr
                for idx in range(write_at, write_at + incr):
                    _set_code_point(res, idx, _Py_SPACE)
                write_at += incr

        return res

    return expandtabs_impl
|
|
1209
|
+
|
|
1210
|
+
|
|
1211
|
+
@overload_method(types.UnicodeType, "split")
def unicode_split(a, sep=None, maxsplit=-1):
    """Implements str.split()

    Typing fails (``None`` is returned) when ``maxsplit`` is neither the
    default ``-1`` nor an integer type. ``sep`` may be a unicode string, a
    ``UnicodeCharSeq`` (converted with ``str`` and re-dispatched) or
    none-like, in which case runs of whitespace act as the separator.

    As in CPython, any negative ``maxsplit`` means "no limit". Once a
    non-negative limit is reached, the unsplit remainder is returned as the
    final element. In whitespace mode the whitespace in front of that
    remainder is dropped and the rest is kept verbatim.

    The separator implementation raises ``ValueError`` for an empty ``sep``.
    """
    if not (
        maxsplit == -1
        or isinstance(
            maxsplit, (types.Omitted, types.Integer, types.IntegerLiteral)
        )
    ):
        return None  # fail typing if maxsplit is not an integer

    if isinstance(sep, types.UnicodeCharSeq):

        def split_impl(a, sep=None, maxsplit=-1):
            return a.split(str(sep), maxsplit=maxsplit)

        return split_impl

    if isinstance(sep, types.UnicodeType):

        def split_impl(a, sep=None, maxsplit=-1):
            a_len = len(a)
            sep_len = len(sep)

            if sep_len == 0:
                raise ValueError("empty separator")

            parts = []
            last = 0
            idx = 0
            # Any negative maxsplit means "no limit", not just -1.
            unlimited = maxsplit < 0

            if sep_len == 1 and unlimited:
                # fast path: compare single code points
                sep_code_point = _get_code_point(sep, 0)
                for idx in range(a_len):
                    if _get_code_point(a, idx) == sep_code_point:
                        parts.append(a[last:idx])
                        last = idx + 1
            else:
                split_count = 0

                while idx < a_len and (unlimited or split_count < maxsplit):
                    if _cmp_region(a, idx, sep, 0, sep_len) == 0:
                        parts.append(a[last:idx])
                        idx += sep_len
                        last = idx
                        split_count += 1
                    else:
                        idx += 1

            # whatever follows the last separator (possibly empty)
            if last <= a_len:
                parts.append(a[last:])

            return parts

        return split_impl
    elif (
        sep is None
        or isinstance(sep, types.NoneType)
        or getattr(sep, "value", False) is None
    ):

        def split_whitespace_impl(a, sep=None, maxsplit=-1):
            a_len = len(a)
            # Any negative maxsplit means "no limit", not just -1.
            unlimited = maxsplit < 0

            parts = []
            idx = 0
            split_count = 0

            while unlimited or split_count < maxsplit:
                # skip the whitespace run in front of the next word
                while idx < a_len and _PyUnicode_IsSpace(
                    _get_code_point(a, idx)
                ):
                    idx += 1
                if idx == a_len:
                    break

                # consume the word itself
                last = idx
                idx += 1
                while idx < a_len and not _PyUnicode_IsSpace(
                    _get_code_point(a, idx)
                ):
                    idx += 1
                parts.append(a[last:idx])
                split_count += 1

            if idx < a_len:
                # Only reached when the split limit was hit: drop the
                # whitespace in front of the remainder and keep the rest
                # (including any trailing whitespace) verbatim.
                while idx < a_len and _PyUnicode_IsSpace(
                    _get_code_point(a, idx)
                ):
                    idx += 1
                if idx < a_len:
                    parts.append(a[idx:])

            return parts

        return split_whitespace_impl
|
|
1307
|
+
|
|
1308
|
+
|
|
1309
|
+
def generate_rsplit_whitespace_impl(isspace_func):
    """Generate whitespace rsplit func based on either ascii or unicode

    ``isspace_func`` is called with one code point and decides whether it
    counts as whitespace. The generated function scans ``data`` from the
    right, collects at most ``maxsplit`` words (no limit when ``maxsplit``
    is negative) and returns them in left-to-right order. When the limit is
    hit, the remaining left part is appended with its trailing whitespace
    removed.
    """

    def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L192-L240 # noqa: E501
        if maxsplit < 0:
            maxsplit = sys.maxsize

        result = []
        pos = len(data) - 1
        while maxsplit > 0:
            # skip whitespace to the right of the next word
            while pos >= 0 and isspace_func(_get_code_point(data, pos)):
                pos -= 1
            if pos < 0:
                break

            # ``word_end`` is the last character of the word; walk left to
            # the whitespace (or string start) in front of it
            word_end = pos
            pos -= 1
            while pos >= 0 and not isspace_func(_get_code_point(data, pos)):
                pos -= 1
            result.append(data[pos + 1 : word_end + 1])
            maxsplit -= 1

        if pos >= 0:
            # Only occurs when maxsplit was reached
            # Skip any remaining whitespace and copy to beginning of string
            while pos >= 0 and isspace_func(_get_code_point(data, pos)):
                pos -= 1
            if pos >= 0:
                result.append(data[0 : pos + 1])

        # words were collected right-to-left
        return result[::-1]

    return rsplit_whitespace_impl


# Jit-callable variants for unicode and for pure-ascii data.
unicode_rsplit_whitespace_impl = register_jitable(
    generate_rsplit_whitespace_impl(_PyUnicode_IsSpace)
)
ascii_rsplit_whitespace_impl = register_jitable(
    generate_rsplit_whitespace_impl(_Py_ISSPACE)
)
|
|
1359
|
+
|
|
1360
|
+
|
|
1361
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L13095-L13108 # noqa: E501
|
|
1362
|
+
@overload_method(types.UnicodeType, "rsplit")
def unicode_rsplit(data, sep=None, maxsplit=-1):
    """Implements str.rsplit()

    ``sep`` must resolve to a unicode string, a ``UnicodeCharSeq`` or a
    none type, and ``maxsplit`` to an integer; otherwise ``TypingError`` is
    raised. A none-like or omitted ``sep`` splits on whitespace. A negative
    ``maxsplit`` means "no limit".

    The separator implementation raises ``ValueError`` for an empty ``sep``.
    """

    def _unicode_rsplit_check_type(ty, name, accepted):
        """Check object belongs to one of specified types"""
        thety = ty
        # if the type is omitted, the concrete type is the value
        if isinstance(ty, types.Omitted):
            thety = ty.value
        # if the type is optional, the concrete type is the captured type
        elif isinstance(ty, types.Optional):
            thety = ty.type

        if thety is not None and not isinstance(thety, accepted):
            raise TypingError(
                '"{}" must be {}, not {}'.format(name, accepted, ty)
            )

    _unicode_rsplit_check_type(
        sep, "sep", (types.UnicodeType, types.UnicodeCharSeq, types.NoneType)
    )
    _unicode_rsplit_check_type(maxsplit, "maxsplit", (types.Integer, int))

    if sep is None or isinstance(sep, (types.NoneType, types.Omitted)):

        def rsplit_whitespace_impl(data, sep=None, maxsplit=-1):
            # pick the whitespace predicate that matches the data
            if data._is_ascii:
                return ascii_rsplit_whitespace_impl(data, sep, maxsplit)
            return unicode_rsplit_whitespace_impl(data, sep, maxsplit)

        return rsplit_whitespace_impl

    def rsplit_impl(data, sep=None, maxsplit=-1):
        sep = str(sep)
        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L286-L333 # noqa: E501
        # A separator that cannot fit in ``data`` can never match.
        if data._kind < sep._kind or len(data) < len(sep):
            return [data]

        def _rsplit_char(data, ch, maxsplit):
            # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L242-L284 # noqa: E501
            result = []
            ch_code_point = _get_code_point(ch, 0)
            i = j = len(data) - 1
            while i >= 0 and maxsplit > 0:
                data_code_point = _get_code_point(data, i)
                if data_code_point == ch_code_point:
                    result.append(data[i + 1 : j + 1])
                    # Resume at the character just left of the separator.
                    # It must be examined on the next iteration so that
                    # adjacent separators yield an empty field.
                    j = i = i - 1
                    maxsplit -= 1
                else:
                    i -= 1
            if j >= -1:
                result.append(data[0 : j + 1])

            # fields were collected right-to-left
            return result[::-1]

        if maxsplit < 0:
            maxsplit = sys.maxsize

        sep_length = len(sep)

        if sep_length == 0:
            raise ValueError("empty separator")
        if sep_length == 1:
            return _rsplit_char(data, sep, maxsplit)

        result = []
        j = len(data)
        while maxsplit > 0:
            # search only the part left of the previous match
            pos = data.rfind(sep, start=0, end=j)
            if pos < 0:
                break
            result.append(data[pos + sep_length : j])
            j = pos
            maxsplit -= 1

        result.append(data[0:j])

        # fields were collected right-to-left
        return result[::-1]

    return rsplit_impl
|
|
1443
|
+
|
|
1444
|
+
|
|
1445
|
+
@overload_method(types.UnicodeType, "center")
def unicode_center(string, width, fillchar=" "):
    """Implements str.center()

    ``width`` must be an ``Integer``. ``fillchar`` may be left at its
    default, be a unicode string, or be a ``UnicodeCharSeq`` (converted
    with ``str`` and re-dispatched); anything else raises ``TypingError``.
    The implementation raises ``ValueError`` unless ``fillchar`` is exactly
    one character long, and returns ``string`` itself when ``width`` does
    not exceed its length.
    """
    if not isinstance(width, types.Integer):
        raise TypingError("The width must be an Integer")

    if isinstance(fillchar, types.UnicodeCharSeq):

        def center_impl(string, width, fillchar=" "):
            return string.center(width, str(fillchar))

        return center_impl

    fillchar_ok = fillchar == " " or isinstance(
        fillchar, (types.Omitted, types.UnicodeType)
    )
    if not fillchar_ok:
        raise TypingError("The fillchar must be a UnicodeType")

    def center_impl(string, width, fillchar=" "):
        str_len = len(string)

        if len(fillchar) != 1:
            raise ValueError(
                "The fill character must be exactly one character long"
            )

        if width <= str_len:
            return string

        total_pad = width - str_len
        # The extra character of an odd split goes to the left only when
        # ``width`` is odd as well.
        left_pad = (total_pad // 2) + (total_pad & width & 1)
        right_pad = total_pad - left_pad

        left = fillchar * left_pad
        # reuse the left padding when both sides are the same size
        if left_pad == right_pad:
            right = left
        else:
            right = fillchar * right_pad
        return left + string + right

    return center_impl
|
|
1486
|
+
|
|
1487
|
+
|
|
1488
|
+
def gen_unicode_Xjust(STRING_FIRST):
    """Build the typing function shared by ``str.ljust`` and ``str.rjust``.

    With ``STRING_FIRST`` true the padding is appended after the string
    (ljust); otherwise it is placed in front of it (rjust).

    The generated typing function requires ``width`` to be an ``Integer``.
    ``fillchar`` may be left at its default, be a unicode string, or be a
    ``UnicodeCharSeq`` (converted with ``str`` and re-dispatched); anything
    else raises ``TypingError``. The implementation raises ``ValueError``
    unless ``fillchar`` is exactly one character long.
    """

    def unicode_Xjust(string, width, fillchar=" "):
        if not isinstance(width, types.Integer):
            raise TypingError("The width must be an Integer")

        if isinstance(fillchar, types.UnicodeCharSeq):
            if STRING_FIRST:

                def ljust_impl(string, width, fillchar=" "):
                    return string.ljust(width, str(fillchar))

                return ljust_impl

            def rjust_impl(string, width, fillchar=" "):
                return string.rjust(width, str(fillchar))

            return rjust_impl

        fillchar_ok = fillchar == " " or isinstance(
            fillchar, (types.Omitted, types.UnicodeType)
        )
        if not fillchar_ok:
            raise TypingError("The fillchar must be a UnicodeType")

        def impl(string, width, fillchar=" "):
            str_len = len(string)

            if len(fillchar) != 1:
                raise ValueError(
                    "The fill character must be exactly one character long"
                )

            # nothing to pad
            if width <= str_len:
                return string

            padding = fillchar * (width - str_len)
            if STRING_FIRST:
                return string + padding
            return padding + string

        return impl

    return unicode_Xjust


# rjust puts the padding first, ljust puts the string first.
overload_method(types.UnicodeType, "rjust")(gen_unicode_Xjust(False))
overload_method(types.UnicodeType, "ljust")(gen_unicode_Xjust(True))
|
|
1538
|
+
|
|
1539
|
+
|
|
1540
|
+
def generate_splitlines_func(is_line_break_func):
    """Generate splitlines performer based on ascii or unicode line breaks.

    ``is_line_break_func`` is called with one code point and decides
    whether it ends a line. The generated function returns the list of
    lines in ``data``; line terminators are included in each line only when
    ``keepends`` is true. A carriage return directly followed by a line
    feed counts as a single terminator.
    """

    def impl(data, keepends):
        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/stringlib/split.h#L335-L389 # noqa: E501
        length = len(data)
        result = []
        line_start = 0
        pos = 0
        while pos < length:
            # find a line and append it
            while pos < length and not is_line_break_func(
                _get_code_point(data, pos)
            ):
                pos += 1

            # skip the line break reading CRLF as one line break
            eol = pos
            if pos < length:
                if pos + 1 < length:
                    cur_cp = _get_code_point(data, pos)
                    next_cp = _get_code_point(data, pos + 1)
                    if _Py_ISCARRIAGERETURN(cur_cp) and _Py_ISLINEFEED(
                        next_cp
                    ):
                        pos += 1
                pos += 1
                if keepends:
                    eol = pos

            result.append(data[line_start:eol])
            line_start = pos

        return result

    return impl


# Jit-callable variants for pure-ascii and for unicode data.
_ascii_splitlines = register_jitable(generate_splitlines_func(_Py_ISLINEBREAK))
_unicode_splitlines = register_jitable(
    generate_splitlines_func(_PyUnicode_IsLineBreak)
)
|
|
1580
|
+
|
|
1581
|
+
|
|
1582
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10196-L10229 # noqa: E501
|
|
1583
|
+
@overload_method(types.UnicodeType, "splitlines")
def unicode_splitlines(data, keepends=False):
    """Implements str.splitlines()

    ``keepends`` must resolve to an integer or boolean type (``Omitted``
    and ``Optional`` wrappers are unwrapped before the check); otherwise
    ``TypingError`` is raised. The implementation dispatches to the ascii
    or the unicode splitter depending on ``data._is_ascii``.
    """
    if isinstance(keepends, types.Omitted):
        # an omitted argument carries its concrete value
        keepends_ty = keepends.value
    elif isinstance(keepends, types.Optional):
        # an optional wraps the type it may hold
        keepends_ty = keepends.type
    else:
        keepends_ty = keepends

    accepted = (types.Integer, int, types.Boolean, bool)
    if not (keepends_ty is None or isinstance(keepends_ty, accepted)):
        raise TypingError(
            '"{}" must be {}, not {}'.format("keepends", accepted, keepends)
        )

    def splitlines_impl(data, keepends=False):
        if not data._is_ascii:
            return _unicode_splitlines(data, keepends)

        return _ascii_splitlines(data, keepends)

    return splitlines_impl
|
|
1607
|
+
|
|
1608
|
+
|
|
1609
|
+
@register_jitable
def join_list(sep, parts):
    """Join the strings in ``parts`` with ``sep`` between them.

    Returns ``""`` for an empty ``parts``. Otherwise the result is
    allocated once, with the total length and with the kind / ascii flag
    combined over ``sep`` and every part, and then filled by copying.
    """
    parts_len = len(parts)
    if parts_len == 0:
        return ""

    # Precompute size and char_width of result
    sep_len = len(sep)
    total_len = (parts_len - 1) * sep_len
    kind = sep._kind
    is_ascii = sep._is_ascii
    for p in parts:
        total_len += len(p)
        kind = _pick_kind(kind, p._kind)
        is_ascii = _pick_ascii(is_ascii, p._is_ascii)

    result = _empty_string(kind, total_len, is_ascii)

    # populate string: every part except the first is preceded by ``sep``
    cursor = 0
    for idx in range(parts_len):
        if idx > 0:
            _strncpy(result, cursor, sep, 0, sep_len)
            cursor += sep_len
        part = parts[idx]
        part_len = len(part)
        _strncpy(result, cursor, part, 0, part_len)
        cursor += part_len

    return result
|
|
1639
|
+
|
|
1640
|
+
|
|
1641
|
+
@overload_method(types.UnicodeType, "join")
def unicode_join(sep, parts):
    """Implements str.join()

    Supported ``parts`` types: a list of unicode strings, a list of
    ``UnicodeCharSeq`` (each item converted with ``str``), any other
    iterable type (collected into a list and re-dispatched), and a unicode
    string (joined character by character). For a list of any other item
    type, or any other ``parts`` type, no implementation is returned.
    """
    if isinstance(parts, types.List):
        item_ty = parts.dtype

        if isinstance(item_ty, types.UnicodeType):

            def join_list_impl(sep, parts):
                return join_list(sep, parts)

            return join_list_impl

        if isinstance(item_ty, types.UnicodeCharSeq):

            def join_list_impl(sep, parts):
                _parts = [str(p) for p in parts]
                return join_list(sep, _parts)

            return join_list_impl

        # lists of any other type not supported
        return None

    if isinstance(parts, types.IterableType):

        def join_iter_impl(sep, parts):
            parts_list = [p for p in parts]
            return sep.join(parts_list)

        return join_iter_impl

    if isinstance(parts, types.UnicodeType):
        # Temporary workaround until UnicodeType is iterable
        def join_str_impl(sep, parts):
            parts_list = [parts[i] for i in range(len(parts))]
            return join_list(sep, parts_list)

        return join_str_impl

    return None
|
|
1673
|
+
|
|
1674
|
+
|
|
1675
|
+
@overload_method(types.UnicodeType, "zfill")
def unicode_zfill(string, width):
    """Implements str.zfill()

    ``width`` must be an ``Integer``, otherwise ``TypingError`` is raised.
    The implementation returns ``string`` itself when ``width`` does not
    exceed its length. Otherwise it left-pads with ``"0"``, inserting the
    zeros after a leading ``"+"`` or ``"-"`` when there is one.
    """
    if not isinstance(width, types.Integer):
        raise TypingError("<width> must be an Integer")

    def zfill_impl(string, width):
        str_len = len(string)

        if width <= str_len:
            return string

        zeros = "0" * (width - str_len)
        # an empty string has no sign character to preserve
        lead = string[0] if str_len else ""

        if lead == "+" or lead == "-":
            return lead + zeros + string[1:]

        return zeros + string

    return zfill_impl
|
|
1697
|
+
|
|
1698
|
+
|
|
1699
|
+
# ------------------------------------------------------------------------------
|
|
1700
|
+
# Strip functions
|
|
1701
|
+
# ------------------------------------------------------------------------------
|
|
1702
|
+
@register_jitable
def unicode_strip_left_bound(string, chars):
    """Return the index of the first character that is not stripped.

    With ``chars`` set, a character is stripped when it is contained in
    ``chars``; with ``chars`` as ``None``, when ``_PyUnicode_IsSpace``
    accepts it. Returns ``len(string)`` when every character is stripped.
    """
    str_len = len(string)

    if chars is None:
        for pos in range(str_len):
            if not _PyUnicode_IsSpace(string[pos]):
                return pos
    else:
        for pos in range(str_len):
            if string[pos] not in chars:
                return pos

    return str_len
|
|
1717
|
+
|
|
1718
|
+
|
|
1719
|
+
@register_jitable
def unicode_strip_right_bound(string, chars):
    """Return the index one past the last character that is not stripped.

    With ``chars`` set, a character is stripped when it is contained in
    ``chars``; with ``chars`` as ``None``, when ``_PyUnicode_IsSpace``
    accepts it. Returns 0 when every character is stripped or the string is
    empty.
    """
    end = len(string)

    # Walk left from the end for as long as the character is stripped.
    if chars is None:
        while end > 0 and _PyUnicode_IsSpace(string[end - 1]):
            end -= 1
    else:
        while end > 0 and string[end - 1] in chars:
            end -= 1

    return end
|
|
1735
|
+
|
|
1736
|
+
|
|
1737
|
+
def unicode_strip_types_check(chars):
    """Validate the ``chars`` argument type of the strip methods.

    Accepts ``None`` and the ``Omitted``, ``UnicodeType`` and ``NoneType``
    types; an ``Optional`` is judged by the type it wraps. Anything else
    raises ``TypingError``.
    """
    if isinstance(chars, types.Optional):
        chars = chars.type  # catch optional type with invalid non-None type

    if chars is None:
        return
    if isinstance(chars, (types.Omitted, types.UnicodeType, types.NoneType)):
        return

    raise TypingError("The arg must be a UnicodeType or None")
|
|
1745
|
+
|
|
1746
|
+
|
|
1747
|
+
def _count_args_types_check(arg):
    """Validate the type of a slice-index argument.

    Accepts ``None`` and the ``Omitted``, ``Integer`` and ``NoneType``
    types; an ``Optional`` is judged by the type it wraps. Anything else
    raises ``TypingError``.
    """
    if isinstance(arg, types.Optional):
        arg = arg.type

    if arg is None:
        return
    if isinstance(arg, (types.Omitted, types.Integer, types.NoneType)):
        return

    raise TypingError("The slice indices must be an Integer or None")
|
|
1755
|
+
|
|
1756
|
+
|
|
1757
|
+
@overload_method(types.UnicodeType, "lstrip")
def unicode_lstrip(string, chars=None):
    """Implements str.lstrip()

    A ``UnicodeCharSeq`` ``chars`` is converted with ``str`` and
    re-dispatched; every other type is validated by
    ``unicode_strip_types_check``.
    """
    if isinstance(chars, types.UnicodeCharSeq):

        def lstrip_impl(string, chars=None):
            return string.lstrip(str(chars))

        return lstrip_impl

    unicode_strip_types_check(chars)

    def lstrip_impl(string, chars=None):
        first_kept = unicode_strip_left_bound(string, chars)
        return string[first_kept:]

    return lstrip_impl
|
|
1772
|
+
|
|
1773
|
+
|
|
1774
|
+
@overload_method(types.UnicodeType, "rstrip")
def unicode_rstrip(string, chars=None):
    """Implements str.rstrip()

    A ``UnicodeCharSeq`` ``chars`` is converted with ``str`` and
    re-dispatched; every other type is validated by
    ``unicode_strip_types_check``.
    """
    if isinstance(chars, types.UnicodeCharSeq):

        def rstrip_impl(string, chars=None):
            return string.rstrip(str(chars))

        return rstrip_impl

    unicode_strip_types_check(chars)

    def rstrip_impl(string, chars=None):
        past_last_kept = unicode_strip_right_bound(string, chars)
        return string[:past_last_kept]

    return rstrip_impl
|
|
1789
|
+
|
|
1790
|
+
|
|
1791
|
+
@overload_method(types.UnicodeType, "strip")
def unicode_strip(string, chars=None):
    """Implements str.strip()

    A ``UnicodeCharSeq`` ``chars`` is converted with ``str`` and
    re-dispatched; every other type is validated by
    ``unicode_strip_types_check``.
    """
    if isinstance(chars, types.UnicodeCharSeq):

        def strip_impl(string, chars=None):
            return string.strip(str(chars))

        return strip_impl

    unicode_strip_types_check(chars)

    def strip_impl(string, chars=None):
        first_kept = unicode_strip_left_bound(string, chars)
        past_last_kept = unicode_strip_right_bound(string, chars)
        return string[first_kept:past_last_kept]

    return strip_impl
|
|
1808
|
+
|
|
1809
|
+
|
|
1810
|
+
# ------------------------------------------------------------------------------
|
|
1811
|
+
# Slice functions
|
|
1812
|
+
# ------------------------------------------------------------------------------
|
|
1813
|
+
|
|
1814
|
+
|
|
1815
|
+
@register_jitable
def normalize_str_idx(idx, length, is_start=True):
    """
    Resolve a possibly negative or missing string index.

    Parameters
    ----------
    idx : int or None
        the index
    length : int
        the string length
    is_start : bool; optional with defaults to True
        Is it the *start* or the *stop* of the slice? Only consulted when
        ``idx`` is None.

    Returns
    -------
    norm_idx : int
        normalized index: 0 or ``length`` for a None ``idx`` (depending on
        ``is_start``), otherwise ``idx`` with ``length`` added when it was
        negative

    Raises
    ------
    IndexError
        if the resolved index falls outside ``[0, length)``
    """
    if idx is None:
        return 0 if is_start else length

    if idx < 0:
        idx += length

    if not (0 <= idx < length):
        raise IndexError("string index out of range")

    return idx
|
|
1844
|
+
|
|
1845
|
+
|
|
1846
|
+
@register_jitable
def _normalize_slice_idx_count(arg, slice_len, default):
    """
    Used for unicode_count

    If arg is None, returns default

    If arg < -slice_len, returns 0 (prevents circle)

    If arg is within slice, e.g -slice_len <= arg < slice_len
    returns its real index via arg % slice_len

    If arg >= slice_len, returns arg (in this case count must
    return 0 if it is start index)
    """

    if arg is None:
        return default
    if arg < -slice_len:
        return 0
    if arg >= slice_len:
        return arg
    return arg % slice_len
|
|
1865
|
+
|
|
1866
|
+
|
|
1867
|
+
@intrinsic
def _normalize_slice(typingctx, sliceobj, length):
    """Fix slice object.

    The result has the same type as ``sliceobj``. Code generation runs
    ``slicing.guard_invalid_slice`` on the slice and then
    ``slicing.fix_slice`` against ``length``, and returns the resulting
    slice value.
    """
    sig = sliceobj(sliceobj, length)

    def codegen(context, builder, sig, args):
        slicetype, _lengthtype = sig.args
        slice_val, length_val = args
        slice_struct = context.make_helper(builder, slicetype, slice_val)
        slicing.guard_invalid_slice(context, builder, slicetype, slice_struct)
        slicing.fix_slice(builder, slice_struct, length_val)
        return slice_struct._getvalue()

    return sig, codegen
|
|
1881
|
+
|
|
1882
|
+
|
|
1883
|
+
@intrinsic
def _slice_span(typingctx, sliceobj):
    """Compute the span from the given slice object.

    The result is typed as ``types.intp`` and is whatever
    ``slicing.get_slice_length`` produces for the slice.
    """
    sig = types.intp(sliceobj)

    def codegen(context, builder, sig, args):
        (slicetype,) = sig.args
        (slice_val,) = args
        slice_struct = context.make_helper(builder, slicetype, slice_val)
        return slicing.get_slice_length(builder, slice_struct)

    return sig, codegen
|
|
1896
|
+
|
|
1897
|
+
|
|
1898
|
+
@register_jitable(_nrt=False)
def _strncpy(dst, dst_offset, src, src_offset, n):
    """Copy ``n`` characters from ``src`` into ``dst``.

    Characters are read starting at ``src_offset`` and written starting at
    ``dst_offset``. When both strings have the same kind the data is copied
    as one byte region; otherwise it is copied one code point at a time.
    """
    if src._kind != dst._kind:
        # Different kinds: go through the code point accessors.
        for i in range(n):
            code_point = _get_code_point(src, src_offset + i)
            _set_code_point(dst, dst_offset + i, code_point)
    else:
        # Same kind: translate character offsets and count to bytes.
        byte_width = _kind_to_byte_width(src._kind)
        memcpy_region(
            dst._data,
            byte_width * dst_offset,
            src._data,
            byte_width * src_offset,
            n * byte_width,
            align=1,
        )
|
|
1918
|
+
|
|
1919
|
+
|
|
1920
|
+
@intrinsic
def _get_str_slice_view(typingctx, src_t, start_t, length_t):
    """Build a unicode string that is a view onto another string's data.

    The view reuses the source's meminfo, kind and ASCII flag, so no extra
    allocation is needed; only the data pointer and the length differ.
    """
    assert src_t == types.unicode_type

    def codegen(context, builder, sig, args):
        src, start, length = args
        str_proxy = cgutils.create_struct_proxy(types.unicode_type)
        source = str_proxy(context, builder, value=src)
        view = str_proxy(context, builder)

        view.meminfo = source.meminfo
        view.kind = source.kind
        view.is_ascii = source.is_ascii
        view.length = length
        # A hash of -1 means "hash still needs to be computed".
        view.hash = context.get_constant(_Py_hash_t, -1)

        # Point the view at the first code point of the slice: the byte
        # offset is the start index times the byte width of the kind.
        width_fn_ty = context.typing_context.resolve_value_type(
            _kind_to_byte_width
        )
        width_sig = width_fn_ty.get_call_type(
            context.typing_context, (types.int32,), {}
        )
        width_fn = context.get_function(width_fn_ty, width_sig)
        char_width = width_fn(builder, (source.kind,))
        byte_offset = builder.mul(start, char_width)
        view.data = builder.gep(source.data, [byte_offset])

        # The view has no parent pyobject.
        view.parent = cgutils.get_null_value(view.parent.type)

        # Take a reference on the original string.
        if context.enable_nrt:
            context.nrt.incref(builder, sig.args[0], src)
        return view._getvalue()

    view_sig = types.unicode_type(types.unicode_type, types.intp, types.intp)
    return view_sig, codegen
|
|
1959
|
+
|
|
1960
|
+
|
|
1961
|
+
@overload(operator.getitem)
def unicode_getitem(s, idx):
    """Overload ``s[idx]`` for unicode strings.

    Integer indices yield a one-character string; slice indices yield a
    substring. Other argument types are left to other overloads.
    """
    if not isinstance(s, types.UnicodeType):
        return None

    if isinstance(idx, types.Integer):

        def getitem_char(s, idx):
            pos = normalize_str_idx(idx, len(s))
            cp = _get_code_point(s, pos)
            cp_kind = _codepoint_to_kind(cp)
            if cp_kind != s._kind:
                # The character's kind differs from the string's kind, so
                # a new string of the character's own kind is built.
                out = _empty_string(cp_kind, 1, _codepoint_is_ascii(cp))
                _set_code_point(out, 0, cp)
                return out
            return _get_str_slice_view(s, pos, 1)

        return getitem_char

    if isinstance(idx, types.SliceType):

        def getitem_slice(s, idx):
            norm = _normalize_slice(idx, len(s))
            span = _slice_span(norm)

            first_cp = _get_code_point(s, norm.start)
            kind = _codepoint_to_kind(first_cp)
            is_ascii = _codepoint_is_ascii(first_cp)

            # Check slice to see if it's homogeneous in kind
            for pos in range(norm.start + norm.step, norm.stop, norm.step):
                cp = _get_code_point(s, pos)
                is_ascii &= _codepoint_is_ascii(cp)
                cp_kind = _codepoint_to_kind(cp)
                if kind != cp_kind:
                    kind = _pick_kind(kind, cp_kind)
                # TODO: it might be possible to break here if the kind
                # is PY_UNICODE_4BYTE_KIND but there are potentially
                # strings coming from other internal functions that are
                # this wide and also actually ASCII (i.e. kind is larger
                # than actually required for storing the code point), so
                # it's necessary to continue.

            if norm.step != 1 or kind != s._kind:
                # Heterogeneous in kind OR stride != 1: copy the selected
                # code points into a freshly created string.
                out = _empty_string(kind, span, is_ascii)
                src_pos = norm.start
                for dst_pos in range(span):
                    _set_code_point(out, dst_pos, _get_code_point(s, src_pos))
                    src_pos += norm.step
                return out

            # Same kind as the string itself and stride 1: return a view.
            return _get_str_slice_view(s, norm.start, span)

        return getitem_slice
|
|
2021
|
+
|
|
2022
|
+
|
|
2023
|
+
# ------------------------------------------------------------------------------
|
|
2024
|
+
# String operations
|
|
2025
|
+
# ------------------------------------------------------------------------------
|
|
2026
|
+
|
|
2027
|
+
|
|
2028
|
+
@overload(operator.add)
@overload(operator.iadd)
def unicode_concat(a, b):
    """Overload ``a + b`` / ``a += b`` when ``a`` is a unicode string.

    ``b`` may be a unicode string or a unicode char sequence; the latter
    is converted with ``str`` and concatenated through the former.
    """
    if not isinstance(a, types.UnicodeType):
        return None

    if isinstance(b, types.UnicodeType):

        def concat_impl(a, b):
            total = a._length + b._length
            kind = _pick_kind(a._kind, b._kind)
            ascii_flag = _pick_ascii(a._is_ascii, b._is_ascii)
            out = _empty_string(kind, total, ascii_flag)
            # Copy the left operand, then the right one after it.
            for pos in range(len(a)):
                _set_code_point(out, pos, _get_code_point(a, pos))
            for pos in range(len(b)):
                _set_code_point(out, len(a) + pos, _get_code_point(b, pos))
            return out

        return concat_impl

    if isinstance(b, types.UnicodeCharSeq):

        def concat_impl(a, b):
            return a + str(b)

        return concat_impl
|
|
2052
|
+
|
|
2053
|
+
|
|
2054
|
+
@register_jitable
def _repeat_impl(str_arg, mult_arg):
    """Return ``str_arg`` repeated ``mult_arg`` times.

    An empty string or a multiplier below 1 gives ``""``; a multiplier of
    exactly 1 returns ``str_arg`` itself.
    """
    if str_arg == "" or mult_arg < 1:
        return ""
    if mult_arg == 1:
        return str_arg

    total_len = str_arg._length * mult_arg
    out = _empty_string(str_arg._kind, total_len, str_arg._is_ascii)

    # Seed the result with one copy of the input.
    unit_len = len(str_arg)
    _strncpy(out, 0, str_arg, 0, unit_len)

    # Double the filled prefix while a full doubling still fits.
    filled = unit_len
    while 2 * filled <= total_len:
        _strncpy(out, filled, out, 0, filled)
        filled *= 2

    if 2 * filled != total_len:
        # Complete the tail by copying the last `remaining` code points of
        # the already-filled prefix.
        remaining = total_len - filled
        _strncpy(out, filled, out, filled - remaining, remaining)
    return out
|
|
2079
|
+
|
|
2080
|
+
|
|
2081
|
+
@overload(operator.mul)
def unicode_repeat(a, b):
    """Overload ``str * int`` and ``int * str`` via ``_repeat_impl``."""
    str_times_int = isinstance(a, types.UnicodeType) and isinstance(
        b, types.Integer
    )
    if str_times_int:

        def wrap(a, b):
            return _repeat_impl(a, b)

        return wrap

    int_times_str = isinstance(a, types.Integer) and isinstance(
        b, types.UnicodeType
    )
    if int_times_str:

        def wrap(a, b):
            # Operands are swapped: the string is the second argument.
            return _repeat_impl(b, a)

        return wrap
|
|
2095
|
+
|
|
2096
|
+
|
|
2097
|
+
@overload(operator.not_)
def unicode_not(a):
    """Overload ``not a`` for unicode strings: true only when empty."""
    if not isinstance(a, types.UnicodeType):
        return None

    def impl(a):
        return len(a) == 0

    return impl
|
|
2105
|
+
|
|
2106
|
+
|
|
2107
|
+
@overload_method(types.UnicodeType, "replace")
def unicode_replace(s, old_str, new_str, count=-1):
    """Implements str.replace()

    Typing rules enforced here:

    * ``count`` must be an integer (or an omitted/optional integer),
    * ``old_str`` must be a unicode string or None,
    * ``new_str`` must be a unicode string;

    a ``TypingError`` is raised otherwise.

    As in CPython, any negative ``count`` means "replace every
    occurrence", and replacing the empty pattern in an empty string
    yields exactly one copy of ``new_str``.
    """
    thety = count
    if isinstance(count, types.Omitted):
        thety = count.value
    elif isinstance(count, types.Optional):
        thety = count.type

    if not isinstance(thety, (int, types.Integer)):
        raise TypingError(
            "Unsupported parameters. The parameters "
            "must be Integer. Given count: {}".format(count)
        )

    if not isinstance(old_str, (types.UnicodeType, types.NoneType)):
        raise TypingError(
            "The object must be a UnicodeType. Given: {}".format(old_str)
        )

    if not isinstance(new_str, types.UnicodeType):
        raise TypingError(
            "The object must be a UnicodeType. Given: {}".format(new_str)
        )

    def impl(s, old_str, new_str, count=-1):
        if count == 0:
            return s
        if old_str == "":
            if len(s) == 0:
                # Only one insertion point exists in an empty string, so
                # the result is a single copy of new_str (CPython gives
                # "".replace("", "x") == "x").
                return new_str
            schars = list(s)
            if count < 0:
                # Any negative count means "no limit": new_str goes
                # before, between and after all characters.
                return new_str + new_str.join(schars) + new_str
            split_result = [new_str]
            min_count = min(len(schars), count)
            for i in range(min_count):
                split_result.append(schars[i])
                if i + 1 != min_count:
                    split_result.append(new_str)
                else:
                    # Budget used up: append the untouched remainder.
                    split_result.append("".join(schars[(i + 1) :]))
            if count > len(schars):
                # Budget left over for the insertion point at the end.
                split_result.append(new_str)
            return "".join(split_result)
        if count < 0:
            # Treat every negative count as "no limit" by passing -1,
            # the default maxsplit of split().
            return new_str.join(s.split(old_str, -1))
        return new_str.join(s.split(old_str, count))

    return impl
|
|
2154
|
+
|
|
2155
|
+
|
|
2156
|
+
# ------------------------------------------------------------------------------
|
|
2157
|
+
# String `is*()` methods
|
|
2158
|
+
# ------------------------------------------------------------------------------
|
|
2159
|
+
|
|
2160
|
+
|
|
2161
|
+
# generates isalpha/isalnum
|
|
2162
|
+
def gen_isAlX(ascii_func, unicode_func):
    """Generate the overload body shared by isalpha/isalnum.

    ``ascii_func`` is the per-code-point predicate used when the string is
    flagged ASCII; ``unicode_func`` is the predicate used otherwise. The
    generated implementation returns False for an empty string and
    otherwise requires the predicate to hold for every code point.
    """

    def unicode_isAlX(data):
        def impl(data):
            length = len(data)
            if length == 0:
                return False

            if length == 1:
                code_point = _get_code_point(data, 0)
                if data._is_ascii:
                    return ascii_func(code_point)
                else:
                    return unicode_func(code_point)

            if data._is_ascii:
                for i in range(length):
                    code_point = _get_code_point(data, i)
                    if not ascii_func(code_point):
                        return False
                # Every code point passed the ASCII predicate, so the
                # answer is known; do not rescan with the Unicode one.
                return True

            for i in range(length):
                code_point = _get_code_point(data, i)
                if not unicode_func(code_point):
                    return False

            return True

        return impl

    return unicode_isAlX
|
|
2192
|
+
|
|
2193
|
+
|
|
2194
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11928-L11964 # noqa: E501
|
|
2195
|
+
overload_method(types.UnicodeType, "isalpha")(
    gen_isAlX(ascii_func=_Py_ISALPHA, unicode_func=_PyUnicode_IsAlpha)
)


@register_jitable
def _unicode_is_alnum(x):
    """Return whether code point ``x`` is numeric or alphabetic."""
    return _PyUnicode_IsNumeric(x) or _PyUnicode_IsAlpha(x)


# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11975-L12006    # noqa: E501
overload_method(types.UnicodeType, "isalnum")(
    gen_isAlX(ascii_func=_Py_ISALNUM, unicode_func=_unicode_is_alnum)
)
|
|
2207
|
+
|
|
2208
|
+
|
|
2209
|
+
def _is_upper(is_lower, is_upper, is_title):
    """Build an ``isupper`` implementation from three code point predicates.

    The returned function yields True when no code point is lowercase or
    titlecase and at least one code point is uppercase.
    """

    # impl is an approximate translation of:
    # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11794-L11827    # noqa: E501
    # mixed with:
    # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L218-L242    # noqa: E501
    def impl(a):
        n = len(a)
        if n == 0:
            return False
        if n == 1:
            # Single character: only the uppercase predicate matters.
            return is_upper(_get_code_point(a, 0)) != 0

        seen_upper = False
        for pos in range(n):
            cp = _get_code_point(a, pos)
            if is_lower(cp) or is_title(cp):
                return False
            if not seen_upper and is_upper(cp):
                seen_upper = True
        return seen_upper

    return impl
|
|
2230
|
+
|
|
2231
|
+
|
|
2232
|
+
@register_jitable
def _always_false(x):
    """Predicate that rejects every code point."""
    return False


# ASCII variant: the titlecase predicate always answers False.
_ascii_is_upper = register_jitable(
    _is_upper(
        is_lower=_Py_ISLOWER, is_upper=_Py_ISUPPER, is_title=_always_false
    )
)
_unicode_is_upper = register_jitable(
    _is_upper(
        is_lower=_PyUnicode_IsLowercase,
        is_upper=_PyUnicode_IsUppercase,
        is_title=_PyUnicode_IsTitlecase,
    )
)
|
|
2241
|
+
|
|
2242
|
+
|
|
2243
|
+
@overload_method(types.UnicodeType, "isupper")
def unicode_isupper(a):
    """
    Implements .isupper(), dispatching on the string's ASCII flag.
    """

    def impl(a):
        if not a._is_ascii:
            return _unicode_is_upper(a)
        return _ascii_is_upper(a)

    return impl
|
|
2256
|
+
|
|
2257
|
+
|
|
2258
|
+
@overload_method(types.UnicodeType, "isascii")
def unicode_isascii(data):
    """Implements UnicodeType.isascii() by returning the stored ASCII flag."""

    def impl(data):
        ascii_flag = data._is_ascii
        return ascii_flag

    return impl
|
|
2266
|
+
|
|
2267
|
+
|
|
2268
|
+
@overload_method(types.UnicodeType, "istitle")
def unicode_istitle(data):
    """
    Implements UnicodeType.istitle()
    The algorithm is an approximate translation from CPython:
    https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11829-L11885    # noqa: E501
    """

    def impl(data):
        length = len(data)
        if length == 0:
            return False
        if length == 1:
            cp = _get_code_point(data, 0)
            return _PyUnicode_IsUppercase(cp) or _PyUnicode_IsTitlecase(cp)

        seen_cased = False
        after_cased = False
        for pos in range(length):
            cp = _get_code_point(data, pos)
            if _PyUnicode_IsUppercase(cp) or _PyUnicode_IsTitlecase(cp):
                # Upper/titlecase may not directly follow a cased character.
                if after_cased:
                    return False
                after_cased = True
                seen_cased = True
            elif _PyUnicode_IsLowercase(cp):
                # Lowercase must directly follow a cased character.
                if not after_cased:
                    return False
                after_cased = True
                seen_cased = True
            else:
                after_cased = False

        return seen_cased

    return impl
|
|
2305
|
+
|
|
2306
|
+
|
|
2307
|
+
@overload_method(types.UnicodeType, "islower")
def unicode_islower(data):
    """
    Implements UnicodeType.islower()

    impl is an approximate translation of:
    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L11900-L11933    # noqa: E501
    mixed with:
    https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/bytes_methods.c#L131-L156    # noqa: E501
    """

    def impl(data):
        length = len(data)
        if length == 0:
            return False
        if length == 1:
            return _PyUnicode_IsLowercase(_get_code_point(data, 0))

        seen_lower = False
        for pos in range(length):
            code_point = _get_code_point(data, pos)
            if _PyUnicode_IsUppercase(code_point) or _PyUnicode_IsTitlecase(
                code_point
            ):
                return False
            if not seen_lower and _PyUnicode_IsLowercase(code_point):
                seen_lower = True
        return seen_lower

    return impl
|
|
2333
|
+
|
|
2334
|
+
|
|
2335
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12126-L12161 # noqa: E501
|
|
2336
|
+
@overload_method(types.UnicodeType, "isidentifier")
def unicode_isidentifier(data):
    """Implements UnicodeType.isidentifier()

    The first code point must satisfy ``_PyUnicode_IsXidStart`` or be
    0x5F; every following one must satisfy ``_PyUnicode_IsXidContinue``.
    An empty string is not an identifier.
    """

    def impl(data):
        length = len(data)
        if length == 0:
            return False

        head = _get_code_point(data, 0)
        if not _PyUnicode_IsXidStart(head) and head != 0x5F:
            return False

        pos = 1
        while pos < length:
            if not _PyUnicode_IsXidContinue(_get_code_point(data, pos)):
                return False
            pos += 1

        return True

    return impl
|
|
2357
|
+
|
|
2358
|
+
|
|
2359
|
+
# generator for simple unicode "isX" methods
|
|
2360
|
+
def gen_isX(_PyUnicode_IS_func, empty_is_false=True):
    """Generate the overload body for a simple unicode "isX" method.

    The generated implementation applies ``_PyUnicode_IS_func`` to every
    code point. When ``empty_is_false`` is true an empty string gives
    False; otherwise an empty string falls through the loop and gives True.
    """

    def unicode_isX(data):
        def impl(data):
            length = len(data)
            if empty_is_false and length == 0:
                return False
            if length == 1:
                return _PyUnicode_IS_func(_get_code_point(data, 0))

            for pos in range(length):
                if not _PyUnicode_IS_func(_get_code_point(data, pos)):
                    return False

            return True

        return impl

    return unicode_isX
|
|
2380
|
+
|
|
2381
|
+
|
|
2382
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L11896-L11925 # noqa: E501
|
|
2383
|
+
overload_method(types.UnicodeType, "isspace")(
    gen_isX(_PyUnicode_IsSpace)
)

# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12096-L12124    # noqa: E501
overload_method(types.UnicodeType, "isnumeric")(
    gen_isX(_PyUnicode_IsNumeric)
)

# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12056-L12085    # noqa: E501
overload_method(types.UnicodeType, "isdigit")(
    gen_isX(_PyUnicode_IsDigit)
)

# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12017-L12045    # noqa: E501
overload_method(types.UnicodeType, "isdecimal")(
    gen_isX(_PyUnicode_IsDecimalDigit)
)

# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L12188-L12213    # noqa: E501
# isprintable is the only one for which an empty string is not False.
overload_method(types.UnicodeType, "isprintable")(
    gen_isX(_PyUnicode_IsPrintable, empty_is_false=False)
)
|
|
2400
|
+
|
|
2401
|
+
# ------------------------------------------------------------------------------
|
|
2402
|
+
# String methods that apply a transformation to the characters themselves
|
|
2403
|
+
# ------------------------------------------------------------------------------
|
|
2404
|
+
|
|
2405
|
+
|
|
2406
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908 # noqa: E501
|
|
2407
|
+
def case_operation(ascii_func, unicode_func):
    """Generate common case operation performer.

    ``ascii_func(data, res)`` is called for ASCII-flagged strings and is
    expected to fill ``res`` in place. ``unicode_func(data, length, tmp,
    maxchars)`` is called otherwise; its return value is used as the
    result length and ``maxchars[0]`` as the largest code point written.
    """

    def impl(data):
        length = len(data)
        if length == 0:
            return _empty_string(data._kind, length, data._is_ascii)

        if data._is_ascii:
            out = _empty_string(data._kind, length, 1)
            ascii_func(data, out)
            return out

        # https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9863-L9908    # noqa: E501
        # Scratch buffer of the widest kind, sized at three output code
        # points per input code point.
        scratch = _empty_string(
            PY_UNICODE_4BYTE_KIND, 3 * length, data._is_ascii
        )
        # maxchar is kept inside a list so that it can be passed to
        # unicode_func by reference.
        maxchars = [0]
        out_len = unicode_func(data, length, scratch, maxchars)
        top_cp = maxchars[0]

        # Copy into a string whose kind fits the largest code point.
        out = _empty_string(
            _codepoint_to_kind(top_cp), out_len, _codepoint_is_ascii(top_cp)
        )
        for pos in range(out_len):
            _set_code_point(out, pos, _get_code_point(scratch, pos))

        return out

    return impl
|
|
2434
|
+
|
|
2435
|
+
|
|
2436
|
+
# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9856-L9883 # noqa: E501
|
|
2437
|
+
@register_jitable
def _handle_capital_sigma(data, length, idx):
    """This is a translation of the function that handles the capital sigma.

    Returns 0x3C2 when the sigma at ``idx`` is in final position (preceded
    by a cased character and not followed by one, skipping case-ignorable
    characters on both sides) and 0x3C3 otherwise.
    """
    cp = 0

    # Scan backwards for the nearest non-case-ignorable character.
    before = idx - 1
    while before >= 0:
        cp = _get_code_point(data, before)
        if not _PyUnicode_IsCaseIgnorable(cp):
            break
        before -= 1
    final_sigma = before >= 0 and _PyUnicode_IsCased(cp)

    if final_sigma:
        # Scan forwards for the nearest non-case-ignorable character.
        after = idx + 1
        while after < length:
            cp = _get_code_point(data, after)
            if not _PyUnicode_IsCaseIgnorable(cp):
                break
            after += 1
        final_sigma = after == length or (not _PyUnicode_IsCased(cp))

    if final_sigma:
        return 0x3C2
    return 0x3C3
|
|
2458
|
+
|
|
2459
|
+
|
|
2460
|
+
# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9885-L9895 # noqa: E501
|
|
2461
|
+
@register_jitable
def _lower_ucs4(code_point, data, length, idx, mapped):
    """This is a translation of the function that lowers a character.

    Writes the lowered code point(s) into ``mapped`` and returns how many
    were written. 0x3A3 (capital sigma) is special-cased.
    """
    if code_point != 0x3A3:
        return _PyUnicode_ToLowerFull(code_point, mapped)
    mapped[0] = _handle_capital_sigma(data, length, idx)
    return 1
|
|
2468
|
+
|
|
2469
|
+
|
|
2470
|
+
# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9946-L9965 # noqa: E501
|
|
2471
|
+
def _gen_unicode_upper_or_lower(lower):
    """Generate the non-ASCII worker for ``lower()`` or ``upper()``.

    When ``lower`` is true the worker lowers each code point, otherwise it
    uppercases it. The worker writes the mapped code points into ``res``,
    tracks the largest one in ``maxchars[0]`` and returns how many code
    points were written.
    """

    def _do_upper_or_lower(data, length, res, maxchars):
        k = 0
        # One scratch buffer reused for every character (cleared per
        # iteration) instead of a fresh allocation each time round.
        mapped = np.zeros(3, dtype=_Py_UCS4)
        for idx in range(length):
            mapped.fill(0)
            code_point = _get_code_point(data, idx)
            if lower:
                n_res = _lower_ucs4(code_point, data, length, idx, mapped)
            else:
                # might be needed if call _do_upper_or_lower in unicode_upper
                n_res = _PyUnicode_ToUpperFull(code_point, mapped)
            for m in mapped[:n_res]:
                maxchars[0] = max(maxchars[0], m)
                _set_code_point(res, k, m)
                k += 1
        return k

    return _do_upper_or_lower
|
|
2489
|
+
|
|
2490
|
+
|
|
2491
|
+
# Non-ASCII workers used by str.upper() and str.lower().
_unicode_upper = register_jitable(_gen_unicode_upper_or_lower(lower=False))
_unicode_lower = register_jitable(_gen_unicode_upper_or_lower(lower=True))
|
|
2493
|
+
|
|
2494
|
+
|
|
2495
|
+
def _gen_ascii_upper_or_lower(func):
    """Generate an ASCII worker applying ``func`` to each code point.

    The worker stores ``func(code_point)`` in ``res`` at the same index.
    """

    def _ascii_upper_or_lower(data, res):
        n = len(data)
        for pos in range(n):
            _set_code_point(res, pos, func(_get_code_point(data, pos)))

    return _ascii_upper_or_lower
|
|
2502
|
+
|
|
2503
|
+
|
|
2504
|
+
# ASCII workers used by str.upper() and str.lower().
_ascii_upper = register_jitable(_gen_ascii_upper_or_lower(func=_Py_TOUPPER))
_ascii_lower = register_jitable(_gen_ascii_upper_or_lower(func=_Py_TOLOWER))
|
|
2506
|
+
|
|
2507
|
+
|
|
2508
|
+
@overload_method(types.UnicodeType, "lower")
def unicode_lower(data):
    """Implements .lower() on top of the shared case operation."""
    return case_operation(
        ascii_func=_ascii_lower, unicode_func=_unicode_lower
    )
|
|
2512
|
+
|
|
2513
|
+
|
|
2514
|
+
@overload_method(types.UnicodeType, "upper")
def unicode_upper(data):
    """Implements .upper() on top of the shared case operation."""
    return case_operation(
        ascii_func=_ascii_upper, unicode_func=_unicode_upper
    )
|
|
2518
|
+
|
|
2519
|
+
|
|
2520
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9819-L9834 # noqa: E501
|
|
2521
|
+
@register_jitable
def _unicode_casefold(data, length, res, maxchars):
    """Case-fold ``data`` into ``res`` for the non-ASCII path.

    Returns the number of code points written and keeps the largest one
    written in ``maxchars[0]``.
    """
    written = 0
    folded = np.zeros(3, dtype=_Py_UCS4)
    for pos in range(length):
        folded.fill(0)
        n_folded = _PyUnicode_ToFoldedFull(_get_code_point(data, pos), folded)
        for m in folded[:n_folded]:
            current_max = maxchars[0]
            maxchars[0] = max(current_max, m)
            _set_code_point(res, written, m)
            written += 1

    return written
|
|
2536
|
+
|
|
2537
|
+
|
|
2538
|
+
@register_jitable
def _ascii_casefold(data, res):
    """Case-fold an ASCII string by lowering each code point into ``res``."""
    n = len(data)
    for pos in range(n):
        lowered = _Py_TOLOWER(_get_code_point(data, pos))
        _set_code_point(res, pos, lowered)
|
|
2543
|
+
|
|
2544
|
+
|
|
2545
|
+
@overload_method(types.UnicodeType, "casefold")
def unicode_casefold(data):
    """Implements str.casefold() on top of the shared case operation."""
    return case_operation(
        ascii_func=_ascii_casefold, unicode_func=_unicode_casefold
    )
|
|
2549
|
+
|
|
2550
|
+
|
|
2551
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9737-L9759 # noqa: E501
|
|
2552
|
+
@register_jitable
def _unicode_capitalize(data, length, res, maxchars):
    """Capitalize ``data`` into ``res`` for the non-ASCII path.

    The first code point is title-cased and all following ones are
    lowered. Returns the number of code points written and stores the
    largest one written in ``maxchars[0]``.
    """
    written = 0
    top_cp = 0
    mapped = np.zeros(3, dtype=_Py_UCS4)

    # First character: title case.
    head = _get_code_point(data, 0)
    n_mapped = _PyUnicode_ToTitleFull(head, mapped)
    for m in mapped[:n_mapped]:
        top_cp = max(top_cp, m)
        _set_code_point(res, written, m)
        written += 1

    # Remaining characters: lower case.
    for pos in range(1, length):
        mapped.fill(0)
        cp = _get_code_point(data, pos)
        n_mapped = _lower_ucs4(cp, data, length, pos, mapped)
        for m in mapped[:n_mapped]:
            top_cp = max(top_cp, m)
            _set_code_point(res, written, m)
            written += 1

    maxchars[0] = top_cp
    return written
|
|
2575
|
+
|
|
2576
|
+
|
|
2577
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L361-L382 # noqa: E501
|
|
2578
|
+
@register_jitable
def _ascii_capitalize(data, res):
    """Capitalize an ASCII string into ``res``.

    The first code point is uppercased, all the others are lowercased.
    """
    _set_code_point(res, 0, _Py_TOUPPER(_get_code_point(data, 0)))
    n = len(data)
    for pos in range(1, n):
        _set_code_point(res, pos, _Py_TOLOWER(_get_code_point(data, pos)))
|
|
2585
|
+
|
|
2586
|
+
|
|
2587
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L10765-L10774 # noqa: E501
|
|
2588
|
+
@overload_method(types.UnicodeType, "capitalize")
def unicode_capitalize(data):
    """Implements str.capitalize() on top of the shared case operation."""
    return case_operation(
        ascii_func=_ascii_capitalize, unicode_func=_unicode_capitalize
    )
|
|
2591
|
+
|
|
2592
|
+
|
|
2593
|
+
# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L9996-L10021 # noqa: E501
|
|
2594
|
+
@register_jitable
def _unicode_title(data, length, res, maxchars):
    """This is a translation of the function that titles a unicode string.

    A code point that follows a cased one is lowered; any other is
    title-cased. Returns the number of code points written to ``res`` and
    keeps the largest one written in ``maxchars[0]``.
    """
    written = 0
    after_cased = False
    mapped = np.empty(3, dtype=_Py_UCS4)
    for pos in range(length):
        mapped.fill(0)
        cp = _get_code_point(data, pos)
        if not after_cased:
            n_mapped = _PyUnicode_ToTitleFull(_Py_UCS4(cp), mapped)
        else:
            n_mapped = _lower_ucs4(cp, data, length, pos, mapped)
        for m in mapped[:n_mapped]:
            (current_max,) = maxchars
            maxchars[0] = max(current_max, m)
            _set_code_point(res, written, m)
            written += 1
        after_cased = _PyUnicode_IsCased(_Py_UCS4(cp))
    return written
|
|
2614
|
+
|
|
2615
|
+
|
|
2616
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L332-L352 # noqa: E501
|
|
2617
|
+
@register_jitable
def _ascii_title(data, res):
    """Does .title() on an ASCII string

    A lowercase letter not preceded by a cased character is uppercased; an
    uppercase letter preceded by a cased character is lowercased.
    """
    after_cased = False
    n = len(data)
    for pos in range(n):
        cp = _get_code_point(data, pos)
        if _Py_ISLOWER(cp):
            if not after_cased:
                cp = _Py_TOUPPER(cp)
            after_cased = True
        elif _Py_ISUPPER(cp):
            if after_cased:
                cp = _Py_TOLOWER(cp)
            after_cased = True
        else:
            after_cased = False
        _set_code_point(res, pos, cp)
|
|
2634
|
+
|
|
2635
|
+
|
|
2636
|
+
# https://github.com/python/cpython/blob/201c8f79450628241574fba940e08107178dc3a5/Objects/unicodeobject.c#L10023-L10069 # noqa: E501
|
|
2637
|
+
@overload_method(types.UnicodeType, "title")
def unicode_title(data):
    """Implements str.title() on top of the shared case operation."""
    # https://docs.python.org/3/library/stdtypes.html#str.title
    return case_operation(
        ascii_func=_ascii_title, unicode_func=_unicode_title
    )
|
|
2642
|
+
|
|
2643
|
+
|
|
2644
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/bytes_methods.c#L391-L408 # noqa: E501
|
|
2645
|
+
@register_jitable
def _ascii_swapcase(data, res):
    """Swap the case of every ASCII letter of ``data`` into ``res``.

    Code points that are neither upper nor lower case are copied as is.
    """
    n = len(data)
    for pos in range(n):
        cp = _get_code_point(data, pos)
        if _Py_ISUPPER(cp):
            cp = _Py_TOLOWER(cp)
        elif _Py_ISLOWER(cp):
            cp = _Py_TOUPPER(cp)
        _set_code_point(res, pos, cp)
|
|
2654
|
+
|
|
2655
|
+
|
|
2656
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L9761-L9784 # noqa: E501
|
|
2657
|
+
@register_jitable
def _unicode_swapcase(data, length, res, maxchars):
    """Swap the case of ``data`` into ``res`` for the non-ASCII path.

    Uppercase code points are lowered, lowercase ones are uppercased and
    all others are copied unchanged. Returns the number of code points
    written and stores the largest one written in ``maxchars[0]``.
    """
    written = 0
    top_cp = 0
    mapped = np.empty(3, dtype=_Py_UCS4)
    for pos in range(length):
        mapped.fill(0)
        cp = _get_code_point(data, pos)
        if _PyUnicode_IsUppercase(cp):
            n_mapped = _lower_ucs4(cp, data, length, pos, mapped)
        elif _PyUnicode_IsLowercase(cp):
            n_mapped = _PyUnicode_ToUpperFull(cp, mapped)
        else:
            # Uncased: pass the code point through untouched.
            n_mapped = 1
            mapped[0] = cp
        for m in mapped[:n_mapped]:
            top_cp = max(top_cp, m)
            _set_code_point(res, written, m)
            written += 1
    maxchars[0] = top_cp
    return written
|
|
2678
|
+
|
|
2679
|
+
|
|
2680
|
+
@overload_method(types.UnicodeType, "swapcase")
def unicode_swapcase(data):
    """Overload providing ``str.swapcase()`` for unicode strings."""
    # case_operation receives the ASCII helper and the general unicode helper
    swapcase_impl = case_operation(_ascii_swapcase, _unicode_swapcase)
    return swapcase_impl
|
|
2683
|
+
|
|
2684
|
+
|
|
2685
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Python/bltinmodule.c#L1781-L1824 # noqa: E501
|
|
2686
|
+
@overload(ord)
def ol_ord(c):
    """Overload of the builtin ``ord()`` for unicode string arguments."""
    if not isinstance(c, types.UnicodeType):
        # no implementation is offered for other argument types
        return None

    def impl(c):
        # CPython raises TypeError unless the string has exactly one character
        if len(c) != 1:
            raise TypeError("ord() expected a character")
        return _get_code_point(c, 0)

    return impl
|
|
2698
|
+
|
|
2699
|
+
|
|
2700
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L2005-L2028 # noqa: E501
|
|
2701
|
+
# This looks a bit different to the cpython implementation but, with the
|
|
2702
|
+
# exception of a latin1 fast path is logically the same. It finds the "kind" of
|
|
2703
|
+
# the codepoint `ch`, creates a length 1 string of that kind and then injects
|
|
2704
|
+
# the code point into the zero position of that string. Cpython does similar but
|
|
2705
|
+
# branches for each kind (this is encapsulated in Numba's _set_code_point).
|
|
2706
|
+
@register_jitable
def _unicode_char(ch):
    """Return a new length-1 string holding the code point ``ch``.

    The string kind is chosen from the code point via ``_codepoint_to_kind``
    and the code point is then stored at position zero.
    """
    assert ch <= _MAX_UNICODE
    kind = _codepoint_to_kind(ch)
    # Only code points below 128 are ASCII. The 1-byte kind also holds the
    # latin-1 range 128..255, so deriving the flag from
    # `kind == PY_UNICODE_1BYTE_KIND` would mark those characters as ASCII as
    # well; CPython's PyUnicode_New only sets the ASCII state for
    # maxchar < 128.
    # NOTE(review): relies on the third argument of _empty_string being the
    # is-ASCII flag -- confirm against its definition.
    ret = _empty_string(kind, 1, ch < 128)
    _set_code_point(ret, 0, ch)
    return ret
|
|
2713
|
+
|
|
2714
|
+
|
|
2715
|
+
# Message of the ValueError raised by _PyUnicode_FromOrdinal for an ordinal
# outside 0.._MAX_UNICODE. The `h` length modifier in `%hx` is accepted but
# ignored by Python's %-formatting, so this renders exactly like `%x`.
_out_of_range_msg = "chr() arg not in range(0x%hx)" % _MAX_UNICODE
|
|
2716
|
+
|
|
2717
|
+
|
|
2718
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodeobject.c#L3045-L3055 # noqa: E501
|
|
2719
|
+
@register_jitable
def _PyUnicode_FromOrdinal(ordinal):
    """Return the one-character string for the code point ``ordinal``.

    Raises ValueError when ``ordinal`` is negative or above ``_MAX_UNICODE``.
    """
    in_range = 0 <= ordinal <= _MAX_UNICODE
    if not in_range:
        raise ValueError(_out_of_range_msg)

    code_point = _Py_UCS4(ordinal)
    return _unicode_char(code_point)
|
|
2725
|
+
|
|
2726
|
+
|
|
2727
|
+
# https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Python/bltinmodule.c#L715-L720 # noqa: E501
|
|
2728
|
+
@overload(chr)
def ol_chr(i):
    """Overload of the builtin ``chr()`` for integer arguments."""
    if not isinstance(i, types.Integer):
        # no implementation is offered for other argument types
        return None

    def impl(i):
        return _PyUnicode_FromOrdinal(i)

    return impl
|
|
2736
|
+
|
|
2737
|
+
|
|
2738
|
+
@overload_method(types.UnicodeType, "__str__")
def unicode_str(s):
    """Overload of ``str.__str__``: the string itself is returned."""

    def impl(s):
        return s

    return impl
|
|
2741
|
+
|
|
2742
|
+
|
|
2743
|
+
@overload_method(types.UnicodeType, "__repr__")
def unicode_repr(s):
    """Overload of ``str.__repr__``: the string wrapped in single quotes."""

    # Plain concatenation is used on purpose: with an f-string the impl ends
    # up calling str and then repr, which then recurses somewhere in imports.
    def impl(s):
        return "'" + s + "'"

    return impl
|
|
2748
|
+
|
|
2749
|
+
|
|
2750
|
+
@overload_method(types.Integer, "__str__")
def integer_str(n):
    """Implements int.__str__() for integer types.

    The decimal digits are written right-to-left into a newly allocated
    1-byte-kind string, preceded by "-" for negative values.
    """
    # The constant 10 expressed through the type of `n`, used as the divisor.
    ten = n(10)

    def impl(n):
        flag = False
        if n < 0:
            # NOTE(review): negating the most negative value of a signed type
            # presumably wraps around; that input is not special-cased here.
            n = -n
            flag = True
        if n == 0:
            return "0"
        # Count the decimal digits with integer arithmetic only. Deriving the
        # count from floor(log10(n)) is inexact in double precision: e.g.
        # log10(999999999999999) can round up to 15.0, which yields one digit
        # too many and leaves the first character of the result unwritten.
        length = 1 if flag else 0
        remaining = n
        while remaining > 0:
            remaining = remaining // ten
            length += 1
        kind = PY_UNICODE_1BYTE_KIND
        char_width = _kind_to_byte_width(kind)
        s = _malloc_string(kind, char_width, length, True)
        if flag:
            _set_code_point(s, 0, ord("-"))
        # Fill in the digits from the least significant one backwards.
        idx = length - 1
        while n > 0:
            n, digit = divmod(n, ten)
            c = ord("0") + digit
            _set_code_point(s, idx, c)
            idx -= 1
        return s

    return impl
|
|
2776
|
+
|
|
2777
|
+
|
|
2778
|
+
@overload_method(types.Integer, "__repr__")
def integer_repr(n):
    """Overload of ``int.__repr__``: delegates to ``__str__``."""

    def impl(n):
        return n.__str__()

    return impl
|
|
2781
|
+
|
|
2782
|
+
|
|
2783
|
+
@overload_method(types.Boolean, "__repr__")
@overload_method(types.Boolean, "__str__")
def boolean_str(b):
    """Overload of ``bool.__str__`` and ``bool.__repr__``."""

    def impl(b):
        if b:
            return "True"
        return "False"

    return impl
|
|
2787
|
+
|
|
2788
|
+
|
|
2789
|
+
# ------------------------------------------------------------------------------
|
|
2790
|
+
# iteration
|
|
2791
|
+
# ------------------------------------------------------------------------------
|
|
2792
|
+
|
|
2793
|
+
|
|
2794
|
+
@lower("getiter", types.UnicodeType)
def getiter_unicode(context, builder, sig, args):
    """Lower ``getiter`` on a unicode string by building its iterator struct.

    The iterator stores the string value and a pointer to an index slot that
    starts at zero.
    """
    (str_ty,) = sig.args
    (str_val,) = args
    iter_ty = sig.return_type

    it = context.make_helper(builder, iter_ty)

    # index slot created with alloca_once_value, initialised to zero
    start = context.get_constant(types.uintp, 0)
    it.index = cgutils.alloca_once_value(builder, start)

    # the string being iterated over
    it.data = str_val

    # take a reference on the string when NRT is enabled
    if context.enable_nrt:
        context.nrt.incref(builder, str_ty, str_val)

    return impl_ret_new_ref(context, builder, iter_ty, it._getvalue())
|
|
2816
|
+
|
|
2817
|
+
|
|
2818
|
+
@lower("iternext", types.UnicodeIteratorType)
|
|
2819
|
+
# a new ref counted object is put into result._yield so set the new_ref to True!
|
|
2820
|
+
@iternext_impl(RefType.NEW)
|
|
2821
|
+
def iternext_unicode(context, builder, sig, args, result):
|
|
2822
|
+
[iterty] = sig.args
|
|
2823
|
+
[iter] = args
|
|
2824
|
+
|
|
2825
|
+
tyctx = context.typing_context
|
|
2826
|
+
|
|
2827
|
+
# get ref to unicode.__getitem__
|
|
2828
|
+
fnty = tyctx.resolve_value_type(operator.getitem)
|
|
2829
|
+
getitem_sig = fnty.get_call_type(
|
|
2830
|
+
tyctx, (types.unicode_type, types.uintp), {}
|
|
2831
|
+
)
|
|
2832
|
+
getitem_impl = context.get_function(fnty, getitem_sig)
|
|
2833
|
+
|
|
2834
|
+
# get ref to unicode.__len__
|
|
2835
|
+
fnty = tyctx.resolve_value_type(len)
|
|
2836
|
+
len_sig = fnty.get_call_type(tyctx, (types.unicode_type,), {})
|
|
2837
|
+
len_impl = context.get_function(fnty, len_sig)
|
|
2838
|
+
|
|
2839
|
+
# grab unicode iterator struct
|
|
2840
|
+
iterobj = context.make_helper(builder, iterty, value=iter)
|
|
2841
|
+
|
|
2842
|
+
# find the length of the string
|
|
2843
|
+
strlen = len_impl(builder, (iterobj.data,))
|
|
2844
|
+
|
|
2845
|
+
# find the current index
|
|
2846
|
+
index = builder.load(iterobj.index)
|
|
2847
|
+
|
|
2848
|
+
# see if the index is in range
|
|
2849
|
+
is_valid = builder.icmp_unsigned("<", index, strlen)
|
|
2850
|
+
result.set_valid(is_valid)
|
|
2851
|
+
|
|
2852
|
+
with builder.if_then(is_valid):
|
|
2853
|
+
# return value at index
|
|
2854
|
+
gotitem = getitem_impl(
|
|
2855
|
+
builder,
|
|
2856
|
+
(
|
|
2857
|
+
iterobj.data,
|
|
2858
|
+
index,
|
|
2859
|
+
),
|
|
2860
|
+
)
|
|
2861
|
+
result.yield_(gotitem)
|
|
2862
|
+
|
|
2863
|
+
# bump index for next cycle
|
|
2864
|
+
nindex = cgutils.increment_index(builder, index)
|
|
2865
|
+
builder.store(nindex, iterobj.index)
|