torch-rb 0.23.1 → 0.24.0
This diff compares the publicly available contents of two released versions of the package, as they appear in the public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +3 -1
- data/codegen/generate_functions.rb +1 -6
- data/codegen/native_functions.yaml +229 -58
- data/ext/torch/ivalue.cpp +4 -4
- data/ext/torch/ruby_arg_parser.cpp +14 -14
- data/ext/torch/ruby_arg_parser.h +11 -11
- data/ext/torch/templates.h +1 -1
- data/ext/torch/tensor.cpp +6 -12
- data/ext/torch/torch.cpp +6 -6
- data/ext/torch/utils.h +5 -5
- data/ext/torch/wrap_outputs.h +29 -22
- data/lib/torch/hub.rb +8 -28
- data/lib/torch/nn/module.rb +1 -1
- data/lib/torch/nn/rnn_base.rb +1 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +4 -4
- metadata +3 -3

--- a/data/codegen/native_functions.yaml
+++ b/data/codegen/native_functions.yaml
@@ -418,11 +418,13 @@
   variants: function
   dispatch:
     CPU, CUDA, MPS, Meta: view_as_real
+    SparseCPU, SparseCUDA, SparseMPS: view_as_real_sparse
 
 - func: view_as_complex(Tensor(a) self) -> Tensor(a)
   variants: function
   dispatch:
     CPU, CUDA, MPS, Meta: view_as_complex
+    SparseCPU, SparseCUDA, SparseMPS: view_as_complex_sparse
 
 - func: sgn(Tensor self) -> Tensor
   variants: function, method
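
The hunk above extends `view_as_real` and `view_as_complex` to the sparse COO dispatch keys. For orientation, the dense semantics of the pair (PyTorch's Python API, shown only as an illustration of what the new sparse kernels must preserve):

```python
import torch

# view_as_real reinterprets a complex tensor as a real tensor with a
# trailing dimension of size 2 holding (real, imag) pairs; view_as_complex
# is the inverse. Both return views, no data is copied.
z = torch.tensor([1 + 2j, 3 - 4j])
r = torch.view_as_real(z)                       # [[1., 2.], [3., -4.]]
assert torch.equal(torch.view_as_complex(r), z)
```
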
@@ -478,6 +480,7 @@
     CompositeExplicitAutograd: _conj_physical
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: conj_physical_sparse_csr
   autogen: _conj_physical.out
+  tags: pointwise
 
 - func: conj_physical(Tensor self) -> Tensor
   variants: function, method
@@ -1089,11 +1092,13 @@
   variants: function
   dispatch:
     CUDA: _baddbmm_dtype_cuda
+    XPU: _baddbmm_dtype_xpu
 
 - func: baddbmm.dtype_out(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _baddbmm_out_dtype_cuda
+    XPU: _baddbmm_out_dtype_xpu
 
 - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
@@ -1318,7 +1323,7 @@
 - func: logical_xor.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: logical_xor_out
+    CPU, CUDA, MTIA: logical_xor_out
     MPS: logical_xor_out_mps
   tags: pointwise
 
@@ -1403,11 +1408,13 @@
   variants: function
   dispatch:
     CUDA: _bmm_dtype_cuda
+    XPU: _bmm_dtype_xpu
 
 - func: bmm.dtype_out(Tensor self, Tensor mat2, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _bmm_out_dtype_cuda
+    XPU: _bmm_out_dtype_xpu
 
 - func: broadcast_tensors(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck
@@ -2311,6 +2318,7 @@
   dispatch:
     CPU: vdot
     CUDA: vdot_cuda
+    MPS: vdot_mps
 
 - func: vdot.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -2728,8 +2736,7 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: fill_
-    MPS: fill_scalar_mps
+    CPU, CUDA, MPS: fill_
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: fill_sparse_csr_
@@ -2740,8 +2747,7 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: fill_
-    MPS: fill_tensor_mps_
+    CPU, CUDA, MPS: fill_
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
     NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: fill_nested_
@@ -2870,7 +2876,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: gcd_out
+    CPU, CUDA, MPS: gcd_out
   tags: pointwise
 
 - func: gcd(Tensor self, Tensor other) -> Tensor
@@ -2941,6 +2947,8 @@
   autogen: _grid_sampler_2d_cpu_fallback.out
 
 - func: _grid_sampler_2d_cpu_fallback_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)
+  dispatch:
+    CompositeExplicitAutograd: _grid_sampler_2d_cpu_fallback_backward
 
 - func: grid_sampler_3d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
   dispatch:
@@ -3043,6 +3051,7 @@
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
     MPS: _fft_c2r_mps
+  tags: core
 
 - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
@@ -3288,6 +3297,42 @@
   device_guard: False
   manual_cpp_binding: True
 
+- func: numel(Tensor self) -> int
+  variants: method
+  device_check: NoCheck
+  device_guard: False
+  manual_cpp_binding: True
+
+- func: dim(Tensor self) -> int
+  variants: method
+  device_check: NoCheck
+  device_guard: False
+  manual_cpp_binding: True
+
+- func: get_device(Tensor self) -> int
+  variants: method
+  device_check: NoCheck
+  device_guard: False
+  manual_cpp_binding: True
+
+- func: storage_offset(Tensor self) -> int
+  variants: method
+  device_check: NoCheck
+  device_guard: False
+  manual_cpp_binding: True
+
+- func: is_contiguous(Tensor self) -> bool
+  variants: method
+  device_check: NoCheck
+  device_guard: False
+  manual_cpp_binding: True
+
+- func: is_contiguous.memory_format(Tensor self, MemoryFormat memory_format) -> bool
+  variants: method
+  device_check: NoCheck
+  device_guard: False
+  manual_cpp_binding: True
+
 - func: kl_div(Tensor self, Tensor target, int reduction=Mean, *, bool log_target=False) -> Tensor
 
 - func: kron(Tensor self, Tensor other) -> Tensor
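
The block of entries added above registers basic introspection queries (`numel`, `dim`, `get_device`, `storage_offset`, `is_contiguous`) as native-function schemas with `manual_cpp_binding: True`, meaning codegen emits only the schema while the C++ binding stays hand-written. Their observable behavior is the familiar one:

```python
import torch

t = torch.arange(6.0).reshape(2, 3)
t.numel()            # 6  - total number of elements
t.dim()              # 2  - number of dimensions
t.storage_offset()   # 0  - element offset into the underlying storage
t.is_contiguous()    # True
t.get_device()       # -1 on CPU; the device index for CUDA tensors
```
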
@@ -3342,11 +3387,13 @@
   dispatch:
     CUDA: _fused_rms_norm_cuda
     MPS: _fused_rms_norm_mps
+    XPU: _fused_rms_norm_xpu
     CompositeImplicitAutograd: rms_norm_composite
 
 - func: _fused_rms_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor rstd, Tensor? weight, bool[2] output_mask) -> (Tensor, Tensor)
   dispatch:
     CUDA: _fused_rms_norm_backward_cuda
+    XPU: _fused_rms_norm_backward_xpu
 
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
@@ -3476,13 +3523,20 @@
 
 - func: ldexp.Tensor(Tensor self, Tensor other) -> Tensor
   variants: function, method
+  tags: pointwise
+  dispatch:
+    CompositeExplicitAutograd: ldexp
 
 - func: ldexp_(Tensor(a!) self, Tensor other) -> Tensor(a!)
   variants: function, method
   tags: pointwise
+  dispatch:
+    CompositeExplicitAutograd: ldexp_
 
 - func: ldexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   tags: pointwise
+  dispatch:
+    CompositeExplicitAutograd: ldexp_out
 
 - func: linspace(Scalar start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
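
The `ldexp` family above gains explicit `CompositeExplicitAutograd` kernels. As a reminder, `ldexp(self, other)` computes `self * 2**other`, the classic "load exponent" primitive:

```python
import torch

x = torch.tensor([1.0, 1.0, 1.0])
n = torch.tensor([1, 2, 3])
torch.ldexp(x, n)   # tensor([2., 4., 8.]), i.e. x * 2**n
```
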
@@ -3676,8 +3730,7 @@
   structured_inherits: TensorIteratorBase
   variants: function
   dispatch:
-    CPU, CUDA: xlogy_out
-    MPS: xlogy_out_mps
+    CPU, CUDA, MPS: xlogy_out
   tags: pointwise
 
 - func: xlogy.OutScalar_Self(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -3871,7 +3924,7 @@
   device_check: NoCheck # TensorIterator
   structured: True
   dispatch:
-    CPU, CUDA
+    CPU, CUDA: aminmax_out
     MPS: aminmax_out_mps
   tags: reduction
 
@@ -3926,7 +3979,7 @@
 - func: amax.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   dispatch:
-    CPU, CUDA
+    CPU, CUDA: amax_out
     MPS: amax_out_mps
   tags: reduction
 
@@ -4115,7 +4168,7 @@
 - func: amin.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   dispatch:
-    CPU, CUDA
+    CPU, CUDA: amin_out
     MPS: amin_out_mps
   tags: reduction
 
@@ -4193,6 +4246,27 @@
     CUDA: miopen_rnn_backward
   autogen: miopen_rnn_backward.out
 
+- func: _use_miopen_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank) -> bool
+  device_check: NoCheck # Tensor arguments allowed to be on different devices, see also miopen_ctc_loss
+  dispatch:
+    CUDA: _use_miopen_ctc_loss
+
+- func: _use_miopen_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank) -> bool
+  device_check: NoCheck # Tensor arguments allowed to be on different devices, see also miopen_ctc_loss
+  dispatch:
+    CUDA: _use_miopen_ctc_loss_tensor
+
+- func: miopen_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank, bool deterministic, bool zero_infinity) -> (Tensor, Tensor)
+  device_check: NoCheck # log_probs is expected to be on CUDA while targets is expected to be on CPU
+  dispatch:
+    CUDA: miopen_ctc_loss
+  autogen: miopen_ctc_loss.out
+
+- func: miopen_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank, bool deterministic, bool zero_infinity) -> (Tensor, Tensor)
+  device_check: NoCheck # log_probs is expected to be on CUDA while targets is expected to be on CPU
+  dispatch:
+    CUDA: miopen_ctc_loss_tensor
+
 - func: mm(Tensor self, Tensor mat2) -> Tensor
   structured_delegate: mm.out
   variants: function, method
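
The four schemas above add a MIOpen-backed CTC loss for ROCm, and the `device_check: NoCheck` comments record that `log_probs` lives on the GPU while `targets` may stay on CPU. Whether the MIOpen kernel is eligible is decided inside dispatch; the public entry point is unchanged (PyTorch's Python API shown for illustration):

```python
import torch
import torch.nn.functional as F

T, C, N, S = 50, 20, 4, 10   # input length, classes, batch, target length
log_probs = F.log_softmax(torch.randn(T, N, C), dim=-1)
targets = torch.randint(1, C, (N, S), dtype=torch.long)
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.full((N,), S, dtype=torch.long)

# On a ROCm build with suitable inputs, this can now route to miopen_ctc_loss.
loss = F.ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0)
```
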
@@ -4215,10 +4289,12 @@
 - func: mm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
   dispatch:
     CUDA: _mm_dtype_cuda
+    XPU: _mm_dtype_xpu
 
 - func: mm.dtype_out(Tensor self, Tensor mat2, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CUDA: _mm_dtype_out_cuda
+    XPU: _mm_dtype_out_xpu
 
 - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
   dispatch:
@@ -4381,7 +4457,7 @@
 
 - func: mvlgamma.out(Tensor self, int p, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: mvlgamma_out
+    CPU, CUDA, MPS: mvlgamma_out
   tags: pointwise
 
 - func: mvlgamma(Tensor self, int p) -> Tensor
@@ -4993,8 +5069,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA, MTIA: reciprocal_out
-    MPS: reciprocal_out_mps
+    CPU, CUDA, MPS, MTIA: reciprocal_out
   tags: pointwise
 
 - func: neg(Tensor self) -> Tensor
@@ -5344,6 +5419,7 @@
 
 - func: selu_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
+  tags: pointwise
 
 - func: celu(Tensor self, Scalar alpha=1.0) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -5356,6 +5432,7 @@
   dispatch:
     CompositeExplicitAutograd: celu_
   autogen: celu.out
+  tags: pointwise
 
 - func: silu(Tensor self) -> Tensor
   structured_delegate: silu.out
@@ -5376,8 +5453,7 @@
   structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
-    CPU, CUDA, MTIA: silu_out
-    MPS: silu_out_mps
+    CPU, CUDA, MPS, MTIA: silu_out
   tags: pointwise
 
 - func: silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
@@ -5385,8 +5461,7 @@
   structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
-    CPU, CUDA: silu_backward_out
-    MPS: silu_backward_out_mps
+    CPU, CUDA, MPS: silu_backward_out
   tags: pointwise
 
 - func: silu_backward(Tensor grad_output, Tensor self) -> Tensor
@@ -6532,6 +6607,7 @@
   dispatch:
     CPU: _unique_cpu
     CUDA: _unique_cuda
+    MPS: _unique_mps
   autogen: _unique.out
 
 - func: unique_dim(Tensor self, int dim, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)
@@ -6618,7 +6694,6 @@
   dispatch:
     CPU, CUDA: var
     MPS: var_mps
-    MTIA: var_mtia
   tags: [core, reduction]
 
 - func: var.out(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
@@ -6780,6 +6855,7 @@
   dispatch:
     CPU: _standard_gamma_grad_cpu
     CUDA: _standard_gamma_grad_cuda
+    MPS: _standard_gamma_grad_mps
   autogen: _standard_gamma_grad.out
 
 - func: _standard_gamma(Tensor self, Generator? generator=None) -> Tensor
@@ -6787,9 +6863,32 @@
   dispatch:
     CPU: _s_gamma_cpu
     CUDA: _s_gamma_cuda
+    MPS: _s_gamma_mps
   tags: nondeterministic_seeded
   autogen: _standard_gamma.out
 
+- func: _philox_key_split(Tensor key, int num_splits) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _philox_key_split_cuda
+
+- func: _philox_key_fold_in(Tensor key, int data) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _philox_key_fold_in_cuda
+
+- func: _philox_normal_(Tensor(a!) self, Tensor key, float mean=0, float std=1) -> Tensor(a!)
+  variants: function, method
+  dispatch:
+    CUDA: _philox_normal_cuda_
+  autogen: _philox_normal, _philox_normal.out
+
+- func: _philox_uniform_(Tensor(a!) self, Tensor key, float low=0, float high=1) -> Tensor(a!)
+  variants: function, method
+  dispatch:
+    CUDA: _philox_uniform_cuda_
+  autogen: _philox_uniform, _philox_uniform.out
+
 - func: _dirichlet_grad(Tensor x, Tensor alpha, Tensor total) -> Tensor
   dispatch:
     CPU: _dirichlet_grad_cpu
@@ -6978,16 +7077,14 @@
   structured: True
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: norm_dtype_out
-    MPS: norm_dtype_out_mps
+    CPU, CUDA, MPS: norm_dtype_out
   tags: reduction
 
 - func: norm.out(Tensor self, Scalar? p, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: norm_out
-    MPS: norm_out_mps
+    CPU, CUDA, MPS: norm_out
   tags: reduction
 
 # These four redispatch in their implementation, so OK to be CompositeImplicitAutograd
@@ -7080,8 +7177,7 @@
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA: zero_
-    MPS: zero_mps_
+    CPU, CUDA, MPS: zero_
     Meta: zero_meta_
     SparseCPU, SparseCUDA, SparseMPS, SparseMeta: zero_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
@@ -7242,10 +7338,12 @@
 - func: addmm.dtype(Tensor self, Tensor mat1, Tensor mat2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   dispatch:
     CUDA: _addmm_dtype_cuda
+    XPU: _addmm_dtype_xpu
 
 - func: addmm.dtype_out(Tensor self, Tensor mat1, Tensor mat2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CUDA: _addmm_dtype_out_cuda
+    XPU: _addmm_dtype_out_xpu
 
 - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
   structured_delegate: addmm.out
@@ -7287,14 +7385,18 @@
 - func: _scaled_mm_v2(Tensor self, Tensor mat2, Tensor[] scale_a, int[] recipe_a, int[] swizzle_a, Tensor[] scale_b, int[] recipe_b, int[] swizzle_b, Tensor? bias, ScalarType? out_dtype, int[] contraction_dim=[], bool use_fast_accum=False) -> Tensor
   variants: function
   dispatch:
+    CPU: _scaled_mm_cpu_v2
     CUDA: _scaled_mm_cuda_v2
     XPU: _scaled_mm_xpu_v2
+  tags: needs_exact_strides
 
 - func: _scaled_mm_v2.out(Tensor self, Tensor mat2, Tensor[] scale_a, int[] recipe_a, int[] swizzle_a, Tensor[] scale_b, int[] recipe_b, int[] swizzle_b, Tensor? bias, ScalarType? out_dtype, int[] contraction_dim=[], bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
+    CPU: _scaled_mm_cpu_v2_out
     CUDA: _scaled_mm_cuda_v2_out
     XPU: _scaled_mm_xpu_v2_out
+  tags: needs_exact_strides
 
 
 - func: _scaled_grouped_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? offs=None, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
@@ -8380,6 +8482,7 @@
   dispatch:
     CPU: index_reduce_cpu_out
     CUDA: index_reduce_cuda_out
+    MPS: index_reduce_mps_out
 
 - func: index_reduce_(Tensor(a!) self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor(a!)
   structured_delegate: index_reduce.out
@@ -8393,9 +8496,7 @@
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU: index_fill_
-    CUDA: index_fill_
-    MPS: index_fill_mps_
+    CPU, CUDA, MPS: index_fill_
   autogen: index_fill.int_Scalar_out
 
 - func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
@@ -8408,8 +8509,7 @@
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: index_fill_
-    MPS: index_fill_mps_
+    CPU, CUDA, MPS: index_fill_
   autogen: index_fill.int_Tensor_out
 
 - func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
@@ -8755,14 +8855,14 @@
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA, MPS: __lshift__
+    CPU, CUDA, MPS, MTIA: __lshift__
   tags: pointwise
 
 - func: __lshift__.Tensor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA, MPS: __lshift__
+    CPU, CUDA, MPS, MTIA: __lshift__
   tags: pointwise
 
 - func: __ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
@@ -8834,14 +8934,14 @@
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA, MPS: __rshift__
+    CPU, CUDA, MPS, MTIA: __rshift__
   tags: pointwise
 
 - func: __rshift__.Tensor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA, MPS: __rshift__
+    CPU, CUDA, MPS, MTIA: __rshift__
   tags: pointwise
 
 - func: __irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
@@ -8996,6 +9096,7 @@
   tags: nondeterministic_seeded
   dispatch:
     CPU, CUDA: cauchy_
+    MPS: cauchy_mps_
   autogen: cauchy, cauchy.out
 
 - func: log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor(a!)
@@ -9004,6 +9105,7 @@
   variants: method
   dispatch:
     CPU, CUDA: log_normal_
+    MPS: log_normal_mps_
   autogen: log_normal, log_normal.out
 
 - func: exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!)
@@ -9021,6 +9123,7 @@
   variants: method
   dispatch:
     CPU, CUDA: geometric_
+    MPS: geometric_mps_
 
   # wrappers for TH functions
   autogen: geometric, geometric.out
@@ -9525,12 +9628,14 @@
   dispatch:
     CPU: nonzero_static_out_cpu
     CUDA: nonzero_static_out_cuda
+    MPS: nonzero_static_out_mps
 
 - func: nonzero_static(Tensor self, *, SymInt size, int fill_value=-1) -> Tensor
   variants: method, function
   dispatch:
     CPU: nonzero_static_cpu
     CUDA: nonzero_static_cuda
+    MPS: nonzero_static_mps
 
 - func: nonzero_numpy(Tensor self) -> Tensor[]
   variants: method, function
@@ -9695,16 +9800,17 @@
   dispatch:
     CPU: _cholesky_solve_helper_cpu
     CUDA: _cholesky_solve_helper_cuda
+    MPS: _cholesky_solve_helper_mps
   autogen: _cholesky_solve_helper.out
 
 - func: cholesky_inverse(Tensor self, bool upper=False) -> Tensor
   variants: method, function
   dispatch:
-    CPU, CUDA: cholesky_inverse
+    CPU, CUDA, MPS: cholesky_inverse
 
 - func: cholesky_inverse.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: cholesky_inverse_out
+    CPU, CUDA, MPS: cholesky_inverse_out
 
 - func: qr.Q(Tensor self, bool some=True, *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R)
 
@@ -9773,8 +9879,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: lgamma_out
-    MPS: lgamma_out_mps
+    CPU, CUDA, MPS: lgamma_out
   tags: pointwise
 
 - func: lgamma_(Tensor(a!) self) -> Tensor(a!)
@@ -9794,8 +9899,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: digamma_out
-    MPS: digamma_out_mps
+    CPU, CUDA, MPS: digamma_out
   tags: pointwise
 
 - func: digamma(Tensor self) -> Tensor
@@ -9809,8 +9913,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: polygamma_out
-    MPS: polygamma_out_mps
+    CPU, CUDA, MPS: polygamma_out
   tags: pointwise
 
 - func: polygamma(int n, Tensor self) -> Tensor
@@ -9931,8 +10034,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: atan2_out
-    MPS: atan2_out_mps
+    CPU, CUDA, MPS: atan2_out
   tags: [core, pointwise]
 
 - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -9970,8 +10072,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: lerp_Tensor
-    MPS: lerp_Tensor_mps
+    CPU, CUDA, MPS: lerp_Tensor
   tags: pointwise
 
 - func: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor
@@ -10256,8 +10357,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA, MTIA: maximum_out
-    MPS: maximum_out_mps
+    CPU, CUDA, MTIA, MPS: maximum_out
   tags: pointwise
 
 # binary max, alias of maximum
@@ -10289,8 +10389,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA, MTIA: minimum_out
-    MPS: minimum_out_mps
+    CPU, CUDA, MTIA, MPS: minimum_out
   tags: pointwise
 
 # binary min, alias for minimum
@@ -10496,9 +10595,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: pow_Tensor_Scalar_out
+    CPU, CUDA, MPS: pow_Tensor_Scalar_out
     SparseCPU, SparseCUDA, SparseMPS: pow_out_sparse_scalar
-    MPS: pow_tensor_scalar_out_mps
   tags: pointwise
 
 - func: pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor
@@ -11524,6 +11622,15 @@
     MTIA: foreach_tensor_norm_mtia
   autogen: _foreach_norm.Scalar_out
 
+# Like _foreach_norm but returns sum(|x|^ord) without the final root
+- func: _foreach_powsum.Scalar(Tensor[] self, Scalar ord=2, ScalarType? dtype=None) -> Tensor[]
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: foreach_tensor_powsum_slow
+    CUDA: foreach_tensor_powsum_cuda
+  autogen: _foreach_powsum.Scalar_out
+
 - func: _foreach_pow.List(Tensor[] self, Tensor[] exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
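
The comment introduced above pins down `_foreach_powsum`'s contract: per tensor it returns `sum(|x|^ord)`, i.e. `_foreach_norm` with the final root left off. A minimal sketch of that relationship (plain per-tensor Python, not the fused op itself):

```python
import torch

xs = [torch.randn(5), torch.randn(3)]
p = 2.0

# _foreach_norm.Scalar semantics: per-tensor sum(|x|^p) ** (1/p)
norms = [x.abs().pow(p).sum().pow(1 / p) for x in xs]

# _foreach_powsum.Scalar semantics per the new comment: the same sum
# with the final root left off.
powsums = [x.abs().pow(p).sum() for x in xs]

for n, s in zip(norms, powsums):
    assert torch.allclose(n, s.pow(1 / p))
```
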
@@ -11750,6 +11857,14 @@
     CUDA: foreach_tensor_zero_cuda_
   autogen: _foreach_zero, _foreach_zero.out
 
+- func: _foreach_clone(Tensor[] self, *, MemoryFormat? memory_format=None) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: foreach_tensor_clone_slow
+    CUDA: foreach_tensor_clone_cuda
+  autogen: _foreach_clone.out
+
 - func: _foreach_copy_(Tensor(a!)[] self, Tensor[] src, bool non_blocking=False) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
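
`_foreach_clone` above brings `clone` into the foreach family. Its reference semantics are just a per-tensor loop (the CUDA kernel exists to fuse the launches), roughly:

```python
import torch

xs = [torch.ones(3), torch.ones(5)]
# Reference semantics of _foreach_clone: independent per-tensor copies.
ys = [x.clone() for x in xs]
ys[0].zero_()
assert xs[0].sum() == 3   # originals are untouched
```
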
@@ -12083,6 +12198,7 @@
   structured_delegate: elu.out
   device_check: NoCheck # TensorIterator
   python_module: nn
+  tags: pointwise
 
 - func: glu.out(Tensor self, int dim=-1, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -12144,6 +12260,7 @@
   structured_delegate: hardsigmoid.out
   device_check: NoCheck # TensorIterator
   python_module: nn
+  tags: pointwise
 
 - func: hardsigmoid_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -12189,6 +12306,7 @@
   dispatch:
     CPU, CUDA, MPS: hardtanh_
     QuantizedCPU: hardtanh_quantized_cpu_
+  tags: pointwise
 
 - func: hardswish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -12229,7 +12347,7 @@
   python_module: nn
   dispatch:
     QuantizedCPU: leaky_relu_quantized_cpu
-  tags: core
+  tags: [core, pointwise]
 
 - func: leaky_relu_backward.grad_input(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -12248,6 +12366,7 @@
   python_module: nn
   dispatch:
     QuantizedCPU: leaky_relu_quantized_cpu_
+  tags: pointwise
 
 - func: log_sigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -12904,6 +13023,10 @@
   python_module: nn
   autogen: _upsample_bicubic2d_aa.vec_out
 
+- func: _upsample_lanczos2d_aa.vec(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
+  python_module: nn
+  autogen: _upsample_lanczos2d_aa.vec_out
+
 - func: upsample_nearest1d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor
   python_module: nn
   autogen: upsample_nearest1d.vec_out
@@ -13050,6 +13173,26 @@
   python_module: nn
   structured_delegate: _upsample_bicubic2d_aa_backward.grad_input
 
+- func: _upsample_lanczos2d_aa.out(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+  python_module: nn
+  structured: True
+  dispatch:
+    CPU: _upsample_lanczos2d_aa_out_cpu
+
+- func: _upsample_lanczos2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+  python_module: nn
+  structured_delegate: _upsample_lanczos2d_aa.out
+
+- func: _upsample_lanczos2d_aa_backward.grad_input(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+  python_module: nn
+  structured: True
+  dispatch:
+    CPU: _upsample_lanczos2d_aa_backward_out_cpu
+
+- func: _upsample_lanczos2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+  python_module: nn
+  structured_delegate: _upsample_lanczos2d_aa_backward.grad_input
+
 - func: upsample_trilinear3d.out(Tensor self, SymInt[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
   structured: True
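
The entries above add a CPU-only `_upsample_lanczos2d_aa` family alongside the existing antialiased (`_aa`) bicubic ops. The `*_aa` kernels are what back `interpolate`'s `antialias=True` path; bicubic is shown below because how the new Lanczos kernel is surfaced publicly is not visible from this diff:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 64, 64)
# The *_aa ops implement the antialias=True downsampling path.
y = F.interpolate(x, size=(32, 32), mode="bicubic", antialias=True)
y.shape   # torch.Size([1, 3, 32, 32])
```
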
@@ -13608,7 +13751,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: special_erfcx_out
+    CPU, CUDA, MPS: special_erfcx_out
   tags: pointwise
 
 - func: special_erfinv(Tensor self) -> Tensor
@@ -14471,8 +14614,18 @@
   python_module: linalg
   structured: True
   dispatch:
-    CPU, CUDA: linalg_vector_norm_out
-
+    CPU, CUDA, MPS: linalg_vector_norm_out
+  tags: reduction
+
+# Computes sum(|x|^ord) - the "power sum" without the final root.
+# This is useful for distributed computing where partial power sums
+# can be reduced across shards before taking the final root.
+- func: linalg__powsum(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  python_module: linalg
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: linalg__powsum_slow
+    CPU, CUDA: linalg__powsum
   tags: reduction
 
 - func: linalg_matrix_norm(Tensor self, Scalar ord, int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
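
The comment block above motivates `linalg__powsum`: power sums, unlike norms, are additive across shards, so a distributed norm needs only one scalar reduction plus a single final root. A hedged sketch of that pattern over local "shards" (the all-reduce is simulated with a plain sum; the private op itself is not called):

```python
import torch

shards = [torch.randn(4), torch.randn(7), torch.randn(2)]  # one piece per worker
p = 2.0

# Each worker reduces its own shard to a scalar power sum...
partials = [s.abs().pow(p).sum() for s in shards]

# ...the scalars are summed across workers (an all-reduce in practice)...
total = torch.stack(partials).sum()

# ...and the p-th root is taken once, globally.
global_norm = total.pow(1 / p)

assert torch.allclose(global_norm,
                      torch.linalg.vector_norm(torch.cat(shards), p))
```
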
@@ -14622,6 +14775,7 @@
   structured: True
   dispatch:
     CPU, CUDA: linalg_qr_out
+    MPS: linalg_qr_out_mps
 
 - func: linalg_matrix_power(Tensor self, int n) -> Tensor
   python_module: linalg
@@ -15122,7 +15276,7 @@
   variants: function
   tags: nondeterministic_seeded
 
-- func: _scaled_dot_product_attention_math_for_mps(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
+- func: _scaled_dot_product_attention_math_for_mps(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)
   dispatch:
     MPS: _scaled_dot_product_attention_math_mps
   tags: nondeterministic_seeded
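
The signature change above threads `enable_gqa` into the MPS math fallback. In grouped-query attention the key/value tensors carry fewer heads than the query, each KV head serving a group of query heads; recent PyTorch releases expose the same flag on the public SDPA entry point:

```python
import torch
import torch.nn.functional as F

# 8 query heads sharing 2 key/value heads (groups of 4).
q = torch.randn(1, 8, 16, 64)   # (batch, heads, seq, head_dim)
k = torch.randn(1, 2, 16, 64)
v = torch.randn(1, 2, 16, 64)

out = F.scaled_dot_product_attention(q, k, v, enable_gqa=True)
out.shape   # torch.Size([1, 8, 16, 64])
```
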
@@ -15134,6 +15288,11 @@
     NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
   tags: nondeterministic_seeded
 
+- func: _scaled_dot_product_flash_attention.quantized(Tensor query, Tensor key, Tensor value, Tensor? q_descale, Tensor? k_descale, Tensor? v_descale, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
+  dispatch:
+    CUDA: _scaled_dot_product_flash_attention_cuda_quantized
+  tags: nondeterministic_seeded
+
 - func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
   dispatch:
     CPU: _scaled_dot_product_flash_attention_cpu
@@ -15189,12 +15348,24 @@
     NestedTensorCUDA: _scaled_dot_product_cudnn_attention_nestedtensor_backward_cuda
   tags: nondeterministic_seeded
 
-- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
+- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None, Tensor? block_table=None, int? num_splits=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
   variants: function
   dispatch:
     CUDA: _flash_attention_forward
   tags: nondeterministic_seeded
 
+- func: _flash_attention_forward_no_dropout_inplace(Tensor(a!) out, Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None, Tensor? block_table=None, int? num_splits=None) -> Tensor softmax_logsumexp
+  variants: function
+  dispatch:
+    CUDA: _flash_attention_forward_no_dropout_inplace
+  tags: nondeterministic_seeded
+
+- func: _flash_attention_forward.quantized(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, Tensor? q_descale, Tensor? k_descale, Tensor? v_descale, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
+  variants: function
+  dispatch:
+    CUDA: _flash_attention_forward_quantized
+  tags: nondeterministic_seeded
+
 - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor rng_state, Tensor unused, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function