llama_cpp 0.14.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +71 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +9 -0
- data/vendor/tmp/llama.cpp/Makefile +28 -12
- data/vendor/tmp/llama.cpp/ggml-alloc.c +45 -64
- data/vendor/tmp/llama.cpp/ggml-alloc.h +13 -5
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +358 -135
- data/vendor/tmp/llama.cpp/ggml-backend.h +41 -17
- data/vendor/tmp/llama.cpp/ggml-common.h +1830 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +187 -1033
- data/vendor/tmp/llama.cpp/ggml-impl.h +6 -2
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +42 -20
- data/vendor/tmp/llama.cpp/ggml-metal.metal +44 -910
- data/vendor/tmp/llama.cpp/ggml-quants.c +457 -1074
- data/vendor/tmp/llama.cpp/ggml-quants.h +27 -259
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +388 -565
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +6 -39
- data/vendor/tmp/llama.cpp/ggml.c +509 -343
- data/vendor/tmp/llama.cpp/ggml.h +61 -47
- data/vendor/tmp/llama.cpp/llama.cpp +1446 -687
- data/vendor/tmp/llama.cpp/llama.h +25 -11
- data/vendor/tmp/llama.cpp/unicode.cpp +1672 -0
- data/vendor/tmp/llama.cpp/unicode.h +16 -774
- metadata +4 -2
@@ -2,6 +2,15 @@
|
|
2
2
|
#include "ggml.h"
|
3
3
|
#include "ggml-backend-impl.h"
|
4
4
|
|
5
|
+
#if defined(GGML_USE_HIPBLAS)
|
6
|
+
#define GGML_COMMON_DECL_HIP
|
7
|
+
#define GGML_COMMON_IMPL_HIP
|
8
|
+
#else
|
9
|
+
#define GGML_COMMON_DECL_CUDA
|
10
|
+
#define GGML_COMMON_IMPL_CUDA
|
11
|
+
#endif
|
12
|
+
#include "ggml-common.h"
|
13
|
+
|
5
14
|
#include <algorithm>
|
6
15
|
#include <assert.h>
|
7
16
|
#include <atomic>
|
@@ -63,6 +72,7 @@
|
|
63
72
|
#define cudaEventCreateWithFlags hipEventCreateWithFlags
|
64
73
|
#define cudaEventDisableTiming hipEventDisableTiming
|
65
74
|
#define cudaEventRecord hipEventRecord
|
75
|
+
#define cudaEventSynchronize hipEventSynchronize
|
66
76
|
#define cudaEvent_t hipEvent_t
|
67
77
|
#define cudaEventDestroy hipEventDestroy
|
68
78
|
#define cudaFree hipFree
|
@@ -72,6 +82,7 @@
|
|
72
82
|
#define cudaGetDeviceProperties hipGetDeviceProperties
|
73
83
|
#define cudaGetErrorString hipGetErrorString
|
74
84
|
#define cudaGetLastError hipGetLastError
|
85
|
+
#define cudaLaunchHostFunc hipLaunchHostFunc
|
75
86
|
#ifdef GGML_HIP_UMA
|
76
87
|
#define cudaMalloc hipMallocManaged
|
77
88
|
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size)
|
@@ -95,6 +106,7 @@
|
|
95
106
|
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
|
96
107
|
#define cudaStreamFireAndForget hipStreamFireAndForget
|
97
108
|
#define cudaStreamNonBlocking hipStreamNonBlocking
|
109
|
+
#define cudaStreamPerThread hipStreamPerThread
|
98
110
|
#define cudaStreamSynchronize hipStreamSynchronize
|
99
111
|
#define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags)
|
100
112
|
#define cudaStream_t hipStream_t
|
@@ -356,66 +368,6 @@ typedef void (*ggml_cuda_op_flatten_t)(
|
|
356
368
|
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
|
357
369
|
const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream);
|
358
370
|
|
359
|
-
// QK = number of values after dequantization
|
360
|
-
// QR = QK / number of values before dequantization
|
361
|
-
// QI = number of 32 bit integers before dequantization
|
362
|
-
|
363
|
-
#define QK4_0 32
|
364
|
-
#define QR4_0 2
|
365
|
-
#define QI4_0 (QK4_0 / (4 * QR4_0))
|
366
|
-
typedef struct {
|
367
|
-
half d; // delta
|
368
|
-
uint8_t qs[QK4_0 / 2]; // nibbles / quants
|
369
|
-
} block_q4_0;
|
370
|
-
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
|
371
|
-
|
372
|
-
#define QK4_1 32
|
373
|
-
#define QR4_1 2
|
374
|
-
#define QI4_1 (QK4_1 / (4 * QR4_1))
|
375
|
-
typedef struct {
|
376
|
-
half2 dm; // dm.x = delta, dm.y = min
|
377
|
-
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
378
|
-
} block_q4_1;
|
379
|
-
static_assert(sizeof(block_q4_1) == sizeof(ggml_fp16_t) * 2 + QK4_1 / 2, "wrong q4_1 block size/padding");
|
380
|
-
|
381
|
-
#define QK5_0 32
|
382
|
-
#define QR5_0 2
|
383
|
-
#define QI5_0 (QK5_0 / (4 * QR5_0))
|
384
|
-
typedef struct {
|
385
|
-
half d; // delta
|
386
|
-
uint8_t qh[4]; // 5-th bit of quants
|
387
|
-
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
388
|
-
} block_q5_0;
|
389
|
-
static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
|
390
|
-
|
391
|
-
#define QK5_1 32
|
392
|
-
#define QR5_1 2
|
393
|
-
#define QI5_1 (QK5_1 / (4 * QR5_1))
|
394
|
-
typedef struct {
|
395
|
-
half2 dm; // dm.x = delta, dm.y = min
|
396
|
-
uint8_t qh[4]; // 5-th bit of quants
|
397
|
-
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
398
|
-
} block_q5_1;
|
399
|
-
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
400
|
-
|
401
|
-
#define QK8_0 32
|
402
|
-
#define QR8_0 1
|
403
|
-
#define QI8_0 (QK8_0 / (4 * QR8_0))
|
404
|
-
typedef struct {
|
405
|
-
half d; // delta
|
406
|
-
int8_t qs[QK8_0]; // quants
|
407
|
-
} block_q8_0;
|
408
|
-
static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
|
409
|
-
|
410
|
-
#define QK8_1 32
|
411
|
-
#define QR8_1 1
|
412
|
-
#define QI8_1 (QK8_1 / (4 * QR8_1))
|
413
|
-
typedef struct {
|
414
|
-
half2 ds; // ds.x = delta, ds.y = sum
|
415
|
-
int8_t qs[QK8_0]; // quants
|
416
|
-
} block_q8_1;
|
417
|
-
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_fp16_t) + QK8_0, "wrong q8_1 block size/padding");
|
418
|
-
|
419
371
|
typedef float (*vec_dot_q_cuda_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs);
|
420
372
|
typedef void (*allocate_tiles_cuda_t)(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc);
|
421
373
|
typedef void (*load_tiles_cuda_t)(
|
@@ -425,174 +377,6 @@ typedef float (*vec_dot_q_mul_mat_cuda_t)(
|
|
425
377
|
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
|
426
378
|
const int * __restrict__ y_qs, const half2 * __restrict__ y_ms, const int & i, const int & j, const int & k);
|
427
379
|
|
428
|
-
//================================= k-quants
|
429
|
-
|
430
|
-
#ifdef GGML_QKK_64
|
431
|
-
#define QK_K 64
|
432
|
-
#define K_SCALE_SIZE 4
|
433
|
-
#else
|
434
|
-
#define QK_K 256
|
435
|
-
#define K_SCALE_SIZE 12
|
436
|
-
#endif
|
437
|
-
|
438
|
-
#define QR2_K 4
|
439
|
-
#define QI2_K (QK_K / (4*QR2_K))
|
440
|
-
typedef struct {
|
441
|
-
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
442
|
-
uint8_t qs[QK_K/4]; // quants
|
443
|
-
half2 dm; // super-block scale for quantized scales/mins
|
444
|
-
} block_q2_K;
|
445
|
-
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
446
|
-
|
447
|
-
#define QR3_K 4
|
448
|
-
#define QI3_K (QK_K / (4*QR3_K))
|
449
|
-
typedef struct {
|
450
|
-
uint8_t hmask[QK_K/8]; // quants - high bit
|
451
|
-
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
452
|
-
#ifdef GGML_QKK_64
|
453
|
-
uint8_t scales[2]; // scales, quantized with 8 bits
|
454
|
-
#else
|
455
|
-
uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
|
456
|
-
#endif
|
457
|
-
half d; // super-block scale
|
458
|
-
} block_q3_K;
|
459
|
-
//static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + K_SCALE_SIZE, "wrong q3_K block size/padding");
|
460
|
-
|
461
|
-
#define QR4_K 2
|
462
|
-
#define QI4_K (QK_K / (4*QR4_K))
|
463
|
-
#ifdef GGML_QKK_64
|
464
|
-
typedef struct {
|
465
|
-
half dm[2]; // super-block scales/mins
|
466
|
-
uint8_t scales[2]; // 4-bit block scales/mins
|
467
|
-
uint8_t qs[QK_K/2]; // 4--bit quants
|
468
|
-
} block_q4_K;
|
469
|
-
static_assert(sizeof(block_q4_K) == sizeof(half2) + QK_K/2 + 2, "wrong q4_K block size/padding");
|
470
|
-
#else
|
471
|
-
typedef struct {
|
472
|
-
half2 dm; // super-block scale for quantized scales/mins
|
473
|
-
uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits
|
474
|
-
uint8_t qs[QK_K/2]; // 4--bit quants
|
475
|
-
} block_q4_K;
|
476
|
-
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2, "wrong q4_K block size/padding");
|
477
|
-
#endif
|
478
|
-
|
479
|
-
#define QR5_K 2
|
480
|
-
#define QI5_K (QK_K / (4*QR5_K))
|
481
|
-
#ifdef GGML_QKK_64
|
482
|
-
typedef struct {
|
483
|
-
half d; // super-block scale
|
484
|
-
int8_t scales[QK_K/16]; // block scales
|
485
|
-
uint8_t qh[QK_K/8]; // quants, high bit
|
486
|
-
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
487
|
-
} block_q5_K;
|
488
|
-
static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
|
489
|
-
#else
|
490
|
-
typedef struct {
|
491
|
-
half2 dm; // super-block scale for quantized scales/mins
|
492
|
-
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
493
|
-
uint8_t qh[QK_K/8]; // quants, high bit
|
494
|
-
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
495
|
-
} block_q5_K;
|
496
|
-
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
|
497
|
-
#endif
|
498
|
-
|
499
|
-
#define QR6_K 2
|
500
|
-
#define QI6_K (QK_K / (4*QR6_K))
|
501
|
-
typedef struct {
|
502
|
-
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
503
|
-
uint8_t qh[QK_K/4]; // quants, upper 2 bits
|
504
|
-
int8_t scales[QK_K/16]; // scales
|
505
|
-
half d; // delta
|
506
|
-
} block_q6_K;
|
507
|
-
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_K block size/padding");
|
508
|
-
|
509
|
-
#define QR2_XXS 8
|
510
|
-
#define QI2_XXS (QK_K / (4*QR2_XXS))
|
511
|
-
typedef struct {
|
512
|
-
half d;
|
513
|
-
uint16_t qs[QK_K/8];
|
514
|
-
} block_iq2_xxs;
|
515
|
-
static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
|
516
|
-
|
517
|
-
#define QR2_XS 8
|
518
|
-
#define QI2_XS (QK_K / (4*QR2_XS))
|
519
|
-
typedef struct {
|
520
|
-
half d;
|
521
|
-
uint16_t qs[QK_K/8];
|
522
|
-
uint8_t scales[QK_K/32];
|
523
|
-
} block_iq2_xs;
|
524
|
-
static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
|
525
|
-
|
526
|
-
// 2.5625 bpw quants
|
527
|
-
#define QR2_S 8
|
528
|
-
#define QI2_S (QK_K / (4*QR2_S))
|
529
|
-
typedef struct {
|
530
|
-
half d;
|
531
|
-
uint8_t qs[QK_K/4];
|
532
|
-
uint8_t qh[QK_K/32];
|
533
|
-
uint8_t scales[QK_K/32];
|
534
|
-
} block_iq2_s;
|
535
|
-
static_assert(sizeof(block_iq2_s) == sizeof(ggml_fp16_t) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding");
|
536
|
-
|
537
|
-
#define QR3_XXS 8
|
538
|
-
#define QI3_XXS (QK_K / (4*QR3_XXS))
|
539
|
-
typedef struct {
|
540
|
-
half d;
|
541
|
-
uint8_t qs[3*(QK_K/8)];
|
542
|
-
} block_iq3_xxs;
|
543
|
-
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
|
544
|
-
|
545
|
-
#define QR3_XS 8
|
546
|
-
#define QI3_XS (QK_K / (4*QR3_XS))
|
547
|
-
#if QK_K == 64
|
548
|
-
#define IQ3S_N_SCALE 2
|
549
|
-
#else
|
550
|
-
#define IQ3S_N_SCALE QK_K/64
|
551
|
-
#endif
|
552
|
-
typedef struct {
|
553
|
-
half d;
|
554
|
-
uint8_t qs[QK_K/4];
|
555
|
-
uint8_t qh[QK_K/32];
|
556
|
-
uint8_t signs[QK_K/8];
|
557
|
-
uint8_t scales[IQ3S_N_SCALE];
|
558
|
-
} block_iq3_s;
|
559
|
-
static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
|
560
|
-
|
561
|
-
#define QR1_S 8
|
562
|
-
#define QI1_S (QK_K / (4*QR1_S))
|
563
|
-
typedef struct {
|
564
|
-
half d;
|
565
|
-
uint8_t qs[QK_K/8];
|
566
|
-
uint8_t scales[QK_K/16];
|
567
|
-
} block_iq1_s;
|
568
|
-
static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
|
569
|
-
|
570
|
-
#define QK4_NL 32
|
571
|
-
#define QR4_NL 2
|
572
|
-
#define QI4_NL (QK4_NL / (4*QR4_NL))
|
573
|
-
typedef struct {
|
574
|
-
half d;
|
575
|
-
uint8_t qs[QK4_NL/2];
|
576
|
-
} block_iq4_nl;
|
577
|
-
static_assert(sizeof(block_iq4_nl) == sizeof(ggml_fp16_t) + QK4_NL/2, "wrong iq4_nl block size/padding");
|
578
|
-
|
579
|
-
#if QK_K == 64
|
580
|
-
#define block_iq4_xs block_iq4_nl
|
581
|
-
#define QR4_XS QR4_NL
|
582
|
-
#define QI4_XS QI4_NL
|
583
|
-
#else
|
584
|
-
// QR4_XS = 8 is very slightly faster than QR4_XS = 4
|
585
|
-
#define QR4_XS 8
|
586
|
-
#define QI4_XS (QK_K / (4*QR4_XS))
|
587
|
-
typedef struct {
|
588
|
-
half d;
|
589
|
-
uint16_t scales_h;
|
590
|
-
uint8_t scales_l[QK_K/64];
|
591
|
-
uint8_t qs[QK_K/2];
|
592
|
-
} block_iq4_xs;
|
593
|
-
static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
|
594
|
-
#endif
|
595
|
-
|
596
380
|
#define WARP_SIZE 32
|
597
381
|
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
|
598
382
|
|
@@ -1569,746 +1353,6 @@ static __global__ void dequantize_block_q6_K(const void * __restrict__ vx, dst_t
|
|
1569
1353
|
#endif
|
1570
1354
|
}
|
1571
1355
|
|
1572
|
-
static const __device__ uint64_t iq2xxs_grid[256] = {
|
1573
|
-
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
1574
|
-
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x08080808082b0808,
|
1575
|
-
0x08080808082b082b, 0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
|
1576
|
-
0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819,
|
1577
|
-
0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b,
|
1578
|
-
0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
|
1579
|
-
0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08,
|
1580
|
-
0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b,
|
1581
|
-
0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
|
1582
|
-
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08,
|
1583
|
-
0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
|
1584
|
-
0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
|
1585
|
-
0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808,
|
1586
|
-
0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808,
|
1587
|
-
0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
|
1588
|
-
0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819,
|
1589
|
-
0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08,
|
1590
|
-
0x08082b1908081908, 0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
|
1591
|
-
0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
|
1592
|
-
0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
|
1593
|
-
0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
|
1594
|
-
0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
|
1595
|
-
0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
|
1596
|
-
0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
|
1597
|
-
0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
|
1598
|
-
0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
|
1599
|
-
0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
|
1600
|
-
0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
|
1601
|
-
0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
|
1602
|
-
0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
|
1603
|
-
0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
|
1604
|
-
0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
|
1605
|
-
0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
|
1606
|
-
0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
|
1607
|
-
0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
|
1608
|
-
0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
|
1609
|
-
0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
|
1610
|
-
0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
|
1611
|
-
0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
|
1612
|
-
0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
|
1613
|
-
0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
|
1614
|
-
0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
|
1615
|
-
0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
|
1616
|
-
0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
|
1617
|
-
0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
|
1618
|
-
0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
|
1619
|
-
0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
|
1620
|
-
0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
|
1621
|
-
0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
|
1622
|
-
0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
|
1623
|
-
0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
|
1624
|
-
0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
|
1625
|
-
0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
|
1626
|
-
0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
|
1627
|
-
0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
|
1628
|
-
0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
|
1629
|
-
0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
|
1630
|
-
0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
|
1631
|
-
0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
|
1632
|
-
0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
|
1633
|
-
0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
|
1634
|
-
0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
|
1635
|
-
0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
|
1636
|
-
0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
|
1637
|
-
};
|
1638
|
-
|
1639
|
-
static const __device__ uint64_t iq2xs_grid[512] = {
|
1640
|
-
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
1641
|
-
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
1642
|
-
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
1643
|
-
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
1644
|
-
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
1645
|
-
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
1646
|
-
0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08, 0x080808082b190819,
|
1647
|
-
0x080808082b191908, 0x080808082b192b19, 0x080808082b2b0808, 0x0808081908080819,
|
1648
|
-
0x0808081908081908, 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
|
1649
|
-
0x080808190819082b, 0x0808081908191919, 0x0808081908192b08, 0x0808081908192b2b,
|
1650
|
-
0x08080819082b0819, 0x08080819082b1908, 0x0808081919080808, 0x080808191908082b,
|
1651
|
-
0x0808081919081919, 0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
|
1652
|
-
0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819, 0x080808192b081908,
|
1653
|
-
0x080808192b190808, 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b08081919,
|
1654
|
-
0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
|
1655
|
-
0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808, 0x0808082b19191919,
|
1656
|
-
0x0808082b2b080808, 0x0808082b2b082b2b, 0x0808190808080819, 0x0808190808081908,
|
1657
|
-
0x080819080808192b, 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
|
1658
|
-
0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908,
|
1659
|
-
0x0808190819080808, 0x080819081908082b, 0x0808190819081919, 0x0808190819082b08,
|
1660
|
-
0x0808190819190819, 0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
|
1661
|
-
0x080819082b080819, 0x080819082b081908, 0x080819082b190808, 0x0808191908080808,
|
1662
|
-
0x080819190808082b, 0x0808191908081919, 0x0808191908082b08, 0x0808191908190819,
|
1663
|
-
0x0808191908191908, 0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
|
1664
|
-
0x0808191919190808, 0x08081919192b0819, 0x080819192b080808, 0x0808192b08080819,
|
1665
|
-
0x0808192b08081908, 0x0808192b08190808, 0x0808192b082b192b, 0x0808192b19080808,
|
1666
|
-
0x0808192b1908082b, 0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
|
1667
|
-
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b, 0x08082b0808190819,
|
1668
|
-
0x08082b0808191908, 0x08082b08082b0808, 0x08082b08082b1919, 0x08082b0819080819,
|
1669
|
-
0x08082b0819081908, 0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
|
1670
|
-
0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819, 0x08082b1908081908,
|
1671
|
-
0x08082b1908190808, 0x08082b1919080808, 0x08082b192b080819, 0x08082b192b082b19,
|
1672
|
-
0x08082b2b08080808, 0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
|
1673
|
-
0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908, 0x081908080808192b,
|
1674
|
-
0x0819080808082b19, 0x0819080808190808, 0x081908080819082b, 0x0819080808191919,
|
1675
|
-
0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
|
1676
|
-
0x081908081908082b, 0x0819080819081919, 0x0819080819082b08, 0x0819080819190819,
|
1677
|
-
0x0819080819191908, 0x08190808192b0808, 0x08190808192b2b2b, 0x081908082b080819,
|
1678
|
-
0x081908082b081908, 0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
|
1679
|
-
0x0819081908081919, 0x0819081908082b08, 0x0819081908190819, 0x0819081908191908,
|
1680
|
-
0x08190819082b0808, 0x0819081919080819, 0x0819081919081908, 0x0819081919190808,
|
1681
|
-
0x081908192b080808, 0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
|
1682
|
-
0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808, 0x0819082b19080808,
|
1683
|
-
0x0819082b192b0808, 0x0819190808080808, 0x081919080808082b, 0x0819190808081919,
|
1684
|
-
0x0819190808082b08, 0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
|
1685
|
-
0x0819190819080819, 0x0819190819081908, 0x0819190819082b19, 0x0819190819190808,
|
1686
|
-
0x08191908192b1908, 0x081919082b080808, 0x0819191908080819, 0x0819191908081908,
|
1687
|
-
0x0819191908190808, 0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
|
1688
|
-
0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
|
1689
|
-
0x08192b080819082b, 0x08192b0819080808, 0x08192b0819191908, 0x08192b082b08192b,
|
1690
|
-
0x08192b1908080808, 0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
|
1691
|
-
0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919,
|
1692
|
-
0x082b080808082b08, 0x082b080808082b2b, 0x082b080808190819, 0x082b080808191908,
|
1693
|
-
0x082b0808082b0808, 0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
|
1694
|
-
0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819, 0x082b081908081908,
|
1695
|
-
0x082b081908190808, 0x082b081919080808, 0x082b081919082b08, 0x082b0819192b1919,
|
1696
|
-
0x082b082b08080808, 0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
|
1697
|
-
0x082b190808080819, 0x082b190808081908, 0x082b190808190808, 0x082b1908082b2b19,
|
1698
|
-
0x082b190819080808, 0x082b191908080808, 0x082b191919080819, 0x082b19191919082b,
|
1699
|
-
0x082b19192b192b19, 0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
|
1700
|
-
0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b, 0x082b2b08082b0808,
|
1701
|
-
0x082b2b0819191919, 0x082b2b082b082b08, 0x082b2b082b2b082b, 0x082b2b19192b2b08,
|
1702
|
-
0x082b2b192b190808, 0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
|
1703
|
-
0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819, 0x1908080808081908,
|
1704
|
-
0x190808080808192b, 0x1908080808082b19, 0x1908080808190808, 0x190808080819082b,
|
1705
|
-
0x1908080808191919, 0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
1706
|
-
0x1908080819080808, 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08,
|
1707
|
-
0x1908080819082b2b, 0x1908080819190819, 0x1908080819191908, 0x19080808192b0808,
|
1708
|
-
0x19080808192b1919, 0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
|
1709
|
-
0x1908081908080808, 0x190808190808082b, 0x1908081908081919, 0x1908081908082b08,
|
1710
|
-
0x1908081908190819, 0x1908081908191908, 0x19080819082b0808, 0x1908081919080819,
|
1711
|
-
0x1908081919081908, 0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
|
1712
|
-
0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908, 0x1908082b08190808,
|
1713
|
-
0x1908082b0819082b, 0x1908082b082b2b19, 0x1908082b19080808, 0x1908190808080808,
|
1714
|
-
0x190819080808082b, 0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
|
1715
|
-
0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808, 0x1908190819080819,
|
1716
|
-
0x1908190819081908, 0x1908190819190808, 0x190819082b080808, 0x190819082b191908,
|
1717
|
-
0x1908191908080819, 0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
|
1718
|
-
0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808, 0x1908192b08082b2b,
|
1719
|
-
0x1908192b19081908, 0x1908192b19190808, 0x19082b0808080819, 0x19082b0808081908,
|
1720
|
-
0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
|
1721
|
-
0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819, 0x19082b1919081908,
|
1722
|
-
0x19082b1919190808, 0x19082b19192b2b19, 0x19082b2b08081908, 0x1919080808080808,
|
1723
|
-
0x191908080808082b, 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
|
1724
|
-
0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08, 0x1919080819080819,
|
1725
|
-
0x1919080819081908, 0x1919080819190808, 0x191908082b080808, 0x1919081908080819,
|
1726
|
-
0x1919081908081908, 0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
|
1727
|
-
0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908, 0x1919082b2b2b2b2b,
|
1728
|
-
0x1919190808080819, 0x1919190808081908, 0x1919190808190808, 0x19191908082b0819,
|
1729
|
-
0x1919190819080808, 0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
|
1730
|
-
0x1919191908080808, 0x1919191908082b08, 0x191919192b080808, 0x191919192b082b08,
|
1731
|
-
0x1919192b082b0819, 0x1919192b192b2b08, 0x1919192b2b2b0819, 0x19192b0808080808,
|
1732
|
-
0x19192b0808191908, 0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
|
1733
|
-
0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b, 0x19192b2b2b081919,
|
1734
|
-
0x192b080808080819, 0x192b080808081908, 0x192b080808190808, 0x192b080819080808,
|
1735
|
-
0x192b080819191908, 0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
|
1736
|
-
0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b, 0x192b082b2b19082b,
|
1737
|
-
0x192b190808080808, 0x192b19080819192b, 0x192b191908190808, 0x192b191919080808,
|
1738
|
-
0x192b191919081919, 0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
|
1739
|
-
0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908, 0x192b2b2b192b082b,
|
1740
|
-
0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08,
|
1741
|
-
0x2b08080808190819, 0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
|
1742
|
-
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808082b080808,
|
1743
|
-
0x2b0808082b08082b, 0x2b0808082b2b2b08, 0x2b0808082b2b2b2b, 0x2b08081908080819,
|
1744
|
-
0x2b08081908081908, 0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
|
1745
|
-
0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808, 0x2b08082b082b0808,
|
1746
|
-
0x2b08082b2b080808, 0x2b08082b2b08082b, 0x2b08082b2b2b0808, 0x2b08082b2b2b2b08,
|
1747
|
-
0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
|
1748
|
-
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808, 0x2b0819082b082b19,
|
1749
|
-
0x2b08191908080808, 0x2b08191919081908, 0x2b0819192b2b1919, 0x2b08192b08192b08,
|
1750
|
-
0x2b08192b192b2b2b, 0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
|
1751
|
-
0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b, 0x2b082b082b2b2b08,
|
1752
|
-
0x2b082b190808192b, 0x2b082b2b082b082b, 0x2b082b2b2b080808, 0x2b082b2b2b082b08,
|
1753
|
-
0x2b082b2b2b19192b, 0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
|
1754
|
-
0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b, 0x2b1908082b081908,
|
1755
|
-
0x2b19081908080808, 0x2b190819082b082b, 0x2b190819192b1908, 0x2b19082b1919192b,
|
1756
|
-
0x2b19082b2b082b19, 0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
|
1757
|
-
0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19, 0x2b1919192b190808,
|
1758
|
-
0x2b1919192b19082b, 0x2b19192b19080819, 0x2b192b0819190819, 0x2b192b082b2b192b,
|
1759
|
-
0x2b192b1919082b19, 0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
|
1760
|
-
0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b, 0x2b2b0808082b0808,
|
1761
|
-
0x2b2b0808082b2b2b, 0x2b2b08082b2b0808, 0x2b2b081919190819, 0x2b2b081919192b19,
|
1762
|
-
0x2b2b08192b2b192b, 0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
|
1763
|
-
0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808, 0x2b2b190819080808,
|
1764
|
-
0x2b2b19082b191919, 0x2b2b192b192b1919, 0x2b2b192b2b192b08, 0x2b2b2b0808082b2b,
|
1765
|
-
0x2b2b2b08082b0808, 0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
|
1766
|
-
0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908, 0x2b2b2b192b08192b,
|
1767
|
-
0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
1768
|
-
};
|
1769
|
-
|
1770
|
-
static const __device__ uint64_t iq2s_grid[1024] = {
|
1771
|
-
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
1772
|
-
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
1773
|
-
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
1774
|
-
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
1775
|
-
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
1776
|
-
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x08080808192b192b,
|
1777
|
-
0x08080808192b2b19, 0x080808082b080808, 0x080808082b08082b, 0x080808082b081919,
|
1778
|
-
0x080808082b082b08, 0x080808082b190819, 0x080808082b191908, 0x080808082b2b0808,
|
1779
|
-
0x080808082b2b1919, 0x080808082b2b2b2b, 0x0808081908080819, 0x0808081908081908,
|
1780
|
-
0x080808190808192b, 0x0808081908082b19, 0x0808081908190808, 0x080808190819082b,
|
1781
|
-
0x0808081908191919, 0x0808081908192b08, 0x08080819082b0819, 0x08080819082b1908,
|
1782
|
-
0x0808081919080808, 0x080808191908082b, 0x0808081919081919, 0x0808081919082b08,
|
1783
|
-
0x0808081919190819, 0x0808081919191908, 0x080808191919192b, 0x0808081919192b19,
|
1784
|
-
0x08080819192b0808, 0x08080819192b1919, 0x08080819192b2b08, 0x080808192b080819,
|
1785
|
-
0x080808192b081908, 0x080808192b190808, 0x080808192b19082b, 0x080808192b191919,
|
1786
|
-
0x080808192b2b0819, 0x080808192b2b1908, 0x0808082b08080808, 0x0808082b0808082b,
|
1787
|
-
0x0808082b08081919, 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908,
|
1788
|
-
0x0808082b082b0808, 0x0808082b082b2b2b, 0x0808082b19080819, 0x0808082b19081908,
|
1789
|
-
0x0808082b1908192b, 0x0808082b19082b19, 0x0808082b19190808, 0x0808082b19191919,
|
1790
|
-
0x0808082b2b080808, 0x0808082b2b081919, 0x0808082b2b082b2b, 0x0808082b2b191908,
|
1791
|
-
0x0808082b2b2b082b, 0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
|
1792
|
-
0x0808190808082b19, 0x0808190808190808, 0x080819080819082b, 0x0808190808191919,
|
1793
|
-
0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908, 0x08081908082b192b,
|
1794
|
-
0x08081908082b2b19, 0x0808190819080808, 0x080819081908082b, 0x0808190819081919,
|
1795
|
-
0x0808190819082b08, 0x0808190819082b2b, 0x0808190819190819, 0x0808190819191908,
|
1796
|
-
0x080819081919192b, 0x0808190819192b19, 0x08081908192b0808, 0x08081908192b082b,
|
1797
|
-
0x08081908192b1919, 0x080819082b080819, 0x080819082b081908, 0x080819082b08192b,
|
1798
|
-
0x080819082b082b19, 0x080819082b190808, 0x080819082b191919, 0x080819082b192b08,
|
1799
|
-
0x080819082b2b0819, 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b,
|
1800
|
-
0x0808191908081919, 0x0808191908082b08, 0x0808191908082b2b, 0x0808191908190819,
|
1801
|
-
0x0808191908191908, 0x080819190819192b, 0x0808191908192b19, 0x08081919082b0808,
|
1802
|
-
0x08081919082b1919, 0x08081919082b2b08, 0x0808191919080819, 0x0808191919081908,
|
1803
|
-
0x080819191908192b, 0x0808191919082b19, 0x0808191919190808, 0x080819191919082b,
|
1804
|
-
0x0808191919191919, 0x0808191919192b08, 0x08081919192b0819, 0x08081919192b1908,
|
1805
|
-
0x080819192b080808, 0x080819192b08082b, 0x080819192b081919, 0x080819192b082b08,
|
1806
|
-
0x080819192b190819, 0x080819192b191908, 0x080819192b2b0808, 0x0808192b08080819,
|
1807
|
-
0x0808192b08081908, 0x0808192b0808192b, 0x0808192b08082b19, 0x0808192b08190808,
|
1808
|
-
0x0808192b08191919, 0x0808192b19080808, 0x0808192b19081919, 0x0808192b19082b08,
|
1809
|
-
0x0808192b19190819, 0x0808192b19191908, 0x0808192b192b0808, 0x0808192b2b080819,
|
1810
|
-
0x0808192b2b081908, 0x0808192b2b190808, 0x08082b0808080808, 0x08082b080808082b,
|
1811
|
-
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808190819, 0x08082b0808191908,
|
1812
|
-
0x08082b080819192b, 0x08082b0808192b19, 0x08082b08082b0808, 0x08082b08082b1919,
|
1813
|
-
0x08082b08082b2b2b, 0x08082b0819080819, 0x08082b0819081908, 0x08082b081908192b,
|
1814
|
-
0x08082b0819082b19, 0x08082b0819190808, 0x08082b081919082b, 0x08082b0819191919,
|
1815
|
-
0x08082b0819192b08, 0x08082b08192b0819, 0x08082b08192b1908, 0x08082b082b080808,
|
1816
|
-
0x08082b082b081919, 0x08082b082b191908, 0x08082b082b2b2b2b, 0x08082b1908080819,
|
1817
|
-
0x08082b1908081908, 0x08082b1908190808, 0x08082b190819082b, 0x08082b1908191919,
|
1818
|
-
0x08082b1908192b08, 0x08082b19082b0819, 0x08082b1919080808, 0x08082b1919081919,
|
1819
|
-
0x08082b1919082b08, 0x08082b1919190819, 0x08082b1919191908, 0x08082b19192b0808,
|
1820
|
-
0x08082b192b080819, 0x08082b192b190808, 0x08082b2b08080808, 0x08082b2b08190819,
|
1821
|
-
0x08082b2b08191908, 0x08082b2b082b082b, 0x08082b2b082b2b08, 0x08082b2b082b2b2b,
|
1822
|
-
0x08082b2b19190808, 0x08082b2b2b192b19, 0x0819080808080819, 0x0819080808081908,
|
1823
|
-
0x081908080808192b, 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b,
|
1824
|
-
0x0819080808191919, 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908,
|
1825
|
-
0x08190808082b192b, 0x0819080819080808, 0x081908081908082b, 0x0819080819081919,
|
1826
|
-
0x0819080819082b08, 0x0819080819190819, 0x0819080819191908, 0x081908081919192b,
|
1827
|
-
0x0819080819192b19, 0x08190808192b0808, 0x08190808192b082b, 0x08190808192b1919,
|
1828
|
-
0x08190808192b2b08, 0x081908082b080819, 0x081908082b081908, 0x081908082b08192b,
|
1829
|
-
0x081908082b190808, 0x081908082b191919, 0x081908082b192b08, 0x081908082b2b0819,
|
1830
|
-
0x081908082b2b1908, 0x0819081908080808, 0x081908190808082b, 0x0819081908081919,
|
1831
|
-
0x0819081908082b08, 0x0819081908082b2b, 0x0819081908190819, 0x0819081908191908,
|
1832
|
-
0x081908190819192b, 0x0819081908192b19, 0x08190819082b0808, 0x08190819082b082b,
|
1833
|
-
0x08190819082b1919, 0x08190819082b2b08, 0x0819081919080819, 0x0819081919081908,
|
1834
|
-
0x081908191908192b, 0x0819081919082b19, 0x0819081919190808, 0x081908191919082b,
|
1835
|
-
0x0819081919191919, 0x0819081919192b08, 0x08190819192b0819, 0x08190819192b1908,
|
1836
|
-
0x081908192b080808, 0x081908192b08082b, 0x081908192b081919, 0x081908192b082b08,
|
1837
|
-
0x081908192b190819, 0x081908192b191908, 0x0819082b08080819, 0x0819082b08081908,
|
1838
|
-
0x0819082b08082b19, 0x0819082b08190808, 0x0819082b08191919, 0x0819082b082b0819,
|
1839
|
-
0x0819082b082b1908, 0x0819082b19080808, 0x0819082b19081919, 0x0819082b19190819,
|
1840
|
-
0x0819082b19191908, 0x0819082b2b080819, 0x0819082b2b081908, 0x0819082b2b190808,
|
1841
|
-
0x0819190808080808, 0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
|
1842
|
-
0x0819190808190819, 0x0819190808191908, 0x081919080819192b, 0x0819190808192b19,
|
1843
|
-
0x08191908082b0808, 0x08191908082b1919, 0x08191908082b2b08, 0x0819190819080819,
|
1844
|
-
0x0819190819081908, 0x081919081908192b, 0x0819190819082b19, 0x0819190819190808,
|
1845
|
-
0x081919081919082b, 0x0819190819191919, 0x0819190819192b08, 0x08191908192b0819,
|
1846
|
-
0x08191908192b1908, 0x081919082b080808, 0x081919082b08082b, 0x081919082b081919,
|
1847
|
-
0x081919082b082b08, 0x081919082b190819, 0x081919082b191908, 0x081919082b2b0808,
|
1848
|
-
0x0819191908080819, 0x0819191908081908, 0x081919190808192b, 0x0819191908082b19,
|
1849
|
-
0x0819191908190808, 0x081919190819082b, 0x0819191908191919, 0x0819191908192b08,
|
1850
|
-
0x08191919082b0819, 0x08191919082b1908, 0x0819191919080808, 0x081919191908082b,
|
1851
|
-
0x0819191919081919, 0x0819191919082b08, 0x0819191919190819, 0x0819191919191908,
|
1852
|
-
0x08191919192b0808, 0x081919192b080819, 0x081919192b081908, 0x081919192b190808,
|
1853
|
-
0x0819192b08080808, 0x0819192b08081919, 0x0819192b08082b08, 0x0819192b08190819,
|
1854
|
-
0x0819192b08191908, 0x0819192b082b0808, 0x0819192b19080819, 0x0819192b19081908,
|
1855
|
-
0x0819192b19190808, 0x0819192b2b080808, 0x0819192b2b2b2b2b, 0x08192b0808080819,
|
1856
|
-
0x08192b0808081908, 0x08192b080808192b, 0x08192b0808082b19, 0x08192b0808190808,
|
1857
|
-
0x08192b0808191919, 0x08192b0808192b08, 0x08192b08082b0819, 0x08192b0819080808,
|
1858
|
-
0x08192b081908082b, 0x08192b0819081919, 0x08192b0819082b08, 0x08192b0819190819,
|
1859
|
-
0x08192b0819191908, 0x08192b08192b0808, 0x08192b082b080819, 0x08192b082b081908,
|
1860
|
-
0x08192b1908080808, 0x08192b190808082b, 0x08192b1908081919, 0x08192b1908082b08,
|
1861
|
-
0x08192b1908190819, 0x08192b1908191908, 0x08192b19082b0808, 0x08192b1919080819,
|
1862
|
-
0x08192b1919081908, 0x08192b1919190808, 0x08192b19192b2b19, 0x08192b192b2b082b,
|
1863
|
-
0x08192b2b08081908, 0x08192b2b08190808, 0x08192b2b19080808, 0x08192b2b1919192b,
|
1864
|
-
0x082b080808080808, 0x082b08080808082b, 0x082b080808081919, 0x082b080808082b08,
|
1865
|
-
0x082b080808190819, 0x082b080808191908, 0x082b08080819192b, 0x082b080808192b19,
|
1866
|
-
0x082b0808082b0808, 0x082b0808082b1919, 0x082b0808082b2b2b, 0x082b080819080819,
|
1867
|
-
0x082b080819081908, 0x082b080819190808, 0x082b08081919082b, 0x082b080819191919,
|
1868
|
-
0x082b0808192b1908, 0x082b08082b080808, 0x082b08082b082b2b, 0x082b08082b191908,
|
1869
|
-
0x082b08082b2b2b2b, 0x082b081908080819, 0x082b081908081908, 0x082b081908190808,
|
1870
|
-
0x082b08190819082b, 0x082b081908191919, 0x082b0819082b0819, 0x082b081919080808,
|
1871
|
-
0x082b08191908082b, 0x082b081919081919, 0x082b081919190819, 0x082b081919191908,
|
1872
|
-
0x082b0819192b0808, 0x082b08192b080819, 0x082b08192b081908, 0x082b08192b190808,
|
1873
|
-
0x082b082b08080808, 0x082b082b08082b2b, 0x082b082b082b082b, 0x082b082b082b2b08,
|
1874
|
-
0x082b082b082b2b2b, 0x082b082b19081908, 0x082b082b19190808, 0x082b082b2b082b08,
|
1875
|
-
0x082b082b2b082b2b, 0x082b082b2b2b2b08, 0x082b190808080819, 0x082b190808081908,
|
1876
|
-
0x082b19080808192b, 0x082b190808082b19, 0x082b190808190808, 0x082b190808191919,
|
1877
|
-
0x082b190808192b08, 0x082b1908082b0819, 0x082b1908082b1908, 0x082b190819080808,
|
1878
|
-
0x082b19081908082b, 0x082b190819081919, 0x082b190819082b08, 0x082b190819190819,
|
1879
|
-
0x082b190819191908, 0x082b1908192b0808, 0x082b19082b080819, 0x082b19082b081908,
|
1880
|
-
0x082b19082b190808, 0x082b191908080808, 0x082b191908081919, 0x082b191908082b08,
|
1881
|
-
0x082b191908190819, 0x082b191908191908, 0x082b1919082b0808, 0x082b191919080819,
|
1882
|
-
0x082b191919081908, 0x082b191919190808, 0x082b1919192b192b, 0x082b19192b080808,
|
1883
|
-
0x082b192b08080819, 0x082b192b08081908, 0x082b192b08190808, 0x082b192b19080808,
|
1884
|
-
0x082b192b19192b19, 0x082b2b0808080808, 0x082b2b0808081919, 0x082b2b0808190819,
|
1885
|
-
0x082b2b0808191908, 0x082b2b0819080819, 0x082b2b0819081908, 0x082b2b0819190808,
|
1886
|
-
0x082b2b082b082b2b, 0x082b2b082b2b2b2b, 0x082b2b1908080819, 0x082b2b1908081908,
|
1887
|
-
0x082b2b1908190808, 0x082b2b192b191919, 0x082b2b2b08082b2b, 0x082b2b2b082b082b,
|
1888
|
-
0x082b2b2b192b1908, 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
|
1889
|
-
0x1908080808081908, 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808,
|
1890
|
-
0x190808080819082b, 0x1908080808191919, 0x1908080808192b08, 0x1908080808192b2b,
|
1891
|
-
0x19080808082b0819, 0x19080808082b1908, 0x19080808082b192b, 0x1908080819080808,
|
1892
|
-
0x190808081908082b, 0x1908080819081919, 0x1908080819082b08, 0x1908080819082b2b,
|
1893
|
-
0x1908080819190819, 0x1908080819191908, 0x190808081919192b, 0x1908080819192b19,
|
1894
|
-
0x19080808192b0808, 0x19080808192b082b, 0x19080808192b1919, 0x190808082b080819,
|
1895
|
-
0x190808082b081908, 0x190808082b190808, 0x190808082b191919, 0x190808082b192b08,
|
1896
|
-
0x190808082b2b0819, 0x190808082b2b1908, 0x1908081908080808, 0x190808190808082b,
|
1897
|
-
0x1908081908081919, 0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
|
1898
|
-
0x190808190819192b, 0x1908081908192b19, 0x19080819082b0808, 0x19080819082b082b,
|
1899
|
-
0x19080819082b1919, 0x1908081919080819, 0x1908081919081908, 0x190808191908192b,
|
1900
|
-
0x1908081919082b19, 0x1908081919190808, 0x190808191919082b, 0x1908081919191919,
|
1901
|
-
0x1908081919192b08, 0x19080819192b0819, 0x19080819192b1908, 0x190808192b080808,
|
1902
|
-
0x190808192b08082b, 0x190808192b081919, 0x190808192b082b08, 0x190808192b190819,
|
1903
|
-
0x190808192b191908, 0x190808192b2b0808, 0x1908082b08080819, 0x1908082b08081908,
|
1904
|
-
0x1908082b08190808, 0x1908082b0819082b, 0x1908082b08191919, 0x1908082b08192b08,
|
1905
|
-
0x1908082b082b1908, 0x1908082b19080808, 0x1908082b19081919, 0x1908082b19082b08,
|
1906
|
-
0x1908082b19190819, 0x1908082b19191908, 0x1908082b192b0808, 0x1908082b2b080819,
|
1907
|
-
0x1908082b2b081908, 0x1908190808080808, 0x190819080808082b, 0x1908190808081919,
|
1908
|
-
0x1908190808082b08, 0x1908190808082b2b, 0x1908190808190819, 0x1908190808191908,
|
1909
|
-
0x190819080819192b, 0x1908190808192b19, 0x19081908082b0808, 0x19081908082b082b,
|
1910
|
-
0x19081908082b1919, 0x19081908082b2b08, 0x1908190819080819, 0x1908190819081908,
|
1911
|
-
0x190819081908192b, 0x1908190819082b19, 0x1908190819190808, 0x190819081919082b,
|
1912
|
-
0x1908190819191919, 0x1908190819192b08, 0x19081908192b0819, 0x19081908192b1908,
|
1913
|
-
0x190819082b080808, 0x190819082b08082b, 0x190819082b081919, 0x190819082b082b08,
|
1914
|
-
0x190819082b190819, 0x190819082b191908, 0x190819082b2b0808, 0x1908191908080819,
|
1915
|
-
0x1908191908081908, 0x190819190808192b, 0x1908191908082b19, 0x1908191908190808,
|
1916
|
-
0x190819190819082b, 0x1908191908191919, 0x1908191908192b08, 0x19081919082b0819,
|
1917
|
-
0x19081919082b1908, 0x1908191919080808, 0x190819191908082b, 0x1908191919081919,
|
1918
|
-
0x1908191919082b08, 0x1908191919190819, 0x1908191919191908, 0x19081919192b0808,
|
1919
|
-
0x19081919192b2b2b, 0x190819192b080819, 0x190819192b081908, 0x190819192b190808,
|
1920
|
-
0x1908192b08080808, 0x1908192b0808082b, 0x1908192b08081919, 0x1908192b08082b08,
|
1921
|
-
0x1908192b08190819, 0x1908192b08191908, 0x1908192b082b0808, 0x1908192b19080819,
|
1922
|
-
0x1908192b19081908, 0x1908192b19190808, 0x1908192b2b080808, 0x1908192b2b2b1919,
|
1923
|
-
0x19082b0808080819, 0x19082b0808081908, 0x19082b0808082b19, 0x19082b0808190808,
|
1924
|
-
0x19082b080819082b, 0x19082b0808191919, 0x19082b0808192b08, 0x19082b08082b0819,
|
1925
|
-
0x19082b08082b1908, 0x19082b0819080808, 0x19082b081908082b, 0x19082b0819081919,
|
1926
|
-
0x19082b0819082b08, 0x19082b0819190819, 0x19082b0819191908, 0x19082b08192b0808,
|
1927
|
-
0x19082b082b081908, 0x19082b082b190808, 0x19082b1908080808, 0x19082b190808082b,
|
1928
|
-
0x19082b1908081919, 0x19082b1908082b08, 0x19082b1908190819, 0x19082b1908191908,
|
1929
|
-
0x19082b19082b0808, 0x19082b1919080819, 0x19082b1919081908, 0x19082b1919190808,
|
1930
|
-
0x19082b192b080808, 0x19082b192b19192b, 0x19082b2b08080819, 0x19082b2b08081908,
|
1931
|
-
0x19082b2b08190808, 0x19082b2b19080808, 0x1919080808080808, 0x191908080808082b,
|
1932
|
-
0x1919080808081919, 0x1919080808082b08, 0x1919080808190819, 0x1919080808191908,
|
1933
|
-
0x191908080819192b, 0x1919080808192b19, 0x19190808082b0808, 0x19190808082b082b,
|
1934
|
-
0x19190808082b1919, 0x19190808082b2b08, 0x1919080819080819, 0x1919080819081908,
|
1935
|
-
0x191908081908192b, 0x1919080819082b19, 0x1919080819190808, 0x191908081919082b,
|
1936
|
-
0x1919080819191919, 0x1919080819192b08, 0x19190808192b0819, 0x19190808192b1908,
|
1937
|
-
0x191908082b080808, 0x191908082b08082b, 0x191908082b081919, 0x191908082b082b08,
|
1938
|
-
0x191908082b190819, 0x191908082b191908, 0x1919081908080819, 0x1919081908081908,
|
1939
|
-
0x191908190808192b, 0x1919081908082b19, 0x1919081908190808, 0x191908190819082b,
|
1940
|
-
0x1919081908191919, 0x1919081908192b08, 0x19190819082b0819, 0x19190819082b1908,
|
1941
|
-
0x1919081919080808, 0x191908191908082b, 0x1919081919081919, 0x1919081919082b08,
|
1942
|
-
0x1919081919190819, 0x1919081919191908, 0x19190819192b0808, 0x191908192b080819,
|
1943
|
-
0x191908192b081908, 0x191908192b190808, 0x1919082b08080808, 0x1919082b08081919,
|
1944
|
-
0x1919082b08082b08, 0x1919082b08190819, 0x1919082b08191908, 0x1919082b082b0808,
|
1945
|
-
0x1919082b19080819, 0x1919082b19081908, 0x1919082b19190808, 0x1919082b192b2b19,
|
1946
|
-
0x1919082b2b080808, 0x1919190808080819, 0x1919190808081908, 0x191919080808192b,
|
1947
|
-
0x1919190808082b19, 0x1919190808190808, 0x191919080819082b, 0x1919190808191919,
|
1948
|
-
0x1919190808192b08, 0x19191908082b0819, 0x19191908082b1908, 0x1919190819080808,
|
1949
|
-
0x191919081908082b, 0x1919190819081919, 0x1919190819082b08, 0x1919190819190819,
|
1950
|
-
0x1919190819191908, 0x19191908192b0808, 0x191919082b080819, 0x191919082b081908,
|
1951
|
-
0x191919082b190808, 0x1919191908080808, 0x191919190808082b, 0x1919191908081919,
|
1952
|
-
0x1919191908082b08, 0x1919191908190819, 0x1919191908191908, 0x19191919082b0808,
|
1953
|
-
0x1919191919080819, 0x1919191919081908, 0x1919191919190808, 0x191919192b080808,
|
1954
|
-
0x1919192b08080819, 0x1919192b08081908, 0x1919192b08190808, 0x1919192b082b192b,
|
1955
|
-
0x1919192b19080808, 0x19192b0808080808, 0x19192b080808082b, 0x19192b0808081919,
|
1956
|
-
0x19192b0808082b08, 0x19192b0808190819, 0x19192b0808191908, 0x19192b08082b0808,
|
1957
|
-
0x19192b0819080819, 0x19192b0819081908, 0x19192b0819190808, 0x19192b0819192b2b,
|
1958
|
-
0x19192b082b080808, 0x19192b1908080819, 0x19192b1908081908, 0x19192b1908190808,
|
1959
|
-
0x19192b1919080808, 0x19192b2b08080808, 0x19192b2b08192b19, 0x19192b2b2b081919,
|
1960
|
-
0x19192b2b2b2b2b08, 0x192b080808080819, 0x192b080808081908, 0x192b08080808192b,
|
1961
|
-
0x192b080808190808, 0x192b08080819082b, 0x192b080808191919, 0x192b080808192b08,
|
1962
|
-
0x192b0808082b0819, 0x192b0808082b1908, 0x192b080819080808, 0x192b080819081919,
|
1963
|
-
0x192b080819082b08, 0x192b080819190819, 0x192b080819191908, 0x192b0808192b0808,
|
1964
|
-
0x192b08082b081908, 0x192b08082b190808, 0x192b081908080808, 0x192b08190808082b,
|
1965
|
-
0x192b081908081919, 0x192b081908082b08, 0x192b081908190819, 0x192b081908191908,
|
1966
|
-
0x192b0819082b0808, 0x192b081919080819, 0x192b081919081908, 0x192b081919190808,
|
1967
|
-
0x192b08192b080808, 0x192b08192b192b19, 0x192b082b08081908, 0x192b082b08190808,
|
1968
|
-
0x192b082b19080808, 0x192b082b1919192b, 0x192b082b2b2b0819, 0x192b190808080808,
|
1969
|
-
0x192b190808081919, 0x192b190808082b08, 0x192b190808190819, 0x192b190808191908,
|
1970
|
-
0x192b1908082b0808, 0x192b190819080819, 0x192b190819081908, 0x192b190819190808,
|
1971
|
-
0x192b19082b080808, 0x192b191908080819, 0x192b191908081908, 0x192b191908190808,
|
1972
|
-
0x192b191919080808, 0x192b191919082b2b, 0x192b1919192b2b08, 0x192b19192b19082b,
|
1973
|
-
0x192b192b08080808, 0x192b192b2b191908, 0x192b2b0808080819, 0x192b2b0808081908,
|
1974
|
-
0x192b2b0808190808, 0x192b2b08192b1919, 0x192b2b082b192b08, 0x192b2b1908080808,
|
1975
|
-
0x192b2b19082b2b2b, 0x192b2b2b1908082b, 0x192b2b2b2b2b0819, 0x2b08080808080808,
|
1976
|
-
0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
|
1977
|
-
0x2b08080808191908, 0x2b08080808192b19, 0x2b080808082b0808, 0x2b080808082b1919,
|
1978
|
-
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808081919082b,
|
1979
|
-
0x2b08080819191919, 0x2b08080819192b08, 0x2b080808192b0819, 0x2b0808082b080808,
|
1980
|
-
0x2b0808082b081919, 0x2b0808082b190819, 0x2b0808082b191908, 0x2b08081908080819,
|
1981
|
-
0x2b08081908081908, 0x2b08081908082b19, 0x2b08081908190808, 0x2b0808190819082b,
|
1982
|
-
0x2b08081908191919, 0x2b08081908192b08, 0x2b080819082b0819, 0x2b080819082b1908,
|
1983
|
-
0x2b08081919080808, 0x2b0808191908082b, 0x2b08081919081919, 0x2b08081919082b08,
|
1984
|
-
0x2b08081919190819, 0x2b08081919191908, 0x2b0808192b080819, 0x2b0808192b081908,
|
1985
|
-
0x2b0808192b190808, 0x2b0808192b2b2b19, 0x2b08082b08080808, 0x2b08082b08081919,
|
1986
|
-
0x2b08082b08082b2b, 0x2b08082b08190819, 0x2b08082b08191908, 0x2b08082b19080819,
|
1987
|
-
0x2b08082b19081908, 0x2b08082b19190808, 0x2b08190808080819, 0x2b08190808081908,
|
1988
|
-
0x2b0819080808192b, 0x2b08190808082b19, 0x2b08190808190808, 0x2b0819080819082b,
|
1989
|
-
0x2b08190808191919, 0x2b08190808192b08, 0x2b081908082b0819, 0x2b08190819080808,
|
1990
|
-
0x2b0819081908082b, 0x2b08190819081919, 0x2b08190819082b08, 0x2b08190819190819,
|
1991
|
-
0x2b08190819191908, 0x2b081908192b0808, 0x2b0819082b080819, 0x2b0819082b081908,
|
1992
|
-
0x2b0819082b190808, 0x2b08191908080808, 0x2b0819190808082b, 0x2b08191908081919,
|
1993
|
-
0x2b08191908082b08, 0x2b08191908190819, 0x2b08191908191908, 0x2b081919082b0808,
|
1994
|
-
0x2b08191919080819, 0x2b08191919081908, 0x2b08191919190808, 0x2b0819192b080808,
|
1995
|
-
0x2b0819192b082b2b, 0x2b08192b08080819, 0x2b08192b08081908, 0x2b08192b08190808,
|
1996
|
-
0x2b08192b082b2b19, 0x2b08192b19080808, 0x2b082b0808080808, 0x2b082b0808081919,
|
1997
|
-
0x2b082b0808190819, 0x2b082b0808191908, 0x2b082b0819080819, 0x2b082b0819081908,
|
1998
|
-
0x2b082b0819190808, 0x2b082b082b2b082b, 0x2b082b1908080819, 0x2b082b1908081908,
|
1999
|
-
0x2b082b1919080808, 0x2b082b19192b1919, 0x2b082b2b082b082b, 0x2b082b2b19192b08,
|
2000
|
-
0x2b082b2b19192b2b, 0x2b082b2b2b08082b, 0x2b082b2b2b2b082b, 0x2b19080808080819,
|
2001
|
-
0x2b19080808081908, 0x2b19080808082b19, 0x2b19080808190808, 0x2b1908080819082b,
|
2002
|
-
0x2b19080808191919, 0x2b19080808192b08, 0x2b190808082b1908, 0x2b19080819080808,
|
2003
|
-
0x2b1908081908082b, 0x2b19080819081919, 0x2b19080819082b08, 0x2b19080819190819,
|
2004
|
-
0x2b19080819191908, 0x2b190808192b0808, 0x2b1908082b080819, 0x2b1908082b081908,
|
2005
|
-
0x2b1908082b190808, 0x2b19081908080808, 0x2b19081908081919, 0x2b19081908190819,
|
2006
|
-
0x2b19081908191908, 0x2b19081919080819, 0x2b19081919081908, 0x2b19081919190808,
|
2007
|
-
0x2b19081919192b2b, 0x2b19082b08080819, 0x2b19082b08081908, 0x2b19082b08190808,
|
2008
|
-
0x2b19082b19080808, 0x2b19082b2b2b192b, 0x2b19190808080808, 0x2b1919080808082b,
|
2009
|
-
0x2b19190808081919, 0x2b19190808082b08, 0x2b19190808190819, 0x2b19190808191908,
|
2010
|
-
0x2b191908082b0808, 0x2b19190819080819, 0x2b19190819081908, 0x2b19190819190808,
|
2011
|
-
0x2b1919082b080808, 0x2b1919082b19192b, 0x2b19191908080819, 0x2b19191908081908,
|
2012
|
-
0x2b19191908190808, 0x2b19191919080808, 0x2b1919192b192b08, 0x2b1919192b2b0819,
|
2013
|
-
0x2b19192b08080808, 0x2b19192b1908192b, 0x2b19192b192b1908, 0x2b192b0808080819,
|
2014
|
-
0x2b192b0808081908, 0x2b192b0808190808, 0x2b192b08082b192b, 0x2b192b0819080808,
|
2015
|
-
0x2b192b082b2b2b19, 0x2b192b1908080808, 0x2b192b1919082b19, 0x2b192b191919082b,
|
2016
|
-
0x2b192b2b2b190808, 0x2b2b080808080808, 0x2b2b080808081919, 0x2b2b080808082b2b,
|
2017
|
-
0x2b2b080808191908, 0x2b2b0808082b082b, 0x2b2b0808082b2b2b, 0x2b2b080819080819,
|
2018
|
-
0x2b2b080819081908, 0x2b2b080819190808, 0x2b2b08082b2b082b, 0x2b2b08082b2b2b2b,
|
2019
|
-
0x2b2b081919080808, 0x2b2b0819192b1919, 0x2b2b082b0808082b, 0x2b2b082b08082b2b,
|
2020
|
-
0x2b2b082b082b082b, 0x2b2b082b082b2b08, 0x2b2b082b082b2b2b, 0x2b2b082b2b08082b,
|
2021
|
-
0x2b2b082b2b082b08, 0x2b2b082b2b082b2b, 0x2b2b082b2b2b2b08, 0x2b2b190808080819,
|
2022
|
-
0x2b2b190808081908, 0x2b2b190808190808, 0x2b2b190819080808, 0x2b2b19082b082b19,
|
2023
|
-
0x2b2b19082b2b1908, 0x2b2b191908080808, 0x2b2b191908192b19, 0x2b2b192b19190819,
|
2024
|
-
0x2b2b2b0808082b2b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b082b, 0x2b2b2b1919191908,
|
2025
|
-
0x2b2b2b192b08192b, 0x2b2b2b2b08082b08, 0x2b2b2b2b08082b2b, 0x2b2b2b2b082b0808,
|
2026
|
-
0x2b2b2b2b082b082b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b2b082b08, 0x2b2b2b2b2b2b2b2b,
|
2027
|
-
};
|
2028
|
-
|
2029
|
-
static const __device__ uint32_t iq3xxs_grid[256] = {
|
2030
|
-
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
|
2031
|
-
0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
|
2032
|
-
0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
2033
|
-
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
|
2034
|
-
0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
|
2035
|
-
0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
2036
|
-
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
|
2037
|
-
0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
|
2038
|
-
0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
2039
|
-
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
|
2040
|
-
0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
|
2041
|
-
0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
2042
|
-
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
|
2043
|
-
0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
|
2044
|
-
0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
2045
|
-
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
|
2046
|
-
0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
|
2047
|
-
0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
2048
|
-
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
|
2049
|
-
0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
|
2050
|
-
0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
2051
|
-
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
|
2052
|
-
0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
|
2053
|
-
0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
2054
|
-
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
|
2055
|
-
0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
|
2056
|
-
0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
2057
|
-
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
|
2058
|
-
0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
|
2059
|
-
0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
2060
|
-
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
|
2061
|
-
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
2062
|
-
};
|
2063
|
-
|
2064
|
-
static const __device__ uint32_t iq3s_grid[512] = {
|
2065
|
-
0x01010101, 0x01010103, 0x01010105, 0x0101010b, 0x0101010f, 0x01010301, 0x01010303, 0x01010305,
|
2066
|
-
0x01010309, 0x0101030d, 0x01010501, 0x01010503, 0x0101050b, 0x01010707, 0x01010901, 0x01010905,
|
2067
|
-
0x0101090b, 0x0101090f, 0x01010b03, 0x01010b07, 0x01010d01, 0x01010d05, 0x01010f03, 0x01010f09,
|
2068
|
-
0x01010f0f, 0x01030101, 0x01030103, 0x01030105, 0x01030109, 0x01030301, 0x01030303, 0x0103030b,
|
2069
|
-
0x01030501, 0x01030507, 0x0103050f, 0x01030703, 0x0103070b, 0x01030909, 0x01030d03, 0x01030d0b,
|
2070
|
-
0x01030f05, 0x01050101, 0x01050103, 0x0105010b, 0x0105010f, 0x01050301, 0x01050307, 0x0105030d,
|
2071
|
-
0x01050503, 0x0105050b, 0x01050701, 0x01050709, 0x01050905, 0x0105090b, 0x0105090f, 0x01050b03,
|
2072
|
-
0x01050b07, 0x01050f01, 0x01050f07, 0x01070107, 0x01070303, 0x0107030b, 0x01070501, 0x01070505,
|
2073
|
-
0x01070703, 0x01070707, 0x0107070d, 0x01070909, 0x01070b01, 0x01070b05, 0x01070d0f, 0x01070f03,
|
2074
|
-
0x01070f0b, 0x01090101, 0x01090307, 0x0109030f, 0x01090503, 0x01090509, 0x01090705, 0x01090901,
|
2075
|
-
0x01090907, 0x01090b03, 0x01090f01, 0x010b0105, 0x010b0109, 0x010b0501, 0x010b0505, 0x010b050d,
|
2076
|
-
0x010b0707, 0x010b0903, 0x010b090b, 0x010b090f, 0x010b0d0d, 0x010b0f07, 0x010d010d, 0x010d0303,
|
2077
|
-
0x010d0307, 0x010d0703, 0x010d0b05, 0x010d0f03, 0x010f0101, 0x010f0105, 0x010f0109, 0x010f0501,
|
2078
|
-
0x010f0505, 0x010f050d, 0x010f0707, 0x010f0b01, 0x010f0b09, 0x03010101, 0x03010103, 0x03010105,
|
2079
|
-
0x03010109, 0x03010301, 0x03010303, 0x03010307, 0x0301030b, 0x0301030f, 0x03010501, 0x03010505,
|
2080
|
-
0x03010703, 0x03010709, 0x0301070d, 0x03010b09, 0x03010b0d, 0x03010d03, 0x03010f05, 0x03030101,
|
2081
|
-
0x03030103, 0x03030107, 0x0303010d, 0x03030301, 0x03030309, 0x03030503, 0x03030701, 0x03030707,
|
2082
|
-
0x03030903, 0x03030b01, 0x03030b05, 0x03030f01, 0x03030f0d, 0x03050101, 0x03050305, 0x0305030b,
|
2083
|
-
0x0305030f, 0x03050501, 0x03050509, 0x03050705, 0x03050901, 0x03050907, 0x03050b0b, 0x03050d01,
|
2084
|
-
0x03050f05, 0x03070103, 0x03070109, 0x0307010f, 0x03070301, 0x03070307, 0x03070503, 0x0307050f,
|
2085
|
-
0x03070701, 0x03070709, 0x03070903, 0x03070d05, 0x03070f01, 0x03090107, 0x0309010b, 0x03090305,
|
2086
|
-
0x03090309, 0x03090703, 0x03090707, 0x03090905, 0x0309090d, 0x03090b01, 0x03090b09, 0x030b0103,
|
2087
|
-
0x030b0301, 0x030b0307, 0x030b0503, 0x030b0701, 0x030b0705, 0x030b0b03, 0x030d0501, 0x030d0509,
|
2088
|
-
0x030d050f, 0x030d0909, 0x030d090d, 0x030f0103, 0x030f0107, 0x030f0301, 0x030f0305, 0x030f0503,
|
2089
|
-
0x030f070b, 0x030f0903, 0x030f0d05, 0x030f0f01, 0x05010101, 0x05010103, 0x05010107, 0x0501010b,
|
2090
|
-
0x0501010f, 0x05010301, 0x05010305, 0x05010309, 0x0501030d, 0x05010503, 0x05010507, 0x0501050f,
|
2091
|
-
0x05010701, 0x05010705, 0x05010903, 0x05010907, 0x0501090b, 0x05010b01, 0x05010b05, 0x05010d0f,
|
2092
|
-
0x05010f01, 0x05010f07, 0x05010f0b, 0x05030101, 0x05030105, 0x05030301, 0x05030307, 0x0503030f,
|
2093
|
-
0x05030505, 0x0503050b, 0x05030703, 0x05030709, 0x05030905, 0x05030b03, 0x05050103, 0x05050109,
|
2094
|
-
0x0505010f, 0x05050503, 0x05050507, 0x05050701, 0x0505070f, 0x05050903, 0x05050b07, 0x05050b0f,
|
2095
|
-
0x05050f03, 0x05050f09, 0x05070101, 0x05070105, 0x0507010b, 0x05070303, 0x05070505, 0x05070509,
|
2096
|
-
0x05070703, 0x05070707, 0x05070905, 0x05070b01, 0x05070d0d, 0x05090103, 0x0509010f, 0x05090501,
|
2097
|
-
0x05090507, 0x05090705, 0x0509070b, 0x05090903, 0x05090f05, 0x05090f0b, 0x050b0109, 0x050b0303,
|
2098
|
-
0x050b0505, 0x050b070f, 0x050b0901, 0x050b0b07, 0x050b0f01, 0x050d0101, 0x050d0105, 0x050d010f,
|
2099
|
-
0x050d0503, 0x050d0b0b, 0x050d0d03, 0x050f010b, 0x050f0303, 0x050f050d, 0x050f0701, 0x050f0907,
|
2100
|
-
0x050f0b01, 0x07010105, 0x07010303, 0x07010307, 0x0701030b, 0x0701030f, 0x07010505, 0x07010703,
|
2101
|
-
0x07010707, 0x0701070b, 0x07010905, 0x07010909, 0x0701090f, 0x07010b03, 0x07010d07, 0x07010f03,
|
2102
|
-
0x07030103, 0x07030107, 0x0703010b, 0x07030309, 0x07030503, 0x07030507, 0x07030901, 0x07030d01,
|
2103
|
-
0x07030f05, 0x07030f0d, 0x07050101, 0x07050305, 0x07050501, 0x07050705, 0x07050709, 0x07050b01,
|
2104
|
-
0x07070103, 0x07070301, 0x07070309, 0x07070503, 0x07070507, 0x0707050f, 0x07070701, 0x07070903,
|
2105
|
-
0x07070907, 0x0707090f, 0x07070b0b, 0x07070f07, 0x07090107, 0x07090303, 0x0709030d, 0x07090505,
|
2106
|
-
0x07090703, 0x07090b05, 0x07090d01, 0x07090d09, 0x070b0103, 0x070b0301, 0x070b0305, 0x070b050b,
|
2107
|
-
0x070b0705, 0x070b0909, 0x070b0b0d, 0x070b0f07, 0x070d030d, 0x070d0903, 0x070f0103, 0x070f0107,
|
2108
|
-
0x070f0501, 0x070f0505, 0x070f070b, 0x09010101, 0x09010109, 0x09010305, 0x09010501, 0x09010509,
|
2109
|
-
0x0901050f, 0x09010705, 0x09010903, 0x09010b01, 0x09010f01, 0x09030105, 0x0903010f, 0x09030303,
|
2110
|
-
0x09030307, 0x09030505, 0x09030701, 0x0903070b, 0x09030907, 0x09030b03, 0x09030b0b, 0x09050103,
|
2111
|
-
0x09050107, 0x09050301, 0x0905030b, 0x09050503, 0x09050707, 0x09050901, 0x09050b0f, 0x09050d05,
|
2112
|
-
0x09050f01, 0x09070109, 0x09070303, 0x09070307, 0x09070501, 0x09070505, 0x09070703, 0x0907070b,
|
2113
|
-
0x09090101, 0x09090105, 0x09090509, 0x0909070f, 0x09090901, 0x09090f03, 0x090b010b, 0x090b010f,
|
2114
|
-
0x090b0503, 0x090b0d05, 0x090d0307, 0x090d0709, 0x090d0d01, 0x090f0301, 0x090f030b, 0x090f0701,
|
2115
|
-
0x090f0907, 0x090f0b03, 0x0b010105, 0x0b010301, 0x0b010309, 0x0b010505, 0x0b010901, 0x0b010909,
|
2116
|
-
0x0b01090f, 0x0b010b05, 0x0b010d0d, 0x0b010f09, 0x0b030103, 0x0b030107, 0x0b03010b, 0x0b030305,
|
2117
|
-
0x0b030503, 0x0b030705, 0x0b030f05, 0x0b050101, 0x0b050303, 0x0b050507, 0x0b050701, 0x0b05070d,
|
2118
|
-
0x0b050b07, 0x0b070105, 0x0b07010f, 0x0b070301, 0x0b07050f, 0x0b070909, 0x0b070b03, 0x0b070d0b,
|
2119
|
-
0x0b070f07, 0x0b090103, 0x0b090109, 0x0b090501, 0x0b090705, 0x0b09090d, 0x0b0b0305, 0x0b0b050d,
|
2120
|
-
0x0b0b0b03, 0x0b0b0b07, 0x0b0d0905, 0x0b0f0105, 0x0b0f0109, 0x0b0f0505, 0x0d010303, 0x0d010307,
|
2121
|
-
0x0d01030b, 0x0d010703, 0x0d010707, 0x0d010d01, 0x0d030101, 0x0d030501, 0x0d03050f, 0x0d030d09,
|
2122
|
-
0x0d050305, 0x0d050709, 0x0d050905, 0x0d050b0b, 0x0d050d05, 0x0d050f01, 0x0d070101, 0x0d070309,
|
2123
|
-
0x0d070503, 0x0d070901, 0x0d09050b, 0x0d090907, 0x0d090d05, 0x0d0b0101, 0x0d0b0107, 0x0d0b0709,
|
2124
|
-
0x0d0b0d01, 0x0d0d010b, 0x0d0d0901, 0x0d0f0303, 0x0d0f0307, 0x0f010101, 0x0f010109, 0x0f01010f,
|
2125
|
-
0x0f010501, 0x0f010505, 0x0f01070d, 0x0f010901, 0x0f010b09, 0x0f010d05, 0x0f030105, 0x0f030303,
|
2126
|
-
0x0f030509, 0x0f030907, 0x0f03090b, 0x0f050103, 0x0f050109, 0x0f050301, 0x0f05030d, 0x0f050503,
|
2127
|
-
0x0f050701, 0x0f050b03, 0x0f070105, 0x0f070705, 0x0f07070b, 0x0f070b07, 0x0f090103, 0x0f09010b,
|
2128
|
-
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
|
2129
|
-
};
|
2130
|
-
|
2131
|
-
static const __device__ uint64_t iq1s_grid[512] = {
|
2132
|
-
0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000,
|
2133
|
-
0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01,
|
2134
|
-
0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100,
|
2135
|
-
0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00,
|
2136
|
-
0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101,
|
2137
|
-
0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100,
|
2138
|
-
0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00,
|
2139
|
-
0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff,
|
2140
|
-
0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000,
|
2141
|
-
0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000,
|
2142
|
-
0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001,
|
2143
|
-
0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff,
|
2144
|
-
0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01,
|
2145
|
-
0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001,
|
2146
|
-
0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00,
|
2147
|
-
0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001,
|
2148
|
-
0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100,
|
2149
|
-
0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000,
|
2150
|
-
0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000,
|
2151
|
-
0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000,
|
2152
|
-
0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff,
|
2153
|
-
0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff,
|
2154
|
-
0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01,
|
2155
|
-
0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100,
|
2156
|
-
0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff,
|
2157
|
-
0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000,
|
2158
|
-
0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101,
|
2159
|
-
0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff,
|
2160
|
-
0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff,
|
2161
|
-
0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001,
|
2162
|
-
0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01,
|
2163
|
-
0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101,
|
2164
|
-
0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100,
|
2165
|
-
0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00,
|
2166
|
-
0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001,
|
2167
|
-
0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff,
|
2168
|
-
0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000,
|
2169
|
-
0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000,
|
2170
|
-
0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100,
|
2171
|
-
0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100,
|
2172
|
-
0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01,
|
2173
|
-
0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff,
|
2174
|
-
0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101,
|
2175
|
-
0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000,
|
2176
|
-
0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff,
|
2177
|
-
0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000,
|
2178
|
-
0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff,
|
2179
|
-
0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00,
|
2180
|
-
0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101,
|
2181
|
-
0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000,
|
2182
|
-
0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000,
|
2183
|
-
0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000,
|
2184
|
-
0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100,
|
2185
|
-
0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000,
|
2186
|
-
0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001,
|
2187
|
-
0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff,
|
2188
|
-
0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000,
|
2189
|
-
0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000,
|
2190
|
-
0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000,
|
2191
|
-
0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000,
|
2192
|
-
0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff,
|
2193
|
-
0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000,
|
2194
|
-
0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001,
|
2195
|
-
0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01,
|
2196
|
-
0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100,
|
2197
|
-
0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000,
|
2198
|
-
0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00,
|
2199
|
-
0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100,
|
2200
|
-
0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000,
|
2201
|
-
0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001,
|
2202
|
-
0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00,
|
2203
|
-
0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff,
|
2204
|
-
0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100,
|
2205
|
-
0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff,
|
2206
|
-
0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000,
|
2207
|
-
0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff,
|
2208
|
-
0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff,
|
2209
|
-
0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00,
|
2210
|
-
0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001,
|
2211
|
-
0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001,
|
2212
|
-
0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01,
|
2213
|
-
0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000,
|
2214
|
-
0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101,
|
2215
|
-
0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00,
|
2216
|
-
0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100,
|
2217
|
-
0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101,
|
2218
|
-
0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101,
|
2219
|
-
0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000,
|
2220
|
-
0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff,
|
2221
|
-
0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff,
|
2222
|
-
0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101,
|
2223
|
-
0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff,
|
2224
|
-
0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101,
|
2225
|
-
0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001,
|
2226
|
-
0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff,
|
2227
|
-
0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff,
|
2228
|
-
0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01,
|
2229
|
-
0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff,
|
2230
|
-
0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100,
|
2231
|
-
0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001,
|
2232
|
-
0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00,
|
2233
|
-
0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff,
|
2234
|
-
0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff,
|
2235
|
-
0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000,
|
2236
|
-
0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000,
|
2237
|
-
0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101,
|
2238
|
-
0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001,
|
2239
|
-
0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000,
|
2240
|
-
0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101,
|
2241
|
-
0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000,
|
2242
|
-
0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001,
|
2243
|
-
0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000,
|
2244
|
-
0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100,
|
2245
|
-
0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000,
|
2246
|
-
0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000,
|
2247
|
-
0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100,
|
2248
|
-
0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff,
|
2249
|
-
0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff,
|
2250
|
-
0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00,
|
2251
|
-
0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101,
|
2252
|
-
0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000,
|
2253
|
-
0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00,
|
2254
|
-
0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000,
|
2255
|
-
0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff,
|
2256
|
-
0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101,
|
2257
|
-
0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff,
|
2258
|
-
0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00,
|
2259
|
-
0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff,
|
2260
|
-
};
|
2261
|
-
|
2262
|
-
static const __device__ uint8_t ksigns_iq2xs[128] = {
|
2263
|
-
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
|
2264
|
-
144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
|
2265
|
-
160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
|
2266
|
-
48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
|
2267
|
-
192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
|
2268
|
-
80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
|
2269
|
-
96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
|
2270
|
-
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
2271
|
-
};
|
2272
|
-
|
2273
|
-
//#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
|
2274
|
-
static const __device__ uint64_t ksigns64[128] = {
|
2275
|
-
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
|
2276
|
-
0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
|
2277
|
-
0xff000000ff000000, 0x00000000ff0000ff, 0x00000000ff00ff00, 0xff000000ff00ffff,
|
2278
|
-
0x00000000ffff0000, 0xff000000ffff00ff, 0xff000000ffffff00, 0x00000000ffffffff,
|
2279
|
-
0xff0000ff00000000, 0x000000ff000000ff, 0x000000ff0000ff00, 0xff0000ff0000ffff,
|
2280
|
-
0x000000ff00ff0000, 0xff0000ff00ff00ff, 0xff0000ff00ffff00, 0x000000ff00ffffff,
|
2281
|
-
0x000000ffff000000, 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0x000000ffff00ffff,
|
2282
|
-
0xff0000ffffff0000, 0x000000ffffff00ff, 0x000000ffffffff00, 0xff0000ffffffffff,
|
2283
|
-
0xff00ff0000000000, 0x0000ff00000000ff, 0x0000ff000000ff00, 0xff00ff000000ffff,
|
2284
|
-
0x0000ff0000ff0000, 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0x0000ff0000ffffff,
|
2285
|
-
0x0000ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, 0x0000ff00ff00ffff,
|
2286
|
-
0xff00ff00ffff0000, 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0xff00ff00ffffffff,
|
2287
|
-
0x0000ffff00000000, 0xff00ffff000000ff, 0xff00ffff0000ff00, 0x0000ffff0000ffff,
|
2288
|
-
0xff00ffff00ff0000, 0x0000ffff00ff00ff, 0x0000ffff00ffff00, 0xff00ffff00ffffff,
|
2289
|
-
0xff00ffffff000000, 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0xff00ffffff00ffff,
|
2290
|
-
0x0000ffffffff0000, 0xff00ffffffff00ff, 0xff00ffffffffff00, 0x0000ffffffffffff,
|
2291
|
-
0xffff000000000000, 0x00ff0000000000ff, 0x00ff00000000ff00, 0xffff00000000ffff,
|
2292
|
-
0x00ff000000ff0000, 0xffff000000ff00ff, 0xffff000000ffff00, 0x00ff000000ffffff,
|
2293
|
-
0x00ff0000ff000000, 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0x00ff0000ff00ffff,
|
2294
|
-
0xffff0000ffff0000, 0x00ff0000ffff00ff, 0x00ff0000ffffff00, 0xffff0000ffffffff,
|
2295
|
-
0x00ff00ff00000000, 0xffff00ff000000ff, 0xffff00ff0000ff00, 0x00ff00ff0000ffff,
|
2296
|
-
0xffff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, 0xffff00ff00ffffff,
|
2297
|
-
0xffff00ffff000000, 0x00ff00ffff0000ff, 0x00ff00ffff00ff00, 0xffff00ffff00ffff,
|
2298
|
-
0x00ff00ffffff0000, 0xffff00ffffff00ff, 0xffff00ffffffff00, 0x00ff00ffffffffff,
|
2299
|
-
0x00ffff0000000000, 0xffffff00000000ff, 0xffffff000000ff00, 0x00ffff000000ffff,
|
2300
|
-
0xffffff0000ff0000, 0x00ffff0000ff00ff, 0x00ffff0000ffff00, 0xffffff0000ffffff,
|
2301
|
-
0xffffff00ff000000, 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0xffffff00ff00ffff,
|
2302
|
-
0x00ffff00ffff0000, 0xffffff00ffff00ff, 0xffffff00ffffff00, 0x00ffff00ffffffff,
|
2303
|
-
0xffffffff00000000, 0x00ffffff000000ff, 0x00ffffff0000ff00, 0xffffffff0000ffff,
|
2304
|
-
0x00ffffff00ff0000, 0xffffffff00ff00ff, 0xffffffff00ffff00, 0x00ffffff00ffffff,
|
2305
|
-
0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
|
2306
|
-
0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
|
2307
|
-
};
|
2308
|
-
//#endif
|
2309
|
-
|
2310
|
-
static const __device__ uint8_t kmask_iq2xs[8] = {1, 2, 4, 8, 16, 32, 64, 128};
|
2311
|
-
|
2312
1356
|
inline bool ggml_cuda_supports_mmq(enum ggml_type type) {
|
2313
1357
|
switch (type) {
|
2314
1358
|
case GGML_TYPE_Q4_0:
|
@@ -2459,11 +1503,15 @@ static __global__ void dequantize_block_iq1_s(const void * __restrict__ vx, dst_
|
|
2459
1503
|
const int il = tid/8; // 0...3
|
2460
1504
|
const int ib = tid%8; // 0...7
|
2461
1505
|
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
2462
|
-
const
|
2463
|
-
|
2464
|
-
const int8_t *
|
2465
|
-
|
2466
|
-
|
1506
|
+
const float delta = x[i].qh[ib] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA;
|
1507
|
+
const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 7) + 1);
|
1508
|
+
uint32_t grid32[2]; const int8_t * q = (const int8_t *)grid32;
|
1509
|
+
grid32[0] = iq1s_grid_gpu[x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)];
|
1510
|
+
grid32[1] = (grid32[0] >> 4) & 0x0f0f0f0f;
|
1511
|
+
grid32[0] &= 0x0f0f0f0f;
|
1512
|
+
for (int j = 0; j < 8; ++j) {
|
1513
|
+
y[j] = d * (q[j] + delta);
|
1514
|
+
}
|
2467
1515
|
#else
|
2468
1516
|
assert(false);
|
2469
1517
|
#endif
|
@@ -4303,7 +3351,7 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
|
|
4303
3351
|
#pragma unroll
|
4304
3352
|
for (int i = 0; i < QR2_K; ++ i) {
|
4305
3353
|
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1);
|
4306
|
-
d8[i] =
|
3354
|
+
d8[i] = __low2float(bq8_1[bq8_offset + i].ds);
|
4307
3355
|
}
|
4308
3356
|
|
4309
3357
|
return vec_dot_q2_K_q8_1_impl_mmvq(v, u, scales, bq2_K->dm, d8);
|
@@ -4425,7 +3473,7 @@ static __device__ __forceinline__ float vec_dot_q3_K_q8_1(
|
|
4425
3473
|
#pragma unroll
|
4426
3474
|
for (int i = 0; i < QR3_K; ++i) {
|
4427
3475
|
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1);
|
4428
|
-
d8[i] =
|
3476
|
+
d8[i] = __low2float(bq8_1[bq8_offset + i].ds);
|
4429
3477
|
}
|
4430
3478
|
|
4431
3479
|
return vec_dot_q3_K_q8_1_impl_mmvq(vl, vh, u, bq3_K->scales, scale_offset, d, d8);
|
@@ -4594,7 +3642,7 @@ static __device__ __forceinline__ float vec_dot_q4_K_q8_1(
|
|
4594
3642
|
|
4595
3643
|
for (int i = 0; i < QR4_K; ++i) {
|
4596
3644
|
const block_q8_1 * bq8i = bq8_1 + bq8_offset + i;
|
4597
|
-
d8[i] =
|
3645
|
+
d8[i] = __low2float(bq8i->ds);
|
4598
3646
|
|
4599
3647
|
const int * q8 = (const int *)bq8i->qs + ((iqs/2)%4);
|
4600
3648
|
u[2*i+0] = q8[0];
|
@@ -4959,7 +4007,7 @@ static __device__ __forceinline__ float vec_dot_q6_K_q8_1(
|
|
4959
4007
|
#pragma unroll
|
4960
4008
|
for (int i = 0; i < QR6_K; ++i) {
|
4961
4009
|
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + 2*i].qs, iqs % QI8_1);
|
4962
|
-
d8[i] =
|
4010
|
+
d8[i] = __low2float(bq8_1[bq8_offset + 2*i].ds);
|
4963
4011
|
}
|
4964
4012
|
|
4965
4013
|
return vec_dot_q6_K_q8_1_impl_mmvq(vl, vh, u, scales, bq6_K->d, d8);
|
@@ -5275,44 +4323,36 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1(
|
|
5275
4323
|
#endif
|
5276
4324
|
}
|
5277
4325
|
|
5278
|
-
|
5279
4326
|
static __device__ __forceinline__ float vec_dot_iq1_s_q8_1(
|
5280
4327
|
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) {
|
5281
4328
|
#if QK_K == 256
|
5282
4329
|
const block_iq1_s * bq1 = (const block_iq1_s *) vbq;
|
5283
4330
|
|
5284
4331
|
const int ib32 = iqs;
|
5285
|
-
int
|
5286
|
-
const uint8_t h1 = bq1->scales[2*ib32+0];
|
5287
|
-
const uint8_t h2 = bq1->scales[2*ib32+1];
|
4332
|
+
int sumi = 0;
|
5288
4333
|
#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
|
5289
4334
|
const int * q8 = (const int *)bq8_1[ib32].qs;
|
5290
|
-
|
5291
|
-
|
5292
|
-
|
5293
|
-
|
5294
|
-
|
5295
|
-
sumi1 = __dp4a(q8[j+0], grid1[j], sumi1);
|
5296
|
-
sumi2 = __dp4a(q8[j+2], grid2[j], sumi2);
|
5297
|
-
sumi3 = __dp4a(q8[j+4], grid3[j], sumi3);
|
5298
|
-
sumi4 = __dp4a(q8[j+6], grid4[j], sumi4);
|
4335
|
+
for (int l = 0; l < 4; ++l) {
|
4336
|
+
const int * grid = (const int *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8)));
|
4337
|
+
int grid0 = grid[0] & 0x0f0f0f0f;
|
4338
|
+
int grid1 = (grid[0] >> 4) & 0x0f0f0f0f;
|
4339
|
+
sumi = __dp4a(q8[2*l+1], grid1, __dp4a(q8[2*l+0], grid0, sumi));
|
5299
4340
|
}
|
5300
4341
|
#else
|
5301
|
-
const int8_t
|
5302
|
-
|
5303
|
-
|
5304
|
-
|
5305
|
-
|
5306
|
-
|
5307
|
-
|
5308
|
-
sumi2 += q8[j+ 8] * grid2[j];
|
5309
|
-
sumi3 += q8[j+16] * grid3[j];
|
5310
|
-
sumi4 += q8[j+24] * grid4[j];
|
4342
|
+
const int8_t * q8 = bq8_1[ib32].qs;
|
4343
|
+
for (int l = 0; l < 4; ++l) {
|
4344
|
+
const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8)));
|
4345
|
+
for (int j = 0; j < 4; ++j) {
|
4346
|
+
sumi += q8[j] * (grid[j] & 0xf) + q8[j+4] * (grid[j] >> 4);
|
4347
|
+
}
|
4348
|
+
q8 += 8;
|
5311
4349
|
}
|
5312
4350
|
#endif
|
5313
|
-
const float
|
5314
|
-
|
5315
|
-
|
4351
|
+
const float delta = bq1->qh[ib32] & 0x8000 ? -1-IQ1S_DELTA : -1+IQ1S_DELTA;
|
4352
|
+
const float d1q = (float)bq1->d * (2*((bq1->qh[ib32] >> 12) & 7) + 1);
|
4353
|
+
const float d = d1q * __low2float (bq8_1[ib32].ds);
|
4354
|
+
const float m = d1q * __high2float(bq8_1[ib32].ds);
|
4355
|
+
return d * sumi + m * delta;
|
5316
4356
|
#else
|
5317
4357
|
assert(false);
|
5318
4358
|
return 0.f;
|
@@ -5504,7 +4544,7 @@ static __device__ __forceinline__ void mul_mat_q(
|
|
5504
4544
|
*dsi_dst = *dsi_src;
|
5505
4545
|
} else {
|
5506
4546
|
float * dfi_dst = (float *) dsi_dst;
|
5507
|
-
*dfi_dst =
|
4547
|
+
*dfi_dst = __low2float(*dsi_src);
|
5508
4548
|
}
|
5509
4549
|
}
|
5510
4550
|
|
@@ -11604,8 +10644,20 @@ GGML_CALL void ggml_cuda_get_device_description(int device, char * description,
|
|
11604
10644
|
#define UNUSED GGML_UNUSED
|
11605
10645
|
|
11606
10646
|
struct ggml_backend_cuda_context {
|
10647
|
+
explicit ggml_backend_cuda_context(int device) :
|
10648
|
+
device(device),
|
10649
|
+
name(GGML_CUDA_NAME + std::to_string(device)) {
|
10650
|
+
}
|
10651
|
+
|
10652
|
+
~ggml_backend_cuda_context() {
|
10653
|
+
if (copy_event != nullptr) {
|
10654
|
+
CUDA_CHECK(cudaEventDestroy(copy_event));
|
10655
|
+
}
|
10656
|
+
}
|
10657
|
+
|
11607
10658
|
int device;
|
11608
10659
|
std::string name;
|
10660
|
+
cudaEvent_t copy_event = nullptr;
|
11609
10661
|
};
|
11610
10662
|
|
11611
10663
|
// cuda buffer
|
@@ -11695,9 +10747,8 @@ GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t
|
|
11695
10747
|
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
11696
10748
|
|
11697
10749
|
ggml_cuda_set_device(ctx->device);
|
11698
|
-
CUDA_CHECK(
|
11699
|
-
CUDA_CHECK(
|
11700
|
-
CUDA_CHECK(cudaDeviceSynchronize());
|
10750
|
+
CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, cudaStreamPerThread));
|
10751
|
+
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
11701
10752
|
}
|
11702
10753
|
|
11703
10754
|
GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
@@ -11706,26 +10757,25 @@ GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t
|
|
11706
10757
|
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
11707
10758
|
|
11708
10759
|
ggml_cuda_set_device(ctx->device);
|
11709
|
-
CUDA_CHECK(
|
11710
|
-
CUDA_CHECK(
|
11711
|
-
CUDA_CHECK(cudaDeviceSynchronize());
|
10760
|
+
CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, cudaStreamPerThread));
|
10761
|
+
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
11712
10762
|
}
|
11713
10763
|
|
11714
10764
|
GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) {
|
11715
10765
|
if (ggml_backend_buffer_is_cuda(src->buffer)) {
|
11716
10766
|
ggml_backend_cuda_buffer_context * src_ctx = (ggml_backend_cuda_buffer_context *)src->buffer->context;
|
11717
|
-
ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
11718
|
-
|
11719
|
-
|
11720
|
-
|
11721
|
-
|
11722
|
-
|
11723
|
-
CUDA_CHECK(
|
11724
|
-
CUDA_CHECK(cudaDeviceSynchronize());
|
11725
|
-
|
10767
|
+
ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *)dst->buffer->context;
|
10768
|
+
if (src_ctx->device == dst_ctx->device) {
|
10769
|
+
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(src), cudaMemcpyDeviceToDevice, cudaStreamPerThread));
|
10770
|
+
} else {
|
10771
|
+
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread));
|
10772
|
+
}
|
10773
|
+
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
11726
10774
|
return true;
|
11727
10775
|
}
|
11728
10776
|
return false;
|
10777
|
+
|
10778
|
+
UNUSED(buffer);
|
11729
10779
|
}
|
11730
10780
|
|
11731
10781
|
GGML_CALL static void ggml_backend_cuda_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
@@ -11970,7 +11020,11 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buf
|
|
11970
11020
|
}
|
11971
11021
|
|
11972
11022
|
const char * buf_host = (const char *)data + offset_split;
|
11973
|
-
CUDA_CHECK(
|
11023
|
+
CUDA_CHECK(cudaMemcpyAsync(extra->data_device[id], buf_host, original_size, cudaMemcpyHostToDevice, cudaStreamPerThread));
|
11024
|
+
}
|
11025
|
+
|
11026
|
+
for (int id = 0; id < g_device_count; ++id) {
|
11027
|
+
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
11974
11028
|
}
|
11975
11029
|
}
|
11976
11030
|
|
@@ -12004,7 +11058,11 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_get_tensor(ggml_backend_buf
|
|
12004
11058
|
}
|
12005
11059
|
|
12006
11060
|
char * buf_host = (char *)data + offset_split;
|
12007
|
-
CUDA_CHECK(
|
11061
|
+
CUDA_CHECK(cudaMemcpyAsync(buf_host, extra->data_device[id], original_size, cudaMemcpyDeviceToHost, cudaStreamPerThread));
|
11062
|
+
}
|
11063
|
+
|
11064
|
+
for (int id = 0; id < g_device_count; ++id) {
|
11065
|
+
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
12008
11066
|
}
|
12009
11067
|
}
|
12010
11068
|
|
@@ -12183,6 +11241,10 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() {
|
|
12183
11241
|
return &ggml_backend_cuda_buffer_type_host;
|
12184
11242
|
}
|
12185
11243
|
|
11244
|
+
//static bool ggml_backend_buffer_is_cuda_host(ggml_backend_buffer_t buffer) {
|
11245
|
+
// return buffer->buft->iface.get_name == ggml_backend_cuda_host_buffer_type_name;
|
11246
|
+
//}
|
11247
|
+
|
12186
11248
|
// backend
|
12187
11249
|
|
12188
11250
|
GGML_CALL static const char * ggml_backend_cuda_name(ggml_backend_t backend) {
|
@@ -12206,8 +11268,9 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_cuda_get_default_buffer
|
|
12206
11268
|
|
12207
11269
|
GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
12208
11270
|
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
|
11271
|
+
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
12209
11272
|
|
12210
|
-
GGML_ASSERT(
|
11273
|
+
GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
|
12211
11274
|
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
12212
11275
|
|
12213
11276
|
CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, g_cudaStreams[cuda_ctx->device][0]));
|
@@ -12215,22 +11278,61 @@ GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend,
|
|
12215
11278
|
|
12216
11279
|
GGML_CALL static void ggml_backend_cuda_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
12217
11280
|
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
|
11281
|
+
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
12218
11282
|
|
12219
|
-
GGML_ASSERT(
|
11283
|
+
GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
|
12220
11284
|
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
12221
11285
|
|
12222
11286
|
CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, g_cudaStreams[cuda_ctx->device][0]));
|
12223
11287
|
}
|
12224
11288
|
|
12225
|
-
GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t
|
12226
|
-
|
11289
|
+
GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, ggml_tensor * dst) {
|
11290
|
+
GGML_ASSERT(ggml_backend_is_cuda(backend_src) || ggml_backend_is_cuda(backend_dst));
|
12227
11291
|
|
12228
|
-
|
12229
|
-
|
12230
|
-
|
11292
|
+
ggml_backend_buffer_t buf_src = src->view_src ? src->view_src->buffer : src->buffer;
|
11293
|
+
ggml_backend_buffer_t buf_dst = dst->view_src ? dst->view_src->buffer : dst->buffer;
|
11294
|
+
|
11295
|
+
if (!ggml_backend_buffer_is_cuda(src->buffer)) {
|
11296
|
+
return false;
|
12231
11297
|
}
|
12232
11298
|
|
12233
|
-
|
11299
|
+
if (!ggml_backend_buffer_is_cuda(dst->buffer)) {
|
11300
|
+
return false;
|
11301
|
+
}
|
11302
|
+
|
11303
|
+
// device -> device
|
11304
|
+
ggml_backend_cuda_context * cuda_ctx_src = (ggml_backend_cuda_context *)backend_src->context;
|
11305
|
+
ggml_backend_cuda_context * cuda_ctx_dst = (ggml_backend_cuda_context *)backend_dst->context;
|
11306
|
+
|
11307
|
+
if (backend_src != backend_dst) {
|
11308
|
+
ggml_backend_cuda_buffer_context * buf_ctx_src = (ggml_backend_cuda_buffer_context *)buf_src->context;
|
11309
|
+
ggml_backend_cuda_buffer_context * buf_ctx_dst = (ggml_backend_cuda_buffer_context *)buf_dst->context;
|
11310
|
+
|
11311
|
+
GGML_ASSERT(cuda_ctx_src->device == buf_ctx_src->device);
|
11312
|
+
GGML_ASSERT(cuda_ctx_dst->device == buf_ctx_dst->device);
|
11313
|
+
|
11314
|
+
if (!cuda_ctx_src->copy_event) {
|
11315
|
+
ggml_cuda_set_device(cuda_ctx_src->device);
|
11316
|
+
CUDA_CHECK(cudaEventCreateWithFlags(&cuda_ctx_src->copy_event, cudaEventDisableTiming));
|
11317
|
+
}
|
11318
|
+
|
11319
|
+
// copy on src stream
|
11320
|
+
if (cuda_ctx_src->device == cuda_ctx_dst->device) {
|
11321
|
+
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx_dst->device][0]));
|
11322
|
+
} else {
|
11323
|
+
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), g_cudaStreams[cuda_ctx_src->device][0]));
|
11324
|
+
}
|
11325
|
+
|
11326
|
+
// record event on src stream
|
11327
|
+
CUDA_CHECK(cudaEventRecord(cuda_ctx_src->copy_event, g_cudaStreams[cuda_ctx_src->device][0]));
|
11328
|
+
|
11329
|
+
// wait on dst stream for the copy to complete
|
11330
|
+
CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[cuda_ctx_dst->device][0], cuda_ctx_src->copy_event, 0));
|
11331
|
+
} else {
|
11332
|
+
// src and dst are on the same backend
|
11333
|
+
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx_dst->device][0]));
|
11334
|
+
}
|
11335
|
+
return true;
|
12234
11336
|
}
|
12235
11337
|
|
12236
11338
|
GGML_CALL static void ggml_backend_cuda_synchronize(ggml_backend_t backend) {
|
@@ -12407,6 +11509,52 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
|
|
12407
11509
|
UNUSED(backend);
|
12408
11510
|
}
|
12409
11511
|
|
11512
|
+
static ggml_backend_event_t ggml_backend_cuda_event_new(ggml_backend_t backend) {
|
11513
|
+
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
|
11514
|
+
|
11515
|
+
ggml_cuda_set_device(cuda_ctx->device);
|
11516
|
+
|
11517
|
+
cudaEvent_t event;
|
11518
|
+
CUDA_CHECK(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
|
11519
|
+
|
11520
|
+
return new ggml_backend_event {
|
11521
|
+
/* .backend = */ backend,
|
11522
|
+
/* .context = */ event,
|
11523
|
+
};
|
11524
|
+
}
|
11525
|
+
|
11526
|
+
// Releases an event object.
//
// First destroys the CUDA event stored in event->context, then deletes the
// ggml_backend_event wrapper itself. `event` must not be used afterwards.
static void ggml_backend_cuda_event_free(ggml_backend_event_t event) {
    // tear down the CUDA handle before the wrapper that stores it goes away
    CUDA_CHECK(cudaEventDestroy((cudaEvent_t)event->context));
    delete event;
}
|
11531
|
+
|
11532
|
+
// Records `event` on stream 0 of the device used by the event's own backend
// (event->backend), i.e. g_cudaStreams[device][0].
static void ggml_backend_cuda_event_record(ggml_backend_event_t event) {
    // the stream is looked up through the backend the event was created for
    auto * cuda_ctx = static_cast<ggml_backend_cuda_context *>(event->backend->context);
    CUDA_CHECK(cudaEventRecord((cudaEvent_t)event->context, g_cudaStreams[cuda_ctx->device][0]));
}
|
11537
|
+
|
11538
|
+
// Makes stream 0 of `backend`'s device wait for `event`.
//
// backend: the CUDA backend whose stream (g_cudaStreams[device][0]) must wait.
// event:   the event to wait for; it may belong to a different backend.
//
// If the event belongs to a CUDA backend, the wait is expressed with
// cudaStreamWaitEvent. Otherwise a host function is enqueued on the stream
// that calls ggml_backend_event_synchronize() on the event.
static void ggml_backend_cuda_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
    auto * cuda_ctx = static_cast<ggml_backend_cuda_context *>(backend->context);

    if (!ggml_backend_is_cuda(event->backend)) {
        // untested
        // NOTE(review): the event pointer is handed to the host callback as its
        // user data, so it presumably has to stay valid until the callback has
        // run - confirm against callers.
        auto wait_fn = [](void * payload) {
            ggml_backend_event_t foreign_event = (ggml_backend_event_t)payload;
            ggml_backend_event_synchronize(foreign_event);
        };

        CUDA_CHECK(cudaLaunchHostFunc(g_cudaStreams[cuda_ctx->device][0], wait_fn, event));
        return;
    }

    // CUDA event: express the dependency directly between stream and event
    CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[cuda_ctx->device][0], (cudaEvent_t)event->context, 0));
}
|
11553
|
+
|
11554
|
+
// Calls cudaEventSynchronize on the CUDA event stored in event->context,
// i.e. waits on the host side for that event.
static void ggml_backend_cuda_event_synchronize(ggml_backend_event_t event) {
    CUDA_CHECK(cudaEventSynchronize((cudaEvent_t)event->context));
}
|
11557
|
+
|
12410
11558
|
static ggml_backend_i ggml_backend_cuda_interface = {
|
12411
11559
|
/* .get_name = */ ggml_backend_cuda_name,
|
12412
11560
|
/* .free = */ ggml_backend_cuda_free,
|
@@ -12420,6 +11568,11 @@ static ggml_backend_i ggml_backend_cuda_interface = {
|
|
12420
11568
|
/* .graph_plan_compute = */ NULL,
|
12421
11569
|
/* .graph_compute = */ ggml_backend_cuda_graph_compute,
|
12422
11570
|
/* .supports_op = */ ggml_backend_cuda_supports_op,
|
11571
|
+
/* .event_new = */ ggml_backend_cuda_event_new,
|
11572
|
+
/* .event_free = */ ggml_backend_cuda_event_free,
|
11573
|
+
/* .event_record = */ ggml_backend_cuda_event_record,
|
11574
|
+
/* .event_wait = */ ggml_backend_cuda_event_wait,
|
11575
|
+
/* .event_synchronize = */ ggml_backend_cuda_event_synchronize,
|
12423
11576
|
};
|
12424
11577
|
|
12425
11578
|
static ggml_guid_t ggml_backend_cuda_guid() {
|
@@ -12438,10 +11591,11 @@ GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device) {
|
|
12438
11591
|
// not strictly necessary, but it may reduce the overhead of the first graph_compute
|
12439
11592
|
ggml_cuda_set_main_device(device);
|
12440
11593
|
|
12441
|
-
ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context
|
12442
|
-
|
12443
|
-
|
12444
|
-
|
11594
|
+
ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context(device);
|
11595
|
+
if (ctx == nullptr) {
|
11596
|
+
fprintf(stderr, "%s: error: failed to allocate context\n", __func__);
|
11597
|
+
return nullptr;
|
11598
|
+
}
|
12445
11599
|
|
12446
11600
|
ggml_backend_t cuda_backend = new ggml_backend {
|
12447
11601
|
/* .guid = */ ggml_backend_cuda_guid(),
|