llama_cpp 0.14.0 → 0.14.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/llama_cpp/extconf.rb +3 -1
- data/ext/llama_cpp/llama_cpp.cpp +71 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +9 -0
- data/vendor/tmp/llama.cpp/Makefile +28 -12
- data/vendor/tmp/llama.cpp/ggml-alloc.c +45 -64
- data/vendor/tmp/llama.cpp/ggml-alloc.h +13 -5
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +358 -135
- data/vendor/tmp/llama.cpp/ggml-backend.h +41 -17
- data/vendor/tmp/llama.cpp/ggml-common.h +1830 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +187 -1033
- data/vendor/tmp/llama.cpp/ggml-impl.h +6 -2
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +42 -20
- data/vendor/tmp/llama.cpp/ggml-metal.metal +44 -910
- data/vendor/tmp/llama.cpp/ggml-quants.c +457 -1074
- data/vendor/tmp/llama.cpp/ggml-quants.h +27 -259
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +388 -565
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +6 -39
- data/vendor/tmp/llama.cpp/ggml.c +509 -343
- data/vendor/tmp/llama.cpp/ggml.h +61 -47
- data/vendor/tmp/llama.cpp/llama.cpp +1446 -687
- data/vendor/tmp/llama.cpp/llama.h +25 -11
- data/vendor/tmp/llama.cpp/unicode.cpp +1672 -0
- data/vendor/tmp/llama.cpp/unicode.h +16 -774
- metadata +4 -2
@@ -2,6 +2,15 @@
|
|
2
2
|
#include "ggml.h"
|
3
3
|
#include "ggml-backend-impl.h"
|
4
4
|
|
5
|
+
#if defined(GGML_USE_HIPBLAS)
|
6
|
+
#define GGML_COMMON_DECL_HIP
|
7
|
+
#define GGML_COMMON_IMPL_HIP
|
8
|
+
#else
|
9
|
+
#define GGML_COMMON_DECL_CUDA
|
10
|
+
#define GGML_COMMON_IMPL_CUDA
|
11
|
+
#endif
|
12
|
+
#include "ggml-common.h"
|
13
|
+
|
5
14
|
#include <algorithm>
|
6
15
|
#include <assert.h>
|
7
16
|
#include <atomic>
|
@@ -63,6 +72,7 @@
|
|
63
72
|
#define cudaEventCreateWithFlags hipEventCreateWithFlags
|
64
73
|
#define cudaEventDisableTiming hipEventDisableTiming
|
65
74
|
#define cudaEventRecord hipEventRecord
|
75
|
+
#define cudaEventSynchronize hipEventSynchronize
|
66
76
|
#define cudaEvent_t hipEvent_t
|
67
77
|
#define cudaEventDestroy hipEventDestroy
|
68
78
|
#define cudaFree hipFree
|
@@ -72,6 +82,7 @@
|
|
72
82
|
#define cudaGetDeviceProperties hipGetDeviceProperties
|
73
83
|
#define cudaGetErrorString hipGetErrorString
|
74
84
|
#define cudaGetLastError hipGetLastError
|
85
|
+
#define cudaLaunchHostFunc hipLaunchHostFunc
|
75
86
|
#ifdef GGML_HIP_UMA
|
76
87
|
#define cudaMalloc hipMallocManaged
|
77
88
|
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size)
|
@@ -95,6 +106,7 @@
|
|
95
106
|
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
|
96
107
|
#define cudaStreamFireAndForget hipStreamFireAndForget
|
97
108
|
#define cudaStreamNonBlocking hipStreamNonBlocking
|
109
|
+
#define cudaStreamPerThread hipStreamPerThread
|
98
110
|
#define cudaStreamSynchronize hipStreamSynchronize
|
99
111
|
#define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags)
|
100
112
|
#define cudaStream_t hipStream_t
|
@@ -356,66 +368,6 @@ typedef void (*ggml_cuda_op_flatten_t)(
|
|
356
368
|
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
|
357
369
|
const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream);
|
358
370
|
|
359
|
-
// QK = number of values after dequantization
|
360
|
-
// QR = QK / number of values before dequantization
|
361
|
-
// QI = number of 32 bit integers before dequantization
|
362
|
-
|
363
|
-
#define QK4_0 32
|
364
|
-
#define QR4_0 2
|
365
|
-
#define QI4_0 (QK4_0 / (4 * QR4_0))
|
366
|
-
typedef struct {
|
367
|
-
half d; // delta
|
368
|
-
uint8_t qs[QK4_0 / 2]; // nibbles / quants
|
369
|
-
} block_q4_0;
|
370
|
-
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
|
371
|
-
|
372
|
-
#define QK4_1 32
|
373
|
-
#define QR4_1 2
|
374
|
-
#define QI4_1 (QK4_1 / (4 * QR4_1))
|
375
|
-
typedef struct {
|
376
|
-
half2 dm; // dm.x = delta, dm.y = min
|
377
|
-
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
378
|
-
} block_q4_1;
|
379
|
-
static_assert(sizeof(block_q4_1) == sizeof(ggml_fp16_t) * 2 + QK4_1 / 2, "wrong q4_1 block size/padding");
|
380
|
-
|
381
|
-
#define QK5_0 32
|
382
|
-
#define QR5_0 2
|
383
|
-
#define QI5_0 (QK5_0 / (4 * QR5_0))
|
384
|
-
typedef struct {
|
385
|
-
half d; // delta
|
386
|
-
uint8_t qh[4]; // 5-th bit of quants
|
387
|
-
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
388
|
-
} block_q5_0;
|
389
|
-
static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
|
390
|
-
|
391
|
-
#define QK5_1 32
|
392
|
-
#define QR5_1 2
|
393
|
-
#define QI5_1 (QK5_1 / (4 * QR5_1))
|
394
|
-
typedef struct {
|
395
|
-
half2 dm; // dm.x = delta, dm.y = min
|
396
|
-
uint8_t qh[4]; // 5-th bit of quants
|
397
|
-
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
398
|
-
} block_q5_1;
|
399
|
-
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
400
|
-
|
401
|
-
#define QK8_0 32
|
402
|
-
#define QR8_0 1
|
403
|
-
#define QI8_0 (QK8_0 / (4 * QR8_0))
|
404
|
-
typedef struct {
|
405
|
-
half d; // delta
|
406
|
-
int8_t qs[QK8_0]; // quants
|
407
|
-
} block_q8_0;
|
408
|
-
static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
|
409
|
-
|
410
|
-
#define QK8_1 32
|
411
|
-
#define QR8_1 1
|
412
|
-
#define QI8_1 (QK8_1 / (4 * QR8_1))
|
413
|
-
typedef struct {
|
414
|
-
half2 ds; // ds.x = delta, ds.y = sum
|
415
|
-
int8_t qs[QK8_0]; // quants
|
416
|
-
} block_q8_1;
|
417
|
-
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_fp16_t) + QK8_0, "wrong q8_1 block size/padding");
|
418
|
-
|
419
371
|
typedef float (*vec_dot_q_cuda_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs);
|
420
372
|
typedef void (*allocate_tiles_cuda_t)(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc);
|
421
373
|
typedef void (*load_tiles_cuda_t)(
|
@@ -425,174 +377,6 @@ typedef float (*vec_dot_q_mul_mat_cuda_t)(
|
|
425
377
|
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
|
426
378
|
const int * __restrict__ y_qs, const half2 * __restrict__ y_ms, const int & i, const int & j, const int & k);
|
427
379
|
|
428
|
-
//================================= k-quants
|
429
|
-
|
430
|
-
#ifdef GGML_QKK_64
|
431
|
-
#define QK_K 64
|
432
|
-
#define K_SCALE_SIZE 4
|
433
|
-
#else
|
434
|
-
#define QK_K 256
|
435
|
-
#define K_SCALE_SIZE 12
|
436
|
-
#endif
|
437
|
-
|
438
|
-
#define QR2_K 4
|
439
|
-
#define QI2_K (QK_K / (4*QR2_K))
|
440
|
-
typedef struct {
|
441
|
-
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
442
|
-
uint8_t qs[QK_K/4]; // quants
|
443
|
-
half2 dm; // super-block scale for quantized scales/mins
|
444
|
-
} block_q2_K;
|
445
|
-
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
446
|
-
|
447
|
-
#define QR3_K 4
|
448
|
-
#define QI3_K (QK_K / (4*QR3_K))
|
449
|
-
typedef struct {
|
450
|
-
uint8_t hmask[QK_K/8]; // quants - high bit
|
451
|
-
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
452
|
-
#ifdef GGML_QKK_64
|
453
|
-
uint8_t scales[2]; // scales, quantized with 8 bits
|
454
|
-
#else
|
455
|
-
uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
|
456
|
-
#endif
|
457
|
-
half d; // super-block scale
|
458
|
-
} block_q3_K;
|
459
|
-
//static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + K_SCALE_SIZE, "wrong q3_K block size/padding");
|
460
|
-
|
461
|
-
#define QR4_K 2
|
462
|
-
#define QI4_K (QK_K / (4*QR4_K))
|
463
|
-
#ifdef GGML_QKK_64
|
464
|
-
typedef struct {
|
465
|
-
half dm[2]; // super-block scales/mins
|
466
|
-
uint8_t scales[2]; // 4-bit block scales/mins
|
467
|
-
uint8_t qs[QK_K/2]; // 4--bit quants
|
468
|
-
} block_q4_K;
|
469
|
-
static_assert(sizeof(block_q4_K) == sizeof(half2) + QK_K/2 + 2, "wrong q4_K block size/padding");
|
470
|
-
#else
|
471
|
-
typedef struct {
|
472
|
-
half2 dm; // super-block scale for quantized scales/mins
|
473
|
-
uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits
|
474
|
-
uint8_t qs[QK_K/2]; // 4--bit quants
|
475
|
-
} block_q4_K;
|
476
|
-
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2, "wrong q4_K block size/padding");
|
477
|
-
#endif
|
478
|
-
|
479
|
-
#define QR5_K 2
|
480
|
-
#define QI5_K (QK_K / (4*QR5_K))
|
481
|
-
#ifdef GGML_QKK_64
|
482
|
-
typedef struct {
|
483
|
-
half d; // super-block scale
|
484
|
-
int8_t scales[QK_K/16]; // block scales
|
485
|
-
uint8_t qh[QK_K/8]; // quants, high bit
|
486
|
-
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
487
|
-
} block_q5_K;
|
488
|
-
static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
|
489
|
-
#else
|
490
|
-
typedef struct {
|
491
|
-
half2 dm; // super-block scale for quantized scales/mins
|
492
|
-
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
493
|
-
uint8_t qh[QK_K/8]; // quants, high bit
|
494
|
-
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
495
|
-
} block_q5_K;
|
496
|
-
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
|
497
|
-
#endif
|
498
|
-
|
499
|
-
#define QR6_K 2
|
500
|
-
#define QI6_K (QK_K / (4*QR6_K))
|
501
|
-
typedef struct {
|
502
|
-
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
503
|
-
uint8_t qh[QK_K/4]; // quants, upper 2 bits
|
504
|
-
int8_t scales[QK_K/16]; // scales
|
505
|
-
half d; // delta
|
506
|
-
} block_q6_K;
|
507
|
-
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_K block size/padding");
|
508
|
-
|
509
|
-
#define QR2_XXS 8
|
510
|
-
#define QI2_XXS (QK_K / (4*QR2_XXS))
|
511
|
-
typedef struct {
|
512
|
-
half d;
|
513
|
-
uint16_t qs[QK_K/8];
|
514
|
-
} block_iq2_xxs;
|
515
|
-
static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
|
516
|
-
|
517
|
-
#define QR2_XS 8
|
518
|
-
#define QI2_XS (QK_K / (4*QR2_XS))
|
519
|
-
typedef struct {
|
520
|
-
half d;
|
521
|
-
uint16_t qs[QK_K/8];
|
522
|
-
uint8_t scales[QK_K/32];
|
523
|
-
} block_iq2_xs;
|
524
|
-
static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
|
525
|
-
|
526
|
-
// 2.5625 bpw quants
|
527
|
-
#define QR2_S 8
|
528
|
-
#define QI2_S (QK_K / (4*QR2_S))
|
529
|
-
typedef struct {
|
530
|
-
half d;
|
531
|
-
uint8_t qs[QK_K/4];
|
532
|
-
uint8_t qh[QK_K/32];
|
533
|
-
uint8_t scales[QK_K/32];
|
534
|
-
} block_iq2_s;
|
535
|
-
static_assert(sizeof(block_iq2_s) == sizeof(ggml_fp16_t) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding");
|
536
|
-
|
537
|
-
#define QR3_XXS 8
|
538
|
-
#define QI3_XXS (QK_K / (4*QR3_XXS))
|
539
|
-
typedef struct {
|
540
|
-
half d;
|
541
|
-
uint8_t qs[3*(QK_K/8)];
|
542
|
-
} block_iq3_xxs;
|
543
|
-
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
|
544
|
-
|
545
|
-
#define QR3_XS 8
|
546
|
-
#define QI3_XS (QK_K / (4*QR3_XS))
|
547
|
-
#if QK_K == 64
|
548
|
-
#define IQ3S_N_SCALE 2
|
549
|
-
#else
|
550
|
-
#define IQ3S_N_SCALE QK_K/64
|
551
|
-
#endif
|
552
|
-
typedef struct {
|
553
|
-
half d;
|
554
|
-
uint8_t qs[QK_K/4];
|
555
|
-
uint8_t qh[QK_K/32];
|
556
|
-
uint8_t signs[QK_K/8];
|
557
|
-
uint8_t scales[IQ3S_N_SCALE];
|
558
|
-
} block_iq3_s;
|
559
|
-
static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
|
560
|
-
|
561
|
-
#define QR1_S 8
|
562
|
-
#define QI1_S (QK_K / (4*QR1_S))
|
563
|
-
typedef struct {
|
564
|
-
half d;
|
565
|
-
uint8_t qs[QK_K/8];
|
566
|
-
uint8_t scales[QK_K/16];
|
567
|
-
} block_iq1_s;
|
568
|
-
static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
|
569
|
-
|
570
|
-
#define QK4_NL 32
|
571
|
-
#define QR4_NL 2
|
572
|
-
#define QI4_NL (QK4_NL / (4*QR4_NL))
|
573
|
-
typedef struct {
|
574
|
-
half d;
|
575
|
-
uint8_t qs[QK4_NL/2];
|
576
|
-
} block_iq4_nl;
|
577
|
-
static_assert(sizeof(block_iq4_nl) == sizeof(ggml_fp16_t) + QK4_NL/2, "wrong iq4_nl block size/padding");
|
578
|
-
|
579
|
-
#if QK_K == 64
|
580
|
-
#define block_iq4_xs block_iq4_nl
|
581
|
-
#define QR4_XS QR4_NL
|
582
|
-
#define QI4_XS QI4_NL
|
583
|
-
#else
|
584
|
-
// QR4_XS = 8 is very slightly faster than QR4_XS = 4
|
585
|
-
#define QR4_XS 8
|
586
|
-
#define QI4_XS (QK_K / (4*QR4_XS))
|
587
|
-
typedef struct {
|
588
|
-
half d;
|
589
|
-
uint16_t scales_h;
|
590
|
-
uint8_t scales_l[QK_K/64];
|
591
|
-
uint8_t qs[QK_K/2];
|
592
|
-
} block_iq4_xs;
|
593
|
-
static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
|
594
|
-
#endif
|
595
|
-
|
596
380
|
#define WARP_SIZE 32
|
597
381
|
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
|
598
382
|
|
@@ -1569,746 +1353,6 @@ static __global__ void dequantize_block_q6_K(const void * __restrict__ vx, dst_t
|
|
1569
1353
|
#endif
|
1570
1354
|
}
|
1571
1355
|
|
1572
|
-
static const __device__ uint64_t iq2xxs_grid[256] = {
|
1573
|
-
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
1574
|
-
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x08080808082b0808,
|
1575
|
-
0x08080808082b082b, 0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
|
1576
|
-
0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819,
|
1577
|
-
0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b,
|
1578
|
-
0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
|
1579
|
-
0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08,
|
1580
|
-
0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b,
|
1581
|
-
0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
|
1582
|
-
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08,
|
1583
|
-
0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
|
1584
|
-
0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
|
1585
|
-
0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808,
|
1586
|
-
0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808,
|
1587
|
-
0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
|
1588
|
-
0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819,
|
1589
|
-
0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08,
|
1590
|
-
0x08082b1908081908, 0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
|
1591
|
-
0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
|
1592
|
-
0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
|
1593
|
-
0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
|
1594
|
-
0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
|
1595
|
-
0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
|
1596
|
-
0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
|
1597
|
-
0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
|
1598
|
-
0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
|
1599
|
-
0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
|
1600
|
-
0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
|
1601
|
-
0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
|
1602
|
-
0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
|
1603
|
-
0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
|
1604
|
-
0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
|
1605
|
-
0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
|
1606
|
-
0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
|
1607
|
-
0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
|
1608
|
-
0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
|
1609
|
-
0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
|
1610
|
-
0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
|
1611
|
-
0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
|
1612
|
-
0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
|
1613
|
-
0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
|
1614
|
-
0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
|
1615
|
-
0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
|
1616
|
-
0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
|
1617
|
-
0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
|
1618
|
-
0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
|
1619
|
-
0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
|
1620
|
-
0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
|
1621
|
-
0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
|
1622
|
-
0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
|
1623
|
-
0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
|
1624
|
-
0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
|
1625
|
-
0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
|
1626
|
-
0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
|
1627
|
-
0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
|
1628
|
-
0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
|
1629
|
-
0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
|
1630
|
-
0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
|
1631
|
-
0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
|
1632
|
-
0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
|
1633
|
-
0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
|
1634
|
-
0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
|
1635
|
-
0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
|
1636
|
-
0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
|
1637
|
-
};
|
1638
|
-
|
1639
|
-
static const __device__ uint64_t iq2xs_grid[512] = {
|
1640
|
-
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
1641
|
-
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
1642
|
-
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
1643
|
-
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
1644
|
-
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
1645
|
-
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
1646
|
-
0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08, 0x080808082b190819,
|
1647
|
-
0x080808082b191908, 0x080808082b192b19, 0x080808082b2b0808, 0x0808081908080819,
|
1648
|
-
0x0808081908081908, 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
|
1649
|
-
0x080808190819082b, 0x0808081908191919, 0x0808081908192b08, 0x0808081908192b2b,
|
1650
|
-
0x08080819082b0819, 0x08080819082b1908, 0x0808081919080808, 0x080808191908082b,
|
1651
|
-
0x0808081919081919, 0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
|
1652
|
-
0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819, 0x080808192b081908,
|
1653
|
-
0x080808192b190808, 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b08081919,
|
1654
|
-
0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
|
1655
|
-
0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808, 0x0808082b19191919,
|
1656
|
-
0x0808082b2b080808, 0x0808082b2b082b2b, 0x0808190808080819, 0x0808190808081908,
|
1657
|
-
0x080819080808192b, 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
|
1658
|
-
0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908,
|
1659
|
-
0x0808190819080808, 0x080819081908082b, 0x0808190819081919, 0x0808190819082b08,
|
1660
|
-
0x0808190819190819, 0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
|
1661
|
-
0x080819082b080819, 0x080819082b081908, 0x080819082b190808, 0x0808191908080808,
|
1662
|
-
0x080819190808082b, 0x0808191908081919, 0x0808191908082b08, 0x0808191908190819,
|
1663
|
-
0x0808191908191908, 0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
|
1664
|
-
0x0808191919190808, 0x08081919192b0819, 0x080819192b080808, 0x0808192b08080819,
|
1665
|
-
0x0808192b08081908, 0x0808192b08190808, 0x0808192b082b192b, 0x0808192b19080808,
|
1666
|
-
0x0808192b1908082b, 0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
|
1667
|
-
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b, 0x08082b0808190819,
|
1668
|
-
0x08082b0808191908, 0x08082b08082b0808, 0x08082b08082b1919, 0x08082b0819080819,
|
1669
|
-
0x08082b0819081908, 0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
|
1670
|
-
0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819, 0x08082b1908081908,
|
1671
|
-
0x08082b1908190808, 0x08082b1919080808, 0x08082b192b080819, 0x08082b192b082b19,
|
1672
|
-
0x08082b2b08080808, 0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
|
1673
|
-
0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908, 0x081908080808192b,
|
1674
|
-
0x0819080808082b19, 0x0819080808190808, 0x081908080819082b, 0x0819080808191919,
|
1675
|
-
0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
|
1676
|
-
0x081908081908082b, 0x0819080819081919, 0x0819080819082b08, 0x0819080819190819,
|
1677
|
-
0x0819080819191908, 0x08190808192b0808, 0x08190808192b2b2b, 0x081908082b080819,
|
1678
|
-
0x081908082b081908, 0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
|
1679
|
-
0x0819081908081919, 0x0819081908082b08, 0x0819081908190819, 0x0819081908191908,
|
1680
|
-
0x08190819082b0808, 0x0819081919080819, 0x0819081919081908, 0x0819081919190808,
|
1681
|
-
0x081908192b080808, 0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
|
1682
|
-
0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808, 0x0819082b19080808,
|
1683
|
-
0x0819082b192b0808, 0x0819190808080808, 0x081919080808082b, 0x0819190808081919,
|
1684
|
-
0x0819190808082b08, 0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
|
1685
|
-
0x0819190819080819, 0x0819190819081908, 0x0819190819082b19, 0x0819190819190808,
|
1686
|
-
0x08191908192b1908, 0x081919082b080808, 0x0819191908080819, 0x0819191908081908,
|
1687
|
-
0x0819191908190808, 0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
|
1688
|
-
0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
|
1689
|
-
0x08192b080819082b, 0x08192b0819080808, 0x08192b0819191908, 0x08192b082b08192b,
|
1690
|
-
0x08192b1908080808, 0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
|
1691
|
-
0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919,
|
1692
|
-
0x082b080808082b08, 0x082b080808082b2b, 0x082b080808190819, 0x082b080808191908,
|
1693
|
-
0x082b0808082b0808, 0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
|
1694
|
-
0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819, 0x082b081908081908,
|
1695
|
-
0x082b081908190808, 0x082b081919080808, 0x082b081919082b08, 0x082b0819192b1919,
|
1696
|
-
0x082b082b08080808, 0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
|
1697
|
-
0x082b190808080819, 0x082b190808081908, 0x082b190808190808, 0x082b1908082b2b19,
|
1698
|
-
0x082b190819080808, 0x082b191908080808, 0x082b191919080819, 0x082b19191919082b,
|
1699
|
-
0x082b19192b192b19, 0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
|
1700
|
-
0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b, 0x082b2b08082b0808,
|
1701
|
-
0x082b2b0819191919, 0x082b2b082b082b08, 0x082b2b082b2b082b, 0x082b2b19192b2b08,
|
1702
|
-
0x082b2b192b190808, 0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
|
1703
|
-
0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819, 0x1908080808081908,
|
1704
|
-
0x190808080808192b, 0x1908080808082b19, 0x1908080808190808, 0x190808080819082b,
|
1705
|
-
0x1908080808191919, 0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
1706
|
-
0x1908080819080808, 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08,
|
1707
|
-
0x1908080819082b2b, 0x1908080819190819, 0x1908080819191908, 0x19080808192b0808,
|
1708
|
-
0x19080808192b1919, 0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
|
1709
|
-
0x1908081908080808, 0x190808190808082b, 0x1908081908081919, 0x1908081908082b08,
|
1710
|
-
0x1908081908190819, 0x1908081908191908, 0x19080819082b0808, 0x1908081919080819,
|
1711
|
-
0x1908081919081908, 0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
|
1712
|
-
0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908, 0x1908082b08190808,
|
1713
|
-
0x1908082b0819082b, 0x1908082b082b2b19, 0x1908082b19080808, 0x1908190808080808,
|
1714
|
-
0x190819080808082b, 0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
|
1715
|
-
0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808, 0x1908190819080819,
|
1716
|
-
0x1908190819081908, 0x1908190819190808, 0x190819082b080808, 0x190819082b191908,
|
1717
|
-
0x1908191908080819, 0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
|
1718
|
-
0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808, 0x1908192b08082b2b,
|
1719
|
-
0x1908192b19081908, 0x1908192b19190808, 0x19082b0808080819, 0x19082b0808081908,
|
1720
|
-
0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
|
1721
|
-
0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819, 0x19082b1919081908,
|
1722
|
-
0x19082b1919190808, 0x19082b19192b2b19, 0x19082b2b08081908, 0x1919080808080808,
|
1723
|
-
0x191908080808082b, 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
|
1724
|
-
0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08, 0x1919080819080819,
|
1725
|
-
0x1919080819081908, 0x1919080819190808, 0x191908082b080808, 0x1919081908080819,
|
1726
|
-
0x1919081908081908, 0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
|
1727
|
-
0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908, 0x1919082b2b2b2b2b,
|
1728
|
-
0x1919190808080819, 0x1919190808081908, 0x1919190808190808, 0x19191908082b0819,
|
1729
|
-
0x1919190819080808, 0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
|
1730
|
-
0x1919191908080808, 0x1919191908082b08, 0x191919192b080808, 0x191919192b082b08,
|
1731
|
-
0x1919192b082b0819, 0x1919192b192b2b08, 0x1919192b2b2b0819, 0x19192b0808080808,
|
1732
|
-
0x19192b0808191908, 0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
|
1733
|
-
0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b, 0x19192b2b2b081919,
|
1734
|
-
0x192b080808080819, 0x192b080808081908, 0x192b080808190808, 0x192b080819080808,
|
1735
|
-
0x192b080819191908, 0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
|
1736
|
-
0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b, 0x192b082b2b19082b,
|
1737
|
-
0x192b190808080808, 0x192b19080819192b, 0x192b191908190808, 0x192b191919080808,
|
1738
|
-
0x192b191919081919, 0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
|
1739
|
-
0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908, 0x192b2b2b192b082b,
|
1740
|
-
0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08,
|
1741
|
-
0x2b08080808190819, 0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
|
1742
|
-
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808082b080808,
|
1743
|
-
0x2b0808082b08082b, 0x2b0808082b2b2b08, 0x2b0808082b2b2b2b, 0x2b08081908080819,
|
1744
|
-
0x2b08081908081908, 0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
|
1745
|
-
0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808, 0x2b08082b082b0808,
|
1746
|
-
0x2b08082b2b080808, 0x2b08082b2b08082b, 0x2b08082b2b2b0808, 0x2b08082b2b2b2b08,
|
1747
|
-
0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
|
1748
|
-
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808, 0x2b0819082b082b19,
|
1749
|
-
0x2b08191908080808, 0x2b08191919081908, 0x2b0819192b2b1919, 0x2b08192b08192b08,
|
1750
|
-
0x2b08192b192b2b2b, 0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
|
1751
|
-
0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b, 0x2b082b082b2b2b08,
|
1752
|
-
0x2b082b190808192b, 0x2b082b2b082b082b, 0x2b082b2b2b080808, 0x2b082b2b2b082b08,
|
1753
|
-
0x2b082b2b2b19192b, 0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
|
1754
|
-
0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b, 0x2b1908082b081908,
|
1755
|
-
0x2b19081908080808, 0x2b190819082b082b, 0x2b190819192b1908, 0x2b19082b1919192b,
|
1756
|
-
0x2b19082b2b082b19, 0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
|
1757
|
-
0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19, 0x2b1919192b190808,
|
1758
|
-
0x2b1919192b19082b, 0x2b19192b19080819, 0x2b192b0819190819, 0x2b192b082b2b192b,
|
1759
|
-
0x2b192b1919082b19, 0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
|
1760
|
-
0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b, 0x2b2b0808082b0808,
|
1761
|
-
0x2b2b0808082b2b2b, 0x2b2b08082b2b0808, 0x2b2b081919190819, 0x2b2b081919192b19,
|
1762
|
-
0x2b2b08192b2b192b, 0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
|
1763
|
-
0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808, 0x2b2b190819080808,
|
1764
|
-
0x2b2b19082b191919, 0x2b2b192b192b1919, 0x2b2b192b2b192b08, 0x2b2b2b0808082b2b,
|
1765
|
-
0x2b2b2b08082b0808, 0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
|
1766
|
-
0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908, 0x2b2b2b192b08192b,
|
1767
|
-
0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
1768
|
-
};
|
1769
|
-
|
1770
|
-
static const __device__ uint64_t iq2s_grid[1024] = {
|
1771
|
-
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
1772
|
-
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
1773
|
-
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
1774
|
-
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
1775
|
-
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
1776
|
-
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x08080808192b192b,
|
1777
|
-
0x08080808192b2b19, 0x080808082b080808, 0x080808082b08082b, 0x080808082b081919,
|
1778
|
-
0x080808082b082b08, 0x080808082b190819, 0x080808082b191908, 0x080808082b2b0808,
|
1779
|
-
0x080808082b2b1919, 0x080808082b2b2b2b, 0x0808081908080819, 0x0808081908081908,
|
1780
|
-
0x080808190808192b, 0x0808081908082b19, 0x0808081908190808, 0x080808190819082b,
|
1781
|
-
0x0808081908191919, 0x0808081908192b08, 0x08080819082b0819, 0x08080819082b1908,
|
1782
|
-
0x0808081919080808, 0x080808191908082b, 0x0808081919081919, 0x0808081919082b08,
|
1783
|
-
0x0808081919190819, 0x0808081919191908, 0x080808191919192b, 0x0808081919192b19,
|
1784
|
-
0x08080819192b0808, 0x08080819192b1919, 0x08080819192b2b08, 0x080808192b080819,
|
1785
|
-
0x080808192b081908, 0x080808192b190808, 0x080808192b19082b, 0x080808192b191919,
|
1786
|
-
0x080808192b2b0819, 0x080808192b2b1908, 0x0808082b08080808, 0x0808082b0808082b,
|
1787
|
-
0x0808082b08081919, 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908,
|
1788
|
-
0x0808082b082b0808, 0x0808082b082b2b2b, 0x0808082b19080819, 0x0808082b19081908,
|
1789
|
-
0x0808082b1908192b, 0x0808082b19082b19, 0x0808082b19190808, 0x0808082b19191919,
|
1790
|
-
0x0808082b2b080808, 0x0808082b2b081919, 0x0808082b2b082b2b, 0x0808082b2b191908,
|
1791
|
-
0x0808082b2b2b082b, 0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
|
1792
|
-
0x0808190808082b19, 0x0808190808190808, 0x080819080819082b, 0x0808190808191919,
|
1793
|
-
0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908, 0x08081908082b192b,
|
1794
|
-
0x08081908082b2b19, 0x0808190819080808, 0x080819081908082b, 0x0808190819081919,
|
1795
|
-
0x0808190819082b08, 0x0808190819082b2b, 0x0808190819190819, 0x0808190819191908,
|
1796
|
-
0x080819081919192b, 0x0808190819192b19, 0x08081908192b0808, 0x08081908192b082b,
|
1797
|
-
0x08081908192b1919, 0x080819082b080819, 0x080819082b081908, 0x080819082b08192b,
|
1798
|
-
0x080819082b082b19, 0x080819082b190808, 0x080819082b191919, 0x080819082b192b08,
|
1799
|
-
0x080819082b2b0819, 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b,
|
1800
|
-
0x0808191908081919, 0x0808191908082b08, 0x0808191908082b2b, 0x0808191908190819,
|
1801
|
-
0x0808191908191908, 0x080819190819192b, 0x0808191908192b19, 0x08081919082b0808,
|
1802
|
-
0x08081919082b1919, 0x08081919082b2b08, 0x0808191919080819, 0x0808191919081908,
|
1803
|
-
0x080819191908192b, 0x0808191919082b19, 0x0808191919190808, 0x080819191919082b,
|
1804
|
-
0x0808191919191919, 0x0808191919192b08, 0x08081919192b0819, 0x08081919192b1908,
|
1805
|
-
0x080819192b080808, 0x080819192b08082b, 0x080819192b081919, 0x080819192b082b08,
|
1806
|
-
0x080819192b190819, 0x080819192b191908, 0x080819192b2b0808, 0x0808192b08080819,
|
1807
|
-
0x0808192b08081908, 0x0808192b0808192b, 0x0808192b08082b19, 0x0808192b08190808,
|
1808
|
-
0x0808192b08191919, 0x0808192b19080808, 0x0808192b19081919, 0x0808192b19082b08,
|
1809
|
-
0x0808192b19190819, 0x0808192b19191908, 0x0808192b192b0808, 0x0808192b2b080819,
|
1810
|
-
0x0808192b2b081908, 0x0808192b2b190808, 0x08082b0808080808, 0x08082b080808082b,
|
1811
|
-
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808190819, 0x08082b0808191908,
|
1812
|
-
0x08082b080819192b, 0x08082b0808192b19, 0x08082b08082b0808, 0x08082b08082b1919,
|
1813
|
-
0x08082b08082b2b2b, 0x08082b0819080819, 0x08082b0819081908, 0x08082b081908192b,
|
1814
|
-
0x08082b0819082b19, 0x08082b0819190808, 0x08082b081919082b, 0x08082b0819191919,
|
1815
|
-
0x08082b0819192b08, 0x08082b08192b0819, 0x08082b08192b1908, 0x08082b082b080808,
|
1816
|
-
0x08082b082b081919, 0x08082b082b191908, 0x08082b082b2b2b2b, 0x08082b1908080819,
|
1817
|
-
0x08082b1908081908, 0x08082b1908190808, 0x08082b190819082b, 0x08082b1908191919,
|
1818
|
-
0x08082b1908192b08, 0x08082b19082b0819, 0x08082b1919080808, 0x08082b1919081919,
|
1819
|
-
0x08082b1919082b08, 0x08082b1919190819, 0x08082b1919191908, 0x08082b19192b0808,
|
1820
|
-
0x08082b192b080819, 0x08082b192b190808, 0x08082b2b08080808, 0x08082b2b08190819,
|
1821
|
-
0x08082b2b08191908, 0x08082b2b082b082b, 0x08082b2b082b2b08, 0x08082b2b082b2b2b,
|
1822
|
-
0x08082b2b19190808, 0x08082b2b2b192b19, 0x0819080808080819, 0x0819080808081908,
|
1823
|
-
0x081908080808192b, 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b,
|
1824
|
-
0x0819080808191919, 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908,
|
1825
|
-
0x08190808082b192b, 0x0819080819080808, 0x081908081908082b, 0x0819080819081919,
|
1826
|
-
0x0819080819082b08, 0x0819080819190819, 0x0819080819191908, 0x081908081919192b,
|
1827
|
-
0x0819080819192b19, 0x08190808192b0808, 0x08190808192b082b, 0x08190808192b1919,
|
1828
|
-
0x08190808192b2b08, 0x081908082b080819, 0x081908082b081908, 0x081908082b08192b,
|
1829
|
-
0x081908082b190808, 0x081908082b191919, 0x081908082b192b08, 0x081908082b2b0819,
|
1830
|
-
0x081908082b2b1908, 0x0819081908080808, 0x081908190808082b, 0x0819081908081919,
|
1831
|
-
0x0819081908082b08, 0x0819081908082b2b, 0x0819081908190819, 0x0819081908191908,
|
1832
|
-
0x081908190819192b, 0x0819081908192b19, 0x08190819082b0808, 0x08190819082b082b,
|
1833
|
-
0x08190819082b1919, 0x08190819082b2b08, 0x0819081919080819, 0x0819081919081908,
|
1834
|
-
0x081908191908192b, 0x0819081919082b19, 0x0819081919190808, 0x081908191919082b,
|
1835
|
-
0x0819081919191919, 0x0819081919192b08, 0x08190819192b0819, 0x08190819192b1908,
|
1836
|
-
0x081908192b080808, 0x081908192b08082b, 0x081908192b081919, 0x081908192b082b08,
|
1837
|
-
0x081908192b190819, 0x081908192b191908, 0x0819082b08080819, 0x0819082b08081908,
|
1838
|
-
0x0819082b08082b19, 0x0819082b08190808, 0x0819082b08191919, 0x0819082b082b0819,
|
1839
|
-
0x0819082b082b1908, 0x0819082b19080808, 0x0819082b19081919, 0x0819082b19190819,
|
1840
|
-
0x0819082b19191908, 0x0819082b2b080819, 0x0819082b2b081908, 0x0819082b2b190808,
|
1841
|
-
0x0819190808080808, 0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
|
1842
|
-
0x0819190808190819, 0x0819190808191908, 0x081919080819192b, 0x0819190808192b19,
|
1843
|
-
0x08191908082b0808, 0x08191908082b1919, 0x08191908082b2b08, 0x0819190819080819,
|
1844
|
-
0x0819190819081908, 0x081919081908192b, 0x0819190819082b19, 0x0819190819190808,
|
1845
|
-
0x081919081919082b, 0x0819190819191919, 0x0819190819192b08, 0x08191908192b0819,
|
1846
|
-
0x08191908192b1908, 0x081919082b080808, 0x081919082b08082b, 0x081919082b081919,
|
1847
|
-
0x081919082b082b08, 0x081919082b190819, 0x081919082b191908, 0x081919082b2b0808,
|
1848
|
-
0x0819191908080819, 0x0819191908081908, 0x081919190808192b, 0x0819191908082b19,
|
1849
|
-
0x0819191908190808, 0x081919190819082b, 0x0819191908191919, 0x0819191908192b08,
|
1850
|
-
0x08191919082b0819, 0x08191919082b1908, 0x0819191919080808, 0x081919191908082b,
|
1851
|
-
0x0819191919081919, 0x0819191919082b08, 0x0819191919190819, 0x0819191919191908,
|
1852
|
-
0x08191919192b0808, 0x081919192b080819, 0x081919192b081908, 0x081919192b190808,
|
1853
|
-
0x0819192b08080808, 0x0819192b08081919, 0x0819192b08082b08, 0x0819192b08190819,
|
1854
|
-
0x0819192b08191908, 0x0819192b082b0808, 0x0819192b19080819, 0x0819192b19081908,
|
1855
|
-
0x0819192b19190808, 0x0819192b2b080808, 0x0819192b2b2b2b2b, 0x08192b0808080819,
|
1856
|
-
0x08192b0808081908, 0x08192b080808192b, 0x08192b0808082b19, 0x08192b0808190808,
|
1857
|
-
0x08192b0808191919, 0x08192b0808192b08, 0x08192b08082b0819, 0x08192b0819080808,
|
1858
|
-
0x08192b081908082b, 0x08192b0819081919, 0x08192b0819082b08, 0x08192b0819190819,
|
1859
|
-
0x08192b0819191908, 0x08192b08192b0808, 0x08192b082b080819, 0x08192b082b081908,
|
1860
|
-
0x08192b1908080808, 0x08192b190808082b, 0x08192b1908081919, 0x08192b1908082b08,
|
1861
|
-
0x08192b1908190819, 0x08192b1908191908, 0x08192b19082b0808, 0x08192b1919080819,
|
1862
|
-
0x08192b1919081908, 0x08192b1919190808, 0x08192b19192b2b19, 0x08192b192b2b082b,
|
1863
|
-
0x08192b2b08081908, 0x08192b2b08190808, 0x08192b2b19080808, 0x08192b2b1919192b,
|
1864
|
-
0x082b080808080808, 0x082b08080808082b, 0x082b080808081919, 0x082b080808082b08,
|
1865
|
-
0x082b080808190819, 0x082b080808191908, 0x082b08080819192b, 0x082b080808192b19,
|
1866
|
-
0x082b0808082b0808, 0x082b0808082b1919, 0x082b0808082b2b2b, 0x082b080819080819,
|
1867
|
-
0x082b080819081908, 0x082b080819190808, 0x082b08081919082b, 0x082b080819191919,
|
1868
|
-
0x082b0808192b1908, 0x082b08082b080808, 0x082b08082b082b2b, 0x082b08082b191908,
|
1869
|
-
0x082b08082b2b2b2b, 0x082b081908080819, 0x082b081908081908, 0x082b081908190808,
|
1870
|
-
0x082b08190819082b, 0x082b081908191919, 0x082b0819082b0819, 0x082b081919080808,
|
1871
|
-
0x082b08191908082b, 0x082b081919081919, 0x082b081919190819, 0x082b081919191908,
|
1872
|
-
0x082b0819192b0808, 0x082b08192b080819, 0x082b08192b081908, 0x082b08192b190808,
|
1873
|
-
0x082b082b08080808, 0x082b082b08082b2b, 0x082b082b082b082b, 0x082b082b082b2b08,
|
1874
|
-
0x082b082b082b2b2b, 0x082b082b19081908, 0x082b082b19190808, 0x082b082b2b082b08,
|
1875
|
-
0x082b082b2b082b2b, 0x082b082b2b2b2b08, 0x082b190808080819, 0x082b190808081908,
|
1876
|
-
0x082b19080808192b, 0x082b190808082b19, 0x082b190808190808, 0x082b190808191919,
|
1877
|
-
0x082b190808192b08, 0x082b1908082b0819, 0x082b1908082b1908, 0x082b190819080808,
|
1878
|
-
0x082b19081908082b, 0x082b190819081919, 0x082b190819082b08, 0x082b190819190819,
|
1879
|
-
0x082b190819191908, 0x082b1908192b0808, 0x082b19082b080819, 0x082b19082b081908,
|
1880
|
-
0x082b19082b190808, 0x082b191908080808, 0x082b191908081919, 0x082b191908082b08,
|
1881
|
-
0x082b191908190819, 0x082b191908191908, 0x082b1919082b0808, 0x082b191919080819,
|
1882
|
-
0x082b191919081908, 0x082b191919190808, 0x082b1919192b192b, 0x082b19192b080808,
|
1883
|
-
0x082b192b08080819, 0x082b192b08081908, 0x082b192b08190808, 0x082b192b19080808,
|
1884
|
-
0x082b192b19192b19, 0x082b2b0808080808, 0x082b2b0808081919, 0x082b2b0808190819,
|
1885
|
-
0x082b2b0808191908, 0x082b2b0819080819, 0x082b2b0819081908, 0x082b2b0819190808,
|
1886
|
-
0x082b2b082b082b2b, 0x082b2b082b2b2b2b, 0x082b2b1908080819, 0x082b2b1908081908,
|
1887
|
-
0x082b2b1908190808, 0x082b2b192b191919, 0x082b2b2b08082b2b, 0x082b2b2b082b082b,
|
1888
|
-
0x082b2b2b192b1908, 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
|
1889
|
-
0x1908080808081908, 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808,
|
1890
|
-
0x190808080819082b, 0x1908080808191919, 0x1908080808192b08, 0x1908080808192b2b,
|
1891
|
-
0x19080808082b0819, 0x19080808082b1908, 0x19080808082b192b, 0x1908080819080808,
|
1892
|
-
0x190808081908082b, 0x1908080819081919, 0x1908080819082b08, 0x1908080819082b2b,
|
1893
|
-
0x1908080819190819, 0x1908080819191908, 0x190808081919192b, 0x1908080819192b19,
|
1894
|
-
0x19080808192b0808, 0x19080808192b082b, 0x19080808192b1919, 0x190808082b080819,
|
1895
|
-
0x190808082b081908, 0x190808082b190808, 0x190808082b191919, 0x190808082b192b08,
|
1896
|
-
0x190808082b2b0819, 0x190808082b2b1908, 0x1908081908080808, 0x190808190808082b,
|
1897
|
-
0x1908081908081919, 0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
|
1898
|
-
0x190808190819192b, 0x1908081908192b19, 0x19080819082b0808, 0x19080819082b082b,
|
1899
|
-
0x19080819082b1919, 0x1908081919080819, 0x1908081919081908, 0x190808191908192b,
|
1900
|
-
0x1908081919082b19, 0x1908081919190808, 0x190808191919082b, 0x1908081919191919,
|
1901
|
-
0x1908081919192b08, 0x19080819192b0819, 0x19080819192b1908, 0x190808192b080808,
|
1902
|
-
0x190808192b08082b, 0x190808192b081919, 0x190808192b082b08, 0x190808192b190819,
|
1903
|
-
0x190808192b191908, 0x190808192b2b0808, 0x1908082b08080819, 0x1908082b08081908,
|
1904
|
-
0x1908082b08190808, 0x1908082b0819082b, 0x1908082b08191919, 0x1908082b08192b08,
|
1905
|
-
0x1908082b082b1908, 0x1908082b19080808, 0x1908082b19081919, 0x1908082b19082b08,
|
1906
|
-
0x1908082b19190819, 0x1908082b19191908, 0x1908082b192b0808, 0x1908082b2b080819,
|
1907
|
-
0x1908082b2b081908, 0x1908190808080808, 0x190819080808082b, 0x1908190808081919,
|
1908
|
-
0x1908190808082b08, 0x1908190808082b2b, 0x1908190808190819, 0x1908190808191908,
|
1909
|
-
0x190819080819192b, 0x1908190808192b19, 0x19081908082b0808, 0x19081908082b082b,
|
1910
|
-
0x19081908082b1919, 0x19081908082b2b08, 0x1908190819080819, 0x1908190819081908,
|
1911
|
-
0x190819081908192b, 0x1908190819082b19, 0x1908190819190808, 0x190819081919082b,
|
1912
|
-
0x1908190819191919, 0x1908190819192b08, 0x19081908192b0819, 0x19081908192b1908,
|
1913
|
-
0x190819082b080808, 0x190819082b08082b, 0x190819082b081919, 0x190819082b082b08,
|
1914
|
-
0x190819082b190819, 0x190819082b191908, 0x190819082b2b0808, 0x1908191908080819,
|
1915
|
-
0x1908191908081908, 0x190819190808192b, 0x1908191908082b19, 0x1908191908190808,
|
1916
|
-
0x190819190819082b, 0x1908191908191919, 0x1908191908192b08, 0x19081919082b0819,
|
1917
|
-
0x19081919082b1908, 0x1908191919080808, 0x190819191908082b, 0x1908191919081919,
|
1918
|
-
0x1908191919082b08, 0x1908191919190819, 0x1908191919191908, 0x19081919192b0808,
|
1919
|
-
0x19081919192b2b2b, 0x190819192b080819, 0x190819192b081908, 0x190819192b190808,
|
1920
|
-
0x1908192b08080808, 0x1908192b0808082b, 0x1908192b08081919, 0x1908192b08082b08,
|
1921
|
-
0x1908192b08190819, 0x1908192b08191908, 0x1908192b082b0808, 0x1908192b19080819,
|
1922
|
-
0x1908192b19081908, 0x1908192b19190808, 0x1908192b2b080808, 0x1908192b2b2b1919,
|
1923
|
-
0x19082b0808080819, 0x19082b0808081908, 0x19082b0808082b19, 0x19082b0808190808,
|
1924
|
-
0x19082b080819082b, 0x19082b0808191919, 0x19082b0808192b08, 0x19082b08082b0819,
|
1925
|
-
0x19082b08082b1908, 0x19082b0819080808, 0x19082b081908082b, 0x19082b0819081919,
|
1926
|
-
0x19082b0819082b08, 0x19082b0819190819, 0x19082b0819191908, 0x19082b08192b0808,
|
1927
|
-
0x19082b082b081908, 0x19082b082b190808, 0x19082b1908080808, 0x19082b190808082b,
|
1928
|
-
0x19082b1908081919, 0x19082b1908082b08, 0x19082b1908190819, 0x19082b1908191908,
|
1929
|
-
0x19082b19082b0808, 0x19082b1919080819, 0x19082b1919081908, 0x19082b1919190808,
|
1930
|
-
0x19082b192b080808, 0x19082b192b19192b, 0x19082b2b08080819, 0x19082b2b08081908,
|
1931
|
-
0x19082b2b08190808, 0x19082b2b19080808, 0x1919080808080808, 0x191908080808082b,
|
1932
|
-
0x1919080808081919, 0x1919080808082b08, 0x1919080808190819, 0x1919080808191908,
|
1933
|
-
0x191908080819192b, 0x1919080808192b19, 0x19190808082b0808, 0x19190808082b082b,
|
1934
|
-
0x19190808082b1919, 0x19190808082b2b08, 0x1919080819080819, 0x1919080819081908,
|
1935
|
-
0x191908081908192b, 0x1919080819082b19, 0x1919080819190808, 0x191908081919082b,
|
1936
|
-
0x1919080819191919, 0x1919080819192b08, 0x19190808192b0819, 0x19190808192b1908,
|
1937
|
-
0x191908082b080808, 0x191908082b08082b, 0x191908082b081919, 0x191908082b082b08,
|
1938
|
-
0x191908082b190819, 0x191908082b191908, 0x1919081908080819, 0x1919081908081908,
|
1939
|
-
0x191908190808192b, 0x1919081908082b19, 0x1919081908190808, 0x191908190819082b,
|
1940
|
-
0x1919081908191919, 0x1919081908192b08, 0x19190819082b0819, 0x19190819082b1908,
|
1941
|
-
0x1919081919080808, 0x191908191908082b, 0x1919081919081919, 0x1919081919082b08,
|
1942
|
-
0x1919081919190819, 0x1919081919191908, 0x19190819192b0808, 0x191908192b080819,
|
1943
|
-
0x191908192b081908, 0x191908192b190808, 0x1919082b08080808, 0x1919082b08081919,
|
1944
|
-
0x1919082b08082b08, 0x1919082b08190819, 0x1919082b08191908, 0x1919082b082b0808,
|
1945
|
-
0x1919082b19080819, 0x1919082b19081908, 0x1919082b19190808, 0x1919082b192b2b19,
|
1946
|
-
0x1919082b2b080808, 0x1919190808080819, 0x1919190808081908, 0x191919080808192b,
|
1947
|
-
0x1919190808082b19, 0x1919190808190808, 0x191919080819082b, 0x1919190808191919,
|
1948
|
-
0x1919190808192b08, 0x19191908082b0819, 0x19191908082b1908, 0x1919190819080808,
|
1949
|
-
0x191919081908082b, 0x1919190819081919, 0x1919190819082b08, 0x1919190819190819,
|
1950
|
-
0x1919190819191908, 0x19191908192b0808, 0x191919082b080819, 0x191919082b081908,
|
1951
|
-
0x191919082b190808, 0x1919191908080808, 0x191919190808082b, 0x1919191908081919,
|
1952
|
-
0x1919191908082b08, 0x1919191908190819, 0x1919191908191908, 0x19191919082b0808,
|
1953
|
-
0x1919191919080819, 0x1919191919081908, 0x1919191919190808, 0x191919192b080808,
|
1954
|
-
0x1919192b08080819, 0x1919192b08081908, 0x1919192b08190808, 0x1919192b082b192b,
|
1955
|
-
0x1919192b19080808, 0x19192b0808080808, 0x19192b080808082b, 0x19192b0808081919,
|
1956
|
-
0x19192b0808082b08, 0x19192b0808190819, 0x19192b0808191908, 0x19192b08082b0808,
|
1957
|
-
0x19192b0819080819, 0x19192b0819081908, 0x19192b0819190808, 0x19192b0819192b2b,
|
1958
|
-
0x19192b082b080808, 0x19192b1908080819, 0x19192b1908081908, 0x19192b1908190808,
|
1959
|
-
0x19192b1919080808, 0x19192b2b08080808, 0x19192b2b08192b19, 0x19192b2b2b081919,
|
1960
|
-
0x19192b2b2b2b2b08, 0x192b080808080819, 0x192b080808081908, 0x192b08080808192b,
|
1961
|
-
0x192b080808190808, 0x192b08080819082b, 0x192b080808191919, 0x192b080808192b08,
|
1962
|
-
0x192b0808082b0819, 0x192b0808082b1908, 0x192b080819080808, 0x192b080819081919,
|
1963
|
-
0x192b080819082b08, 0x192b080819190819, 0x192b080819191908, 0x192b0808192b0808,
|
1964
|
-
0x192b08082b081908, 0x192b08082b190808, 0x192b081908080808, 0x192b08190808082b,
|
1965
|
-
0x192b081908081919, 0x192b081908082b08, 0x192b081908190819, 0x192b081908191908,
|
1966
|
-
0x192b0819082b0808, 0x192b081919080819, 0x192b081919081908, 0x192b081919190808,
|
1967
|
-
0x192b08192b080808, 0x192b08192b192b19, 0x192b082b08081908, 0x192b082b08190808,
|
1968
|
-
0x192b082b19080808, 0x192b082b1919192b, 0x192b082b2b2b0819, 0x192b190808080808,
|
1969
|
-
0x192b190808081919, 0x192b190808082b08, 0x192b190808190819, 0x192b190808191908,
|
1970
|
-
0x192b1908082b0808, 0x192b190819080819, 0x192b190819081908, 0x192b190819190808,
|
1971
|
-
0x192b19082b080808, 0x192b191908080819, 0x192b191908081908, 0x192b191908190808,
|
1972
|
-
0x192b191919080808, 0x192b191919082b2b, 0x192b1919192b2b08, 0x192b19192b19082b,
|
1973
|
-
0x192b192b08080808, 0x192b192b2b191908, 0x192b2b0808080819, 0x192b2b0808081908,
|
1974
|
-
0x192b2b0808190808, 0x192b2b08192b1919, 0x192b2b082b192b08, 0x192b2b1908080808,
|
1975
|
-
0x192b2b19082b2b2b, 0x192b2b2b1908082b, 0x192b2b2b2b2b0819, 0x2b08080808080808,
|
1976
|
-
0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
|
1977
|
-
0x2b08080808191908, 0x2b08080808192b19, 0x2b080808082b0808, 0x2b080808082b1919,
|
1978
|
-
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808081919082b,
|
1979
|
-
0x2b08080819191919, 0x2b08080819192b08, 0x2b080808192b0819, 0x2b0808082b080808,
|
1980
|
-
0x2b0808082b081919, 0x2b0808082b190819, 0x2b0808082b191908, 0x2b08081908080819,
|
1981
|
-
0x2b08081908081908, 0x2b08081908082b19, 0x2b08081908190808, 0x2b0808190819082b,
|
1982
|
-
0x2b08081908191919, 0x2b08081908192b08, 0x2b080819082b0819, 0x2b080819082b1908,
|
1983
|
-
0x2b08081919080808, 0x2b0808191908082b, 0x2b08081919081919, 0x2b08081919082b08,
|
1984
|
-
0x2b08081919190819, 0x2b08081919191908, 0x2b0808192b080819, 0x2b0808192b081908,
|
1985
|
-
0x2b0808192b190808, 0x2b0808192b2b2b19, 0x2b08082b08080808, 0x2b08082b08081919,
|
1986
|
-
0x2b08082b08082b2b, 0x2b08082b08190819, 0x2b08082b08191908, 0x2b08082b19080819,
|
1987
|
-
0x2b08082b19081908, 0x2b08082b19190808, 0x2b08190808080819, 0x2b08190808081908,
|
1988
|
-
0x2b0819080808192b, 0x2b08190808082b19, 0x2b08190808190808, 0x2b0819080819082b,
|
1989
|
-
0x2b08190808191919, 0x2b08190808192b08, 0x2b081908082b0819, 0x2b08190819080808,
|
1990
|
-
0x2b0819081908082b, 0x2b08190819081919, 0x2b08190819082b08, 0x2b08190819190819,
|
1991
|
-
0x2b08190819191908, 0x2b081908192b0808, 0x2b0819082b080819, 0x2b0819082b081908,
|
1992
|
-
0x2b0819082b190808, 0x2b08191908080808, 0x2b0819190808082b, 0x2b08191908081919,
|
1993
|
-
0x2b08191908082b08, 0x2b08191908190819, 0x2b08191908191908, 0x2b081919082b0808,
|
1994
|
-
0x2b08191919080819, 0x2b08191919081908, 0x2b08191919190808, 0x2b0819192b080808,
|
1995
|
-
0x2b0819192b082b2b, 0x2b08192b08080819, 0x2b08192b08081908, 0x2b08192b08190808,
|
1996
|
-
0x2b08192b082b2b19, 0x2b08192b19080808, 0x2b082b0808080808, 0x2b082b0808081919,
|
1997
|
-
0x2b082b0808190819, 0x2b082b0808191908, 0x2b082b0819080819, 0x2b082b0819081908,
|
1998
|
-
0x2b082b0819190808, 0x2b082b082b2b082b, 0x2b082b1908080819, 0x2b082b1908081908,
|
1999
|
-
0x2b082b1919080808, 0x2b082b19192b1919, 0x2b082b2b082b082b, 0x2b082b2b19192b08,
|
2000
|
-
0x2b082b2b19192b2b, 0x2b082b2b2b08082b, 0x2b082b2b2b2b082b, 0x2b19080808080819,
|
2001
|
-
0x2b19080808081908, 0x2b19080808082b19, 0x2b19080808190808, 0x2b1908080819082b,
|
2002
|
-
0x2b19080808191919, 0x2b19080808192b08, 0x2b190808082b1908, 0x2b19080819080808,
|
2003
|
-
0x2b1908081908082b, 0x2b19080819081919, 0x2b19080819082b08, 0x2b19080819190819,
|
2004
|
-
0x2b19080819191908, 0x2b190808192b0808, 0x2b1908082b080819, 0x2b1908082b081908,
|
2005
|
-
0x2b1908082b190808, 0x2b19081908080808, 0x2b19081908081919, 0x2b19081908190819,
|
2006
|
-
0x2b19081908191908, 0x2b19081919080819, 0x2b19081919081908, 0x2b19081919190808,
|
2007
|
-
0x2b19081919192b2b, 0x2b19082b08080819, 0x2b19082b08081908, 0x2b19082b08190808,
|
2008
|
-
0x2b19082b19080808, 0x2b19082b2b2b192b, 0x2b19190808080808, 0x2b1919080808082b,
|
2009
|
-
0x2b19190808081919, 0x2b19190808082b08, 0x2b19190808190819, 0x2b19190808191908,
|
2010
|
-
0x2b191908082b0808, 0x2b19190819080819, 0x2b19190819081908, 0x2b19190819190808,
|
2011
|
-
0x2b1919082b080808, 0x2b1919082b19192b, 0x2b19191908080819, 0x2b19191908081908,
|
2012
|
-
0x2b19191908190808, 0x2b19191919080808, 0x2b1919192b192b08, 0x2b1919192b2b0819,
|
2013
|
-
0x2b19192b08080808, 0x2b19192b1908192b, 0x2b19192b192b1908, 0x2b192b0808080819,
|
2014
|
-
0x2b192b0808081908, 0x2b192b0808190808, 0x2b192b08082b192b, 0x2b192b0819080808,
|
2015
|
-
0x2b192b082b2b2b19, 0x2b192b1908080808, 0x2b192b1919082b19, 0x2b192b191919082b,
|
2016
|
-
0x2b192b2b2b190808, 0x2b2b080808080808, 0x2b2b080808081919, 0x2b2b080808082b2b,
|
2017
|
-
0x2b2b080808191908, 0x2b2b0808082b082b, 0x2b2b0808082b2b2b, 0x2b2b080819080819,
|
2018
|
-
0x2b2b080819081908, 0x2b2b080819190808, 0x2b2b08082b2b082b, 0x2b2b08082b2b2b2b,
|
2019
|
-
0x2b2b081919080808, 0x2b2b0819192b1919, 0x2b2b082b0808082b, 0x2b2b082b08082b2b,
|
2020
|
-
0x2b2b082b082b082b, 0x2b2b082b082b2b08, 0x2b2b082b082b2b2b, 0x2b2b082b2b08082b,
|
2021
|
-
0x2b2b082b2b082b08, 0x2b2b082b2b082b2b, 0x2b2b082b2b2b2b08, 0x2b2b190808080819,
|
2022
|
-
0x2b2b190808081908, 0x2b2b190808190808, 0x2b2b190819080808, 0x2b2b19082b082b19,
|
2023
|
-
0x2b2b19082b2b1908, 0x2b2b191908080808, 0x2b2b191908192b19, 0x2b2b192b19190819,
|
2024
|
-
0x2b2b2b0808082b2b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b082b, 0x2b2b2b1919191908,
|
2025
|
-
0x2b2b2b192b08192b, 0x2b2b2b2b08082b08, 0x2b2b2b2b08082b2b, 0x2b2b2b2b082b0808,
|
2026
|
-
0x2b2b2b2b082b082b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b2b082b08, 0x2b2b2b2b2b2b2b2b,
|
2027
|
-
};
|
2028
|
-
|
2029
|
-
static const __device__ uint32_t iq3xxs_grid[256] = {
|
2030
|
-
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
|
2031
|
-
0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
|
2032
|
-
0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
2033
|
-
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
|
2034
|
-
0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
|
2035
|
-
0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
2036
|
-
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
|
2037
|
-
0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
|
2038
|
-
0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
2039
|
-
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
|
2040
|
-
0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
|
2041
|
-
0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
2042
|
-
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
|
2043
|
-
0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
|
2044
|
-
0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
2045
|
-
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
|
2046
|
-
0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
|
2047
|
-
0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
2048
|
-
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
|
2049
|
-
0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
|
2050
|
-
0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
2051
|
-
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
|
2052
|
-
0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
|
2053
|
-
0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
2054
|
-
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
|
2055
|
-
0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
|
2056
|
-
0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
2057
|
-
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
|
2058
|
-
0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
|
2059
|
-
0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
2060
|
-
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
|
2061
|
-
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
2062
|
-
};
|
2063
|
-
|
2064
|
-
static const __device__ uint32_t iq3s_grid[512] = {
|
2065
|
-
0x01010101, 0x01010103, 0x01010105, 0x0101010b, 0x0101010f, 0x01010301, 0x01010303, 0x01010305,
|
2066
|
-
0x01010309, 0x0101030d, 0x01010501, 0x01010503, 0x0101050b, 0x01010707, 0x01010901, 0x01010905,
|
2067
|
-
0x0101090b, 0x0101090f, 0x01010b03, 0x01010b07, 0x01010d01, 0x01010d05, 0x01010f03, 0x01010f09,
|
2068
|
-
0x01010f0f, 0x01030101, 0x01030103, 0x01030105, 0x01030109, 0x01030301, 0x01030303, 0x0103030b,
|
2069
|
-
0x01030501, 0x01030507, 0x0103050f, 0x01030703, 0x0103070b, 0x01030909, 0x01030d03, 0x01030d0b,
|
2070
|
-
0x01030f05, 0x01050101, 0x01050103, 0x0105010b, 0x0105010f, 0x01050301, 0x01050307, 0x0105030d,
|
2071
|
-
0x01050503, 0x0105050b, 0x01050701, 0x01050709, 0x01050905, 0x0105090b, 0x0105090f, 0x01050b03,
|
2072
|
-
0x01050b07, 0x01050f01, 0x01050f07, 0x01070107, 0x01070303, 0x0107030b, 0x01070501, 0x01070505,
|
2073
|
-
0x01070703, 0x01070707, 0x0107070d, 0x01070909, 0x01070b01, 0x01070b05, 0x01070d0f, 0x01070f03,
|
2074
|
-
0x01070f0b, 0x01090101, 0x01090307, 0x0109030f, 0x01090503, 0x01090509, 0x01090705, 0x01090901,
|
2075
|
-
0x01090907, 0x01090b03, 0x01090f01, 0x010b0105, 0x010b0109, 0x010b0501, 0x010b0505, 0x010b050d,
|
2076
|
-
0x010b0707, 0x010b0903, 0x010b090b, 0x010b090f, 0x010b0d0d, 0x010b0f07, 0x010d010d, 0x010d0303,
|
2077
|
-
0x010d0307, 0x010d0703, 0x010d0b05, 0x010d0f03, 0x010f0101, 0x010f0105, 0x010f0109, 0x010f0501,
|
2078
|
-
0x010f0505, 0x010f050d, 0x010f0707, 0x010f0b01, 0x010f0b09, 0x03010101, 0x03010103, 0x03010105,
|
2079
|
-
0x03010109, 0x03010301, 0x03010303, 0x03010307, 0x0301030b, 0x0301030f, 0x03010501, 0x03010505,
|
2080
|
-
0x03010703, 0x03010709, 0x0301070d, 0x03010b09, 0x03010b0d, 0x03010d03, 0x03010f05, 0x03030101,
|
2081
|
-
0x03030103, 0x03030107, 0x0303010d, 0x03030301, 0x03030309, 0x03030503, 0x03030701, 0x03030707,
|
2082
|
-
0x03030903, 0x03030b01, 0x03030b05, 0x03030f01, 0x03030f0d, 0x03050101, 0x03050305, 0x0305030b,
|
2083
|
-
0x0305030f, 0x03050501, 0x03050509, 0x03050705, 0x03050901, 0x03050907, 0x03050b0b, 0x03050d01,
|
2084
|
-
0x03050f05, 0x03070103, 0x03070109, 0x0307010f, 0x03070301, 0x03070307, 0x03070503, 0x0307050f,
|
2085
|
-
0x03070701, 0x03070709, 0x03070903, 0x03070d05, 0x03070f01, 0x03090107, 0x0309010b, 0x03090305,
|
2086
|
-
0x03090309, 0x03090703, 0x03090707, 0x03090905, 0x0309090d, 0x03090b01, 0x03090b09, 0x030b0103,
|
2087
|
-
0x030b0301, 0x030b0307, 0x030b0503, 0x030b0701, 0x030b0705, 0x030b0b03, 0x030d0501, 0x030d0509,
|
2088
|
-
0x030d050f, 0x030d0909, 0x030d090d, 0x030f0103, 0x030f0107, 0x030f0301, 0x030f0305, 0x030f0503,
|
2089
|
-
0x030f070b, 0x030f0903, 0x030f0d05, 0x030f0f01, 0x05010101, 0x05010103, 0x05010107, 0x0501010b,
|
2090
|
-
0x0501010f, 0x05010301, 0x05010305, 0x05010309, 0x0501030d, 0x05010503, 0x05010507, 0x0501050f,
|
2091
|
-
0x05010701, 0x05010705, 0x05010903, 0x05010907, 0x0501090b, 0x05010b01, 0x05010b05, 0x05010d0f,
|
2092
|
-
0x05010f01, 0x05010f07, 0x05010f0b, 0x05030101, 0x05030105, 0x05030301, 0x05030307, 0x0503030f,
|
2093
|
-
0x05030505, 0x0503050b, 0x05030703, 0x05030709, 0x05030905, 0x05030b03, 0x05050103, 0x05050109,
|
2094
|
-
0x0505010f, 0x05050503, 0x05050507, 0x05050701, 0x0505070f, 0x05050903, 0x05050b07, 0x05050b0f,
|
2095
|
-
0x05050f03, 0x05050f09, 0x05070101, 0x05070105, 0x0507010b, 0x05070303, 0x05070505, 0x05070509,
|
2096
|
-
0x05070703, 0x05070707, 0x05070905, 0x05070b01, 0x05070d0d, 0x05090103, 0x0509010f, 0x05090501,
|
2097
|
-
0x05090507, 0x05090705, 0x0509070b, 0x05090903, 0x05090f05, 0x05090f0b, 0x050b0109, 0x050b0303,
|
2098
|
-
0x050b0505, 0x050b070f, 0x050b0901, 0x050b0b07, 0x050b0f01, 0x050d0101, 0x050d0105, 0x050d010f,
|
2099
|
-
0x050d0503, 0x050d0b0b, 0x050d0d03, 0x050f010b, 0x050f0303, 0x050f050d, 0x050f0701, 0x050f0907,
|
2100
|
-
0x050f0b01, 0x07010105, 0x07010303, 0x07010307, 0x0701030b, 0x0701030f, 0x07010505, 0x07010703,
|
2101
|
-
0x07010707, 0x0701070b, 0x07010905, 0x07010909, 0x0701090f, 0x07010b03, 0x07010d07, 0x07010f03,
|
2102
|
-
0x07030103, 0x07030107, 0x0703010b, 0x07030309, 0x07030503, 0x07030507, 0x07030901, 0x07030d01,
|
2103
|
-
0x07030f05, 0x07030f0d, 0x07050101, 0x07050305, 0x07050501, 0x07050705, 0x07050709, 0x07050b01,
|
2104
|
-
0x07070103, 0x07070301, 0x07070309, 0x07070503, 0x07070507, 0x0707050f, 0x07070701, 0x07070903,
|
2105
|
-
0x07070907, 0x0707090f, 0x07070b0b, 0x07070f07, 0x07090107, 0x07090303, 0x0709030d, 0x07090505,
|
2106
|
-
0x07090703, 0x07090b05, 0x07090d01, 0x07090d09, 0x070b0103, 0x070b0301, 0x070b0305, 0x070b050b,
|
2107
|
-
0x070b0705, 0x070b0909, 0x070b0b0d, 0x070b0f07, 0x070d030d, 0x070d0903, 0x070f0103, 0x070f0107,
|
2108
|
-
0x070f0501, 0x070f0505, 0x070f070b, 0x09010101, 0x09010109, 0x09010305, 0x09010501, 0x09010509,
|
2109
|
-
0x0901050f, 0x09010705, 0x09010903, 0x09010b01, 0x09010f01, 0x09030105, 0x0903010f, 0x09030303,
|
2110
|
-
0x09030307, 0x09030505, 0x09030701, 0x0903070b, 0x09030907, 0x09030b03, 0x09030b0b, 0x09050103,
|
2111
|
-
0x09050107, 0x09050301, 0x0905030b, 0x09050503, 0x09050707, 0x09050901, 0x09050b0f, 0x09050d05,
|
2112
|
-
0x09050f01, 0x09070109, 0x09070303, 0x09070307, 0x09070501, 0x09070505, 0x09070703, 0x0907070b,
|
2113
|
-
0x09090101, 0x09090105, 0x09090509, 0x0909070f, 0x09090901, 0x09090f03, 0x090b010b, 0x090b010f,
|
2114
|
-
0x090b0503, 0x090b0d05, 0x090d0307, 0x090d0709, 0x090d0d01, 0x090f0301, 0x090f030b, 0x090f0701,
|
2115
|
-
0x090f0907, 0x090f0b03, 0x0b010105, 0x0b010301, 0x0b010309, 0x0b010505, 0x0b010901, 0x0b010909,
|
2116
|
-
0x0b01090f, 0x0b010b05, 0x0b010d0d, 0x0b010f09, 0x0b030103, 0x0b030107, 0x0b03010b, 0x0b030305,
|
2117
|
-
0x0b030503, 0x0b030705, 0x0b030f05, 0x0b050101, 0x0b050303, 0x0b050507, 0x0b050701, 0x0b05070d,
|
2118
|
-
0x0b050b07, 0x0b070105, 0x0b07010f, 0x0b070301, 0x0b07050f, 0x0b070909, 0x0b070b03, 0x0b070d0b,
|
2119
|
-
0x0b070f07, 0x0b090103, 0x0b090109, 0x0b090501, 0x0b090705, 0x0b09090d, 0x0b0b0305, 0x0b0b050d,
|
2120
|
-
0x0b0b0b03, 0x0b0b0b07, 0x0b0d0905, 0x0b0f0105, 0x0b0f0109, 0x0b0f0505, 0x0d010303, 0x0d010307,
|
2121
|
-
0x0d01030b, 0x0d010703, 0x0d010707, 0x0d010d01, 0x0d030101, 0x0d030501, 0x0d03050f, 0x0d030d09,
|
2122
|
-
0x0d050305, 0x0d050709, 0x0d050905, 0x0d050b0b, 0x0d050d05, 0x0d050f01, 0x0d070101, 0x0d070309,
|
2123
|
-
0x0d070503, 0x0d070901, 0x0d09050b, 0x0d090907, 0x0d090d05, 0x0d0b0101, 0x0d0b0107, 0x0d0b0709,
|
2124
|
-
0x0d0b0d01, 0x0d0d010b, 0x0d0d0901, 0x0d0f0303, 0x0d0f0307, 0x0f010101, 0x0f010109, 0x0f01010f,
|
2125
|
-
0x0f010501, 0x0f010505, 0x0f01070d, 0x0f010901, 0x0f010b09, 0x0f010d05, 0x0f030105, 0x0f030303,
|
2126
|
-
0x0f030509, 0x0f030907, 0x0f03090b, 0x0f050103, 0x0f050109, 0x0f050301, 0x0f05030d, 0x0f050503,
|
2127
|
-
0x0f050701, 0x0f050b03, 0x0f070105, 0x0f070705, 0x0f07070b, 0x0f070b07, 0x0f090103, 0x0f09010b,
|
2128
|
-
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
|
2129
|
-
};
|
2130
|
-
|
2131
|
-
static const __device__ uint64_t iq1s_grid[512] = {
|
2132
|
-
0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000,
|
2133
|
-
0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01,
|
2134
|
-
0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100,
|
2135
|
-
0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00,
|
2136
|
-
0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101,
|
2137
|
-
0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100,
|
2138
|
-
0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00,
|
2139
|
-
0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff,
|
2140
|
-
0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000,
|
2141
|
-
0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000,
|
2142
|
-
0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001,
|
2143
|
-
0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff,
|
2144
|
-
0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01,
|
2145
|
-
0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001,
|
2146
|
-
0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00,
|
2147
|
-
0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001,
|
2148
|
-
0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100,
|
2149
|
-
0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000,
|
2150
|
-
0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000,
|
2151
|
-
0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000,
|
2152
|
-
0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff,
|
2153
|
-
0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff,
|
2154
|
-
0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01,
|
2155
|
-
0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100,
|
2156
|
-
0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff,
|
2157
|
-
0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000,
|
2158
|
-
0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101,
|
2159
|
-
0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff,
|
2160
|
-
0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff,
|
2161
|
-
0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001,
|
2162
|
-
0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01,
|
2163
|
-
0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101,
|
2164
|
-
0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100,
|
2165
|
-
0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00,
|
2166
|
-
0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001,
|
2167
|
-
0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff,
|
2168
|
-
0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000,
|
2169
|
-
0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000,
|
2170
|
-
0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100,
|
2171
|
-
0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100,
|
2172
|
-
0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01,
|
2173
|
-
0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff,
|
2174
|
-
0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101,
|
2175
|
-
0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000,
|
2176
|
-
0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff,
|
2177
|
-
0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000,
|
2178
|
-
0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff,
|
2179
|
-
0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00,
|
2180
|
-
0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101,
|
2181
|
-
0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000,
|
2182
|
-
0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000,
|
2183
|
-
0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000,
|
2184
|
-
0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100,
|
2185
|
-
0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000,
|
2186
|
-
0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001,
|
2187
|
-
0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff,
|
2188
|
-
0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000,
|
2189
|
-
0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000,
|
2190
|
-
0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000,
|
2191
|
-
0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000,
|
2192
|
-
0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff,
|
2193
|
-
0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000,
|
2194
|
-
0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001,
|
2195
|
-
0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01,
|
2196
|
-
0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100,
|
2197
|
-
0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000,
|
2198
|
-
0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00,
|
2199
|
-
0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100,
|
2200
|
-
0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000,
|
2201
|
-
0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001,
|
2202
|
-
0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00,
|
2203
|
-
0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff,
|
2204
|
-
0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100,
|
2205
|
-
0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff,
|
2206
|
-
0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000,
|
2207
|
-
0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff,
|
2208
|
-
0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff,
|
2209
|
-
0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00,
|
2210
|
-
0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001,
|
2211
|
-
0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001,
|
2212
|
-
0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01,
|
2213
|
-
0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000,
|
2214
|
-
0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101,
|
2215
|
-
0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00,
|
2216
|
-
0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100,
|
2217
|
-
0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101,
|
2218
|
-
0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101,
|
2219
|
-
0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000,
|
2220
|
-
0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff,
|
2221
|
-
0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff,
|
2222
|
-
0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101,
|
2223
|
-
0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff,
|
2224
|
-
0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101,
|
2225
|
-
0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001,
|
2226
|
-
0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff,
|
2227
|
-
0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff,
|
2228
|
-
0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01,
|
2229
|
-
0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff,
|
2230
|
-
0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100,
|
2231
|
-
0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001,
|
2232
|
-
0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00,
|
2233
|
-
0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff,
|
2234
|
-
0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff,
|
2235
|
-
0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000,
|
2236
|
-
0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000,
|
2237
|
-
0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101,
|
2238
|
-
0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001,
|
2239
|
-
0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000,
|
2240
|
-
0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101,
|
2241
|
-
0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000,
|
2242
|
-
0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001,
|
2243
|
-
0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000,
|
2244
|
-
0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100,
|
2245
|
-
0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000,
|
2246
|
-
0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000,
|
2247
|
-
0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100,
|
2248
|
-
0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff,
|
2249
|
-
0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff,
|
2250
|
-
0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00,
|
2251
|
-
0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101,
|
2252
|
-
0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000,
|
2253
|
-
0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00,
|
2254
|
-
0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000,
|
2255
|
-
0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff,
|
2256
|
-
0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101,
|
2257
|
-
0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff,
|
2258
|
-
0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00,
|
2259
|
-
0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff,
|
2260
|
-
};
|
2261
|
-
|
2262
|
-
static const __device__ uint8_t ksigns_iq2xs[128] = {
|
2263
|
-
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
|
2264
|
-
144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
|
2265
|
-
160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
|
2266
|
-
48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
|
2267
|
-
192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
|
2268
|
-
80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
|
2269
|
-
96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
|
2270
|
-
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
2271
|
-
};
|
2272
|
-
|
2273
|
-
//#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
|
2274
|
-
static const __device__ uint64_t ksigns64[128] = {
|
2275
|
-
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
|
2276
|
-
0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
|
2277
|
-
0xff000000ff000000, 0x00000000ff0000ff, 0x00000000ff00ff00, 0xff000000ff00ffff,
|
2278
|
-
0x00000000ffff0000, 0xff000000ffff00ff, 0xff000000ffffff00, 0x00000000ffffffff,
|
2279
|
-
0xff0000ff00000000, 0x000000ff000000ff, 0x000000ff0000ff00, 0xff0000ff0000ffff,
|
2280
|
-
0x000000ff00ff0000, 0xff0000ff00ff00ff, 0xff0000ff00ffff00, 0x000000ff00ffffff,
|
2281
|
-
0x000000ffff000000, 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0x000000ffff00ffff,
|
2282
|
-
0xff0000ffffff0000, 0x000000ffffff00ff, 0x000000ffffffff00, 0xff0000ffffffffff,
|
2283
|
-
0xff00ff0000000000, 0x0000ff00000000ff, 0x0000ff000000ff00, 0xff00ff000000ffff,
|
2284
|
-
0x0000ff0000ff0000, 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0x0000ff0000ffffff,
|
2285
|
-
0x0000ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, 0x0000ff00ff00ffff,
|
2286
|
-
0xff00ff00ffff0000, 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0xff00ff00ffffffff,
|
2287
|
-
0x0000ffff00000000, 0xff00ffff000000ff, 0xff00ffff0000ff00, 0x0000ffff0000ffff,
|
2288
|
-
0xff00ffff00ff0000, 0x0000ffff00ff00ff, 0x0000ffff00ffff00, 0xff00ffff00ffffff,
|
2289
|
-
0xff00ffffff000000, 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0xff00ffffff00ffff,
|
2290
|
-
0x0000ffffffff0000, 0xff00ffffffff00ff, 0xff00ffffffffff00, 0x0000ffffffffffff,
|
2291
|
-
0xffff000000000000, 0x00ff0000000000ff, 0x00ff00000000ff00, 0xffff00000000ffff,
|
2292
|
-
0x00ff000000ff0000, 0xffff000000ff00ff, 0xffff000000ffff00, 0x00ff000000ffffff,
|
2293
|
-
0x00ff0000ff000000, 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0x00ff0000ff00ffff,
|
2294
|
-
0xffff0000ffff0000, 0x00ff0000ffff00ff, 0x00ff0000ffffff00, 0xffff0000ffffffff,
|
2295
|
-
0x00ff00ff00000000, 0xffff00ff000000ff, 0xffff00ff0000ff00, 0x00ff00ff0000ffff,
|
2296
|
-
0xffff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, 0xffff00ff00ffffff,
|
2297
|
-
0xffff00ffff000000, 0x00ff00ffff0000ff, 0x00ff00ffff00ff00, 0xffff00ffff00ffff,
|
2298
|
-
0x00ff00ffffff0000, 0xffff00ffffff00ff, 0xffff00ffffffff00, 0x00ff00ffffffffff,
|
2299
|
-
0x00ffff0000000000, 0xffffff00000000ff, 0xffffff000000ff00, 0x00ffff000000ffff,
|
2300
|
-
0xffffff0000ff0000, 0x00ffff0000ff00ff, 0x00ffff0000ffff00, 0xffffff0000ffffff,
|
2301
|
-
0xffffff00ff000000, 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0xffffff00ff00ffff,
|
2302
|
-
0x00ffff00ffff0000, 0xffffff00ffff00ff, 0xffffff00ffffff00, 0x00ffff00ffffffff,
|
2303
|
-
0xffffffff00000000, 0x00ffffff000000ff, 0x00ffffff0000ff00, 0xffffffff0000ffff,
|
2304
|
-
0x00ffffff00ff0000, 0xffffffff00ff00ff, 0xffffffff00ffff00, 0x00ffffff00ffffff,
|
2305
|
-
0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
|
2306
|
-
0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
|
2307
|
-
};
|
2308
|
-
//#endif
|
2309
|
-
|
2310
|
-
static const __device__ uint8_t kmask_iq2xs[8] = {1, 2, 4, 8, 16, 32, 64, 128};
|
2311
|
-
|
2312
1356
|
inline bool ggml_cuda_supports_mmq(enum ggml_type type) {
|
2313
1357
|
switch (type) {
|
2314
1358
|
case GGML_TYPE_Q4_0:
|
@@ -2459,11 +1503,15 @@ static __global__ void dequantize_block_iq1_s(const void * __restrict__ vx, dst_
|
|
2459
1503
|
const int il = tid/8; // 0...3
|
2460
1504
|
const int ib = tid%8; // 0...7
|
2461
1505
|
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
2462
|
-
const
|
2463
|
-
|
2464
|
-
const int8_t *
|
2465
|
-
|
2466
|
-
|
1506
|
+
const float delta = x[i].qh[ib] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA;
|
1507
|
+
const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 7) + 1);
|
1508
|
+
uint32_t grid32[2]; const int8_t * q = (const int8_t *)grid32;
|
1509
|
+
grid32[0] = iq1s_grid_gpu[x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)];
|
1510
|
+
grid32[1] = (grid32[0] >> 4) & 0x0f0f0f0f;
|
1511
|
+
grid32[0] &= 0x0f0f0f0f;
|
1512
|
+
for (int j = 0; j < 8; ++j) {
|
1513
|
+
y[j] = d * (q[j] + delta);
|
1514
|
+
}
|
2467
1515
|
#else
|
2468
1516
|
assert(false);
|
2469
1517
|
#endif
|
@@ -4303,7 +3351,7 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
|
|
4303
3351
|
#pragma unroll
|
4304
3352
|
for (int i = 0; i < QR2_K; ++ i) {
|
4305
3353
|
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1);
|
4306
|
-
d8[i] =
|
3354
|
+
d8[i] = __low2float(bq8_1[bq8_offset + i].ds);
|
4307
3355
|
}
|
4308
3356
|
|
4309
3357
|
return vec_dot_q2_K_q8_1_impl_mmvq(v, u, scales, bq2_K->dm, d8);
|
@@ -4425,7 +3473,7 @@ static __device__ __forceinline__ float vec_dot_q3_K_q8_1(
|
|
4425
3473
|
#pragma unroll
|
4426
3474
|
for (int i = 0; i < QR3_K; ++i) {
|
4427
3475
|
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1);
|
4428
|
-
d8[i] =
|
3476
|
+
d8[i] = __low2float(bq8_1[bq8_offset + i].ds);
|
4429
3477
|
}
|
4430
3478
|
|
4431
3479
|
return vec_dot_q3_K_q8_1_impl_mmvq(vl, vh, u, bq3_K->scales, scale_offset, d, d8);
|
@@ -4594,7 +3642,7 @@ static __device__ __forceinline__ float vec_dot_q4_K_q8_1(
|
|
4594
3642
|
|
4595
3643
|
for (int i = 0; i < QR4_K; ++i) {
|
4596
3644
|
const block_q8_1 * bq8i = bq8_1 + bq8_offset + i;
|
4597
|
-
d8[i] =
|
3645
|
+
d8[i] = __low2float(bq8i->ds);
|
4598
3646
|
|
4599
3647
|
const int * q8 = (const int *)bq8i->qs + ((iqs/2)%4);
|
4600
3648
|
u[2*i+0] = q8[0];
|
@@ -4959,7 +4007,7 @@ static __device__ __forceinline__ float vec_dot_q6_K_q8_1(
|
|
4959
4007
|
#pragma unroll
|
4960
4008
|
for (int i = 0; i < QR6_K; ++i) {
|
4961
4009
|
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + 2*i].qs, iqs % QI8_1);
|
4962
|
-
d8[i] =
|
4010
|
+
d8[i] = __low2float(bq8_1[bq8_offset + 2*i].ds);
|
4963
4011
|
}
|
4964
4012
|
|
4965
4013
|
return vec_dot_q6_K_q8_1_impl_mmvq(vl, vh, u, scales, bq6_K->d, d8);
|
@@ -5275,44 +4323,36 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1(
|
|
5275
4323
|
#endif
|
5276
4324
|
}
|
5277
4325
|
|
5278
|
-
|
5279
4326
|
static __device__ __forceinline__ float vec_dot_iq1_s_q8_1(
|
5280
4327
|
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) {
|
5281
4328
|
#if QK_K == 256
|
5282
4329
|
const block_iq1_s * bq1 = (const block_iq1_s *) vbq;
|
5283
4330
|
|
5284
4331
|
const int ib32 = iqs;
|
5285
|
-
int
|
5286
|
-
const uint8_t h1 = bq1->scales[2*ib32+0];
|
5287
|
-
const uint8_t h2 = bq1->scales[2*ib32+1];
|
4332
|
+
int sumi = 0;
|
5288
4333
|
#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
|
5289
4334
|
const int * q8 = (const int *)bq8_1[ib32].qs;
|
5290
|
-
|
5291
|
-
|
5292
|
-
|
5293
|
-
|
5294
|
-
|
5295
|
-
sumi1 = __dp4a(q8[j+0], grid1[j], sumi1);
|
5296
|
-
sumi2 = __dp4a(q8[j+2], grid2[j], sumi2);
|
5297
|
-
sumi3 = __dp4a(q8[j+4], grid3[j], sumi3);
|
5298
|
-
sumi4 = __dp4a(q8[j+6], grid4[j], sumi4);
|
4335
|
+
for (int l = 0; l < 4; ++l) {
|
4336
|
+
const int * grid = (const int *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8)));
|
4337
|
+
int grid0 = grid[0] & 0x0f0f0f0f;
|
4338
|
+
int grid1 = (grid[0] >> 4) & 0x0f0f0f0f;
|
4339
|
+
sumi = __dp4a(q8[2*l+1], grid1, __dp4a(q8[2*l+0], grid0, sumi));
|
5299
4340
|
}
|
5300
4341
|
#else
|
5301
|
-
const int8_t
|
5302
|
-
|
5303
|
-
|
5304
|
-
|
5305
|
-
|
5306
|
-
|
5307
|
-
|
5308
|
-
sumi2 += q8[j+ 8] * grid2[j];
|
5309
|
-
sumi3 += q8[j+16] * grid3[j];
|
5310
|
-
sumi4 += q8[j+24] * grid4[j];
|
4342
|
+
const int8_t * q8 = bq8_1[ib32].qs;
|
4343
|
+
for (int l = 0; l < 4; ++l) {
|
4344
|
+
const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8)));
|
4345
|
+
for (int j = 0; j < 4; ++j) {
|
4346
|
+
sumi += q8[j] * (grid[j] & 0xf) + q8[j+4] * (grid[j] >> 4);
|
4347
|
+
}
|
4348
|
+
q8 += 8;
|
5311
4349
|
}
|
5312
4350
|
#endif
|
5313
|
-
const float
|
5314
|
-
|
5315
|
-
|
4351
|
+
const float delta = bq1->qh[ib32] & 0x8000 ? -1-IQ1S_DELTA : -1+IQ1S_DELTA;
|
4352
|
+
const float d1q = (float)bq1->d * (2*((bq1->qh[ib32] >> 12) & 7) + 1);
|
4353
|
+
const float d = d1q * __low2float (bq8_1[ib32].ds);
|
4354
|
+
const float m = d1q * __high2float(bq8_1[ib32].ds);
|
4355
|
+
return d * sumi + m * delta;
|
5316
4356
|
#else
|
5317
4357
|
assert(false);
|
5318
4358
|
return 0.f;
|
@@ -5504,7 +4544,7 @@ static __device__ __forceinline__ void mul_mat_q(
|
|
5504
4544
|
*dsi_dst = *dsi_src;
|
5505
4545
|
} else {
|
5506
4546
|
float * dfi_dst = (float *) dsi_dst;
|
5507
|
-
*dfi_dst =
|
4547
|
+
*dfi_dst = __low2float(*dsi_src);
|
5508
4548
|
}
|
5509
4549
|
}
|
5510
4550
|
|
@@ -11604,8 +10644,20 @@ GGML_CALL void ggml_cuda_get_device_description(int device, char * description,
|
|
11604
10644
|
#define UNUSED GGML_UNUSED
|
11605
10645
|
|
11606
10646
|
struct ggml_backend_cuda_context {
|
10647
|
+
explicit ggml_backend_cuda_context(int device) :
|
10648
|
+
device(device),
|
10649
|
+
name(GGML_CUDA_NAME + std::to_string(device)) {
|
10650
|
+
}
|
10651
|
+
|
10652
|
+
~ggml_backend_cuda_context() {
|
10653
|
+
if (copy_event != nullptr) {
|
10654
|
+
CUDA_CHECK(cudaEventDestroy(copy_event));
|
10655
|
+
}
|
10656
|
+
}
|
10657
|
+
|
11607
10658
|
int device;
|
11608
10659
|
std::string name;
|
10660
|
+
cudaEvent_t copy_event = nullptr;
|
11609
10661
|
};
|
11610
10662
|
|
11611
10663
|
// cuda buffer
|
@@ -11695,9 +10747,8 @@ GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t
|
|
11695
10747
|
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
11696
10748
|
|
11697
10749
|
ggml_cuda_set_device(ctx->device);
|
11698
|
-
CUDA_CHECK(
|
11699
|
-
CUDA_CHECK(
|
11700
|
-
CUDA_CHECK(cudaDeviceSynchronize());
|
10750
|
+
CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, cudaStreamPerThread));
|
10751
|
+
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
11701
10752
|
}
|
11702
10753
|
|
11703
10754
|
GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
@@ -11706,26 +10757,25 @@ GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t
|
|
11706
10757
|
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
11707
10758
|
|
11708
10759
|
ggml_cuda_set_device(ctx->device);
|
11709
|
-
CUDA_CHECK(
|
11710
|
-
CUDA_CHECK(
|
11711
|
-
CUDA_CHECK(cudaDeviceSynchronize());
|
10760
|
+
CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, cudaStreamPerThread));
|
10761
|
+
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
11712
10762
|
}
|
11713
10763
|
|
11714
10764
|
GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) {
|
11715
10765
|
if (ggml_backend_buffer_is_cuda(src->buffer)) {
|
11716
10766
|
ggml_backend_cuda_buffer_context * src_ctx = (ggml_backend_cuda_buffer_context *)src->buffer->context;
|
11717
|
-
ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
11718
|
-
|
11719
|
-
|
11720
|
-
|
11721
|
-
|
11722
|
-
|
11723
|
-
CUDA_CHECK(
|
11724
|
-
CUDA_CHECK(cudaDeviceSynchronize());
|
11725
|
-
|
10767
|
+
ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *)dst->buffer->context;
|
10768
|
+
if (src_ctx->device == dst_ctx->device) {
|
10769
|
+
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(src), cudaMemcpyDeviceToDevice, cudaStreamPerThread));
|
10770
|
+
} else {
|
10771
|
+
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread));
|
10772
|
+
}
|
10773
|
+
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
11726
10774
|
return true;
|
11727
10775
|
}
|
11728
10776
|
return false;
|
10777
|
+
|
10778
|
+
UNUSED(buffer);
|
11729
10779
|
}
|
11730
10780
|
|
11731
10781
|
GGML_CALL static void ggml_backend_cuda_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
@@ -11970,7 +11020,11 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buf
|
|
11970
11020
|
}
|
11971
11021
|
|
11972
11022
|
const char * buf_host = (const char *)data + offset_split;
|
11973
|
-
CUDA_CHECK(
|
11023
|
+
CUDA_CHECK(cudaMemcpyAsync(extra->data_device[id], buf_host, original_size, cudaMemcpyHostToDevice, cudaStreamPerThread));
|
11024
|
+
}
|
11025
|
+
|
11026
|
+
for (int id = 0; id < g_device_count; ++id) {
|
11027
|
+
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
11974
11028
|
}
|
11975
11029
|
}
|
11976
11030
|
|
@@ -12004,7 +11058,11 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_get_tensor(ggml_backend_buf
|
|
12004
11058
|
}
|
12005
11059
|
|
12006
11060
|
char * buf_host = (char *)data + offset_split;
|
12007
|
-
CUDA_CHECK(
|
11061
|
+
CUDA_CHECK(cudaMemcpyAsync(buf_host, extra->data_device[id], original_size, cudaMemcpyDeviceToHost, cudaStreamPerThread));
|
11062
|
+
}
|
11063
|
+
|
11064
|
+
for (int id = 0; id < g_device_count; ++id) {
|
11065
|
+
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
12008
11066
|
}
|
12009
11067
|
}
|
12010
11068
|
|
@@ -12183,6 +11241,10 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() {
|
|
12183
11241
|
return &ggml_backend_cuda_buffer_type_host;
|
12184
11242
|
}
|
12185
11243
|
|
11244
|
+
//static bool ggml_backend_buffer_is_cuda_host(ggml_backend_buffer_t buffer) {
|
11245
|
+
// return buffer->buft->iface.get_name == ggml_backend_cuda_host_buffer_type_name;
|
11246
|
+
//}
|
11247
|
+
|
12186
11248
|
// backend
|
12187
11249
|
|
12188
11250
|
GGML_CALL static const char * ggml_backend_cuda_name(ggml_backend_t backend) {
|
@@ -12206,8 +11268,9 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_cuda_get_default_buffer
|
|
12206
11268
|
|
12207
11269
|
GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
12208
11270
|
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
|
11271
|
+
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
12209
11272
|
|
12210
|
-
GGML_ASSERT(
|
11273
|
+
GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
|
12211
11274
|
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
12212
11275
|
|
12213
11276
|
CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, g_cudaStreams[cuda_ctx->device][0]));
|
@@ -12215,22 +11278,61 @@ GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend,
|
|
12215
11278
|
|
12216
11279
|
GGML_CALL static void ggml_backend_cuda_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
12217
11280
|
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
|
11281
|
+
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
12218
11282
|
|
12219
|
-
GGML_ASSERT(
|
11283
|
+
GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
|
12220
11284
|
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
12221
11285
|
|
12222
11286
|
CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, g_cudaStreams[cuda_ctx->device][0]));
|
12223
11287
|
}
|
12224
11288
|
|
12225
|
-
GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t
|
12226
|
-
|
11289
|
+
GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, ggml_tensor * dst) {
|
11290
|
+
GGML_ASSERT(ggml_backend_is_cuda(backend_src) || ggml_backend_is_cuda(backend_dst));
|
12227
11291
|
|
12228
|
-
|
12229
|
-
|
12230
|
-
|
11292
|
+
ggml_backend_buffer_t buf_src = src->view_src ? src->view_src->buffer : src->buffer;
|
11293
|
+
ggml_backend_buffer_t buf_dst = dst->view_src ? dst->view_src->buffer : dst->buffer;
|
11294
|
+
|
11295
|
+
if (!ggml_backend_buffer_is_cuda(src->buffer)) {
|
11296
|
+
return false;
|
12231
11297
|
}
|
12232
11298
|
|
12233
|
-
|
11299
|
+
if (!ggml_backend_buffer_is_cuda(dst->buffer)) {
|
11300
|
+
return false;
|
11301
|
+
}
|
11302
|
+
|
11303
|
+
// device -> device
|
11304
|
+
ggml_backend_cuda_context * cuda_ctx_src = (ggml_backend_cuda_context *)backend_src->context;
|
11305
|
+
ggml_backend_cuda_context * cuda_ctx_dst = (ggml_backend_cuda_context *)backend_dst->context;
|
11306
|
+
|
11307
|
+
if (backend_src != backend_dst) {
|
11308
|
+
ggml_backend_cuda_buffer_context * buf_ctx_src = (ggml_backend_cuda_buffer_context *)buf_src->context;
|
11309
|
+
ggml_backend_cuda_buffer_context * buf_ctx_dst = (ggml_backend_cuda_buffer_context *)buf_dst->context;
|
11310
|
+
|
11311
|
+
GGML_ASSERT(cuda_ctx_src->device == buf_ctx_src->device);
|
11312
|
+
GGML_ASSERT(cuda_ctx_dst->device == buf_ctx_dst->device);
|
11313
|
+
|
11314
|
+
if (!cuda_ctx_src->copy_event) {
|
11315
|
+
ggml_cuda_set_device(cuda_ctx_src->device);
|
11316
|
+
CUDA_CHECK(cudaEventCreateWithFlags(&cuda_ctx_src->copy_event, cudaEventDisableTiming));
|
11317
|
+
}
|
11318
|
+
|
11319
|
+
// copy on src stream
|
11320
|
+
if (cuda_ctx_src->device == cuda_ctx_dst->device) {
|
11321
|
+
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx_dst->device][0]));
|
11322
|
+
} else {
|
11323
|
+
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), g_cudaStreams[cuda_ctx_src->device][0]));
|
11324
|
+
}
|
11325
|
+
|
11326
|
+
// record event on src stream
|
11327
|
+
CUDA_CHECK(cudaEventRecord(cuda_ctx_src->copy_event, g_cudaStreams[cuda_ctx_src->device][0]));
|
11328
|
+
|
11329
|
+
// wait on dst stream for the copy to complete
|
11330
|
+
CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[cuda_ctx_dst->device][0], cuda_ctx_src->copy_event, 0));
|
11331
|
+
} else {
|
11332
|
+
// src and dst are on the same backend
|
11333
|
+
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx_dst->device][0]));
|
11334
|
+
}
|
11335
|
+
return true;
|
12234
11336
|
}
|
12235
11337
|
|
12236
11338
|
GGML_CALL static void ggml_backend_cuda_synchronize(ggml_backend_t backend) {
|
@@ -12407,6 +11509,52 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
|
|
12407
11509
|
UNUSED(backend);
|
12408
11510
|
}
|
12409
11511
|
|
11512
|
+
static ggml_backend_event_t ggml_backend_cuda_event_new(ggml_backend_t backend) {
|
11513
|
+
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
|
11514
|
+
|
11515
|
+
ggml_cuda_set_device(cuda_ctx->device);
|
11516
|
+
|
11517
|
+
cudaEvent_t event;
|
11518
|
+
CUDA_CHECK(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
|
11519
|
+
|
11520
|
+
return new ggml_backend_event {
|
11521
|
+
/* .backend = */ backend,
|
11522
|
+
/* .context = */ event,
|
11523
|
+
};
|
11524
|
+
}
|
11525
|
+
|
11526
|
+
// Releases a backend event: destroys the CUDA event held in event->context,
// then deletes the ggml_backend_event object itself.
static void ggml_backend_cuda_event_free(ggml_backend_event_t event) {
    cudaEvent_t handle = (cudaEvent_t)event->context;
    CUDA_CHECK(cudaEventDestroy(handle));

    delete event;
}
|
11531
|
+
|
11532
|
+
// Records the event's CUDA event on stream 0 of the device that belongs to the
// backend the event was created for (event->backend).
static void ggml_backend_cuda_event_record(ggml_backend_event_t event) {
    ggml_backend_cuda_context * ctx = (ggml_backend_cuda_context *)event->backend->context;
    cudaStream_t stream = g_cudaStreams[ctx->device][0];

    CUDA_CHECK(cudaEventRecord((cudaEvent_t)event->context, stream));
}
|
11537
|
+
|
11538
|
+
// Makes stream 0 of `backend`'s device wait for `event`.
//
// - If the event belongs to a CUDA backend, its context is treated as a
//   cudaEvent_t and the wait is enqueued with cudaStreamWaitEvent().
// - Otherwise a host function is enqueued on the stream; when it runs it calls
//   ggml_backend_event_synchronize() on the event. This path is untested.
static void ggml_backend_cuda_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
    ggml_backend_cuda_context * ctx = (ggml_backend_cuda_context *)backend->context;
    cudaStream_t stream = g_cudaStreams[ctx->device][0];

    if (!ggml_backend_is_cuda(event->backend)) {
        // untested
        // the event pointer is handed to the callback as its user data
        auto host_wait = [](void * user_data) {
            ggml_backend_event_t pending = (ggml_backend_event_t)user_data;
            ggml_backend_event_synchronize(pending);
        };

        CUDA_CHECK(cudaLaunchHostFunc(stream, host_wait, event));
        return;
    }

    CUDA_CHECK(cudaStreamWaitEvent(stream, (cudaEvent_t)event->context, 0));
}
|
11553
|
+
|
11554
|
+
// Calls cudaEventSynchronize() on the CUDA event stored in event->context;
// a failing call is handled by CUDA_CHECK.
static void ggml_backend_cuda_event_synchronize(ggml_backend_event_t event) {
    cudaEvent_t handle = (cudaEvent_t)event->context;
    CUDA_CHECK(cudaEventSynchronize(handle));
}
|
11557
|
+
|
12410
11558
|
static ggml_backend_i ggml_backend_cuda_interface = {
|
12411
11559
|
/* .get_name = */ ggml_backend_cuda_name,
|
12412
11560
|
/* .free = */ ggml_backend_cuda_free,
|
@@ -12420,6 +11568,11 @@ static ggml_backend_i ggml_backend_cuda_interface = {
|
|
12420
11568
|
/* .graph_plan_compute = */ NULL,
|
12421
11569
|
/* .graph_compute = */ ggml_backend_cuda_graph_compute,
|
12422
11570
|
/* .supports_op = */ ggml_backend_cuda_supports_op,
|
11571
|
+
/* .event_new = */ ggml_backend_cuda_event_new,
|
11572
|
+
/* .event_free = */ ggml_backend_cuda_event_free,
|
11573
|
+
/* .event_record = */ ggml_backend_cuda_event_record,
|
11574
|
+
/* .event_wait = */ ggml_backend_cuda_event_wait,
|
11575
|
+
/* .event_synchronize = */ ggml_backend_cuda_event_synchronize,
|
12423
11576
|
};
|
12424
11577
|
|
12425
11578
|
static ggml_guid_t ggml_backend_cuda_guid() {
|
@@ -12438,10 +11591,11 @@ GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device) {
|
|
12438
11591
|
// not strictly necessary, but it may reduce the overhead of the first graph_compute
|
12439
11592
|
ggml_cuda_set_main_device(device);
|
12440
11593
|
|
12441
|
-
ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context
|
12442
|
-
|
12443
|
-
|
12444
|
-
|
11594
|
+
ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context(device);
|
11595
|
+
if (ctx == nullptr) {
|
11596
|
+
fprintf(stderr, "%s: error: failed to allocate context\n", __func__);
|
11597
|
+
return nullptr;
|
11598
|
+
}
|
12445
11599
|
|
12446
11600
|
ggml_backend_t cuda_backend = new ggml_backend {
|
12447
11601
|
/* .guid = */ ggml_backend_cuda_guid(),
|