llama_cpp 0.13.0 → 0.14.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,7 @@
1
+ #define GGML_COMMON_DECL_METAL
2
+ #define GGML_COMMON_IMPL_METAL
3
+ #include "ggml-common.h"
4
+
1
5
  #include <metal_stdlib>
2
6
 
3
7
  using namespace metal;
@@ -6,41 +10,6 @@ using namespace metal;
6
10
  #define MIN(x, y) ((x) < (y) ? (x) : (y))
7
11
  #define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; }
8
12
 
9
- #define QK4_0 32
10
- #define QR4_0 2
11
- typedef struct {
12
- half d; // delta
13
- uint8_t qs[QK4_0 / 2]; // nibbles / quants
14
- } block_q4_0;
15
-
16
- #define QK4_1 32
17
- typedef struct {
18
- half d; // delta
19
- half m; // min
20
- uint8_t qs[QK4_1 / 2]; // nibbles / quants
21
- } block_q4_1;
22
-
23
- #define QK5_0 32
24
- typedef struct {
25
- half d; // delta
26
- uint8_t qh[4]; // 5-th bit of quants
27
- uint8_t qs[QK5_0 / 2]; // nibbles / quants
28
- } block_q5_0;
29
-
30
- #define QK5_1 32
31
- typedef struct {
32
- half d; // delta
33
- half m; // min
34
- uint8_t qh[4]; // 5-th bit of quants
35
- uint8_t qs[QK5_1 / 2]; // nibbles / quants
36
- } block_q5_1;
37
-
38
- #define QK8_0 32
39
- typedef struct {
40
- half d; // delta
41
- int8_t qs[QK8_0]; // quants
42
- } block_q8_0;
43
-
44
13
  #define N_SIMDWIDTH 32 // assuming SIMD group size is 32
45
14
 
46
15
  enum ggml_sort_order {
@@ -1959,6 +1928,49 @@ kernel void kernel_pad_f32(
1959
1928
  }
1960
1929
  }
1961
1930
 
1931
+ kernel void kernel_arange_f32(
1932
+ device char * dst,
1933
+ constant int64_t & ne0,
1934
+ constant float & start,
1935
+ constant float & step,
1936
+ uint3 tgpig[[threadgroup_position_in_grid]],
1937
+ uint3 tpitg[[thread_position_in_threadgroup]],
1938
+ uint3 ntg[[threads_per_threadgroup]]) {
1939
+
1940
+ device float * dst_ptr = (device float *) dst;
1941
+
1942
+ for (int i0 = tpitg.x; i0 < ne0; i0 += ntg.x) {
1943
+ dst_ptr[i0] = start + step * i0;
1944
+ }
1945
+ }
1946
+
1947
+ kernel void kernel_timestep_embedding_f32(
1948
+ device const char * src0,
1949
+ device char * dst,
1950
+ constant uint64_t & nb1,
1951
+ constant int & dim,
1952
+ constant int & max_period,
1953
+ uint3 tgpig[[threadgroup_position_in_grid]],
1954
+ uint3 tpitg[[thread_position_in_threadgroup]],
1955
+ uint3 ntg[[threads_per_threadgroup]]) {
1956
+
1957
+ int i = tgpig.x;
1958
+ device float * embed_data = (device float *)(dst + i*nb1);
1959
+
1960
+ int half_ = dim / 2;
1961
+ for (int j = tpitg.x; j < half_; j += ntg.x) {
1962
+ float timestep = ((device float *)src0)[i];
1963
+ float freq = (float)exp(-log((float)max_period) * j / half_);
1964
+ float arg = timestep * freq;
1965
+ embed_data[j ] = cos(arg);
1966
+ embed_data[j + half_] = sin(arg);
1967
+ }
1968
+
1969
+ if (dim % 2 != 0 && tpitg.x == 0) {
1970
+ embed_data[dim] = 0.f;
1971
+ }
1972
+ }
1973
+
1962
1974
  // bitonic sort implementation following the CUDA kernels as reference
1963
1975
  typedef void (argsort_t)(
1964
1976
  device const float * x,
@@ -2432,147 +2444,6 @@ kernel void kernel_concat(
2432
2444
  }
2433
2445
  }
2434
2446
 
2435
- //============================================ k-quants ======================================================
2436
-
2437
- #ifndef QK_K
2438
- #define QK_K 256
2439
- #else
2440
- static_assert(QK_K == 256 || QK_K == 64, "QK_K must be 256 or 64");
2441
- #endif
2442
-
2443
- #if QK_K == 256
2444
- #define K_SCALE_SIZE 12
2445
- #else
2446
- #define K_SCALE_SIZE 4
2447
- #endif
2448
-
2449
- typedef struct {
2450
- uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
2451
- uint8_t qs[QK_K/4]; // quants
2452
- half d; // super-block scale for quantized scales
2453
- half dmin; // super-block scale for quantized mins
2454
- } block_q2_K;
2455
- // 84 bytes / block
2456
-
2457
- typedef struct {
2458
- uint8_t hmask[QK_K/8]; // quants - high bit
2459
- uint8_t qs[QK_K/4]; // quants - low 2 bits
2460
- #if QK_K == 64
2461
- uint8_t scales[2];
2462
- #else
2463
- uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
2464
- #endif
2465
- half d; // super-block scale
2466
- } block_q3_K;
2467
-
2468
- #if QK_K == 64
2469
- typedef struct {
2470
- half d[2]; // super-block scales/mins
2471
- uint8_t scales[2];
2472
- uint8_t qs[QK_K/2]; // 4-bit quants
2473
- } block_q4_K;
2474
- #else
2475
- typedef struct {
2476
- half d; // super-block scale for quantized scales
2477
- half dmin; // super-block scale for quantized mins
2478
- uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
2479
- uint8_t qs[QK_K/2]; // 4--bit quants
2480
- } block_q4_K;
2481
- #endif
2482
-
2483
- #if QK_K == 64
2484
- typedef struct {
2485
- half d; // super-block scales/mins
2486
- int8_t scales[QK_K/16]; // 8-bit block scales
2487
- uint8_t qh[QK_K/8]; // quants, high bit
2488
- uint8_t qs[QK_K/2]; // quants, low 4 bits
2489
- } block_q5_K;
2490
- #else
2491
- typedef struct {
2492
- half d; // super-block scale for quantized scales
2493
- half dmin; // super-block scale for quantized mins
2494
- uint8_t scales[3*QK_K/64]; // scales and mins, quantized with 6 bits
2495
- uint8_t qh[QK_K/8]; // quants, high bit
2496
- uint8_t qs[QK_K/2]; // quants, low 4 bits
2497
- } block_q5_K;
2498
- // 176 bytes / block
2499
- #endif
2500
-
2501
- typedef struct {
2502
- uint8_t ql[QK_K/2]; // quants, lower 4 bits
2503
- uint8_t qh[QK_K/4]; // quants, upper 2 bits
2504
- int8_t scales[QK_K/16]; // scales, quantized with 8 bits
2505
- half d; // super-block scale
2506
- } block_q6_K;
2507
- // 210 bytes / block
2508
-
2509
- typedef struct {
2510
- half d;
2511
- uint16_t qs[QK_K/8];
2512
- } block_iq2_xxs;
2513
- // 66 bytes / block for QK_K = 256, so 2.0625 bpw
2514
-
2515
- typedef struct {
2516
- half d;
2517
- uint16_t qs[QK_K/8];
2518
- uint8_t scales[QK_K/32];
2519
- } block_iq2_xs;
2520
- // 74 bytes / block for QK_K = 256, so 2.3125 bpw
2521
-
2522
- // 2.5625 bpw quants
2523
- typedef struct {
2524
- half d;
2525
- uint8_t qs[QK_K/4];
2526
- uint8_t qh[QK_K/32];
2527
- uint8_t scales[QK_K/32];
2528
- } block_iq2_s;
2529
-
2530
- typedef struct {
2531
- half d;
2532
- uint8_t qs[3*QK_K/8];
2533
- } block_iq3_xxs;
2534
- // 98 bytes / block for QK_K = 256, so 3.0625 bpw
2535
-
2536
- // 3.4375 bpw
2537
- #if QK_K == 64
2538
- #define IQ3S_N_SCALE 2
2539
- #else
2540
- #define IQ3S_N_SCALE QK_K/64
2541
- #endif
2542
- typedef struct {
2543
- half d;
2544
- uint8_t qs[QK_K/4];
2545
- uint8_t qh[QK_K/32];
2546
- uint8_t signs[QK_K/8];
2547
- uint8_t scales[IQ3S_N_SCALE];
2548
- } block_iq3_s;
2549
-
2550
- typedef struct {
2551
- half d;
2552
- uint8_t qs[QK_K/8];
2553
- uint8_t scales[QK_K/16];
2554
- } block_iq1_s;
2555
-
2556
- // Non-linear quants
2557
- #define QK4_NL 32
2558
- typedef struct {
2559
- half d;
2560
- uint8_t qs[QK4_NL/2];
2561
- } block_iq4_nl;
2562
-
2563
- #if QK_K == 64
2564
- #define block_iq4_xs block_iq4_nl
2565
- #else
2566
- typedef struct {
2567
- half d;
2568
- uint16_t scales_h;
2569
- uint8_t scales_l[QK_K/64];
2570
- uint8_t qs[QK_K/2];
2571
- } block_iq4_xs;
2572
- #endif
2573
-
2574
- //====================================== dot products =========================
2575
-
2576
2447
  void kernel_mul_mv_q2_K_f32_impl(
2577
2448
  device const void * src0,
2578
2449
  device const float * src1,
@@ -3595,710 +3466,6 @@ kernel void kernel_mul_mv_q6_K_f32(
3595
3466
 
3596
3467
  // ======================= "True" 2-bit
3597
3468
 
3598
- constexpr constant static uint64_t iq2xxs_grid[256] = {
3599
- 0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
3600
- 0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x08080808082b0808,
3601
- 0x08080808082b082b, 0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
3602
- 0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819,
3603
- 0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b,
3604
- 0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
3605
- 0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08,
3606
- 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b,
3607
- 0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
3608
- 0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08,
3609
- 0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
3610
- 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
3611
- 0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808,
3612
- 0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808,
3613
- 0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
3614
- 0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819,
3615
- 0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08,
3616
- 0x08082b1908081908, 0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
3617
- 0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
3618
- 0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
3619
- 0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
3620
- 0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
3621
- 0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
3622
- 0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
3623
- 0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
3624
- 0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
3625
- 0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
3626
- 0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
3627
- 0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
3628
- 0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
3629
- 0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
3630
- 0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
3631
- 0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
3632
- 0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
3633
- 0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
3634
- 0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
3635
- 0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
3636
- 0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
3637
- 0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
3638
- 0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
3639
- 0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
3640
- 0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
3641
- 0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
3642
- 0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
3643
- 0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
3644
- 0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
3645
- 0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
3646
- 0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
3647
- 0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
3648
- 0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
3649
- 0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
3650
- 0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
3651
- 0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
3652
- 0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
3653
- 0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
3654
- 0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
3655
- 0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
3656
- 0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
3657
- 0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
3658
- 0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
3659
- 0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
3660
- 0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
3661
- 0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
3662
- 0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
3663
- };
3664
-
3665
- constexpr constant static uint64_t iq2xs_grid[512] = {
3666
- 0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
3667
- 0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
3668
- 0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
3669
- 0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
3670
- 0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
3671
- 0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
3672
- 0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08, 0x080808082b190819,
3673
- 0x080808082b191908, 0x080808082b192b19, 0x080808082b2b0808, 0x0808081908080819,
3674
- 0x0808081908081908, 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
3675
- 0x080808190819082b, 0x0808081908191919, 0x0808081908192b08, 0x0808081908192b2b,
3676
- 0x08080819082b0819, 0x08080819082b1908, 0x0808081919080808, 0x080808191908082b,
3677
- 0x0808081919081919, 0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
3678
- 0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819, 0x080808192b081908,
3679
- 0x080808192b190808, 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b08081919,
3680
- 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
3681
- 0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808, 0x0808082b19191919,
3682
- 0x0808082b2b080808, 0x0808082b2b082b2b, 0x0808190808080819, 0x0808190808081908,
3683
- 0x080819080808192b, 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
3684
- 0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908,
3685
- 0x0808190819080808, 0x080819081908082b, 0x0808190819081919, 0x0808190819082b08,
3686
- 0x0808190819190819, 0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
3687
- 0x080819082b080819, 0x080819082b081908, 0x080819082b190808, 0x0808191908080808,
3688
- 0x080819190808082b, 0x0808191908081919, 0x0808191908082b08, 0x0808191908190819,
3689
- 0x0808191908191908, 0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
3690
- 0x0808191919190808, 0x08081919192b0819, 0x080819192b080808, 0x0808192b08080819,
3691
- 0x0808192b08081908, 0x0808192b08190808, 0x0808192b082b192b, 0x0808192b19080808,
3692
- 0x0808192b1908082b, 0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
3693
- 0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b, 0x08082b0808190819,
3694
- 0x08082b0808191908, 0x08082b08082b0808, 0x08082b08082b1919, 0x08082b0819080819,
3695
- 0x08082b0819081908, 0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
3696
- 0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819, 0x08082b1908081908,
3697
- 0x08082b1908190808, 0x08082b1919080808, 0x08082b192b080819, 0x08082b192b082b19,
3698
- 0x08082b2b08080808, 0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
3699
- 0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908, 0x081908080808192b,
3700
- 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b, 0x0819080808191919,
3701
- 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
3702
- 0x081908081908082b, 0x0819080819081919, 0x0819080819082b08, 0x0819080819190819,
3703
- 0x0819080819191908, 0x08190808192b0808, 0x08190808192b2b2b, 0x081908082b080819,
3704
- 0x081908082b081908, 0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
3705
- 0x0819081908081919, 0x0819081908082b08, 0x0819081908190819, 0x0819081908191908,
3706
- 0x08190819082b0808, 0x0819081919080819, 0x0819081919081908, 0x0819081919190808,
3707
- 0x081908192b080808, 0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
3708
- 0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808, 0x0819082b19080808,
3709
- 0x0819082b192b0808, 0x0819190808080808, 0x081919080808082b, 0x0819190808081919,
3710
- 0x0819190808082b08, 0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
3711
- 0x0819190819080819, 0x0819190819081908, 0x0819190819082b19, 0x0819190819190808,
3712
- 0x08191908192b1908, 0x081919082b080808, 0x0819191908080819, 0x0819191908081908,
3713
- 0x0819191908190808, 0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
3714
- 0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
3715
- 0x08192b080819082b, 0x08192b0819080808, 0x08192b0819191908, 0x08192b082b08192b,
3716
- 0x08192b1908080808, 0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
3717
- 0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919,
3718
- 0x082b080808082b08, 0x082b080808082b2b, 0x082b080808190819, 0x082b080808191908,
3719
- 0x082b0808082b0808, 0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
3720
- 0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819, 0x082b081908081908,
3721
- 0x082b081908190808, 0x082b081919080808, 0x082b081919082b08, 0x082b0819192b1919,
3722
- 0x082b082b08080808, 0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
3723
- 0x082b190808080819, 0x082b190808081908, 0x082b190808190808, 0x082b1908082b2b19,
3724
- 0x082b190819080808, 0x082b191908080808, 0x082b191919080819, 0x082b19191919082b,
3725
- 0x082b19192b192b19, 0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
3726
- 0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b, 0x082b2b08082b0808,
3727
- 0x082b2b0819191919, 0x082b2b082b082b08, 0x082b2b082b2b082b, 0x082b2b19192b2b08,
3728
- 0x082b2b192b190808, 0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
3729
- 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819, 0x1908080808081908,
3730
- 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808, 0x190808080819082b,
3731
- 0x1908080808191919, 0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
3732
- 0x1908080819080808, 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08,
3733
- 0x1908080819082b2b, 0x1908080819190819, 0x1908080819191908, 0x19080808192b0808,
3734
- 0x19080808192b1919, 0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
3735
- 0x1908081908080808, 0x190808190808082b, 0x1908081908081919, 0x1908081908082b08,
3736
- 0x1908081908190819, 0x1908081908191908, 0x19080819082b0808, 0x1908081919080819,
3737
- 0x1908081919081908, 0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
3738
- 0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908, 0x1908082b08190808,
3739
- 0x1908082b0819082b, 0x1908082b082b2b19, 0x1908082b19080808, 0x1908190808080808,
3740
- 0x190819080808082b, 0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
3741
- 0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808, 0x1908190819080819,
3742
- 0x1908190819081908, 0x1908190819190808, 0x190819082b080808, 0x190819082b191908,
3743
- 0x1908191908080819, 0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
3744
- 0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808, 0x1908192b08082b2b,
3745
- 0x1908192b19081908, 0x1908192b19190808, 0x19082b0808080819, 0x19082b0808081908,
3746
- 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
3747
- 0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819, 0x19082b1919081908,
3748
- 0x19082b1919190808, 0x19082b19192b2b19, 0x19082b2b08081908, 0x1919080808080808,
3749
- 0x191908080808082b, 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
3750
- 0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08, 0x1919080819080819,
3751
- 0x1919080819081908, 0x1919080819190808, 0x191908082b080808, 0x1919081908080819,
3752
- 0x1919081908081908, 0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
3753
- 0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908, 0x1919082b2b2b2b2b,
3754
- 0x1919190808080819, 0x1919190808081908, 0x1919190808190808, 0x19191908082b0819,
3755
- 0x1919190819080808, 0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
3756
- 0x1919191908080808, 0x1919191908082b08, 0x191919192b080808, 0x191919192b082b08,
3757
- 0x1919192b082b0819, 0x1919192b192b2b08, 0x1919192b2b2b0819, 0x19192b0808080808,
3758
- 0x19192b0808191908, 0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
3759
- 0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b, 0x19192b2b2b081919,
3760
- 0x192b080808080819, 0x192b080808081908, 0x192b080808190808, 0x192b080819080808,
3761
- 0x192b080819191908, 0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
3762
- 0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b, 0x192b082b2b19082b,
3763
- 0x192b190808080808, 0x192b19080819192b, 0x192b191908190808, 0x192b191919080808,
3764
- 0x192b191919081919, 0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
3765
- 0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908, 0x192b2b2b192b082b,
3766
- 0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08,
3767
- 0x2b08080808190819, 0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
3768
- 0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808082b080808,
3769
- 0x2b0808082b08082b, 0x2b0808082b2b2b08, 0x2b0808082b2b2b2b, 0x2b08081908080819,
3770
- 0x2b08081908081908, 0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
3771
- 0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808, 0x2b08082b082b0808,
3772
- 0x2b08082b2b080808, 0x2b08082b2b08082b, 0x2b08082b2b2b0808, 0x2b08082b2b2b2b08,
3773
- 0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
3774
- 0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808, 0x2b0819082b082b19,
3775
- 0x2b08191908080808, 0x2b08191919081908, 0x2b0819192b2b1919, 0x2b08192b08192b08,
3776
- 0x2b08192b192b2b2b, 0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
3777
- 0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b, 0x2b082b082b2b2b08,
3778
- 0x2b082b190808192b, 0x2b082b2b082b082b, 0x2b082b2b2b080808, 0x2b082b2b2b082b08,
3779
- 0x2b082b2b2b19192b, 0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
3780
- 0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b, 0x2b1908082b081908,
3781
- 0x2b19081908080808, 0x2b190819082b082b, 0x2b190819192b1908, 0x2b19082b1919192b,
3782
- 0x2b19082b2b082b19, 0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
3783
- 0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19, 0x2b1919192b190808,
3784
- 0x2b1919192b19082b, 0x2b19192b19080819, 0x2b192b0819190819, 0x2b192b082b2b192b,
3785
- 0x2b192b1919082b19, 0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
3786
- 0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b, 0x2b2b0808082b0808,
3787
- 0x2b2b0808082b2b2b, 0x2b2b08082b2b0808, 0x2b2b081919190819, 0x2b2b081919192b19,
3788
- 0x2b2b08192b2b192b, 0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
3789
- 0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808, 0x2b2b190819080808,
3790
- 0x2b2b19082b191919, 0x2b2b192b192b1919, 0x2b2b192b2b192b08, 0x2b2b2b0808082b2b,
3791
- 0x2b2b2b08082b0808, 0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
3792
- 0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908, 0x2b2b2b192b08192b,
3793
- 0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
3794
- };
3795
-
3796
- constexpr constant static uint64_t iq2s_grid[1024] = {
3797
- 0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
3798
- 0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
3799
- 0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
3800
- 0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
3801
- 0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
3802
- 0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x08080808192b192b,
3803
- 0x08080808192b2b19, 0x080808082b080808, 0x080808082b08082b, 0x080808082b081919,
3804
- 0x080808082b082b08, 0x080808082b190819, 0x080808082b191908, 0x080808082b2b0808,
3805
- 0x080808082b2b1919, 0x080808082b2b2b2b, 0x0808081908080819, 0x0808081908081908,
3806
- 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808, 0x080808190819082b,
3807
- 0x0808081908191919, 0x0808081908192b08, 0x08080819082b0819, 0x08080819082b1908,
3808
- 0x0808081919080808, 0x080808191908082b, 0x0808081919081919, 0x0808081919082b08,
3809
- 0x0808081919190819, 0x0808081919191908, 0x080808191919192b, 0x0808081919192b19,
3810
- 0x08080819192b0808, 0x08080819192b1919, 0x08080819192b2b08, 0x080808192b080819,
3811
- 0x080808192b081908, 0x080808192b190808, 0x080808192b19082b, 0x080808192b191919,
3812
- 0x080808192b2b0819, 0x080808192b2b1908, 0x0808082b08080808, 0x0808082b0808082b,
3813
- 0x0808082b08081919, 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908,
3814
- 0x0808082b082b0808, 0x0808082b082b2b2b, 0x0808082b19080819, 0x0808082b19081908,
3815
- 0x0808082b1908192b, 0x0808082b19082b19, 0x0808082b19190808, 0x0808082b19191919,
3816
- 0x0808082b2b080808, 0x0808082b2b081919, 0x0808082b2b082b2b, 0x0808082b2b191908,
3817
- 0x0808082b2b2b082b, 0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
3818
- 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b, 0x0808190808191919,
3819
- 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908, 0x08081908082b192b,
3820
- 0x08081908082b2b19, 0x0808190819080808, 0x080819081908082b, 0x0808190819081919,
3821
- 0x0808190819082b08, 0x0808190819082b2b, 0x0808190819190819, 0x0808190819191908,
3822
- 0x080819081919192b, 0x0808190819192b19, 0x08081908192b0808, 0x08081908192b082b,
3823
- 0x08081908192b1919, 0x080819082b080819, 0x080819082b081908, 0x080819082b08192b,
3824
- 0x080819082b082b19, 0x080819082b190808, 0x080819082b191919, 0x080819082b192b08,
3825
- 0x080819082b2b0819, 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b,
3826
- 0x0808191908081919, 0x0808191908082b08, 0x0808191908082b2b, 0x0808191908190819,
3827
- 0x0808191908191908, 0x080819190819192b, 0x0808191908192b19, 0x08081919082b0808,
3828
- 0x08081919082b1919, 0x08081919082b2b08, 0x0808191919080819, 0x0808191919081908,
3829
- 0x080819191908192b, 0x0808191919082b19, 0x0808191919190808, 0x080819191919082b,
3830
- 0x0808191919191919, 0x0808191919192b08, 0x08081919192b0819, 0x08081919192b1908,
3831
- 0x080819192b080808, 0x080819192b08082b, 0x080819192b081919, 0x080819192b082b08,
3832
- 0x080819192b190819, 0x080819192b191908, 0x080819192b2b0808, 0x0808192b08080819,
3833
- 0x0808192b08081908, 0x0808192b0808192b, 0x0808192b08082b19, 0x0808192b08190808,
3834
- 0x0808192b08191919, 0x0808192b19080808, 0x0808192b19081919, 0x0808192b19082b08,
3835
- 0x0808192b19190819, 0x0808192b19191908, 0x0808192b192b0808, 0x0808192b2b080819,
3836
- 0x0808192b2b081908, 0x0808192b2b190808, 0x08082b0808080808, 0x08082b080808082b,
3837
- 0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808190819, 0x08082b0808191908,
3838
- 0x08082b080819192b, 0x08082b0808192b19, 0x08082b08082b0808, 0x08082b08082b1919,
3839
- 0x08082b08082b2b2b, 0x08082b0819080819, 0x08082b0819081908, 0x08082b081908192b,
3840
- 0x08082b0819082b19, 0x08082b0819190808, 0x08082b081919082b, 0x08082b0819191919,
3841
- 0x08082b0819192b08, 0x08082b08192b0819, 0x08082b08192b1908, 0x08082b082b080808,
3842
- 0x08082b082b081919, 0x08082b082b191908, 0x08082b082b2b2b2b, 0x08082b1908080819,
3843
- 0x08082b1908081908, 0x08082b1908190808, 0x08082b190819082b, 0x08082b1908191919,
3844
- 0x08082b1908192b08, 0x08082b19082b0819, 0x08082b1919080808, 0x08082b1919081919,
3845
- 0x08082b1919082b08, 0x08082b1919190819, 0x08082b1919191908, 0x08082b19192b0808,
3846
- 0x08082b192b080819, 0x08082b192b190808, 0x08082b2b08080808, 0x08082b2b08190819,
3847
- 0x08082b2b08191908, 0x08082b2b082b082b, 0x08082b2b082b2b08, 0x08082b2b082b2b2b,
3848
- 0x08082b2b19190808, 0x08082b2b2b192b19, 0x0819080808080819, 0x0819080808081908,
3849
- 0x081908080808192b, 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b,
3850
- 0x0819080808191919, 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908,
3851
- 0x08190808082b192b, 0x0819080819080808, 0x081908081908082b, 0x0819080819081919,
3852
- 0x0819080819082b08, 0x0819080819190819, 0x0819080819191908, 0x081908081919192b,
3853
- 0x0819080819192b19, 0x08190808192b0808, 0x08190808192b082b, 0x08190808192b1919,
3854
- 0x08190808192b2b08, 0x081908082b080819, 0x081908082b081908, 0x081908082b08192b,
3855
- 0x081908082b190808, 0x081908082b191919, 0x081908082b192b08, 0x081908082b2b0819,
3856
- 0x081908082b2b1908, 0x0819081908080808, 0x081908190808082b, 0x0819081908081919,
3857
- 0x0819081908082b08, 0x0819081908082b2b, 0x0819081908190819, 0x0819081908191908,
3858
- 0x081908190819192b, 0x0819081908192b19, 0x08190819082b0808, 0x08190819082b082b,
3859
- 0x08190819082b1919, 0x08190819082b2b08, 0x0819081919080819, 0x0819081919081908,
3860
- 0x081908191908192b, 0x0819081919082b19, 0x0819081919190808, 0x081908191919082b,
3861
- 0x0819081919191919, 0x0819081919192b08, 0x08190819192b0819, 0x08190819192b1908,
3862
- 0x081908192b080808, 0x081908192b08082b, 0x081908192b081919, 0x081908192b082b08,
3863
- 0x081908192b190819, 0x081908192b191908, 0x0819082b08080819, 0x0819082b08081908,
3864
- 0x0819082b08082b19, 0x0819082b08190808, 0x0819082b08191919, 0x0819082b082b0819,
3865
- 0x0819082b082b1908, 0x0819082b19080808, 0x0819082b19081919, 0x0819082b19190819,
3866
- 0x0819082b19191908, 0x0819082b2b080819, 0x0819082b2b081908, 0x0819082b2b190808,
3867
- 0x0819190808080808, 0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
3868
- 0x0819190808190819, 0x0819190808191908, 0x081919080819192b, 0x0819190808192b19,
3869
- 0x08191908082b0808, 0x08191908082b1919, 0x08191908082b2b08, 0x0819190819080819,
3870
- 0x0819190819081908, 0x081919081908192b, 0x0819190819082b19, 0x0819190819190808,
3871
- 0x081919081919082b, 0x0819190819191919, 0x0819190819192b08, 0x08191908192b0819,
3872
- 0x08191908192b1908, 0x081919082b080808, 0x081919082b08082b, 0x081919082b081919,
3873
- 0x081919082b082b08, 0x081919082b190819, 0x081919082b191908, 0x081919082b2b0808,
3874
- 0x0819191908080819, 0x0819191908081908, 0x081919190808192b, 0x0819191908082b19,
3875
- 0x0819191908190808, 0x081919190819082b, 0x0819191908191919, 0x0819191908192b08,
3876
- 0x08191919082b0819, 0x08191919082b1908, 0x0819191919080808, 0x081919191908082b,
3877
- 0x0819191919081919, 0x0819191919082b08, 0x0819191919190819, 0x0819191919191908,
3878
- 0x08191919192b0808, 0x081919192b080819, 0x081919192b081908, 0x081919192b190808,
3879
- 0x0819192b08080808, 0x0819192b08081919, 0x0819192b08082b08, 0x0819192b08190819,
3880
- 0x0819192b08191908, 0x0819192b082b0808, 0x0819192b19080819, 0x0819192b19081908,
3881
- 0x0819192b19190808, 0x0819192b2b080808, 0x0819192b2b2b2b2b, 0x08192b0808080819,
3882
- 0x08192b0808081908, 0x08192b080808192b, 0x08192b0808082b19, 0x08192b0808190808,
3883
- 0x08192b0808191919, 0x08192b0808192b08, 0x08192b08082b0819, 0x08192b0819080808,
3884
- 0x08192b081908082b, 0x08192b0819081919, 0x08192b0819082b08, 0x08192b0819190819,
3885
- 0x08192b0819191908, 0x08192b08192b0808, 0x08192b082b080819, 0x08192b082b081908,
3886
- 0x08192b1908080808, 0x08192b190808082b, 0x08192b1908081919, 0x08192b1908082b08,
3887
- 0x08192b1908190819, 0x08192b1908191908, 0x08192b19082b0808, 0x08192b1919080819,
3888
- 0x08192b1919081908, 0x08192b1919190808, 0x08192b19192b2b19, 0x08192b192b2b082b,
3889
- 0x08192b2b08081908, 0x08192b2b08190808, 0x08192b2b19080808, 0x08192b2b1919192b,
3890
- 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919, 0x082b080808082b08,
3891
- 0x082b080808190819, 0x082b080808191908, 0x082b08080819192b, 0x082b080808192b19,
3892
- 0x082b0808082b0808, 0x082b0808082b1919, 0x082b0808082b2b2b, 0x082b080819080819,
3893
- 0x082b080819081908, 0x082b080819190808, 0x082b08081919082b, 0x082b080819191919,
3894
- 0x082b0808192b1908, 0x082b08082b080808, 0x082b08082b082b2b, 0x082b08082b191908,
3895
- 0x082b08082b2b2b2b, 0x082b081908080819, 0x082b081908081908, 0x082b081908190808,
3896
- 0x082b08190819082b, 0x082b081908191919, 0x082b0819082b0819, 0x082b081919080808,
3897
- 0x082b08191908082b, 0x082b081919081919, 0x082b081919190819, 0x082b081919191908,
3898
- 0x082b0819192b0808, 0x082b08192b080819, 0x082b08192b081908, 0x082b08192b190808,
3899
- 0x082b082b08080808, 0x082b082b08082b2b, 0x082b082b082b082b, 0x082b082b082b2b08,
3900
- 0x082b082b082b2b2b, 0x082b082b19081908, 0x082b082b19190808, 0x082b082b2b082b08,
3901
- 0x082b082b2b082b2b, 0x082b082b2b2b2b08, 0x082b190808080819, 0x082b190808081908,
3902
- 0x082b19080808192b, 0x082b190808082b19, 0x082b190808190808, 0x082b190808191919,
3903
- 0x082b190808192b08, 0x082b1908082b0819, 0x082b1908082b1908, 0x082b190819080808,
3904
- 0x082b19081908082b, 0x082b190819081919, 0x082b190819082b08, 0x082b190819190819,
3905
- 0x082b190819191908, 0x082b1908192b0808, 0x082b19082b080819, 0x082b19082b081908,
3906
- 0x082b19082b190808, 0x082b191908080808, 0x082b191908081919, 0x082b191908082b08,
3907
- 0x082b191908190819, 0x082b191908191908, 0x082b1919082b0808, 0x082b191919080819,
3908
- 0x082b191919081908, 0x082b191919190808, 0x082b1919192b192b, 0x082b19192b080808,
3909
- 0x082b192b08080819, 0x082b192b08081908, 0x082b192b08190808, 0x082b192b19080808,
3910
- 0x082b192b19192b19, 0x082b2b0808080808, 0x082b2b0808081919, 0x082b2b0808190819,
3911
- 0x082b2b0808191908, 0x082b2b0819080819, 0x082b2b0819081908, 0x082b2b0819190808,
3912
- 0x082b2b082b082b2b, 0x082b2b082b2b2b2b, 0x082b2b1908080819, 0x082b2b1908081908,
3913
- 0x082b2b1908190808, 0x082b2b192b191919, 0x082b2b2b08082b2b, 0x082b2b2b082b082b,
3914
- 0x082b2b2b192b1908, 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
3915
- 0x1908080808081908, 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808,
3916
- 0x190808080819082b, 0x1908080808191919, 0x1908080808192b08, 0x1908080808192b2b,
3917
- 0x19080808082b0819, 0x19080808082b1908, 0x19080808082b192b, 0x1908080819080808,
3918
- 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08, 0x1908080819082b2b,
3919
- 0x1908080819190819, 0x1908080819191908, 0x190808081919192b, 0x1908080819192b19,
3920
- 0x19080808192b0808, 0x19080808192b082b, 0x19080808192b1919, 0x190808082b080819,
3921
- 0x190808082b081908, 0x190808082b190808, 0x190808082b191919, 0x190808082b192b08,
3922
- 0x190808082b2b0819, 0x190808082b2b1908, 0x1908081908080808, 0x190808190808082b,
3923
- 0x1908081908081919, 0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
3924
- 0x190808190819192b, 0x1908081908192b19, 0x19080819082b0808, 0x19080819082b082b,
3925
- 0x19080819082b1919, 0x1908081919080819, 0x1908081919081908, 0x190808191908192b,
3926
- 0x1908081919082b19, 0x1908081919190808, 0x190808191919082b, 0x1908081919191919,
3927
- 0x1908081919192b08, 0x19080819192b0819, 0x19080819192b1908, 0x190808192b080808,
3928
- 0x190808192b08082b, 0x190808192b081919, 0x190808192b082b08, 0x190808192b190819,
3929
- 0x190808192b191908, 0x190808192b2b0808, 0x1908082b08080819, 0x1908082b08081908,
3930
- 0x1908082b08190808, 0x1908082b0819082b, 0x1908082b08191919, 0x1908082b08192b08,
3931
- 0x1908082b082b1908, 0x1908082b19080808, 0x1908082b19081919, 0x1908082b19082b08,
3932
- 0x1908082b19190819, 0x1908082b19191908, 0x1908082b192b0808, 0x1908082b2b080819,
3933
- 0x1908082b2b081908, 0x1908190808080808, 0x190819080808082b, 0x1908190808081919,
3934
- 0x1908190808082b08, 0x1908190808082b2b, 0x1908190808190819, 0x1908190808191908,
3935
- 0x190819080819192b, 0x1908190808192b19, 0x19081908082b0808, 0x19081908082b082b,
3936
- 0x19081908082b1919, 0x19081908082b2b08, 0x1908190819080819, 0x1908190819081908,
3937
- 0x190819081908192b, 0x1908190819082b19, 0x1908190819190808, 0x190819081919082b,
3938
- 0x1908190819191919, 0x1908190819192b08, 0x19081908192b0819, 0x19081908192b1908,
3939
- 0x190819082b080808, 0x190819082b08082b, 0x190819082b081919, 0x190819082b082b08,
3940
- 0x190819082b190819, 0x190819082b191908, 0x190819082b2b0808, 0x1908191908080819,
3941
- 0x1908191908081908, 0x190819190808192b, 0x1908191908082b19, 0x1908191908190808,
3942
- 0x190819190819082b, 0x1908191908191919, 0x1908191908192b08, 0x19081919082b0819,
3943
- 0x19081919082b1908, 0x1908191919080808, 0x190819191908082b, 0x1908191919081919,
3944
- 0x1908191919082b08, 0x1908191919190819, 0x1908191919191908, 0x19081919192b0808,
3945
- 0x19081919192b2b2b, 0x190819192b080819, 0x190819192b081908, 0x190819192b190808,
3946
- 0x1908192b08080808, 0x1908192b0808082b, 0x1908192b08081919, 0x1908192b08082b08,
3947
- 0x1908192b08190819, 0x1908192b08191908, 0x1908192b082b0808, 0x1908192b19080819,
3948
- 0x1908192b19081908, 0x1908192b19190808, 0x1908192b2b080808, 0x1908192b2b2b1919,
3949
- 0x19082b0808080819, 0x19082b0808081908, 0x19082b0808082b19, 0x19082b0808190808,
3950
- 0x19082b080819082b, 0x19082b0808191919, 0x19082b0808192b08, 0x19082b08082b0819,
3951
- 0x19082b08082b1908, 0x19082b0819080808, 0x19082b081908082b, 0x19082b0819081919,
3952
- 0x19082b0819082b08, 0x19082b0819190819, 0x19082b0819191908, 0x19082b08192b0808,
3953
- 0x19082b082b081908, 0x19082b082b190808, 0x19082b1908080808, 0x19082b190808082b,
3954
- 0x19082b1908081919, 0x19082b1908082b08, 0x19082b1908190819, 0x19082b1908191908,
3955
- 0x19082b19082b0808, 0x19082b1919080819, 0x19082b1919081908, 0x19082b1919190808,
3956
- 0x19082b192b080808, 0x19082b192b19192b, 0x19082b2b08080819, 0x19082b2b08081908,
3957
- 0x19082b2b08190808, 0x19082b2b19080808, 0x1919080808080808, 0x191908080808082b,
3958
- 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819, 0x1919080808191908,
3959
- 0x191908080819192b, 0x1919080808192b19, 0x19190808082b0808, 0x19190808082b082b,
3960
- 0x19190808082b1919, 0x19190808082b2b08, 0x1919080819080819, 0x1919080819081908,
3961
- 0x191908081908192b, 0x1919080819082b19, 0x1919080819190808, 0x191908081919082b,
3962
- 0x1919080819191919, 0x1919080819192b08, 0x19190808192b0819, 0x19190808192b1908,
3963
- 0x191908082b080808, 0x191908082b08082b, 0x191908082b081919, 0x191908082b082b08,
3964
- 0x191908082b190819, 0x191908082b191908, 0x1919081908080819, 0x1919081908081908,
3965
- 0x191908190808192b, 0x1919081908082b19, 0x1919081908190808, 0x191908190819082b,
3966
- 0x1919081908191919, 0x1919081908192b08, 0x19190819082b0819, 0x19190819082b1908,
3967
- 0x1919081919080808, 0x191908191908082b, 0x1919081919081919, 0x1919081919082b08,
3968
- 0x1919081919190819, 0x1919081919191908, 0x19190819192b0808, 0x191908192b080819,
3969
- 0x191908192b081908, 0x191908192b190808, 0x1919082b08080808, 0x1919082b08081919,
3970
- 0x1919082b08082b08, 0x1919082b08190819, 0x1919082b08191908, 0x1919082b082b0808,
3971
- 0x1919082b19080819, 0x1919082b19081908, 0x1919082b19190808, 0x1919082b192b2b19,
3972
- 0x1919082b2b080808, 0x1919190808080819, 0x1919190808081908, 0x191919080808192b,
3973
- 0x1919190808082b19, 0x1919190808190808, 0x191919080819082b, 0x1919190808191919,
3974
- 0x1919190808192b08, 0x19191908082b0819, 0x19191908082b1908, 0x1919190819080808,
3975
- 0x191919081908082b, 0x1919190819081919, 0x1919190819082b08, 0x1919190819190819,
3976
- 0x1919190819191908, 0x19191908192b0808, 0x191919082b080819, 0x191919082b081908,
3977
- 0x191919082b190808, 0x1919191908080808, 0x191919190808082b, 0x1919191908081919,
3978
- 0x1919191908082b08, 0x1919191908190819, 0x1919191908191908, 0x19191919082b0808,
3979
- 0x1919191919080819, 0x1919191919081908, 0x1919191919190808, 0x191919192b080808,
3980
- 0x1919192b08080819, 0x1919192b08081908, 0x1919192b08190808, 0x1919192b082b192b,
3981
- 0x1919192b19080808, 0x19192b0808080808, 0x19192b080808082b, 0x19192b0808081919,
3982
- 0x19192b0808082b08, 0x19192b0808190819, 0x19192b0808191908, 0x19192b08082b0808,
3983
- 0x19192b0819080819, 0x19192b0819081908, 0x19192b0819190808, 0x19192b0819192b2b,
3984
- 0x19192b082b080808, 0x19192b1908080819, 0x19192b1908081908, 0x19192b1908190808,
3985
- 0x19192b1919080808, 0x19192b2b08080808, 0x19192b2b08192b19, 0x19192b2b2b081919,
3986
- 0x19192b2b2b2b2b08, 0x192b080808080819, 0x192b080808081908, 0x192b08080808192b,
3987
- 0x192b080808190808, 0x192b08080819082b, 0x192b080808191919, 0x192b080808192b08,
3988
- 0x192b0808082b0819, 0x192b0808082b1908, 0x192b080819080808, 0x192b080819081919,
3989
- 0x192b080819082b08, 0x192b080819190819, 0x192b080819191908, 0x192b0808192b0808,
3990
- 0x192b08082b081908, 0x192b08082b190808, 0x192b081908080808, 0x192b08190808082b,
3991
- 0x192b081908081919, 0x192b081908082b08, 0x192b081908190819, 0x192b081908191908,
3992
- 0x192b0819082b0808, 0x192b081919080819, 0x192b081919081908, 0x192b081919190808,
3993
- 0x192b08192b080808, 0x192b08192b192b19, 0x192b082b08081908, 0x192b082b08190808,
3994
- 0x192b082b19080808, 0x192b082b1919192b, 0x192b082b2b2b0819, 0x192b190808080808,
3995
- 0x192b190808081919, 0x192b190808082b08, 0x192b190808190819, 0x192b190808191908,
3996
- 0x192b1908082b0808, 0x192b190819080819, 0x192b190819081908, 0x192b190819190808,
3997
- 0x192b19082b080808, 0x192b191908080819, 0x192b191908081908, 0x192b191908190808,
3998
- 0x192b191919080808, 0x192b191919082b2b, 0x192b1919192b2b08, 0x192b19192b19082b,
3999
- 0x192b192b08080808, 0x192b192b2b191908, 0x192b2b0808080819, 0x192b2b0808081908,
4000
- 0x192b2b0808190808, 0x192b2b08192b1919, 0x192b2b082b192b08, 0x192b2b1908080808,
4001
- 0x192b2b19082b2b2b, 0x192b2b2b1908082b, 0x192b2b2b2b2b0819, 0x2b08080808080808,
4002
- 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
4003
- 0x2b08080808191908, 0x2b08080808192b19, 0x2b080808082b0808, 0x2b080808082b1919,
4004
- 0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808081919082b,
4005
- 0x2b08080819191919, 0x2b08080819192b08, 0x2b080808192b0819, 0x2b0808082b080808,
4006
- 0x2b0808082b081919, 0x2b0808082b190819, 0x2b0808082b191908, 0x2b08081908080819,
4007
- 0x2b08081908081908, 0x2b08081908082b19, 0x2b08081908190808, 0x2b0808190819082b,
4008
- 0x2b08081908191919, 0x2b08081908192b08, 0x2b080819082b0819, 0x2b080819082b1908,
4009
- 0x2b08081919080808, 0x2b0808191908082b, 0x2b08081919081919, 0x2b08081919082b08,
4010
- 0x2b08081919190819, 0x2b08081919191908, 0x2b0808192b080819, 0x2b0808192b081908,
4011
- 0x2b0808192b190808, 0x2b0808192b2b2b19, 0x2b08082b08080808, 0x2b08082b08081919,
4012
- 0x2b08082b08082b2b, 0x2b08082b08190819, 0x2b08082b08191908, 0x2b08082b19080819,
4013
- 0x2b08082b19081908, 0x2b08082b19190808, 0x2b08190808080819, 0x2b08190808081908,
4014
- 0x2b0819080808192b, 0x2b08190808082b19, 0x2b08190808190808, 0x2b0819080819082b,
4015
- 0x2b08190808191919, 0x2b08190808192b08, 0x2b081908082b0819, 0x2b08190819080808,
4016
- 0x2b0819081908082b, 0x2b08190819081919, 0x2b08190819082b08, 0x2b08190819190819,
4017
- 0x2b08190819191908, 0x2b081908192b0808, 0x2b0819082b080819, 0x2b0819082b081908,
4018
- 0x2b0819082b190808, 0x2b08191908080808, 0x2b0819190808082b, 0x2b08191908081919,
4019
- 0x2b08191908082b08, 0x2b08191908190819, 0x2b08191908191908, 0x2b081919082b0808,
4020
- 0x2b08191919080819, 0x2b08191919081908, 0x2b08191919190808, 0x2b0819192b080808,
4021
- 0x2b0819192b082b2b, 0x2b08192b08080819, 0x2b08192b08081908, 0x2b08192b08190808,
4022
- 0x2b08192b082b2b19, 0x2b08192b19080808, 0x2b082b0808080808, 0x2b082b0808081919,
4023
- 0x2b082b0808190819, 0x2b082b0808191908, 0x2b082b0819080819, 0x2b082b0819081908,
4024
- 0x2b082b0819190808, 0x2b082b082b2b082b, 0x2b082b1908080819, 0x2b082b1908081908,
4025
- 0x2b082b1919080808, 0x2b082b19192b1919, 0x2b082b2b082b082b, 0x2b082b2b19192b08,
4026
- 0x2b082b2b19192b2b, 0x2b082b2b2b08082b, 0x2b082b2b2b2b082b, 0x2b19080808080819,
4027
- 0x2b19080808081908, 0x2b19080808082b19, 0x2b19080808190808, 0x2b1908080819082b,
4028
- 0x2b19080808191919, 0x2b19080808192b08, 0x2b190808082b1908, 0x2b19080819080808,
4029
- 0x2b1908081908082b, 0x2b19080819081919, 0x2b19080819082b08, 0x2b19080819190819,
4030
- 0x2b19080819191908, 0x2b190808192b0808, 0x2b1908082b080819, 0x2b1908082b081908,
4031
- 0x2b1908082b190808, 0x2b19081908080808, 0x2b19081908081919, 0x2b19081908190819,
4032
- 0x2b19081908191908, 0x2b19081919080819, 0x2b19081919081908, 0x2b19081919190808,
4033
- 0x2b19081919192b2b, 0x2b19082b08080819, 0x2b19082b08081908, 0x2b19082b08190808,
4034
- 0x2b19082b19080808, 0x2b19082b2b2b192b, 0x2b19190808080808, 0x2b1919080808082b,
4035
- 0x2b19190808081919, 0x2b19190808082b08, 0x2b19190808190819, 0x2b19190808191908,
4036
- 0x2b191908082b0808, 0x2b19190819080819, 0x2b19190819081908, 0x2b19190819190808,
4037
- 0x2b1919082b080808, 0x2b1919082b19192b, 0x2b19191908080819, 0x2b19191908081908,
4038
- 0x2b19191908190808, 0x2b19191919080808, 0x2b1919192b192b08, 0x2b1919192b2b0819,
4039
- 0x2b19192b08080808, 0x2b19192b1908192b, 0x2b19192b192b1908, 0x2b192b0808080819,
4040
- 0x2b192b0808081908, 0x2b192b0808190808, 0x2b192b08082b192b, 0x2b192b0819080808,
4041
- 0x2b192b082b2b2b19, 0x2b192b1908080808, 0x2b192b1919082b19, 0x2b192b191919082b,
4042
- 0x2b192b2b2b190808, 0x2b2b080808080808, 0x2b2b080808081919, 0x2b2b080808082b2b,
4043
- 0x2b2b080808191908, 0x2b2b0808082b082b, 0x2b2b0808082b2b2b, 0x2b2b080819080819,
4044
- 0x2b2b080819081908, 0x2b2b080819190808, 0x2b2b08082b2b082b, 0x2b2b08082b2b2b2b,
4045
- 0x2b2b081919080808, 0x2b2b0819192b1919, 0x2b2b082b0808082b, 0x2b2b082b08082b2b,
4046
- 0x2b2b082b082b082b, 0x2b2b082b082b2b08, 0x2b2b082b082b2b2b, 0x2b2b082b2b08082b,
4047
- 0x2b2b082b2b082b08, 0x2b2b082b2b082b2b, 0x2b2b082b2b2b2b08, 0x2b2b190808080819,
4048
- 0x2b2b190808081908, 0x2b2b190808190808, 0x2b2b190819080808, 0x2b2b19082b082b19,
4049
- 0x2b2b19082b2b1908, 0x2b2b191908080808, 0x2b2b191908192b19, 0x2b2b192b19190819,
4050
- 0x2b2b2b0808082b2b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b082b, 0x2b2b2b1919191908,
4051
- 0x2b2b2b192b08192b, 0x2b2b2b2b08082b08, 0x2b2b2b2b08082b2b, 0x2b2b2b2b082b0808,
4052
- 0x2b2b2b2b082b082b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b2b082b08, 0x2b2b2b2b2b2b2b2b,
4053
- };
4054
-
4055
- constexpr constant static uint32_t iq3xxs_grid[256] = {
4056
- 0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
4057
- 0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
4058
- 0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
4059
- 0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
4060
- 0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
4061
- 0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
4062
- 0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
4063
- 0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
4064
- 0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
4065
- 0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
4066
- 0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
4067
- 0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
4068
- 0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
4069
- 0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
4070
- 0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
4071
- 0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
4072
- 0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
4073
- 0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
4074
- 0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
4075
- 0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
4076
- 0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
4077
- 0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
4078
- 0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
4079
- 0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
4080
- 0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
4081
- 0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
4082
- 0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
4083
- 0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
4084
- 0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
4085
- 0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
4086
- 0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
4087
- 0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
4088
- };
4089
-
4090
- constexpr constant static uint32_t iq3xs_grid[512] = {
4091
- 0x04040404, 0x0404040c, 0x04040414, 0x0404042c, 0x0404043e, 0x04040c04, 0x04040c0c, 0x04040c14,
4092
- 0x04040c24, 0x04040c34, 0x04041404, 0x0404140c, 0x0404142c, 0x04041c1c, 0x04042404, 0x04042414,
4093
- 0x0404242c, 0x0404243e, 0x04042c0c, 0x04042c1c, 0x04043404, 0x04043414, 0x04043e0c, 0x04043e24,
4094
- 0x04043e3e, 0x040c0404, 0x040c040c, 0x040c0414, 0x040c0424, 0x040c0c04, 0x040c0c0c, 0x040c0c2c,
4095
- 0x040c1404, 0x040c141c, 0x040c143e, 0x040c1c0c, 0x040c1c2c, 0x040c2424, 0x040c340c, 0x040c342c,
4096
- 0x040c3e14, 0x04140404, 0x0414040c, 0x0414042c, 0x0414043e, 0x04140c04, 0x04140c1c, 0x04140c34,
4097
- 0x0414140c, 0x0414142c, 0x04141c04, 0x04141c24, 0x04142414, 0x0414242c, 0x0414243e, 0x04142c0c,
4098
- 0x04142c1c, 0x04143e04, 0x04143e1c, 0x041c041c, 0x041c0c0c, 0x041c0c2c, 0x041c1404, 0x041c1414,
4099
- 0x041c1c0c, 0x041c1c1c, 0x041c1c34, 0x041c2424, 0x041c2c04, 0x041c2c14, 0x041c343e, 0x041c3e0c,
4100
- 0x041c3e2c, 0x04240404, 0x04240c1c, 0x04240c3e, 0x0424140c, 0x04241424, 0x04241c14, 0x04242404,
4101
- 0x0424241c, 0x04242c0c, 0x04243e04, 0x042c0414, 0x042c0424, 0x042c1404, 0x042c1414, 0x042c1434,
4102
- 0x042c1c1c, 0x042c240c, 0x042c242c, 0x042c243e, 0x042c3434, 0x042c3e1c, 0x04340434, 0x04340c0c,
4103
- 0x04340c1c, 0x04341c0c, 0x04342c14, 0x04343e0c, 0x043e0404, 0x043e0414, 0x043e0424, 0x043e1404,
4104
- 0x043e1414, 0x043e1434, 0x043e1c1c, 0x043e2c04, 0x043e2c24, 0x0c040404, 0x0c04040c, 0x0c040414,
4105
- 0x0c040424, 0x0c040c04, 0x0c040c0c, 0x0c040c1c, 0x0c040c2c, 0x0c040c3e, 0x0c041404, 0x0c041414,
4106
- 0x0c041c0c, 0x0c041c24, 0x0c041c34, 0x0c042c24, 0x0c042c34, 0x0c04340c, 0x0c043e14, 0x0c0c0404,
4107
- 0x0c0c040c, 0x0c0c041c, 0x0c0c0434, 0x0c0c0c04, 0x0c0c0c24, 0x0c0c140c, 0x0c0c1c04, 0x0c0c1c1c,
4108
- 0x0c0c240c, 0x0c0c2c04, 0x0c0c2c14, 0x0c0c3e04, 0x0c0c3e34, 0x0c140404, 0x0c140c14, 0x0c140c2c,
4109
- 0x0c140c3e, 0x0c141404, 0x0c141424, 0x0c141c14, 0x0c142404, 0x0c14241c, 0x0c142c2c, 0x0c143404,
4110
- 0x0c143e14, 0x0c1c040c, 0x0c1c0424, 0x0c1c043e, 0x0c1c0c04, 0x0c1c0c1c, 0x0c1c140c, 0x0c1c143e,
4111
- 0x0c1c1c04, 0x0c1c1c24, 0x0c1c240c, 0x0c1c3414, 0x0c1c3e04, 0x0c24041c, 0x0c24042c, 0x0c240c14,
4112
- 0x0c240c24, 0x0c241c0c, 0x0c241c1c, 0x0c242414, 0x0c242434, 0x0c242c04, 0x0c242c24, 0x0c2c040c,
4113
- 0x0c2c0c04, 0x0c2c0c1c, 0x0c2c140c, 0x0c2c1c04, 0x0c2c1c14, 0x0c2c2c0c, 0x0c341404, 0x0c341424,
4114
- 0x0c34143e, 0x0c342424, 0x0c342434, 0x0c3e040c, 0x0c3e041c, 0x0c3e0c04, 0x0c3e0c14, 0x0c3e140c,
4115
- 0x0c3e1c2c, 0x0c3e240c, 0x0c3e3414, 0x0c3e3e04, 0x14040404, 0x1404040c, 0x1404041c, 0x1404042c,
4116
- 0x1404043e, 0x14040c04, 0x14040c14, 0x14040c24, 0x14040c34, 0x1404140c, 0x1404141c, 0x1404143e,
4117
- 0x14041c04, 0x14041c14, 0x1404240c, 0x1404241c, 0x1404242c, 0x14042c04, 0x14042c14, 0x1404343e,
4118
- 0x14043e04, 0x14043e1c, 0x14043e2c, 0x140c0404, 0x140c0414, 0x140c0c04, 0x140c0c1c, 0x140c0c3e,
4119
- 0x140c1414, 0x140c142c, 0x140c1c0c, 0x140c1c24, 0x140c2414, 0x140c2c0c, 0x1414040c, 0x14140424,
4120
- 0x1414043e, 0x1414140c, 0x1414141c, 0x14141c04, 0x14141c3e, 0x1414240c, 0x14142c1c, 0x14142c3e,
4121
- 0x14143e0c, 0x14143e24, 0x141c0404, 0x141c0414, 0x141c042c, 0x141c0c0c, 0x141c1414, 0x141c1424,
4122
- 0x141c1c0c, 0x141c1c1c, 0x141c2414, 0x141c2c04, 0x141c3434, 0x1424040c, 0x1424043e, 0x14241404,
4123
- 0x1424141c, 0x14241c14, 0x14241c2c, 0x1424240c, 0x14243e14, 0x14243e2c, 0x142c0424, 0x142c0c0c,
4124
- 0x142c1414, 0x142c1c3e, 0x142c2404, 0x142c2c1c, 0x142c3e04, 0x14340404, 0x14340414, 0x1434043e,
4125
- 0x1434140c, 0x14342c2c, 0x1434340c, 0x143e042c, 0x143e0c0c, 0x143e1434, 0x143e1c04, 0x143e241c,
4126
- 0x143e2c04, 0x1c040414, 0x1c040c0c, 0x1c040c1c, 0x1c040c2c, 0x1c040c3e, 0x1c041414, 0x1c041c0c,
4127
- 0x1c041c1c, 0x1c041c2c, 0x1c042414, 0x1c042424, 0x1c04243e, 0x1c042c0c, 0x1c04341c, 0x1c043e0c,
4128
- 0x1c0c040c, 0x1c0c041c, 0x1c0c042c, 0x1c0c0c24, 0x1c0c140c, 0x1c0c141c, 0x1c0c2404, 0x1c0c3404,
4129
- 0x1c0c3e14, 0x1c0c3e34, 0x1c140404, 0x1c140c14, 0x1c141404, 0x1c141c14, 0x1c141c24, 0x1c142c04,
4130
- 0x1c1c040c, 0x1c1c0c04, 0x1c1c0c24, 0x1c1c140c, 0x1c1c141c, 0x1c1c143e, 0x1c1c1c04, 0x1c1c240c,
4131
- 0x1c1c241c, 0x1c1c243e, 0x1c1c2c2c, 0x1c1c3e1c, 0x1c24041c, 0x1c240c0c, 0x1c240c34, 0x1c241414,
4132
- 0x1c241c0c, 0x1c242c14, 0x1c243404, 0x1c243424, 0x1c2c040c, 0x1c2c0c04, 0x1c2c0c14, 0x1c2c142c,
4133
- 0x1c2c1c14, 0x1c2c2424, 0x1c2c2c34, 0x1c2c3e1c, 0x1c340c34, 0x1c34240c, 0x1c3e040c, 0x1c3e041c,
4134
- 0x1c3e1404, 0x1c3e1414, 0x1c3e1c2c, 0x24040404, 0x24040424, 0x24040c14, 0x24041404, 0x24041424,
4135
- 0x2404143e, 0x24041c14, 0x2404240c, 0x24042c04, 0x24043e04, 0x240c0414, 0x240c043e, 0x240c0c0c,
4136
- 0x240c0c1c, 0x240c1414, 0x240c1c04, 0x240c1c2c, 0x240c241c, 0x240c2c0c, 0x240c2c2c, 0x2414040c,
4137
- 0x2414041c, 0x24140c04, 0x24140c2c, 0x2414140c, 0x24141c1c, 0x24142404, 0x24142c3e, 0x24143414,
4138
- 0x24143e04, 0x241c0424, 0x241c0c0c, 0x241c0c1c, 0x241c1404, 0x241c1414, 0x241c1c0c, 0x241c1c2c,
4139
- 0x24240404, 0x24240414, 0x24241424, 0x24241c3e, 0x24242404, 0x24243e0c, 0x242c042c, 0x242c043e,
4140
- 0x242c140c, 0x242c3414, 0x24340c1c, 0x24341c24, 0x24343404, 0x243e0c04, 0x243e0c2c, 0x243e1c04,
4141
- 0x243e241c, 0x243e2c0c, 0x2c040414, 0x2c040c04, 0x2c040c24, 0x2c041414, 0x2c042404, 0x2c042424,
4142
- 0x2c04243e, 0x2c042c14, 0x2c043434, 0x2c043e24, 0x2c0c040c, 0x2c0c041c, 0x2c0c042c, 0x2c0c0c14,
4143
- 0x2c0c140c, 0x2c0c1c14, 0x2c0c3e14, 0x2c140404, 0x2c140c0c, 0x2c14141c, 0x2c141c04, 0x2c141c34,
4144
- 0x2c142c1c, 0x2c1c0414, 0x2c1c043e, 0x2c1c0c04, 0x2c1c143e, 0x2c1c2424, 0x2c1c2c0c, 0x2c1c342c,
4145
- 0x2c1c3e1c, 0x2c24040c, 0x2c240424, 0x2c241404, 0x2c241c14, 0x2c242434, 0x2c2c0c14, 0x2c2c1434,
4146
- 0x2c2c2c0c, 0x2c2c2c1c, 0x2c342414, 0x2c3e0414, 0x2c3e0424, 0x2c3e1414, 0x34040c0c, 0x34040c1c,
4147
- 0x34040c2c, 0x34041c0c, 0x34041c1c, 0x34043404, 0x340c0404, 0x340c1404, 0x340c143e, 0x340c3424,
4148
- 0x34140c14, 0x34141c24, 0x34142414, 0x34142c2c, 0x34143414, 0x34143e04, 0x341c0404, 0x341c0c24,
4149
- 0x341c140c, 0x341c2404, 0x3424142c, 0x3424241c, 0x34243414, 0x342c0404, 0x342c041c, 0x342c1c24,
4150
- 0x342c3404, 0x3434042c, 0x34342404, 0x343e0c0c, 0x343e0c1c, 0x3e040404, 0x3e040424, 0x3e04043e,
4151
- 0x3e041404, 0x3e041414, 0x3e041c34, 0x3e042404, 0x3e042c24, 0x3e043414, 0x3e0c0414, 0x3e0c0c0c,
4152
- 0x3e0c1424, 0x3e0c241c, 0x3e0c242c, 0x3e14040c, 0x3e140424, 0x3e140c04, 0x3e140c34, 0x3e14140c,
4153
- 0x3e141c04, 0x3e142c0c, 0x3e1c0414, 0x3e1c1c14, 0x3e1c1c2c, 0x3e1c2c1c, 0x3e24040c, 0x3e24042c,
4154
- 0x3e240c1c, 0x3e241404, 0x3e242c04, 0x3e2c1414, 0x3e2c2414, 0x3e340414, 0x3e341c0c, 0x3e3e0404,
4155
- };
4156
-
4157
- #define NGRID_IQ1S 512
4158
- constexpr constant static uint64_t iq1s_grid[NGRID_IQ1S] = {
4159
- 0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000,
4160
- 0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01,
4161
- 0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100,
4162
- 0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00,
4163
- 0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101,
4164
- 0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100,
4165
- 0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00,
4166
- 0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff,
4167
- 0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000,
4168
- 0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000,
4169
- 0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001,
4170
- 0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff,
4171
- 0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01,
4172
- 0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001,
4173
- 0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00,
4174
- 0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001,
4175
- 0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100,
4176
- 0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000,
4177
- 0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000,
4178
- 0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000,
4179
- 0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff,
4180
- 0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff,
4181
- 0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01,
4182
- 0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100,
4183
- 0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff,
4184
- 0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000,
4185
- 0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101,
4186
- 0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff,
4187
- 0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff,
4188
- 0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001,
4189
- 0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01,
4190
- 0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101,
4191
- 0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100,
4192
- 0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00,
4193
- 0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001,
4194
- 0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff,
4195
- 0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000,
4196
- 0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000,
4197
- 0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100,
4198
- 0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100,
4199
- 0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01,
4200
- 0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff,
4201
- 0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101,
4202
- 0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000,
4203
- 0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff,
4204
- 0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000,
4205
- 0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff,
4206
- 0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00,
4207
- 0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101,
4208
- 0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000,
4209
- 0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000,
4210
- 0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000,
4211
- 0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100,
4212
- 0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000,
4213
- 0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001,
4214
- 0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff,
4215
- 0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000,
4216
- 0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000,
4217
- 0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000,
4218
- 0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000,
4219
- 0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff,
4220
- 0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000,
4221
- 0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001,
4222
- 0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01,
4223
- 0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100,
4224
- 0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000,
4225
- 0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00,
4226
- 0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100,
4227
- 0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000,
4228
- 0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001,
4229
- 0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00,
4230
- 0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff,
4231
- 0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100,
4232
- 0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff,
4233
- 0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000,
4234
- 0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff,
4235
- 0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff,
4236
- 0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00,
4237
- 0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001,
4238
- 0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001,
4239
- 0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01,
4240
- 0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000,
4241
- 0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101,
4242
- 0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00,
4243
- 0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100,
4244
- 0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101,
4245
- 0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101,
4246
- 0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000,
4247
- 0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff,
4248
- 0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff,
4249
- 0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101,
4250
- 0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff,
4251
- 0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101,
4252
- 0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001,
4253
- 0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff,
4254
- 0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff,
4255
- 0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01,
4256
- 0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff,
4257
- 0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100,
4258
- 0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001,
4259
- 0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00,
4260
- 0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff,
4261
- 0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff,
4262
- 0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000,
4263
- 0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000,
4264
- 0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101,
4265
- 0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001,
4266
- 0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000,
4267
- 0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101,
4268
- 0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000,
4269
- 0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001,
4270
- 0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000,
4271
- 0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100,
4272
- 0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000,
4273
- 0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000,
4274
- 0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100,
4275
- 0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff,
4276
- 0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff,
4277
- 0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00,
4278
- 0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101,
4279
- 0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000,
4280
- 0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00,
4281
- 0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000,
4282
- 0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff,
4283
- 0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101,
4284
- 0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff,
4285
- 0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00,
4286
- 0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff,
4287
- };
4288
-
4289
- constexpr constant static uint8_t ksigns_iq2xs[128] = {
4290
- 0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
4291
- 144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
4292
- 160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
4293
- 48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
4294
- 192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
4295
- 80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
4296
- 96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
4297
- 240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
4298
- };
4299
-
4300
- constexpr constant static uint8_t kmask_iq2xs[8] = {1, 2, 4, 8, 16, 32, 64, 128};
4301
-
4302
3469
  void kernel_mul_mv_iq2_xxs_f32_impl(
4303
3470
  device const void * src0,
4304
3471
  device const float * src1,
@@ -4742,7 +3909,7 @@ void kernel_mul_mv_iq3_s_f32_impl(
4742
3909
  {
4743
3910
  int nval = 8;
4744
3911
  int pos = (32*sgitg + tiisg)*nval;
4745
- for (int i = 0; i < nval; ++i) values[pos + i] = iq3xs_grid[pos + i];
3912
+ for (int i = 0; i < nval; ++i) values[pos + i] = iq3s_grid[pos + i];
4746
3913
  threadgroup_barrier(mem_flags::mem_threadgroup);
4747
3914
  }
4748
3915
 
@@ -4769,12 +3936,14 @@ void kernel_mul_mv_iq3_s_f32_impl(
4769
3936
  for (int row = 0; row < N_DST; row++) {
4770
3937
 
4771
3938
  const float db = dh[0];
4772
- const float d = db * (0.5f + ((sc[0] >> 4*(ib%2)) & 0xf));
3939
+ const float d = db * (1 + 2*((sc[0] >> 4*(ib%2)) & 0xf));
4773
3940
 
4774
3941
  float2 sum = {0};
4775
3942
  for (int l = 0; l < 4; ++l) {
4776
- const threadgroup uint8_t * grid1 = (const threadgroup uint8_t *)(values + (qs[2*l+0] | ((qh[0] << (8-2*l)) & 256)));
4777
- const threadgroup uint8_t * grid2 = (const threadgroup uint8_t *)(values + (qs[2*l+1] | ((qh[0] << (7-2*l)) & 256)));
3943
+ const threadgroup uint32_t * table1 = qh[0] & kmask_iq2xs[2*l+0] ? values + 256 : values;
3944
+ const threadgroup uint32_t * table2 = qh[0] & kmask_iq2xs[2*l+1] ? values + 256 : values;
3945
+ const threadgroup uint8_t * grid1 = (const threadgroup uint8_t *)(table1 + qs[2*l+0]);
3946
+ const threadgroup uint8_t * grid2 = (const threadgroup uint8_t *)(table2 + qs[2*l+1]);
4778
3947
  for (int j = 0; j < 4; ++j) {
4779
3948
  sum[0] += yl[8*l + j + 0] * grid1[j] * select(1, -1, signs[l] & kmask_iq2xs[j+0]);
4780
3949
  sum[1] += yl[8*l + j + 4] * grid2[j] * select(1, -1, signs[l] & kmask_iq2xs[j+4]);
@@ -4795,7 +3964,7 @@ void kernel_mul_mv_iq3_s_f32_impl(
4795
3964
  for (int row = 0; row < N_DST; ++row) {
4796
3965
  all_sum = simd_sum(sumf[row]);
4797
3966
  if (tiisg == 0) {
4798
- dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum * 0.5f;
3967
+ dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum;
4799
3968
  }
4800
3969
  }
4801
3970
  }
@@ -4994,48 +4163,53 @@ void kernel_mul_mv_iq1_s_f32_impl(
4994
4163
  device const block_iq1_s * x = (device const block_iq1_s *) src0 + ib_row + offset0;
4995
4164
  device const float * y = (device const float *) src1 + r1*ne10 + im*ne00*ne1;
4996
4165
 
4997
- float yl[16];
4166
+ float yl[32];
4998
4167
  float sumf[N_DST]={0.f}, all_sum;
4999
4168
 
5000
4169
  const int nb32 = nb * (QK_K / 32);
5001
4170
 
5002
- const int ix = tiisg/2;
5003
- const int il = tiisg%2;
4171
+ const int ix = tiisg;
5004
4172
 
5005
- device const float * y4 = y + 32 * ix + 16 * il;
4173
+ device const float * y4 = y + 32 * ix;
5006
4174
 
5007
- for (int ib32 = ix; ib32 < nb32; ib32 += 16) {
4175
+ for (int ib32 = ix; ib32 < nb32; ib32 += 32) {
5008
4176
 
5009
- for (int i = 0; i < 16; ++i) {
4177
+ float sumy = 0;
4178
+ for (int i = 0; i < 32; ++i) {
5010
4179
  yl[i] = y4[i];
4180
+ sumy += yl[i];
5011
4181
  }
5012
4182
 
5013
4183
  const int ibl = ib32 / (QK_K / 32);
5014
4184
  const int ib = ib32 % (QK_K / 32);
5015
4185
 
5016
4186
  device const block_iq1_s * xr = x + ibl;
5017
- device const uint8_t * qs = xr->qs + 4 * ib + 2 * il;
5018
- device const uint8_t * sc = xr->scales + 2 * ib + il;
5019
- device const half * dh = &xr->d;
4187
+ device const uint8_t * qs = xr->qs + 4 * ib;
4188
+ device const uint16_t * qh = xr->qh + ib;
4189
+ device const half * dh = &xr->d;
5020
4190
 
5021
4191
  for (int row = 0; row < N_DST; row++) {
5022
4192
 
5023
- constant int8_t * grid1 = (constant int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5)));
5024
- constant int8_t * grid2 = (constant int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1)));
4193
+ constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | ((qh[0] << 8) & 0x700)));
4194
+ constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | ((qh[0] << 5) & 0x700)));
4195
+ constant uint8_t * grid3 = (constant uint8_t *)(iq1s_grid_gpu + (qs[2] | ((qh[0] << 2) & 0x700)));
4196
+ constant uint8_t * grid4 = (constant uint8_t *)(iq1s_grid_gpu + (qs[3] | ((qh[0] >> 1) & 0x700)));
5025
4197
 
5026
- float2 sum = {0};
5027
- for (int j = 0; j < 8; ++j) {
5028
- sum[0] += yl[j+ 0] * grid1[j];
5029
- sum[1] += yl[j+ 8] * grid2[j];
4198
+ float sum = 0;
4199
+ for (int j = 0; j < 4; ++j) {
4200
+ sum += yl[j+ 0] * (grid1[j] & 0xf) + yl[j+ 4] * (grid1[j] >> 4)
4201
+ + yl[j+ 8] * (grid2[j] & 0xf) + yl[j+12] * (grid2[j] >> 4)
4202
+ + yl[j+16] * (grid3[j] & 0xf) + yl[j+20] * (grid3[j] >> 4)
4203
+ + yl[j+24] * (grid4[j] & 0xf) + yl[j+28] * (grid4[j] >> 4);
5030
4204
  }
5031
- sumf[row] += (float)dh[0] * (sum[0] * (2*(sc[0] & 7) + 1) + sum[1] * (2*((sc[0] >> 4) & 7) + 1));
4205
+ sumf[row] += (float)dh[0] * (sum + sumy * (qh[0] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA)) * (2*((qh[0] >> 12) & 7) + 1);
5032
4206
 
5033
4207
  dh += nb*sizeof(block_iq1_s)/2;
5034
4208
  qs += nb*sizeof(block_iq1_s);
5035
- sc += nb*sizeof(block_iq1_s);
4209
+ qh += nb*sizeof(block_iq1_s)/2;
5036
4210
  }
5037
4211
 
5038
- y4 += 16 * 32;
4212
+ y4 += 32 * 32;
5039
4213
  }
5040
4214
 
5041
4215
  for (int row = 0; row < N_DST; ++row) {
@@ -5685,15 +4859,15 @@ void dequantize_iq3_s(device const block_iq3_s * xb, short il, thread type4x4 &
5685
4859
  device const uint8_t * qs = xb->qs + 8*ib32;
5686
4860
  device const uint8_t * signs = xb->signs + 4*ib32 + 2*il;
5687
4861
  const uint8_t qh = xb->qh[ib32] >> 4*il;
5688
- const float dl = d * (0.5f + ((xb->scales[ib32/2] >> 4*(ib32%2)) & 0xf)) * 0.5f;
5689
- constant uint8_t * grid1 = (constant uint8_t *)(iq3xs_grid + (qs[4*il+0] | ((qh << 8) & 256)));
5690
- constant uint8_t * grid2 = (constant uint8_t *)(iq3xs_grid + (qs[4*il+1] | ((qh << 7) & 256)));
4862
+ const float dl = d * (1 + 2*((xb->scales[ib32/2] >> 4*(ib32%2)) & 0xf));
4863
+ constant uint8_t * grid1 = (constant uint8_t *)(iq3s_grid + (qs[4*il+0] | ((qh << 8) & 256)));
4864
+ constant uint8_t * grid2 = (constant uint8_t *)(iq3s_grid + (qs[4*il+1] | ((qh << 7) & 256)));
5691
4865
  for (int i = 0; i < 4; ++i) {
5692
4866
  reg[0][i] = dl * grid1[i] * select(1, -1, signs[0] & kmask_iq2xs[i+0]);
5693
4867
  reg[1][i] = dl * grid2[i] * select(1, -1, signs[0] & kmask_iq2xs[i+4]);
5694
4868
  }
5695
- grid1 = (constant uint8_t *)(iq3xs_grid + (qs[4*il+2] | ((qh << 6) & 256)));
5696
- grid2 = (constant uint8_t *)(iq3xs_grid + (qs[4*il+3] | ((qh << 5) & 256)));
4869
+ grid1 = (constant uint8_t *)(iq3s_grid + (qs[4*il+2] | ((qh << 6) & 256)));
4870
+ grid2 = (constant uint8_t *)(iq3s_grid + (qs[4*il+3] | ((qh << 5) & 256)));
5697
4871
  for (int i = 0; i < 4; ++i) {
5698
4872
  reg[2][i] = dl * grid1[i] * select(1, -1, signs[1] & kmask_iq2xs[i+0]);
5699
4873
  reg[3][i] = dl * grid2[i] * select(1, -1, signs[1] & kmask_iq2xs[i+4]);
@@ -5722,16 +4896,21 @@ void dequantize_iq2_s(device const block_iq2_s * xb, short il, thread type4x4 &
5722
4896
  template <typename type4x4>
5723
4897
  void dequantize_iq1_s(device const block_iq1_s * xb, short il, thread type4x4 & reg) {
5724
4898
  // il is 0...15 for QK_K = 256 => index of block of 32 is il/2
4899
+ const int ib32 = il/2;
4900
+ il = il%2;
5725
4901
  const float d = xb->d;
5726
- device const uint8_t * qs = xb->qs + 2*il;
5727
- device const uint8_t * sc = xb->scales + il;
5728
- const float dl1 = d * (2*(sc[0] & 7) + 1);
5729
- const float dl2 = d * (2*((sc[0] >> 4) & 7) + 1);
5730
- constant int8_t * grid1 = (constant int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5)));
5731
- constant int8_t * grid2 = (constant int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1)));
5732
- for (int i = 0; i < 8; ++i) {
5733
- reg[i/4+0][i%4] = dl1 * grid1[i];
5734
- reg[i/4+2][i%4] = dl2 * grid2[i];
4902
+ device const uint8_t * qs = xb->qs + 4*ib32 + 2*il;
4903
+ device const uint16_t * qh = xb->qh;
4904
+ const float dl = d * (2*((qh[ib32] >> 12) & 7) + 1);
4905
+ const float ml = dl * (qh[ib32] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA);
4906
+ const uint16_t h = qh[ib32] >> 6*il;
4907
+ constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | ((h << 8) & 0x700)));
4908
+ constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | ((h << 5) & 0x700)));
4909
+ for (int i = 0; i < 4; ++i) {
4910
+ reg[0][i] = dl * (grid1[i] & 0xf) + ml;
4911
+ reg[1][i] = dl * (grid1[i] >> 4) + ml;
4912
+ reg[2][i] = dl * (grid2[i] & 0xf) + ml;
4913
+ reg[3][i] = dl * (grid2[i] >> 4) + ml;
5735
4914
  }
5736
4915
  }
5737
4916
 
@@ -6042,7 +5221,7 @@ template<typename block_q, short nl, void (*dequantize_func)(device const block_
6042
5221
  void kernel_mul_mm_id_impl(
6043
5222
  device const uchar * src0,
6044
5223
  device const uchar * src1,
6045
- thread short * src1ids,
5224
+ threadgroup short * src1ids,
6046
5225
  device float * dst,
6047
5226
  constant int64_t & ne00,
6048
5227
  constant int64_t & ne02,
@@ -6245,9 +5424,9 @@ kernel void kernel_mul_mm_id(
6245
5424
  tgpig.z = tgpig.z%(ne12*ne13);
6246
5425
 
6247
5426
  // row indices of src1 for expert id
6248
- int64_t _ne1 = 0;
6249
- short src1ids[512];
5427
+ threadgroup short * src1ids = (threadgroup short *)(shared_memory + 8192);
6250
5428
 
5429
+ int64_t _ne1 = 0;
6251
5430
  for (int64_t i1 = 0; i1 < ne1; i1++) {
6252
5431
  if (((device int32_t *) (ids + i1*nbi1))[idx] == id) {
6253
5432
  src1ids[_ne1++] = i1;