ed25519_blake2b 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +9 -0
 - data/CODE_OF_CONDUCT.md +74 -0
 - data/Gemfile +6 -0
 - data/Gemfile.lock +23 -0
 - data/LICENSE +21 -0
 - data/README.md +39 -0
 - data/Rakefile +13 -0
 - data/bin/console +14 -0
 - data/bin/setup +8 -0
 - data/ed25519_blake2b.gemspec +31 -0
 - data/ext/ed25519_blake2b/blake2-config.h +72 -0
 - data/ext/ed25519_blake2b/blake2-impl.h +160 -0
 - data/ext/ed25519_blake2b/blake2.h +195 -0
 - data/ext/ed25519_blake2b/blake2b-load-sse2.h +68 -0
 - data/ext/ed25519_blake2b/blake2b-load-sse41.h +402 -0
 - data/ext/ed25519_blake2b/blake2b-ref.c +373 -0
 - data/ext/ed25519_blake2b/blake2b-round.h +157 -0
 - data/ext/ed25519_blake2b/curve25519-donna-32bit.h +579 -0
 - data/ext/ed25519_blake2b/curve25519-donna-64bit.h +413 -0
 - data/ext/ed25519_blake2b/curve25519-donna-helpers.h +67 -0
 - data/ext/ed25519_blake2b/curve25519-donna-sse2.h +1112 -0
 - data/ext/ed25519_blake2b/ed25519-donna-32bit-sse2.h +513 -0
 - data/ext/ed25519_blake2b/ed25519-donna-32bit-tables.h +61 -0
 - data/ext/ed25519_blake2b/ed25519-donna-64bit-sse2.h +436 -0
 - data/ext/ed25519_blake2b/ed25519-donna-64bit-tables.h +53 -0
 - data/ext/ed25519_blake2b/ed25519-donna-64bit-x86-32bit.h +435 -0
 - data/ext/ed25519_blake2b/ed25519-donna-64bit-x86.h +351 -0
 - data/ext/ed25519_blake2b/ed25519-donna-basepoint-table.h +259 -0
 - data/ext/ed25519_blake2b/ed25519-donna-batchverify.h +275 -0
 - data/ext/ed25519_blake2b/ed25519-donna-impl-base.h +364 -0
 - data/ext/ed25519_blake2b/ed25519-donna-impl-sse2.h +390 -0
 - data/ext/ed25519_blake2b/ed25519-donna-portable-identify.h +103 -0
 - data/ext/ed25519_blake2b/ed25519-donna-portable.h +135 -0
 - data/ext/ed25519_blake2b/ed25519-donna.h +115 -0
 - data/ext/ed25519_blake2b/ed25519-hash-custom.c +28 -0
 - data/ext/ed25519_blake2b/ed25519-hash-custom.h +30 -0
 - data/ext/ed25519_blake2b/ed25519-hash.h +219 -0
 - data/ext/ed25519_blake2b/ed25519-randombytes-custom.h +10 -0
 - data/ext/ed25519_blake2b/ed25519-randombytes.h +91 -0
 - data/ext/ed25519_blake2b/ed25519.c +150 -0
 - data/ext/ed25519_blake2b/ed25519.h +30 -0
 - data/ext/ed25519_blake2b/extconf.rb +3 -0
 - data/ext/ed25519_blake2b/fuzz/README.md +173 -0
 - data/ext/ed25519_blake2b/fuzz/build-nix.php +134 -0
 - data/ext/ed25519_blake2b/fuzz/curve25519-ref10.c +1272 -0
 - data/ext/ed25519_blake2b/fuzz/curve25519-ref10.h +8 -0
 - data/ext/ed25519_blake2b/fuzz/ed25519-donna-sse2.c +3 -0
 - data/ext/ed25519_blake2b/fuzz/ed25519-donna.c +1 -0
 - data/ext/ed25519_blake2b/fuzz/ed25519-donna.h +34 -0
 - data/ext/ed25519_blake2b/fuzz/ed25519-ref10.c +4647 -0
 - data/ext/ed25519_blake2b/fuzz/ed25519-ref10.h +9 -0
 - data/ext/ed25519_blake2b/fuzz/fuzz-curve25519.c +172 -0
 - data/ext/ed25519_blake2b/fuzz/fuzz-ed25519.c +219 -0
 - data/ext/ed25519_blake2b/modm-donna-32bit.h +469 -0
 - data/ext/ed25519_blake2b/modm-donna-64bit.h +361 -0
 - data/ext/ed25519_blake2b/rbext.c +25 -0
 - data/ext/ed25519_blake2b/regression.h +1024 -0
 - data/lib/ed25519_blake2b/ed25519_blake2b.rb +4 -0
 - data/lib/ed25519_blake2b/version.rb +3 -0
 - metadata +147 -0
 
| 
         @@ -0,0 +1,195 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            /*
         
     | 
| 
      
 2 
     | 
    
         
            +
               BLAKE2 reference source code package - reference C implementations
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
               Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
         
     | 
| 
      
 5 
     | 
    
         
            +
               terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
         
     | 
| 
      
 6 
     | 
    
         
            +
               your option.  The terms of these licenses can be found at:
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
               - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
         
     | 
| 
      
 9 
     | 
    
         
            +
               - OpenSSL license   : https://www.openssl.org/source/license.html
         
     | 
| 
      
 10 
     | 
    
         
            +
               - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
               More information about the BLAKE2 hash function can be found at
         
     | 
| 
      
 13 
     | 
    
         
            +
               https://blake2.net.
         
     | 
| 
      
 14 
     | 
    
         
            +
            */
         
     | 
| 
      
 15 
     | 
    
         
            +
            #ifndef BLAKE2_H
         
     | 
| 
      
 16 
     | 
    
         
            +
            #define BLAKE2_H
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            #include <stddef.h>
         
     | 
| 
      
 19 
     | 
    
         
            +
            #include <stdint.h>
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            /*
             * BLAKE2_PACKED(x): emit the declaration x with byte packing requested.
             * Under MSVC, x is bracketed by a pack(push, 1) / pack(pop) pragma pair;
             * every other compiler gets the GCC-style packed attribute appended to x.
             */
            #if defined(_MSC_VER)
            #define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
            #else
            #define BLAKE2_PACKED(x) x __attribute__((packed))
            #endif
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            #if defined(__cplusplus)
         
     | 
| 
      
 28 
     | 
    
         
            +
            extern "C" {
         
     | 
| 
      
 29 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
              /* BLAKE2s size constants (byte counts, per their names). */
              enum blake2s_constant
              {
                BLAKE2S_BLOCKBYTES    = 64,
                BLAKE2S_OUTBYTES      = 32,
                BLAKE2S_KEYBYTES      = 32,
                BLAKE2S_SALTBYTES     = 8,
                BLAKE2S_PERSONALBYTES = 8
              };
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
              /* BLAKE2b size constants (byte counts, per their names). */
              enum blake2b_constant
              {
                BLAKE2B_BLOCKBYTES    = 128,
                BLAKE2B_OUTBYTES      = 64,
                BLAKE2B_KEYBYTES      = 64,
                BLAKE2B_SALTBYTES     = 16,
                BLAKE2B_PERSONALBYTES = 16
              };
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
              typedef struct blake2s_state__
         
     | 
| 
      
 50 
     | 
    
         
            +
              {
         
     | 
| 
      
 51 
     | 
    
         
            +
                uint32_t h[8];
         
     | 
| 
      
 52 
     | 
    
         
            +
                uint32_t t[2];
         
     | 
| 
      
 53 
     | 
    
         
            +
                uint32_t f[2];
         
     | 
| 
      
 54 
     | 
    
         
            +
                uint8_t  buf[BLAKE2S_BLOCKBYTES];
         
     | 
| 
      
 55 
     | 
    
         
            +
                size_t   buflen;
         
     | 
| 
      
 56 
     | 
    
         
            +
                size_t   outlen;
         
     | 
| 
      
 57 
     | 
    
         
            +
                uint8_t  last_node;
         
     | 
| 
      
 58 
     | 
    
         
            +
              } blake2s_state;
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
              typedef struct blake2b_state__
         
     | 
| 
      
 61 
     | 
    
         
            +
              {
         
     | 
| 
      
 62 
     | 
    
         
            +
                uint64_t h[8];
         
     | 
| 
      
 63 
     | 
    
         
            +
                uint64_t t[2];
         
     | 
| 
      
 64 
     | 
    
         
            +
                uint64_t f[2];
         
     | 
| 
      
 65 
     | 
    
         
            +
                uint8_t  buf[BLAKE2B_BLOCKBYTES];
         
     | 
| 
      
 66 
     | 
    
         
            +
                size_t   buflen;
         
     | 
| 
      
 67 
     | 
    
         
            +
                size_t   outlen;
         
     | 
| 
      
 68 
     | 
    
         
            +
                uint8_t  last_node;
         
     | 
| 
      
 69 
     | 
    
         
            +
              } blake2b_state;
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
              typedef struct blake2sp_state__
         
     | 
| 
      
 72 
     | 
    
         
            +
              {
         
     | 
| 
      
 73 
     | 
    
         
            +
                blake2s_state S[8][1];
         
     | 
| 
      
 74 
     | 
    
         
            +
                blake2s_state R[1];
         
     | 
| 
      
 75 
     | 
    
         
            +
                uint8_t       buf[8 * BLAKE2S_BLOCKBYTES];
         
     | 
| 
      
 76 
     | 
    
         
            +
                size_t        buflen;
         
     | 
| 
      
 77 
     | 
    
         
            +
                size_t        outlen;
         
     | 
| 
      
 78 
     | 
    
         
            +
              } blake2sp_state;
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
              typedef struct blake2bp_state__
         
     | 
| 
      
 81 
     | 
    
         
            +
              {
         
     | 
| 
      
 82 
     | 
    
         
            +
                blake2b_state S[4][1];
         
     | 
| 
      
 83 
     | 
    
         
            +
                blake2b_state R[1];
         
     | 
| 
      
 84 
     | 
    
         
            +
                uint8_t       buf[4 * BLAKE2B_BLOCKBYTES];
         
     | 
| 
      
 85 
     | 
    
         
            +
                size_t        buflen;
         
     | 
| 
      
 86 
     | 
    
         
            +
                size_t        outlen;
         
     | 
| 
      
 87 
     | 
    
         
            +
              } blake2bp_state;
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
      
 89 
     | 
    
         
            +
             
     | 
| 
      
 90 
     | 
    
         
            +
              BLAKE2_PACKED(struct blake2s_param__
         
     | 
| 
      
 91 
     | 
    
         
            +
              {
         
     | 
| 
      
 92 
     | 
    
         
            +
                uint8_t  digest_length; /* 1 */
         
     | 
| 
      
 93 
     | 
    
         
            +
                uint8_t  key_length;    /* 2 */
         
     | 
| 
      
 94 
     | 
    
         
            +
                uint8_t  fanout;        /* 3 */
         
     | 
| 
      
 95 
     | 
    
         
            +
                uint8_t  depth;         /* 4 */
         
     | 
| 
      
 96 
     | 
    
         
            +
                uint32_t leaf_length;   /* 8 */
         
     | 
| 
      
 97 
     | 
    
         
            +
                uint32_t node_offset;  /* 12 */
         
     | 
| 
      
 98 
     | 
    
         
            +
                uint16_t xof_length;    /* 14 */
         
     | 
| 
      
 99 
     | 
    
         
            +
                uint8_t  node_depth;    /* 15 */
         
     | 
| 
      
 100 
     | 
    
         
            +
                uint8_t  inner_length;  /* 16 */
         
     | 
| 
      
 101 
     | 
    
         
            +
                /* uint8_t  reserved[0]; */
         
     | 
| 
      
 102 
     | 
    
         
            +
                uint8_t  salt[BLAKE2S_SALTBYTES]; /* 24 */
         
     | 
| 
      
 103 
     | 
    
         
            +
                uint8_t  personal[BLAKE2S_PERSONALBYTES];  /* 32 */
         
     | 
| 
      
 104 
     | 
    
         
            +
              });
         
     | 
| 
      
 105 
     | 
    
         
            +
             
     | 
| 
      
 106 
     | 
    
         
            +
              typedef struct blake2s_param__ blake2s_param;
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
              BLAKE2_PACKED(struct blake2b_param__
         
     | 
| 
      
 109 
     | 
    
         
            +
              {
         
     | 
| 
      
 110 
     | 
    
         
            +
                uint8_t  digest_length; /* 1 */
         
     | 
| 
      
 111 
     | 
    
         
            +
                uint8_t  key_length;    /* 2 */
         
     | 
| 
      
 112 
     | 
    
         
            +
                uint8_t  fanout;        /* 3 */
         
     | 
| 
      
 113 
     | 
    
         
            +
                uint8_t  depth;         /* 4 */
         
     | 
| 
      
 114 
     | 
    
         
            +
                uint32_t leaf_length;   /* 8 */
         
     | 
| 
      
 115 
     | 
    
         
            +
                uint32_t node_offset;   /* 12 */
         
     | 
| 
      
 116 
     | 
    
         
            +
                uint32_t xof_length;    /* 16 */
         
     | 
| 
      
 117 
     | 
    
         
            +
                uint8_t  node_depth;    /* 17 */
         
     | 
| 
      
 118 
     | 
    
         
            +
                uint8_t  inner_length;  /* 18 */
         
     | 
| 
      
 119 
     | 
    
         
            +
                uint8_t  reserved[14];  /* 32 */
         
     | 
| 
      
 120 
     | 
    
         
            +
                uint8_t  salt[BLAKE2B_SALTBYTES]; /* 48 */
         
     | 
| 
      
 121 
     | 
    
         
            +
                uint8_t  personal[BLAKE2B_PERSONALBYTES];  /* 64 */
         
     | 
| 
      
 122 
     | 
    
         
            +
              });
         
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
      
 124 
     | 
    
         
            +
              typedef struct blake2b_param__ blake2b_param;
         
     | 
| 
      
 125 
     | 
    
         
            +
             
     | 
| 
      
 126 
     | 
    
         
            +
              typedef struct blake2xs_state__
         
     | 
| 
      
 127 
     | 
    
         
            +
              {
         
     | 
| 
      
 128 
     | 
    
         
            +
                blake2s_state S[1];
         
     | 
| 
      
 129 
     | 
    
         
            +
                blake2s_param P[1];
         
     | 
| 
      
 130 
     | 
    
         
            +
              } blake2xs_state;
         
     | 
| 
      
 131 
     | 
    
         
            +
             
     | 
| 
      
 132 
     | 
    
         
            +
              typedef struct blake2xb_state__
         
     | 
| 
      
 133 
     | 
    
         
            +
              {
         
     | 
| 
      
 134 
     | 
    
         
            +
                blake2b_state S[1];
         
     | 
| 
      
 135 
     | 
    
         
            +
                blake2b_param P[1];
         
     | 
| 
      
 136 
     | 
    
         
            +
              } blake2xb_state;
         
     | 
| 
      
 137 
     | 
    
         
            +
             
     | 
| 
      
 138 
     | 
    
         
            +
              /* Padded structs result in a compile-time error */
         
     | 
| 
      
 139 
     | 
    
         
            +
              enum {
         
     | 
| 
      
 140 
     | 
    
         
            +
                BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES),
         
     | 
| 
      
 141 
     | 
    
         
            +
                BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES)
         
     | 
| 
      
 142 
     | 
    
         
            +
              };
         
     | 
| 
      
 143 
     | 
    
         
            +
             
     | 
| 
      
 144 
     | 
    
         
            +
              /* Streaming API */
         
     | 
| 
      
 145 
     | 
    
         
            +
              int blake2s_init( blake2s_state *S, size_t outlen );
         
     | 
| 
      
 146 
     | 
    
         
            +
              int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
         
     | 
| 
      
 147 
     | 
    
         
            +
              int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
         
     | 
| 
      
 148 
     | 
    
         
            +
              int blake2s_update( blake2s_state *S, const void *in, size_t inlen );
         
     | 
| 
      
 149 
     | 
    
         
            +
              int blake2s_final( blake2s_state *S, void *out, size_t outlen );
         
     | 
| 
      
 150 
     | 
    
         
            +
             
     | 
| 
      
 151 
     | 
    
         
            +
              int blake2b_init( blake2b_state *S, size_t outlen );
         
     | 
| 
      
 152 
     | 
    
         
            +
              int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
         
     | 
| 
      
 153 
     | 
    
         
            +
              int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
         
     | 
| 
      
 154 
     | 
    
         
            +
              int blake2b_update( blake2b_state *S, const void *in, size_t inlen );
         
     | 
| 
      
 155 
     | 
    
         
            +
              int blake2b_final( blake2b_state *S, void *out, size_t outlen );
         
     | 
| 
      
 156 
     | 
    
         
            +
             
     | 
| 
      
 157 
     | 
    
         
            +
              int blake2sp_init( blake2sp_state *S, size_t outlen );
         
     | 
| 
      
 158 
     | 
    
         
            +
              int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
         
     | 
| 
      
 159 
     | 
    
         
            +
              int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen );
         
     | 
| 
      
 160 
     | 
    
         
            +
              int blake2sp_final( blake2sp_state *S, void *out, size_t outlen );
         
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
              int blake2bp_init( blake2bp_state *S, size_t outlen );
         
     | 
| 
      
 163 
     | 
    
         
            +
              int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
         
     | 
| 
      
 164 
     | 
    
         
            +
              int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen );
         
     | 
| 
      
 165 
     | 
    
         
            +
              int blake2bp_final( blake2bp_state *S, void *out, size_t outlen );
         
     | 
| 
      
 166 
     | 
    
         
            +
             
     | 
| 
      
 167 
     | 
    
         
            +
              /* Variable output length API */
         
     | 
| 
      
 168 
     | 
    
         
            +
              int blake2xs_init( blake2xs_state *S, const size_t outlen );
         
     | 
| 
      
 169 
     | 
    
         
            +
              int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen );
         
     | 
| 
      
 170 
     | 
    
         
            +
              int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen );
         
     | 
| 
      
 171 
     | 
    
         
            +
              int blake2xs_final(blake2xs_state *S, void *out, size_t outlen);
         
     | 
| 
      
 172 
     | 
    
         
            +
             
     | 
| 
      
 173 
     | 
    
         
            +
              int blake2xb_init( blake2xb_state *S, const size_t outlen );
         
     | 
| 
      
 174 
     | 
    
         
            +
              int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen );
         
     | 
| 
      
 175 
     | 
    
         
            +
              int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen );
         
     | 
| 
      
 176 
     | 
    
         
            +
              int blake2xb_final(blake2xb_state *S, void *out, size_t outlen);
         
     | 
| 
      
 177 
     | 
    
         
            +
             
     | 
| 
      
 178 
     | 
    
         
            +
              /*
               * Simple API
               *
               * One call per variant, all sharing the same parameter list: an output
               * buffer with its length, an input buffer with its length, and a key
               * buffer with its length. Every function returns int; the meaning of the
               * return value is defined by the implementation files, not by this header.
               */
              int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
              int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );

              int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
              int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );

              int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
              int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );

              /* This is simply an alias for blake2b */
              int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
         
     | 
| 
      
 190 
     | 
    
         
            +
             
     | 
| 
      
 191 
     | 
    
         
            +
            #if defined(__cplusplus)
         
     | 
| 
      
 192 
     | 
    
         
            +
            }
         
     | 
| 
      
 193 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 194 
     | 
    
         
            +
             
     | 
| 
      
 195 
     | 
    
         
            +
            #endif
         
     | 
| 
         @@ -0,0 +1,68 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            /*
         
     | 
| 
      
 2 
     | 
    
         
            +
               BLAKE2 reference source code package - optimized C implementations
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
               Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
         
     | 
| 
      
 5 
     | 
    
         
            +
               terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
         
     | 
| 
      
 6 
     | 
    
         
            +
               your option.  The terms of these licenses can be found at:
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
               - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
         
     | 
| 
      
 9 
     | 
    
         
            +
               - OpenSSL license   : https://www.openssl.org/source/license.html
         
     | 
| 
      
 10 
     | 
    
         
            +
               - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
               More information about the BLAKE2 hash function can be found at
         
     | 
| 
      
 13 
     | 
    
         
            +
               https://blake2.net.
         
     | 
| 
      
 14 
     | 
    
         
            +
            */
         
     | 
| 
      
 15 
     | 
    
         
            +
            #ifndef BLAKE2B_LOAD_SSE2_H
         
     | 
| 
      
 16 
     | 
    
         
            +
            #define BLAKE2B_LOAD_SSE2_H
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            #define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
         
     | 
| 
      
 19 
     | 
    
         
            +
            #define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
         
     | 
| 
      
 20 
     | 
    
         
            +
            #define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
         
     | 
| 
      
 21 
     | 
    
         
            +
            #define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
         
     | 
| 
      
 22 
     | 
    
         
            +
            #define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
         
     | 
| 
      
 23 
     | 
    
         
            +
            #define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
         
     | 
| 
      
 24 
     | 
    
         
            +
            #define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
         
     | 
| 
      
 25 
     | 
    
         
            +
            #define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
         
     | 
| 
      
 26 
     | 
    
         
            +
            #define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
         
     | 
| 
      
 27 
     | 
    
         
            +
            #define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
         
     | 
| 
      
 28 
     | 
    
         
            +
            #define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
         
     | 
| 
      
 29 
     | 
    
         
            +
            #define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
         
     | 
| 
      
 30 
     | 
    
         
            +
            #define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
         
     | 
| 
      
 31 
     | 
    
         
            +
            #define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
         
     | 
| 
      
 32 
     | 
    
         
            +
            #define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
         
     | 
| 
      
 33 
     | 
    
         
            +
            #define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
         
     | 
| 
      
 34 
     | 
    
         
            +
            #define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
         
     | 
| 
      
 35 
     | 
    
         
            +
            #define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
         
     | 
| 
      
 36 
     | 
    
         
            +
            #define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
         
     | 
| 
      
 37 
     | 
    
         
            +
            #define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
         
     | 
| 
      
 38 
     | 
    
         
            +
            #define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
         
     | 
| 
      
 39 
     | 
    
         
            +
            #define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
         
     | 
| 
      
 40 
     | 
    
         
            +
            #define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
         
     | 
| 
      
 41 
     | 
    
         
            +
            #define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
         
     | 
| 
      
 42 
     | 
    
         
            +
            #define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
         
     | 
| 
      
 43 
     | 
    
         
            +
            /* Sets b0 and b1 from m15, m5, m10 and m13 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_6_2(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m15, m5); \
                    b1 = _mm_set_epi64x(m10, m13); \
                } while (0)
         
     | 
| 
      
 44 
     | 
    
         
            +
            /* Sets b0 and b1 from m6, m0, m8 and m9 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_6_3(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m6, m0); \
                    b1 = _mm_set_epi64x(m8, m9); \
                } while (0)
         
     | 
| 
      
 45 
     | 
    
         
            +
            /* Sets b0 and b1 from m3, m7, m11 and m2 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_6_4(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m3, m7); \
                    b1 = _mm_set_epi64x(m11, m2); \
                } while (0)
         
     | 
| 
      
 46 
     | 
    
         
            +
            /* Sets b0 and b1 from m7, m13, m3 and m12 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_7_1(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m7, m13); \
                    b1 = _mm_set_epi64x(m3, m12); \
                } while (0)
         
     | 
| 
      
 47 
     | 
    
         
            +
            /* Sets b0 and b1 from m14, m11, m9 and m1 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_7_2(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m14, m11); \
                    b1 = _mm_set_epi64x(m9, m1); \
                } while (0)
         
     | 
| 
      
 48 
     | 
    
         
            +
            /* Sets b0 and b1 from m15, m5, m2 and m8 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_7_3(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m15, m5); \
                    b1 = _mm_set_epi64x(m2, m8); \
                } while (0)
         
     | 
| 
      
 49 
     | 
    
         
            +
            /* Sets b0 and b1 from m4, m0, m10 and m6 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_7_4(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m4, m0); \
                    b1 = _mm_set_epi64x(m10, m6); \
                } while (0)
         
     | 
| 
      
 50 
     | 
    
         
            +
            /* Sets b0 and b1 from m14, m6, m0 and m11 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_8_1(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m14, m6); \
                    b1 = _mm_set_epi64x(m0, m11); \
                } while (0)
         
     | 
| 
      
 51 
     | 
    
         
            +
            /* Sets b0 and b1 from m9, m15, m8 and m3 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_8_2(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m9, m15); \
                    b1 = _mm_set_epi64x(m8, m3); \
                } while (0)
         
     | 
| 
      
 52 
     | 
    
         
            +
            /* Sets b0 and b1 from m13, m12, m10 and m1 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_8_3(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m13, m12); \
                    b1 = _mm_set_epi64x(m10, m1); \
                } while (0)
         
     | 
| 
      
 53 
     | 
    
         
            +
            /* Sets b0 and b1 from m7, m2, m5 and m4 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_8_4(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m7, m2); \
                    b1 = _mm_set_epi64x(m5, m4); \
                } while (0)
         
     | 
| 
      
 54 
     | 
    
         
            +
            /* Sets b0 and b1 from m8, m10, m1 and m7 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_9_1(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m8, m10); \
                    b1 = _mm_set_epi64x(m1, m7); \
                } while (0)
         
     | 
| 
      
 55 
     | 
    
         
            +
            /* Sets b0 and b1 from m4, m2, m5 and m6 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_9_2(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m4, m2); \
                    b1 = _mm_set_epi64x(m5, m6); \
                } while (0)
         
     | 
| 
      
 56 
     | 
    
         
            +
            /* Sets b0 and b1 from m9, m15, m13 and m3 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_9_3(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m9, m15); \
                    b1 = _mm_set_epi64x(m13, m3); \
                } while (0)
         
     | 
| 
      
 57 
     | 
    
         
            +
            /* Sets b0 and b1 from m14, m11, m0 and m12 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_9_4(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m14, m11); \
                    b1 = _mm_set_epi64x(m0, m12); \
                } while (0)
         
     | 
| 
      
 58 
     | 
    
         
            +
            /* Sets b0 and b1 from m2, m0, m6 and m4 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_10_1(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m2, m0); \
                    b1 = _mm_set_epi64x(m6, m4); \
                } while (0)
         
     | 
| 
      
 59 
     | 
    
         
            +
            /* Sets b0 and b1 from m3, m1, m7 and m5 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_10_2(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m3, m1); \
                    b1 = _mm_set_epi64x(m7, m5); \
                } while (0)
         
     | 
| 
      
 60 
     | 
    
         
            +
            /* Sets b0 and b1 from m10, m8, m14 and m12 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_10_3(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m10, m8); \
                    b1 = _mm_set_epi64x(m14, m12); \
                } while (0)
         
     | 
| 
      
 61 
     | 
    
         
            +
            /* Sets b0 and b1 from m11, m9, m15 and m13 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_10_4(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m11, m9); \
                    b1 = _mm_set_epi64x(m15, m13); \
                } while (0)
         
     | 
| 
      
 62 
     | 
    
         
            +
            /* Sets b0 and b1 from m4, m14, m13 and m9 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_11_1(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m4, m14); \
                    b1 = _mm_set_epi64x(m13, m9); \
                } while (0)
         
     | 
| 
      
 63 
     | 
    
         
            +
            /* Sets b0 and b1 from m8, m10, m6 and m15 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_11_2(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m8, m10); \
                    b1 = _mm_set_epi64x(m6, m15); \
                } while (0)
         
     | 
| 
      
 64 
     | 
    
         
            +
            /* Sets b0 and b1 from m0, m1, m5 and m11 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_11_3(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m0, m1); \
                    b1 = _mm_set_epi64x(m5, m11); \
                } while (0)
         
     | 
| 
      
 65 
     | 
    
         
            +
            /* Sets b0 and b1 from m2, m12, m3 and m7 (which must be in scope at the expansion site).
               Wrapped in do/while(0) so the pair of assignments acts as a single statement. */
            #define LOAD_MSG_11_4(b0, b1) \
                do { \
                    b0 = _mm_set_epi64x(m2, m12); \
                    b1 = _mm_set_epi64x(m3, m7); \
                } while (0)
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
      
 68 
     | 
    
         
            +
            #endif
         
     | 
| 
         @@ -0,0 +1,402 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            /*
         
     | 
| 
      
 2 
     | 
    
         
            +
               BLAKE2 reference source code package - optimized C implementations
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
               Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
         
     | 
| 
      
 5 
     | 
    
         
            +
               terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
         
     | 
| 
      
 6 
     | 
    
         
            +
               your option.  The terms of these licenses can be found at:
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
               - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
         
     | 
| 
      
 9 
     | 
    
         
            +
               - OpenSSL license   : https://www.openssl.org/source/license.html
         
     | 
| 
      
 10 
     | 
    
         
            +
               - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
               More information about the BLAKE2 hash function can be found at
         
     | 
| 
      
 13 
     | 
    
         
            +
               https://blake2.net.
         
     | 
| 
      
 14 
     | 
    
         
            +
            */
         
     | 
| 
      
 15 
     | 
    
         
            +
            #ifndef BLAKE2B_LOAD_SSE41_H
         
     | 
| 
      
 16 
     | 
    
         
            +
            #define BLAKE2B_LOAD_SSE41_H
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m0, m1, m2 and m3, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_0_1(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m0, m1); \
                    (b1) = _mm_unpacklo_epi64(m2, m3); \
                } while (0)
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m0, m1, m2 and m3, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_0_2(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m0, m1); \
                    (b1) = _mm_unpackhi_epi64(m2, m3); \
                } while (0)
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m4, m5, m6 and m7, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_0_3(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m4, m5); \
                    (b1) = _mm_unpacklo_epi64(m6, m7); \
                } while (0)
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m4, m5, m6 and m7, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_0_4(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m4, m5); \
                    (b1) = _mm_unpackhi_epi64(m6, m7); \
                } while (0)
         
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m7, m2, m4 and m6, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_1_1(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m7, m2); \
                    (b1) = _mm_unpackhi_epi64(m4, m6); \
                } while (0)
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m5, m4, m3 and m7, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_1_2(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m5, m4); \
                    (b1) = _mm_alignr_epi8(m3, m7, 8); \
                } while (0)
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
             
     | 
| 
      
 66 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m0, m5 and m2, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_1_3(b0, b1) \
                do { \
                    (b0) = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
                    (b1) = _mm_unpackhi_epi64(m5, m2); \
                } while (0)
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
      
 74 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m6, m1 and m3, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_1_4(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m6, m1); \
                    (b1) = _mm_unpackhi_epi64(m3, m1); \
                } while (0)
         
     | 
| 
      
 80 
     | 
    
         
            +
             
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
      
 82 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m6, m5, m2 and m7, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_2_1(b0, b1) \
                do { \
                    (b0) = _mm_alignr_epi8(m6, m5, 8); \
                    (b1) = _mm_unpackhi_epi64(m2, m7); \
                } while (0)
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
      
 89 
     | 
    
         
            +
             
     | 
| 
      
 90 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m4, m0, m1 and m6, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_2_2(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m4, m0); \
                    (b1) = _mm_blend_epi16(m1, m6, 0xF0); \
                } while (0)
         
     | 
| 
      
 96 
     | 
    
         
            +
             
     | 
| 
      
 97 
     | 
    
         
            +
             
     | 
| 
      
 98 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m5, m1, m3 and m4, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_2_3(b0, b1) \
                do { \
                    (b0) = _mm_blend_epi16(m5, m1, 0xF0); \
                    (b1) = _mm_unpackhi_epi64(m3, m4); \
                } while (0)
         
     | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
      
 105 
     | 
    
         
            +
             
     | 
| 
      
 106 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m7, m3, m2 and m0, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_2_4(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m7, m3); \
                    (b1) = _mm_alignr_epi8(m2, m0, 8); \
                } while (0)
         
     | 
| 
      
 112 
     | 
    
         
            +
             
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m3, m1, m6 and m5, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_3_1(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m3, m1); \
                    (b1) = _mm_unpackhi_epi64(m6, m5); \
                } while (0)
         
     | 
| 
      
 120 
     | 
    
         
            +
             
     | 
| 
      
 121 
     | 
    
         
            +
             
     | 
| 
      
 122 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m4, m0, m6 and m7, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_3_2(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m4, m0); \
                    (b1) = _mm_unpacklo_epi64(m6, m7); \
                } while (0)
         
     | 
| 
      
 128 
     | 
    
         
            +
             
     | 
| 
      
 129 
     | 
    
         
            +
             
     | 
| 
      
 130 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m1, m2 and m7, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_3_3(b0, b1) \
                do { \
                    (b0) = _mm_blend_epi16(m1, m2, 0xF0); \
                    (b1) = _mm_blend_epi16(m2, m7, 0xF0); \
                } while (0)
         
     | 
| 
      
 136 
     | 
    
         
            +
             
     | 
| 
      
 137 
     | 
    
         
            +
             
     | 
| 
      
 138 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m3, m5, m0 and m4, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_3_4(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m3, m5); \
                    (b1) = _mm_unpacklo_epi64(m0, m4); \
                } while (0)
         
     | 
| 
      
 144 
     | 
    
         
            +
             
     | 
| 
      
 145 
     | 
    
         
            +
             
     | 
| 
      
 146 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m4, m2, m1 and m5, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_4_1(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m4, m2); \
                    (b1) = _mm_unpacklo_epi64(m1, m5); \
                } while (0)
         
     | 
| 
      
 152 
     | 
    
         
            +
             
     | 
| 
      
 153 
     | 
    
         
            +
             
     | 
| 
      
 154 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m0, m3, m2 and m7, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_4_2(b0, b1) \
                do { \
                    (b0) = _mm_blend_epi16(m0, m3, 0xF0); \
                    (b1) = _mm_blend_epi16(m2, m7, 0xF0); \
                } while (0)
         
     | 
| 
      
 160 
     | 
    
         
            +
             
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m7, m5, m3 and m1, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_4_3(b0, b1) \
                do { \
                    (b0) = _mm_blend_epi16(m7, m5, 0xF0); \
                    (b1) = _mm_blend_epi16(m3, m1, 0xF0); \
                } while (0)
         
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
      
 169 
     | 
    
         
            +
             
     | 
| 
      
 170 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m6, m0 and m4, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_4_4(b0, b1) \
                do { \
                    (b0) = _mm_alignr_epi8(m6, m0, 8); \
                    (b1) = _mm_blend_epi16(m4, m6, 0xF0); \
                } while (0)
         
     | 
| 
      
 176 
     | 
    
         
            +
             
     | 
| 
      
 177 
     | 
    
         
            +
             
     | 
| 
      
 178 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m1, m3, m0 and m4, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_5_1(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m1, m3); \
                    (b1) = _mm_unpacklo_epi64(m0, m4); \
                } while (0)
         
     | 
| 
      
 184 
     | 
    
         
            +
             
     | 
| 
      
 185 
     | 
    
         
            +
             
     | 
| 
      
 186 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m6, m5 and m1, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_5_2(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m6, m5); \
                    (b1) = _mm_unpackhi_epi64(m5, m1); \
                } while (0)
         
     | 
| 
      
 192 
     | 
    
         
            +
             
     | 
| 
      
 193 
     | 
    
         
            +
             
     | 
| 
      
 194 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m2, m3, m7 and m0, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_5_3(b0, b1) \
                do { \
                    (b0) = _mm_blend_epi16(m2, m3, 0xF0); \
                    (b1) = _mm_unpackhi_epi64(m7, m0); \
                } while (0)
         
     | 
| 
      
 200 
     | 
    
         
            +
             
     | 
| 
      
 201 
     | 
    
         
            +
             
     | 
| 
      
 202 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m6, m2, m7 and m4, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_5_4(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m6, m2); \
                    (b1) = _mm_blend_epi16(m7, m4, 0xF0); \
                } while (0)
         
     | 
| 
      
 208 
     | 
    
         
            +
             
     | 
| 
      
 209 
     | 
    
         
            +
             
     | 
| 
      
 210 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m6, m0, m7 and m2, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_6_1(b0, b1) \
                do { \
                    (b0) = _mm_blend_epi16(m6, m0, 0xF0); \
                    (b1) = _mm_unpacklo_epi64(m7, m2); \
                } while (0)
         
     | 
| 
      
 216 
     | 
    
         
            +
             
     | 
| 
      
 217 
     | 
    
         
            +
             
     | 
| 
      
 218 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m2, m7, m5 and m6, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_6_2(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m2, m7); \
                    (b1) = _mm_alignr_epi8(m5, m6, 8); \
                } while (0)
         
     | 
| 
      
 224 
     | 
    
         
            +
             
     | 
| 
      
 225 
     | 
    
         
            +
             
     | 
| 
      
 226 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m0, m3 and m4, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_6_3(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m0, m3); \
                    (b1) = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
                } while (0)
         
     | 
| 
      
 232 
     | 
    
         
            +
             
     | 
| 
      
 233 
     | 
    
         
            +
             
     | 
| 
      
 234 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m3, m1 and m5, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_6_4(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m3, m1); \
                    (b1) = _mm_blend_epi16(m1, m5, 0xF0); \
                } while (0)
         
     | 
| 
      
 240 
     | 
    
         
            +
             
     | 
| 
      
 241 
     | 
    
         
            +
             
     | 
| 
      
 242 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m6, m3 and m1, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_7_1(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m6, m3); \
                    (b1) = _mm_blend_epi16(m6, m1, 0xF0); \
                } while (0)
         
     | 
| 
      
 248 
     | 
    
         
            +
             
     | 
| 
      
 249 
     | 
    
         
            +
             
     | 
| 
      
 250 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m7, m5, m0 and m4, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_7_2(b0, b1) \
                do { \
                    (b0) = _mm_alignr_epi8(m7, m5, 8); \
                    (b1) = _mm_unpackhi_epi64(m0, m4); \
                } while (0)
         
     | 
| 
      
 256 
     | 
    
         
            +
             
     | 
| 
      
 257 
     | 
    
         
            +
             
     | 
| 
      
 258 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m2, m7, m4 and m1, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_7_3(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m2, m7); \
                    (b1) = _mm_unpacklo_epi64(m4, m1); \
                } while (0)
         
     | 
| 
      
 264 
     | 
    
         
            +
             
     | 
| 
      
 265 
     | 
    
         
            +
             
     | 
| 
      
 266 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m0, m2, m3 and m5, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_7_4(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m0, m2); \
                    (b1) = _mm_unpacklo_epi64(m3, m5); \
                } while (0)
         
     | 
| 
      
 272 
     | 
    
         
            +
             
     | 
| 
      
 273 
     | 
    
         
            +
             
     | 
| 
      
 274 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m3, m7, m0 and m5, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_8_1(b0, b1) \
                do { \
                    (b0) = _mm_unpacklo_epi64(m3, m7); \
                    (b1) = _mm_alignr_epi8(m0, m5, 8); \
                } while (0)
         
     | 
| 
      
 280 
     | 
    
         
            +
             
     | 
| 
      
 281 
     | 
    
         
            +
             
     | 
| 
      
 282 
     | 
    
         
            +
            /* Fills the b0/b1 pair from m7, m4 and m1, which must be in scope
               where the macro is expanded. */
            #define LOAD_MSG_8_2(b0, b1) \
                do { \
                    (b0) = _mm_unpackhi_epi64(m7, m4); \
                    (b1) = _mm_alignr_epi8(m4, m1, 8); \
                } while (0)
         
     | 
| 
      
 288 
     | 
    
         
            +
             
     | 
| 
      
 289 
     | 
    
         
            +
             
     | 
| 
      
 290 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m0, m5 and m6. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = m6 (copied unchanged)
             *   b1 = { m0[127:64], m5[63:0] }   (8-byte right shift of m5:m0)
             */
            #define LOAD_MSG_8_3(b0, b1)               \
                do {                                   \
                    (b0) = m6;                         \
                    (b1) = _mm_alignr_epi8(m5, m0, 8); \
                } while (0)
         
     | 
| 
      
 296 
     | 
    
         
            +
             
     | 
| 
      
 297 
     | 
    
         
            +
             
     | 
| 
      
 298 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m1, m2 and m3. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m1[63:0], m3[127:64] }   (blend mask 0xF0: upper four
             *                                    16-bit words come from m3)
             *   b1 = m2 (copied unchanged)
             */
            #define LOAD_MSG_8_4(b0, b1)                  \
                do {                                      \
                    (b0) = _mm_blend_epi16(m1, m3, 0xF0); \
                    (b1) = m2;                            \
                } while (0)
         
     | 
| 
      
 304 
     | 
    
         
            +
             
     | 
| 
      
 305 
     | 
    
         
            +
             
     | 
| 
      
 306 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m0, m3, m4 and m5. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m5[63:0],   m4[63:0] }
             *   b1 = { m3[127:64], m0[127:64] }
             */
            #define LOAD_MSG_9_1(b0, b1)               \
                do {                                   \
                    (b0) = _mm_unpacklo_epi64(m5, m4); \
                    (b1) = _mm_unpackhi_epi64(m3, m0); \
                } while (0)
         
     | 
| 
      
 312 
     | 
    
         
            +
             
     | 
| 
      
 313 
     | 
    
         
            +
             
     | 
| 
      
 314 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m1, m2 and m3. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m1[63:0], m2[63:0] }
             *   b1 = { m3[63:0], m2[127:64] }   (blend mask 0xF0: upper four
             *                                    16-bit words come from m2)
             */
            #define LOAD_MSG_9_2(b0, b1)                  \
                do {                                      \
                    (b0) = _mm_unpacklo_epi64(m1, m2);    \
                    (b1) = _mm_blend_epi16(m3, m2, 0xF0); \
                } while (0)
         
     | 
| 
      
 320 
     | 
    
         
            +
             
     | 
| 
      
 321 
     | 
    
         
            +
             
     | 
| 
      
 322 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m1, m4, m6 and m7. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m7[127:64], m4[127:64] }
             *   b1 = { m1[127:64], m6[127:64] }
             */
            #define LOAD_MSG_9_3(b0, b1)               \
                do {                                   \
                    (b0) = _mm_unpackhi_epi64(m7, m4); \
                    (b1) = _mm_unpackhi_epi64(m1, m6); \
                } while (0)
         
     | 
| 
      
 328 
     | 
    
         
            +
             
     | 
| 
      
 329 
     | 
    
         
            +
             
     | 
| 
      
 330 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m0, m5, m6 and m7. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m5[127:64], m7[63:0] }   (8-byte right shift of m7:m5)
             *   b1 = { m6[63:0],   m0[63:0] }
             */
            #define LOAD_MSG_9_4(b0, b1)               \
                do {                                   \
                    (b0) = _mm_alignr_epi8(m7, m5, 8); \
                    (b1) = _mm_unpacklo_epi64(m6, m0); \
                } while (0)
         
     | 
| 
      
 336 
     | 
    
         
            +
             
     | 
| 
      
 337 
     | 
    
         
            +
             
     | 
| 
      
 338 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m0..m3. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m0[63:0], m1[63:0] }
             *   b1 = { m2[63:0], m3[63:0] }
             */
            #define LOAD_MSG_10_1(b0, b1)              \
                do {                                   \
                    (b0) = _mm_unpacklo_epi64(m0, m1); \
                    (b1) = _mm_unpacklo_epi64(m2, m3); \
                } while (0)
         
     | 
| 
      
 344 
     | 
    
         
            +
             
     | 
| 
      
 345 
     | 
    
         
            +
             
     | 
| 
      
 346 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m0..m3. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m0[127:64], m1[127:64] }
             *   b1 = { m2[127:64], m3[127:64] }
             */
            #define LOAD_MSG_10_2(b0, b1)              \
                do {                                   \
                    (b0) = _mm_unpackhi_epi64(m0, m1); \
                    (b1) = _mm_unpackhi_epi64(m2, m3); \
                } while (0)
         
     | 
| 
      
 352 
     | 
    
         
            +
             
     | 
| 
      
 353 
     | 
    
         
            +
             
     | 
| 
      
 354 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m4..m7. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m4[63:0], m5[63:0] }
             *   b1 = { m6[63:0], m7[63:0] }
             */
            #define LOAD_MSG_10_3(b0, b1)              \
                do {                                   \
                    (b0) = _mm_unpacklo_epi64(m4, m5); \
                    (b1) = _mm_unpacklo_epi64(m6, m7); \
                } while (0)
         
     | 
| 
      
 360 
     | 
    
         
            +
             
     | 
| 
      
 361 
     | 
    
         
            +
             
     | 
| 
      
 362 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m4..m7. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m4[127:64], m5[127:64] }
             *   b1 = { m6[127:64], m7[127:64] }
             */
            #define LOAD_MSG_10_4(b0, b1)              \
                do {                                   \
                    (b0) = _mm_unpackhi_epi64(m4, m5); \
                    (b1) = _mm_unpackhi_epi64(m6, m7); \
                } while (0)
         
     | 
| 
      
 368 
     | 
    
         
            +
             
     | 
| 
      
 369 
     | 
    
         
            +
             
     | 
| 
      
 370 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m2, m4, m6 and m7. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m7[63:0],   m2[63:0] }
             *   b1 = { m4[127:64], m6[127:64] }
             */
            #define LOAD_MSG_11_1(b0, b1)              \
                do {                                   \
                    (b0) = _mm_unpacklo_epi64(m7, m2); \
                    (b1) = _mm_unpackhi_epi64(m4, m6); \
                } while (0)
         
     | 
| 
      
 376 
     | 
    
         
            +
             
     | 
| 
      
 377 
     | 
    
         
            +
             
     | 
| 
      
 378 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m3, m4, m5 and m7. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m5[63:0],   m4[63:0] }
             *   b1 = { m7[127:64], m3[63:0] }   (8-byte right shift of m3:m7)
             */
            #define LOAD_MSG_11_2(b0, b1)              \
                do {                                   \
                    (b0) = _mm_unpacklo_epi64(m5, m4); \
                    (b1) = _mm_alignr_epi8(m3, m7, 8); \
                } while (0)
         
     | 
| 
      
 384 
     | 
    
         
            +
             
     | 
| 
      
 385 
     | 
    
         
            +
             
     | 
| 
      
 386 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m0, m2 and m5. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m0[127:64], m0[63:0] }   (m0 with its 64-bit halves swapped)
             *   b1 = { m5[127:64], m2[127:64] }
             */
            #define LOAD_MSG_11_3(b0, b1)                                 \
                do {                                                      \
                    (b0) = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
                    (b1) = _mm_unpackhi_epi64(m5, m2);                    \
                } while (0)
         
     | 
| 
      
 392 
     | 
    
         
            +
             
     | 
| 
      
 393 
     | 
    
         
            +
             
     | 
| 
      
 394 
     | 
    
         
            +
            /*
             * Fills b0 and b1 from m1, m3 and m6. The m* operands are not macro
             * parameters; they must be in scope wherever the macro is expanded.
             * Resulting lanes (low 64 bits first):
             *   b0 = { m6[63:0],   m1[63:0] }
             *   b1 = { m3[127:64], m1[127:64] }
             */
            #define LOAD_MSG_11_4(b0, b1)              \
                do {                                   \
                    (b0) = _mm_unpacklo_epi64(m6, m1); \
                    (b1) = _mm_unpackhi_epi64(m3, m1); \
                } while (0)
         
     | 
| 
      
 400 
     | 
    
         
            +
             
     | 
| 
      
 401 
     | 
    
         
            +
             
     | 
| 
      
 402 
     | 
    
         
            +
            #endif
         
     |