oj 3.16.10 → 3.16.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +10 -0
 - data/ext/oj/custom.c +10 -9
 - data/ext/oj/dump.c +373 -77
 - data/ext/oj/dump.h +7 -2
 - data/ext/oj/dump_compat.c +10 -9
 - data/ext/oj/dump_leaf.c +1 -1
 - data/ext/oj/dump_object.c +27 -17
 - data/ext/oj/dump_strict.c +7 -6
 - data/ext/oj/fast.c +1 -1
 - data/ext/oj/object.c +8 -8
 - data/ext/oj/oj.c +10 -5
 - data/ext/oj/rails.c +19 -17
 - data/ext/oj/simd.h +10 -0
 - data/ext/oj/wab.c +4 -3
 - data/lib/oj/version.rb +1 -1
 - data/pages/Encoding.md +1 -1
 - metadata +6 -5
 
    
        data/ext/oj/dump.c
    CHANGED
    
    | 
         @@ -152,8 +152,77 @@ inline static size_t newline_friendly_size(const uint8_t *str, size_t len) { 
     | 
|
| 
       152 
152 
     | 
    
         
             
                return calculate_string_size(str, len, newline_friendly_chars);
         
     | 
| 
       153 
153 
     | 
    
         
             
            }
         
     | 
| 
       154 
154 
     | 
    
         | 
| 
      
 155 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 156 
     | 
    
         
            +
            inline static uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
         
     | 
| 
      
 157 
     | 
    
         
            +
                uint8x16x4_t tab;
         
     | 
| 
      
 158 
     | 
    
         
            +
                tab.val[0] = vld1q_u8(table);
         
     | 
| 
      
 159 
     | 
    
         
            +
                tab.val[1] = vld1q_u8(table + 16);
         
     | 
| 
      
 160 
     | 
    
         
            +
                tab.val[2] = vld1q_u8(table + 32);
         
     | 
| 
      
 161 
     | 
    
         
            +
                tab.val[3] = vld1q_u8(table + 48);
         
     | 
| 
      
 162 
     | 
    
         
            +
                return tab;
         
     | 
| 
      
 163 
     | 
    
         
            +
            }
         
     | 
| 
      
 164 
     | 
    
         
            +
             
     | 
| 
      
 165 
     | 
    
         
            +
            static uint8x16x4_t hibit_friendly_chars_neon[2];
         
     | 
| 
      
 166 
     | 
    
         
            +
            static uint8x16x4_t rails_friendly_chars_neon[2];
         
     | 
| 
      
 167 
     | 
    
         
            +
            static uint8x16x4_t rails_xss_friendly_chars_neon[4];
         
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
      
 169 
     | 
    
         
            +
            void initialize_neon(void) {
         
     | 
| 
      
 170 
     | 
    
         
            +
                // We only need the first 128 bytes of the hibit friendly chars table. Everything above 127 is
         
     | 
| 
      
 171 
     | 
    
         
            +
                // set to 1. If that ever changes, the code will need to be updated.
         
     | 
| 
      
 172 
     | 
    
         
            +
                hibit_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars);
         
     | 
| 
      
 173 
     | 
    
         
            +
                hibit_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)hibit_friendly_chars + 64);
         
     | 
| 
      
 174 
     | 
    
         
            +
             
     | 
| 
      
 175 
     | 
    
         
            +
                // rails_friendly_chars is the same as hibit_friendly_chars. Only the first 128 bytes have values
         
     | 
| 
      
 176 
     | 
    
         
            +
                // that are not '1'. If that ever changes, the code will need to be updated.
         
     | 
| 
      
 177 
     | 
    
         
            +
                rails_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_friendly_chars);
         
     | 
| 
      
 178 
     | 
    
         
            +
                rails_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_friendly_chars + 64);
         
     | 
| 
      
 179 
     | 
    
         
            +
             
     | 
| 
      
 180 
     | 
    
         
            +
                rails_xss_friendly_chars_neon[0] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars);
         
     | 
| 
      
 181 
     | 
    
         
            +
                rails_xss_friendly_chars_neon[1] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 64);
         
     | 
| 
      
 182 
     | 
    
         
            +
                rails_xss_friendly_chars_neon[2] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 128);
         
     | 
| 
      
 183 
     | 
    
         
            +
                rails_xss_friendly_chars_neon[3] = load_uint8x16_4((const unsigned char *)rails_xss_friendly_chars + 192);
         
     | 
| 
      
 184 
     | 
    
         
            +
             
     | 
| 
      
 185 
     | 
    
         
            +
                // All bytes should be 0 except for those that need more than 1 byte of output. This will allow the
         
     | 
| 
      
 186 
     | 
    
         
            +
                // code to limit the lookups to the first 128 bytes (values 0 - 127). Bytes above 127 will result
         
     | 
| 
      
 187 
     | 
    
         
            +
                // in 0 with the vqtbl4q_u8 instruction.
         
     | 
| 
      
 188 
     | 
    
         
            +
                uint8x16_t one = vdupq_n_u8('1');
         
     | 
| 
      
 189 
     | 
    
         
            +
                for (int i = 0; i < 2; i++) {
         
     | 
| 
      
 190 
     | 
    
         
            +
                    for (int j = 0; j < 4; j++) {
         
     | 
| 
      
 191 
     | 
    
         
            +
                        hibit_friendly_chars_neon[i].val[j] = vsubq_u8(hibit_friendly_chars_neon[i].val[j], one);
         
     | 
| 
      
 192 
     | 
    
         
            +
                        rails_friendly_chars_neon[i].val[j] = vsubq_u8(rails_friendly_chars_neon[i].val[j], one);
         
     | 
| 
      
 193 
     | 
    
         
            +
                    }
         
     | 
| 
      
 194 
     | 
    
         
            +
                }
         
     | 
| 
      
 195 
     | 
    
         
            +
             
     | 
| 
      
 196 
     | 
    
         
            +
                for (int i = 0; i < 4; i++) {
         
     | 
| 
      
 197 
     | 
    
         
            +
                    for (int j = 0; j < 4; j++) {
         
     | 
| 
      
 198 
     | 
    
         
            +
                        rails_xss_friendly_chars_neon[i].val[j] = vsubq_u8(rails_xss_friendly_chars_neon[i].val[j], one);
         
     | 
| 
      
 199 
     | 
    
         
            +
                    }
         
     | 
| 
      
 200 
     | 
    
         
            +
                }
         
     | 
| 
      
 201 
     | 
    
         
            +
            }
         
     | 
| 
      
 202 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
       155 
204 
     | 
    
         
             
            inline static size_t hibit_friendly_size(const uint8_t *str, size_t len) {
         
     | 
| 
      
 205 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 206 
     | 
    
         
            +
                size_t size = 0;
         
     | 
| 
      
 207 
     | 
    
         
            +
                size_t i    = 0;
         
     | 
| 
      
 208 
     | 
    
         
            +
             
     | 
| 
      
 209 
     | 
    
         
            +
                for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
         
     | 
| 
      
 210 
     | 
    
         
            +
                    size += sizeof(uint8x16_t);
         
     | 
| 
      
 211 
     | 
    
         
            +
             
     | 
| 
      
 212 
     | 
    
         
            +
                    // See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
         
     | 
| 
      
 213 
     | 
    
         
            +
                    uint8x16_t chunk  = vld1q_u8(str);
         
     | 
| 
      
 214 
     | 
    
         
            +
                    uint8x16_t tmp1   = vqtbl4q_u8(hibit_friendly_chars_neon[0], chunk);
         
     | 
| 
      
 215 
     | 
    
         
            +
                    uint8x16_t tmp2   = vqtbl4q_u8(hibit_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
         
     | 
| 
      
 216 
     | 
    
         
            +
                    uint8x16_t result = vorrq_u8(tmp1, tmp2);
         
     | 
| 
      
 217 
     | 
    
         
            +
                    uint8_t    tmp    = vaddvq_u8(result);
         
     | 
| 
      
 218 
     | 
    
         
            +
                    size += tmp;
         
     | 
| 
      
 219 
     | 
    
         
            +
                }
         
     | 
| 
      
 220 
     | 
    
         
            +
             
     | 
| 
      
 221 
     | 
    
         
            +
                size_t total = size + calculate_string_size(str, len - i, hibit_friendly_chars);
         
     | 
| 
      
 222 
     | 
    
         
            +
                return total;
         
     | 
| 
      
 223 
     | 
    
         
            +
            #else
         
     | 
| 
       156 
224 
     | 
    
         
             
                return calculate_string_size(str, len, hibit_friendly_chars);
         
     | 
| 
      
 225 
     | 
    
         
            +
            #endif
         
     | 
| 
       157 
226 
     | 
    
         
             
            }
         
     | 
| 
       158 
227 
     | 
    
         | 
| 
       159 
228 
     | 
    
         
             
            inline static size_t slash_friendly_size(const uint8_t *str, size_t len) {
         
     | 
| 
         @@ -183,10 +252,52 @@ inline static size_t hixss_friendly_size(const uint8_t *str, size_t len) { 
     | 
|
| 
       183 
252 
     | 
    
         
             
            }
         
     | 
| 
       184 
253 
     | 
    
         | 
| 
       185 
254 
     | 
    
         
             
            inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) {
         
     | 
| 
       186 
     | 
    
         
            -
                long 
     | 
| 
       187 
     | 
    
         
            -
                 
     | 
| 
       188 
     | 
    
         
            -
             
     | 
| 
      
 255 
     | 
    
         
            +
                long     size = 0;
         
     | 
| 
      
 256 
     | 
    
         
            +
                uint32_t hi   = 0;
         
     | 
| 
      
 257 
     | 
    
         
            +
             
     | 
| 
      
 258 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 259 
     | 
    
         
            +
                size_t i = 0;
         
     | 
| 
      
 260 
     | 
    
         
            +
             
     | 
| 
      
 261 
     | 
    
         
            +
                if (len >= sizeof(uint8x16_t)) {
         
     | 
| 
      
 262 
     | 
    
         
            +
                    uint8x16_t has_some_hibit = vdupq_n_u8(0);
         
     | 
| 
      
 263 
     | 
    
         
            +
                    uint8x16_t hibit          = vdupq_n_u8(0x80);
         
     | 
| 
      
 264 
     | 
    
         
            +
             
     | 
| 
      
 265 
     | 
    
         
            +
                    for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
         
     | 
| 
      
 266 
     | 
    
         
            +
                        size += sizeof(uint8x16_t);
         
     | 
| 
      
 267 
     | 
    
         
            +
             
     | 
| 
      
 268 
     | 
    
         
            +
                        uint8x16_t chunk = vld1q_u8(str);
         
     | 
| 
      
 269 
     | 
    
         
            +
             
     | 
| 
      
 270 
     | 
    
         
            +
                        // Check to see if any of these bytes have the high bit set.
         
     | 
| 
      
 271 
     | 
    
         
            +
                        has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
         
     | 
| 
      
 272 
     | 
    
         
            +
             
     | 
| 
      
 273 
     | 
    
         
            +
                        uint8x16_t tmp1   = vqtbl4q_u8(rails_xss_friendly_chars_neon[0], chunk);
         
     | 
| 
      
 274 
     | 
    
         
            +
                        uint8x16_t tmp2   = vqtbl4q_u8(rails_xss_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
         
     | 
| 
      
 275 
     | 
    
         
            +
                        uint8x16_t tmp3   = vqtbl4q_u8(rails_xss_friendly_chars_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
         
     | 
| 
      
 276 
     | 
    
         
            +
                        uint8x16_t tmp4   = vqtbl4q_u8(rails_xss_friendly_chars_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
         
     | 
| 
      
 277 
     | 
    
         
            +
                        uint8x16_t result = vorrq_u8(tmp4, vorrq_u8(tmp3, vorrq_u8(tmp1, tmp2)));
         
     | 
| 
      
 278 
     | 
    
         
            +
                        uint8_t    tmp    = vaddvq_u8(result);
         
     | 
| 
      
 279 
     | 
    
         
            +
                        size += tmp;
         
     | 
| 
      
 280 
     | 
    
         
            +
                    }
         
     | 
| 
      
 281 
     | 
    
         
            +
             
     | 
| 
      
 282 
     | 
    
         
            +
                    // 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
         
     | 
| 
      
 283 
     | 
    
         
            +
                    hi = vmaxvq_u8(has_some_hibit) != 0;
         
     | 
| 
      
 284 
     | 
    
         
            +
                }
         
     | 
| 
      
 285 
     | 
    
         
            +
             
     | 
| 
      
 286 
     | 
    
         
            +
                size_t len_remaining = len - i;
         
     | 
| 
       189 
287 
     | 
    
         | 
| 
      
 288 
     | 
    
         
            +
                for (; i < len; str++, i++) {
         
     | 
| 
      
 289 
     | 
    
         
            +
                    size += rails_xss_friendly_chars[*str];
         
     | 
| 
      
 290 
     | 
    
         
            +
                    hi |= *str & 0x80;
         
     | 
| 
      
 291 
     | 
    
         
            +
                }
         
     | 
| 
      
 292 
     | 
    
         
            +
             
     | 
| 
      
 293 
     | 
    
         
            +
                size -= (len_remaining * ((size_t)'0'));
         
     | 
| 
      
 294 
     | 
    
         
            +
             
     | 
| 
      
 295 
     | 
    
         
            +
                if (0 == hi) {
         
     | 
| 
      
 296 
     | 
    
         
            +
                    return size;
         
     | 
| 
      
 297 
     | 
    
         
            +
                }
         
     | 
| 
      
 298 
     | 
    
         
            +
                return -(size);
         
     | 
| 
      
 299 
     | 
    
         
            +
            #else
         
     | 
| 
      
 300 
     | 
    
         
            +
                size_t i = len;
         
     | 
| 
       190 
301 
     | 
    
         
             
                for (; 0 < i; str++, i--) {
         
     | 
| 
       191 
302 
     | 
    
         
             
                    size += rails_xss_friendly_chars[*str];
         
     | 
| 
       192 
303 
     | 
    
         
             
                    hi |= *str & 0x80;
         
     | 
| 
         @@ -195,13 +306,53 @@ inline static long rails_xss_friendly_size(const uint8_t *str, size_t len) { 
     | 
|
| 
       195 
306 
     | 
    
         
             
                    return size - len * (size_t)'0';
         
     | 
| 
       196 
307 
     | 
    
         
             
                }
         
     | 
| 
       197 
308 
     | 
    
         
             
                return -(size - len * (size_t)'0');
         
     | 
| 
      
 309 
     | 
    
         
            +
            #endif /* HAVE_SIMD_NEON */
         
     | 
| 
       198 
310 
     | 
    
         
             
            }
         
     | 
| 
       199 
311 
     | 
    
         | 
| 
       200 
312 
     | 
    
         
             
            inline static size_t rails_friendly_size(const uint8_t *str, size_t len) {
         
     | 
| 
       201 
     | 
    
         
            -
                long 
     | 
| 
       202 
     | 
    
         
            -
                 
     | 
| 
       203 
     | 
    
         
            -
             
     | 
| 
      
 313 
     | 
    
         
            +
                long     size = 0;
         
     | 
| 
      
 314 
     | 
    
         
            +
                uint32_t hi   = 0;
         
     | 
| 
      
 315 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 316 
     | 
    
         
            +
                size_t i     = 0;
         
     | 
| 
      
 317 
     | 
    
         
            +
                long   extra = 0;
         
     | 
| 
      
 318 
     | 
    
         
            +
             
     | 
| 
      
 319 
     | 
    
         
            +
                if (len >= sizeof(uint8x16_t)) {
         
     | 
| 
      
 320 
     | 
    
         
            +
                    uint8x16_t has_some_hibit = vdupq_n_u8(0);
         
     | 
| 
      
 321 
     | 
    
         
            +
                    uint8x16_t hibit          = vdupq_n_u8(0x80);
         
     | 
| 
      
 322 
     | 
    
         
            +
             
     | 
| 
      
 323 
     | 
    
         
            +
                    for (; i + sizeof(uint8x16_t) <= len; i += sizeof(uint8x16_t), str += sizeof(uint8x16_t)) {
         
     | 
| 
      
 324 
     | 
    
         
            +
                        size += sizeof(uint8x16_t);
         
     | 
| 
      
 325 
     | 
    
         
            +
             
     | 
| 
      
 326 
     | 
    
         
            +
                        // See https://lemire.me/blog/2019/07/23/arbitrary-byte-to-byte-maps-using-arm-neon/
         
     | 
| 
      
 327 
     | 
    
         
            +
                        uint8x16_t chunk = vld1q_u8(str);
         
     | 
| 
      
 328 
     | 
    
         
            +
             
     | 
| 
      
 329 
     | 
    
         
            +
                        // Check to see if any of these bytes have the high bit set.
         
     | 
| 
      
 330 
     | 
    
         
            +
                        has_some_hibit = vorrq_u8(has_some_hibit, vandq_u8(chunk, hibit));
         
     | 
| 
      
 331 
     | 
    
         
            +
             
     | 
| 
      
 332 
     | 
    
         
            +
                        uint8x16_t tmp1   = vqtbl4q_u8(rails_friendly_chars_neon[0], chunk);
         
     | 
| 
      
 333 
     | 
    
         
            +
                        uint8x16_t tmp2   = vqtbl4q_u8(rails_friendly_chars_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
         
     | 
| 
      
 334 
     | 
    
         
            +
                        uint8x16_t result = vorrq_u8(tmp1, tmp2);
         
     | 
| 
      
 335 
     | 
    
         
            +
                        uint8_t    tmp    = vaddvq_u8(result);
         
     | 
| 
      
 336 
     | 
    
         
            +
                        size += tmp;
         
     | 
| 
      
 337 
     | 
    
         
            +
                    }
         
     | 
| 
      
 338 
     | 
    
         
            +
             
     | 
| 
      
 339 
     | 
    
         
            +
                    // 'hi' should be set if any of the bytes we processed have the high bit set. It doesn't matter which ones.
         
     | 
| 
      
 340 
     | 
    
         
            +
                    hi = vmaxvq_u8(has_some_hibit) != 0;
         
     | 
| 
      
 341 
     | 
    
         
            +
                }
         
     | 
| 
      
 342 
     | 
    
         
            +
             
     | 
| 
      
 343 
     | 
    
         
            +
                for (; i < len; str++, i++, extra++) {
         
     | 
| 
      
 344 
     | 
    
         
            +
                    size += rails_friendly_chars[*str];
         
     | 
| 
      
 345 
     | 
    
         
            +
                    hi |= *str & 0x80;
         
     | 
| 
      
 346 
     | 
    
         
            +
                }
         
     | 
| 
      
 347 
     | 
    
         
            +
             
     | 
| 
      
 348 
     | 
    
         
            +
                size -= (extra * ((size_t)'0'));
         
     | 
| 
       204 
349 
     | 
    
         | 
| 
      
 350 
     | 
    
         
            +
                if (0 == hi) {
         
     | 
| 
      
 351 
     | 
    
         
            +
                    return size;
         
     | 
| 
      
 352 
     | 
    
         
            +
                }
         
     | 
| 
      
 353 
     | 
    
         
            +
                return -(size);
         
     | 
| 
      
 354 
     | 
    
         
            +
            #else
         
     | 
| 
      
 355 
     | 
    
         
            +
                size_t i = len;
         
     | 
| 
       205 
356 
     | 
    
         
             
                for (; 0 < i; str++, i--) {
         
     | 
| 
       206 
357 
     | 
    
         
             
                    size += rails_friendly_chars[*str];
         
     | 
| 
       207 
358 
     | 
    
         
             
                    hi |= *str & 0x80;
         
     | 
| 
         @@ -210,9 +361,10 @@ inline static size_t rails_friendly_size(const uint8_t *str, size_t len) { 
     | 
|
| 
       210 
361 
     | 
    
         
             
                    return size - len * (size_t)'0';
         
     | 
| 
       211 
362 
     | 
    
         
             
                }
         
     | 
| 
       212 
363 
     | 
    
         
             
                return -(size - len * (size_t)'0');
         
     | 
| 
      
 364 
     | 
    
         
            +
            #endif /* HAVE_SIMD_NEON */
         
     | 
| 
       213 
365 
     | 
    
         
             
            }
         
     | 
| 
       214 
366 
     | 
    
         | 
| 
       215 
     | 
    
         
            -
            const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus,  
     | 
| 
      
 367 
     | 
    
         
            +
            const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, size_t *lenp) {
         
     | 
| 
       216 
368 
     | 
    
         
             
                const char *str = NULL;
         
     | 
| 
       217 
369 
     | 
    
         | 
| 
       218 
370 
     | 
    
         
             
                if (AutoNan == opt) {
         
     | 
| 
         @@ -477,7 +629,7 @@ void oj_dump_time(VALUE obj, Out out, int withZone) { 
     | 
|
| 
       477 
629 
     | 
    
         
             
            /*
             * Dump `obj` as a string: convert it with oj_safe_string_convert() and
             * write the resulting bytes, with their explicit length, to `out`.
             */
            void oj_dump_ruby_time(VALUE obj, Out out) {
                /* NOTE(review): volatile presumably keeps the converted string visible to the GC - confirm. */
                volatile VALUE time_str = oj_safe_string_convert(obj);

                oj_dump_cstr(RSTRING_PTR(time_str),
                             RSTRING_LEN(time_str),
                             0,
                             0,
                             out);
            }
         
     | 
| 
       482 
634 
     | 
    
         | 
| 
       483 
635 
     | 
    
         
             
            void oj_dump_xml_time(VALUE obj, Out out) {
         
     | 
| 
         @@ -711,13 +863,13 @@ void oj_dump_str(VALUE obj, int depth, Out out, bool as_ok) { 
     | 
|
| 
       711 
863 
     | 
    
         
             
                    rb_encoding *enc = rb_enc_from_index(idx);
         
     | 
| 
       712 
864 
     | 
    
         
             
                    obj              = rb_str_conv_enc(obj, enc, oj_utf8_encoding);
         
     | 
| 
       713 
865 
     | 
    
         
             
                }
         
     | 
| 
       714 
     | 
    
         
            -
                oj_dump_cstr(RSTRING_PTR(obj),  
     | 
| 
      
 866 
     | 
    
         
            +
                oj_dump_cstr(RSTRING_PTR(obj), RSTRING_LEN(obj), 0, 0, out);
         
     | 
| 
       715 
867 
     | 
    
         
             
            }
         
     | 
| 
       716 
868 
     | 
    
         | 
| 
       717 
869 
     | 
    
         
             
            /*
             * Dump a symbol: obtain its string form with rb_sym2str() and write the
             * bytes, with their explicit length, to `out`. `depth` and `as_ok` are
             * not used by this function.
             */
            void oj_dump_sym(VALUE obj, int depth, Out out, bool as_ok) {
                /* NOTE(review): volatile presumably keeps the string visible to the GC - confirm. */
                volatile VALUE sym_name = rb_sym2str(obj);

                oj_dump_cstr(RSTRING_PTR(sym_name),
                             RSTRING_LEN(sym_name),
                             0,
                             0,
                             out);
            }
         
     | 
| 
       722 
874 
     | 
    
         | 
| 
       723 
875 
     | 
    
         
             
            static void debug_raise(const char *orig, size_t cnt, int line) {
         
     | 
| 
         @@ -758,9 +910,120 @@ void oj_dump_raw_json(VALUE obj, int depth, Out out) { 
     | 
|
| 
       758 
910 
     | 
    
         
             
                }
         
     | 
| 
       759 
911 
     | 
    
         
             
            }
         
     | 
| 
       760 
912 
     | 
    
         | 
| 
      
 913 
     | 
    
         
            +
            /*
             * FORCE_INLINE: expands to the always_inline attribute on Clang and GCC,
             * and to nothing on any other compiler. It is meant to be combined with
             * `static inline` on the function definition.
             */
            #if !defined(__clang__) && !defined(__GNUC__)
            #define FORCE_INLINE
            #else
            #define FORCE_INLINE __attribute__((always_inline))
            #endif
         
     | 
| 
      
 918 
     | 
    
         
            +
             
     | 
| 
      
 919 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 920 
     | 
    
         
            +
            typedef struct _neon_match_result {
         
     | 
| 
      
 921 
     | 
    
         
            +
                uint8x16_t needs_escape;
         
     | 
| 
      
 922 
     | 
    
         
            +
                bool       has_some_hibit;
         
     | 
| 
      
 923 
     | 
    
         
            +
                bool       do_unicode_validation;
         
     | 
| 
      
 924 
     | 
    
         
            +
            } neon_match_result;
         
     | 
| 
      
 925 
     | 
    
         
            +
             
     | 
| 
      
 926 
     | 
    
         
            +
            static inline FORCE_INLINE neon_match_result
         
     | 
| 
      
 927 
     | 
    
         
            +
            neon_update(const char *str, uint8x16x4_t *cmap_neon, int neon_table_size, bool do_unicode_validation, bool has_hi) {
         
     | 
| 
      
 928 
     | 
    
         
            +
                neon_match_result result = {.has_some_hibit = false, .do_unicode_validation = false};
         
     | 
| 
      
 929 
     | 
    
         
            +
             
     | 
| 
      
 930 
     | 
    
         
            +
                uint8x16_t chunk    = vld1q_u8((const unsigned char *)str);
         
     | 
| 
      
 931 
     | 
    
         
            +
                uint8x16_t tmp1     = vqtbl4q_u8(cmap_neon[0], chunk);
         
     | 
| 
      
 932 
     | 
    
         
            +
                uint8x16_t tmp2     = vqtbl4q_u8(cmap_neon[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
         
     | 
| 
      
 933 
     | 
    
         
            +
                result.needs_escape = vorrq_u8(tmp1, tmp2);
         
     | 
| 
      
 934 
     | 
    
         
            +
                if (neon_table_size > 2) {
         
     | 
| 
      
 935 
     | 
    
         
            +
                    uint8x16_t tmp3     = vqtbl4q_u8(cmap_neon[2], veorq_u8(chunk, vdupq_n_u8(0x80)));
         
     | 
| 
      
 936 
     | 
    
         
            +
                    uint8x16_t tmp4     = vqtbl4q_u8(cmap_neon[3], veorq_u8(chunk, vdupq_n_u8(0xc0)));
         
     | 
| 
      
 937 
     | 
    
         
            +
                    result.needs_escape = vorrq_u8(result.needs_escape, vorrq_u8(tmp4, tmp3));
         
     | 
| 
      
 938 
     | 
    
         
            +
                }
         
     | 
| 
      
 939 
     | 
    
         
            +
                if (has_hi && do_unicode_validation) {
         
     | 
| 
      
 940 
     | 
    
         
            +
                    uint8x16_t has_some_hibit    = vandq_u8(chunk, vdupq_n_u8(0x80));
         
     | 
| 
      
 941 
     | 
    
         
            +
                    result.has_some_hibit        = vmaxvq_u8(has_some_hibit) != 0;
         
     | 
| 
      
 942 
     | 
    
         
            +
                    result.do_unicode_validation = has_hi && do_unicode_validation && result.has_some_hibit;
         
     | 
| 
      
 943 
     | 
    
         
            +
                }
         
     | 
| 
      
 944 
     | 
    
         
            +
                return result;
         
     | 
| 
      
 945 
     | 
    
         
            +
            }
         
     | 
| 
      
 946 
     | 
    
         
            +
             
     | 
| 
      
 947 
     | 
    
         
            +
            #endif /* HAVE_SIMD_NEON */
         
     | 
| 
      
 948 
     | 
    
         
            +
             
     | 
| 
      
 949 
     | 
    
         
            +
            static inline FORCE_INLINE const char *process_character(char         action,
         
     | 
| 
      
 950 
     | 
    
         
            +
                                                                     const char  *str,
         
     | 
| 
      
 951 
     | 
    
         
            +
                                                                     const char  *end,
         
     | 
| 
      
 952 
     | 
    
         
            +
                                                                     Out          out,
         
     | 
| 
      
 953 
     | 
    
         
            +
                                                                     const char  *orig,
         
     | 
| 
      
 954 
     | 
    
         
            +
                                                                     bool         do_unicode_validation,
         
     | 
| 
      
 955 
     | 
    
         
            +
                                                                     const char **check_start_) {
         
     | 
| 
      
 956 
     | 
    
         
            +
                const char *check_start = *check_start_;
         
     | 
| 
      
 957 
     | 
    
         
            +
                switch (action) {
         
     | 
| 
      
 958 
     | 
    
         
            +
                case '1':
         
     | 
| 
      
 959 
     | 
    
         
            +
                    if (do_unicode_validation && check_start <= str) {
         
     | 
| 
      
 960 
     | 
    
         
            +
                        if (0 != (0x80 & (uint8_t)*str)) {
         
     | 
| 
      
 961 
     | 
    
         
            +
                            if (0xC0 == (0xC0 & (uint8_t)*str)) {
         
     | 
| 
      
 962 
     | 
    
         
            +
                                *check_start_ = check_unicode(str, end, orig);
         
     | 
| 
      
 963 
     | 
    
         
            +
                            } else {
         
     | 
| 
      
 964 
     | 
    
         
            +
                                raise_invalid_unicode(orig, (int)(end - orig), (int)(str - orig));
         
     | 
| 
      
 965 
     | 
    
         
            +
                            }
         
     | 
| 
      
 966 
     | 
    
         
            +
                        }
         
     | 
| 
      
 967 
     | 
    
         
            +
                    }
         
     | 
| 
      
 968 
     | 
    
         
            +
                    *out->cur++ = *str;
         
     | 
| 
      
 969 
     | 
    
         
            +
                    break;
         
     | 
| 
      
 970 
     | 
    
         
            +
                case '2':
         
     | 
| 
      
 971 
     | 
    
         
            +
                    *out->cur++ = '\\';
         
     | 
| 
      
 972 
     | 
    
         
            +
                    switch (*str) {
         
     | 
| 
      
 973 
     | 
    
         
            +
                    case '\\': *out->cur++ = '\\'; break;
         
     | 
| 
      
 974 
     | 
    
         
            +
                    case '\b': *out->cur++ = 'b'; break;
         
     | 
| 
      
 975 
     | 
    
         
            +
                    case '\t': *out->cur++ = 't'; break;
         
     | 
| 
      
 976 
     | 
    
         
            +
                    case '\n': *out->cur++ = 'n'; break;
         
     | 
| 
      
 977 
     | 
    
         
            +
                    case '\f': *out->cur++ = 'f'; break;
         
     | 
| 
      
 978 
     | 
    
         
            +
                    case '\r': *out->cur++ = 'r'; break;
         
     | 
| 
      
 979 
     | 
    
         
            +
                    default: *out->cur++ = *str; break;
         
     | 
| 
      
 980 
     | 
    
         
            +
                    }
         
     | 
| 
      
 981 
     | 
    
         
            +
                    break;
         
     | 
| 
      
 982 
     | 
    
         
            +
                case '3':  // Unicode
         
     | 
| 
      
 983 
     | 
    
         
            +
                    if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
         
     | 
| 
      
 984 
     | 
    
         
            +
                        if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
         
     | 
| 
      
 985 
     | 
    
         
            +
                            str = dump_unicode(str, end, out, orig);
         
     | 
| 
      
 986 
     | 
    
         
            +
                        } else {
         
     | 
| 
      
 987 
     | 
    
         
            +
                            *check_start_ = check_unicode(str, end, orig);
         
     | 
| 
      
 988 
     | 
    
         
            +
                            *out->cur++   = *str;
         
     | 
| 
      
 989 
     | 
    
         
            +
                        }
         
     | 
| 
      
 990 
     | 
    
         
            +
                        break;
         
     | 
| 
      
 991 
     | 
    
         
            +
                    }
         
     | 
| 
      
 992 
     | 
    
         
            +
                    str = dump_unicode(str, end, out, orig);
         
     | 
| 
      
 993 
     | 
    
         
            +
                    break;
         
     | 
| 
      
 994 
     | 
    
         
            +
                case '6':  // control characters
         
     | 
| 
      
 995 
     | 
    
         
            +
                    if (*(uint8_t *)str < 0x80) {
         
     | 
| 
      
 996 
     | 
    
         
            +
                        if (0 == (uint8_t)*str && out->opts->dump_opts.omit_null_byte) {
         
     | 
| 
      
 997 
     | 
    
         
            +
                            break;
         
     | 
| 
      
 998 
     | 
    
         
            +
                        }
         
     | 
| 
      
 999 
     | 
    
         
            +
                        APPEND_CHARS(out->cur, "\\u00", 4);
         
     | 
| 
      
 1000 
     | 
    
         
            +
                        dump_hex((uint8_t)*str, out);
         
     | 
| 
      
 1001 
     | 
    
         
            +
                    } else {
         
     | 
| 
      
 1002 
     | 
    
         
            +
                        if (0xe2 == (uint8_t)*str && do_unicode_validation && 2 <= end - str) {
         
     | 
| 
      
 1003 
     | 
    
         
            +
                            if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
         
     | 
| 
      
 1004 
     | 
    
         
            +
                                str = dump_unicode(str, end, out, orig);
         
     | 
| 
      
 1005 
     | 
    
         
            +
                            } else {
         
     | 
| 
      
 1006 
     | 
    
         
            +
                                *check_start_ = check_unicode(str, end, orig);
         
     | 
| 
      
 1007 
     | 
    
         
            +
                                *out->cur++   = *str;
         
     | 
| 
      
 1008 
     | 
    
         
            +
                            }
         
     | 
| 
      
 1009 
     | 
    
         
            +
                            break;
         
     | 
| 
      
 1010 
     | 
    
         
            +
                        }
         
     | 
| 
      
 1011 
     | 
    
         
            +
                        str = dump_unicode(str, end, out, orig);
         
     | 
| 
      
 1012 
     | 
    
         
            +
                    }
         
     | 
| 
      
 1013 
     | 
    
         
            +
                    break;
         
     | 
| 
      
 1014 
     | 
    
         
            +
                default: break;  // ignore, should never happen if the table is correct
         
     | 
| 
      
 1015 
     | 
    
         
            +
                }
         
     | 
| 
      
 1016 
     | 
    
         
            +
             
     | 
| 
      
 1017 
     | 
    
         
            +
                return str;
         
     | 
| 
      
 1018 
     | 
    
         
            +
            }
         
     | 
| 
      
 1019 
     | 
    
         
            +
             
     | 
| 
       761 
1020 
     | 
    
         
             
            void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out out) {
         
     | 
| 
       762 
     | 
    
         
            -
                size_t 
     | 
| 
       763 
     | 
    
         
            -
                char 
     | 
| 
      
 1021 
     | 
    
         
            +
                size_t size;
         
     | 
| 
      
 1022 
     | 
    
         
            +
                char  *cmap;
         
     | 
| 
      
 1023 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 1024 
     | 
    
         
            +
                uint8x16x4_t *cmap_neon       = NULL;
         
     | 
| 
      
 1025 
     | 
    
         
            +
                int           neon_table_size = 0;
         
     | 
| 
      
 1026 
     | 
    
         
            +
            #endif /* HAVE_SIMD_NEON */
         
     | 
| 
       764 
1027 
     | 
    
         
             
                const char *orig                  = str;
         
     | 
| 
       765 
1028 
     | 
    
         
             
                bool        has_hi                = false;
         
     | 
| 
       766 
1029 
     | 
    
         
             
                bool        do_unicode_validation = false;
         
     | 
| 
         @@ -792,7 +1055,11 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou 
     | 
|
| 
       792 
1055 
     | 
    
         
             
                    long sz;
         
     | 
| 
       793 
1056 
     | 
    
         | 
| 
       794 
1057 
     | 
    
         
             
                    cmap = rails_xss_friendly_chars;
         
     | 
| 
       795 
     | 
    
         
            -
             
     | 
| 
      
 1058 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 1059 
     | 
    
         
            +
                    cmap_neon       = rails_xss_friendly_chars_neon;
         
     | 
| 
      
 1060 
     | 
    
         
            +
                    neon_table_size = 4;
         
     | 
| 
      
 1061 
     | 
    
         
            +
            #endif /* HAVE_SIMD_NEON */
         
     | 
| 
      
 1062 
     | 
    
         
            +
                    sz = rails_xss_friendly_size((uint8_t *)str, cnt);
         
     | 
| 
       796 
1063 
     | 
    
         
             
                    if (sz < 0) {
         
     | 
| 
       797 
1064 
     | 
    
         
             
                        has_hi = true;
         
     | 
| 
       798 
1065 
     | 
    
         
             
                        size   = (size_t)-sz;
         
     | 
| 
         @@ -805,7 +1072,11 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou 
     | 
|
| 
       805 
1072 
     | 
    
         
             
                case RailsEsc: {
         
     | 
| 
       806 
1073 
     | 
    
         
             
                    long sz;
         
     | 
| 
       807 
1074 
     | 
    
         
             
                    cmap = rails_friendly_chars;
         
     | 
| 
       808 
     | 
    
         
            -
             
     | 
| 
      
 1075 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 1076 
     | 
    
         
            +
                    cmap_neon       = rails_friendly_chars_neon;
         
     | 
| 
      
 1077 
     | 
    
         
            +
                    neon_table_size = 2;
         
     | 
| 
      
 1078 
     | 
    
         
            +
            #endif /* HAVE_SIMD_NEON */
         
     | 
| 
      
 1079 
     | 
    
         
            +
                    sz = rails_friendly_size((uint8_t *)str, cnt);
         
     | 
| 
       809 
1080 
     | 
    
         
             
                    if (sz < 0) {
         
     | 
| 
       810 
1081 
     | 
    
         
             
                        has_hi = true;
         
     | 
| 
       811 
1082 
     | 
    
         
             
                        size   = (size_t)-sz;
         
     | 
| 
         @@ -816,7 +1087,12 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou 
     | 
|
| 
       816 
1087 
     | 
    
         
             
                    break;
         
     | 
| 
       817 
1088 
     | 
    
         
             
                }
         
     | 
| 
       818 
1089 
     | 
    
         
             
                case JSONEsc:
         
     | 
| 
       819 
     | 
    
         
            -
                default: cmap = hibit_friendly_chars; 
     | 
| 
      
 1090 
     | 
    
         
            +
                default: cmap = hibit_friendly_chars;
         
     | 
| 
      
 1091 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 1092 
     | 
    
         
            +
                    cmap_neon       = hibit_friendly_chars_neon;
         
     | 
| 
      
 1093 
     | 
    
         
            +
                    neon_table_size = 2;
         
     | 
| 
      
 1094 
     | 
    
         
            +
            #endif /* HAVE_SIMD_NEON */
         
     | 
| 
      
 1095 
     | 
    
         
            +
                    size = hibit_friendly_size((uint8_t *)str, cnt);
         
     | 
| 
       820 
1096 
     | 
    
         
             
                }
         
     | 
| 
       821 
1097 
     | 
    
         
             
                assure_size(out, size + BUFFER_EXTRA);
         
     | 
| 
       822 
1098 
     | 
    
         
             
                *out->cur++ = '"';
         
     | 
| 
         @@ -842,71 +1118,91 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou 
     | 
|
| 
       842 
1118 
     | 
    
         
             
                    if (is_sym) {
         
     | 
| 
       843 
1119 
     | 
    
         
             
                        *out->cur++ = ':';
         
     | 
| 
       844 
1120 
     | 
    
         
             
                    }
         
     | 
| 
       845 
     | 
    
         
            -
             
     | 
| 
       846 
     | 
    
         
            -
             
     | 
| 
       847 
     | 
    
         
            -
             
     | 
| 
       848 
     | 
    
         
            -
             
     | 
| 
       849 
     | 
    
         
            -
             
     | 
| 
       850 
     | 
    
         
            -
             
     | 
| 
       851 
     | 
    
         
            -
             
     | 
| 
       852 
     | 
    
         
            -
             
     | 
| 
       853 
     | 
    
         
            -
             
     | 
| 
       854 
     | 
    
         
            -
             
     | 
| 
       855 
     | 
    
         
            -
             
     | 
| 
       856 
     | 
    
         
            -
             
     | 
| 
       857 
     | 
    
         
            -
             
     | 
| 
       858 
     | 
    
         
            -
             
     | 
| 
       859 
     | 
    
         
            -
             
     | 
| 
       860 
     | 
    
         
            -
             
     | 
| 
       861 
     | 
    
         
            -
                             
     | 
| 
       862 
     | 
    
         
            -
                             
     | 
| 
       863 
     | 
    
         
            -
             
     | 
| 
       864 
     | 
    
         
            -
             
     | 
| 
       865 
     | 
    
         
            -
             
     | 
| 
       866 
     | 
    
         
            -
                             
     | 
| 
       867 
     | 
    
         
            -
             
     | 
| 
       868 
     | 
    
         
            -
             
     | 
| 
       869 
     | 
    
         
            -
             
     | 
| 
       870 
     | 
    
         
            -
             
     | 
| 
       871 
     | 
    
         
            -
             
     | 
| 
       872 
     | 
    
         
            -
                             
     | 
| 
       873 
     | 
    
         
            -
                                if (0x80 == (uint8_t)str[1] && (0xa8 == (uint8_t)str[2] || 0xa9 == (uint8_t)str[2])) {
         
     | 
| 
       874 
     | 
    
         
            -
                                    str = dump_unicode(str, end, out, orig);
         
     | 
| 
       875 
     | 
    
         
            -
                                } else {
         
     | 
| 
       876 
     | 
    
         
            -
                                    check_start = check_unicode(str, end, orig);
         
     | 
| 
       877 
     | 
    
         
            -
                                    *out->cur++ = *str;
         
     | 
| 
       878 
     | 
    
         
            -
                                }
         
     | 
| 
      
 1121 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 1122 
     | 
    
         
            +
                    const char *chunk_start;
         
     | 
| 
      
 1123 
     | 
    
         
            +
                    const char *chunk_end;
         
     | 
| 
      
 1124 
     | 
    
         
            +
                    const char *cursor   = str;
         
     | 
| 
      
 1125 
     | 
    
         
            +
                    bool        use_neon = (cmap_neon != NULL && cnt >= (sizeof(uint8x16_t))) ? true : false;
         
     | 
| 
      
 1126 
     | 
    
         
            +
                    char        matches[16];
         
     | 
| 
      
 1127 
     | 
    
         
            +
            #define SEARCH_FLUSH                                  \
         
     | 
| 
      
 1128 
     | 
    
         
            +
                if (str > cursor) {                               \
         
     | 
| 
      
 1129 
     | 
    
         
            +
                    APPEND_CHARS(out->cur, cursor, str - cursor); \
         
     | 
| 
      
 1130 
     | 
    
         
            +
                    cursor = str;                                 \
         
     | 
| 
      
 1131 
     | 
    
         
            +
                }
         
     | 
| 
      
 1132 
     | 
    
         
            +
             
     | 
| 
      
 1133 
     | 
    
         
            +
            #endif /* HAVE_SIMD_NEON */
         
     | 
| 
      
 1134 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 1135 
     | 
    
         
            +
                    if (use_neon) {
         
     | 
| 
      
 1136 
     | 
    
         
            +
                        while (str < end) {
         
     | 
| 
      
 1137 
     | 
    
         
            +
                            const char *chunk_ptr = NULL;
         
     | 
| 
      
 1138 
     | 
    
         
            +
                            if (str + sizeof(uint8x16_t) <= end) {
         
     | 
| 
      
 1139 
     | 
    
         
            +
                                chunk_ptr   = str;
         
     | 
| 
      
 1140 
     | 
    
         
            +
                                chunk_start = str;
         
     | 
| 
      
 1141 
     | 
    
         
            +
                                chunk_end   = str + sizeof(uint8x16_t);
         
     | 
| 
      
 1142 
     | 
    
         
            +
                            } else if ((end - str) >= SIMD_MINIMUM_THRESHOLD) {
         
     | 
| 
      
 1143 
     | 
    
         
            +
                                memset(out->cur, 'A', sizeof(uint8x16_t));
         
     | 
| 
      
 1144 
     | 
    
         
            +
                                memcpy(out->cur, str, (end - str));
         
     | 
| 
      
 1145 
     | 
    
         
            +
                                chunk_ptr   = out->cur;
         
     | 
| 
      
 1146 
     | 
    
         
            +
                                chunk_start = str;
         
     | 
| 
      
 1147 
     | 
    
         
            +
                                chunk_end   = end;
         
     | 
| 
      
 1148 
     | 
    
         
            +
                            } else {
         
     | 
| 
       879 
1149 
     | 
    
         
             
                                break;
         
     | 
| 
       880 
1150 
     | 
    
         
             
                            }
         
     | 
| 
       881 
     | 
    
         
            -
                             
     | 
| 
       882 
     | 
    
         
            -
             
     | 
| 
       883 
     | 
    
         
            -
             
     | 
| 
       884 
     | 
    
         
            -
             
     | 
| 
       885 
     | 
    
         
            -
             
     | 
| 
       886 
     | 
    
         
            -
             
     | 
| 
       887 
     | 
    
         
            -
                                 
     | 
| 
       888 
     | 
    
         
            -
                                 
     | 
| 
       889 
     | 
    
         
            -
                                 
     | 
| 
       890 
     | 
    
         
            -
             
     | 
| 
       891 
     | 
    
         
            -
                                 
     | 
| 
       892 
     | 
    
         
            -
             
     | 
| 
       893 
     | 
    
         
            -
             
     | 
| 
       894 
     | 
    
         
            -
             
     | 
| 
       895 
     | 
    
         
            -
             
     | 
| 
       896 
     | 
    
         
            -
                                         
     | 
| 
      
 1151 
     | 
    
         
            +
                            neon_match_result result = neon_update(chunk_ptr,
         
     | 
| 
      
 1152 
     | 
    
         
            +
                                                                   cmap_neon,
         
     | 
| 
      
 1153 
     | 
    
         
            +
                                                                   neon_table_size,
         
     | 
| 
      
 1154 
     | 
    
         
            +
                                                                   do_unicode_validation,
         
     | 
| 
      
 1155 
     | 
    
         
            +
                                                                   has_hi);
         
     | 
| 
      
 1156 
     | 
    
         
            +
                            if ((result.do_unicode_validation) || vmaxvq_u8(result.needs_escape) != 0) {
         
     | 
| 
      
 1157 
     | 
    
         
            +
                                SEARCH_FLUSH;
         
     | 
| 
      
 1158 
     | 
    
         
            +
                                uint8x16_t actions     = vaddq_u8(result.needs_escape, vdupq_n_u8('1'));
         
     | 
| 
      
 1159 
     | 
    
         
            +
                                uint8_t    num_matches = vaddvq_u8(vandq_u8(result.needs_escape, vdupq_n_u8(0x1)));
         
     | 
| 
      
 1160 
     | 
    
         
            +
                                vst1q_u8((unsigned char *)matches, actions);
         
     | 
| 
      
 1161 
     | 
    
         
            +
                                bool process_each = result.do_unicode_validation || (num_matches > sizeof(uint8x16_t) / 2);
         
     | 
| 
      
 1162 
     | 
    
         
            +
                                // If no byte in this chunk had the high bit set then we can skip
         
     | 
| 
      
 1163 
     | 
    
         
            +
                                // all of the '1' bytes by directly copying them to the output.
         
     | 
| 
      
 1164 
     | 
    
         
            +
                                if (!process_each) {
         
     | 
| 
      
 1165 
     | 
    
         
            +
                                    while (str < chunk_end) {
         
     | 
| 
      
 1166 
     | 
    
         
            +
                                        long i = str - chunk_start;
         
     | 
| 
      
 1167 
     | 
    
         
            +
                                        char action;
         
     | 
| 
      
 1168 
     | 
    
         
            +
                                        while (str < chunk_end && (action = matches[i++]) == '1') {
         
     | 
| 
      
 1169 
     | 
    
         
            +
                                            *out->cur++ = *str++;
         
     | 
| 
      
 1170 
     | 
    
         
            +
                                        }
         
     | 
| 
      
 1171 
     | 
    
         
            +
                                        cursor = str;
         
     | 
| 
      
 1172 
     | 
    
         
            +
                                        if (str >= chunk_end) {
         
     | 
| 
      
 1173 
     | 
    
         
            +
                                            break;
         
     | 
| 
      
 1174 
     | 
    
         
            +
                                        }
         
     | 
| 
      
 1175 
     | 
    
         
            +
                                        str = process_character(action, str, end, out, orig, do_unicode_validation, &check_start);
         
     | 
| 
      
 1176 
     | 
    
         
            +
                                        str++;
         
     | 
| 
      
 1177 
     | 
    
         
            +
                                    }
         
     | 
| 
      
 1178 
     | 
    
         
            +
                                } else {
         
     | 
| 
      
 1179 
     | 
    
         
            +
                                    while (str < chunk_end) {
         
     | 
| 
      
 1180 
     | 
    
         
            +
                                        long match_index = str - chunk_start;
         
     | 
| 
      
 1181 
     | 
    
         
            +
                                        str              = process_character(matches[match_index],
         
     | 
| 
      
 1182 
     | 
    
         
            +
                                                                str,
         
     | 
| 
      
 1183 
     | 
    
         
            +
                                                                end,
         
     | 
| 
      
 1184 
     | 
    
         
            +
                                                                out,
         
     | 
| 
      
 1185 
     | 
    
         
            +
                                                                orig,
         
     | 
| 
      
 1186 
     | 
    
         
            +
                                                                do_unicode_validation,
         
     | 
| 
      
 1187 
     | 
    
         
            +
                                                                &check_start);
         
     | 
| 
      
 1188 
     | 
    
         
            +
                                        str++;
         
     | 
| 
       897 
1189 
     | 
    
         
             
                                    }
         
     | 
| 
       898 
     | 
    
         
            -
                                    break;
         
     | 
| 
       899 
1190 
     | 
    
         
             
                                }
         
     | 
| 
       900 
     | 
    
         
            -
                                 
     | 
| 
      
 1191 
     | 
    
         
            +
                                cursor = str;
         
     | 
| 
      
 1192 
     | 
    
         
            +
                                continue;
         
     | 
| 
       901 
1193 
     | 
    
         
             
                            }
         
     | 
| 
       902 
     | 
    
         
            -
                             
     | 
| 
       903 
     | 
    
         
            -
                        default: break;  // ignore, should never happen if the table is correct
         
     | 
| 
      
 1194 
     | 
    
         
            +
                            str = chunk_end;
         
     | 
| 
       904 
1195 
     | 
    
         
             
                        }
         
     | 
| 
      
 1196 
     | 
    
         
            +
                        SEARCH_FLUSH;
         
     | 
| 
      
 1197 
     | 
    
         
            +
                    }
         
     | 
| 
      
 1198 
     | 
    
         
            +
            #endif /* HAVE_SIMD_NEON */
         
     | 
| 
      
 1199 
     | 
    
         
            +
                    for (; str < end; str++) {
         
     | 
| 
      
 1200 
     | 
    
         
            +
                        str = process_character(cmap[(uint8_t)*str], str, end, out, orig, do_unicode_validation, &check_start);
         
     | 
| 
       905 
1201 
     | 
    
         
             
                    }
         
     | 
| 
       906 
1202 
     | 
    
         
             
                    *out->cur++ = '"';
         
     | 
| 
       907 
1203 
     | 
    
         
             
                }
         
     | 
| 
       908 
1204 
     | 
    
         
             
                if (do_unicode_validation && 0 < str - orig && 0 != (0x80 & *(str - 1))) {
         
     | 
| 
       909 
     | 
    
         
            -
                    uint8_t c = (uint8_t) 
     | 
| 
      
 1205 
     | 
    
         
            +
                    uint8_t c = (uint8_t)*(str - 1);
         
     | 
| 
       910 
1206 
     | 
    
         
             
                    int     i;
         
     | 
| 
       911 
1207 
     | 
    
         
             
                    int     scnt = (int)(str - orig);
         
     | 
| 
       912 
1208 
     | 
    
         | 
| 
         @@ -957,7 +1253,7 @@ void oj_dump_class(VALUE obj, int depth, Out out, bool as_ok) { 
     | 
|
| 
       957 
1253 
     | 
    
         
             
            void oj_dump_obj_to_s(VALUE obj, Out out) {
         
     | 
| 
       958 
1254 
     | 
    
         
             
                volatile VALUE rstr = oj_safe_string_convert(obj);
         
     | 
| 
       959 
1255 
     | 
    
         | 
| 
       960 
     | 
    
         
            -
                oj_dump_cstr(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), 0, 0, out);
     | 
| 
      
 1256 
     | 
    
         
            +
                oj_dump_cstr(RSTRING_PTR(rstr), RSTRING_LEN(rstr), 0, 0, out);
         
     | 
| 
       961 
1257 
     | 
    
         
             
            }
         
     | 
| 
       962 
1258 
     | 
    
         | 
| 
       963 
1259 
     | 
    
         
             
            void oj_dump_raw(const char *str, size_t cnt, Out out) {
         
     | 
| 
         @@ -1092,7 +1388,7 @@ void oj_dump_fixnum(VALUE obj, int depth, Out out, bool as_ok) { 
     | 
|
| 
       1092 
1388 
     | 
    
         | 
| 
       1093 
1389 
     | 
    
         
             
            void oj_dump_bignum(VALUE obj, int depth, Out out, bool as_ok) {
         
     | 
| 
       1094 
1390 
     | 
    
         
             
                volatile VALUE rs             = rb_big2str(obj, 10);
         
     | 
| 
       1095 
     | 
    
         
            -
                 
     | 
| 
      
 1391 
     | 
    
         
            +
                size_t         cnt            = RSTRING_LEN(rs);
         
     | 
| 
       1096 
1392 
     | 
    
         
             
                bool           dump_as_string = false;
         
     | 
| 
       1097 
1393 
     | 
    
         | 
| 
       1098 
1394 
     | 
    
         
             
                if (out->opts->int_range_max != 0 || out->opts->int_range_min != 0) {  // Bignum cannot be inside of Fixnum range
         
     | 
| 
         @@ -1114,7 +1410,7 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) { 
     | 
|
| 
       1114 
1410 
     | 
    
         
             
                char   buf[64];
         
     | 
| 
       1115 
1411 
     | 
    
         
             
                char  *b;
         
     | 
| 
       1116 
1412 
     | 
    
         
             
                double d   = rb_num2dbl(obj);
         
     | 
| 
       1117 
     | 
    
         
            -
                 
     | 
| 
      
 1413 
     | 
    
         
            +
                size_t cnt = 0;
         
     | 
| 
       1118 
1414 
     | 
    
         | 
| 
       1119 
1415 
     | 
    
         
             
                if (0.0 == d) {
         
     | 
| 
       1120 
1416 
     | 
    
         
             
                    b    = buf;
         
     | 
| 
         @@ -1225,7 +1521,7 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) { 
     | 
|
| 
       1225 
1521 
     | 
    
         
             
                } else if (0 == out->opts->float_prec) {
         
     | 
| 
       1226 
1522 
     | 
    
         
             
                    volatile VALUE rstr = oj_safe_string_convert(obj);
         
     | 
| 
       1227 
1523 
     | 
    
         | 
| 
       1228 
     | 
    
         
            -
                    cnt =  
     | 
| 
      
 1524 
     | 
    
         
            +
                    cnt = RSTRING_LEN(rstr);
         
     | 
| 
       1229 
1525 
     | 
    
         
             
                    if ((int)sizeof(buf) <= cnt) {
         
     | 
| 
       1230 
1526 
     | 
    
         
             
                        cnt = sizeof(buf) - 1;
         
     | 
| 
       1231 
1527 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -1239,8 +1535,8 @@ void oj_dump_float(VALUE obj, int depth, Out out, bool as_ok) { 
     | 
|
| 
       1239 
1535 
     | 
    
         
             
                *out->cur = '\0';
         
     | 
| 
       1240 
1536 
     | 
    
         
             
            }
         
     | 
| 
       1241 
1537 
     | 
    
         | 
| 
       1242 
     | 
    
         
            -
             
     | 
| 
       1243 
     | 
    
         
            -
                 
     | 
| 
      
 1538 
     | 
    
         
            +
            size_t oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format) {
         
     | 
| 
      
 1539 
     | 
    
         
            +
                size_t cnt = snprintf(buf, blen, format, d);
         
     | 
| 
       1244 
1540 
     | 
    
         | 
| 
       1245 
1541 
     | 
    
         
             
                // Round off issues at 16 significant digits so check for obvious ones of
         
     | 
| 
       1246 
1542 
     | 
    
         
             
                // 0001 and 9999.
         
     | 
| 
         @@ -1248,7 +1544,7 @@ int oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char 
     | 
|
| 
       1248 
1544 
     | 
    
         
             
                    volatile VALUE rstr = oj_safe_string_convert(obj);
         
     | 
| 
       1249 
1545 
     | 
    
         | 
| 
       1250 
1546 
     | 
    
         
             
                    strcpy(buf, RSTRING_PTR(rstr));
         
     | 
| 
       1251 
     | 
    
         
            -
                    cnt =  
     | 
| 
      
 1547 
     | 
    
         
            +
                    cnt = RSTRING_LEN(rstr);
         
     | 
| 
       1252 
1548 
     | 
    
         
             
                }
         
     | 
| 
       1253 
1549 
     | 
    
         
             
                return cnt;
         
     | 
| 
       1254 
1550 
     | 
    
         
             
            }
         
     | 
    
        data/ext/oj/dump.h
    CHANGED
    
    | 
         @@ -7,12 +7,17 @@ 
     | 
|
| 
       7 
7 
     | 
    
         
             
            #include <ruby.h>
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         
             
            #include "oj.h"
         
     | 
| 
      
 10 
     | 
    
         
            +
            #include "simd.h"
         
     | 
| 
       10 
11 
     | 
    
         | 
| 
       11 
12 
     | 
    
         
             
            #define MAX_DEPTH 1000
         
     | 
| 
       12 
13 
     | 
    
         | 
| 
       13 
14 
     | 
    
         
             
            // Extra padding at end of buffer.
         
     | 
| 
       14 
15 
     | 
    
         
             
            #define BUFFER_EXTRA 64
         
     | 
| 
       15 
16 
     | 
    
         | 
| 
      
 17 
     | 
    
         
            +
            #ifdef HAVE_SIMD_NEON
         
     | 
| 
      
 18 
     | 
    
         
            +
            extern void initialize_neon(void);
         
     | 
| 
      
 19 
     | 
    
         
            +
            #endif /* HAVE_SIMD_NEON */
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
       16 
21 
     | 
    
         
             
            extern void oj_dump_nil(VALUE obj, int depth, Out out, bool as_ok);
         
     | 
| 
       17 
22 
     | 
    
         
             
            extern void oj_dump_true(VALUE obj, int depth, Out out, bool as_ok);
         
     | 
| 
       18 
23 
     | 
    
         
             
            extern void oj_dump_false(VALUE obj, int depth, Out out, bool as_ok);
         
     | 
| 
         @@ -30,7 +35,7 @@ extern void oj_dump_xml_time(VALUE obj, Out out); 
     | 
|
| 
       30 
35 
     | 
    
         
             
            extern void oj_dump_time(VALUE obj, Out out, int withZone);
         
     | 
| 
       31 
36 
     | 
    
         
             
            extern void oj_dump_obj_to_s(VALUE obj, Out out);
         
     | 
| 
       32 
37 
     | 
    
         | 
| 
       33 
     | 
    
         
            -
            extern const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, int *lenp);
     | 
| 
      
 38 
     | 
    
         
            +
            extern const char *oj_nan_str(VALUE obj, int opt, int mode, bool plus, size_t *lenp);
         
     | 
| 
       34 
39 
     | 
    
         | 
| 
       35 
40 
     | 
    
         
             
            // initialize an out buffer with the provided stack allocated memory
         
     | 
| 
       36 
41 
     | 
    
         
             
            extern void oj_out_init(Out out);
         
     | 
| 
         @@ -53,7 +58,7 @@ extern void oj_dump_raw_json(VALUE obj, int depth, Out out); 
     | 
|
| 
       53 
58 
     | 
    
         
             
            extern VALUE oj_add_to_json(int argc, VALUE *argv, VALUE self);
         
     | 
| 
       54 
59 
     | 
    
         
             
            extern VALUE oj_remove_to_json(int argc, VALUE *argv, VALUE self);
         
     | 
| 
       55 
60 
     | 
    
         | 
| 
       56 
     | 
    
         
            -
            extern int oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format);
     | 
| 
      
 61 
     | 
    
         
            +
            extern size_t oj_dump_float_printf(char *buf, size_t blen, VALUE obj, double d, const char *format);
         
     | 
| 
       57 
62 
     | 
    
         | 
| 
       58 
63 
     | 
    
         
             
            extern time_t oj_sec_from_time_hard_way(VALUE obj);
         
     | 
| 
       59 
64 
     | 
    
         |