normal-grain-merge 0.0.2-cp313-cp313-win_amd64.whl → 0.1.1-cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- normal_grain_merge/normal_grain_merge.c +211 -164
- normal_grain_merge/normal_grain_merge.cp313-win_amd64.pyd +0 -0
- normal_grain_merge/normal_grain_merge.pyi +1 -1
- {normal_grain_merge-0.0.2.dist-info → normal_grain_merge-0.1.1.dist-info}/METADATA +14 -13
- normal_grain_merge-0.1.1.dist-info/RECORD +10 -0
- normal_grain_merge-0.0.2.dist-info/RECORD +0 -10
- {normal_grain_merge-0.0.2.dist-info → normal_grain_merge-0.1.1.dist-info}/WHEEL +0 -0
- {normal_grain_merge-0.0.2.dist-info → normal_grain_merge-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {normal_grain_merge-0.0.2.dist-info → normal_grain_merge-0.1.1.dist-info}/top_level.txt +0 -0
normal_grain_merge/normal_grain_merge.c

@@ -2,11 +2,17 @@
 #include <stdio.h>
 #include <math.h>
 #include <float.h>
+#include <stdint.h>
+#include <string.h>
 #include <Python.h>
 #include <numpy/arrayobject.h>
 #include <smmintrin.h>
 #include <immintrin.h> /* AVX2 + SSE4.2 */
 
+#if defined(__FMA__) || (defined(_MSC_VER) && defined(__AVX2__))
+#define NGM_HAS_FMA 1
+#endif
+
 /* ----- Runtime CPU feature detection (GCC/Clang + MSVC) ----- */
 #if defined(_MSC_VER)
 #include <intrin.h>
@@ -126,8 +132,8 @@ static inline int check_shape_requirements(PyArrayObject *base,
     }
     *texture_has_alpha = (tc == 4);
 
-    if (PyArray_NDIM(skin) != 3 || PyArray_DIMS(skin)[2] != 4) {
-        PyErr_SetString(PyExc_ValueError, "skin must have shape (H, W, 4)");
+    if (PyArray_NDIM(skin) != 3 || PyArray_DIMS(skin)[2] != 3) {
+        PyErr_SetString(PyExc_ValueError, "skin must have shape (H, W, 3)");
         return 0;
     }
     if (PyArray_NDIM(im_alpha) != 2) {
@@ -184,10 +190,9 @@ static void kernel_scalar_rgb(const uint8_t *base, const uint8_t *texture,
         const uint8_t t_g = texture[3*i+1];
         const uint8_t t_b = texture[3*i+2];
 
-        const uint8_t s_r = skin[4*i+0];
-        const uint8_t s_g = skin[4*i+1];
-        const uint8_t s_b = skin[4*i+2];
-        const uint8_t s_a = skin[4*i+3];
+        const uint8_t s_r = skin[3*i+0];
+        const uint8_t s_g = skin[3*i+1];
+        const uint8_t s_b = skin[3*i+2];
 
         const uint8_t a_im = im_alpha[i];
 
@@ -203,8 +208,6 @@ static void kernel_scalar_rgb(const uint8_t *base, const uint8_t *texture,
         const float fs_r = s_r * (1.0f/255.0f);
         const float fs_g = s_g * (1.0f/255.0f);
         const float fs_b = s_b * (1.0f/255.0f);
-        const float fs_a = s_a * (1.0f/255.0f);
-
         const float fa_im = a_im * (1.0f/255.0f);
 
         /*
@@ -240,8 +243,8 @@ static void kernel_scalar_rgb(const uint8_t *base, const uint8_t *texture,
 
         /* Normal merge
          * n_out = gm_out * texture_alpha + base * inverse_tpa
-         *
-         * In this case, texture_alpha is
+         *
+         * In this case, texture_alpha is supplied by im_alpha since texture doesn't have an alpha channel here.
          */
         fr = fr * fa_im + fb_r * fit_a;
         fg = fg * fa_im + fb_g * fit_a;
@@ -267,10 +270,9 @@ static void kernel_scalar_rgba(const uint8_t *base, const uint8_t *texture,
         const uint8_t t_b = texture[4*i+2];
         const uint8_t t_a = texture[4*i+3]; /* present in RGBA branch */
 
-        const uint8_t s_r = skin[4*i+0];
-        const uint8_t s_g = skin[4*i+1];
-        const uint8_t s_b = skin[4*i+2];
-        const uint8_t s_a = skin[4*i+3];
+        const uint8_t s_r = skin[3*i+0];
+        const uint8_t s_g = skin[3*i+1];
+        const uint8_t s_b = skin[3*i+2];
 
         const uint8_t a_im = im_alpha[i];
 
@@ -286,8 +288,6 @@ static void kernel_scalar_rgba(const uint8_t *base, const uint8_t *texture,
         const float fs_r = s_r * (1.0f/255.0f);
         const float fs_g = s_g * (1.0f/255.0f);
         const float fs_b = s_b * (1.0f/255.0f);
-        const float fs_a = s_a * (1.0f/255.0f);
-
         const float fa_im = a_im * (1.0f/255.0f);
 
         /*
@@ -295,7 +295,7 @@ static void kernel_scalar_rgba(const uint8_t *base, const uint8_t *texture,
          * normal grain merge *
          **********************
          */
-        /* Merge texture
+        /* Merge texture alpha with the external mask */
 
         /* texture_alpha = texture[..., 3] * im_alpha*/
         ft_a = ft_a * fa_im;
@@ -341,34 +341,48 @@ static void kernel_scalar_rgba(const uint8_t *base, const uint8_t *texture,
 }
 
 /* ---------- AVX2 helpers ----------
-   Interleaved RGB(A) is awkward for SIMD.
-
-   You can later replace gathers with better deinterleaving if needed.
+   Interleaved RGB(A) is awkward for SIMD. We build 8-lane vectors per channel by
+   reusing the scalar u8x4 -> f32 helpers instead of relying on gathers.
 */
 
-/*
-static inline
-
-
-
-
-
-
-
-
-
-
-
-
-
+/* Forward declaration; definition shared with the SSE helpers later in the file. */
+static inline __m128 u8x4_to_unit_f32(uint8_t a, uint8_t b, uint8_t c, uint8_t d);
+
+/* Build 8-lane [0,1] floats for one channel from interleaved RGB. No gathers. */
+static inline __m256 load8_rgb_channel_to_unit_f32(const uint8_t *p, int ch /*0,1,2*/) {
+    /* pixel i, channel ch is at p[3*i + ch] */
+    __m128 lo = u8x4_to_unit_f32(p[3*0 + ch], p[3*1 + ch], p[3*2 + ch], p[3*3 + ch]);
+    __m128 hi = u8x4_to_unit_f32(p[3*4 + ch], p[3*5 + ch], p[3*6 + ch], p[3*7 + ch]);
+    return _mm256_set_m128(hi, lo);
+}
+
+static inline __m256 load8_rgba_channel_to_unit_f32(const uint8_t *p, int ch /*0..3*/) {
+    __m128 lo = u8x4_to_unit_f32(p[4*0 + ch], p[4*1 + ch], p[4*2 + ch], p[4*3 + ch]);
+    __m128 hi = u8x4_to_unit_f32(p[4*4 + ch], p[4*5 + ch], p[4*6 + ch], p[4*7 + ch]);
+    return _mm256_set_m128(hi, lo);
+}
+
+static inline __m256 mul_add_ps256(__m256 a, __m256 b, __m256 c) {
+#ifdef __FMA__
+    return _mm256_fmadd_ps(a, b, c);
+#else
+    return _mm256_add_ps(_mm256_mul_ps(a, b), c);
+#endif
+}
+
+static inline __m256 fnmadd_ps256(__m256 a, __m256 b, __m256 c) {
+#ifdef __FMA__
+    return _mm256_fnmadd_ps(a, b, c);
+#else
+    return _mm256_sub_ps(c, _mm256_mul_ps(a, b));
+#endif
 }
 
 /* Convert 8 consecutive u8 to float32 in [0,1] (for grayscale im_alpha). */
-static inline __m256 load8_u8_to_unit_f32_avx2(const uint8_t *p) {
+static inline __m256 load8_u8_to_unit_f32_avx2(const uint8_t *p, __m256 inv255) {
     __m128i v8 = _mm_loadl_epi64((const __m128i*)p); /* 8 bytes -> XMM */
     __m256i v32 = _mm256_cvtepu8_epi32(v8); /* widen to 8 x u32 */
-    return _mm256_mul_ps(_mm256_cvtepi32_ps(v32),
+    return _mm256_mul_ps(_mm256_cvtepi32_ps(v32), inv255);
 }
 
 static inline __m256 clamp01_ps(__m256 x) {
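As a plain-Python reading of the new channel loaders above (illustrative only, not part of the package): `load8_rgb_channel_to_unit_f32` gathers one channel of eight interleaved RGB pixels and scales it to [0, 1]. The array name `p` below is just a stand-in for the raw pixel buffer.

```python
import numpy as np

def load8_rgb_channel_to_unit_f32(p: np.ndarray, ch: int) -> np.ndarray:
    """NumPy equivalent of the new AVX2 helper: pick channel `ch` (0, 1, or 2)
    of 8 interleaved RGB pixels from flat uint8 buffer `p` and scale to [0, 1]."""
    # Pixel i, channel ch lives at p[3*i + ch], exactly as the C comment states.
    idx = 3 * np.arange(8) + ch
    return p[idx].astype(np.float32) / 255.0
```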
@@ -381,32 +395,74 @@ static inline __m256 nan_to_num_ps(__m256 x) {
     return _mm256_blendv_ps(_mm256_set1_ps(0.0f), x, cmp);
 }
 
-/*
-static inline void
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+/* Convert 4 float32 RGB vectors in [0,1] to uint8_t RGBRGBRGBRGB without branches. */
+static inline void store_unit_f32_to_u8_rgb4(__m128 fr, __m128 fg, __m128 fb,
+                                             uint8_t *out_ptr) {
+    const __m128 scale = _mm_set1_ps(255.0f);
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i max255 = _mm_set1_epi32(255);
+
+    __m128i ir = _mm_cvttps_epi32(_mm_mul_ps(fr, scale));
+    __m128i ig = _mm_cvttps_epi32(_mm_mul_ps(fg, scale));
+    __m128i ib = _mm_cvttps_epi32(_mm_mul_ps(fb, scale));
+
+    ir = _mm_min_epi32(_mm_max_epi32(ir, zero), max255);
+    ig = _mm_min_epi32(_mm_max_epi32(ig, zero), max255);
+    ib = _mm_min_epi32(_mm_max_epi32(ib, zero), max255);
+
+    __m128i ir16 = _mm_packus_epi32(ir, zero);
+    __m128i ig16 = _mm_packus_epi32(ig, zero);
+    __m128i ib16 = _mm_packus_epi32(ib, zero);
+
+    __m128i ir8 = _mm_packus_epi16(ir16, zero);
+    __m128i ig8 = _mm_packus_epi16(ig16, zero);
+    __m128i ib8 = _mm_packus_epi16(ib16, zero);
+
+    const __m128i mask_r = _mm_setr_epi8(
+        0, (char)0x80, (char)0x80, 1,
+        (char)0x80, (char)0x80, 2, (char)0x80,
+        (char)0x80, 3, (char)0x80, (char)0x80,
+        (char)0x80, (char)0x80, (char)0x80, (char)0x80);
+    const __m128i mask_g = _mm_setr_epi8(
+        (char)0x80, 0, (char)0x80, (char)0x80,
+        1, (char)0x80, (char)0x80, 2,
+        (char)0x80, (char)0x80, 3, (char)0x80,
+        (char)0x80, (char)0x80, (char)0x80, (char)0x80);
+    const __m128i mask_b = _mm_setr_epi8(
+        (char)0x80, (char)0x80, 0, (char)0x80,
+        (char)0x80, 1, (char)0x80, (char)0x80,
+        2, (char)0x80, (char)0x80, 3,
+        (char)0x80, (char)0x80, (char)0x80, (char)0x80);
+
+    __m128i packed = _mm_or_si128(
+        _mm_or_si128(_mm_shuffle_epi8(ir8, mask_r),
+                     _mm_shuffle_epi8(ig8, mask_g)),
+        _mm_shuffle_epi8(ib8, mask_b));
+
+    _mm_storel_epi64((__m128i*)out_ptr, packed);
+    __m128i tail_vec = _mm_srli_si128(packed, 8);
+    uint32_t tail = (uint32_t)_mm_cvtsi128_si32(tail_vec);
+    memcpy(out_ptr + 8, &tail, sizeof(tail));
+}
+
+/* Pack 8 lanes of fr/fg/fb in [0,1] to 24 bytes RGBRGB... and store. */
+static inline void store8_unit_f32_to_u8_rgb(__m256 fr, __m256 fg, __m256 fb, uint8_t *out) {
+    __m128 fr_lo = _mm256_castps256_ps128(fr);
+    __m128 fg_lo = _mm256_castps256_ps128(fg);
+    __m128 fb_lo = _mm256_castps256_ps128(fb);
+    store_unit_f32_to_u8_rgb4(fr_lo, fg_lo, fb_lo, out);
+
+    __m128 fr_hi = _mm256_extractf128_ps(fr, 1);
+    __m128 fg_hi = _mm256_extractf128_ps(fg, 1);
+    __m128 fb_hi = _mm256_extractf128_ps(fb, 1);
+    store_unit_f32_to_u8_rgb4(fr_hi, fg_hi, fb_hi, out + 12);
 }
 
 /* texture is RGB: texture_alpha = im_alpha broadcast, inverse_tpa = 1 - texture_alpha */
 static void kernel_avx2_rgb(const uint8_t *base, const uint8_t *texture,
                             const uint8_t *skin, const uint8_t *im_alpha,
                             uint8_t *out, npy_intp pixels) {
-    const
+    const __m256 inv255 = _mm256_set1_ps(1.0f/255.0f);
     const __m256 half = _mm256_set1_ps(0.5f);
     const __m256 one = _mm256_set1_ps(1.0f);
     const __m256 w = _mm256_set1_ps((float)SKIN_WEIGHT);
@@ -414,24 +470,28 @@ static void kernel_avx2_rgb(const uint8_t *base, const uint8_t *texture,
 
     npy_intp i = 0;
     for (; i + 8 <= pixels; i += 8) {
+        const uint8_t *base_blk = base + 3*i;
+        const uint8_t *tex_blk = texture + 3*i;
+        const uint8_t *skin_blk = skin + 3*i;
+
         /* base RGB in [0,1] */
-        __m256 fb_r =
-        __m256 fb_g =
-        __m256 fb_b =
+        __m256 fb_r = load8_rgb_channel_to_unit_f32(base_blk, 0);
+        __m256 fb_g = load8_rgb_channel_to_unit_f32(base_blk, 1);
+        __m256 fb_b = load8_rgb_channel_to_unit_f32(base_blk, 2);
 
         /* texture RGB in [0,1] */
-        __m256 ft_r =
-        __m256 ft_g =
-        __m256 ft_b =
+        __m256 ft_r = load8_rgb_channel_to_unit_f32(tex_blk, 0);
+        __m256 ft_g = load8_rgb_channel_to_unit_f32(tex_blk, 1);
+        __m256 ft_b = load8_rgb_channel_to_unit_f32(tex_blk, 2);
 
         /* skin RGB in [0,1] */
-        __m256 fs_r =
-        __m256 fs_g =
-        __m256 fs_b =
+        __m256 fs_r = load8_rgb_channel_to_unit_f32(skin_blk, 0);
+        __m256 fs_g = load8_rgb_channel_to_unit_f32(skin_blk, 1);
+        __m256 fs_b = load8_rgb_channel_to_unit_f32(skin_blk, 2);
 
         /* texture_alpha = im_alpha */
-        __m256 fa_im = load8_u8_to_unit_f32_avx2(im_alpha + i);
-        __m256 fit_a =
+        __m256 fa_im = load8_u8_to_unit_f32_avx2(im_alpha + i, inv255);
+        __m256 fit_a = fnmadd_ps256(fa_im, one, one);
 
         /* gm_out = clip(texture + skin - 0.5) */
         __m256 gm_r = clamp01_ps(_mm256_sub_ps(_mm256_add_ps(ft_r, fs_r), half));
@@ -439,14 +499,14 @@ static void kernel_avx2_rgb(const uint8_t *base, const uint8_t *texture,
         __m256 gm_b = clamp01_ps(_mm256_sub_ps(_mm256_add_ps(ft_b, fs_b), half));
 
         /* gm_out = gm_out * texture_alpha + texture * inverse_tpa */
-        gm_r =
-        gm_g =
-        gm_b =
+        gm_r = mul_add_ps256(gm_r, fa_im, _mm256_mul_ps(ft_r, fit_a));
+        gm_g = mul_add_ps256(gm_g, fa_im, _mm256_mul_ps(ft_g, fit_a));
+        gm_b = mul_add_ps256(gm_b, fa_im, _mm256_mul_ps(ft_b, fit_a));
 
         /* gm_out = gm_out * (1 - w) + skin * w */
-        gm_r =
-        gm_g =
-        gm_b =
+        gm_r = mul_add_ps256(gm_r, invw, _mm256_mul_ps(fs_r, w));
+        gm_g = mul_add_ps256(gm_g, invw, _mm256_mul_ps(fs_g, w));
+        gm_b = mul_add_ps256(gm_b, invw, _mm256_mul_ps(fs_b, w));
 
         /* nan_to_num */
         gm_r = nan_to_num_ps(gm_r);
@@ -454,15 +514,15 @@ static void kernel_avx2_rgb(const uint8_t *base, const uint8_t *texture,
         gm_b = nan_to_num_ps(gm_b);
 
         /* n_out = gm_out * texture_alpha + base * inverse_tpa */
-        __m256 fr =
-        __m256 fg =
-        __m256 fb =
+        __m256 fr = mul_add_ps256(gm_r, fa_im, _mm256_mul_ps(fb_r, fit_a));
+        __m256 fg = mul_add_ps256(gm_g, fa_im, _mm256_mul_ps(fb_g, fit_a));
+        __m256 fb = mul_add_ps256(gm_b, fa_im, _mm256_mul_ps(fb_b, fit_a));
 
-
+        store8_unit_f32_to_u8_rgb(fr, fg, fb, out + 3*i);
     }
 
     if (i < pixels) {
-        kernel_scalar_rgb(base + 3*i, texture + 3*i, skin + 4*i, im_alpha + i,
+        kernel_scalar_rgb(base + 3*i, texture + 3*i, skin + 3*i, im_alpha + i,
                           out + 3*i, pixels - i);
     }
 }
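Taken together, the comments in the kernels above describe the following per-pixel pipeline. The sketch below restates it in NumPy terms purely for readability; it is not the package's NumPy implementation, the function name is illustrative, and the final float-to-uint8 rounding at the edges is omitted.

```python
import numpy as np

def fused_blend_reference(base, texture, skin, texture_alpha, skin_weight):
    """Float reference of the fused blend, following the comments in the C diff.

    base, texture, skin: float arrays in [0, 1] with shape (H, W, 3).
    texture_alpha: im_alpha for RGB textures, or texture[..., 3] * im_alpha for
    RGBA textures; shaped (H, W, 1) so it broadcasts over the channels.
    skin_weight: the C constant SKIN_WEIGHT.
    """
    inverse_tpa = 1.0 - texture_alpha

    # gm_out = clip(texture + skin - 0.5)
    gm_out = np.clip(texture + skin - 0.5, 0.0, 1.0)
    # gm_out = gm_out * texture_alpha + texture * inverse_tpa
    gm_out = gm_out * texture_alpha + texture * inverse_tpa
    # gm_out = gm_out * (1 - w) + skin * w
    gm_out = gm_out * (1.0 - skin_weight) + skin * skin_weight
    gm_out = np.nan_to_num(gm_out)
    # n_out = gm_out * texture_alpha + base * inverse_tpa
    return gm_out * texture_alpha + base * inverse_tpa
```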
@@ -471,7 +531,7 @@ static void kernel_avx2_rgb(const uint8_t *base, const uint8_t *texture,
 static void kernel_avx2_rgba(const uint8_t *base, const uint8_t *texture,
                              const uint8_t *skin, const uint8_t *im_alpha,
                              uint8_t *out, npy_intp pixels) {
-    const
+    const __m256 inv255 = _mm256_set1_ps(1.0f/255.0f);
     const __m256 half = _mm256_set1_ps(0.5f);
     const __m256 one = _mm256_set1_ps(1.0f);
     const __m256 w = _mm256_set1_ps((float)SKIN_WEIGHT);
@@ -479,48 +539,52 @@ static void kernel_avx2_rgba(const uint8_t *base, const uint8_t *texture,
 
     npy_intp i = 0;
     for (; i + 8 <= pixels; i += 8) {
-
-
-
+        const uint8_t *base_blk = base + 3*i;
+        const uint8_t *tex_blk = texture + 4*i;
+        const uint8_t *skin_blk = skin + 3*i;
+
+        __m256 fb_r = load8_rgb_channel_to_unit_f32(base_blk, 0);
+        __m256 fb_g = load8_rgb_channel_to_unit_f32(base_blk, 1);
+        __m256 fb_b = load8_rgb_channel_to_unit_f32(base_blk, 2);
 
-        __m256 ft_r =
-        __m256 ft_g =
-        __m256 ft_b =
-        __m256 ft_a =
+        __m256 ft_r = load8_rgba_channel_to_unit_f32(tex_blk, 0);
+        __m256 ft_g = load8_rgba_channel_to_unit_f32(tex_blk, 1);
+        __m256 ft_b = load8_rgba_channel_to_unit_f32(tex_blk, 2);
+        __m256 ft_a = load8_rgba_channel_to_unit_f32(tex_blk, 3); /* texture alpha */
 
-        __m256 fs_r =
-        __m256 fs_g =
-        __m256 fs_b =
+        __m256 fs_r = load8_rgb_channel_to_unit_f32(skin_blk, 0);
+        __m256 fs_g = load8_rgb_channel_to_unit_f32(skin_blk, 1);
+        __m256 fs_b = load8_rgb_channel_to_unit_f32(skin_blk, 2);
 
-        __m256 fa_im = load8_u8_to_unit_f32_avx2(im_alpha + i);
+        __m256 fa_im = load8_u8_to_unit_f32_avx2(im_alpha + i, inv255);
         __m256 fta = _mm256_mul_ps(ft_a, fa_im); /* texture_alpha */
-        __m256 fit_a =
+        __m256 fit_a = fnmadd_ps256(fta, one, one); /* inverse_tpa */
 
         __m256 gm_r = clamp01_ps(_mm256_sub_ps(_mm256_add_ps(ft_r, fs_r), half));
         __m256 gm_g = clamp01_ps(_mm256_sub_ps(_mm256_add_ps(ft_g, fs_g), half));
         __m256 gm_b = clamp01_ps(_mm256_sub_ps(_mm256_add_ps(ft_b, fs_b), half));
 
-        gm_r =
-        gm_g =
-        gm_b =
+        gm_r = mul_add_ps256(gm_r, fta, _mm256_mul_ps(ft_r, fit_a));
+        gm_g = mul_add_ps256(gm_g, fta, _mm256_mul_ps(ft_g, fit_a));
+        gm_b = mul_add_ps256(gm_b, fta, _mm256_mul_ps(ft_b, fit_a));
 
-        gm_r =
-        gm_g =
-        gm_b =
+        gm_r = mul_add_ps256(gm_r, invw, _mm256_mul_ps(fs_r, w));
+        gm_g = mul_add_ps256(gm_g, invw, _mm256_mul_ps(fs_g, w));
+        gm_b = mul_add_ps256(gm_b, invw, _mm256_mul_ps(fs_b, w));
 
         gm_r = nan_to_num_ps(gm_r);
         gm_g = nan_to_num_ps(gm_g);
         gm_b = nan_to_num_ps(gm_b);
 
-        __m256 fr =
-        __m256 fg =
-        __m256 fb =
+        __m256 fr = mul_add_ps256(gm_r, fta, _mm256_mul_ps(fb_r, fit_a));
+        __m256 fg = mul_add_ps256(gm_g, fta, _mm256_mul_ps(fb_g, fit_a));
+        __m256 fb = mul_add_ps256(gm_b, fta, _mm256_mul_ps(fb_b, fit_a));
 
-
+        store8_unit_f32_to_u8_rgb(fr, fg, fb, out + 3*i);
     }
 
     if (i < pixels) {
-        kernel_scalar_rgba(base + 3*i, texture + 4*i, skin + 4*i, im_alpha + i,
+        kernel_scalar_rgba(base + 3*i, texture + 4*i, skin + 3*i, im_alpha + i,
                            out + 3*i, pixels - i);
     }
 }
@@ -550,6 +614,13 @@ static inline __m128 nan_to_num_ps128(__m128 x) {
     return _mm_blendv_ps(_mm_set1_ps(0.0f), x, cmp);
 }
 
+static inline __m128 mul_add_ps128(__m128 a, __m128 b, __m128 c) {
+#ifdef __FMA__
+    return _mm_fmadd_ps(a, b, c);
+#else
+    return _mm_add_ps(_mm_mul_ps(a, b), c);
+#endif
+}
 
 static void kernel_sse42_rgb(const uint8_t *base, const uint8_t *texture,
                              const uint8_t *skin, const uint8_t *im_alpha,
@@ -575,12 +646,12 @@ static void kernel_sse42_rgb(const uint8_t *base, const uint8_t *texture,
         __m128 ft_b = u8x4_to_unit_f32(texture[3*(i+0)+2], texture[3*(i+1)+2],
                                        texture[3*(i+2)+2], texture[3*(i+3)+2]);
 
-        __m128 fs_r = u8x4_to_unit_f32(skin[4*(i+0)+0], skin[4*(i+1)+0],
-                                       skin[4*(i+2)+0], skin[4*(i+3)+0]);
-        __m128 fs_g = u8x4_to_unit_f32(skin[4*(i+0)+1], skin[4*(i+1)+1],
-                                       skin[4*(i+2)+1], skin[4*(i+3)+1]);
-        __m128 fs_b = u8x4_to_unit_f32(skin[4*(i+0)+2], skin[4*(i+1)+2],
-                                       skin[4*(i+2)+2], skin[4*(i+3)+2]);
+        __m128 fs_r = u8x4_to_unit_f32(skin[3*(i+0)+0], skin[3*(i+1)+0],
+                                       skin[3*(i+2)+0], skin[3*(i+3)+0]);
+        __m128 fs_g = u8x4_to_unit_f32(skin[3*(i+0)+1], skin[3*(i+1)+1],
+                                       skin[3*(i+2)+1], skin[3*(i+3)+1]);
+        __m128 fs_b = u8x4_to_unit_f32(skin[3*(i+0)+2], skin[3*(i+1)+2],
+                                       skin[3*(i+2)+2], skin[3*(i+3)+2]);
 
         __m128 fa_im = load4_u8_to_unit_f32(im_alpha + i);
         __m128 fit_a = _mm_sub_ps(one, fa_im);
@@ -589,39 +660,27 @@ static void kernel_sse42_rgb(const uint8_t *base, const uint8_t *texture,
         __m128 gm_g = clamp01_ps128(_mm_sub_ps(_mm_add_ps(ft_g, fs_g), half));
         __m128 gm_b = clamp01_ps128(_mm_sub_ps(_mm_add_ps(ft_b, fs_b), half));
 
-        gm_r =
-        gm_g =
-        gm_b =
+        gm_r = mul_add_ps128(gm_r, fa_im, _mm_mul_ps(ft_r, fit_a));
+        gm_g = mul_add_ps128(gm_g, fa_im, _mm_mul_ps(ft_g, fit_a));
+        gm_b = mul_add_ps128(gm_b, fa_im, _mm_mul_ps(ft_b, fit_a));
 
-        gm_r =
-        gm_g =
-        gm_b =
+        gm_r = mul_add_ps128(gm_r, invw, _mm_mul_ps(fs_r, w));
+        gm_g = mul_add_ps128(gm_g, invw, _mm_mul_ps(fs_g, w));
+        gm_b = mul_add_ps128(gm_b, invw, _mm_mul_ps(fs_b, w));
 
         gm_r = nan_to_num_ps128(gm_r);
         gm_g = nan_to_num_ps128(gm_g);
         gm_b = nan_to_num_ps128(gm_b);
 
-        __m128 fr =
-        __m128 fg =
-        __m128 fb =
-
-
-        _mm_storeu_ps(rr, fr);
-        _mm_storeu_ps(gg, fg);
-        _mm_storeu_ps(bb, fb);
-
-        for (int k = 0; k < 4; ++k) {
-            int r = (int)(rr[k] * 255.0f);
-            int g = (int)(gg[k] * 255.0f);
-            int b = (int)(bb[k] * 255.0f);
-            out[3*(i+k)+0] = (uint8_t)(r < 0 ? 0 : r > 255 ? 255 : r);
-            out[3*(i+k)+1] = (uint8_t)(g < 0 ? 0 : g > 255 ? 255 : g);
-            out[3*(i+k)+2] = (uint8_t)(b < 0 ? 0 : b > 255 ? 255 : b);
-        }
+        __m128 fr = mul_add_ps128(gm_r, fa_im, _mm_mul_ps(fb_r, fit_a));
+        __m128 fg = mul_add_ps128(gm_g, fa_im, _mm_mul_ps(fb_g, fit_a));
+        __m128 fb = mul_add_ps128(gm_b, fa_im, _mm_mul_ps(fb_b, fit_a));
+
+        store_unit_f32_to_u8_rgb4(fr, fg, fb, out + 3*i);
     }
 
     if (i < pixels) {
-        kernel_scalar_rgb(base + 3*i, texture + 3*i, skin + 4*i, im_alpha + i,
+        kernel_scalar_rgb(base + 3*i, texture + 3*i, skin + 3*i, im_alpha + i,
                           out + 3*i, pixels - i);
     }
 }
@@ -652,12 +711,12 @@ static void kernel_sse42_rgba(const uint8_t *base, const uint8_t *texture,
         __m128 ft_a = u8x4_to_unit_f32(texture[4*(i+0)+3], texture[4*(i+1)+3],
                                        texture[4*(i+2)+3], texture[4*(i+3)+3]);
 
-        __m128 fs_r = u8x4_to_unit_f32(skin[4*(i+0)+0], skin[4*(i+1)+0],
-                                       skin[4*(i+2)+0], skin[4*(i+3)+0]);
-        __m128 fs_g = u8x4_to_unit_f32(skin[4*(i+0)+1], skin[4*(i+1)+1],
-                                       skin[4*(i+2)+1], skin[4*(i+3)+1]);
-        __m128 fs_b = u8x4_to_unit_f32(skin[4*(i+0)+2], skin[4*(i+1)+2],
-                                       skin[4*(i+2)+2], skin[4*(i+3)+2]);
+        __m128 fs_r = u8x4_to_unit_f32(skin[3*(i+0)+0], skin[3*(i+1)+0],
+                                       skin[3*(i+2)+0], skin[3*(i+3)+0]);
+        __m128 fs_g = u8x4_to_unit_f32(skin[3*(i+0)+1], skin[3*(i+1)+1],
+                                       skin[3*(i+2)+1], skin[3*(i+3)+1]);
+        __m128 fs_b = u8x4_to_unit_f32(skin[3*(i+0)+2], skin[3*(i+1)+2],
+                                       skin[3*(i+2)+2], skin[3*(i+3)+2]);
 
         __m128 fa_im = load4_u8_to_unit_f32(im_alpha + i);
         __m128 fta = _mm_mul_ps(ft_a, fa_im); /* texture_alpha */
@@ -667,39 +726,27 @@ static void kernel_sse42_rgba(const uint8_t *base, const uint8_t *texture,
         __m128 gm_g = clamp01_ps128(_mm_sub_ps(_mm_add_ps(ft_g, fs_g), half));
         __m128 gm_b = clamp01_ps128(_mm_sub_ps(_mm_add_ps(ft_b, fs_b), half));
 
-        gm_r =
-        gm_g =
-        gm_b =
+        gm_r = mul_add_ps128(gm_r, fta, _mm_mul_ps(ft_r, fit_a));
+        gm_g = mul_add_ps128(gm_g, fta, _mm_mul_ps(ft_g, fit_a));
+        gm_b = mul_add_ps128(gm_b, fta, _mm_mul_ps(ft_b, fit_a));
 
-        gm_r =
-        gm_g =
-        gm_b =
+        gm_r = mul_add_ps128(gm_r, invw, _mm_mul_ps(fs_r, w));
+        gm_g = mul_add_ps128(gm_g, invw, _mm_mul_ps(fs_g, w));
+        gm_b = mul_add_ps128(gm_b, invw, _mm_mul_ps(fs_b, w));
 
         gm_r = nan_to_num_ps128(gm_r);
         gm_g = nan_to_num_ps128(gm_g);
         gm_b = nan_to_num_ps128(gm_b);
 
-        __m128 fr =
-        __m128 fg =
-        __m128 fb =
-
-
-        _mm_storeu_ps(rr, fr);
-        _mm_storeu_ps(gg, fg);
-        _mm_storeu_ps(bb, fb);
-
-        for (int k = 0; k < 4; ++k) {
-            int r = (int)(rr[k] * 255.0f);
-            int g = (int)(gg[k] * 255.0f);
-            int b = (int)(bb[k] * 255.0f);
-            out[3*(i+k)+0] = (uint8_t)(r < 0 ? 0 : r > 255 ? 255 : r);
-            out[3*(i+k)+1] = (uint8_t)(g < 0 ? 0 : g > 255 ? 255 : g);
-            out[3*(i+k)+2] = (uint8_t)(b < 0 ? 0 : b > 255 ? 255 : b);
-        }
+        __m128 fr = mul_add_ps128(gm_r, fta, _mm_mul_ps(fb_r, fit_a));
+        __m128 fg = mul_add_ps128(gm_g, fta, _mm_mul_ps(fb_g, fit_a));
+        __m128 fb = mul_add_ps128(gm_b, fta, _mm_mul_ps(fb_b, fit_a));
+
+        store_unit_f32_to_u8_rgb4(fr, fg, fb, out + 3*i);
     }
 
     if (i < pixels) {
-        kernel_scalar_rgba(base + 3*i, texture + 4*i, skin + 4*i, im_alpha + i,
+        kernel_scalar_rgba(base + 3*i, texture + 4*i, skin + 3*i, im_alpha + i,
                            out + 3*i, pixels - i);
     }
 }
normal_grain_merge/normal_grain_merge.cp313-win_amd64.pyd

Binary file
normal_grain_merge/normal_grain_merge.pyi

@@ -16,7 +16,7 @@ def normal_grain_merge(
     Channel ordering doesn't matter as long as it is consistent.
     :param base: The base RGB image.
     :param texture: The texture, either RGB or RGBA.
-    :param skin: The
+    :param skin: The RGB skin cutout.
     :param im_alpha: The alpha from the cutout.
     :param kernel: Which kernel to use.
         The `auto` kernel chooses between avx2 and sse4.2 when compiled with gcc and uses `scaler` on Windows.
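For orientation, a typical call looks something like the sketch below. This is illustrative only: the top-level import, the dtypes, and the `KernelKind` member name are assumptions based on the stub, the RECORD, and the metadata shown in this diff, not verified against the package.

```python
import numpy as np
from normal_grain_merge import normal_grain_merge
# KernelKind lives in normal_grain_merge/kernel_kind.py per the RECORD;
# the member name AUTO is an assumption for illustration.
from normal_grain_merge.kernel_kind import KernelKind

h, w = 256, 256
base = np.zeros((h, w, 3), dtype=np.uint8)      # base RGB image
texture = np.zeros((h, w, 4), dtype=np.uint8)   # RGB or RGBA texture
skin = np.zeros((h, w, 3), dtype=np.uint8)      # RGB skin cutout (was 4-channel in 0.0.2)
im_alpha = np.zeros((h, w), dtype=np.uint8)     # alpha from the cutout

out = normal_grain_merge(base, texture, skin, im_alpha, KernelKind.AUTO)
```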
{normal_grain_merge-0.0.2.dist-info → normal_grain_merge-0.1.1.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: normal_grain_merge
-Version: 0.0.2
+Version: 0.1.1
 Summary: Fused normal and grain merge C extension
 Author: Samuel Howard
 License: MIT
@@ -88,21 +88,22 @@ One of `KernelKind`.
 The entire reason for me writing this was NumPy being slow when this operation is in the hot path.
 So, I decided to write a SIMD version that does the type casting outside NumPy with only the intermediate values being in FP32.
 
-How much of a speedup is this? All numbers are from a Ryzen 7 4800H running
+How much of a speedup is this? All numbers are from a Ryzen 7 4800H running Ubuntu 24.04 and Python 3.12.3.
 
 | Method/Kernel     | Average Iteration Time |
 |-------------------|------------------------|
-| C scalar kernel   | 0.
-| C SSE4.2 kernel   | 0.
-| C AVX2 kernel     | 0.
-| NumPy version     | 0.
-| Old NumPy version | 0.
+| C scalar kernel   | 0.016076s              |
+| C SSE4.2 kernel   | 0.007300s              |
+| C AVX2 kernel     | 0.007113s              |
+| NumPy version     | 0.169621s              |
+| Old NumPy version | 0.254648s              |
 
 | Method Comparison  | Speedup  |
 |--------------------|----------|
-| NumPy -> scalar    |
-| NumPy -> SSE4.2    |
-| NumPy -> AVX2      |
-| Old np -> SSE4.2   |
-|
-| C scalar ->
+| NumPy -> scalar    | 90.5223% |
+| NumPy -> SSE4.2    | 95.6965% |
+| NumPy -> AVX2      | 95.8063% |
+| Old np -> SSE4.2   | 97.1334% |
+| Old np -> AVX2     | 97.2066% |
+| C scalar -> SSE4.2 | 54.5933% |
+| C scalar -> AVX2   | 55.7525% |
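The speedup percentages in the second table are consistent with reading them as the fractional reduction in average iteration time from the first table, e.g. NumPy -> AVX2 is 1 - 0.007113 / 0.169621 ≈ 0.9581, i.e. about 95.81%. A quick check (hypothetical helper, not part of the package):

```python
def speedup_pct(old_s: float, new_s: float) -> float:
    """Percent reduction in iteration time, matching the second table."""
    return (1.0 - new_s / old_s) * 100.0

print(f"{speedup_pct(0.169621, 0.007113):.4f}%")  # NumPy -> AVX2, ~95.81%
print(f"{speedup_pct(0.016076, 0.007300):.4f}%")  # C scalar -> SSE4.2, ~54.59%
```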
normal_grain_merge-0.1.1.dist-info/RECORD

@@ -0,0 +1,10 @@
+normal_grain_merge/__init__.py,sha256=Roc1wQ7_13LG_Z3Bd82zhk8wn7R1BrcO63fCdsvnnJU,89
+normal_grain_merge/kernel_kind.py,sha256=3cP4WRQSG9ZZeHsrXpXJ5Kcc8wABsmRSgex0rwRT8K4,162
+normal_grain_merge/normal_grain_merge.c,sha256=6bwB2LgwW6jzENlRuZVpzXCdQhsnjYSqIcWb0ua0948,37118
+normal_grain_merge/normal_grain_merge.cp313-win_amd64.pyd,sha256=Vucn7WsXnJBZ0oFpUq8wGPrfuNw9SL2aD9lh_zoKru0,24576
+normal_grain_merge/normal_grain_merge.pyi,sha256=HXa55A0wdcmzPpJzi7qgJws5y2q_uGjdJZQXzTkw9vc,1089
+normal_grain_merge-0.1.1.dist-info/licenses/LICENSE,sha256=qbUDFP46iOpV1ouBhpqjX-kS_cCVMHgrLBNcdTlq7Qc,1089
+normal_grain_merge-0.1.1.dist-info/METADATA,sha256=iyQQ6xdYZSd_NxoT9s1yQ4ZvtlsJm8f2cUw8mG7a_30,3597
+normal_grain_merge-0.1.1.dist-info/WHEEL,sha256=qV0EIPljj1XC_vuSatRWjn02nZIz3N1t8jsZz7HBr2U,101
+normal_grain_merge-0.1.1.dist-info/top_level.txt,sha256=jfUAUKWrxBshHvZ0xTu3uF5VJsUpbWp5NkxUj8OXqu8,19
+normal_grain_merge-0.1.1.dist-info/RECORD,,
normal_grain_merge-0.0.2.dist-info/RECORD

@@ -1,10 +0,0 @@
-normal_grain_merge/__init__.py,sha256=Roc1wQ7_13LG_Z3Bd82zhk8wn7R1BrcO63fCdsvnnJU,89
-normal_grain_merge/kernel_kind.py,sha256=3cP4WRQSG9ZZeHsrXpXJ5Kcc8wABsmRSgex0rwRT8K4,162
-normal_grain_merge/normal_grain_merge.c,sha256=n2dJ-E_DlpKtDoLW2oQ6XoLDrDPKWx_DknA_lbzuB-g,36136
-normal_grain_merge/normal_grain_merge.cp313-win_amd64.pyd,sha256=UAa_pr5gm3Wkgs49Hq7bI3ZMd0CCy16DGnNTiz7ICfc,25088
-normal_grain_merge/normal_grain_merge.pyi,sha256=Tz5RVlNbBqn_MsQ46WikaohEPctHdWsFxK3bloRZl1M,1090
-normal_grain_merge-0.0.2.dist-info/licenses/LICENSE,sha256=qbUDFP46iOpV1ouBhpqjX-kS_cCVMHgrLBNcdTlq7Qc,1089
-normal_grain_merge-0.0.2.dist-info/METADATA,sha256=jNq6oUROWU6TdMcxyrsPzaQIHKUIHRXwNa8riynTC_s,3560
-normal_grain_merge-0.0.2.dist-info/WHEEL,sha256=qV0EIPljj1XC_vuSatRWjn02nZIz3N1t8jsZz7HBr2U,101
-normal_grain_merge-0.0.2.dist-info/top_level.txt,sha256=jfUAUKWrxBshHvZ0xTu3uF5VJsUpbWp5NkxUj8OXqu8,19
-normal_grain_merge-0.0.2.dist-info/RECORD,,
{normal_grain_merge-0.0.2.dist-info → normal_grain_merge-0.1.1.dist-info}/WHEEL: File without changes

{normal_grain_merge-0.0.2.dist-info → normal_grain_merge-0.1.1.dist-info}/licenses/LICENSE: File without changes

{normal_grain_merge-0.0.2.dist-info → normal_grain_merge-0.1.1.dist-info}/top_level.txt: File without changes