@img/sharp-libvips-dev 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/README.md +2 -2
  2. package/cplusplus/VConnection.cpp +54 -54
  3. package/cplusplus/VError.cpp +20 -18
  4. package/cplusplus/VImage.cpp +636 -589
  5. package/cplusplus/VInterpolate.cpp +22 -22
  6. package/cplusplus/VRegion.cpp +4 -4
  7. package/cplusplus/vips-operators.cpp +2326 -2301
  8. package/include/aom/aom_codec.h +10 -6
  9. package/include/aom/aom_decoder.h +1 -1
  10. package/include/aom/aom_encoder.h +9 -2
  11. package/include/aom/aomcx.h +72 -3
  12. package/include/cairo/cairo-ft.h +1 -1
  13. package/include/cairo/cairo-gobject.h +8 -0
  14. package/include/cairo/cairo-svg.h +3 -3
  15. package/include/cairo/cairo-version.h +2 -2
  16. package/include/cairo/cairo.h +91 -24
  17. package/include/harfbuzz/hb-version.h +2 -2
  18. package/include/hwy/aligned_allocator.h +211 -0
  19. package/include/hwy/base.h +1517 -0
  20. package/include/hwy/cache_control.h +108 -0
  21. package/include/hwy/detect_compiler_arch.h +281 -0
  22. package/include/hwy/detect_targets.h +644 -0
  23. package/include/hwy/foreach_target.h +340 -0
  24. package/include/hwy/highway.h +435 -0
  25. package/include/hwy/highway_export.h +74 -0
  26. package/include/hwy/nanobenchmark.h +171 -0
  27. package/include/hwy/ops/arm_neon-inl.h +8913 -0
  28. package/include/hwy/ops/arm_sve-inl.h +5105 -0
  29. package/include/hwy/ops/emu128-inl.h +2811 -0
  30. package/include/hwy/ops/generic_ops-inl.h +4745 -0
  31. package/include/hwy/ops/ppc_vsx-inl.h +5716 -0
  32. package/include/hwy/ops/rvv-inl.h +5070 -0
  33. package/include/hwy/ops/scalar-inl.h +1995 -0
  34. package/include/hwy/ops/set_macros-inl.h +578 -0
  35. package/include/hwy/ops/shared-inl.h +539 -0
  36. package/include/hwy/ops/tuple-inl.h +125 -0
  37. package/include/hwy/ops/wasm_128-inl.h +5917 -0
  38. package/include/hwy/ops/x86_128-inl.h +11173 -0
  39. package/include/hwy/ops/x86_256-inl.h +7529 -0
  40. package/include/hwy/ops/x86_512-inl.h +6849 -0
  41. package/include/hwy/per_target.h +44 -0
  42. package/include/hwy/print-inl.h +62 -0
  43. package/include/hwy/print.h +75 -0
  44. package/include/hwy/robust_statistics.h +148 -0
  45. package/include/hwy/targets.h +338 -0
  46. package/include/hwy/timer-inl.h +200 -0
  47. package/include/hwy/timer.h +55 -0
  48. package/include/jconfig.h +2 -2
  49. package/include/jpeglib.h +3 -2
  50. package/include/libheif/heif.h +443 -377
  51. package/include/libheif/heif_cxx.h +4 -1
  52. package/include/libheif/heif_plugin.h +1 -1
  53. package/include/libheif/heif_properties.h +138 -0
  54. package/include/libheif/heif_regions.h +866 -0
  55. package/include/libheif/heif_version.h +3 -3
  56. package/include/vips/VConnection8.h +43 -49
  57. package/include/vips/VError8.h +27 -24
  58. package/include/vips/VImage8.h +4861 -4597
  59. package/include/vips/VInterpolate8.h +24 -27
  60. package/include/vips/VRegion8.h +32 -33
  61. package/include/vips/arithmetic.h +169 -169
  62. package/include/vips/basic.h +33 -33
  63. package/include/vips/buf.h +56 -54
  64. package/include/vips/colour.h +95 -95
  65. package/include/vips/connection.h +190 -193
  66. package/include/vips/conversion.h +91 -91
  67. package/include/vips/convolution.h +36 -30
  68. package/include/vips/create.h +63 -63
  69. package/include/vips/dbuf.h +35 -37
  70. package/include/vips/debug.h +65 -33
  71. package/include/vips/draw.h +41 -41
  72. package/include/vips/enumtypes.h +54 -51
  73. package/include/vips/error.h +63 -63
  74. package/include/vips/foreign.h +263 -223
  75. package/include/vips/format.h +48 -48
  76. package/include/vips/freqfilt.h +22 -22
  77. package/include/vips/gate.h +55 -47
  78. package/include/vips/generate.h +34 -34
  79. package/include/vips/header.h +111 -101
  80. package/include/vips/histogram.h +28 -28
  81. package/include/vips/image.h +213 -213
  82. package/include/vips/interpolate.h +40 -41
  83. package/include/vips/memory.h +61 -52
  84. package/include/vips/morphology.h +24 -24
  85. package/include/vips/mosaicing.h +32 -33
  86. package/include/vips/object.h +371 -357
  87. package/include/vips/operation.h +68 -67
  88. package/include/vips/private.h +76 -76
  89. package/include/vips/rect.h +26 -26
  90. package/include/vips/region.h +92 -92
  91. package/include/vips/resample.h +38 -38
  92. package/include/vips/sbuf.h +53 -54
  93. package/include/vips/semaphore.h +24 -24
  94. package/include/vips/thread.h +30 -27
  95. package/include/vips/threadpool.h +48 -49
  96. package/include/vips/transform.h +39 -39
  97. package/include/vips/type.h +90 -85
  98. package/include/vips/util.h +274 -229
  99. package/include/vips/vector.h +24 -144
  100. package/include/vips/version.h +9 -9
  101. package/include/vips/vips.h +41 -40
  102. package/package.json +1 -1
  103. package/versions.json +7 -7
@@ -0,0 +1,435 @@
1
+ // Copyright 2020 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ // Main header required before using vector types.
17
+
18
+ // IWYU pragma: begin_exports
19
+ #include "hwy/base.h"
20
+ #include "hwy/detect_compiler_arch.h"
21
+ #include "hwy/highway_export.h"
22
+ #include "hwy/targets.h"
23
+ // IWYU pragma: end_exports
24
+
25
+ // This include guard is checked by foreach_target, so avoid the usual _H_
26
+ // suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included
27
+ // after/outside this include guard.
28
+ #ifndef HWY_HIGHWAY_INCLUDED
29
+ #define HWY_HIGHWAY_INCLUDED
30
+
31
+ namespace hwy {
32
+
33
+ // API version (https://semver.org/); keep in sync with CMakeLists.txt.
34
+ #define HWY_MAJOR 1
35
+ #define HWY_MINOR 0
36
+ #define HWY_PATCH 7
37
+
38
+ //------------------------------------------------------------------------------
39
+ // Shorthand for tags (defined in shared-inl.h) used to select overloads.
40
+ // Note that ScalableTag<T> is preferred over HWY_FULL, and CappedTag<T, N> over
41
+ // HWY_CAPPED(T, N).
42
+
43
+ // HWY_FULL(T[,LMUL=1]) is a native vector/group. LMUL is the number of
44
+ // registers in the group, and is ignored on targets that do not support groups.
45
+ #define HWY_FULL1(T) hwy::HWY_NAMESPACE::ScalableTag<T>  // chosen for HWY_FULL(T)
46
+ #define HWY_FULL2(T, LMUL) \
47
+ hwy::HWY_NAMESPACE::ScalableTag<T, hwy::CeilLog2(HWY_MAX(0, LMUL))>  // chosen for HWY_FULL(T, LMUL)
48
+ #define HWY_3TH_ARG(arg1, arg2, arg3, ...) arg3  // expands to its third argument
49
+ // Workaround for MSVC grouping __VA_ARGS__ into a single argument: the parenthesized list is re-expanded as the arguments of HWY_3TH_ARG.
50
+ #define HWY_FULL_RECOMPOSER(args_with_paren) HWY_3TH_ARG args_with_paren
51
+ // Trailing comma avoids -pedantic false alarm. One user argument leaves HWY_FULL1 in third position, two leave HWY_FULL2 there.
52
+ #define HWY_CHOOSE_FULL(...) \
53
+ HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, ))
54
+ #define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__)
55
+
56
+ // Vector of up to MAX_N lanes. It's better to use full vectors where possible.
57
+ #define HWY_CAPPED(T, MAX_N) hwy::HWY_NAMESPACE::CappedTag<T, MAX_N>
58
+
59
+ //------------------------------------------------------------------------------
60
+ // Export user functions for static/dynamic dispatch
61
+
62
+ // Evaluates to 0 inside a translation unit if it is generating anything but the
63
+ // static target (the last one if multiple targets are enabled). Used to prevent
64
+ // redefinitions of HWY_EXPORT. Unless foreach_target.h is included, we only
65
+ // compile once anyway, so this is 1 unless foreach_target.h is or has been included.
66
+ #ifndef HWY_ONCE
67
+ #define HWY_ONCE 1
68
+ #endif
69
+
70
+ // HWY_STATIC_DISPATCH(FUNC_NAME) is the namespace-qualified FUNC_NAME for
71
+ // HWY_STATIC_TARGET (the only defined namespace unless HWY_TARGET_INCLUDE is
72
+ // defined), and can be used to deduce the return type of Choose*.
73
+ #if HWY_STATIC_TARGET == HWY_SCALAR
74
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SCALAR::FUNC_NAME
75
+ #elif HWY_STATIC_TARGET == HWY_EMU128
76
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_EMU128::FUNC_NAME
77
+ #elif HWY_STATIC_TARGET == HWY_RVV
78
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_RVV::FUNC_NAME
79
+ #elif HWY_STATIC_TARGET == HWY_WASM_EMU256
80
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM_EMU256::FUNC_NAME
81
+ #elif HWY_STATIC_TARGET == HWY_WASM
82
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME
83
+ #elif HWY_STATIC_TARGET == HWY_NEON_WITHOUT_AES
84
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON_WITHOUT_AES::FUNC_NAME
85
+ #elif HWY_STATIC_TARGET == HWY_NEON
86
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
87
+ #elif HWY_STATIC_TARGET == HWY_SVE
88
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME
89
+ #elif HWY_STATIC_TARGET == HWY_SVE2
90
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2::FUNC_NAME
91
+ #elif HWY_STATIC_TARGET == HWY_SVE_256
92
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE_256::FUNC_NAME
93
+ #elif HWY_STATIC_TARGET == HWY_SVE2_128
94
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2_128::FUNC_NAME
95
+ #elif HWY_STATIC_TARGET == HWY_PPC8
96
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME
97
+ #elif HWY_STATIC_TARGET == HWY_PPC9
98
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC9::FUNC_NAME
99
+ #elif HWY_STATIC_TARGET == HWY_PPC10
100
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC10::FUNC_NAME
101
+ #elif HWY_STATIC_TARGET == HWY_SSE2
102
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE2::FUNC_NAME
103
+ #elif HWY_STATIC_TARGET == HWY_SSSE3
104
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSSE3::FUNC_NAME
105
+ #elif HWY_STATIC_TARGET == HWY_SSE4
106
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE4::FUNC_NAME
107
+ #elif HWY_STATIC_TARGET == HWY_AVX2
108
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX2::FUNC_NAME
109
+ #elif HWY_STATIC_TARGET == HWY_AVX3
110
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3::FUNC_NAME
111
+ #elif HWY_STATIC_TARGET == HWY_AVX3_DL
112
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_DL::FUNC_NAME
113
+ #elif HWY_STATIC_TARGET == HWY_AVX3_ZEN4
114
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_ZEN4::FUNC_NAME
115
+ #elif HWY_STATIC_TARGET == HWY_AVX3_SPR
116
+ #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_SPR::FUNC_NAME
117
+ #endif
118
+
119
+ // HWY_CHOOSE_*(FUNC_NAME) expands to the function pointer for that target or
120
+ // nullptr if that target was not compiled.
121
+ #if HWY_TARGETS & HWY_EMU128
122
+ #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_EMU128::FUNC_NAME
123
+ #elif HWY_TARGETS & HWY_SCALAR
124
+ #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_SCALAR::FUNC_NAME
125
+ #else
126
+ // When HWY_SCALAR/HWY_EMU128 are not present and other targets were disabled at
127
+ // runtime, fall back to the baseline with HWY_STATIC_DISPATCH().
128
+ #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
129
+ #endif
130
+
131
+ #if HWY_TARGETS & HWY_WASM_EMU256
132
+ #define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) &N_WASM_EMU256::FUNC_NAME
133
+ #else
134
+ #define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) nullptr
135
+ #endif
136
+
137
+ #if HWY_TARGETS & HWY_WASM
138
+ #define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME
139
+ #else
140
+ #define HWY_CHOOSE_WASM(FUNC_NAME) nullptr
141
+ #endif
142
+
143
+ #if HWY_TARGETS & HWY_RVV
144
+ #define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME
145
+ #else
146
+ #define HWY_CHOOSE_RVV(FUNC_NAME) nullptr
147
+ #endif
148
+
149
+ #if HWY_TARGETS & HWY_NEON_WITHOUT_AES
150
+ #define HWY_CHOOSE_NEON_WITHOUT_AES(FUNC_NAME) &N_NEON_WITHOUT_AES::FUNC_NAME
151
+ #else
152
+ #define HWY_CHOOSE_NEON_WITHOUT_AES(FUNC_NAME) nullptr
153
+ #endif
154
+
155
+ #if HWY_TARGETS & HWY_NEON
156
+ #define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME
157
+ #else
158
+ #define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
159
+ #endif
160
+
161
+ #if HWY_TARGETS & HWY_SVE
162
+ #define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
163
+ #else
164
+ #define HWY_CHOOSE_SVE(FUNC_NAME) nullptr
165
+ #endif
166
+
167
+ #if HWY_TARGETS & HWY_SVE2
168
+ #define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME
169
+ #else
170
+ #define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr
171
+ #endif
172
+
173
+ #if HWY_TARGETS & HWY_SVE_256
174
+ #define HWY_CHOOSE_SVE_256(FUNC_NAME) &N_SVE_256::FUNC_NAME
175
+ #else
176
+ #define HWY_CHOOSE_SVE_256(FUNC_NAME) nullptr
177
+ #endif
178
+
179
+ #if HWY_TARGETS & HWY_SVE2_128
180
+ #define HWY_CHOOSE_SVE2_128(FUNC_NAME) &N_SVE2_128::FUNC_NAME
181
+ #else
182
+ #define HWY_CHOOSE_SVE2_128(FUNC_NAME) nullptr
183
+ #endif
184
+
185
+ #if HWY_TARGETS & HWY_PPC8
186
+ #define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME
187
+ #else
188
+ #define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr
189
+ #endif
190
+
191
+ #if HWY_TARGETS & HWY_PPC9
192
+ #define HWY_CHOOSE_PPC9(FUNC_NAME) &N_PPC9::FUNC_NAME
193
+ #else
194
+ #define HWY_CHOOSE_PPC9(FUNC_NAME) nullptr
195
+ #endif
196
+
197
+ #if HWY_TARGETS & HWY_PPC10
198
+ #define HWY_CHOOSE_PPC10(FUNC_NAME) &N_PPC10::FUNC_NAME
199
+ #else
200
+ #define HWY_CHOOSE_PPC10(FUNC_NAME) nullptr
201
+ #endif
202
+
203
+ #if HWY_TARGETS & HWY_SSE2
204
+ #define HWY_CHOOSE_SSE2(FUNC_NAME) &N_SSE2::FUNC_NAME
205
+ #else
206
+ #define HWY_CHOOSE_SSE2(FUNC_NAME) nullptr
207
+ #endif
208
+
209
+ #if HWY_TARGETS & HWY_SSSE3
210
+ #define HWY_CHOOSE_SSSE3(FUNC_NAME) &N_SSSE3::FUNC_NAME
211
+ #else
212
+ #define HWY_CHOOSE_SSSE3(FUNC_NAME) nullptr
213
+ #endif
214
+
215
+ #if HWY_TARGETS & HWY_SSE4
216
+ #define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME
217
+ #else
218
+ #define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr
219
+ #endif
220
+
221
+ #if HWY_TARGETS & HWY_AVX2
222
+ #define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME
223
+ #else
224
+ #define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr
225
+ #endif
226
+
227
+ #if HWY_TARGETS & HWY_AVX3
228
+ #define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME
229
+ #else
230
+ #define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr
231
+ #endif
232
+
233
+ #if HWY_TARGETS & HWY_AVX3_DL
234
+ #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) &N_AVX3_DL::FUNC_NAME
235
+ #else
236
+ #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) nullptr
237
+ #endif
238
+
239
+ #if HWY_TARGETS & HWY_AVX3_ZEN4
240
+ #define HWY_CHOOSE_AVX3_ZEN4(FUNC_NAME) &N_AVX3_ZEN4::FUNC_NAME
241
+ #else
242
+ #define HWY_CHOOSE_AVX3_ZEN4(FUNC_NAME) nullptr
243
+ #endif
244
+
245
+ #if HWY_TARGETS & HWY_AVX3_SPR
246
+ #define HWY_CHOOSE_AVX3_SPR(FUNC_NAME) &N_AVX3_SPR::FUNC_NAME
247
+ #else
248
+ #define HWY_CHOOSE_AVX3_SPR(FUNC_NAME) nullptr
249
+ #endif
250
+
251
+ // MSVC 2017 workaround: the non-type template parameter to ChooseAndCall
252
+ // apparently cannot be an array. Use a function pointer instead, which has the
253
+ // disadvantage that we call the static (not best) target on the first call to
254
+ // any HWY_DYNAMIC_DISPATCH.
255
+ #if HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915
256
+ #define HWY_DISPATCH_WORKAROUND 1
257
+ #else
258
+ #define HWY_DISPATCH_WORKAROUND 0
259
+ #endif
260
+
261
+ // Holder for the static member function ChooseAndCall, which is what runs on
262
+ // the first HWY_DYNAMIC_DISPATCH, where GetIndex is still zero; instantiations
263
+ // of that function are the first entry in the tables created by HWY_EXPORT.
264
+ template <typename RetType, typename... Args>
265
+ struct FunctionCache {
266
+ public:
267
+ typedef RetType(FunctionType)(Args...);  // function type RetType(Args...)
268
+ // Workaround variant: `func` is one function pointer; the chosen target is updated, then `func` itself is always what gets called.
269
+ #if HWY_DISPATCH_WORKAROUND
270
+ template <FunctionType* const func>
271
+ static RetType ChooseAndCall(Args... args) {
272
+ ChosenTarget& chosen_target = GetChosenTarget();
273
+ chosen_target.Update(SupportedTargets());
274
+ return (*func)(args...);  // does not consult GetIndex() on this path
275
+ }
276
+ #else
277
+ // A template function that, once instantiated, has the same signature as the
278
+ // function being called. It initializes the bit array of targets supported
279
+ // by the current CPU and then calls the matching entry of the HWY_EXPORT
280
+ // table passed as `table`. Subsequent calls via HWY_DYNAMIC_DISPATCH to any
281
+ // exported functions, even those defined by different translation units,
282
+ // will dispatch directly to the best available target.
283
+ template <FunctionType* const table[]>
284
+ static RetType ChooseAndCall(Args... args) {
285
+ ChosenTarget& chosen_target = GetChosenTarget();
286
+ chosen_target.Update(SupportedTargets());
287
+ return (table[chosen_target.GetIndex()])(args...);  // index is read after Update()
288
+ }
289
+ #endif // HWY_DISPATCH_WORKAROUND
290
+ };
291
+
292
+ // Used to deduce the template parameters RetType and Args from a function pointer; HWY_EXPORT wraps the call in decltype to name the matching FunctionCache type.
293
+ template <typename RetType, typename... Args>
294
+ FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {  // the pointer argument is unnamed and never read
295
+ return FunctionCache<RetType, Args...>();
296
+ }
297
+
298
+ #define HWY_DISPATCH_TABLE(FUNC_NAME) \
299
+ HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
300
+
301
+ // HWY_EXPORT(FUNC_NAME); expands to a static array that is used by
302
+ // HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime. This
303
+ // static array must be defined at the same namespace level as the function
304
+ // it is exporting.
305
+ // After being exported, it can be called from other parts of the same source
306
+ // file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper
307
+ // like in the following example:
308
+ //
309
+ // #include "hwy/highway.h"
310
+ // HWY_BEFORE_NAMESPACE();
311
+ // namespace skeleton {
312
+ // namespace HWY_NAMESPACE {
313
+ //
314
+ // void MyFunction(int a, char b, const char* c) { ... }
315
+ //
316
+ // // NOLINTNEXTLINE(google-readability-namespace-comments)
317
+ // } // namespace HWY_NAMESPACE
318
+ // } // namespace skeleton
319
+ // HWY_AFTER_NAMESPACE();
320
+ //
321
+ // namespace skeleton {
322
+ // HWY_EXPORT(MyFunction); // Defines the dispatch table in this scope.
323
+ //
324
+ // void MyFunction(int a, char b, const char* c) {
325
+ // return HWY_DYNAMIC_DISPATCH(MyFunction)(a, b, c);
326
+ // }
327
+ // } // namespace skeleton
328
+ //
329
+
330
+ #if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
331
+
332
+ // Simplified version for IDE or the dynamic dispatch case with only one target.
333
+ // This case still uses a table, although of a single element, to provide the
334
+ // same compile error conditions as with the dynamic dispatch case when multiple
335
+ // targets are being compiled.
336
+ #define HWY_EXPORT(FUNC_NAME) \
337
+ HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const \
338
+ HWY_DISPATCH_TABLE(FUNC_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
339
+ #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
340
+ #define HWY_DYNAMIC_POINTER(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
341
+
342
+ #else
343
+
344
+ // Simplified version for MSVC 2017: function pointer instead of table.
345
+ #if HWY_DISPATCH_WORKAROUND
346
+
347
+ #define HWY_EXPORT(FUNC_NAME) \
348
+ static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
349
+ FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
350
+ /* The first entry in the table initializes the global cache and \
351
+ * calls the function from HWY_STATIC_TARGET. */ \
352
+ &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH( \
353
+ FUNC_NAME)))::ChooseAndCall<&HWY_STATIC_DISPATCH(FUNC_NAME)>, \
354
+ HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
355
+ HWY_CHOOSE_FALLBACK(FUNC_NAME), \
356
+ }
357
+
358
+ #else
359
+
360
+ // Dynamic dispatch case with one entry per dynamic target plus the fallback
361
+ // target and the initialization wrapper.
362
+ #define HWY_EXPORT(FUNC_NAME) \
363
+ static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
364
+ FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
365
+ /* The first entry in the table initializes the global cache and \
366
+ * calls the appropriate function. */ \
367
+ &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH( \
368
+ FUNC_NAME)))::ChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>, \
369
+ HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
370
+ HWY_CHOOSE_FALLBACK(FUNC_NAME), \
371
+ }
372
+
373
+ #endif // HWY_DISPATCH_WORKAROUND
374
+
375
+ #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) \
376
+ (*(HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()]))
377
+ #define HWY_DYNAMIC_POINTER(FUNC_NAME) \
378
+ (HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()])
379
+
380
+ #endif // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
381
+
382
+ // DEPRECATED names; please use HWY_HAVE_* instead.
383
+ #define HWY_CAP_INTEGER64 HWY_HAVE_INTEGER64
384
+ #define HWY_CAP_FLOAT16 HWY_HAVE_FLOAT16
385
+ #define HWY_CAP_FLOAT64 HWY_HAVE_FLOAT64
386
+
387
+ } // namespace hwy
388
+
389
+ #endif // HWY_HIGHWAY_INCLUDED
390
+
391
+ //------------------------------------------------------------------------------
392
+
393
+ // NOTE: the following definitions and ops/*.h depend on HWY_TARGET, so we want
394
+ // to include them once per target, which is ensured by the toggle check.
395
+ // Because ops/*.h are included under it, they do not need their own guard.
396
+ #if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
397
+ #ifdef HWY_HIGHWAY_PER_TARGET
398
+ #undef HWY_HIGHWAY_PER_TARGET
399
+ #else
400
+ #define HWY_HIGHWAY_PER_TARGET
401
+ #endif
402
+
403
+ // These define ops inside namespace hwy::HWY_NAMESPACE.
404
+ #if HWY_TARGET == HWY_SSE2 || HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4
405
+ #include "hwy/ops/x86_128-inl.h"
406
+ #elif HWY_TARGET == HWY_AVX2
407
+ #include "hwy/ops/x86_256-inl.h"
408
+ #elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL || \
409
+ HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR
410
+ #include "hwy/ops/x86_512-inl.h"
411
+ #elif HWY_TARGET == HWY_PPC8 || HWY_TARGET == HWY_PPC9 || \
412
+ HWY_TARGET == HWY_PPC10
413
+ #include "hwy/ops/ppc_vsx-inl.h"
414
+ #elif HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES
415
+ #include "hwy/ops/arm_neon-inl.h"
416
+ #elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2 || \
417
+ HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
418
+ #include "hwy/ops/arm_sve-inl.h"
419
+ #elif HWY_TARGET == HWY_WASM_EMU256
420
+ #include "hwy/ops/wasm_256-inl.h"
421
+ #elif HWY_TARGET == HWY_WASM
422
+ #include "hwy/ops/wasm_128-inl.h"
423
+ #elif HWY_TARGET == HWY_RVV
424
+ #include "hwy/ops/rvv-inl.h"
425
+ #elif HWY_TARGET == HWY_EMU128
426
+ #include "hwy/ops/emu128-inl.h"
427
+ #elif HWY_TARGET == HWY_SCALAR
428
+ #include "hwy/ops/scalar-inl.h"
429
+ #else
430
+ #pragma message("HWY_TARGET does not match any known target")
431
+ #endif // HWY_TARGET
432
+
433
+ #include "hwy/ops/generic_ops-inl.h"
434
+
435
+ #endif // HWY_HIGHWAY_PER_TARGET
@@ -0,0 +1,74 @@
1
+ // Pseudo-generated file to handle both cmake & bazel build system.
2
+
3
+ // Initial generation done using cmake code:
4
+ // include(GenerateExportHeader)
5
+ // generate_export_header(hwy EXPORT_MACRO_NAME HWY_DLLEXPORT EXPORT_FILE_NAME
6
+ // hwy/highway_export.h)
7
+ // code reformatted using clang-format --style=Google
8
+
9
+ #ifndef HWY_DLLEXPORT_H
10
+ #define HWY_DLLEXPORT_H
11
+
12
+ #if !defined(HWY_SHARED_DEFINE)
13
+ #define HWY_DLLEXPORT
14
+ #define HWY_CONTRIB_DLLEXPORT
15
+ #define HWY_TEST_DLLEXPORT
16
+ #else // !HWY_SHARED_DEFINE
17
+
18
+ #ifndef HWY_DLLEXPORT
19
+ #if defined(hwy_EXPORTS)
20
+ /* We are building this library */
21
+ #ifdef _WIN32
22
+ #define HWY_DLLEXPORT __declspec(dllexport)
23
+ #else
24
+ #define HWY_DLLEXPORT __attribute__((visibility("default")))
25
+ #endif
26
+ #else // defined(hwy_EXPORTS)
27
+ /* We are using this library */
28
+ #ifdef _WIN32
29
+ #define HWY_DLLEXPORT __declspec(dllimport)
30
+ #else
31
+ #define HWY_DLLEXPORT __attribute__((visibility("default")))
32
+ #endif
33
+ #endif // defined(hwy_EXPORTS)
34
+ #endif // HWY_DLLEXPORT
35
+
36
+ #ifndef HWY_CONTRIB_DLLEXPORT
37
+ #if defined(hwy_contrib_EXPORTS)
38
+ /* We are building this library */
39
+ #ifdef _WIN32
40
+ #define HWY_CONTRIB_DLLEXPORT __declspec(dllexport)
41
+ #else
42
+ #define HWY_CONTRIB_DLLEXPORT __attribute__((visibility("default")))
43
+ #endif
44
+ #else // defined(hwy_contrib_EXPORTS)
45
+ /* We are using this library */
46
+ #ifdef _WIN32
47
+ #define HWY_CONTRIB_DLLEXPORT __declspec(dllimport)
48
+ #else
49
+ #define HWY_CONTRIB_DLLEXPORT __attribute__((visibility("default")))
50
+ #endif
51
+ #endif // defined(hwy_contrib_EXPORTS)
52
+ #endif // HWY_CONTRIB_DLLEXPORT
53
+
54
+ #ifndef HWY_TEST_DLLEXPORT
55
+ #if defined(hwy_test_EXPORTS)
56
+ /* We are building this library */
57
+ #ifdef _WIN32
58
+ #define HWY_TEST_DLLEXPORT __declspec(dllexport)
59
+ #else
60
+ #define HWY_TEST_DLLEXPORT __attribute__((visibility("default")))
61
+ #endif
62
+ #else // defined(hwy_test_EXPORTS)
63
+ /* We are using this library */
64
+ #ifdef _WIN32
65
+ #define HWY_TEST_DLLEXPORT __declspec(dllimport)
66
+ #else
67
+ #define HWY_TEST_DLLEXPORT __attribute__((visibility("default")))
68
+ #endif
69
+ #endif // defined(hwy_test_EXPORTS)
70
+ #endif // HWY_TEST_DLLEXPORT
71
+
72
+ #endif // !HWY_SHARED_DEFINE
73
+
74
+ #endif /* HWY_DLLEXPORT_H */
@@ -0,0 +1,171 @@
1
+ // Copyright 2019 Google LLC
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ //
4
+ // Licensed under the Apache License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License.
6
+ // You may obtain a copy of the License at
7
+ //
8
+ // http://www.apache.org/licenses/LICENSE-2.0
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef HIGHWAY_HWY_NANOBENCHMARK_H_
17
+ #define HIGHWAY_HWY_NANOBENCHMARK_H_
18
+
19
+ // Benchmarks functions of a single integer argument with realistic branch
20
+ // prediction hit rates. Uses a robust estimator to summarize the measurements.
21
+ // The precision is about 0.2%.
22
+ //
23
+ // Examples: see nanobenchmark_test.cc.
24
+ //
25
+ // Background: Microbenchmarks such as http://github.com/google/benchmark
26
+ // can measure elapsed times on the order of a microsecond. Shorter functions
27
+ // are typically measured by repeating them thousands of times and dividing
28
+ // the total elapsed time by this count. Unfortunately, repetition (especially
29
+ // with the same input parameter!) influences the runtime. In time-critical
30
+ // code, it is reasonable to expect warm instruction/data caches and TLBs,
31
+ // but a perfect record of which branches will be taken is unrealistic.
32
+ // Unless the application also repeatedly invokes the measured function with
33
+ // the same parameter, the benchmark is measuring something very different -
34
+ // a best-case result, almost as if the parameter were made a compile-time
35
+ // constant. This may lead to erroneous conclusions about branch-heavy
36
+ // algorithms outperforming branch-free alternatives.
37
+ //
38
+ // Our approach differs in three ways. Adding fences to the timer functions
39
+ // reduces variability due to instruction reordering, improving the timer
40
+ // resolution to about 40 CPU cycles. However, shorter functions must still
41
+ // be invoked repeatedly. For more realistic branch prediction performance,
42
+ // we vary the input parameter according to a user-specified distribution.
43
+ // Thus, instead of VaryInputs(Measure(Repeat(func))), we change the
44
+ // loop nesting to Measure(Repeat(VaryInputs(func))). We also estimate the
45
+ // central tendency of the measurement samples with the "half sample mode",
46
+ // which is more robust to outliers and skewed data than the mean or median.
47
+
48
+ #include <stddef.h>
49
+ #include <stdint.h>
50
+
51
+ #include "hwy/highway_export.h"
52
+ #include "hwy/timer.h"
53
+
54
+ // Enables sanity checks that verify correct operation at the cost of
55
+ // longer benchmark runs. Defaults to 0 (disabled) unless defined beforehand.
56
+ #ifndef NANOBENCHMARK_ENABLE_CHECKS
57
+ #define NANOBENCHMARK_ENABLE_CHECKS 0
58
+ #endif
59
+ // If `condition` is false: prints the line number to stderr, then abort(). Uses fprintf/abort, but this header does not include <stdio.h>/<stdlib.h> directly. Expands to a bare `while` statement, not a do/while(0) wrapper.
60
+ #define NANOBENCHMARK_CHECK_ALWAYS(condition) \
61
+ while (!(condition)) { \
62
+ fprintf(stderr, "Nanobenchmark check failed at line %d\n", __LINE__); \
63
+ abort(); \
64
+ }
65
+ // NANOBENCHMARK_CHECK forwards to NANOBENCHMARK_CHECK_ALWAYS when checks are enabled and otherwise expands to nothing, so `condition` is not evaluated.
66
+ #if NANOBENCHMARK_ENABLE_CHECKS
67
+ #define NANOBENCHMARK_CHECK(condition) NANOBENCHMARK_CHECK_ALWAYS(condition)
68
+ #else
69
+ #define NANOBENCHMARK_CHECK(condition)
70
+ #endif
71
+
72
+ namespace hwy {
73
+
74
+ // Returns 1, but without the compiler knowing what the value is. This prevents
75
+ // optimizing out code.
76
+ HWY_DLLEXPORT int Unpredictable1();
77
+
78
+ // Input influencing the function being measured (e.g. number of bytes to copy).
79
+ using FuncInput = size_t;
80
+
81
+ // "Proof of work" returned by Func to ensure the compiler does not elide it.
82
+ using FuncOutput = uint64_t;
83
+
84
+ // Function to measure: either 1) a captureless lambda or function with two
85
+ // arguments or 2) a lambda with capture, in which case the first argument
86
+ // is reserved for use by MeasureClosure.
87
+ using Func = FuncOutput (*)(const void*, FuncInput);
88
+
89
+ // Internal parameters that determine precision/resolution/measuring time. Every field has a default; Params() is the default argument of Measure and MeasureClosure.
90
+ struct Params {
91
+ // Best-case precision, expressed as a divisor of the timer resolution.
92
+ // Larger => more calls to Func and higher precision.
93
+ size_t precision_divisor = 1024;
94
+
95
+ // Ratio between full and subset input distribution sizes. Cannot be less
96
+ // than 2; larger values increase measurement time but more faithfully
97
+ // model the given input distribution.
98
+ size_t subset_ratio = 2;  // default is the documented minimum
99
+
100
+ // Together with the estimated Func duration, determines how many times to
101
+ // call Func before checking the sample variability. Larger values increase
102
+ // measurement time, memory/cache use and precision.
103
+ double seconds_per_eval = 4E-3;  // 4 milliseconds
104
+
105
+ // The minimum number of samples before estimating the central tendency.
106
+ size_t min_samples_per_eval = 7;
107
+
108
+ // The mode is better than median for estimating the central tendency of
109
+ // skewed/fat-tailed distributions, but it requires sufficient samples
110
+ // relative to the width of half-ranges.
111
+ size_t min_mode_samples = 64;
112
+
113
+ // Maximum permissible variability (= median absolute deviation / center).
114
+ double target_rel_mad = 0.002;  // i.e. 0.2%
115
+
116
+ // Abort after this many evals without reaching target_rel_mad. This
117
+ // prevents infinite loops.
118
+ size_t max_evals = 9;
119
+
120
+ // Whether to print additional statistics to stdout.
121
+ bool verbose = true;  // printing is on by default
122
+ };
123
+
124
+ // Measurement result for each unique input. Members have no default initializers.
125
+ struct Result {
126
+ FuncInput input;  // presumably the input value this entry describes - confirm in nanobenchmark.cc
127
+
128
+ // Robust estimate (mode or median) of duration.
129
+ float ticks;
130
+
131
+ // Measure of variability (median absolute deviation relative to "ticks").
132
+ float variability;
133
+ };
134
+
135
+ // Precisely measures the number of ticks elapsed when calling "func" with the
136
+ // given inputs, shuffled to ensure realistic branch prediction hit rates.
137
+ //
138
+ // "func" returns a 'proof of work' to ensure its computations are not elided.
139
+ // "arg" is passed to Func, or reserved for internal use by MeasureClosure.
140
+ // "inputs" is an array of "num_inputs" (not necessarily unique) arguments to
141
+ // "func". The values should be chosen to maximize coverage of "func". This
142
+ // represents a distribution, so a value's frequency should reflect its
143
+ // probability in the real application. Order does not matter; for example, a
144
+ // uniform distribution over [0, 4) could be represented as {3,0,2,1}.
145
+ // Returns how many Result were written to "results": one per unique input, or
146
+ // zero if the measurement failed (an error message goes to stderr).
147
+ HWY_DLLEXPORT size_t Measure(Func func, const uint8_t* arg,
148
+ const FuncInput* inputs, size_t num_inputs,
149
+ Result* results, const Params& p = Params());
150
+
151
+ // Calls operator() of the given closure (lambda function) with "input" and returns its result as FuncOutput. MeasureClosure passes this function to Measure.
152
+ template <class Closure>
153
+ static FuncOutput CallClosure(const Closure* f, const FuncInput input) {
154
+ return (*f)(input);  // `f` is dereferenced without a null check
155
+ }
156
+
157
+ // Same as Measure, except "closure" is typically a lambda function of
157
+ // FuncInput -> FuncOutput with a capture list. Forwards inputs, num_inputs, results and p to Measure unchanged and returns Measure's result.
158
+ template <class Closure>
159
+ static inline size_t MeasureClosure(const Closure& closure,
160
+ const FuncInput* inputs,
161
+ const size_t num_inputs, Result* results,
162
+ const Params& p = Params()) {
163
+ return Measure(reinterpret_cast<Func>(&CallClosure<Closure>),  // NOTE(review): CallClosure takes const Closure*, Func takes const void* - the call goes through a differently-typed function pointer
164
+ reinterpret_cast<const uint8_t*>(&closure), inputs, num_inputs,  // address of "closure" becomes Measure's "arg"
165
+ results, p);
166
+ }
168
+
169
+ } // namespace hwy
170
+
171
+ #endif // HIGHWAY_HWY_NANOBENCHMARK_H_