@novastera-oss/llamarn 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/CMakeLists.txt +47 -21
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/PureCppImpl.cpp +80 -6
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/convert_hf_to_gguf.py +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/CMakeLists.txt +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +99 -364
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.h +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-exp.c +14 -13
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.c +15 -3
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +36 -25
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/main.c +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +80 -7
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.c +6 -0
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +19 -0
- package/cpp/llama.cpp/src/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +22 -0
- package/cpp/llama.cpp/src/llama-arch.h +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +21 -1
- package/cpp/llama.cpp/src/models/models.h +4 -0
- package/cpp/llama.cpp/src/models/rnd1.cpp +126 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6403 -6395
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6366 -6358
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4815 -4809
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
#include <chrono>
|
|
10
10
|
#include <mutex>
|
|
11
11
|
#include <string>
|
|
12
|
+
#include <stdexcept>
|
|
12
13
|
|
|
13
14
|
#ifdef _WIN32
|
|
14
15
|
# include <sal.h>
|
|
@@ -240,6 +241,23 @@ struct ggml_hexagon_session {
|
|
|
240
241
|
uint32_t prof_pkts;
|
|
241
242
|
};
|
|
242
243
|
|
|
244
|
+
static inline void hex_print_op_info(const ggml_tensor * op, ggml_hexagon_session * sess, const uint32_t req_flags) {
|
|
245
|
+
char dims[64 * GGML_MAX_SRC];
|
|
246
|
+
char strides[64 * GGML_MAX_SRC];
|
|
247
|
+
char types[16 * GGML_MAX_SRC];
|
|
248
|
+
char buffs[64 * GGML_MAX_SRC];
|
|
249
|
+
char names[64 * GGML_MAX_SRC];
|
|
250
|
+
|
|
251
|
+
hex_format_op_dims(dims, op);
|
|
252
|
+
hex_format_op_strides(strides, op);
|
|
253
|
+
hex_format_op_types(types, op);
|
|
254
|
+
hex_format_op_buffs(buffs, op);
|
|
255
|
+
hex_format_op_names(names, op);
|
|
256
|
+
|
|
257
|
+
HEX_VERBOSE("ggml-hex: %s %s: %s : %s : %s : %s : %s: flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
|
|
258
|
+
names, dims, types, strides, buffs, req_flags);
|
|
259
|
+
}
|
|
260
|
+
|
|
243
261
|
void ggml_hexagon_session::enqueue(struct htp_general_req &req, struct dspqueue_buffer *bufs, uint32_t n_bufs, bool sync) {
|
|
244
262
|
// Bump pending flag (cleared in the session::flush once we get the responce)
|
|
245
263
|
this->op_pending++; // atomic inc
|
|
@@ -1912,6 +1930,15 @@ static bool hex_supported_dims(const struct ggml_tensor * x, const struct ggml_t
|
|
|
1912
1930
|
return true;
|
|
1913
1931
|
}
|
|
1914
1932
|
|
|
1933
|
+
template <typename... _TTensor>
|
|
1934
|
+
static inline bool hex_supported_buffer(const struct ggml_hexagon_session * sess, _TTensor... tensors) {
|
|
1935
|
+
return ([&]() -> bool {
|
|
1936
|
+
return !tensors || !tensors->buffer ||
|
|
1937
|
+
(ggml_backend_buffer_is_hexagon(tensors->buffer) &&
|
|
1938
|
+
ggml_backend_hexagon_buffer_get_sess(tensors->buffer) == sess);
|
|
1939
|
+
}() && ...);
|
|
1940
|
+
}
|
|
1941
|
+
|
|
1915
1942
|
static bool ggml_hexagon_supported_mul_mat(const struct ggml_hexagon_session * sess, const struct ggml_tensor * dst) {
|
|
1916
1943
|
const struct ggml_tensor * src0 = dst->src[0];
|
|
1917
1944
|
const struct ggml_tensor * src1 = dst->src[1];
|
|
@@ -1959,16 +1986,7 @@ static bool ggml_hexagon_supported_mul_mat(const struct ggml_hexagon_session * s
|
|
|
1959
1986
|
}
|
|
1960
1987
|
|
|
1961
1988
|
// src0 & src1 & dst must be mapped to the same session
|
|
1962
|
-
if (src0->buffer &&
|
|
1963
|
-
(!ggml_backend_buffer_is_hexagon(src0->buffer) || ggml_backend_hexagon_buffer_get_sess(src0->buffer) != sess)) {
|
|
1964
|
-
return false;
|
|
1965
|
-
}
|
|
1966
|
-
if (src1->buffer &&
|
|
1967
|
-
(!ggml_backend_buffer_is_hexagon(src1->buffer) || ggml_backend_hexagon_buffer_get_sess(src1->buffer) != sess)) {
|
|
1968
|
-
return false;
|
|
1969
|
-
}
|
|
1970
|
-
if (dst->buffer &&
|
|
1971
|
-
(!ggml_backend_buffer_is_hexagon(dst->buffer) || ggml_backend_hexagon_buffer_get_sess(dst->buffer) != sess)) {
|
|
1989
|
+
if (!hex_supported_buffer(sess, src0, src1, dst)) {
|
|
1972
1990
|
return false;
|
|
1973
1991
|
}
|
|
1974
1992
|
|
|
@@ -2016,20 +2034,7 @@ static bool ggml_hexagon_supported_mul_mat_id(const struct ggml_hexagon_session
|
|
|
2016
2034
|
|
|
2017
2035
|
// src0 (weights) must be repacked and mapped to the same session
|
|
2018
2036
|
// src1 & sr2 & dst must be mapped to the same session
|
|
2019
|
-
if (src0->buffer &&
|
|
2020
|
-
(!ggml_backend_buffer_is_hexagon(src0->buffer) || ggml_backend_hexagon_buffer_get_sess(src0->buffer) != sess)) {
|
|
2021
|
-
return false;
|
|
2022
|
-
}
|
|
2023
|
-
if (src1->buffer &&
|
|
2024
|
-
(!ggml_backend_buffer_is_hexagon(src1->buffer) || ggml_backend_hexagon_buffer_get_sess(src1->buffer) != sess)) {
|
|
2025
|
-
return false;
|
|
2026
|
-
}
|
|
2027
|
-
if (src2->buffer &&
|
|
2028
|
-
(!ggml_backend_buffer_is_hexagon(src2->buffer) || ggml_backend_hexagon_buffer_get_sess(src2->buffer) != sess)) {
|
|
2029
|
-
return false;
|
|
2030
|
-
}
|
|
2031
|
-
if (dst->buffer &&
|
|
2032
|
-
(!ggml_backend_buffer_is_hexagon(dst->buffer) || ggml_backend_hexagon_buffer_get_sess(dst->buffer) != sess)) {
|
|
2037
|
+
if (!hex_supported_buffer(sess, src0, src1, src2, dst)) {
|
|
2033
2038
|
return false;
|
|
2034
2039
|
}
|
|
2035
2040
|
|
|
@@ -2063,16 +2068,7 @@ static bool ggml_hexagon_supported_binary(const struct ggml_hexagon_session * se
|
|
|
2063
2068
|
}
|
|
2064
2069
|
|
|
2065
2070
|
// src0, src1 & dst must be mapped to the same session
|
|
2066
|
-
if (src0->buffer &&
|
|
2067
|
-
(!ggml_backend_buffer_is_hexagon(src0->buffer) || ggml_backend_hexagon_buffer_get_sess(src0->buffer) != sess)) {
|
|
2068
|
-
return false;
|
|
2069
|
-
}
|
|
2070
|
-
if (src1->buffer &&
|
|
2071
|
-
(!ggml_backend_buffer_is_hexagon(src1->buffer) || ggml_backend_hexagon_buffer_get_sess(src1->buffer) != sess)) {
|
|
2072
|
-
return false;
|
|
2073
|
-
}
|
|
2074
|
-
if (dst->buffer &&
|
|
2075
|
-
(!ggml_backend_buffer_is_hexagon(dst->buffer) || ggml_backend_hexagon_buffer_get_sess(dst->buffer) != sess)) {
|
|
2071
|
+
if (!hex_supported_buffer(sess, src0, src1, dst)) {
|
|
2076
2072
|
return false;
|
|
2077
2073
|
}
|
|
2078
2074
|
|
|
@@ -2104,20 +2100,7 @@ static bool ggml_hexagon_supported_add_id(const struct ggml_hexagon_session * se
|
|
|
2104
2100
|
}
|
|
2105
2101
|
|
|
2106
2102
|
// src0, src1 & dst must be mapped to the same session
|
|
2107
|
-
if (src0->buffer &&
|
|
2108
|
-
(!ggml_backend_buffer_is_hexagon(src0->buffer) || ggml_backend_hexagon_buffer_get_sess(src0->buffer) != sess)) {
|
|
2109
|
-
return false;
|
|
2110
|
-
}
|
|
2111
|
-
if (src1->buffer &&
|
|
2112
|
-
(!ggml_backend_buffer_is_hexagon(src1->buffer) || ggml_backend_hexagon_buffer_get_sess(src1->buffer) != sess)) {
|
|
2113
|
-
return false;
|
|
2114
|
-
}
|
|
2115
|
-
if (src2->buffer &&
|
|
2116
|
-
(!ggml_backend_buffer_is_hexagon(src2->buffer) || ggml_backend_hexagon_buffer_get_sess(src2->buffer) != sess)) {
|
|
2117
|
-
return false;
|
|
2118
|
-
}
|
|
2119
|
-
if (dst->buffer &&
|
|
2120
|
-
(!ggml_backend_buffer_is_hexagon(dst->buffer) || ggml_backend_hexagon_buffer_get_sess(dst->buffer) != sess)) {
|
|
2103
|
+
if (!hex_supported_buffer(sess, src0, src1, src2, dst)) {
|
|
2121
2104
|
return false;
|
|
2122
2105
|
}
|
|
2123
2106
|
|
|
@@ -2144,12 +2127,7 @@ static bool ggml_hexagon_supported_unary(const struct ggml_hexagon_session * ses
|
|
|
2144
2127
|
}
|
|
2145
2128
|
|
|
2146
2129
|
// src0 & dst must be mapped to the same session
|
|
2147
|
-
if (src0->buffer &&
|
|
2148
|
-
(!ggml_backend_buffer_is_hexagon(src0->buffer) || ggml_backend_hexagon_buffer_get_sess(src0->buffer) != sess)) {
|
|
2149
|
-
return false;
|
|
2150
|
-
}
|
|
2151
|
-
if (dst->buffer &&
|
|
2152
|
-
(!ggml_backend_buffer_is_hexagon(dst->buffer) || ggml_backend_hexagon_buffer_get_sess(dst->buffer) != sess)) {
|
|
2130
|
+
if (!hex_supported_buffer(sess, src0, dst)) {
|
|
2153
2131
|
return false;
|
|
2154
2132
|
}
|
|
2155
2133
|
|
|
@@ -2186,16 +2164,7 @@ static bool ggml_hexagon_supported_activations(const struct ggml_hexagon_session
|
|
|
2186
2164
|
}
|
|
2187
2165
|
|
|
2188
2166
|
// src0, src1 & dst must be mapped to the same session
|
|
2189
|
-
if (src0->buffer &&
|
|
2190
|
-
(!ggml_backend_buffer_is_hexagon(src0->buffer) || ggml_backend_hexagon_buffer_get_sess(src0->buffer) != sess)) {
|
|
2191
|
-
return false;
|
|
2192
|
-
}
|
|
2193
|
-
if (src1 && src1->buffer &&
|
|
2194
|
-
(!ggml_backend_buffer_is_hexagon(src1->buffer) || ggml_backend_hexagon_buffer_get_sess(src1->buffer) != sess)) {
|
|
2195
|
-
return false;
|
|
2196
|
-
}
|
|
2197
|
-
if (dst->buffer &&
|
|
2198
|
-
(!ggml_backend_buffer_is_hexagon(dst->buffer) || ggml_backend_hexagon_buffer_get_sess(dst->buffer) != sess)) {
|
|
2167
|
+
if (!hex_supported_buffer(sess, src0, src1, dst)) {
|
|
2199
2168
|
return false;
|
|
2200
2169
|
}
|
|
2201
2170
|
|
|
@@ -2248,16 +2217,7 @@ static bool ggml_hexagon_supported_softmax(const struct ggml_hexagon_session * s
|
|
|
2248
2217
|
}
|
|
2249
2218
|
|
|
2250
2219
|
// src0, src1 & dst must be mapped to the same session
|
|
2251
|
-
if (src0->buffer &&
|
|
2252
|
-
(!ggml_backend_buffer_is_hexagon(src0->buffer) || ggml_backend_hexagon_buffer_get_sess(src0->buffer) != sess)) {
|
|
2253
|
-
return false;
|
|
2254
|
-
}
|
|
2255
|
-
if (src1 && src1->buffer &&
|
|
2256
|
-
(!ggml_backend_buffer_is_hexagon(src1->buffer) || ggml_backend_hexagon_buffer_get_sess(src1->buffer) != sess)) {
|
|
2257
|
-
return false;
|
|
2258
|
-
}
|
|
2259
|
-
if (dst->buffer &&
|
|
2260
|
-
(!ggml_backend_buffer_is_hexagon(dst->buffer) || ggml_backend_hexagon_buffer_get_sess(dst->buffer) != sess)) {
|
|
2220
|
+
if (!hex_supported_buffer(sess, src0, src1, dst)) {
|
|
2261
2221
|
return false;
|
|
2262
2222
|
}
|
|
2263
2223
|
|
|
@@ -2269,7 +2229,7 @@ static bool ggml_hexagon_supported_rope(const struct ggml_hexagon_session * sess
|
|
|
2269
2229
|
|
|
2270
2230
|
int mode = op_params[2];
|
|
2271
2231
|
|
|
2272
|
-
if ((mode & GGML_ROPE_TYPE_NEOX) || (mode & GGML_ROPE_TYPE_MROPE) || (mode & GGML_ROPE_TYPE_VISION)) {
|
|
2232
|
+
if ((mode & GGML_ROPE_TYPE_MROPE) || (mode & GGML_ROPE_TYPE_VISION)) {
|
|
2273
2233
|
return false;
|
|
2274
2234
|
}
|
|
2275
2235
|
if (mode & 1) {
|
|
@@ -2312,20 +2272,7 @@ static bool ggml_hexagon_supported_rope(const struct ggml_hexagon_session * sess
|
|
|
2312
2272
|
}
|
|
2313
2273
|
|
|
2314
2274
|
// src0, src1, src2 & dst must be mapped to the same session
|
|
2315
|
-
if (src0->buffer &&
|
|
2316
|
-
(!ggml_backend_buffer_is_hexagon(src0->buffer) || ggml_backend_hexagon_buffer_get_sess(src0->buffer) != sess)) {
|
|
2317
|
-
return false;
|
|
2318
|
-
}
|
|
2319
|
-
if (src1->buffer &&
|
|
2320
|
-
(!ggml_backend_buffer_is_hexagon(src1->buffer) || ggml_backend_hexagon_buffer_get_sess(src1->buffer) != sess)) {
|
|
2321
|
-
return false;
|
|
2322
|
-
}
|
|
2323
|
-
if (src2 && src2->buffer &&
|
|
2324
|
-
(!ggml_backend_buffer_is_hexagon(src2->buffer) || ggml_backend_hexagon_buffer_get_sess(src2->buffer) != sess)) {
|
|
2325
|
-
return false;
|
|
2326
|
-
}
|
|
2327
|
-
if (dst->buffer &&
|
|
2328
|
-
(!ggml_backend_buffer_is_hexagon(dst->buffer) || ggml_backend_hexagon_buffer_get_sess(dst->buffer) != sess)) {
|
|
2275
|
+
if (!hex_supported_buffer(sess, src0, src1, src2, dst)) {
|
|
2329
2276
|
return false;
|
|
2330
2277
|
}
|
|
2331
2278
|
|
|
@@ -2346,6 +2293,26 @@ static void init_htp_tensor(htp_tensor * h, const ggml_tensor * t) {
|
|
|
2346
2293
|
h->nb[3] = t->nb[3];
|
|
2347
2294
|
}
|
|
2348
2295
|
|
|
2296
|
+
static size_t dspqueue_buffers_init(dspqueue_buffer * buf, const ggml_tensor * t, bool flush_host, bool flush_htp) {
|
|
2297
|
+
if (!t) {
|
|
2298
|
+
return 0;
|
|
2299
|
+
}
|
|
2300
|
+
|
|
2301
|
+
memset(buf, 0, sizeof(*buf));
|
|
2302
|
+
auto tensor_buf = static_cast<ggml_backend_hexagon_buffer_context *>(t->buffer->context);
|
|
2303
|
+
buf->fd = tensor_buf->fd;
|
|
2304
|
+
buf->ptr = t->data;
|
|
2305
|
+
buf->offset = (uint8_t *) t->data - tensor_buf->base;
|
|
2306
|
+
buf->size = ggml_nbytes(t);
|
|
2307
|
+
buf->flags = (flush_host ? DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER : 0); // Flush CPU
|
|
2308
|
+
buf->flags |= (flush_htp ? DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT : 0); // Invalidate DSP
|
|
2309
|
+
return 1;
|
|
2310
|
+
}
|
|
2311
|
+
|
|
2312
|
+
static ggml_hexagon_session * get_session_from_tensor(const ggml_tensor * t) {
|
|
2313
|
+
return static_cast<ggml_backend_hexagon_buffer_context *>(t->buffer->context)->sess;
|
|
2314
|
+
}
|
|
2315
|
+
|
|
2349
2316
|
static void hex_dump_dspbuf(const struct ggml_tensor * t, const dspqueue_buffer * d) {
|
|
2350
2317
|
auto buf = static_cast<ggml_backend_hexagon_buffer_context *>(t->buffer->context);
|
|
2351
2318
|
auto sess = buf->sess;
|
|
@@ -2360,10 +2327,6 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
|
|
|
2360
2327
|
const struct ggml_tensor * src1 = op->src[1];
|
|
2361
2328
|
const struct ggml_tensor * dst = op;
|
|
2362
2329
|
|
|
2363
|
-
auto src0_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src0->buffer->context);
|
|
2364
|
-
auto src1_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src1->buffer->context);
|
|
2365
|
-
auto dst_buf = static_cast<ggml_backend_hexagon_buffer_context *>(dst->buffer->context);
|
|
2366
|
-
|
|
2367
2330
|
uint64_t t1, t2;
|
|
2368
2331
|
t1 = ggml_time_us();
|
|
2369
2332
|
|
|
@@ -2385,55 +2348,27 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
|
|
|
2385
2348
|
}
|
|
2386
2349
|
|
|
2387
2350
|
dspqueue_buffer bufs[3];
|
|
2388
|
-
memset(bufs, 0, sizeof(bufs));
|
|
2389
2351
|
|
|
2390
2352
|
// First buffer Weights.
|
|
2391
2353
|
// The content is static, there is no need to do any cache management
|
|
2392
|
-
bufs[0].fd = src0_buf->fd;
|
|
2393
|
-
bufs[0].ptr = src0->data;
|
|
2394
|
-
bufs[0].offset = (uint8_t *) src0->data - src0_buf->base;
|
|
2395
|
-
bufs[0].size = ggml_nbytes(src0);
|
|
2396
|
-
bufs[0].flags = 0;
|
|
2354
|
+
dspqueue_buffers_init(bufs, src0, false, false);
|
|
2397
2355
|
|
|
2398
2356
|
// Second buffer Input Activations. This is a buffer that the CPU
|
|
2399
2357
|
// writes and the DSP reads, so we'll need to flush CPU caches and
|
|
2400
2358
|
// invalidate DSP ones. On platforms with I/O coherency support the
|
|
2401
2359
|
// framework will automatically skip cache operations where possible.
|
|
2402
|
-
bufs[1].fd = src1_buf->fd;
|
|
2403
|
-
bufs[1].ptr = src1->data;
|
|
2404
|
-
bufs[1].offset = (uint8_t *) src1->data - src1_buf->base;
|
|
2405
|
-
bufs[1].size = ggml_nbytes(src1);
|
|
2406
|
-
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
2407
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
|
|
2360
|
+
dspqueue_buffers_init(&bufs[1], src1, true, true);
|
|
2408
2361
|
|
|
2409
2362
|
// Third buffer Output Activations. We'll handle DSP
|
|
2410
2363
|
// cache maintenance in the response message but need to flush
|
|
2411
2364
|
// CPU caches to ensure any previously written dirty lines are
|
|
2412
2365
|
// written out before writes from the DSP start.
|
|
2413
|
-
bufs[2].fd = dst_buf->fd;
|
|
2414
|
-
bufs[2].ptr = dst->data;
|
|
2415
|
-
bufs[2].offset = (uint8_t *) dst->data - dst_buf->base;
|
|
2416
|
-
bufs[2].size = ggml_nbytes(dst);
|
|
2417
|
-
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
|
|
2366
|
+
dspqueue_buffers_init(&bufs[2], dst, true, false);
|
|
2418
2367
|
|
|
2419
|
-
|
|
2420
|
-
auto sess = src0_buf->sess;
|
|
2368
|
+
auto * sess = get_session_from_tensor(src0);
|
|
2421
2369
|
|
|
2422
2370
|
if (opt_verbose) {
|
|
2423
|
-
|
|
2424
|
-
char strides[64 * GGML_MAX_SRC];
|
|
2425
|
-
char types[16 * GGML_MAX_SRC];
|
|
2426
|
-
char buffs[64 * GGML_MAX_SRC];
|
|
2427
|
-
char names[64 * GGML_MAX_SRC];
|
|
2428
|
-
|
|
2429
|
-
hex_format_op_dims(dims, op);
|
|
2430
|
-
hex_format_op_strides(strides, op);
|
|
2431
|
-
hex_format_op_types(types, op);
|
|
2432
|
-
hex_format_op_buffs(buffs, op);
|
|
2433
|
-
hex_format_op_names(names, op);
|
|
2434
|
-
|
|
2435
|
-
HEX_VERBOSE("ggml-hex: %s %s: %s : %s : %s : %s : %s: flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
|
|
2436
|
-
names, dims, types, strides, buffs, req.flags);
|
|
2371
|
+
hex_print_op_info(op, sess, req.flags);
|
|
2437
2372
|
if (opt_verbose > 1) {
|
|
2438
2373
|
hex_dump_dspbuf(src0, &bufs[0]);
|
|
2439
2374
|
hex_dump_dspbuf(src1, &bufs[1]);
|
|
@@ -2463,11 +2398,6 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
|
|
|
2463
2398
|
const struct ggml_tensor * src2 = op->src[2];
|
|
2464
2399
|
const struct ggml_tensor * dst = op;
|
|
2465
2400
|
|
|
2466
|
-
auto src0_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src0->buffer->context);
|
|
2467
|
-
auto src1_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src1->buffer->context);
|
|
2468
|
-
auto src2_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src2->buffer->context);
|
|
2469
|
-
auto dst_buf = static_cast<ggml_backend_hexagon_buffer_context *>(dst->buffer->context);
|
|
2470
|
-
|
|
2471
2401
|
uint64_t t1, t2;
|
|
2472
2402
|
t1 = ggml_time_us();
|
|
2473
2403
|
|
|
@@ -2490,66 +2420,32 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
|
|
|
2490
2420
|
}
|
|
2491
2421
|
|
|
2492
2422
|
dspqueue_buffer bufs[4];
|
|
2493
|
-
memset(bufs, 0, sizeof(bufs));
|
|
2494
|
-
|
|
2495
2423
|
// First buffer Weights.
|
|
2496
2424
|
// The content is static, there is no need to do any cache management
|
|
2497
|
-
bufs[0].fd = src0_buf->fd;
|
|
2498
|
-
bufs[0].ptr = src0->data;
|
|
2499
|
-
bufs[0].offset = (uint8_t *) src0->data - src0_buf->base;
|
|
2500
|
-
bufs[0].size = ggml_nbytes(src0);
|
|
2501
|
-
bufs[0].flags = 0;
|
|
2425
|
+
dspqueue_buffers_init(bufs, src0, false, false);
|
|
2502
2426
|
|
|
2503
2427
|
// Second buffer Input Activations. This is a buffer that the CPU
|
|
2504
2428
|
// writes and the DSP reads, so we'll need to flush CPU caches and
|
|
2505
2429
|
// invalidate DSP ones. On platforms with I/O coherency support the
|
|
2506
2430
|
// framework will automatically skip cache operations where possible.
|
|
2507
|
-
bufs[1].fd = src1_buf->fd;
|
|
2508
|
-
bufs[1].ptr = src1->data;
|
|
2509
|
-
bufs[1].offset = (uint8_t *) src1->data - src1_buf->base;
|
|
2510
|
-
bufs[1].size = ggml_nbytes(src1);
|
|
2511
|
-
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
2512
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
|
|
2431
|
+
dspqueue_buffers_init(&bufs[1], src1, true, true);
|
|
2513
2432
|
|
|
2514
2433
|
// Third buffer expert IDs. This is a buffer that the CPU
|
|
2515
2434
|
// writes and the DSP reads, so we'll need to flush CPU caches and
|
|
2516
2435
|
// invalidate DSP ones. On platforms with I/O coherency support the
|
|
2517
2436
|
// framework will automatically skip cache operations where possible.
|
|
2518
|
-
bufs[2].fd = src2_buf->fd;
|
|
2519
|
-
bufs[2].ptr = src2->data;
|
|
2520
|
-
bufs[2].offset = (uint8_t *) src2->data - src2_buf->base;
|
|
2521
|
-
bufs[2].size = ggml_nbytes(src2);
|
|
2522
|
-
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
2523
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
|
|
2437
|
+
dspqueue_buffers_init(&bufs[2], src2, true, true);
|
|
2524
2438
|
|
|
2525
2439
|
// Forth buffer Output Activations. We'll handle DSP
|
|
2526
2440
|
// cache maintenance in the response message but need to flush
|
|
2527
2441
|
// CPU caches to ensure any previously written dirty lines are
|
|
2528
2442
|
// written out before writes from the DSP start.
|
|
2529
|
-
bufs[3].fd = dst_buf->fd;
|
|
2530
|
-
bufs[3].ptr = dst->data;
|
|
2531
|
-
bufs[3].offset = (uint8_t *) dst->data - dst_buf->base;
|
|
2532
|
-
bufs[3].size = ggml_nbytes(dst);
|
|
2533
|
-
bufs[3].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
|
|
2443
|
+
dspqueue_buffers_init(&bufs[3], dst, true, false);
|
|
2534
2444
|
|
|
2535
|
-
|
|
2536
|
-
auto sess = src0_buf->sess;
|
|
2445
|
+
auto * sess = get_session_from_tensor(src0);
|
|
2537
2446
|
|
|
2538
2447
|
if (opt_verbose) {
|
|
2539
|
-
|
|
2540
|
-
char strides[64 * GGML_MAX_SRC];
|
|
2541
|
-
char types[16 * GGML_MAX_SRC];
|
|
2542
|
-
char buffs[64 * GGML_MAX_SRC];
|
|
2543
|
-
char names[64 * GGML_MAX_SRC];
|
|
2544
|
-
|
|
2545
|
-
hex_format_op_dims(dims, op);
|
|
2546
|
-
hex_format_op_types(types, op);
|
|
2547
|
-
hex_format_op_buffs(buffs, op);
|
|
2548
|
-
hex_format_op_names(names, op);
|
|
2549
|
-
|
|
2550
|
-
HEX_VERBOSE("ggml-hex: %s %s: %s : %s : %s : %s : %s: flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
|
|
2551
|
-
names, dims, types, strides, buffs, req.flags);
|
|
2552
|
-
|
|
2448
|
+
hex_print_op_info(op, sess, req.flags);
|
|
2553
2449
|
if (opt_verbose > 1) {
|
|
2554
2450
|
hex_dump_dspbuf(src0, &bufs[0]);
|
|
2555
2451
|
hex_dump_dspbuf(src1, &bufs[1]);
|
|
@@ -2581,10 +2477,6 @@ static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
|
|
|
2581
2477
|
const struct ggml_tensor * src1 = node->src[1];
|
|
2582
2478
|
const struct ggml_tensor * dst = node;
|
|
2583
2479
|
|
|
2584
|
-
auto src0_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src0->buffer->context);
|
|
2585
|
-
auto src1_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src1->buffer->context);
|
|
2586
|
-
auto dst_buf = static_cast<ggml_backend_hexagon_buffer_context *>(dst->buffer->context);
|
|
2587
|
-
|
|
2588
2480
|
uint64_t t1 = 0;
|
|
2589
2481
|
uint64_t t2 = 0;
|
|
2590
2482
|
|
|
@@ -2621,60 +2513,30 @@ static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
|
|
|
2621
2513
|
init_htp_tensor(&req.dst, dst);
|
|
2622
2514
|
|
|
2623
2515
|
dspqueue_buffer bufs[3];
|
|
2624
|
-
memset(bufs, 0, sizeof(bufs));
|
|
2625
|
-
|
|
2626
2516
|
// First buffer = First Operand of Binary op
|
|
2627
2517
|
// This is a buffer that the CPU writes and the DSP reads, so we'll
|
|
2628
2518
|
// need to flush CPU caches and invalidate DSP ones. On platforms
|
|
2629
2519
|
// with I/O coherency support the framework will automatically skip
|
|
2630
2520
|
// cache operations where possible.
|
|
2631
|
-
bufs[0].fd = src0_buf->fd;
|
|
2632
|
-
bufs[0].ptr = src0->data;
|
|
2633
|
-
bufs[0].offset = (uint8_t *) src0->data - src0_buf->base;
|
|
2634
|
-
bufs[0].size = ggml_nbytes(src0);
|
|
2635
|
-
bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
2636
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP;
|
|
2521
|
+
dspqueue_buffers_init(bufs, src0, true, true);
|
|
2637
2522
|
|
|
2638
2523
|
// Second buffer = Second Operand of Binary op
|
|
2639
2524
|
// This is a buffer that the CPU writes and the DSP reads, so we'll
|
|
2640
2525
|
// need to flush CPU caches and invalidate DSP ones. On platforms
|
|
2641
2526
|
// with I/O coherency support the framework will automatically skip
|
|
2642
2527
|
// cache operations where possible.
|
|
2643
|
-
bufs[1].fd = src1_buf->fd;
|
|
2644
|
-
bufs[1].ptr = src1->data;
|
|
2645
|
-
bufs[1].offset = (uint8_t *) src1->data - src1_buf->base;
|
|
2646
|
-
bufs[1].size = ggml_nbytes(src1);
|
|
2647
|
-
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
2648
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
|
|
2528
|
+
dspqueue_buffers_init(&bufs[1], src1, true, true);
|
|
2649
2529
|
|
|
2650
2530
|
// Third buffer = Output Activations. We'll handle DSP
|
|
2651
2531
|
// cache maintenance in the response message but need to flush
|
|
2652
2532
|
// CPU caches to ensure any previously written dirty lines are
|
|
2653
2533
|
// written out before writes from the DSP start.
|
|
2654
|
-
bufs[2].fd = dst_buf->fd;
|
|
2655
|
-
bufs[2].ptr = dst->data;
|
|
2656
|
-
bufs[2].offset = (uint8_t *) dst->data - dst_buf->base;
|
|
2657
|
-
bufs[2].size = ggml_nbytes(dst);
|
|
2658
|
-
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
|
|
2534
|
+
dspqueue_buffers_init(&bufs[2], dst, true, false);
|
|
2659
2535
|
|
|
2660
|
-
|
|
2661
|
-
ggml_hexagon_session * sess = src0_buf->sess;
|
|
2536
|
+
auto * sess = get_session_from_tensor(src0);
|
|
2662
2537
|
|
|
2663
2538
|
if (opt_verbose) {
|
|
2664
|
-
|
|
2665
|
-
char strides[16 * GGML_MAX_SRC];
|
|
2666
|
-
char types[16 * GGML_MAX_SRC];
|
|
2667
|
-
char buffs[64 * GGML_MAX_SRC];
|
|
2668
|
-
char names[64 * GGML_MAX_SRC];
|
|
2669
|
-
|
|
2670
|
-
hex_format_op_dims(dims, op);
|
|
2671
|
-
hex_format_op_strides(strides, op);
|
|
2672
|
-
hex_format_op_types(types, op);
|
|
2673
|
-
hex_format_op_buffs(buffs, op);
|
|
2674
|
-
hex_format_op_names(names, op);
|
|
2675
|
-
|
|
2676
|
-
HEX_VERBOSE("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(),
|
|
2677
|
-
ggml_op_name(node->op), names, dims, types, strides, buffs, req.flags);
|
|
2539
|
+
hex_print_op_info(op, sess, req.flags);
|
|
2678
2540
|
if (opt_verbose > 1) {
|
|
2679
2541
|
hex_dump_dspbuf(src0, &bufs[0]);
|
|
2680
2542
|
hex_dump_dspbuf(src1, &bufs[1]);
|
|
@@ -2705,11 +2567,6 @@ static void ggml_hexagon_add_id(const struct ggml_tensor * op, uint32_t flags) {
|
|
|
2705
2567
|
const struct ggml_tensor * src2 = node->src[2];
|
|
2706
2568
|
const struct ggml_tensor * dst = node;
|
|
2707
2569
|
|
|
2708
|
-
auto src0_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src0->buffer->context);
|
|
2709
|
-
auto src1_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src1->buffer->context);
|
|
2710
|
-
auto src2_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src2->buffer->context);
|
|
2711
|
-
auto dst_buf = static_cast<ggml_backend_hexagon_buffer_context *>(dst->buffer->context);
|
|
2712
|
-
|
|
2713
2570
|
uint64_t t1 = 0;
|
|
2714
2571
|
uint64_t t2 = 0;
|
|
2715
2572
|
|
|
@@ -2741,58 +2598,19 @@ static void ggml_hexagon_add_id(const struct ggml_tensor * op, uint32_t flags) {
|
|
|
2741
2598
|
init_htp_tensor(&req.dst, dst);
|
|
2742
2599
|
|
|
2743
2600
|
dspqueue_buffer bufs[4];
|
|
2744
|
-
memset(bufs, 0, sizeof(bufs));
|
|
2745
|
-
|
|
2746
2601
|
// First buffer = input activations
|
|
2747
|
-
bufs[0].fd = src0_buf->fd;
|
|
2748
|
-
bufs[0].ptr = src0->data;
|
|
2749
|
-
bufs[0].offset = (uint8_t *) src0->data - src0_buf->base;
|
|
2750
|
-
bufs[0].size = ggml_nbytes(src0);
|
|
2751
|
-
bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
2752
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP;
|
|
2753
|
-
|
|
2602
|
+
dspqueue_buffers_init(bufs, src0, true, true);
|
|
2754
2603
|
// Second buffer = experts bias
|
|
2755
|
-
bufs[1].fd = src1_buf->fd;
|
|
2756
|
-
bufs[1].ptr = src1->data;
|
|
2757
|
-
bufs[1].offset = (uint8_t *) src1->data - src1_buf->base;
|
|
2758
|
-
bufs[1].size = ggml_nbytes(src1);
|
|
2759
|
-
bufs[1].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
2760
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
|
|
2761
|
-
|
|
2604
|
+
dspqueue_buffers_init(&bufs[1], src1, true, true);
|
|
2762
2605
|
// Third buffer = activated experts
|
|
2763
|
-
bufs[2].fd = src2_buf->fd;
|
|
2764
|
-
bufs[2].ptr = src2->data;
|
|
2765
|
-
bufs[2].offset = (uint8_t *) src2->data - src2_buf->base;
|
|
2766
|
-
bufs[2].size = ggml_nbytes(src2);
|
|
2767
|
-
bufs[2].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
2768
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
|
|
2769
|
-
|
|
2606
|
+
dspqueue_buffers_init(&bufs[2], src2, true, true);
|
|
2770
2607
|
// Forth buffer = output activations
|
|
2771
|
-
bufs[3].fd = dst_buf->fd;
|
|
2772
|
-
bufs[3].ptr = dst->data;
|
|
2773
|
-
bufs[3].offset = (uint8_t *) dst->data - dst_buf->base;
|
|
2774
|
-
bufs[3].size = ggml_nbytes(dst);
|
|
2775
|
-
bufs[3].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
|
|
2608
|
+
dspqueue_buffers_init(&bufs[3], dst, true, true);
|
|
2776
2609
|
|
|
2777
|
-
|
|
2778
|
-
ggml_hexagon_session * sess = src0_buf->sess;
|
|
2610
|
+
auto * sess = get_session_from_tensor(src0);
|
|
2779
2611
|
|
|
2780
2612
|
if (opt_verbose) {
|
|
2781
|
-
|
|
2782
|
-
char strides[16 * GGML_MAX_SRC];
|
|
2783
|
-
char types[16 * GGML_MAX_SRC];
|
|
2784
|
-
char buffs[64 * GGML_MAX_SRC];
|
|
2785
|
-
char names[64 * GGML_MAX_SRC];
|
|
2786
|
-
|
|
2787
|
-
hex_format_op_dims(dims, op);
|
|
2788
|
-
hex_format_op_strides(strides, op);
|
|
2789
|
-
hex_format_op_types(types, op);
|
|
2790
|
-
hex_format_op_buffs(buffs, op);
|
|
2791
|
-
hex_format_op_names(names, op);
|
|
2792
|
-
|
|
2793
|
-
HEX_VERBOSE("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(),
|
|
2794
|
-
ggml_op_name(node->op), names, dims, types, strides, buffs, req.flags);
|
|
2795
|
-
|
|
2613
|
+
hex_print_op_info(op, sess, req.flags);
|
|
2796
2614
|
if (opt_verbose > 1) {
|
|
2797
2615
|
hex_dump_dspbuf(src0, &bufs[0]);
|
|
2798
2616
|
hex_dump_dspbuf(src1, &bufs[1]);
|
|
@@ -2886,71 +2704,33 @@ static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
|
|
|
2886
2704
|
}
|
|
2887
2705
|
|
|
2888
2706
|
dspqueue_buffer bufs[3];
|
|
2889
|
-
int n_bufs = 0;
|
|
2890
|
-
|
|
2891
|
-
memset(bufs, 0, sizeof(bufs));
|
|
2892
2707
|
|
|
2893
2708
|
// First buffer = Only Operand of Unary op
|
|
2894
2709
|
// This is a buffer that the CPU writes and the DSP reads, so we'll
|
|
2895
2710
|
// need to flush CPU caches and invalidate DSP ones. On platforms
|
|
2896
2711
|
// with I/O coherency support the framework will automatically skip
|
|
2897
2712
|
// cache operations where possible.
|
|
2898
|
-
|
|
2899
|
-
bufs[n_bufs].fd = src0_buf->fd;
|
|
2900
|
-
bufs[n_bufs].ptr = src0->data;
|
|
2901
|
-
bufs[n_bufs].offset = (uint8_t *) src0->data - src0_buf->base;
|
|
2902
|
-
bufs[n_bufs].size = ggml_nbytes(src0);
|
|
2903
|
-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
2904
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP;
|
|
2905
|
-
++n_bufs;
|
|
2713
|
+
size_t n_bufs = dspqueue_buffers_init(bufs, src0, true, true);
|
|
2906
2714
|
|
|
2907
|
-
|
|
2908
|
-
|
|
2909
|
-
|
|
2910
|
-
|
|
2911
|
-
|
|
2912
|
-
|
|
2913
|
-
auto src1_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src1->buffer->context);
|
|
2914
|
-
bufs[n_bufs].fd = src1_buf->fd;
|
|
2915
|
-
bufs[n_bufs].ptr = src1->data;
|
|
2916
|
-
bufs[n_bufs].offset = (uint8_t *) src1->data - src1_buf->base;
|
|
2917
|
-
bufs[n_bufs].size = ggml_nbytes(src1);
|
|
2918
|
-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
2919
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
|
|
2920
|
-
++n_bufs;
|
|
2921
|
-
}
|
|
2715
|
+
// Second buffer(nullable) = Second Operand of Binary op
|
|
2716
|
+
// This is a buffer that the CPU writes and the DSP reads, so we'll
|
|
2717
|
+
// need to flush CPU caches and invalidate DSP ones. On platforms
|
|
2718
|
+
// with I/O coherency support the framework will automatically skip
|
|
2719
|
+
// cache operations where possible.
|
|
2720
|
+
n_bufs += dspqueue_buffers_init(&bufs[n_bufs], src1, true, true);
|
|
2922
2721
|
|
|
2923
2722
|
// Second or third buffer = Output Activations. We'll handle DSP
|
|
2924
2723
|
// Second buffer = Output Activations. We'll handle DSP
|
|
2925
2724
|
// cache maintenance in the response message but need to flush
|
|
2926
2725
|
// CPU caches to ensure any previously written dirty lines are
|
|
2927
2726
|
// written out before writes from the DSP start.
|
|
2928
|
-
|
|
2929
|
-
bufs[n_bufs].fd = dst_buf->fd;
|
|
2930
|
-
bufs[n_bufs].ptr = dst->data;
|
|
2931
|
-
bufs[n_bufs].offset = (uint8_t *) dst->data - dst_buf->base;
|
|
2932
|
-
bufs[n_bufs].size = ggml_nbytes(dst);
|
|
2933
|
-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
|
|
2934
|
-
++n_bufs;
|
|
2727
|
+
n_bufs += dspqueue_buffers_init(&bufs[n_bufs], dst, true, false);
|
|
2935
2728
|
|
|
2936
2729
|
// Primary DSP session from the src0 tensor
|
|
2937
|
-
|
|
2730
|
+
auto * sess = get_session_from_tensor(src0);
|
|
2938
2731
|
|
|
2939
2732
|
if (opt_verbose) {
|
|
2940
|
-
|
|
2941
|
-
char strides[64 * GGML_MAX_SRC];
|
|
2942
|
-
char types[16 * GGML_MAX_SRC];
|
|
2943
|
-
char buffs[64 * GGML_MAX_SRC];
|
|
2944
|
-
char names[64 * GGML_MAX_SRC];
|
|
2945
|
-
|
|
2946
|
-
hex_format_op_dims(dims, op);
|
|
2947
|
-
hex_format_op_strides(strides, op);
|
|
2948
|
-
hex_format_op_types(types, op);
|
|
2949
|
-
hex_format_op_buffs(buffs, op);
|
|
2950
|
-
hex_format_op_names(names, op);
|
|
2951
|
-
|
|
2952
|
-
HEX_VERBOSE("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
|
|
2953
|
-
names, dims, types, strides, buffs, req.flags);
|
|
2733
|
+
hex_print_op_info(op, sess, req.flags);
|
|
2954
2734
|
if (opt_verbose > 1) {
|
|
2955
2735
|
hex_dump_dspbuf(src0, &bufs[0]);
|
|
2956
2736
|
if (src1) {
|
|
@@ -3023,85 +2803,40 @@ static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
|
|
|
3023
2803
|
}
|
|
3024
2804
|
|
|
3025
2805
|
dspqueue_buffer bufs[4];
|
|
3026
|
-
int n_bufs = 0;
|
|
3027
|
-
|
|
3028
|
-
memset(bufs, 0, sizeof(bufs));
|
|
3029
2806
|
|
|
3030
2807
|
// First buffer
|
|
3031
2808
|
// This is a buffer that the CPU writes and the DSP reads, so we'll
|
|
3032
2809
|
// need to flush CPU caches and invalidate DSP ones. On platforms
|
|
3033
2810
|
// with I/O coherency support the framework will automatically skip
|
|
3034
2811
|
// cache operations where possible.
|
|
3035
|
-
|
|
3036
|
-
bufs[n_bufs].fd = src0_buf->fd;
|
|
3037
|
-
bufs[n_bufs].ptr = src0->data;
|
|
3038
|
-
bufs[n_bufs].offset = (uint8_t *) src0->data - src0_buf->base;
|
|
3039
|
-
bufs[n_bufs].size = ggml_nbytes(src0);
|
|
3040
|
-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
3041
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP;
|
|
3042
|
-
++n_bufs;
|
|
2812
|
+
size_t n_bufs = dspqueue_buffers_init(bufs, src0, true, true);
|
|
3043
2813
|
|
|
3044
2814
|
// Second buffer
|
|
3045
2815
|
// This is a buffer that the CPU writes and the DSP reads, so we'll
|
|
3046
2816
|
// need to flush CPU caches and invalidate DSP ones. On platforms
|
|
3047
2817
|
// with I/O coherency support the framework will automatically skip
|
|
3048
2818
|
// cache operations where possible.
|
|
3049
|
-
|
|
3050
|
-
bufs[n_bufs].fd = src1_buf->fd;
|
|
3051
|
-
bufs[n_bufs].ptr = src1->data;
|
|
3052
|
-
bufs[n_bufs].offset = (uint8_t *) src1->data - src1_buf->base;
|
|
3053
|
-
bufs[n_bufs].size = ggml_nbytes(src1);
|
|
3054
|
-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
3055
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
|
|
3056
|
-
++n_bufs;
|
|
2819
|
+
n_bufs += dspqueue_buffers_init(&bufs[n_bufs], src1, true, true);
|
|
3057
2820
|
|
|
3058
|
-
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
|
|
3063
|
-
|
|
3064
|
-
auto src2_buf = static_cast<ggml_backend_hexagon_buffer_context *>(src2->buffer->context);
|
|
3065
|
-
bufs[n_bufs].fd = src2_buf->fd;
|
|
3066
|
-
bufs[n_bufs].ptr = src2->data;
|
|
3067
|
-
bufs[n_bufs].offset = (uint8_t *) src2->data - src2_buf->base;
|
|
3068
|
-
bufs[n_bufs].size = ggml_nbytes(src2);
|
|
3069
|
-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush CPU
|
|
3070
|
-
DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate DSP
|
|
3071
|
-
++n_bufs;
|
|
3072
|
-
}
|
|
2821
|
+
// Third buffer(nullable)
|
|
2822
|
+
// This is a buffer that the CPU writes and the DSP reads, so we'll
|
|
2823
|
+
// need to flush CPU caches and invalidate DSP ones. On platforms
|
|
2824
|
+
// with I/O coherency support the framework will automatically skip
|
|
2825
|
+
// cache operations where possible.
|
|
2826
|
+
n_bufs += dspqueue_buffers_init(&bufs[n_bufs], src2, true, true);
|
|
3073
2827
|
|
|
3074
2828
|
// Final buffer = Output Activations. We'll handle DSP
|
|
3075
2829
|
// Second buffer = Output Activations. We'll handle DSP
|
|
3076
2830
|
// cache maintenance in the response message but need to flush
|
|
3077
2831
|
// CPU caches to ensure any previously written dirty lines are
|
|
3078
2832
|
// written out before writes from the DSP start.
|
|
3079
|
-
|
|
3080
|
-
bufs[n_bufs].fd = dst_buf->fd;
|
|
3081
|
-
bufs[n_bufs].ptr = dst->data;
|
|
3082
|
-
bufs[n_bufs].offset = (uint8_t *) dst->data - dst_buf->base;
|
|
3083
|
-
bufs[n_bufs].size = ggml_nbytes(dst);
|
|
3084
|
-
bufs[n_bufs].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER);
|
|
3085
|
-
++n_bufs;
|
|
2833
|
+
n_bufs += dspqueue_buffers_init(&bufs[n_bufs], dst, true, false);
|
|
3086
2834
|
|
|
3087
2835
|
// Primary DSP session from the src0 tensor
|
|
3088
|
-
|
|
2836
|
+
auto * sess = get_session_from_tensor(src0);
|
|
3089
2837
|
|
|
3090
2838
|
if (opt_verbose) {
|
|
3091
|
-
|
|
3092
|
-
char strides[64 * GGML_MAX_SRC];
|
|
3093
|
-
char types[16 * GGML_MAX_SRC];
|
|
3094
|
-
char buffs[64 * GGML_MAX_SRC];
|
|
3095
|
-
char names[64 * GGML_MAX_SRC];
|
|
3096
|
-
|
|
3097
|
-
hex_format_op_dims(dims, op);
|
|
3098
|
-
hex_format_op_strides(strides, op);
|
|
3099
|
-
hex_format_op_types(types, op);
|
|
3100
|
-
hex_format_op_buffs(buffs, op);
|
|
3101
|
-
hex_format_op_names(names, op);
|
|
3102
|
-
|
|
3103
|
-
HEX_VERBOSE("ggml-hex: %s %s : %s : %s : %s : %s : %s : flags 0x%x\n", sess->name.c_str(), ggml_op_name(op->op),
|
|
3104
|
-
names, dims, types, strides, buffs, req.flags);
|
|
2839
|
+
hex_print_op_info(op, sess, req.flags);
|
|
3105
2840
|
if (opt_verbose > 1) {
|
|
3106
2841
|
hex_dump_dspbuf(src0, &bufs[0]);
|
|
3107
2842
|
if (src1) {
|