cui-llama.rn 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/jni.cpp +9 -9
- package/cpp/common.cpp +163 -60
- package/cpp/common.h +43 -12
- package/cpp/ggml-alloc.c +1042 -1037
- package/cpp/ggml-backend-impl.h +255 -256
- package/cpp/ggml-backend-reg.cpp +582 -582
- package/cpp/ggml-backend.cpp +2002 -2002
- package/cpp/ggml-backend.h +354 -352
- package/cpp/ggml-common.h +1853 -1853
- package/cpp/ggml-cpp.h +39 -39
- package/cpp/ggml-cpu-aarch64.cpp +4247 -4247
- package/cpp/ggml-cpu-aarch64.h +8 -8
- package/cpp/ggml-cpu-impl.h +386 -386
- package/cpp/ggml-cpu-quants.c +10920 -10839
- package/cpp/ggml-cpu-traits.cpp +36 -36
- package/cpp/ggml-cpu-traits.h +38 -38
- package/cpp/ggml-cpu.c +329 -60
- package/cpp/ggml-cpu.cpp +10 -2
- package/cpp/ggml-cpu.h +135 -135
- package/cpp/ggml-impl.h +567 -567
- package/cpp/ggml-metal-impl.h +17 -17
- package/cpp/ggml-metal.m +4884 -4884
- package/cpp/ggml-quants.c +5238 -5238
- package/cpp/ggml-threading.h +14 -14
- package/cpp/ggml.c +6514 -6448
- package/cpp/ggml.h +2194 -2163
- package/cpp/gguf.cpp +1329 -1325
- package/cpp/gguf.h +202 -202
- package/cpp/json-schema-to-grammar.cpp +1045 -1045
- package/cpp/json-schema-to-grammar.h +8 -8
- package/cpp/json.hpp +24766 -24766
- package/cpp/llama-adapter.cpp +347 -346
- package/cpp/llama-adapter.h +74 -73
- package/cpp/llama-arch.cpp +1487 -1434
- package/cpp/llama-arch.h +400 -395
- package/cpp/llama-batch.cpp +368 -368
- package/cpp/llama-batch.h +88 -88
- package/cpp/llama-chat.cpp +578 -567
- package/cpp/llama-chat.h +52 -51
- package/cpp/llama-context.cpp +1775 -1771
- package/cpp/llama-context.h +128 -128
- package/cpp/llama-cparams.cpp +1 -1
- package/cpp/llama-cparams.h +37 -37
- package/cpp/llama-cpp.h +30 -30
- package/cpp/llama-grammar.cpp +1139 -1139
- package/cpp/llama-grammar.h +143 -143
- package/cpp/llama-hparams.cpp +71 -71
- package/cpp/llama-hparams.h +139 -140
- package/cpp/llama-impl.cpp +167 -167
- package/cpp/llama-impl.h +61 -61
- package/cpp/llama-kv-cache.cpp +718 -718
- package/cpp/llama-kv-cache.h +218 -218
- package/cpp/llama-mmap.cpp +2 -1
- package/cpp/llama-mmap.h +67 -67
- package/cpp/llama-model-loader.cpp +1124 -1011
- package/cpp/llama-model-loader.h +167 -158
- package/cpp/llama-model.cpp +3997 -2202
- package/cpp/llama-model.h +370 -391
- package/cpp/llama-sampling.cpp +2408 -2406
- package/cpp/llama-sampling.h +32 -48
- package/cpp/llama-vocab.cpp +3247 -1982
- package/cpp/llama-vocab.h +125 -182
- package/cpp/llama.cpp +416 -2886
- package/cpp/llama.h +1323 -1285
- package/cpp/log.cpp +401 -401
- package/cpp/log.h +121 -121
- package/cpp/rn-llama.hpp +18 -12
- package/cpp/sampling.cpp +505 -500
- package/cpp/sgemm.cpp +2597 -2597
- package/cpp/speculative.cpp +277 -274
- package/cpp/speculative.h +28 -28
- package/cpp/unicode.cpp +2 -3
- package/package.json +1 -1
package/cpp/ggml-cpu-traits.cpp
CHANGED
@@ -1,36 +1,36 @@
-#include "ggml-cpu-traits.h"
-
-#include "ggml-backend-impl.h"
-#include "ggml-backend.h"
-
-namespace ggml::cpu {
-tensor_traits::~tensor_traits() {}
-
-extra_buffer_type::~extra_buffer_type() {}
-} // namespace ggml::cpu
-
-bool lm_ggml_cpu_extra_compute_forward(struct lm_ggml_compute_params * params, struct lm_ggml_tensor * op) {
-    for (auto extra : lm_ggml_backend_cpu_get_extra_buffers_type()) {
-        if (extra && extra->context) {
-            auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
-            auto tensor_traits = buf_extra->get_tensor_traits(op);
-            if (tensor_traits && tensor_traits->compute_forward(params, op)) {
-                return true;
-            }
-        }
-    }
-    return false;
-}
-
-bool lm_ggml_cpu_extra_work_size(int n_threads, const struct lm_ggml_tensor * op, size_t * size) {
-    for (auto extra : lm_ggml_backend_cpu_get_extra_buffers_type()) {
-        if (extra && extra->context) {
-            auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
-            auto tensor_traits = buf_extra->get_tensor_traits(op);
-            if (tensor_traits && tensor_traits->work_size(n_threads, op, *size)) {
-                return true;
-            }
-        }
-    }
-    return false;
-}
+#include "ggml-cpu-traits.h"
+
+#include "ggml-backend-impl.h"
+#include "ggml-backend.h"
+
+namespace ggml::cpu {
+tensor_traits::~tensor_traits() {}
+
+extra_buffer_type::~extra_buffer_type() {}
+} // namespace ggml::cpu
+
+bool lm_ggml_cpu_extra_compute_forward(struct lm_ggml_compute_params * params, struct lm_ggml_tensor * op) {
+    for (auto extra : lm_ggml_backend_cpu_get_extra_buffers_type()) {
+        if (extra && extra->context) {
+            auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
+            auto tensor_traits = buf_extra->get_tensor_traits(op);
+            if (tensor_traits && tensor_traits->compute_forward(params, op)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+bool lm_ggml_cpu_extra_work_size(int n_threads, const struct lm_ggml_tensor * op, size_t * size) {
+    for (auto extra : lm_ggml_backend_cpu_get_extra_buffers_type()) {
+        if (extra && extra->context) {
+            auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
+            auto tensor_traits = buf_extra->get_tensor_traits(op);
+            if (tensor_traits && tensor_traits->work_size(n_threads, op, *size)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
package/cpp/ggml-cpu-traits.h
CHANGED
@@ -1,38 +1,38 @@
-#pragma once
-#include "ggml-backend-impl.h"
-#include "ggml-cpu-impl.h"
-#include "ggml.h"
-
-#ifdef __cplusplus
-# include <vector>
-extern "C" {
-#endif
-
-// return true if op part of extra "accelerator"
-bool lm_ggml_cpu_extra_compute_forward(struct lm_ggml_compute_params * params, struct lm_ggml_tensor * op);
-bool lm_ggml_cpu_extra_work_size(int n_threads, const struct lm_ggml_tensor * op, size_t * size);
-
-#ifdef __cplusplus
-}
-
-namespace ggml::cpu {
-// register in tensor->extra
-class tensor_traits {
-  public:
-    virtual ~tensor_traits();
-    virtual bool work_size(int n_threads, const struct lm_ggml_tensor * op, size_t & size) = 0;
-    virtual bool compute_forward(struct lm_ggml_compute_params * params, struct lm_ggml_tensor * op) = 0;
-};
-
-class extra_buffer_type {
-  public:
-    virtual ~extra_buffer_type();
-    virtual bool supports_op(lm_ggml_backend_dev_t dev, const struct lm_ggml_tensor * op) = 0;
-    virtual tensor_traits * get_tensor_traits(const struct lm_ggml_tensor * op) = 0;
-};
-} // namespace ggml::cpu
-
-// implemented in ggml-cpu.cpp.
-std::vector<lm_ggml_backend_buffer_type_t> & lm_ggml_backend_cpu_get_extra_buffers_type();
-
-#endif
+#pragma once
+#include "ggml-backend-impl.h"
+#include "ggml-cpu-impl.h"
+#include "ggml.h"
+
+#ifdef __cplusplus
+# include <vector>
+extern "C" {
+#endif
+
+// return true if op part of extra "accelerator"
+bool lm_ggml_cpu_extra_compute_forward(struct lm_ggml_compute_params * params, struct lm_ggml_tensor * op);
+bool lm_ggml_cpu_extra_work_size(int n_threads, const struct lm_ggml_tensor * op, size_t * size);
+
+#ifdef __cplusplus
+}
+
+namespace ggml::cpu {
+// register in tensor->extra
+class tensor_traits {
+  public:
+    virtual ~tensor_traits();
+    virtual bool work_size(int n_threads, const struct lm_ggml_tensor * op, size_t & size) = 0;
+    virtual bool compute_forward(struct lm_ggml_compute_params * params, struct lm_ggml_tensor * op) = 0;
+};
+
+class extra_buffer_type {
+  public:
+    virtual ~extra_buffer_type();
+    virtual bool supports_op(lm_ggml_backend_dev_t dev, const struct lm_ggml_tensor * op) = 0;
+    virtual tensor_traits * get_tensor_traits(const struct lm_ggml_tensor * op) = 0;
+};
+} // namespace ggml::cpu
+
+// implemented in ggml-cpu.cpp.
+std::vector<lm_ggml_backend_buffer_type_t> & lm_ggml_backend_cpu_get_extra_buffers_type();
+
+#endif