llama_cpp 0.12.5 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +9 -1
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +347 -40
- data/vendor/tmp/llama.cpp/ggml-quants.h +14 -14
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +14 -61
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +89 -6
- data/vendor/tmp/llama.cpp/ggml.c +134 -60
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +654 -130
- data/vendor/tmp/llama.cpp/llama.h +6 -0
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
@@ -61,6 +61,7 @@ extern "C" {
|
|
61
61
|
enum llama_vocab_type {
|
62
62
|
LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
|
63
63
|
LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
|
64
|
+
LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
|
64
65
|
};
|
65
66
|
|
66
67
|
enum llama_token_type {
|
@@ -235,6 +236,7 @@ extern "C" {
|
|
235
236
|
bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
|
236
237
|
bool embedding; // embedding mode only
|
237
238
|
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
|
239
|
+
bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
|
238
240
|
};
|
239
241
|
|
240
242
|
// model quantization parameters
|
@@ -627,6 +629,10 @@ extern "C" {
|
|
627
629
|
// shape: [n_embd] (1-dimensional)
|
628
630
|
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
|
629
631
|
|
632
|
+
// Get the embeddings for the ith sequence
|
633
|
+
// llama_get_embeddings(ctx) + i*n_embd
|
634
|
+
LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i);
|
635
|
+
|
630
636
|
//
|
631
637
|
// Vocab
|
632
638
|
//
|
@@ -264,26 +264,29 @@ static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) {
|
|
264
264
|
offset += 1;
|
265
265
|
return result;
|
266
266
|
}
|
267
|
-
|
267
|
+
if (!(utf8[offset + 0] & 0x40)) {
|
268
268
|
throw std::invalid_argument("invalid character");
|
269
269
|
}
|
270
|
-
|
271
|
-
if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80))
|
270
|
+
if (!(utf8[offset + 0] & 0x20)) {
|
271
|
+
if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80)) {
|
272
272
|
throw std::invalid_argument("invalid character");
|
273
|
+
}
|
273
274
|
auto result = ((utf8[offset + 0] & 0x1f) << 6) | (utf8[offset + 1] & 0x3f);
|
274
275
|
offset += 2;
|
275
276
|
return result;
|
276
277
|
}
|
277
|
-
|
278
|
-
if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80))
|
278
|
+
if (!(utf8[offset + 0] & 0x10)) {
|
279
|
+
if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80)) {
|
279
280
|
throw std::invalid_argument("invalid character");
|
281
|
+
}
|
280
282
|
auto result = ((utf8[offset + 0] & 0x0f) << 12) | ((utf8[offset + 1] & 0x3f) << 6) | (utf8[offset + 2] & 0x3f);
|
281
283
|
offset += 3;
|
282
284
|
return result;
|
283
285
|
}
|
284
|
-
|
285
|
-
if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80))
|
286
|
+
if (!(utf8[offset + 0] & 0x08)) {
|
287
|
+
if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80)) {
|
286
288
|
throw std::invalid_argument("invalid character");
|
289
|
+
}
|
287
290
|
auto result = ((utf8[offset + 0] & 0x07) << 18) | ((utf8[offset + 1] & 0x3f) << 12) | ((utf8[offset + 2] & 0x3f) << 6) | (utf8[offset + 3] & 0x3f);
|
288
291
|
offset += 4;
|
289
292
|
return result;
|
@@ -331,21 +334,22 @@ static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t
|
|
331
334
|
offset += 1;
|
332
335
|
return result;
|
333
336
|
}
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff));
|
338
|
-
offset += 2;
|
339
|
-
return result;
|
337
|
+
|
338
|
+
if (offset + 1 >= utf16.size() || !((utf16[1] & 0xdc00) == 0xdc00)) {
|
339
|
+
throw std::invalid_argument("invalid character");
|
340
340
|
}
|
341
|
-
|
341
|
+
|
342
|
+
auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff));
|
343
|
+
offset += 2;
|
344
|
+
return result;
|
342
345
|
}
|
343
346
|
|
344
347
|
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
|
345
348
|
std::vector<uint32_t> result;
|
346
349
|
size_t offset = 0;
|
347
|
-
while (offset < utf16.size())
|
350
|
+
while (offset < utf16.size()) {
|
348
351
|
result.push_back(codepoint_from_utf16(utf16, offset));
|
352
|
+
}
|
349
353
|
return result;
|
350
354
|
}
|
351
355
|
|
@@ -361,44 +365,52 @@ static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> &
|
|
361
365
|
static std::unordered_map<uint32_t, int> codepoint_type_map() {
|
362
366
|
std::unordered_map<uint32_t, int> codepoint_types;
|
363
367
|
for (auto p : digit_ranges) {
|
364
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
368
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
365
369
|
codepoint_types[i] = CODEPOINT_TYPE_DIGIT;
|
370
|
+
}
|
366
371
|
}
|
367
|
-
for(auto p : letter_ranges) {
|
368
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
372
|
+
for (auto p : letter_ranges) {
|
373
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
369
374
|
codepoint_types[i] = CODEPOINT_TYPE_LETTER;
|
375
|
+
}
|
370
376
|
}
|
371
|
-
for(auto p : whitespace_ranges) {
|
372
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
377
|
+
for (auto p : whitespace_ranges) {
|
378
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
373
379
|
codepoint_types[i] = CODEPOINT_TYPE_WHITESPACE;
|
380
|
+
}
|
374
381
|
}
|
375
|
-
for(auto p : accent_mark_ranges) {
|
376
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
382
|
+
for (auto p : accent_mark_ranges) {
|
383
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
377
384
|
codepoint_types[i] = CODEPOINT_TYPE_ACCENT_MARK;
|
385
|
+
}
|
378
386
|
}
|
379
|
-
for(auto p : punctuation_ranges) {
|
380
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
387
|
+
for (auto p : punctuation_ranges) {
|
388
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
381
389
|
codepoint_types[i] = CODEPOINT_TYPE_PUNCTUATION;
|
390
|
+
}
|
382
391
|
}
|
383
|
-
for(auto p : symbol_ranges) {
|
384
|
-
for (auto i = p.first; i <= p.second; ++i)
|
392
|
+
for (auto p : symbol_ranges) {
|
393
|
+
for (auto i = p.first; i <= p.second; ++i) {
|
385
394
|
codepoint_types[i] = CODEPOINT_TYPE_SYMBOL;
|
395
|
+
}
|
386
396
|
}
|
387
|
-
for(auto p : control_ranges) {
|
388
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
397
|
+
for (auto p : control_ranges) {
|
398
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
389
399
|
codepoint_types[i] = CODEPOINT_TYPE_CONTROL;
|
400
|
+
}
|
390
401
|
}
|
391
402
|
return codepoint_types;
|
392
403
|
}
|
393
404
|
|
394
405
|
static int codepoint_type(uint32_t cp) {
|
395
406
|
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
|
396
|
-
return codepoint_types[cp];
|
407
|
+
return codepoint_types.find(cp) == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : codepoint_types.at(cp);
|
397
408
|
}
|
398
409
|
|
399
410
|
static int codepoint_type(const std::string & utf8) {
|
400
|
-
if (utf8.length() == 0)
|
411
|
+
if (utf8.length() == 0) {
|
401
412
|
return CODEPOINT_TYPE_UNIDENTIFIED;
|
413
|
+
}
|
402
414
|
size_t offset = 0;
|
403
415
|
return codepoint_type(codepoint_from_utf8(utf8, offset));
|
404
416
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.5
|
4
|
+
version: 0.12.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-02-
|
11
|
+
date: 2024-02-17 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|