llama_cpp 0.12.5 → 0.12.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +9 -1
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +347 -40
- data/vendor/tmp/llama.cpp/ggml-quants.h +14 -14
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +14 -61
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +89 -6
- data/vendor/tmp/llama.cpp/ggml.c +134 -60
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +654 -130
- data/vendor/tmp/llama.cpp/llama.h +6 -0
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
@@ -61,6 +61,7 @@ extern "C" {
|
|
61
61
|
enum llama_vocab_type {
|
62
62
|
LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
|
63
63
|
LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
|
64
|
+
LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
|
64
65
|
};
|
65
66
|
|
66
67
|
enum llama_token_type {
|
@@ -235,6 +236,7 @@ extern "C" {
|
|
235
236
|
bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
|
236
237
|
bool embedding; // embedding mode only
|
237
238
|
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
|
239
|
+
bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
|
238
240
|
};
|
239
241
|
|
240
242
|
// model quantization parameters
|
@@ -627,6 +629,10 @@ extern "C" {
|
|
627
629
|
// shape: [n_embd] (1-dimensional)
|
628
630
|
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
|
629
631
|
|
632
|
+
// Get the embeddings for the ith sequence
|
633
|
+
// llama_get_embeddings(ctx) + i*n_embd
|
634
|
+
LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i);
|
635
|
+
|
630
636
|
//
|
631
637
|
// Vocab
|
632
638
|
//
|
@@ -264,26 +264,29 @@ static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) {
|
|
264
264
|
offset += 1;
|
265
265
|
return result;
|
266
266
|
}
|
267
|
-
|
267
|
+
if (!(utf8[offset + 0] & 0x40)) {
|
268
268
|
throw std::invalid_argument("invalid character");
|
269
269
|
}
|
270
|
-
|
271
|
-
if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80))
|
270
|
+
if (!(utf8[offset + 0] & 0x20)) {
|
271
|
+
if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80)) {
|
272
272
|
throw std::invalid_argument("invalid character");
|
273
|
+
}
|
273
274
|
auto result = ((utf8[offset + 0] & 0x1f) << 6) | (utf8[offset + 1] & 0x3f);
|
274
275
|
offset += 2;
|
275
276
|
return result;
|
276
277
|
}
|
277
|
-
|
278
|
-
if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80))
|
278
|
+
if (!(utf8[offset + 0] & 0x10)) {
|
279
|
+
if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80)) {
|
279
280
|
throw std::invalid_argument("invalid character");
|
281
|
+
}
|
280
282
|
auto result = ((utf8[offset + 0] & 0x0f) << 12) | ((utf8[offset + 1] & 0x3f) << 6) | (utf8[offset + 2] & 0x3f);
|
281
283
|
offset += 3;
|
282
284
|
return result;
|
283
285
|
}
|
284
|
-
|
285
|
-
if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80))
|
286
|
+
if (!(utf8[offset + 0] & 0x08)) {
|
287
|
+
if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80)) {
|
286
288
|
throw std::invalid_argument("invalid character");
|
289
|
+
}
|
287
290
|
auto result = ((utf8[offset + 0] & 0x07) << 18) | ((utf8[offset + 1] & 0x3f) << 12) | ((utf8[offset + 2] & 0x3f) << 6) | (utf8[offset + 3] & 0x3f);
|
288
291
|
offset += 4;
|
289
292
|
return result;
|
@@ -331,21 +334,22 @@ static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t
|
|
331
334
|
offset += 1;
|
332
335
|
return result;
|
333
336
|
}
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff));
|
338
|
-
offset += 2;
|
339
|
-
return result;
|
337
|
+
|
338
|
+
if (offset + 1 >= utf16.size() || !((utf16[1] & 0xdc00) == 0xdc00)) {
|
339
|
+
throw std::invalid_argument("invalid character");
|
340
340
|
}
|
341
|
-
|
341
|
+
|
342
|
+
auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff));
|
343
|
+
offset += 2;
|
344
|
+
return result;
|
342
345
|
}
|
343
346
|
|
344
347
|
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
|
345
348
|
std::vector<uint32_t> result;
|
346
349
|
size_t offset = 0;
|
347
|
-
while (offset < utf16.size())
|
350
|
+
while (offset < utf16.size()) {
|
348
351
|
result.push_back(codepoint_from_utf16(utf16, offset));
|
352
|
+
}
|
349
353
|
return result;
|
350
354
|
}
|
351
355
|
|
@@ -361,44 +365,52 @@ static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> &
|
|
361
365
|
static std::unordered_map<uint32_t, int> codepoint_type_map() {
|
362
366
|
std::unordered_map<uint32_t, int> codepoint_types;
|
363
367
|
for (auto p : digit_ranges) {
|
364
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
368
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
365
369
|
codepoint_types[i] = CODEPOINT_TYPE_DIGIT;
|
370
|
+
}
|
366
371
|
}
|
367
|
-
for(auto p : letter_ranges) {
|
368
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
372
|
+
for (auto p : letter_ranges) {
|
373
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
369
374
|
codepoint_types[i] = CODEPOINT_TYPE_LETTER;
|
375
|
+
}
|
370
376
|
}
|
371
|
-
for(auto p : whitespace_ranges) {
|
372
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
377
|
+
for (auto p : whitespace_ranges) {
|
378
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
373
379
|
codepoint_types[i] = CODEPOINT_TYPE_WHITESPACE;
|
380
|
+
}
|
374
381
|
}
|
375
|
-
for(auto p : accent_mark_ranges) {
|
376
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
382
|
+
for (auto p : accent_mark_ranges) {
|
383
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
377
384
|
codepoint_types[i] = CODEPOINT_TYPE_ACCENT_MARK;
|
385
|
+
}
|
378
386
|
}
|
379
|
-
for(auto p : punctuation_ranges) {
|
380
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
387
|
+
for (auto p : punctuation_ranges) {
|
388
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
381
389
|
codepoint_types[i] = CODEPOINT_TYPE_PUNCTUATION;
|
390
|
+
}
|
382
391
|
}
|
383
|
-
for
|
384
|
-
for (auto i = p.first; i <= p.second; ++i)
|
392
|
+
for (auto p : symbol_ranges) {
|
393
|
+
for (auto i = p.first; i <= p.second; ++i) {
|
385
394
|
codepoint_types[i] = CODEPOINT_TYPE_SYMBOL;
|
395
|
+
}
|
386
396
|
}
|
387
|
-
for(auto p : control_ranges) {
|
388
|
-
for(auto i = p.first; i <= p.second; ++ i)
|
397
|
+
for (auto p : control_ranges) {
|
398
|
+
for (auto i = p.first; i <= p.second; ++ i) {
|
389
399
|
codepoint_types[i] = CODEPOINT_TYPE_CONTROL;
|
400
|
+
}
|
390
401
|
}
|
391
402
|
return codepoint_types;
|
392
403
|
}
|
393
404
|
|
394
405
|
static int codepoint_type(uint32_t cp) {
|
395
406
|
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
|
396
|
-
return codepoint_types
|
407
|
+
return codepoint_types.find(cp) == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : codepoint_types.at(cp);
|
397
408
|
}
|
398
409
|
|
399
410
|
static int codepoint_type(const std::string & utf8) {
|
400
|
-
if (utf8.length() == 0)
|
411
|
+
if (utf8.length() == 0) {
|
401
412
|
return CODEPOINT_TYPE_UNIDENTIFIED;
|
413
|
+
}
|
402
414
|
size_t offset = 0;
|
403
415
|
return codepoint_type(codepoint_from_utf8(utf8, offset));
|
404
416
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.5
|
4
|
+
version: 0.12.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-02-
|
11
|
+
date: 2024-02-17 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|