llama_cpp 0.12.5 → 0.12.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -61,6 +61,7 @@ extern "C" {
61
61
  enum llama_vocab_type {
62
62
  LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
63
63
  LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
64
+ LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
64
65
  };
65
66
 
66
67
  enum llama_token_type {
@@ -235,6 +236,7 @@ extern "C" {
235
236
  bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
236
237
  bool embedding; // embedding mode only
237
238
  bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
239
+ bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
238
240
  };
239
241
 
240
242
  // model quantization parameters
@@ -627,6 +629,10 @@ extern "C" {
627
629
  // shape: [n_embd] (1-dimensional)
628
630
  LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
629
631
 
632
+ // Get the embeddings for the ith sequence
633
+ // llama_get_embeddings(ctx) + i*n_embd
634
+ LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i);
635
+
630
636
  //
631
637
  // Vocab
632
638
  //
@@ -264,26 +264,29 @@ static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) {
264
264
  offset += 1;
265
265
  return result;
266
266
  }
267
- else if (!(utf8[offset + 0] & 0x40)) {
267
+ if (!(utf8[offset + 0] & 0x40)) {
268
268
  throw std::invalid_argument("invalid character");
269
269
  }
270
- else if (!(utf8[offset + 0] & 0x20)) {
271
- if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80))
270
+ if (!(utf8[offset + 0] & 0x20)) {
271
+ if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80)) {
272
272
  throw std::invalid_argument("invalid character");
273
+ }
273
274
  auto result = ((utf8[offset + 0] & 0x1f) << 6) | (utf8[offset + 1] & 0x3f);
274
275
  offset += 2;
275
276
  return result;
276
277
  }
277
- else if (!(utf8[offset + 0] & 0x10)) {
278
- if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80))
278
+ if (!(utf8[offset + 0] & 0x10)) {
279
+ if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80)) {
279
280
  throw std::invalid_argument("invalid character");
281
+ }
280
282
  auto result = ((utf8[offset + 0] & 0x0f) << 12) | ((utf8[offset + 1] & 0x3f) << 6) | (utf8[offset + 2] & 0x3f);
281
283
  offset += 3;
282
284
  return result;
283
285
  }
284
- else if (!(utf8[offset + 0] & 0x08)) {
285
- if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80))
286
+ if (!(utf8[offset + 0] & 0x08)) {
287
+ if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80)) {
286
288
  throw std::invalid_argument("invalid character");
289
+ }
287
290
  auto result = ((utf8[offset + 0] & 0x07) << 18) | ((utf8[offset + 1] & 0x3f) << 12) | ((utf8[offset + 2] & 0x3f) << 6) | (utf8[offset + 3] & 0x3f);
288
291
  offset += 4;
289
292
  return result;
@@ -331,21 +334,22 @@ static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t
331
334
  offset += 1;
332
335
  return result;
333
336
  }
334
- else {
335
- if (offset + 1 >= utf16.size() || !((utf16[1] & 0xdc00) == 0xdc00))
336
- throw std::invalid_argument("invalid character");
337
- auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff));
338
- offset += 2;
339
- return result;
337
+
338
+ if (offset + 1 >= utf16.size() || !((utf16[1] & 0xdc00) == 0xdc00)) {
339
+ throw std::invalid_argument("invalid character");
340
340
  }
341
- throw std::invalid_argument("invalid string");
341
+
342
+ auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff));
343
+ offset += 2;
344
+ return result;
342
345
  }
343
346
 
344
347
  static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
345
348
  std::vector<uint32_t> result;
346
349
  size_t offset = 0;
347
- while (offset < utf16.size())
350
+ while (offset < utf16.size()) {
348
351
  result.push_back(codepoint_from_utf16(utf16, offset));
352
+ }
349
353
  return result;
350
354
  }
351
355
 
@@ -361,44 +365,52 @@ static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> &
361
365
  static std::unordered_map<uint32_t, int> codepoint_type_map() {
362
366
  std::unordered_map<uint32_t, int> codepoint_types;
363
367
  for (auto p : digit_ranges) {
364
- for(auto i = p.first; i <= p.second; ++ i)
368
+ for (auto i = p.first; i <= p.second; ++ i) {
365
369
  codepoint_types[i] = CODEPOINT_TYPE_DIGIT;
370
+ }
366
371
  }
367
- for(auto p : letter_ranges) {
368
- for(auto i = p.first; i <= p.second; ++ i)
372
+ for (auto p : letter_ranges) {
373
+ for (auto i = p.first; i <= p.second; ++ i) {
369
374
  codepoint_types[i] = CODEPOINT_TYPE_LETTER;
375
+ }
370
376
  }
371
- for(auto p : whitespace_ranges) {
372
- for(auto i = p.first; i <= p.second; ++ i)
377
+ for (auto p : whitespace_ranges) {
378
+ for (auto i = p.first; i <= p.second; ++ i) {
373
379
  codepoint_types[i] = CODEPOINT_TYPE_WHITESPACE;
380
+ }
374
381
  }
375
- for(auto p : accent_mark_ranges) {
376
- for(auto i = p.first; i <= p.second; ++ i)
382
+ for (auto p : accent_mark_ranges) {
383
+ for (auto i = p.first; i <= p.second; ++ i) {
377
384
  codepoint_types[i] = CODEPOINT_TYPE_ACCENT_MARK;
385
+ }
378
386
  }
379
- for(auto p : punctuation_ranges) {
380
- for(auto i = p.first; i <= p.second; ++ i)
387
+ for (auto p : punctuation_ranges) {
388
+ for (auto i = p.first; i <= p.second; ++ i) {
381
389
  codepoint_types[i] = CODEPOINT_TYPE_PUNCTUATION;
390
+ }
382
391
  }
383
- for (auto p : symbol_ranges) {
384
- for (auto i = p.first; i <= p.second; ++i)
392
+ for (auto p : symbol_ranges) {
393
+ for (auto i = p.first; i <= p.second; ++i) {
385
394
  codepoint_types[i] = CODEPOINT_TYPE_SYMBOL;
395
+ }
386
396
  }
387
- for(auto p : control_ranges) {
388
- for(auto i = p.first; i <= p.second; ++ i)
397
+ for (auto p : control_ranges) {
398
+ for (auto i = p.first; i <= p.second; ++ i) {
389
399
  codepoint_types[i] = CODEPOINT_TYPE_CONTROL;
400
+ }
390
401
  }
391
402
  return codepoint_types;
392
403
  }
393
404
 
394
405
  static int codepoint_type(uint32_t cp) {
395
406
  static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
396
- return codepoint_types[cp];
407
+ return codepoint_types.find(cp) == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : codepoint_types.at(cp);
397
408
  }
398
409
 
399
410
  static int codepoint_type(const std::string & utf8) {
400
- if (utf8.length() == 0)
411
+ if (utf8.length() == 0) {
401
412
  return CODEPOINT_TYPE_UNIDENTIFIED;
413
+ }
402
414
  size_t offset = 0;
403
415
  return codepoint_type(codepoint_from_utf8(utf8, offset));
404
416
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.5
4
+ version: 0.12.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-09 00:00:00.000000000 Z
11
+ date: 2024-02-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
14
14
  email: