llama_cpp 0.12.5 → 0.12.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -61,6 +61,7 @@ extern "C" {
61
61
  enum llama_vocab_type {
62
62
  LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
63
63
  LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
64
+ LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
64
65
  };
65
66
 
66
67
  enum llama_token_type {
@@ -235,6 +236,7 @@ extern "C" {
235
236
  bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
236
237
  bool embedding; // embedding mode only
237
238
  bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
239
+ bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
238
240
  };
239
241
 
240
242
  // model quantization parameters
@@ -627,6 +629,10 @@ extern "C" {
627
629
  // shape: [n_embd] (1-dimensional)
628
630
  LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
629
631
 
632
+ // Get the embeddings for the ith sequence
633
+ // llama_get_embeddings(ctx) + i*n_embd
634
+ LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i);
635
+
630
636
  //
631
637
  // Vocab
632
638
  //
@@ -264,26 +264,29 @@ static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) {
264
264
  offset += 1;
265
265
  return result;
266
266
  }
267
- else if (!(utf8[offset + 0] & 0x40)) {
267
+ if (!(utf8[offset + 0] & 0x40)) {
268
268
  throw std::invalid_argument("invalid character");
269
269
  }
270
- else if (!(utf8[offset + 0] & 0x20)) {
271
- if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80))
270
+ if (!(utf8[offset + 0] & 0x20)) {
271
+ if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80)) {
272
272
  throw std::invalid_argument("invalid character");
273
+ }
273
274
  auto result = ((utf8[offset + 0] & 0x1f) << 6) | (utf8[offset + 1] & 0x3f);
274
275
  offset += 2;
275
276
  return result;
276
277
  }
277
- else if (!(utf8[offset + 0] & 0x10)) {
278
- if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80))
278
+ if (!(utf8[offset + 0] & 0x10)) {
279
+ if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80)) {
279
280
  throw std::invalid_argument("invalid character");
281
+ }
280
282
  auto result = ((utf8[offset + 0] & 0x0f) << 12) | ((utf8[offset + 1] & 0x3f) << 6) | (utf8[offset + 2] & 0x3f);
281
283
  offset += 3;
282
284
  return result;
283
285
  }
284
- else if (!(utf8[offset + 0] & 0x08)) {
285
- if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80))
286
+ if (!(utf8[offset + 0] & 0x08)) {
287
+ if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80)) {
286
288
  throw std::invalid_argument("invalid character");
289
+ }
287
290
  auto result = ((utf8[offset + 0] & 0x07) << 18) | ((utf8[offset + 1] & 0x3f) << 12) | ((utf8[offset + 2] & 0x3f) << 6) | (utf8[offset + 3] & 0x3f);
288
291
  offset += 4;
289
292
  return result;
@@ -331,21 +334,22 @@ static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t
331
334
  offset += 1;
332
335
  return result;
333
336
  }
334
- else {
335
- if (offset + 1 >= utf16.size() || !((utf16[1] & 0xdc00) == 0xdc00))
336
- throw std::invalid_argument("invalid character");
337
- auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff));
338
- offset += 2;
339
- return result;
337
+
338
+ if (offset + 1 >= utf16.size() || !((utf16[1] & 0xdc00) == 0xdc00)) {
339
+ throw std::invalid_argument("invalid character");
340
340
  }
341
- throw std::invalid_argument("invalid string");
341
+
342
+ auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff));
343
+ offset += 2;
344
+ return result;
342
345
  }
343
346
 
344
347
  static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
345
348
  std::vector<uint32_t> result;
346
349
  size_t offset = 0;
347
- while (offset < utf16.size())
350
+ while (offset < utf16.size()) {
348
351
  result.push_back(codepoint_from_utf16(utf16, offset));
352
+ }
349
353
  return result;
350
354
  }
351
355
 
@@ -361,44 +365,52 @@ static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> &
361
365
  static std::unordered_map<uint32_t, int> codepoint_type_map() {
362
366
  std::unordered_map<uint32_t, int> codepoint_types;
363
367
  for (auto p : digit_ranges) {
364
- for(auto i = p.first; i <= p.second; ++ i)
368
+ for (auto i = p.first; i <= p.second; ++ i) {
365
369
  codepoint_types[i] = CODEPOINT_TYPE_DIGIT;
370
+ }
366
371
  }
367
- for(auto p : letter_ranges) {
368
- for(auto i = p.first; i <= p.second; ++ i)
372
+ for (auto p : letter_ranges) {
373
+ for (auto i = p.first; i <= p.second; ++ i) {
369
374
  codepoint_types[i] = CODEPOINT_TYPE_LETTER;
375
+ }
370
376
  }
371
- for(auto p : whitespace_ranges) {
372
- for(auto i = p.first; i <= p.second; ++ i)
377
+ for (auto p : whitespace_ranges) {
378
+ for (auto i = p.first; i <= p.second; ++ i) {
373
379
  codepoint_types[i] = CODEPOINT_TYPE_WHITESPACE;
380
+ }
374
381
  }
375
- for(auto p : accent_mark_ranges) {
376
- for(auto i = p.first; i <= p.second; ++ i)
382
+ for (auto p : accent_mark_ranges) {
383
+ for (auto i = p.first; i <= p.second; ++ i) {
377
384
  codepoint_types[i] = CODEPOINT_TYPE_ACCENT_MARK;
385
+ }
378
386
  }
379
- for(auto p : punctuation_ranges) {
380
- for(auto i = p.first; i <= p.second; ++ i)
387
+ for (auto p : punctuation_ranges) {
388
+ for (auto i = p.first; i <= p.second; ++ i) {
381
389
  codepoint_types[i] = CODEPOINT_TYPE_PUNCTUATION;
390
+ }
382
391
  }
383
- for (auto p : symbol_ranges) {
384
- for (auto i = p.first; i <= p.second; ++i)
392
+ for (auto p : symbol_ranges) {
393
+ for (auto i = p.first; i <= p.second; ++i) {
385
394
  codepoint_types[i] = CODEPOINT_TYPE_SYMBOL;
395
+ }
386
396
  }
387
- for(auto p : control_ranges) {
388
- for(auto i = p.first; i <= p.second; ++ i)
397
+ for (auto p : control_ranges) {
398
+ for (auto i = p.first; i <= p.second; ++ i) {
389
399
  codepoint_types[i] = CODEPOINT_TYPE_CONTROL;
400
+ }
390
401
  }
391
402
  return codepoint_types;
392
403
  }
393
404
 
394
405
  static int codepoint_type(uint32_t cp) {
395
406
  static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
396
- return codepoint_types[cp];
407
+ return codepoint_types.find(cp) == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : codepoint_types.at(cp);
397
408
  }
398
409
 
399
410
  static int codepoint_type(const std::string & utf8) {
400
- if (utf8.length() == 0)
411
+ if (utf8.length() == 0) {
401
412
  return CODEPOINT_TYPE_UNIDENTIFIED;
413
+ }
402
414
  size_t offset = 0;
403
415
  return codepoint_type(codepoint_from_utf8(utf8, offset));
404
416
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.5
4
+ version: 0.12.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-09 00:00:00.000000000 Z
11
+ date: 2024-02-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
14
14
  email: