gosu 0.15.0 → 0.15.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
- // Ogg Vorbis audio decoder - v1.14 - public domain
1
+ // Ogg Vorbis audio decoder - v1.17 - public domain
2
2
  // http://nothings.org/stb_vorbis/
3
3
  //
4
4
  // Original version written by Sean Barrett in 2007.
@@ -30,9 +30,12 @@
30
30
  // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
31
31
  // Phillip Bennefall Rohit Thiago Goulart
32
32
  // manxorist@github saga musix github:infatum
33
- // Timur Gagiev
33
+ // Timur Gagiev Maxwell Koo
34
34
  //
35
35
  // Partial history:
36
+ // 1.17 - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
37
+ // 1.16 - 2019-03-04 - fix warnings
38
+ // 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
36
39
  // 1.14 - 2018-02-11 - delete bogus dealloca usage
37
40
  // 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
38
41
  // 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
@@ -253,7 +256,7 @@ extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
253
256
  // create an ogg vorbis decoder from an open FILE *, looking for a stream at
254
257
  // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
255
258
  // note that stb_vorbis must "own" this stream; if you seek it in between
256
- // calls to stb_vorbis, it will become confused. Morever, if you attempt to
259
+ // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
257
260
  // perform stb_vorbis_seek_*() operations on this file, it will assume it
258
261
  // owns the _entire_ rest of the file after the start point. Use the next
259
262
  // function, stb_vorbis_open_file_section(), to limit it.
@@ -374,7 +377,8 @@ enum STBVorbisError
374
377
  VORBIS_invalid_first_page,
375
378
  VORBIS_bad_packet_type,
376
379
  VORBIS_cant_find_last_page,
377
- VORBIS_seek_failed
380
+ VORBIS_seek_failed,
381
+ VORBIS_ogg_skeleton_not_supported
378
382
  };
379
383
 
380
384
 
@@ -1073,7 +1077,7 @@ static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1073
1077
  assert(z >= 0 && z < 32);
1074
1078
  available[z] = 0;
1075
1079
  add_entry(c, bit_reverse(res), i, m++, len[i], values);
1076
- // propogate availability up the tree
1080
+ // propagate availability up the tree
1077
1081
  if (z != len[i]) {
1078
1082
  assert(len[i] >= 0 && len[i] < 32);
1079
1083
  for (y=len[i]; y > z; --y) {
@@ -1199,8 +1203,10 @@ static int lookup1_values(int entries, int dim)
1199
1203
  int r = (int) floor(exp((float) log((float) entries) / dim));
1200
1204
  if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1201
1205
  ++r; // floor() to avoid _ftol() when non-CRT
1202
- assert(pow((float) r+1, dim) > entries);
1203
- assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1206
+ if (pow((float) r+1, dim) <= entries)
1207
+ return -1;
1208
+ if ((int) floor(pow((float) r, dim)) > entries)
1209
+ return -1;
1204
1210
  return r;
1205
1211
  }
1206
1212
 
@@ -2010,7 +2016,7 @@ static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y
2010
2016
  ady -= abs(base) * adx;
2011
2017
  if (x1 > n) x1 = n;
2012
2018
  if (x < x1) {
2013
- LINE_OP(output[x], inverse_db_table[y]);
2019
+ LINE_OP(output[x], inverse_db_table[y&255]);
2014
2020
  for (++x; x < x1; ++x) {
2015
2021
  err += ady;
2016
2022
  if (err >= adx) {
@@ -2018,7 +2024,7 @@ static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y
2018
2024
  y += sy;
2019
2025
  } else
2020
2026
  y += base;
2021
- LINE_OP(output[x], inverse_db_table[y]);
2027
+ LINE_OP(output[x], inverse_db_table[y&255]);
2022
2028
  }
2023
2029
  }
2024
2030
  }
@@ -2637,7 +2643,7 @@ static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2637
2643
  // once I combined the passes.
2638
2644
 
2639
2645
  // so there's a missing 'times 2' here (for adding X to itself).
2640
- // this propogates through linearly to the end, where the numbers
2646
+ // this propagates through linearly to the end, where the numbers
2641
2647
  // are 1/2 too small, and need to be compensated for.
2642
2648
 
2643
2649
  {
@@ -3045,7 +3051,6 @@ static float *get_window(vorb *f, int len)
3045
3051
  len <<= 1;
3046
3052
  if (len == f->blocksize_0) return f->window[0];
3047
3053
  if (len == f->blocksize_1) return f->window[1];
3048
- assert(0);
3049
3054
  return NULL;
3050
3055
  }
3051
3056
 
@@ -3451,6 +3456,7 @@ static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3451
3456
  if (f->previous_length) {
3452
3457
  int i,j, n = f->previous_length;
3453
3458
  float *w = get_window(f, n);
3459
+ if (w == NULL) return 0;
3454
3460
  for (i=0; i < f->channels; ++i) {
3455
3461
  for (j=0; j < n; ++j)
3456
3462
  f->channel_buffers[i][left+j] =
@@ -3578,7 +3584,22 @@ static int start_decoder(vorb *f)
3578
3584
  if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3579
3585
  // check for expected packet length
3580
3586
  if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3581
- if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page);
3587
+ if (f->segments[0] != 30) {
3588
+ // check for the Ogg skeleton fishead identifying header to refine our error
3589
+ if (f->segments[0] == 64 &&
3590
+ getn(f, header, 6) &&
3591
+ header[0] == 'f' &&
3592
+ header[1] == 'i' &&
3593
+ header[2] == 's' &&
3594
+ header[3] == 'h' &&
3595
+ header[4] == 'e' &&
3596
+ header[5] == 'a' &&
3597
+ get8(f) == 'd' &&
3598
+ get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported);
3599
+ else
3600
+ return error(f, VORBIS_invalid_first_page);
3601
+ }
3602
+
3582
3603
  // read packet
3583
3604
  // check packet header
3584
3605
  if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
@@ -3677,6 +3698,7 @@ static int start_decoder(vorb *f)
3677
3698
  while (current_entry < c->entries) {
3678
3699
  int limit = c->entries - current_entry;
3679
3700
  int n = get_bits(f, ilog(limit));
3701
+ if (current_length >= 32) return error(f, VORBIS_invalid_setup);
3680
3702
  if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3681
3703
  memset(lengths + current_entry, current_length, n);
3682
3704
  current_entry += n;
@@ -3780,7 +3802,9 @@ static int start_decoder(vorb *f)
3780
3802
  c->value_bits = get_bits(f, 4)+1;
3781
3803
  c->sequence_p = get_bits(f,1);
3782
3804
  if (c->lookup_type == 1) {
3783
- c->lookup_values = lookup1_values(c->entries, c->dimensions);
3805
+ int values = lookup1_values(c->entries, c->dimensions);
3806
+ if (values < 0) return error(f, VORBIS_invalid_setup);
3807
+ c->lookup_values = (uint32) values;
3784
3808
  } else {
3785
3809
  c->lookup_values = c->entries * c->dimensions;
3786
3810
  }
@@ -3916,6 +3940,9 @@ static int start_decoder(vorb *f)
3916
3940
  p[j].id = j;
3917
3941
  }
3918
3942
  qsort(p, g->values, sizeof(p[0]), point_compare);
3943
+ for (j=0; j < g->values-1; ++j)
3944
+ if (p[j].x == p[j+1].x)
3945
+ return error(f, VORBIS_invalid_setup);
3919
3946
  for (j=0; j < g->values; ++j)
3920
3947
  g->sorted_order[j] = (uint8) p[j].id;
3921
3948
  // precompute the neighbors
@@ -4002,6 +4029,7 @@ static int start_decoder(vorb *f)
4002
4029
  max_submaps = m->submaps;
4003
4030
  if (get_bits(f,1)) {
4004
4031
  m->coupling_steps = get_bits(f,8)+1;
4032
+ if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
4005
4033
  for (k=0; k < m->coupling_steps; ++k) {
4006
4034
  m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4007
4035
  m->chan[k].angle = get_bits(f, ilog(f->channels-1));
@@ -4566,7 +4594,7 @@ static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4566
4594
  return 1;
4567
4595
  }
4568
4596
 
4569
- // rarely used function to seek back to the preceeding page while finding the
4597
+ // rarely used function to seek back to the preceding page while finding the
4570
4598
  // start of a packet
4571
4599
  static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4572
4600
  {
@@ -4973,7 +5001,13 @@ stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, con
4973
5001
 
4974
5002
  stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4975
5003
  {
4976
- FILE *f = fopen(filename, "rb");
5004
+ FILE *f;
5005
+ #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
5006
+ if (0 != fopen_s(&f, filename, "rb"))
5007
+ f = NULL;
5008
+ #else
5009
+ f = fopen(filename, "rb");
5010
+ #endif
4977
5011
  if (f)
4978
5012
  return stb_vorbis_open_file(f, TRUE, error, alloc);
4979
5013
  if (error) *error = VORBIS_file_open_failure;
@@ -5362,6 +5396,12 @@ int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, in
5362
5396
  #endif // STB_VORBIS_NO_PULLDATA_API
5363
5397
 
5364
5398
  /* Version history
5399
+ 1.17 - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
5400
+ found with Mayhem by ForAllSecure
5401
+ 1.16 - 2019-03-04 - fix warnings
5402
+ 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
5403
+ 1.14 - 2018-02-11 - delete bogus dealloca usage
5404
+ 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
5365
5405
  1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5366
5406
  1.11 - 2017-07-23 - fix MinGW compilation
5367
5407
  1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
@@ -1,6 +1,6 @@
1
1
  /* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
2
2
  /*
3
- * Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
3
+ * Copyright (c) 2018 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
4
4
  * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
5
5
  *
6
6
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -42,6 +42,14 @@
42
42
 
43
43
 
44
44
  #include "utf8proc.h"
45
+
46
+ #ifndef SSIZE_MAX
47
+ #define SSIZE_MAX ((size_t)SIZE_MAX/2)
48
+ #endif
49
+ #ifndef UINT16_MAX
50
+ # define UINT16_MAX 65535U
51
+ #endif
52
+
45
53
  #include "utf8proc_data.h"
46
54
 
47
55
 
@@ -92,6 +100,10 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
92
100
  return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
93
101
  }
94
102
 
103
+ UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
104
+ return "12.1.0";
105
+ }
106
+
95
107
  UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
96
108
  switch (errcode) {
97
109
  case UTF8PROC_ERROR_NOMEM:
@@ -188,9 +200,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, ut
188
200
  } else return 0;
189
201
  }
190
202
 
191
- /* internal "unsafe" version that does not check whether uc is in range */
192
- static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
203
+ /* internal version used for inserting 0xff bytes between graphemes */
204
+ static utf8proc_ssize_t charbound_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
193
205
  if (uc < 0x00) {
206
+ if (uc == -1) { /* internal value used for grapheme breaks */
207
+ dst[0] = (utf8proc_uint8_t)0xFF;
208
+ return 1;
209
+ }
194
210
  return 0;
195
211
  } else if (uc < 0x80) {
196
212
  dst[0] = (utf8proc_uint8_t)uc;
@@ -199,12 +215,6 @@ static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t
199
215
  dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
200
216
  dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
201
217
  return 2;
202
- } else if (uc == 0xFFFF) {
203
- dst[0] = (utf8proc_uint8_t)0xFF;
204
- return 1;
205
- } else if (uc == 0xFFFE) {
206
- dst[0] = (utf8proc_uint8_t)0xFE;
207
- return 1;
208
218
  } else if (uc < 0x10000) {
209
219
  dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
210
220
  dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
@@ -271,12 +281,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
271
281
  tbc == UTF8PROC_BOUNDCLASS_ZWJ || // ---
272
282
  tbc == UTF8PROC_BOUNDCLASS_SPACINGMARK || // GB9a
273
283
  lbc == UTF8PROC_BOUNDCLASS_PREPEND) ? false : // GB9b
274
- ((lbc == UTF8PROC_BOUNDCLASS_E_BASE || // GB10 (requires additional handling below)
275
- lbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) && // ----
276
- tbc == UTF8PROC_BOUNDCLASS_E_MODIFIER) ? false : // ----
277
- (lbc == UTF8PROC_BOUNDCLASS_ZWJ && // GB11
278
- (tbc == UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ || // ----
279
- tbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ)) ? false : // ----
284
+ (lbc == UTF8PROC_BOUNDCLASS_E_ZWG && // GB11 (requires additional handling below)
285
+ tbc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) ? false : // ----
280
286
  (lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR && // GB12/13 (requires additional handling below)
281
287
  tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false : // ----
282
288
  true; // GB999
@@ -284,9 +290,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
284
290
 
285
291
  static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
286
292
  {
287
- int lbc_override = lbc;
288
- if (state && *state != UTF8PROC_BOUNDCLASS_START)
289
- lbc_override = *state;
293
+ int lbc_override = ((state && *state != UTF8PROC_BOUNDCLASS_START)
294
+ ? *state : lbc);
290
295
  utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
291
296
  if (state) {
292
297
  // Special support for GB 12/13 made possible by GB999. After two RI
@@ -296,12 +301,15 @@ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t
296
301
  // forbidden by a different rule such as GB9).
297
302
  if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
298
303
  *state = UTF8PROC_BOUNDCLASS_OTHER;
299
- // Special support for GB10. Fold any EXTEND codepoints into the previous
300
- // boundclass if we're dealing with an emoji base boundclass.
301
- else if ((*state == UTF8PROC_BOUNDCLASS_E_BASE ||
302
- *state == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) &&
303
- tbc == UTF8PROC_BOUNDCLASS_EXTEND)
304
- *state = UTF8PROC_BOUNDCLASS_E_BASE;
304
+ // Special support for GB11 (emoji extend* zwj / emoji)
305
+ else if (*state == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
306
+ if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
307
+ *state = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
308
+ else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
309
+ *state = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
310
+ else
311
+ *state = tbc;
312
+ }
305
313
  else
306
314
  *state = tbc;
307
315
  }
@@ -424,6 +432,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
424
432
  if (options & UTF8PROC_IGNORE) {
425
433
  if (property->ignorable) return 0;
426
434
  }
435
+ if (options & UTF8PROC_STRIPNA) {
436
+ if (!category) return 0;
437
+ }
427
438
  if (options & UTF8PROC_LUMP) {
428
439
  if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
429
440
  if (uc == 0x2018 || uc == 0x2019 || uc == 0x02BC || uc == 0x02C8)
@@ -471,7 +482,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
471
482
  int tbc = property->boundclass;
472
483
  boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
473
484
  if (boundary) {
474
- if (bufsize >= 1) dst[0] = 0xFFFF;
485
+ if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
475
486
  if (bufsize >= 2) dst[1] = uc;
476
487
  return 2;
477
488
  }
@@ -632,9 +643,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
632
643
  current_property->comb_index != UINT16_MAX &&
633
644
  current_property->comb_index >= 0x8000) {
634
645
  int sidx = starter_property->comb_index;
635
- int idx = (current_property->comb_index & 0x3FFF) - utf8proc_combinations[sidx];
636
- if (idx >= 0 && idx <= utf8proc_combinations[sidx + 1] ) {
637
- idx += sidx + 2;
646
+ int idx = current_property->comb_index & 0x3FFF;
647
+ if (idx >= utf8proc_combinations[sidx] && idx <= utf8proc_combinations[sidx + 1] ) {
648
+ idx += sidx + 2 - utf8proc_combinations[sidx];
638
649
  if (current_property->comb_index & 0x4000) {
639
650
  composition = (utf8proc_combinations[idx] << 16) | utf8proc_combinations[idx+1];
640
651
  } else
@@ -677,7 +688,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
677
688
  if (options & UTF8PROC_CHARBOUND) {
678
689
  for (rpos = 0; rpos < length; rpos++) {
679
690
  uc = buffer[rpos];
680
- wpos += unsafe_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
691
+ wpos += charbound_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
681
692
  }
682
693
  } else {
683
694
  for (rpos = 0; rpos < length; rpos++) {
@@ -753,3 +764,10 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str)
753
764
  UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
754
765
  return retval;
755
766
  }
767
+
768
+ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str) {
769
+ utf8proc_uint8_t *retval;
770
+ utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
771
+ UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE);
772
+ return retval;
773
+ }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
2
+ * Copyright (c) 2018 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
3
3
  * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
4
4
  *
5
5
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -28,7 +28,7 @@
28
28
  * utf8proc is a free/open-source (MIT/expat licensed) C library
29
29
  * providing Unicode normalization, case-folding, and other operations
30
30
  * for strings in the UTF-8 encoding, supporting Unicode version
31
- * 8.0.0. See the utf8proc home page (http://julialang.org/utf8proc/)
31
+ * 9.0.0. See the utf8proc home page (http://julialang.org/utf8proc/)
32
32
  * for downloads and other information, or the source code on github
33
33
  * (https://github.com/JuliaLang/utf8proc).
34
34
  *
@@ -71,13 +71,13 @@
71
71
  /** The MAJOR version number (increased when backwards API compatibility is broken). */
72
72
  #define UTF8PROC_VERSION_MAJOR 2
73
73
  /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
74
- #define UTF8PROC_VERSION_MINOR 1
74
+ #define UTF8PROC_VERSION_MINOR 4
75
75
  /** The PATCH version (increased for fixes that do not change the API). */
76
76
  #define UTF8PROC_VERSION_PATCH 0
77
77
  /** @} */
78
78
 
79
79
  #include <stdlib.h>
80
- #include <sys/types.h>
80
+
81
81
  #if defined(_MSC_VER) && _MSC_VER < 1800
82
82
  // MSVC prior to 2013 lacked stdbool.h and inttypes.h
83
83
  typedef signed char utf8proc_int8_t;
@@ -120,30 +120,26 @@ typedef bool utf8proc_bool;
120
120
  #endif
121
121
  #include <limits.h>
122
122
 
123
- #ifdef _WIN32
124
- # ifdef UTF8PROC_EXPORTS
125
- # define UTF8PROC_DLLEXPORT __declspec(dllexport)
123
+ #ifdef UTF8PROC_STATIC
124
+ # define UTF8PROC_DLLEXPORT
125
+ #else
126
+ # ifdef _WIN32
127
+ # ifdef UTF8PROC_EXPORTS
128
+ # define UTF8PROC_DLLEXPORT __declspec(dllexport)
129
+ # else
130
+ # define UTF8PROC_DLLEXPORT __declspec(dllimport)
131
+ # endif
132
+ # elif __GNUC__ >= 4
133
+ # define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
126
134
  # else
127
- # define UTF8PROC_DLLEXPORT __declspec(dllimport)
135
+ # define UTF8PROC_DLLEXPORT
128
136
  # endif
129
- #elif __GNUC__ >= 4
130
- # define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
131
- #else
132
- # define UTF8PROC_DLLEXPORT
133
137
  #endif
134
138
 
135
139
  #ifdef __cplusplus
136
140
  extern "C" {
137
141
  #endif
138
142
 
139
- #ifndef SSIZE_MAX
140
- #define SSIZE_MAX ((size_t)SIZE_MAX/2)
141
- #endif
142
-
143
- #ifndef UINT16_MAX
144
- # define UINT16_MAX 65535U
145
- #endif
146
-
147
143
  /**
148
144
  * Option flags used by several functions in the library.
149
145
  */
@@ -209,6 +205,10 @@ typedef enum {
209
205
  * @ref UTF8PROC_DECOMPOSE
210
206
  */
211
207
  UTF8PROC_STRIPMARK = (1<<13),
208
+ /**
209
+ * Strip unassigned codepoints.
210
+ */
211
+ UTF8PROC_STRIPNA = (1<<14),
212
212
  } utf8proc_option_t;
213
213
 
214
214
  /** @name Error codes
@@ -374,10 +374,18 @@ typedef enum {
374
374
  UTF8PROC_BOUNDCLASS_SPACINGMARK = 12, /**< Spacingmark */
375
375
  UTF8PROC_BOUNDCLASS_PREPEND = 13, /**< Prepend */
376
376
  UTF8PROC_BOUNDCLASS_ZWJ = 14, /**< Zero Width Joiner */
377
+
378
+ /* the following are no longer used in Unicode 11, but we keep
379
+ the constants here for backward compatibility */
377
380
  UTF8PROC_BOUNDCLASS_E_BASE = 15, /**< Emoji Base */
378
381
  UTF8PROC_BOUNDCLASS_E_MODIFIER = 16, /**< Emoji Modifier */
379
382
  UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ = 17, /**< Glue_After_ZWJ */
380
383
  UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZWJ */
384
+
385
+ /* the Extended_Pictographic property is used in the Unicode 11
386
+ grapheme-boundary rules, so we store it in the boundclass field */
387
+ UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC = 19,
388
+ UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */
381
389
  } utf8proc_boundclass_t;
382
390
 
383
391
  /**
@@ -400,6 +408,11 @@ UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256];
400
408
  */
401
409
  UTF8PROC_DLLEXPORT const char *utf8proc_version(void);
402
410
 
411
+ /**
412
+ * Returns the utf8proc supported Unicode version as a string MAJOR.MINOR.PATCH.
413
+ */
414
+ UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void);
415
+
403
416
  /**
404
417
  * Returns an informative error string for the given utf8proc error code
405
418
  * (e.g. the error codes returned by @ref utf8proc_map).
@@ -465,6 +478,7 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
465
478
  * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster
466
479
  * - @ref UTF8PROC_LUMP - lump certain different codepoints together
467
480
  * - @ref UTF8PROC_STRIPMARK - remove all character marks
481
+ * - @ref UTF8PROC_STRIPNA - remove unassigned codepoints
468
482
  * @param last_boundclass
469
483
  * Pointer to an integer variable containing
470
484
  * the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND
@@ -576,6 +590,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
576
590
  * Given a pair of consecutive codepoints, return whether a grapheme break is
577
591
  * permitted between them (as defined by the extended grapheme clusters in UAX#29).
578
592
  *
593
+ * @param codepoint1 The first codepoint.
594
+ * @param codepoint2 The second codepoint, occurring consecutively after `codepoint1`.
579
595
  * @param state Beginning with Version 29 (Unicode 9.0.0), this algorithm requires
580
596
  * state to break graphemes. This state can be passed in as a pointer
581
597
  * in the `state` argument and should initially be set to 0. If the
@@ -584,7 +600,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
584
600
  * matching the rules in Unicode 8.0.0.
585
601
  *
586
602
  * @warning If the state parameter is used, `utf8proc_grapheme_break_stateful` must
587
- * be called IN ORDER on ALL potential breaks in a string.
603
+ * be called IN ORDER on ALL potential breaks in a string. However, it
604
+ * is safe to reset the state to zero after a grapheme break.
588
605
  */
589
606
  UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
590
607
  utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state);
@@ -651,7 +668,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
651
668
  * contain NULL characters within the string if `str` contained NULL
652
669
  * characters). Other flags in the `options` field are passed to the
653
670
  * functions defined above, and regarded as described. See also
654
- * @ref utfproc_map_custom to supply a custom codepoint transformation.
671
+ * @ref utf8proc_map_custom to supply a custom codepoint transformation.
655
672
  *
656
673
  * In case of success the length of the new string is returned,
657
674
  * otherwise a negative error code is returned.
@@ -676,8 +693,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
676
693
 
677
694
  /** @name Unicode normalization
678
695
  *
679
- * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC
680
- * normalized version of the null-terminated string `str`. These
696
+ * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD, NFKC or
697
+ * NFKC_Casefold normalized version of the null-terminated string `str`. These
681
698
  * are shortcuts to calling @ref utf8proc_map with @ref UTF8PROC_NULLTERM
682
699
  * combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
683
700
  */
@@ -690,6 +707,11 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
690
707
  UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
691
708
  /** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
692
709
  UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
710
+ /**
711
+ * NFKC_Casefold normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT
712
+ * and @ref UTF8PROC_CASEFOLD and @ref UTF8PROC_IGNORE).
713
+ **/
714
+ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str);
693
715
  /** @} */
694
716
 
695
717
  #ifdef __cplusplus