gosu 0.14.5 → 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/.yardopts +1 -0
  3. data/COPYING +1 -1
  4. data/Gosu/Channel.h +25 -0
  5. data/Gosu/Color.h +38 -0
  6. data/Gosu/Font.h +36 -0
  7. data/Gosu/Gosu.h +79 -0
  8. data/Gosu/Image.h +54 -0
  9. data/Gosu/Sample.h +19 -0
  10. data/Gosu/Song.h +24 -0
  11. data/Gosu/TextInput.h +30 -0
  12. data/Gosu/Version.hpp +2 -2
  13. data/Gosu/Window.h +61 -0
  14. data/Gosu/Window.hpp +3 -2
  15. data/README.md +1 -1
  16. data/ext/gosu/extconf.rb +3 -0
  17. data/lib/gosu/compat.rb +12 -7
  18. data/lib/gosu/patches.rb +8 -2
  19. data/lib/gosu/swig_patches.rb +20 -9
  20. data/rdoc/gosu.rb +28 -7
  21. data/src/ChannelWrapper.cpp +50 -0
  22. data/src/ColorWrapper.cpp +126 -0
  23. data/src/Constants.cpp +287 -0
  24. data/src/Font.cpp +1 -0
  25. data/src/FontWrapper.cpp +74 -0
  26. data/src/GosuWrapper.cpp +232 -0
  27. data/src/Graphics.cpp +4 -1
  28. data/src/GraphicsImpl.hpp +0 -1
  29. data/src/ImageWrapper.cpp +168 -0
  30. data/src/LargeImageData.cpp +1 -0
  31. data/src/MarkupParser.cpp +11 -3
  32. data/src/RubyGosu.cxx +185 -121
  33. data/src/RubyGosu.h +2 -2
  34. data/src/SampleWrapper.cpp +30 -0
  35. data/src/SongWrapper.cpp +52 -0
  36. data/src/TexChunk.cpp +29 -19
  37. data/src/Text.cpp +2 -0
  38. data/src/TextBuilder.cpp +3 -3
  39. data/src/TextInputWrapper.cpp +101 -0
  40. data/src/TrueTypeFont.cpp +1 -0
  41. data/src/Window.cpp +62 -28
  42. data/src/WindowUIKit.cpp +8 -4
  43. data/src/WindowWrapper.cpp +289 -0
  44. data/src/stb_image.h +153 -56
  45. data/src/stb_image_write.h +111 -60
  46. data/src/stb_truetype.h +74 -39
  47. data/src/stb_vorbis.c +55 -15
  48. data/src/utf8proc.c +47 -29
  49. data/src/utf8proc.h +46 -24
  50. data/src/utf8proc_data.h +10043 -9609
  51. metadata +23 -4
data/src/stb_vorbis.c CHANGED
@@ -1,4 +1,4 @@
1
- // Ogg Vorbis audio decoder - v1.14 - public domain
1
+ // Ogg Vorbis audio decoder - v1.17 - public domain
2
2
  // http://nothings.org/stb_vorbis/
3
3
  //
4
4
  // Original version written by Sean Barrett in 2007.
@@ -30,9 +30,12 @@
30
30
  // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
31
31
  // Phillip Bennefall Rohit Thiago Goulart
32
32
  // manxorist@github saga musix github:infatum
33
- // Timur Gagiev
33
+ // Timur Gagiev Maxwell Koo
34
34
  //
35
35
  // Partial history:
36
+ // 1.17 - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
37
+ // 1.16 - 2019-03-04 - fix warnings
38
+ // 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
36
39
  // 1.14 - 2018-02-11 - delete bogus dealloca usage
37
40
  // 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
38
41
  // 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
@@ -253,7 +256,7 @@ extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
253
256
  // create an ogg vorbis decoder from an open FILE *, looking for a stream at
254
257
  // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
255
258
  // note that stb_vorbis must "own" this stream; if you seek it in between
256
- // calls to stb_vorbis, it will become confused. Morever, if you attempt to
259
+ // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
257
260
  // perform stb_vorbis_seek_*() operations on this file, it will assume it
258
261
  // owns the _entire_ rest of the file after the start point. Use the next
259
262
  // function, stb_vorbis_open_file_section(), to limit it.
@@ -374,7 +377,8 @@ enum STBVorbisError
374
377
  VORBIS_invalid_first_page,
375
378
  VORBIS_bad_packet_type,
376
379
  VORBIS_cant_find_last_page,
377
- VORBIS_seek_failed
380
+ VORBIS_seek_failed,
381
+ VORBIS_ogg_skeleton_not_supported
378
382
  };
379
383
 
380
384
 
@@ -1073,7 +1077,7 @@ static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1073
1077
  assert(z >= 0 && z < 32);
1074
1078
  available[z] = 0;
1075
1079
  add_entry(c, bit_reverse(res), i, m++, len[i], values);
1076
- // propogate availability up the tree
1080
+ // propagate availability up the tree
1077
1081
  if (z != len[i]) {
1078
1082
  assert(len[i] >= 0 && len[i] < 32);
1079
1083
  for (y=len[i]; y > z; --y) {
@@ -1199,8 +1203,10 @@ static int lookup1_values(int entries, int dim)
1199
1203
  int r = (int) floor(exp((float) log((float) entries) / dim));
1200
1204
  if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1201
1205
  ++r; // floor() to avoid _ftol() when non-CRT
1202
- assert(pow((float) r+1, dim) > entries);
1203
- assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1206
+ if (pow((float) r+1, dim) <= entries)
1207
+ return -1;
1208
+ if ((int) floor(pow((float) r, dim)) > entries)
1209
+ return -1;
1204
1210
  return r;
1205
1211
  }
1206
1212
 
@@ -2010,7 +2016,7 @@ static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y
2010
2016
  ady -= abs(base) * adx;
2011
2017
  if (x1 > n) x1 = n;
2012
2018
  if (x < x1) {
2013
- LINE_OP(output[x], inverse_db_table[y]);
2019
+ LINE_OP(output[x], inverse_db_table[y&255]);
2014
2020
  for (++x; x < x1; ++x) {
2015
2021
  err += ady;
2016
2022
  if (err >= adx) {
@@ -2018,7 +2024,7 @@ static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y
2018
2024
  y += sy;
2019
2025
  } else
2020
2026
  y += base;
2021
- LINE_OP(output[x], inverse_db_table[y]);
2027
+ LINE_OP(output[x], inverse_db_table[y&255]);
2022
2028
  }
2023
2029
  }
2024
2030
  }
@@ -2637,7 +2643,7 @@ static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2637
2643
  // once I combined the passes.
2638
2644
 
2639
2645
  // so there's a missing 'times 2' here (for adding X to itself).
2640
- // this propogates through linearly to the end, where the numbers
2646
+ // this propagates through linearly to the end, where the numbers
2641
2647
  // are 1/2 too small, and need to be compensated for.
2642
2648
 
2643
2649
  {
@@ -3045,7 +3051,6 @@ static float *get_window(vorb *f, int len)
3045
3051
  len <<= 1;
3046
3052
  if (len == f->blocksize_0) return f->window[0];
3047
3053
  if (len == f->blocksize_1) return f->window[1];
3048
- assert(0);
3049
3054
  return NULL;
3050
3055
  }
3051
3056
 
@@ -3451,6 +3456,7 @@ static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3451
3456
  if (f->previous_length) {
3452
3457
  int i,j, n = f->previous_length;
3453
3458
  float *w = get_window(f, n);
3459
+ if (w == NULL) return 0;
3454
3460
  for (i=0; i < f->channels; ++i) {
3455
3461
  for (j=0; j < n; ++j)
3456
3462
  f->channel_buffers[i][left+j] =
@@ -3578,7 +3584,22 @@ static int start_decoder(vorb *f)
3578
3584
  if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3579
3585
  // check for expected packet length
3580
3586
  if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3581
- if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page);
3587
+ if (f->segments[0] != 30) {
3588
+ // check for the Ogg skeleton fishead identifying header to refine our error
3589
+ if (f->segments[0] == 64 &&
3590
+ getn(f, header, 6) &&
3591
+ header[0] == 'f' &&
3592
+ header[1] == 'i' &&
3593
+ header[2] == 's' &&
3594
+ header[3] == 'h' &&
3595
+ header[4] == 'e' &&
3596
+ header[5] == 'a' &&
3597
+ get8(f) == 'd' &&
3598
+ get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported);
3599
+ else
3600
+ return error(f, VORBIS_invalid_first_page);
3601
+ }
3602
+
3582
3603
  // read packet
3583
3604
  // check packet header
3584
3605
  if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
@@ -3677,6 +3698,7 @@ static int start_decoder(vorb *f)
3677
3698
  while (current_entry < c->entries) {
3678
3699
  int limit = c->entries - current_entry;
3679
3700
  int n = get_bits(f, ilog(limit));
3701
+ if (current_length >= 32) return error(f, VORBIS_invalid_setup);
3680
3702
  if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3681
3703
  memset(lengths + current_entry, current_length, n);
3682
3704
  current_entry += n;
@@ -3780,7 +3802,9 @@ static int start_decoder(vorb *f)
3780
3802
  c->value_bits = get_bits(f, 4)+1;
3781
3803
  c->sequence_p = get_bits(f,1);
3782
3804
  if (c->lookup_type == 1) {
3783
- c->lookup_values = lookup1_values(c->entries, c->dimensions);
3805
+ int values = lookup1_values(c->entries, c->dimensions);
3806
+ if (values < 0) return error(f, VORBIS_invalid_setup);
3807
+ c->lookup_values = (uint32) values;
3784
3808
  } else {
3785
3809
  c->lookup_values = c->entries * c->dimensions;
3786
3810
  }
@@ -3916,6 +3940,9 @@ static int start_decoder(vorb *f)
3916
3940
  p[j].id = j;
3917
3941
  }
3918
3942
  qsort(p, g->values, sizeof(p[0]), point_compare);
3943
+ for (j=0; j < g->values-1; ++j)
3944
+ if (p[j].x == p[j+1].x)
3945
+ return error(f, VORBIS_invalid_setup);
3919
3946
  for (j=0; j < g->values; ++j)
3920
3947
  g->sorted_order[j] = (uint8) p[j].id;
3921
3948
  // precompute the neighbors
@@ -4002,6 +4029,7 @@ static int start_decoder(vorb *f)
4002
4029
  max_submaps = m->submaps;
4003
4030
  if (get_bits(f,1)) {
4004
4031
  m->coupling_steps = get_bits(f,8)+1;
4032
+ if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
4005
4033
  for (k=0; k < m->coupling_steps; ++k) {
4006
4034
  m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4007
4035
  m->chan[k].angle = get_bits(f, ilog(f->channels-1));
@@ -4566,7 +4594,7 @@ static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4566
4594
  return 1;
4567
4595
  }
4568
4596
 
4569
- // rarely used function to seek back to the preceeding page while finding the
4597
+ // rarely used function to seek back to the preceding page while finding the
4570
4598
  // start of a packet
4571
4599
  static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4572
4600
  {
@@ -4973,7 +5001,13 @@ stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, con
4973
5001
 
4974
5002
  stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4975
5003
  {
4976
- FILE *f = fopen(filename, "rb");
5004
+ FILE *f;
5005
+ #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
5006
+ if (0 != fopen_s(&f, filename, "rb"))
5007
+ f = NULL;
5008
+ #else
5009
+ f = fopen(filename, "rb");
5010
+ #endif
4977
5011
  if (f)
4978
5012
  return stb_vorbis_open_file(f, TRUE, error, alloc);
4979
5013
  if (error) *error = VORBIS_file_open_failure;
@@ -5362,6 +5396,12 @@ int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, in
5362
5396
  #endif // STB_VORBIS_NO_PULLDATA_API
5363
5397
 
5364
5398
  /* Version history
5399
+ 1.17 - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
5400
+ found with Mayhem by ForAllSecure
5401
+ 1.16 - 2019-03-04 - fix warnings
5402
+ 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
5403
+ 1.14 - 2018-02-11 - delete bogus dealloca usage
5404
+ 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
5365
5405
  1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5366
5406
  1.11 - 2017-07-23 - fix MinGW compilation
5367
5407
  1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
data/src/utf8proc.c CHANGED
@@ -1,6 +1,6 @@
1
1
  /* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
2
2
  /*
3
- * Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
3
+ * Copyright (c) 2018 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
4
4
  * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
5
5
  *
6
6
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -42,6 +42,14 @@
42
42
 
43
43
 
44
44
  #include "utf8proc.h"
45
+
46
+ #ifndef SSIZE_MAX
47
+ #define SSIZE_MAX ((size_t)SIZE_MAX/2)
48
+ #endif
49
+ #ifndef UINT16_MAX
50
+ # define UINT16_MAX 65535U
51
+ #endif
52
+
45
53
  #include "utf8proc_data.h"
46
54
 
47
55
 
@@ -92,6 +100,10 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
92
100
  return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
93
101
  }
94
102
 
103
+ UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
104
+ return "12.1.0";
105
+ }
106
+
95
107
  UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
96
108
  switch (errcode) {
97
109
  case UTF8PROC_ERROR_NOMEM:
@@ -188,9 +200,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, ut
188
200
  } else return 0;
189
201
  }
190
202
 
191
- /* internal "unsafe" version that does not check whether uc is in range */
192
- static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
203
+ /* internal version used for inserting 0xff bytes between graphemes */
204
+ static utf8proc_ssize_t charbound_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
193
205
  if (uc < 0x00) {
206
+ if (uc == -1) { /* internal value used for grapheme breaks */
207
+ dst[0] = (utf8proc_uint8_t)0xFF;
208
+ return 1;
209
+ }
194
210
  return 0;
195
211
  } else if (uc < 0x80) {
196
212
  dst[0] = (utf8proc_uint8_t)uc;
@@ -199,12 +215,6 @@ static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t
199
215
  dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
200
216
  dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
201
217
  return 2;
202
- } else if (uc == 0xFFFF) {
203
- dst[0] = (utf8proc_uint8_t)0xFF;
204
- return 1;
205
- } else if (uc == 0xFFFE) {
206
- dst[0] = (utf8proc_uint8_t)0xFE;
207
- return 1;
208
218
  } else if (uc < 0x10000) {
209
219
  dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
210
220
  dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
@@ -271,12 +281,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
271
281
  tbc == UTF8PROC_BOUNDCLASS_ZWJ || // ---
272
282
  tbc == UTF8PROC_BOUNDCLASS_SPACINGMARK || // GB9a
273
283
  lbc == UTF8PROC_BOUNDCLASS_PREPEND) ? false : // GB9b
274
- ((lbc == UTF8PROC_BOUNDCLASS_E_BASE || // GB10 (requires additional handling below)
275
- lbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) && // ----
276
- tbc == UTF8PROC_BOUNDCLASS_E_MODIFIER) ? false : // ----
277
- (lbc == UTF8PROC_BOUNDCLASS_ZWJ && // GB11
278
- (tbc == UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ || // ----
279
- tbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ)) ? false : // ----
284
+ (lbc == UTF8PROC_BOUNDCLASS_E_ZWG && // GB11 (requires additional handling below)
285
+ tbc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) ? false : // ----
280
286
  (lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR && // GB12/13 (requires additional handling below)
281
287
  tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false : // ----
282
288
  true; // GB999
@@ -284,9 +290,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
284
290
 
285
291
  static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
286
292
  {
287
- int lbc_override = lbc;
288
- if (state && *state != UTF8PROC_BOUNDCLASS_START)
289
- lbc_override = *state;
293
+ int lbc_override = ((state && *state != UTF8PROC_BOUNDCLASS_START)
294
+ ? *state : lbc);
290
295
  utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
291
296
  if (state) {
292
297
  // Special support for GB 12/13 made possible by GB999. After two RI
@@ -296,12 +301,15 @@ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t
296
301
  // forbidden by a different rule such as GB9).
297
302
  if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
298
303
  *state = UTF8PROC_BOUNDCLASS_OTHER;
299
- // Special support for GB10. Fold any EXTEND codepoints into the previous
300
- // boundclass if we're dealing with an emoji base boundclass.
301
- else if ((*state == UTF8PROC_BOUNDCLASS_E_BASE ||
302
- *state == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) &&
303
- tbc == UTF8PROC_BOUNDCLASS_EXTEND)
304
- *state = UTF8PROC_BOUNDCLASS_E_BASE;
304
+ // Special support for GB11 (emoji extend* zwj / emoji)
305
+ else if (*state == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
306
+ if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
307
+ *state = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
308
+ else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
309
+ *state = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
310
+ else
311
+ *state = tbc;
312
+ }
305
313
  else
306
314
  *state = tbc;
307
315
  }
@@ -424,6 +432,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
424
432
  if (options & UTF8PROC_IGNORE) {
425
433
  if (property->ignorable) return 0;
426
434
  }
435
+ if (options & UTF8PROC_STRIPNA) {
436
+ if (!category) return 0;
437
+ }
427
438
  if (options & UTF8PROC_LUMP) {
428
439
  if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
429
440
  if (uc == 0x2018 || uc == 0x2019 || uc == 0x02BC || uc == 0x02C8)
@@ -471,7 +482,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
471
482
  int tbc = property->boundclass;
472
483
  boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
473
484
  if (boundary) {
474
- if (bufsize >= 1) dst[0] = 0xFFFF;
485
+ if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
475
486
  if (bufsize >= 2) dst[1] = uc;
476
487
  return 2;
477
488
  }
@@ -632,9 +643,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
632
643
  current_property->comb_index != UINT16_MAX &&
633
644
  current_property->comb_index >= 0x8000) {
634
645
  int sidx = starter_property->comb_index;
635
- int idx = (current_property->comb_index & 0x3FFF) - utf8proc_combinations[sidx];
636
- if (idx >= 0 && idx <= utf8proc_combinations[sidx + 1] ) {
637
- idx += sidx + 2;
646
+ int idx = current_property->comb_index & 0x3FFF;
647
+ if (idx >= utf8proc_combinations[sidx] && idx <= utf8proc_combinations[sidx + 1] ) {
648
+ idx += sidx + 2 - utf8proc_combinations[sidx];
638
649
  if (current_property->comb_index & 0x4000) {
639
650
  composition = (utf8proc_combinations[idx] << 16) | utf8proc_combinations[idx+1];
640
651
  } else
@@ -677,7 +688,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
677
688
  if (options & UTF8PROC_CHARBOUND) {
678
689
  for (rpos = 0; rpos < length; rpos++) {
679
690
  uc = buffer[rpos];
680
- wpos += unsafe_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
691
+ wpos += charbound_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
681
692
  }
682
693
  } else {
683
694
  for (rpos = 0; rpos < length; rpos++) {
@@ -753,3 +764,10 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str)
753
764
  UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
754
765
  return retval;
755
766
  }
767
+
768
+ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str) {
769
+ utf8proc_uint8_t *retval;
770
+ utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
771
+ UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE);
772
+ return retval;
773
+ }
data/src/utf8proc.h CHANGED
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
2
+ * Copyright (c) 2018 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
3
3
  * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
4
4
  *
5
5
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -28,7 +28,7 @@
28
28
  * utf8proc is a free/open-source (MIT/expat licensed) C library
29
29
  * providing Unicode normalization, case-folding, and other operations
30
30
  * for strings in the UTF-8 encoding, supporting Unicode version
31
- * 8.0.0. See the utf8proc home page (http://julialang.org/utf8proc/)
31
+ * 9.0.0. See the utf8proc home page (http://julialang.org/utf8proc/)
32
32
  * for downloads and other information, or the source code on github
33
33
  * (https://github.com/JuliaLang/utf8proc).
34
34
  *
@@ -71,13 +71,13 @@
71
71
  /** The MAJOR version number (increased when backwards API compatibility is broken). */
72
72
  #define UTF8PROC_VERSION_MAJOR 2
73
73
  /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
74
- #define UTF8PROC_VERSION_MINOR 1
74
+ #define UTF8PROC_VERSION_MINOR 4
75
75
  /** The PATCH version (increased for fixes that do not change the API). */
76
76
  #define UTF8PROC_VERSION_PATCH 0
77
77
  /** @} */
78
78
 
79
79
  #include <stdlib.h>
80
- #include <sys/types.h>
80
+
81
81
  #if defined(_MSC_VER) && _MSC_VER < 1800
82
82
  // MSVC prior to 2013 lacked stdbool.h and inttypes.h
83
83
  typedef signed char utf8proc_int8_t;
@@ -120,30 +120,26 @@ typedef bool utf8proc_bool;
120
120
  #endif
121
121
  #include <limits.h>
122
122
 
123
- #ifdef _WIN32
124
- # ifdef UTF8PROC_EXPORTS
125
- # define UTF8PROC_DLLEXPORT __declspec(dllexport)
123
+ #ifdef UTF8PROC_STATIC
124
+ # define UTF8PROC_DLLEXPORT
125
+ #else
126
+ # ifdef _WIN32
127
+ # ifdef UTF8PROC_EXPORTS
128
+ # define UTF8PROC_DLLEXPORT __declspec(dllexport)
129
+ # else
130
+ # define UTF8PROC_DLLEXPORT __declspec(dllimport)
131
+ # endif
132
+ # elif __GNUC__ >= 4
133
+ # define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
126
134
  # else
127
- # define UTF8PROC_DLLEXPORT __declspec(dllimport)
135
+ # define UTF8PROC_DLLEXPORT
128
136
  # endif
129
- #elif __GNUC__ >= 4
130
- # define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
131
- #else
132
- # define UTF8PROC_DLLEXPORT
133
137
  #endif
134
138
 
135
139
  #ifdef __cplusplus
136
140
  extern "C" {
137
141
  #endif
138
142
 
139
- #ifndef SSIZE_MAX
140
- #define SSIZE_MAX ((size_t)SIZE_MAX/2)
141
- #endif
142
-
143
- #ifndef UINT16_MAX
144
- # define UINT16_MAX 65535U
145
- #endif
146
-
147
143
  /**
148
144
  * Option flags used by several functions in the library.
149
145
  */
@@ -209,6 +205,10 @@ typedef enum {
209
205
  * @ref UTF8PROC_DECOMPOSE
210
206
  */
211
207
  UTF8PROC_STRIPMARK = (1<<13),
208
+ /**
209
+ * Strip unassigned codepoints.
210
+ */
211
+ UTF8PROC_STRIPNA = (1<<14),
212
212
  } utf8proc_option_t;
213
213
 
214
214
  /** @name Error codes
@@ -374,10 +374,18 @@ typedef enum {
374
374
  UTF8PROC_BOUNDCLASS_SPACINGMARK = 12, /**< Spacingmark */
375
375
  UTF8PROC_BOUNDCLASS_PREPEND = 13, /**< Prepend */
376
376
  UTF8PROC_BOUNDCLASS_ZWJ = 14, /**< Zero Width Joiner */
377
+
378
+ /* the following are no longer used in Unicode 11, but we keep
379
+ the constants here for backward compatibility */
377
380
  UTF8PROC_BOUNDCLASS_E_BASE = 15, /**< Emoji Base */
378
381
  UTF8PROC_BOUNDCLASS_E_MODIFIER = 16, /**< Emoji Modifier */
379
382
  UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ = 17, /**< Glue_After_ZWJ */
380
383
  UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZJW */
384
+
385
+ /* the Extended_Pictographic property is used in the Unicode 11
386
+ grapheme-boundary rules, so we store it in the boundclass field */
387
+ UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC = 19,
388
+ UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */
381
389
  } utf8proc_boundclass_t;
382
390
 
383
391
  /**
@@ -400,6 +408,11 @@ UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256];
400
408
  */
401
409
  UTF8PROC_DLLEXPORT const char *utf8proc_version(void);
402
410
 
411
+ /**
412
+ * Returns the utf8proc supported Unicode version as a string MAJOR.MINOR.PATCH.
413
+ */
414
+ UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void);
415
+
403
416
  /**
404
417
  * Returns an informative error string for the given utf8proc error code
405
418
  * (e.g. the error codes returned by @ref utf8proc_map).
@@ -465,6 +478,7 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
465
478
  * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster
466
479
  * - @ref UTF8PROC_LUMP - lump certain different codepoints together
467
480
  * - @ref UTF8PROC_STRIPMARK - remove all character marks
481
+ * - @ref UTF8PROC_STRIPNA - remove unassigned codepoints
468
482
  * @param last_boundclass
469
483
  * Pointer to an integer variable containing
470
484
  * the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND
@@ -576,6 +590,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
576
590
  * Given a pair of consecutive codepoints, return whether a grapheme break is
577
591
  * permitted between them (as defined by the extended grapheme clusters in UAX#29).
578
592
  *
593
+ * @param codepoint1 The first codepoint.
594
+ * @param codepoint2 The second codepoint, occurring consecutively after `codepoint1`.
579
595
  * @param state Beginning with Version 29 (Unicode 9.0.0), this algorithm requires
580
596
  * state to break graphemes. This state can be passed in as a pointer
581
597
  * in the `state` argument and should initially be set to 0. If the
@@ -584,7 +600,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
584
600
  * matching the rules in Unicode 8.0.0.
585
601
  *
586
602
  * @warning If the state parameter is used, `utf8proc_grapheme_break_stateful` must
587
- * be called IN ORDER on ALL potential breaks in a string.
603
+ * be called IN ORDER on ALL potential breaks in a string. However, it
604
+ * is safe to reset the state to zero after a grapheme break.
588
605
  */
589
606
  UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
590
607
  utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state);
@@ -651,7 +668,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
651
668
  * contain NULL characters with the string if `str` contained NULL
652
669
  * characters). Other flags in the `options` field are passed to the
653
670
  * functions defined above, and regarded as described. See also
654
- * @ref utfproc_map_custom to supply a custom codepoint transformation.
671
+ * @ref utf8proc_map_custom to supply a custom codepoint transformation.
655
672
  *
656
673
  * In case of success the length of the new string is returned,
657
674
  * otherwise a negative error code is returned.
@@ -676,8 +693,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
676
693
 
677
694
  /** @name Unicode normalization
678
695
  *
679
- * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC
680
- * normalized version of the null-terminated string `str`. These
696
+ * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD, NFKC or
697
+ * NFKC_Casefold normalized version of the null-terminated string `str`. These
681
698
  * are shortcuts to calling @ref utf8proc_map with @ref UTF8PROC_NULLTERM
682
699
  * combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
683
700
  */
@@ -690,6 +707,11 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
690
707
  UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
691
708
  /** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
692
709
  UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
710
+ /**
711
+ * NFKC_Casefold normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT
712
+ * and @ref UTF8PROC_CASEFOLD and @ref UTF8PROC_IGNORE).
713
+ **/
714
+ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str);
693
715
  /** @} */
694
716
 
695
717
  #ifdef __cplusplus