gosu 0.14.4 → 0.15.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +1 -0
- data/COPYING +1 -1
- data/Gosu/Buttons.hpp +1 -0
- data/Gosu/Channel.h +25 -0
- data/Gosu/Color.h +38 -0
- data/Gosu/Font.h +36 -0
- data/Gosu/Gosu.h +79 -0
- data/Gosu/Image.h +54 -0
- data/Gosu/Sample.h +19 -0
- data/Gosu/Song.h +24 -0
- data/Gosu/TextInput.h +30 -0
- data/Gosu/Version.hpp +2 -2
- data/Gosu/Window.h +61 -0
- data/Gosu/Window.hpp +3 -2
- data/README.md +1 -1
- data/ext/gosu/extconf.rb +3 -0
- data/lib/gosu/compat.rb +12 -7
- data/lib/gosu/patches.rb +8 -2
- data/lib/gosu/swig_patches.rb +20 -9
- data/rdoc/gosu.rb +28 -7
- data/src/ChannelWrapper.cpp +50 -0
- data/src/ColorWrapper.cpp +126 -0
- data/src/Constants.cpp +287 -0
- data/src/Font.cpp +1 -0
- data/src/FontWrapper.cpp +74 -0
- data/src/GosuWrapper.cpp +232 -0
- data/src/Graphics.cpp +4 -1
- data/src/GraphicsImpl.hpp +0 -1
- data/src/ImageWrapper.cpp +168 -0
- data/src/LargeImageData.cpp +1 -0
- data/src/MarkupParser.cpp +11 -3
- data/src/RubyGosu.cxx +186 -121
- data/src/RubyGosu.h +2 -2
- data/src/SampleWrapper.cpp +30 -0
- data/src/SongWrapper.cpp +52 -0
- data/src/TexChunk.cpp +29 -19
- data/src/Text.cpp +2 -0
- data/src/TextBuilder.cpp +3 -3
- data/src/TextInputWrapper.cpp +101 -0
- data/src/TrueTypeFont.cpp +1 -0
- data/src/Window.cpp +62 -28
- data/src/WindowUIKit.cpp +8 -4
- data/src/WindowWrapper.cpp +289 -0
- data/src/stb_image.h +153 -56
- data/src/stb_image_write.h +111 -60
- data/src/stb_truetype.h +74 -39
- data/src/stb_vorbis.c +55 -15
- data/src/utf8proc.c +47 -29
- data/src/utf8proc.h +46 -24
- data/src/utf8proc_data.h +10043 -9609
- metadata +23 -4
data/src/stb_vorbis.c
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
// Ogg Vorbis audio decoder - v1.14 - public domain
|
1
|
+
// Ogg Vorbis audio decoder - v1.17 - public domain
|
2
2
|
// http://nothings.org/stb_vorbis/
|
3
3
|
//
|
4
4
|
// Original version written by Sean Barrett in 2007.
|
@@ -30,9 +30,12 @@
|
|
30
30
|
// Tom Beaumont Ingo Leitgeb Nicolas Guillemot
|
31
31
|
// Phillip Bennefall Rohit Thiago Goulart
|
32
32
|
// manxorist@github saga musix github:infatum
|
33
|
-
// Timur Gagiev
|
33
|
+
// Timur Gagiev Maxwell Koo
|
34
34
|
//
|
35
35
|
// Partial history:
|
36
|
+
// 1.17 - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
|
37
|
+
// 1.16 - 2019-03-04 - fix warnings
|
38
|
+
// 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
|
36
39
|
// 1.14 - 2018-02-11 - delete bogus dealloca usage
|
37
40
|
// 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
|
38
41
|
// 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
|
@@ -253,7 +256,7 @@ extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
|
|
253
256
|
// create an ogg vorbis decoder from an open FILE *, looking for a stream at
|
254
257
|
// the _current_ seek point (ftell). on failure, returns NULL and sets *error.
|
255
258
|
// note that stb_vorbis must "own" this stream; if you seek it in between
|
256
|
-
// calls to stb_vorbis, it will become confused.
|
259
|
+
// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
|
257
260
|
// perform stb_vorbis_seek_*() operations on this file, it will assume it
|
258
261
|
// owns the _entire_ rest of the file after the start point. Use the next
|
259
262
|
// function, stb_vorbis_open_file_section(), to limit it.
|
@@ -374,7 +377,8 @@ enum STBVorbisError
|
|
374
377
|
VORBIS_invalid_first_page,
|
375
378
|
VORBIS_bad_packet_type,
|
376
379
|
VORBIS_cant_find_last_page,
|
377
|
-
VORBIS_seek_failed
|
380
|
+
VORBIS_seek_failed,
|
381
|
+
VORBIS_ogg_skeleton_not_supported
|
378
382
|
};
|
379
383
|
|
380
384
|
|
@@ -1073,7 +1077,7 @@ static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
|
|
1073
1077
|
assert(z >= 0 && z < 32);
|
1074
1078
|
available[z] = 0;
|
1075
1079
|
add_entry(c, bit_reverse(res), i, m++, len[i], values);
|
1076
|
-
//
|
1080
|
+
// propagate availability up the tree
|
1077
1081
|
if (z != len[i]) {
|
1078
1082
|
assert(len[i] >= 0 && len[i] < 32);
|
1079
1083
|
for (y=len[i]; y > z; --y) {
|
@@ -1199,8 +1203,10 @@ static int lookup1_values(int entries, int dim)
|
|
1199
1203
|
int r = (int) floor(exp((float) log((float) entries) / dim));
|
1200
1204
|
if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
|
1201
1205
|
++r; // floor() to avoid _ftol() when non-CRT
|
1202
|
-
|
1203
|
-
|
1206
|
+
if (pow((float) r+1, dim) <= entries)
|
1207
|
+
return -1;
|
1208
|
+
if ((int) floor(pow((float) r, dim)) > entries)
|
1209
|
+
return -1;
|
1204
1210
|
return r;
|
1205
1211
|
}
|
1206
1212
|
|
@@ -2010,7 +2016,7 @@ static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y
|
|
2010
2016
|
ady -= abs(base) * adx;
|
2011
2017
|
if (x1 > n) x1 = n;
|
2012
2018
|
if (x < x1) {
|
2013
|
-
LINE_OP(output[x], inverse_db_table[y]);
|
2019
|
+
LINE_OP(output[x], inverse_db_table[y&255]);
|
2014
2020
|
for (++x; x < x1; ++x) {
|
2015
2021
|
err += ady;
|
2016
2022
|
if (err >= adx) {
|
@@ -2018,7 +2024,7 @@ static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y
|
|
2018
2024
|
y += sy;
|
2019
2025
|
} else
|
2020
2026
|
y += base;
|
2021
|
-
LINE_OP(output[x], inverse_db_table[y]);
|
2027
|
+
LINE_OP(output[x], inverse_db_table[y&255]);
|
2022
2028
|
}
|
2023
2029
|
}
|
2024
2030
|
}
|
@@ -2637,7 +2643,7 @@ static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
|
|
2637
2643
|
// once I combined the passes.
|
2638
2644
|
|
2639
2645
|
// so there's a missing 'times 2' here (for adding X to itself).
|
2640
|
-
// this
|
2646
|
+
// this propagates through linearly to the end, where the numbers
|
2641
2647
|
// are 1/2 too small, and need to be compensated for.
|
2642
2648
|
|
2643
2649
|
{
|
@@ -3045,7 +3051,6 @@ static float *get_window(vorb *f, int len)
|
|
3045
3051
|
len <<= 1;
|
3046
3052
|
if (len == f->blocksize_0) return f->window[0];
|
3047
3053
|
if (len == f->blocksize_1) return f->window[1];
|
3048
|
-
assert(0);
|
3049
3054
|
return NULL;
|
3050
3055
|
}
|
3051
3056
|
|
@@ -3451,6 +3456,7 @@ static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
|
|
3451
3456
|
if (f->previous_length) {
|
3452
3457
|
int i,j, n = f->previous_length;
|
3453
3458
|
float *w = get_window(f, n);
|
3459
|
+
if (w == NULL) return 0;
|
3454
3460
|
for (i=0; i < f->channels; ++i) {
|
3455
3461
|
for (j=0; j < n; ++j)
|
3456
3462
|
f->channel_buffers[i][left+j] =
|
@@ -3578,7 +3584,22 @@ static int start_decoder(vorb *f)
|
|
3578
3584
|
if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
|
3579
3585
|
// check for expected packet length
|
3580
3586
|
if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
|
3581
|
-
if (f->segments[0] != 30)
|
3587
|
+
if (f->segments[0] != 30) {
|
3588
|
+
// check for the Ogg skeleton fishead identifying header to refine our error
|
3589
|
+
if (f->segments[0] == 64 &&
|
3590
|
+
getn(f, header, 6) &&
|
3591
|
+
header[0] == 'f' &&
|
3592
|
+
header[1] == 'i' &&
|
3593
|
+
header[2] == 's' &&
|
3594
|
+
header[3] == 'h' &&
|
3595
|
+
header[4] == 'e' &&
|
3596
|
+
header[5] == 'a' &&
|
3597
|
+
get8(f) == 'd' &&
|
3598
|
+
get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported);
|
3599
|
+
else
|
3600
|
+
return error(f, VORBIS_invalid_first_page);
|
3601
|
+
}
|
3602
|
+
|
3582
3603
|
// read packet
|
3583
3604
|
// check packet header
|
3584
3605
|
if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
|
@@ -3677,6 +3698,7 @@ static int start_decoder(vorb *f)
|
|
3677
3698
|
while (current_entry < c->entries) {
|
3678
3699
|
int limit = c->entries - current_entry;
|
3679
3700
|
int n = get_bits(f, ilog(limit));
|
3701
|
+
if (current_length >= 32) return error(f, VORBIS_invalid_setup);
|
3680
3702
|
if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
|
3681
3703
|
memset(lengths + current_entry, current_length, n);
|
3682
3704
|
current_entry += n;
|
@@ -3780,7 +3802,9 @@ static int start_decoder(vorb *f)
|
|
3780
3802
|
c->value_bits = get_bits(f, 4)+1;
|
3781
3803
|
c->sequence_p = get_bits(f,1);
|
3782
3804
|
if (c->lookup_type == 1) {
|
3783
|
-
|
3805
|
+
int values = lookup1_values(c->entries, c->dimensions);
|
3806
|
+
if (values < 0) return error(f, VORBIS_invalid_setup);
|
3807
|
+
c->lookup_values = (uint32) values;
|
3784
3808
|
} else {
|
3785
3809
|
c->lookup_values = c->entries * c->dimensions;
|
3786
3810
|
}
|
@@ -3916,6 +3940,9 @@ static int start_decoder(vorb *f)
|
|
3916
3940
|
p[j].id = j;
|
3917
3941
|
}
|
3918
3942
|
qsort(p, g->values, sizeof(p[0]), point_compare);
|
3943
|
+
for (j=0; j < g->values-1; ++j)
|
3944
|
+
if (p[j].x == p[j+1].x)
|
3945
|
+
return error(f, VORBIS_invalid_setup);
|
3919
3946
|
for (j=0; j < g->values; ++j)
|
3920
3947
|
g->sorted_order[j] = (uint8) p[j].id;
|
3921
3948
|
// precompute the neighbors
|
@@ -4002,6 +4029,7 @@ static int start_decoder(vorb *f)
|
|
4002
4029
|
max_submaps = m->submaps;
|
4003
4030
|
if (get_bits(f,1)) {
|
4004
4031
|
m->coupling_steps = get_bits(f,8)+1;
|
4032
|
+
if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
|
4005
4033
|
for (k=0; k < m->coupling_steps; ++k) {
|
4006
4034
|
m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
|
4007
4035
|
m->chan[k].angle = get_bits(f, ilog(f->channels-1));
|
@@ -4566,7 +4594,7 @@ static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
|
|
4566
4594
|
return 1;
|
4567
4595
|
}
|
4568
4596
|
|
4569
|
-
// rarely used function to seek back to the
|
4597
|
+
// rarely used function to seek back to the preceding page while finding the
|
4570
4598
|
// start of a packet
|
4571
4599
|
static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
|
4572
4600
|
{
|
@@ -4973,7 +5001,13 @@ stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, con
|
|
4973
5001
|
|
4974
5002
|
stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
|
4975
5003
|
{
|
4976
|
-
FILE *f
|
5004
|
+
FILE *f;
|
5005
|
+
#if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
|
5006
|
+
if (0 != fopen_s(&f, filename, "rb"))
|
5007
|
+
f = NULL;
|
5008
|
+
#else
|
5009
|
+
f = fopen(filename, "rb");
|
5010
|
+
#endif
|
4977
5011
|
if (f)
|
4978
5012
|
return stb_vorbis_open_file(f, TRUE, error, alloc);
|
4979
5013
|
if (error) *error = VORBIS_file_open_failure;
|
@@ -5362,6 +5396,12 @@ int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, in
|
|
5362
5396
|
#endif // STB_VORBIS_NO_PULLDATA_API
|
5363
5397
|
|
5364
5398
|
/* Version history
|
5399
|
+
1.17 - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
|
5400
|
+
found with Mayhem by ForAllSecure
|
5401
|
+
1.16 - 2019-03-04 - fix warnings
|
5402
|
+
1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
|
5403
|
+
1.14 - 2018-02-11 - delete bogus dealloca usage
|
5404
|
+
1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
|
5365
5405
|
1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
|
5366
5406
|
1.11 - 2017-07-23 - fix MinGW compilation
|
5367
5407
|
1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
|
data/src/utf8proc.c
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
|
2
2
|
/*
|
3
|
-
* Copyright (c)
|
3
|
+
* Copyright (c) 2018 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
4
4
|
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
5
5
|
*
|
6
6
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
@@ -42,6 +42,14 @@
|
|
42
42
|
|
43
43
|
|
44
44
|
#include "utf8proc.h"
|
45
|
+
|
46
|
+
#ifndef SSIZE_MAX
|
47
|
+
#define SSIZE_MAX ((size_t)SIZE_MAX/2)
|
48
|
+
#endif
|
49
|
+
#ifndef UINT16_MAX
|
50
|
+
# define UINT16_MAX 65535U
|
51
|
+
#endif
|
52
|
+
|
45
53
|
#include "utf8proc_data.h"
|
46
54
|
|
47
55
|
|
@@ -92,6 +100,10 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
|
|
92
100
|
return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
|
93
101
|
}
|
94
102
|
|
103
|
+
UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
|
104
|
+
return "12.1.0";
|
105
|
+
}
|
106
|
+
|
95
107
|
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
96
108
|
switch (errcode) {
|
97
109
|
case UTF8PROC_ERROR_NOMEM:
|
@@ -188,9 +200,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, ut
|
|
188
200
|
} else return 0;
|
189
201
|
}
|
190
202
|
|
191
|
-
/* internal
|
192
|
-
static utf8proc_ssize_t
|
203
|
+
/* internal version used for inserting 0xff bytes between graphemes */
|
204
|
+
static utf8proc_ssize_t charbound_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
|
193
205
|
if (uc < 0x00) {
|
206
|
+
if (uc == -1) { /* internal value used for grapheme breaks */
|
207
|
+
dst[0] = (utf8proc_uint8_t)0xFF;
|
208
|
+
return 1;
|
209
|
+
}
|
194
210
|
return 0;
|
195
211
|
} else if (uc < 0x80) {
|
196
212
|
dst[0] = (utf8proc_uint8_t)uc;
|
@@ -199,12 +215,6 @@ static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t
|
|
199
215
|
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
|
200
216
|
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
201
217
|
return 2;
|
202
|
-
} else if (uc == 0xFFFF) {
|
203
|
-
dst[0] = (utf8proc_uint8_t)0xFF;
|
204
|
-
return 1;
|
205
|
-
} else if (uc == 0xFFFE) {
|
206
|
-
dst[0] = (utf8proc_uint8_t)0xFE;
|
207
|
-
return 1;
|
208
218
|
} else if (uc < 0x10000) {
|
209
219
|
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
|
210
220
|
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
@@ -271,12 +281,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
|
|
271
281
|
tbc == UTF8PROC_BOUNDCLASS_ZWJ || // ---
|
272
282
|
tbc == UTF8PROC_BOUNDCLASS_SPACINGMARK || // GB9a
|
273
283
|
lbc == UTF8PROC_BOUNDCLASS_PREPEND) ? false : // GB9b
|
274
|
-
(
|
275
|
-
|
276
|
-
tbc == UTF8PROC_BOUNDCLASS_E_MODIFIER) ? false : // ----
|
277
|
-
(lbc == UTF8PROC_BOUNDCLASS_ZWJ && // GB11
|
278
|
-
(tbc == UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ || // ----
|
279
|
-
tbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ)) ? false : // ----
|
284
|
+
(lbc == UTF8PROC_BOUNDCLASS_E_ZWG && // GB11 (requires additional handling below)
|
285
|
+
tbc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) ? false : // ----
|
280
286
|
(lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR && // GB12/13 (requires additional handling below)
|
281
287
|
tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false : // ----
|
282
288
|
true; // GB999
|
@@ -284,9 +290,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
|
|
284
290
|
|
285
291
|
static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
|
286
292
|
{
|
287
|
-
int lbc_override =
|
288
|
-
|
289
|
-
lbc_override = *state;
|
293
|
+
int lbc_override = ((state && *state != UTF8PROC_BOUNDCLASS_START)
|
294
|
+
? *state : lbc);
|
290
295
|
utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
|
291
296
|
if (state) {
|
292
297
|
// Special support for GB 12/13 made possible by GB999. After two RI
|
@@ -296,12 +301,15 @@ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t
|
|
296
301
|
// forbidden by a different rule such as GB9).
|
297
302
|
if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
|
298
303
|
*state = UTF8PROC_BOUNDCLASS_OTHER;
|
299
|
-
// Special support for
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
304
|
+
// Special support for GB11 (emoji extend* zwj / emoji)
|
305
|
+
else if (*state == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
|
306
|
+
if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
|
307
|
+
*state = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
|
308
|
+
else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
|
309
|
+
*state = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
|
310
|
+
else
|
311
|
+
*state = tbc;
|
312
|
+
}
|
305
313
|
else
|
306
314
|
*state = tbc;
|
307
315
|
}
|
@@ -424,6 +432,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
|
|
424
432
|
if (options & UTF8PROC_IGNORE) {
|
425
433
|
if (property->ignorable) return 0;
|
426
434
|
}
|
435
|
+
if (options & UTF8PROC_STRIPNA) {
|
436
|
+
if (!category) return 0;
|
437
|
+
}
|
427
438
|
if (options & UTF8PROC_LUMP) {
|
428
439
|
if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
|
429
440
|
if (uc == 0x2018 || uc == 0x2019 || uc == 0x02BC || uc == 0x02C8)
|
@@ -471,7 +482,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
|
|
471
482
|
int tbc = property->boundclass;
|
472
483
|
boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
|
473
484
|
if (boundary) {
|
474
|
-
if (bufsize >= 1) dst[0] =
|
485
|
+
if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
|
475
486
|
if (bufsize >= 2) dst[1] = uc;
|
476
487
|
return 2;
|
477
488
|
}
|
@@ -632,9 +643,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
|
|
632
643
|
current_property->comb_index != UINT16_MAX &&
|
633
644
|
current_property->comb_index >= 0x8000) {
|
634
645
|
int sidx = starter_property->comb_index;
|
635
|
-
int idx =
|
636
|
-
if (idx >=
|
637
|
-
idx += sidx + 2;
|
646
|
+
int idx = current_property->comb_index & 0x3FFF;
|
647
|
+
if (idx >= utf8proc_combinations[sidx] && idx <= utf8proc_combinations[sidx + 1] ) {
|
648
|
+
idx += sidx + 2 - utf8proc_combinations[sidx];
|
638
649
|
if (current_property->comb_index & 0x4000) {
|
639
650
|
composition = (utf8proc_combinations[idx] << 16) | utf8proc_combinations[idx+1];
|
640
651
|
} else
|
@@ -677,7 +688,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
|
|
677
688
|
if (options & UTF8PROC_CHARBOUND) {
|
678
689
|
for (rpos = 0; rpos < length; rpos++) {
|
679
690
|
uc = buffer[rpos];
|
680
|
-
wpos +=
|
691
|
+
wpos += charbound_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
|
681
692
|
}
|
682
693
|
} else {
|
683
694
|
for (rpos = 0; rpos < length; rpos++) {
|
@@ -753,3 +764,10 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str)
|
|
753
764
|
UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
|
754
765
|
return retval;
|
755
766
|
}
|
767
|
+
|
768
|
+
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str) {
|
769
|
+
utf8proc_uint8_t *retval;
|
770
|
+
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
|
771
|
+
UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE);
|
772
|
+
return retval;
|
773
|
+
}
|
data/src/utf8proc.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) 2018 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
3
3
|
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
4
4
|
*
|
5
5
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
@@ -28,7 +28,7 @@
|
|
28
28
|
* utf8proc is a free/open-source (MIT/expat licensed) C library
|
29
29
|
* providing Unicode normalization, case-folding, and other operations
|
30
30
|
* for strings in the UTF-8 encoding, supporting Unicode version
|
31
|
-
*
|
31
|
+
* 12.1.0. See the utf8proc home page (http://julialang.org/utf8proc/)
|
32
32
|
* for downloads and other information, or the source code on github
|
33
33
|
* (https://github.com/JuliaLang/utf8proc).
|
34
34
|
*
|
@@ -71,13 +71,13 @@
|
|
71
71
|
/** The MAJOR version number (increased when backwards API compatibility is broken). */
|
72
72
|
#define UTF8PROC_VERSION_MAJOR 2
|
73
73
|
/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
|
74
|
-
#define UTF8PROC_VERSION_MINOR
|
74
|
+
#define UTF8PROC_VERSION_MINOR 4
|
75
75
|
/** The PATCH version (increased for fixes that do not change the API). */
|
76
76
|
#define UTF8PROC_VERSION_PATCH 0
|
77
77
|
/** @} */
|
78
78
|
|
79
79
|
#include <stdlib.h>
|
80
|
-
|
80
|
+
|
81
81
|
#if defined(_MSC_VER) && _MSC_VER < 1800
|
82
82
|
// MSVC prior to 2013 lacked stdbool.h and inttypes.h
|
83
83
|
typedef signed char utf8proc_int8_t;
|
@@ -120,30 +120,26 @@ typedef bool utf8proc_bool;
|
|
120
120
|
#endif
|
121
121
|
#include <limits.h>
|
122
122
|
|
123
|
-
#ifdef
|
124
|
-
#
|
125
|
-
#
|
123
|
+
#ifdef UTF8PROC_STATIC
|
124
|
+
# define UTF8PROC_DLLEXPORT
|
125
|
+
#else
|
126
|
+
# ifdef _WIN32
|
127
|
+
# ifdef UTF8PROC_EXPORTS
|
128
|
+
# define UTF8PROC_DLLEXPORT __declspec(dllexport)
|
129
|
+
# else
|
130
|
+
# define UTF8PROC_DLLEXPORT __declspec(dllimport)
|
131
|
+
# endif
|
132
|
+
# elif __GNUC__ >= 4
|
133
|
+
# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
|
126
134
|
# else
|
127
|
-
# define UTF8PROC_DLLEXPORT
|
135
|
+
# define UTF8PROC_DLLEXPORT
|
128
136
|
# endif
|
129
|
-
#elif __GNUC__ >= 4
|
130
|
-
# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
|
131
|
-
#else
|
132
|
-
# define UTF8PROC_DLLEXPORT
|
133
137
|
#endif
|
134
138
|
|
135
139
|
#ifdef __cplusplus
|
136
140
|
extern "C" {
|
137
141
|
#endif
|
138
142
|
|
139
|
-
#ifndef SSIZE_MAX
|
140
|
-
#define SSIZE_MAX ((size_t)SIZE_MAX/2)
|
141
|
-
#endif
|
142
|
-
|
143
|
-
#ifndef UINT16_MAX
|
144
|
-
# define UINT16_MAX 65535U
|
145
|
-
#endif
|
146
|
-
|
147
143
|
/**
|
148
144
|
* Option flags used by several functions in the library.
|
149
145
|
*/
|
@@ -209,6 +205,10 @@ typedef enum {
|
|
209
205
|
* @ref UTF8PROC_DECOMPOSE
|
210
206
|
*/
|
211
207
|
UTF8PROC_STRIPMARK = (1<<13),
|
208
|
+
/**
|
209
|
+
* Strip unassigned codepoints.
|
210
|
+
*/
|
211
|
+
UTF8PROC_STRIPNA = (1<<14),
|
212
212
|
} utf8proc_option_t;
|
213
213
|
|
214
214
|
/** @name Error codes
|
@@ -374,10 +374,18 @@ typedef enum {
|
|
374
374
|
UTF8PROC_BOUNDCLASS_SPACINGMARK = 12, /**< Spacingmark */
|
375
375
|
UTF8PROC_BOUNDCLASS_PREPEND = 13, /**< Prepend */
|
376
376
|
UTF8PROC_BOUNDCLASS_ZWJ = 14, /**< Zero Width Joiner */
|
377
|
+
|
378
|
+
/* the following are no longer used in Unicode 11, but we keep
|
379
|
+
the constants here for backward compatibility */
|
377
380
|
UTF8PROC_BOUNDCLASS_E_BASE = 15, /**< Emoji Base */
|
378
381
|
UTF8PROC_BOUNDCLASS_E_MODIFIER = 16, /**< Emoji Modifier */
|
379
382
|
UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ = 17, /**< Glue_After_ZWJ */
|
380
383
|
UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZWJ */
|
384
|
+
|
385
|
+
/* the Extended_Pictographic property is used in the Unicode 11
|
386
|
+
grapheme-boundary rules, so we store it in the boundclass field */
|
387
|
+
UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC = 19,
|
388
|
+
UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */
|
381
389
|
} utf8proc_boundclass_t;
|
382
390
|
|
383
391
|
/**
|
@@ -400,6 +408,11 @@ UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256];
|
|
400
408
|
*/
|
401
409
|
UTF8PROC_DLLEXPORT const char *utf8proc_version(void);
|
402
410
|
|
411
|
+
/**
|
412
|
+
* Returns the utf8proc supported Unicode version as a string MAJOR.MINOR.PATCH.
|
413
|
+
*/
|
414
|
+
UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void);
|
415
|
+
|
403
416
|
/**
|
404
417
|
* Returns an informative error string for the given utf8proc error code
|
405
418
|
* (e.g. the error codes returned by @ref utf8proc_map).
|
@@ -465,6 +478,7 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
|
|
465
478
|
* - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster
|
466
479
|
* - @ref UTF8PROC_LUMP - lump certain different codepoints together
|
467
480
|
* - @ref UTF8PROC_STRIPMARK - remove all character marks
|
481
|
+
* - @ref UTF8PROC_STRIPNA - remove unassigned codepoints
|
468
482
|
* @param last_boundclass
|
469
483
|
* Pointer to an integer variable containing
|
470
484
|
* the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND
|
@@ -576,6 +590,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
|
|
576
590
|
* Given a pair of consecutive codepoints, return whether a grapheme break is
|
577
591
|
* permitted between them (as defined by the extended grapheme clusters in UAX#29).
|
578
592
|
*
|
593
|
+
* @param codepoint1 The first codepoint.
|
594
|
+
* @param codepoint2 The second codepoint, occurring consecutively after `codepoint1`.
|
579
595
|
* @param state Beginning with Version 29 (Unicode 9.0.0), this algorithm requires
|
580
596
|
* state to break graphemes. This state can be passed in as a pointer
|
581
597
|
* in the `state` argument and should initially be set to 0. If the
|
@@ -584,7 +600,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
|
|
584
600
|
* matching the rules in Unicode 8.0.0.
|
585
601
|
*
|
586
602
|
* @warning If the state parameter is used, `utf8proc_grapheme_break_stateful` must
|
587
|
-
* be called IN ORDER on ALL potential breaks in a string.
|
603
|
+
* be called IN ORDER on ALL potential breaks in a string. However, it
|
604
|
+
* is safe to reset the state to zero after a grapheme break.
|
588
605
|
*/
|
589
606
|
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
|
590
607
|
utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state);
|
@@ -651,7 +668,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
|
|
651
668
|
* contain NULL characters with the string if `str` contained NULL
|
652
669
|
* characters). Other flags in the `options` field are passed to the
|
653
670
|
* functions defined above, and regarded as described. See also
|
654
|
-
* @ref
|
671
|
+
* @ref utf8proc_map_custom to supply a custom codepoint transformation.
|
655
672
|
*
|
656
673
|
* In case of success the length of the new string is returned,
|
657
674
|
* otherwise a negative error code is returned.
|
@@ -676,8 +693,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
|
|
676
693
|
|
677
694
|
/** @name Unicode normalization
|
678
695
|
*
|
679
|
-
* Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or
|
680
|
-
* normalized version of the null-terminated string `str`. These
|
696
|
+
* Returns a pointer to newly allocated memory of a NFD, NFC, NFKD, NFKC or
|
697
|
+
* NFKC_Casefold normalized version of the null-terminated string `str`. These
|
681
698
|
* are shortcuts to calling @ref utf8proc_map with @ref UTF8PROC_NULLTERM
|
682
699
|
* combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
|
683
700
|
*/
|
@@ -690,6 +707,11 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
|
|
690
707
|
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
|
691
708
|
/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
|
692
709
|
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
|
710
|
+
/**
|
711
|
+
* NFKC_Casefold normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT
|
712
|
+
* and @ref UTF8PROC_CASEFOLD and @ref UTF8PROC_IGNORE).
|
713
|
+
**/
|
714
|
+
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str);
|
693
715
|
/** @} */
|
694
716
|
|
695
717
|
#ifdef __cplusplus
|