gosu 0.14.5 → 0.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +1 -0
- data/COPYING +1 -1
- data/Gosu/Channel.h +25 -0
- data/Gosu/Color.h +38 -0
- data/Gosu/Font.h +36 -0
- data/Gosu/Gosu.h +79 -0
- data/Gosu/Image.h +54 -0
- data/Gosu/Sample.h +19 -0
- data/Gosu/Song.h +24 -0
- data/Gosu/TextInput.h +30 -0
- data/Gosu/Version.hpp +2 -2
- data/Gosu/Window.h +61 -0
- data/Gosu/Window.hpp +3 -2
- data/README.md +1 -1
- data/ext/gosu/extconf.rb +3 -0
- data/lib/gosu/compat.rb +12 -7
- data/lib/gosu/patches.rb +8 -2
- data/lib/gosu/swig_patches.rb +20 -9
- data/rdoc/gosu.rb +28 -7
- data/src/ChannelWrapper.cpp +50 -0
- data/src/ColorWrapper.cpp +126 -0
- data/src/Constants.cpp +287 -0
- data/src/Font.cpp +1 -0
- data/src/FontWrapper.cpp +74 -0
- data/src/GosuWrapper.cpp +232 -0
- data/src/Graphics.cpp +4 -1
- data/src/GraphicsImpl.hpp +0 -1
- data/src/ImageWrapper.cpp +168 -0
- data/src/LargeImageData.cpp +1 -0
- data/src/MarkupParser.cpp +11 -3
- data/src/RubyGosu.cxx +185 -121
- data/src/RubyGosu.h +2 -2
- data/src/SampleWrapper.cpp +30 -0
- data/src/SongWrapper.cpp +52 -0
- data/src/TexChunk.cpp +29 -19
- data/src/Text.cpp +2 -0
- data/src/TextBuilder.cpp +3 -3
- data/src/TextInputWrapper.cpp +101 -0
- data/src/TrueTypeFont.cpp +1 -0
- data/src/Window.cpp +62 -28
- data/src/WindowUIKit.cpp +8 -4
- data/src/WindowWrapper.cpp +289 -0
- data/src/stb_image.h +153 -56
- data/src/stb_image_write.h +111 -60
- data/src/stb_truetype.h +74 -39
- data/src/stb_vorbis.c +55 -15
- data/src/utf8proc.c +47 -29
- data/src/utf8proc.h +46 -24
- data/src/utf8proc_data.h +10043 -9609
- metadata +23 -4
data/src/stb_vorbis.c
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
// Ogg Vorbis audio decoder - v1.14 - public domain
|
1
|
+
// Ogg Vorbis audio decoder - v1.17 - public domain
|
2
2
|
// http://nothings.org/stb_vorbis/
|
3
3
|
//
|
4
4
|
// Original version written by Sean Barrett in 2007.
|
@@ -30,9 +30,12 @@
|
|
30
30
|
// Tom Beaumont Ingo Leitgeb Nicolas Guillemot
|
31
31
|
// Phillip Bennefall Rohit Thiago Goulart
|
32
32
|
// manxorist@github saga musix github:infatum
|
33
|
-
// Timur Gagiev
|
33
|
+
// Timur Gagiev Maxwell Koo
|
34
34
|
//
|
35
35
|
// Partial history:
|
36
|
+
// 1.17 - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
|
37
|
+
// 1.16 - 2019-03-04 - fix warnings
|
38
|
+
// 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
|
36
39
|
// 1.14 - 2018-02-11 - delete bogus dealloca usage
|
37
40
|
// 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
|
38
41
|
// 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
|
@@ -253,7 +256,7 @@ extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
|
|
253
256
|
// create an ogg vorbis decoder from an open FILE *, looking for a stream at
|
254
257
|
// the _current_ seek point (ftell). on failure, returns NULL and sets *error.
|
255
258
|
// note that stb_vorbis must "own" this stream; if you seek it in between
|
256
|
-
// calls to stb_vorbis, it will become confused.
|
259
|
+
// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
|
257
260
|
// perform stb_vorbis_seek_*() operations on this file, it will assume it
|
258
261
|
// owns the _entire_ rest of the file after the start point. Use the next
|
259
262
|
// function, stb_vorbis_open_file_section(), to limit it.
|
@@ -374,7 +377,8 @@ enum STBVorbisError
|
|
374
377
|
VORBIS_invalid_first_page,
|
375
378
|
VORBIS_bad_packet_type,
|
376
379
|
VORBIS_cant_find_last_page,
|
377
|
-
VORBIS_seek_failed
|
380
|
+
VORBIS_seek_failed,
|
381
|
+
VORBIS_ogg_skeleton_not_supported
|
378
382
|
};
|
379
383
|
|
380
384
|
|
@@ -1073,7 +1077,7 @@ static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
|
|
1073
1077
|
assert(z >= 0 && z < 32);
|
1074
1078
|
available[z] = 0;
|
1075
1079
|
add_entry(c, bit_reverse(res), i, m++, len[i], values);
|
1076
|
-
//
|
1080
|
+
// propagate availability up the tree
|
1077
1081
|
if (z != len[i]) {
|
1078
1082
|
assert(len[i] >= 0 && len[i] < 32);
|
1079
1083
|
for (y=len[i]; y > z; --y) {
|
@@ -1199,8 +1203,10 @@ static int lookup1_values(int entries, int dim)
|
|
1199
1203
|
int r = (int) floor(exp((float) log((float) entries) / dim));
|
1200
1204
|
if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
|
1201
1205
|
++r; // floor() to avoid _ftol() when non-CRT
|
1202
|
-
|
1203
|
-
|
1206
|
+
if (pow((float) r+1, dim) <= entries)
|
1207
|
+
return -1;
|
1208
|
+
if ((int) floor(pow((float) r, dim)) > entries)
|
1209
|
+
return -1;
|
1204
1210
|
return r;
|
1205
1211
|
}
|
1206
1212
|
|
@@ -2010,7 +2016,7 @@ static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y
|
|
2010
2016
|
ady -= abs(base) * adx;
|
2011
2017
|
if (x1 > n) x1 = n;
|
2012
2018
|
if (x < x1) {
|
2013
|
-
LINE_OP(output[x], inverse_db_table[y]);
|
2019
|
+
LINE_OP(output[x], inverse_db_table[y&255]);
|
2014
2020
|
for (++x; x < x1; ++x) {
|
2015
2021
|
err += ady;
|
2016
2022
|
if (err >= adx) {
|
@@ -2018,7 +2024,7 @@ static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y
|
|
2018
2024
|
y += sy;
|
2019
2025
|
} else
|
2020
2026
|
y += base;
|
2021
|
-
LINE_OP(output[x], inverse_db_table[y]);
|
2027
|
+
LINE_OP(output[x], inverse_db_table[y&255]);
|
2022
2028
|
}
|
2023
2029
|
}
|
2024
2030
|
}
|
@@ -2637,7 +2643,7 @@ static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
|
|
2637
2643
|
// once I combined the passes.
|
2638
2644
|
|
2639
2645
|
// so there's a missing 'times 2' here (for adding X to itself).
|
2640
|
-
// this
|
2646
|
+
// this propagates through linearly to the end, where the numbers
|
2641
2647
|
// are 1/2 too small, and need to be compensated for.
|
2642
2648
|
|
2643
2649
|
{
|
@@ -3045,7 +3051,6 @@ static float *get_window(vorb *f, int len)
|
|
3045
3051
|
len <<= 1;
|
3046
3052
|
if (len == f->blocksize_0) return f->window[0];
|
3047
3053
|
if (len == f->blocksize_1) return f->window[1];
|
3048
|
-
assert(0);
|
3049
3054
|
return NULL;
|
3050
3055
|
}
|
3051
3056
|
|
@@ -3451,6 +3456,7 @@ static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
|
|
3451
3456
|
if (f->previous_length) {
|
3452
3457
|
int i,j, n = f->previous_length;
|
3453
3458
|
float *w = get_window(f, n);
|
3459
|
+
if (w == NULL) return 0;
|
3454
3460
|
for (i=0; i < f->channels; ++i) {
|
3455
3461
|
for (j=0; j < n; ++j)
|
3456
3462
|
f->channel_buffers[i][left+j] =
|
@@ -3578,7 +3584,22 @@ static int start_decoder(vorb *f)
|
|
3578
3584
|
if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
|
3579
3585
|
// check for expected packet length
|
3580
3586
|
if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
|
3581
|
-
if (f->segments[0] != 30)
|
3587
|
+
if (f->segments[0] != 30) {
|
3588
|
+
// check for the Ogg skeleton fishead identifying header to refine our error
|
3589
|
+
if (f->segments[0] == 64 &&
|
3590
|
+
getn(f, header, 6) &&
|
3591
|
+
header[0] == 'f' &&
|
3592
|
+
header[1] == 'i' &&
|
3593
|
+
header[2] == 's' &&
|
3594
|
+
header[3] == 'h' &&
|
3595
|
+
header[4] == 'e' &&
|
3596
|
+
header[5] == 'a' &&
|
3597
|
+
get8(f) == 'd' &&
|
3598
|
+
get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported);
|
3599
|
+
else
|
3600
|
+
return error(f, VORBIS_invalid_first_page);
|
3601
|
+
}
|
3602
|
+
|
3582
3603
|
// read packet
|
3583
3604
|
// check packet header
|
3584
3605
|
if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
|
@@ -3677,6 +3698,7 @@ static int start_decoder(vorb *f)
|
|
3677
3698
|
while (current_entry < c->entries) {
|
3678
3699
|
int limit = c->entries - current_entry;
|
3679
3700
|
int n = get_bits(f, ilog(limit));
|
3701
|
+
if (current_length >= 32) return error(f, VORBIS_invalid_setup);
|
3680
3702
|
if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
|
3681
3703
|
memset(lengths + current_entry, current_length, n);
|
3682
3704
|
current_entry += n;
|
@@ -3780,7 +3802,9 @@ static int start_decoder(vorb *f)
|
|
3780
3802
|
c->value_bits = get_bits(f, 4)+1;
|
3781
3803
|
c->sequence_p = get_bits(f,1);
|
3782
3804
|
if (c->lookup_type == 1) {
|
3783
|
-
|
3805
|
+
int values = lookup1_values(c->entries, c->dimensions);
|
3806
|
+
if (values < 0) return error(f, VORBIS_invalid_setup);
|
3807
|
+
c->lookup_values = (uint32) values;
|
3784
3808
|
} else {
|
3785
3809
|
c->lookup_values = c->entries * c->dimensions;
|
3786
3810
|
}
|
@@ -3916,6 +3940,9 @@ static int start_decoder(vorb *f)
|
|
3916
3940
|
p[j].id = j;
|
3917
3941
|
}
|
3918
3942
|
qsort(p, g->values, sizeof(p[0]), point_compare);
|
3943
|
+
for (j=0; j < g->values-1; ++j)
|
3944
|
+
if (p[j].x == p[j+1].x)
|
3945
|
+
return error(f, VORBIS_invalid_setup);
|
3919
3946
|
for (j=0; j < g->values; ++j)
|
3920
3947
|
g->sorted_order[j] = (uint8) p[j].id;
|
3921
3948
|
// precompute the neighbors
|
@@ -4002,6 +4029,7 @@ static int start_decoder(vorb *f)
|
|
4002
4029
|
max_submaps = m->submaps;
|
4003
4030
|
if (get_bits(f,1)) {
|
4004
4031
|
m->coupling_steps = get_bits(f,8)+1;
|
4032
|
+
if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
|
4005
4033
|
for (k=0; k < m->coupling_steps; ++k) {
|
4006
4034
|
m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
|
4007
4035
|
m->chan[k].angle = get_bits(f, ilog(f->channels-1));
|
@@ -4566,7 +4594,7 @@ static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
|
|
4566
4594
|
return 1;
|
4567
4595
|
}
|
4568
4596
|
|
4569
|
-
// rarely used function to seek back to the
|
4597
|
+
// rarely used function to seek back to the preceding page while finding the
|
4570
4598
|
// start of a packet
|
4571
4599
|
static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
|
4572
4600
|
{
|
@@ -4973,7 +5001,13 @@ stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, con
|
|
4973
5001
|
|
4974
5002
|
stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
|
4975
5003
|
{
|
4976
|
-
FILE *f = fopen(filename, "rb");
|
5004
|
+
FILE *f;
|
5005
|
+
#if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
|
5006
|
+
if (0 != fopen_s(&f, filename, "rb"))
|
5007
|
+
f = NULL;
|
5008
|
+
#else
|
5009
|
+
f = fopen(filename, "rb");
|
5010
|
+
#endif
|
4977
5011
|
if (f)
|
4978
5012
|
return stb_vorbis_open_file(f, TRUE, error, alloc);
|
4979
5013
|
if (error) *error = VORBIS_file_open_failure;
|
@@ -5362,6 +5396,12 @@ int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, in
|
|
5362
5396
|
#endif // STB_VORBIS_NO_PULLDATA_API
|
5363
5397
|
|
5364
5398
|
/* Version history
|
5399
|
+
1.17 - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
|
5400
|
+
found with Mayhem by ForAllSecure
|
5401
|
+
1.16 - 2019-03-04 - fix warnings
|
5402
|
+
1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
|
5403
|
+
1.14 - 2018-02-11 - delete bogus dealloca usage
|
5404
|
+
1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
|
5365
5405
|
1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
|
5366
5406
|
1.11 - 2017-07-23 - fix MinGW compilation
|
5367
5407
|
1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
|
data/src/utf8proc.c
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
|
2
2
|
/*
|
3
|
-
* Copyright (c)
|
3
|
+
* Copyright (c) 2018 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
4
4
|
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
5
5
|
*
|
6
6
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
@@ -42,6 +42,14 @@
|
|
42
42
|
|
43
43
|
|
44
44
|
#include "utf8proc.h"
|
45
|
+
|
46
|
+
#ifndef SSIZE_MAX
|
47
|
+
#define SSIZE_MAX ((size_t)SIZE_MAX/2)
|
48
|
+
#endif
|
49
|
+
#ifndef UINT16_MAX
|
50
|
+
# define UINT16_MAX 65535U
|
51
|
+
#endif
|
52
|
+
|
45
53
|
#include "utf8proc_data.h"
|
46
54
|
|
47
55
|
|
@@ -92,6 +100,10 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
|
|
92
100
|
return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
|
93
101
|
}
|
94
102
|
|
103
|
+
UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
|
104
|
+
return "12.1.0";
|
105
|
+
}
|
106
|
+
|
95
107
|
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
96
108
|
switch (errcode) {
|
97
109
|
case UTF8PROC_ERROR_NOMEM:
|
@@ -188,9 +200,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, ut
|
|
188
200
|
} else return 0;
|
189
201
|
}
|
190
202
|
|
191
|
-
/* internal
|
192
|
-
static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
|
203
|
+
/* internal version used for inserting 0xff bytes between graphemes */
|
204
|
+
static utf8proc_ssize_t charbound_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
|
193
205
|
if (uc < 0x00) {
|
206
|
+
if (uc == -1) { /* internal value used for grapheme breaks */
|
207
|
+
dst[0] = (utf8proc_uint8_t)0xFF;
|
208
|
+
return 1;
|
209
|
+
}
|
194
210
|
return 0;
|
195
211
|
} else if (uc < 0x80) {
|
196
212
|
dst[0] = (utf8proc_uint8_t)uc;
|
@@ -199,12 +215,6 @@ static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t
|
|
199
215
|
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
|
200
216
|
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
201
217
|
return 2;
|
202
|
-
} else if (uc == 0xFFFF) {
|
203
|
-
dst[0] = (utf8proc_uint8_t)0xFF;
|
204
|
-
return 1;
|
205
|
-
} else if (uc == 0xFFFE) {
|
206
|
-
dst[0] = (utf8proc_uint8_t)0xFE;
|
207
|
-
return 1;
|
208
218
|
} else if (uc < 0x10000) {
|
209
219
|
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
|
210
220
|
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
@@ -271,12 +281,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
|
|
271
281
|
tbc == UTF8PROC_BOUNDCLASS_ZWJ || // ---
|
272
282
|
tbc == UTF8PROC_BOUNDCLASS_SPACINGMARK || // GB9a
|
273
283
|
lbc == UTF8PROC_BOUNDCLASS_PREPEND) ? false : // GB9b
|
274
|
-
(
|
275
|
-
|
276
|
-
tbc == UTF8PROC_BOUNDCLASS_E_MODIFIER) ? false : // ----
|
277
|
-
(lbc == UTF8PROC_BOUNDCLASS_ZWJ && // GB11
|
278
|
-
(tbc == UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ || // ----
|
279
|
-
tbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ)) ? false : // ----
|
284
|
+
(lbc == UTF8PROC_BOUNDCLASS_E_ZWG && // GB11 (requires additional handling below)
|
285
|
+
tbc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) ? false : // ----
|
280
286
|
(lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR && // GB12/13 (requires additional handling below)
|
281
287
|
tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false : // ----
|
282
288
|
true; // GB999
|
@@ -284,9 +290,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
|
|
284
290
|
|
285
291
|
static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
|
286
292
|
{
|
287
|
-
int lbc_override =
|
288
|
-
|
289
|
-
lbc_override = *state;
|
293
|
+
int lbc_override = ((state && *state != UTF8PROC_BOUNDCLASS_START)
|
294
|
+
? *state : lbc);
|
290
295
|
utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
|
291
296
|
if (state) {
|
292
297
|
// Special support for GB 12/13 made possible by GB999. After two RI
|
@@ -296,12 +301,15 @@ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t
|
|
296
301
|
// forbidden by a different rule such as GB9).
|
297
302
|
if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
|
298
303
|
*state = UTF8PROC_BOUNDCLASS_OTHER;
|
299
|
-
// Special support for
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
304
|
+
// Special support for GB11 (emoji extend* zwj / emoji)
|
305
|
+
else if (*state == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
|
306
|
+
if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
|
307
|
+
*state = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
|
308
|
+
else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
|
309
|
+
*state = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
|
310
|
+
else
|
311
|
+
*state = tbc;
|
312
|
+
}
|
305
313
|
else
|
306
314
|
*state = tbc;
|
307
315
|
}
|
@@ -424,6 +432,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
|
|
424
432
|
if (options & UTF8PROC_IGNORE) {
|
425
433
|
if (property->ignorable) return 0;
|
426
434
|
}
|
435
|
+
if (options & UTF8PROC_STRIPNA) {
|
436
|
+
if (!category) return 0;
|
437
|
+
}
|
427
438
|
if (options & UTF8PROC_LUMP) {
|
428
439
|
if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
|
429
440
|
if (uc == 0x2018 || uc == 0x2019 || uc == 0x02BC || uc == 0x02C8)
|
@@ -471,7 +482,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
|
|
471
482
|
int tbc = property->boundclass;
|
472
483
|
boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
|
473
484
|
if (boundary) {
|
474
|
-
if (bufsize >= 1) dst[0] = 0xFFFF; /* sentinel value for grapheme break */
|
485
|
+
if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
|
475
486
|
if (bufsize >= 2) dst[1] = uc;
|
476
487
|
return 2;
|
477
488
|
}
|
@@ -632,9 +643,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
|
|
632
643
|
current_property->comb_index != UINT16_MAX &&
|
633
644
|
current_property->comb_index >= 0x8000) {
|
634
645
|
int sidx = starter_property->comb_index;
|
635
|
-
int idx =
|
636
|
-
if (idx >=
|
637
|
-
idx += sidx + 2;
|
646
|
+
int idx = current_property->comb_index & 0x3FFF;
|
647
|
+
if (idx >= utf8proc_combinations[sidx] && idx <= utf8proc_combinations[sidx + 1] ) {
|
648
|
+
idx += sidx + 2 - utf8proc_combinations[sidx];
|
638
649
|
if (current_property->comb_index & 0x4000) {
|
639
650
|
composition = (utf8proc_combinations[idx] << 16) | utf8proc_combinations[idx+1];
|
640
651
|
} else
|
@@ -677,7 +688,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
|
|
677
688
|
if (options & UTF8PROC_CHARBOUND) {
|
678
689
|
for (rpos = 0; rpos < length; rpos++) {
|
679
690
|
uc = buffer[rpos];
|
680
|
-
wpos += unsafe_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
|
691
|
+
wpos += charbound_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
|
681
692
|
}
|
682
693
|
} else {
|
683
694
|
for (rpos = 0; rpos < length; rpos++) {
|
@@ -753,3 +764,10 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str)
|
|
753
764
|
UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
|
754
765
|
return retval;
|
755
766
|
}
|
767
|
+
|
768
|
+
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str) {
|
769
|
+
utf8proc_uint8_t *retval;
|
770
|
+
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
|
771
|
+
UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE);
|
772
|
+
return retval;
|
773
|
+
}
|
data/src/utf8proc.h
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) 2018 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
3
3
|
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
4
4
|
*
|
5
5
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
@@ -28,7 +28,7 @@
|
|
28
28
|
* utf8proc is a free/open-source (MIT/expat licensed) C library
|
29
29
|
* providing Unicode normalization, case-folding, and other operations
|
30
30
|
* for strings in the UTF-8 encoding, supporting Unicode version
|
31
|
-
*
|
31
|
+
* 12.1.0. See the utf8proc home page (http://julialang.org/utf8proc/)
|
32
32
|
* for downloads and other information, or the source code on github
|
33
33
|
* (https://github.com/JuliaLang/utf8proc).
|
34
34
|
*
|
@@ -71,13 +71,13 @@
|
|
71
71
|
/** The MAJOR version number (increased when backwards API compatibility is broken). */
|
72
72
|
#define UTF8PROC_VERSION_MAJOR 2
|
73
73
|
/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
|
74
|
-
#define UTF8PROC_VERSION_MINOR
|
74
|
+
#define UTF8PROC_VERSION_MINOR 4
|
75
75
|
/** The PATCH version (increased for fixes that do not change the API). */
|
76
76
|
#define UTF8PROC_VERSION_PATCH 0
|
77
77
|
/** @} */
|
78
78
|
|
79
79
|
#include <stdlib.h>
|
80
|
-
|
80
|
+
|
81
81
|
#if defined(_MSC_VER) && _MSC_VER < 1800
|
82
82
|
// MSVC prior to 2013 lacked stdbool.h and inttypes.h
|
83
83
|
typedef signed char utf8proc_int8_t;
|
@@ -120,30 +120,26 @@ typedef bool utf8proc_bool;
|
|
120
120
|
#endif
|
121
121
|
#include <limits.h>
|
122
122
|
|
123
|
-
#ifdef
|
124
|
-
#
|
125
|
-
#
|
123
|
+
#ifdef UTF8PROC_STATIC
|
124
|
+
# define UTF8PROC_DLLEXPORT
|
125
|
+
#else
|
126
|
+
# ifdef _WIN32
|
127
|
+
# ifdef UTF8PROC_EXPORTS
|
128
|
+
# define UTF8PROC_DLLEXPORT __declspec(dllexport)
|
129
|
+
# else
|
130
|
+
# define UTF8PROC_DLLEXPORT __declspec(dllimport)
|
131
|
+
# endif
|
132
|
+
# elif __GNUC__ >= 4
|
133
|
+
# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
|
126
134
|
# else
|
127
|
-
# define UTF8PROC_DLLEXPORT
|
135
|
+
# define UTF8PROC_DLLEXPORT
|
128
136
|
# endif
|
129
|
-
#elif __GNUC__ >= 4
|
130
|
-
# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
|
131
|
-
#else
|
132
|
-
# define UTF8PROC_DLLEXPORT
|
133
137
|
#endif
|
134
138
|
|
135
139
|
#ifdef __cplusplus
|
136
140
|
extern "C" {
|
137
141
|
#endif
|
138
142
|
|
139
|
-
#ifndef SSIZE_MAX
|
140
|
-
#define SSIZE_MAX ((size_t)SIZE_MAX/2)
|
141
|
-
#endif
|
142
|
-
|
143
|
-
#ifndef UINT16_MAX
|
144
|
-
# define UINT16_MAX 65535U
|
145
|
-
#endif
|
146
|
-
|
147
143
|
/**
|
148
144
|
* Option flags used by several functions in the library.
|
149
145
|
*/
|
@@ -209,6 +205,10 @@ typedef enum {
|
|
209
205
|
* @ref UTF8PROC_DECOMPOSE
|
210
206
|
*/
|
211
207
|
UTF8PROC_STRIPMARK = (1<<13),
|
208
|
+
/**
|
209
|
+
* Strip unassigned codepoints.
|
210
|
+
*/
|
211
|
+
UTF8PROC_STRIPNA = (1<<14),
|
212
212
|
} utf8proc_option_t;
|
213
213
|
|
214
214
|
/** @name Error codes
|
@@ -374,10 +374,18 @@ typedef enum {
|
|
374
374
|
UTF8PROC_BOUNDCLASS_SPACINGMARK = 12, /**< Spacingmark */
|
375
375
|
UTF8PROC_BOUNDCLASS_PREPEND = 13, /**< Prepend */
|
376
376
|
UTF8PROC_BOUNDCLASS_ZWJ = 14, /**< Zero Width Joiner */
|
377
|
+
|
378
|
+
/* the following are no longer used in Unicode 11, but we keep
|
379
|
+
the constants here for backward compatibility */
|
377
380
|
UTF8PROC_BOUNDCLASS_E_BASE = 15, /**< Emoji Base */
|
378
381
|
UTF8PROC_BOUNDCLASS_E_MODIFIER = 16, /**< Emoji Modifier */
|
379
382
|
UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ = 17, /**< Glue_After_ZWJ */
|
380
383
|
UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZWJ */
|
384
|
+
|
385
|
+
/* the Extended_Pictographic property is used in the Unicode 11
|
386
|
+
grapheme-boundary rules, so we store it in the boundclass field */
|
387
|
+
UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC = 19,
|
388
|
+
UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */
|
381
389
|
} utf8proc_boundclass_t;
|
382
390
|
|
383
391
|
/**
|
@@ -400,6 +408,11 @@ UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256];
|
|
400
408
|
*/
|
401
409
|
UTF8PROC_DLLEXPORT const char *utf8proc_version(void);
|
402
410
|
|
411
|
+
/**
|
412
|
+
* Returns the utf8proc supported Unicode version as a string MAJOR.MINOR.PATCH.
|
413
|
+
*/
|
414
|
+
UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void);
|
415
|
+
|
403
416
|
/**
|
404
417
|
* Returns an informative error string for the given utf8proc error code
|
405
418
|
* (e.g. the error codes returned by @ref utf8proc_map).
|
@@ -465,6 +478,7 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
|
|
465
478
|
* - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster
|
466
479
|
* - @ref UTF8PROC_LUMP - lump certain different codepoints together
|
467
480
|
* - @ref UTF8PROC_STRIPMARK - remove all character marks
|
481
|
+
* - @ref UTF8PROC_STRIPNA - remove unassigned codepoints
|
468
482
|
* @param last_boundclass
|
469
483
|
* Pointer to an integer variable containing
|
470
484
|
* the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND
|
@@ -576,6 +590,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
|
|
576
590
|
* Given a pair of consecutive codepoints, return whether a grapheme break is
|
577
591
|
* permitted between them (as defined by the extended grapheme clusters in UAX#29).
|
578
592
|
*
|
593
|
+
* @param codepoint1 The first codepoint.
|
594
|
+
* @param codepoint2 The second codepoint, occurring consecutively after `codepoint1`.
|
579
595
|
* @param state Beginning with Version 29 (Unicode 9.0.0), this algorithm requires
|
580
596
|
* state to break graphemes. This state can be passed in as a pointer
|
581
597
|
* in the `state` argument and should initially be set to 0. If the
|
@@ -584,7 +600,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
|
|
584
600
|
* matching the rules in Unicode 8.0.0.
|
585
601
|
*
|
586
602
|
* @warning If the state parameter is used, `utf8proc_grapheme_break_stateful` must
|
587
|
-
* be called IN ORDER on ALL potential breaks in a string.
|
603
|
+
* be called IN ORDER on ALL potential breaks in a string. However, it
|
604
|
+
* is safe to reset the state to zero after a grapheme break.
|
588
605
|
*/
|
589
606
|
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
|
590
607
|
utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state);
|
@@ -651,7 +668,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
|
|
651
668
|
* contain NULL characters with the string if `str` contained NULL
|
652
669
|
* characters). Other flags in the `options` field are passed to the
|
653
670
|
* functions defined above, and regarded as described. See also
|
654
|
-
* @ref
|
671
|
+
* @ref utf8proc_map_custom to supply a custom codepoint transformation.
|
655
672
|
*
|
656
673
|
* In case of success the length of the new string is returned,
|
657
674
|
* otherwise a negative error code is returned.
|
@@ -676,8 +693,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
|
|
676
693
|
|
677
694
|
/** @name Unicode normalization
|
678
695
|
*
|
679
|
-
* Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or
|
680
|
-
* normalized version of the null-terminated string `str`. These
|
696
|
+
* Returns a pointer to newly allocated memory of a NFD, NFC, NFKD, NFKC or
|
697
|
+
* NFKC_Casefold normalized version of the null-terminated string `str`. These
|
681
698
|
* are shortcuts to calling @ref utf8proc_map with @ref UTF8PROC_NULLTERM
|
682
699
|
* combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
|
683
700
|
*/
|
@@ -690,6 +707,11 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
|
|
690
707
|
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
|
691
708
|
/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
|
692
709
|
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
|
710
|
+
/**
|
711
|
+
* NFKC_Casefold normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT
|
712
|
+
* and @ref UTF8PROC_CASEFOLD and @ref UTF8PROC_IGNORE).
|
713
|
+
**/
|
714
|
+
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str);
|
693
715
|
/** @} */
|
694
716
|
|
695
717
|
#ifdef __cplusplus
|