meerk40t 0.9.7900__py2.py3-none-any.whl → 0.9.7930__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerk40t/balormk/controller.py +46 -13
- meerk40t/balormk/livelightjob.py +34 -7
- meerk40t/core/cutcode/plotcut.py +2 -1
- meerk40t/core/elements/branches.py +35 -14
- meerk40t/core/elements/clipboard.py +10 -12
- meerk40t/core/elements/elements.py +23 -0
- meerk40t/core/elements/geometry.py +48 -14
- meerk40t/core/elements/grid.py +56 -24
- meerk40t/core/elements/offset_clpr.py +2 -4
- meerk40t/core/elements/placements.py +17 -22
- meerk40t/core/elements/render.py +30 -11
- meerk40t/core/elements/shapes.py +205 -125
- meerk40t/core/spoolers.py +1 -1
- meerk40t/core/units.py +4 -0
- meerk40t/grbl/emulator.py +10 -8
- meerk40t/grbl/gcodejob.py +11 -3
- meerk40t/grbl/plugin.py +10 -1
- meerk40t/gui/help_assets/help_assets.py +126 -2
- meerk40t/gui/navigationpanels.py +9 -1
- meerk40t/gui/wxmeerk40t.py +45 -17
- meerk40t/gui/wxmmain.py +7 -0
- meerk40t/lihuiyu/driver.py +6 -6
- meerk40t/main.py +2 -2
- meerk40t/ruida/emulator.py +12 -9
- meerk40t/ruida/plugin.py +5 -0
- meerk40t/ruida/rdjob.py +5 -5
- meerk40t/tools/geomstr.py +89 -1
- meerk40t/tools/ttfparser.py +793 -113
- {meerk40t-0.9.7900.dist-info → meerk40t-0.9.7930.dist-info}/METADATA +1 -1
- {meerk40t-0.9.7900.dist-info → meerk40t-0.9.7930.dist-info}/RECORD +35 -35
- {meerk40t-0.9.7900.dist-info → meerk40t-0.9.7930.dist-info}/LICENSE +0 -0
- {meerk40t-0.9.7900.dist-info → meerk40t-0.9.7930.dist-info}/WHEEL +0 -0
- {meerk40t-0.9.7900.dist-info → meerk40t-0.9.7930.dist-info}/entry_points.txt +0 -0
- {meerk40t-0.9.7900.dist-info → meerk40t-0.9.7930.dist-info}/top_level.txt +0 -0
- {meerk40t-0.9.7900.dist-info → meerk40t-0.9.7930.dist-info}/zip-safe +0 -0
meerk40t/tools/ttfparser.py
CHANGED
@@ -14,32 +14,25 @@ WE_HAVE_INSTRUCTIONS = 1 << 8
|
|
14
14
|
USE_MY_METRICS = 1 << 9
|
15
15
|
OVERLAP_COMPOUND = 1 << 10
|
16
16
|
|
17
|
+
_FLAG_NAMES = {
|
18
|
+
ON_CURVE_POINT: "ON_CURVE_POINT",
|
19
|
+
ARG_1_AND_2_ARE_WORDS: "ARG_1_AND_2_ARE_WORDS",
|
20
|
+
ARGS_ARE_XY_VALUES: "ARGS_ARE_XY_VALUES",
|
21
|
+
ROUND_XY_TO_GRID: "ROUND_XY_TO_GRID",
|
22
|
+
WE_HAVE_A_SCALE: "WE_HAVE_A_SCALE",
|
23
|
+
MORE_COMPONENTS: "MORE_COMPONENTS",
|
24
|
+
WE_HAVE_AN_X_AND_Y_SCALE: "WE_HAVE_AN_X_AND_Y_SCALE",
|
25
|
+
WE_HAVE_A_TWO_BY_TWO: "WE_HAVE_A_TWO_BY_TWO",
|
26
|
+
WE_HAVE_INSTRUCTIONS: "WE_HAVE_INSTRUCTIONS",
|
27
|
+
USE_MY_METRICS: "USE_MY_METRICS",
|
28
|
+
OVERLAP_COMPOUND: "OVERLAP_COMPOUND",
|
29
|
+
}
|
30
|
+
|
17
31
|
|
18
32
|
def flagname(flag):
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
return "ARG_1_AND_2_ARE_WORDS"
|
23
|
-
elif flag & ARGS_ARE_XY_VALUES:
|
24
|
-
return "ARGS_ARE_XY_VALUES"
|
25
|
-
elif flag & ROUND_XY_TO_GRID:
|
26
|
-
return "ROUND_XY_TO_GRID"
|
27
|
-
elif flag & WE_HAVE_A_SCALE:
|
28
|
-
return "WE_HAVE_A_SCALE"
|
29
|
-
elif flag & MORE_COMPONENTS:
|
30
|
-
return "MORE_COMPONENTS"
|
31
|
-
elif flag & WE_HAVE_AN_X_AND_Y_SCALE:
|
32
|
-
return "WE_HAVE_AN_X_AND_Y_SCALE"
|
33
|
-
elif flag & WE_HAVE_A_TWO_BY_TWO:
|
34
|
-
return "WE_HAVE_A_TWO_BY_TWO"
|
35
|
-
elif flag & WE_HAVE_INSTRUCTIONS:
|
36
|
-
return "WE_HAVE_INSTRUCTIONS"
|
37
|
-
elif flag & USE_MY_METRICS:
|
38
|
-
return "USE_MY_METRICS"
|
39
|
-
elif flag & OVERLAP_COMPOUND:
|
40
|
-
return "OVERLAP_COMPOUND"
|
41
|
-
else:
|
42
|
-
return f"UNKNOWN_FLAG_{flag}"
|
33
|
+
"""Return all active flag names for the given flag value."""
|
34
|
+
names = [name for bit, name in _FLAG_NAMES.items() if flag & bit]
|
35
|
+
return " | ".join(names) if names else f"UNKNOWN_FLAG_{flag}"
|
43
36
|
|
44
37
|
|
45
38
|
class TTFParsingError(ValueError):
|
@@ -54,7 +47,7 @@ class TrueTypeFont:
|
|
54
47
|
self.checksum_adjust = None
|
55
48
|
self.magic_number = None
|
56
49
|
self.flags = None
|
57
|
-
self.units_per_em =
|
50
|
+
self.units_per_em = 1000 # Default value, will be overwritten during parsing
|
58
51
|
self.created = None
|
59
52
|
self.modified = None
|
60
53
|
self.active = True
|
@@ -78,23 +71,29 @@ class TrueTypeFont:
|
|
78
71
|
self.caret_slope_run = None
|
79
72
|
self.caret_offset = None
|
80
73
|
self.metric_data_format = None
|
81
|
-
self.number_of_long_hor_metrics =
|
74
|
+
self.number_of_long_hor_metrics = (
|
75
|
+
0 # Default value, will be overwritten during parsing
|
76
|
+
)
|
82
77
|
|
83
78
|
self.font_family = None
|
84
79
|
self.font_subfamily = None
|
85
80
|
self.font_name = None
|
86
81
|
self._character_map = {}
|
87
|
-
self.
|
88
|
-
self.
|
82
|
+
self._variation_sequences = {} # Unicode variation sequences mapping
|
83
|
+
self._glyph_offsets = []
|
84
|
+
self.horizontal_metrics = []
|
89
85
|
|
90
86
|
self.is_okay = False
|
87
|
+
self.cmap_version = -1
|
91
88
|
self.parse_ttf(filename, require_checksum=require_checksum)
|
92
89
|
if (
|
93
90
|
b"CFF " in self._raw_tables
|
94
91
|
and b"glyf" not in self._raw_tables
|
95
92
|
and b"loca" not in self._raw_tables
|
96
93
|
):
|
97
|
-
|
94
|
+
error_msg = "Format CFF font file is not supported."
|
95
|
+
self._logger(error_msg)
|
96
|
+
raise TTFParsingError(error_msg)
|
98
97
|
try:
|
99
98
|
self.parse_head()
|
100
99
|
self.parse_hhea()
|
@@ -103,11 +102,18 @@ class TrueTypeFont:
|
|
103
102
|
self.parse_cmap()
|
104
103
|
self.parse_name()
|
105
104
|
except Exception as e:
|
106
|
-
|
107
|
-
|
105
|
+
error_msg = f"TTF init for {filename} crashed: {e}"
|
106
|
+
self._logger(error_msg)
|
107
|
+
raise TTFParsingError(error_msg) from e
|
108
108
|
self.glyph_data = list(self.parse_glyf())
|
109
109
|
self._line_information = []
|
110
110
|
|
111
|
+
def _logger(self, message):
|
112
|
+
DEBUG = True
|
113
|
+
# This can be replaced with an actual logging implementation
|
114
|
+
if DEBUG:
|
115
|
+
print(message)
|
116
|
+
|
111
117
|
def line_information(self):
|
112
118
|
return self._line_information
|
113
119
|
|
@@ -124,12 +130,14 @@ class TrueTypeFont:
|
|
124
130
|
f.seek(off)
|
125
131
|
string = f.read(length)
|
126
132
|
f.seek(location)
|
133
|
+
if string is None:
|
134
|
+
return ""
|
127
135
|
return string.decode("UTF-16BE")
|
128
136
|
except UnicodeDecodeError:
|
129
137
|
try:
|
130
|
-
return string.decode("UTF8")
|
138
|
+
return string.decode("UTF8") if string is not None else ""
|
131
139
|
except UnicodeDecodeError:
|
132
|
-
return string
|
140
|
+
return string if string is not None else ""
|
133
141
|
|
134
142
|
try:
|
135
143
|
with open(filename, "rb") as f:
|
@@ -179,13 +187,13 @@ class TrueTypeFont:
|
|
179
187
|
if name_id == 1:
|
180
188
|
font_family = get_string(f, pos, length)
|
181
189
|
elif name_id == 2:
|
182
|
-
|
190
|
+
font_subfamily = get_string(f, pos, length)
|
183
191
|
elif name_id == 4:
|
184
192
|
font_name = get_string(f, pos, length)
|
185
193
|
if font_family and font_subfamily and font_name:
|
186
194
|
break
|
187
195
|
return font_family, font_subfamily, font_name
|
188
|
-
except Exception
|
196
|
+
except Exception:
|
189
197
|
# Anything fishy
|
190
198
|
return None
|
191
199
|
|
@@ -212,8 +220,13 @@ class TrueTypeFont:
|
|
212
220
|
line_start_y = offset_y * scale
|
213
221
|
offset_x = offs
|
214
222
|
# print (f"{offset_x}, {offset_y}: '{text}', fs={font_size}, em:{self.units_per_em}")
|
215
|
-
for
|
216
|
-
|
223
|
+
for (
|
224
|
+
base_char_code,
|
225
|
+
variation_selector,
|
226
|
+
) in self.parse_text_with_variation_sequences(text):
|
227
|
+
index = self.lookup_glyph_with_variation(
|
228
|
+
base_char_code, variation_selector
|
229
|
+
)
|
217
230
|
if index >= len(self.glyph_data):
|
218
231
|
continue
|
219
232
|
if index >= len(self.horizontal_metrics):
|
@@ -247,7 +260,6 @@ class TrueTypeFont:
|
|
247
260
|
if self.active:
|
248
261
|
path.move(start_x, start_y)
|
249
262
|
for i in range(len(contour)):
|
250
|
-
prev = curr
|
251
263
|
curr = next
|
252
264
|
next = contour[(i + 1) % len(contour)]
|
253
265
|
if curr[2] & ON_CURVE_POINT:
|
@@ -325,7 +337,7 @@ class TrueTypeFont:
|
|
325
337
|
entry_selector,
|
326
338
|
range_shift,
|
327
339
|
) = struct.unpack(">LHHHH", header)
|
328
|
-
for
|
340
|
+
for _ in range(num_tables):
|
329
341
|
tag, checksum, offset, length = struct.unpack(">4sLLL", f.read(16))
|
330
342
|
p = f.tell()
|
331
343
|
f.seek(offset)
|
@@ -334,14 +346,15 @@ class TrueTypeFont:
|
|
334
346
|
if require_checksum:
|
335
347
|
for b, byte in enumerate(data):
|
336
348
|
checksum -= byte << 24 - (8 * (b % 4))
|
337
|
-
if tag == b"head":
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
)
|
349
|
+
if tag == b"head" and checksum % (1 << 32) != 0:
|
350
|
+
error_msg = f"Invalid checksum for table {tag.decode('ascii')}: {checksum % (1 << 32)} != 0"
|
351
|
+
self._logger(error_msg)
|
352
|
+
raise TTFParsingError(error_msg)
|
342
353
|
self._raw_tables[tag] = data
|
343
354
|
except Exception as e:
|
344
|
-
|
355
|
+
error_msg = f"Error parsing TTF file {font_path}: {e}"
|
356
|
+
self._logger(error_msg)
|
357
|
+
raise TTFParsingError(error_msg) from e
|
345
358
|
|
346
359
|
def parse_head(self):
|
347
360
|
data = self._raw_tables[b"head"]
|
@@ -398,6 +411,7 @@ class TrueTypeFont:
|
|
398
411
|
|
399
412
|
def _parse_cmap_table(self, data):
|
400
413
|
_fmt = struct.unpack(">H", data.read(2))[0]
|
414
|
+
self.cmap_version = _fmt
|
401
415
|
if _fmt == 0:
|
402
416
|
return self._parse_cmap_format_0(data)
|
403
417
|
elif _fmt == 2:
|
@@ -416,6 +430,7 @@ class TrueTypeFont:
|
|
416
430
|
return self._parse_cmap_format_13(data)
|
417
431
|
elif _fmt == 14:
|
418
432
|
return self._parse_cmap_format_14(data)
|
433
|
+
self.cmap_version = -1
|
419
434
|
return False
|
420
435
|
|
421
436
|
def _parse_cmap_format_0(self, data):
|
@@ -425,9 +440,75 @@ class TrueTypeFont:
|
|
425
440
|
return True
|
426
441
|
|
427
442
|
def _parse_cmap_format_2(self, data):
|
428
|
-
|
429
|
-
|
430
|
-
|
443
|
+
"""
|
444
|
+
Format 2: high-byte mapping through table
|
445
|
+
Used for mixed 8/16-bit encoding (primarily for CJK fonts)
|
446
|
+
This is a complex format - implementing basic support
|
447
|
+
"""
|
448
|
+
try:
|
449
|
+
length, language = struct.unpack(">HH", data.read(4))
|
450
|
+
|
451
|
+
# Read subheader keys (256 entries, each 2 bytes)
|
452
|
+
subheader_keys = struct.unpack(">256H", data.read(256 * 2))
|
453
|
+
|
454
|
+
# Find the maximum subheader index to determine how many subheaders we have
|
455
|
+
max_subheader_index = max(subheader_keys)
|
456
|
+
num_subheaders = (max_subheader_index // 8) + 1 # Each subheader is 8 bytes
|
457
|
+
|
458
|
+
# Calculate remaining data size for validation
|
459
|
+
remaining_data_size = len(data.getvalue()) - data.tell()
|
460
|
+
expected_subheader_size = num_subheaders * 8
|
461
|
+
|
462
|
+
if remaining_data_size < expected_subheader_size:
|
463
|
+
error_msg = f"Insufficient data for subheaders in cmap format 2: expected {expected_subheader_size} bytes, got {remaining_data_size} bytes"
|
464
|
+
self._logger(error_msg)
|
465
|
+
raise TTFParsingError(error_msg)
|
466
|
+
|
467
|
+
# Read subheaders
|
468
|
+
subheaders = []
|
469
|
+
for _ in range(num_subheaders):
|
470
|
+
first_code, entry_count, id_delta, id_range_offset = struct.unpack(
|
471
|
+
">HHHH", data.read(8)
|
472
|
+
)
|
473
|
+
subheaders.append((first_code, entry_count, id_delta, id_range_offset))
|
474
|
+
|
475
|
+
# For format 2, character mapping is complex and depends on:
|
476
|
+
# - High byte determining which subheader to use
|
477
|
+
# - Low byte being processed through that subheader
|
478
|
+
#
|
479
|
+
# This is primarily used for CJK encodings and requires careful handling
|
480
|
+
# For now, we'll implement basic single-byte mapping (subheader 0)
|
481
|
+
|
482
|
+
if subheaders:
|
483
|
+
first_code, entry_count, id_delta, id_range_offset = subheaders[0]
|
484
|
+
|
485
|
+
# For single-byte characters (using subheader 0)
|
486
|
+
for byte_val in range(256):
|
487
|
+
if (
|
488
|
+
subheader_keys[byte_val] == 0
|
489
|
+
and byte_val >= first_code
|
490
|
+
and byte_val < first_code + entry_count
|
491
|
+
):
|
492
|
+
# This character has a mapping in subheader 0
|
493
|
+
try:
|
494
|
+
char_code = byte_val
|
495
|
+
if 0 <= char_code <= 0x10FFFF:
|
496
|
+
# Simple mapping for basic characters
|
497
|
+
glyph_id = (char_code + id_delta) & 0xFFFF
|
498
|
+
if glyph_id != 0: # 0 means missing glyph
|
499
|
+
self._character_map[chr(char_code)] = glyph_id
|
500
|
+
except ValueError:
|
501
|
+
continue
|
502
|
+
|
503
|
+
return True
|
504
|
+
except struct.error as e:
|
505
|
+
error_msg = f"Struct unpacking error in cmap format 2: {e}"
|
506
|
+
self._logger(error_msg)
|
507
|
+
raise TTFParsingError(error_msg) from e
|
508
|
+
except Exception as e:
|
509
|
+
error_msg = f"Error parsing cmap format 2: {e}"
|
510
|
+
self._logger(error_msg)
|
511
|
+
raise TTFParsingError(error_msg) from e
|
431
512
|
|
432
513
|
def _parse_cmap_format_4(self, data):
|
433
514
|
(
|
@@ -443,7 +524,7 @@ class TrueTypeFont:
|
|
443
524
|
# We need to have an even amount of bytes for unpack
|
444
525
|
if len(data) % 2 == 1:
|
445
526
|
data = data[:-1]
|
446
|
-
data = struct.unpack(f">{
|
527
|
+
data = struct.unpack(f">{len(data)//2}H", data)
|
447
528
|
ends = data[:seg_count]
|
448
529
|
starts = data[seg_count + 1 : seg_count * 2 + 1]
|
449
530
|
deltas = data[seg_count * 2 + 1 : seg_count * 3 + 1]
|
@@ -453,7 +534,7 @@ class TrueTypeFont:
|
|
453
534
|
start = starts[seg]
|
454
535
|
delta = deltas[seg]
|
455
536
|
offset = offsets[seg]
|
456
|
-
if start == end
|
537
|
+
if start == end == 0xFFFF:
|
457
538
|
break
|
458
539
|
|
459
540
|
for c in range(start, end + 1):
|
@@ -473,16 +554,125 @@ class TrueTypeFont:
|
|
473
554
|
language,
|
474
555
|
first_code,
|
475
556
|
entry_count,
|
476
|
-
) = struct.unpack(">
|
477
|
-
|
478
|
-
|
557
|
+
) = struct.unpack(">HHHH", data.read(8))
|
558
|
+
glyph_indices = struct.unpack(f">{entry_count}H", data.read(entry_count * 2))
|
559
|
+
for i, glyph_index in enumerate(glyph_indices):
|
560
|
+
try:
|
561
|
+
char_code = i + first_code
|
562
|
+
if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
|
563
|
+
self._character_map[chr(char_code)] = glyph_index
|
564
|
+
except ValueError:
|
565
|
+
# Invalid Unicode character, skip
|
566
|
+
continue
|
479
567
|
return True
|
480
568
|
|
481
569
|
def _parse_cmap_format_8(self, data):
|
482
|
-
|
570
|
+
"""
|
571
|
+
Format 8: mixed 16-bit and 32-bit coverage
|
572
|
+
Used for Unicode variation sequences and supplementary characters
|
573
|
+
"""
|
574
|
+
try:
|
575
|
+
# Read header
|
576
|
+
reserved, length, language = struct.unpack(">HII", data.read(10))
|
577
|
+
|
578
|
+
# Read is32 array (8192 bytes = 65536 bits, one bit per 16-bit code)
|
579
|
+
is32_data = data.read(8192)
|
580
|
+
if len(is32_data) < 8192:
|
581
|
+
error_msg = "Insufficient data for is32 array in cmap format 8"
|
582
|
+
self._logger(error_msg)
|
583
|
+
raise TTFParsingError(error_msg)
|
584
|
+
|
585
|
+
# Read number of groups
|
586
|
+
n_groups = struct.unpack(">I", data.read(4))[0]
|
587
|
+
|
588
|
+
# Process each group
|
589
|
+
for group_idx in range(n_groups):
|
590
|
+
if len(data.getvalue()) - data.tell() < 12:
|
591
|
+
error_msg = (
|
592
|
+
f"Insufficient data for group {group_idx} in cmap format 8"
|
593
|
+
)
|
594
|
+
self._logger(error_msg)
|
595
|
+
raise TTFParsingError(error_msg)
|
596
|
+
|
597
|
+
start_char_code, end_char_code, start_glyph_id = struct.unpack(
|
598
|
+
">III", data.read(12)
|
599
|
+
)
|
600
|
+
|
601
|
+
# Validate group
|
602
|
+
if start_char_code > end_char_code:
|
603
|
+
continue # Skip invalid group
|
604
|
+
|
605
|
+
# Map characters in this group
|
606
|
+
for char_code in range(start_char_code, end_char_code + 1):
|
607
|
+
try:
|
608
|
+
if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
|
609
|
+
glyph_id = start_glyph_id + (char_code - start_char_code)
|
610
|
+
self._character_map[chr(char_code)] = glyph_id
|
611
|
+
except ValueError:
|
612
|
+
# Invalid Unicode character, skip
|
613
|
+
continue
|
614
|
+
|
615
|
+
return True
|
616
|
+
except struct.error as e:
|
617
|
+
error_msg = f"Struct unpacking error in cmap format 8: {e}"
|
618
|
+
self._logger(error_msg)
|
619
|
+
raise TTFParsingError(error_msg) from e
|
620
|
+
except Exception as e:
|
621
|
+
error_msg = f"Error parsing cmap format 8: {e}"
|
622
|
+
self._logger(error_msg)
|
623
|
+
raise TTFParsingError(error_msg) from e
|
483
624
|
|
484
625
|
def _parse_cmap_format_10(self, data):
|
485
|
-
|
626
|
+
"""
|
627
|
+
Format 10: trimmed table
|
628
|
+
Similar to format 6 but uses 32-bit character codes and glyph IDs
|
629
|
+
"""
|
630
|
+
try:
|
631
|
+
# Read header (reserved, length, language, startCharCode, numChars)
|
632
|
+
reserved, length, language, start_char_code, num_chars = struct.unpack(
|
633
|
+
">HIIII", data.read(18)
|
634
|
+
)
|
635
|
+
|
636
|
+
# Validate parameters
|
637
|
+
if num_chars == 0:
|
638
|
+
return True # Empty table is valid
|
639
|
+
|
640
|
+
if start_char_code > 0x10FFFF:
|
641
|
+
error_msg = (
|
642
|
+
f"Invalid start character code in cmap format 10: {start_char_code}"
|
643
|
+
)
|
644
|
+
self._logger(error_msg)
|
645
|
+
raise TTFParsingError(error_msg)
|
646
|
+
|
647
|
+
# Check we have enough data for the glyph array
|
648
|
+
expected_data_size = num_chars * 2 # 2 bytes per glyph ID
|
649
|
+
if len(data.getvalue()) - data.tell() < expected_data_size:
|
650
|
+
error_msg = f"Insufficient data for glyph array in cmap format 10: expected {expected_data_size} bytes"
|
651
|
+
self._logger(error_msg)
|
652
|
+
raise TTFParsingError(error_msg)
|
653
|
+
|
654
|
+
# Read glyph IDs
|
655
|
+
glyph_ids = struct.unpack(f">{num_chars}H", data.read(expected_data_size))
|
656
|
+
|
657
|
+
# Map characters to glyphs
|
658
|
+
for i, glyph_id in enumerate(glyph_ids):
|
659
|
+
char_code = start_char_code + i
|
660
|
+
try:
|
661
|
+
if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
|
662
|
+
self._character_map[chr(char_code)] = glyph_id
|
663
|
+
except ValueError:
|
664
|
+
# Invalid Unicode character, skip
|
665
|
+
continue
|
666
|
+
|
667
|
+
return True
|
668
|
+
except struct.error as e:
|
669
|
+
error_msg = f"Struct unpacking error in cmap format 10: {e}"
|
670
|
+
self._logger(error_msg)
|
671
|
+
raise TTFParsingError(error_msg) from e
|
672
|
+
except Exception as e:
|
673
|
+
error_msg = f"Error parsing cmap format 10: {e}"
|
674
|
+
self._logger(error_msg)
|
675
|
+
raise TTFParsingError(error_msg) from e
|
486
676
|
|
487
677
|
def _parse_cmap_format_12(self, data):
|
488
678
|
(
|
@@ -491,13 +681,19 @@ class TrueTypeFont:
|
|
491
681
|
language,
|
492
682
|
n_groups,
|
493
683
|
) = struct.unpack(">HIII", data.read(14))
|
494
|
-
for
|
684
|
+
for _ in range(n_groups):
|
495
685
|
(start_char_code, end_char_code, start_glyph_code) = struct.unpack(
|
496
686
|
">III", data.read(12)
|
497
687
|
)
|
498
688
|
|
499
|
-
for
|
500
|
-
|
689
|
+
for char_code in range(start_char_code, end_char_code + 1):
|
690
|
+
try:
|
691
|
+
if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
|
692
|
+
glyph_index = start_glyph_code + (char_code - start_char_code)
|
693
|
+
self._character_map[chr(char_code)] = glyph_index
|
694
|
+
except ValueError:
|
695
|
+
# Invalid Unicode character, skip
|
696
|
+
continue
|
501
697
|
return True
|
502
698
|
|
503
699
|
def _parse_cmap_format_13(self, data):
|
@@ -507,17 +703,174 @@ class TrueTypeFont:
|
|
507
703
|
language,
|
508
704
|
n_groups,
|
509
705
|
) = struct.unpack(">HIII", data.read(14))
|
510
|
-
for
|
706
|
+
for _ in range(n_groups):
|
511
707
|
(start_char_code, end_char_code, glyph_code) = struct.unpack(
|
512
708
|
">III", data.read(12)
|
513
709
|
)
|
514
710
|
|
515
|
-
for
|
516
|
-
|
711
|
+
for char_code in range(start_char_code, end_char_code + 1):
|
712
|
+
try:
|
713
|
+
if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
|
714
|
+
self._character_map[chr(char_code)] = glyph_code
|
715
|
+
except ValueError:
|
716
|
+
# Invalid Unicode character, skip
|
717
|
+
continue
|
517
718
|
return True
|
518
719
|
|
519
720
|
def _parse_cmap_format_14(self, data):
|
520
|
-
|
721
|
+
"""
|
722
|
+
Format 14: Unicode variation sequences
|
723
|
+
Maps variation selector sequences to glyphs
|
724
|
+
This format handles Unicode Variation Sequences (UVS) where a base character
|
725
|
+
combined with a variation selector can map to a specific glyph variant.
|
726
|
+
|
727
|
+
Performance optimized version to handle large ranges efficiently.
|
728
|
+
"""
|
729
|
+
try:
|
730
|
+
# Store current position to calculate relative offsets
|
731
|
+
subtable_start = (
|
732
|
+
data.tell() - 6
|
733
|
+
) # Subtract 6 for format and length already read
|
734
|
+
|
735
|
+
# Read header
|
736
|
+
length, num_var_selector_records = struct.unpack(">IH", data.read(6))
|
737
|
+
|
738
|
+
# Limit processing to avoid infinite loops on malformed fonts
|
739
|
+
MAX_VAR_SELECTOR_RECORDS = 100
|
740
|
+
MAX_UNICODE_RANGES = 1000
|
741
|
+
MAX_UVS_MAPPINGS = 10000
|
742
|
+
MAX_RANGE_SIZE = 10000 # Limit individual range processing
|
743
|
+
|
744
|
+
if num_var_selector_records > MAX_VAR_SELECTOR_RECORDS:
|
745
|
+
warning_msg = f"Warning: Too many variation selector records ({num_var_selector_records}), limiting to {MAX_VAR_SELECTOR_RECORDS}"
|
746
|
+
self._logger(warning_msg)
|
747
|
+
num_var_selector_records = MAX_VAR_SELECTOR_RECORDS
|
748
|
+
|
749
|
+
# Each variation selector record is 11 bytes
|
750
|
+
for record_idx in range(num_var_selector_records):
|
751
|
+
if len(data.getvalue()) - data.tell() < 11:
|
752
|
+
error_msg = (
|
753
|
+
f"Insufficient data for variation selector record {record_idx}"
|
754
|
+
)
|
755
|
+
self._logger(error_msg)
|
756
|
+
break # Skip remaining records instead of crashing
|
757
|
+
|
758
|
+
# Read variation selector record (24-bit variation selector + 2 offsets)
|
759
|
+
vs_bytes = data.read(3)
|
760
|
+
variation_selector = struct.unpack(">I", vs_bytes + b"\x00")[
|
761
|
+
0
|
762
|
+
] # Convert 24-bit to 32-bit
|
763
|
+
default_uvs_offset, non_default_uvs_offset = struct.unpack(
|
764
|
+
">II", data.read(8)
|
765
|
+
)
|
766
|
+
|
767
|
+
# Save current position to return to after processing tables
|
768
|
+
current_pos = data.tell()
|
769
|
+
|
770
|
+
# Process Default UVS Table (if present) - OPTIMIZED
|
771
|
+
if default_uvs_offset != 0:
|
772
|
+
try:
|
773
|
+
# Seek to default UVS table (offset is from start of cmap subtable)
|
774
|
+
data.seek(subtable_start + default_uvs_offset)
|
775
|
+
|
776
|
+
# Read number of Unicode ranges
|
777
|
+
num_unicode_ranges = struct.unpack(">I", data.read(4))[0]
|
778
|
+
|
779
|
+
if num_unicode_ranges > MAX_UNICODE_RANGES:
|
780
|
+
warning_msg = f"Warning: Too many Unicode ranges ({num_unicode_ranges}), limiting to {MAX_UNICODE_RANGES}"
|
781
|
+
self._logger(warning_msg)
|
782
|
+
num_unicode_ranges = MAX_UNICODE_RANGES
|
783
|
+
|
784
|
+
# Process each Unicode range - WITH LIMITS
|
785
|
+
for _ in range(num_unicode_ranges):
|
786
|
+
if len(data.getvalue()) - data.tell() < 4:
|
787
|
+
break # Not enough data for this range
|
788
|
+
|
789
|
+
# Each range is 4 bytes: 3-byte start code + 1-byte additional count
|
790
|
+
range_data = data.read(4)
|
791
|
+
start_unicode_value = struct.unpack(
|
792
|
+
">I", range_data[:3] + b"\x00"
|
793
|
+
)[0]
|
794
|
+
additional_count = range_data[3]
|
795
|
+
|
796
|
+
# Limit range size to prevent infinite loops
|
797
|
+
if additional_count > MAX_RANGE_SIZE:
|
798
|
+
warning_msg = f"Warning: Large range size ({additional_count}), limiting to {MAX_RANGE_SIZE}"
|
799
|
+
self._logger(warning_msg)
|
800
|
+
additional_count = MAX_RANGE_SIZE
|
801
|
+
|
802
|
+
# Pre-build character map for efficient lookup
|
803
|
+
char_map_keys = set(
|
804
|
+
ord(c) for c in self._character_map.keys()
|
805
|
+
)
|
806
|
+
|
807
|
+
# Map all characters in this range - OPTIMIZED
|
808
|
+
for offset in range(additional_count + 1):
|
809
|
+
base_char = start_unicode_value + offset
|
810
|
+
if (
|
811
|
+
0 <= base_char <= 0x10FFFF
|
812
|
+
and base_char in char_map_keys
|
813
|
+
):
|
814
|
+
try:
|
815
|
+
# For default UVS, use the default glyph mapping
|
816
|
+
base_char_obj = chr(base_char)
|
817
|
+
# Store variation sequence mapping
|
818
|
+
vs_key = (base_char, variation_selector)
|
819
|
+
self._variation_sequences[
|
820
|
+
vs_key
|
821
|
+
] = self._character_map[base_char_obj]
|
822
|
+
except (ValueError, KeyError):
|
823
|
+
continue
|
824
|
+
except (struct.error, IndexError) as e:
|
825
|
+
error_msg = f"Error processing default UVS table: {e}"
|
826
|
+
self._logger(error_msg)
|
827
|
+
|
828
|
+
# Process Non-Default UVS Table (if present) - OPTIMIZED
|
829
|
+
if non_default_uvs_offset != 0:
|
830
|
+
try:
|
831
|
+
# Seek to non-default UVS table
|
832
|
+
data.seek(subtable_start + non_default_uvs_offset)
|
833
|
+
|
834
|
+
# Read number of UVS mappings
|
835
|
+
num_uvs_mappings = struct.unpack(">I", data.read(4))[0]
|
836
|
+
|
837
|
+
if num_uvs_mappings > MAX_UVS_MAPPINGS:
|
838
|
+
warning_msg = f"Warning: Too many UVS mappings ({num_uvs_mappings}), limiting to {MAX_UVS_MAPPINGS}"
|
839
|
+
self._logger(warning_msg)
|
840
|
+
num_uvs_mappings = MAX_UVS_MAPPINGS
|
841
|
+
|
842
|
+
# Process each UVS mapping
|
843
|
+
for _ in range(num_uvs_mappings):
|
844
|
+
if len(data.getvalue()) - data.tell() < 5:
|
845
|
+
break # Not enough data for this mapping
|
846
|
+
|
847
|
+
# Each mapping is 5 bytes: 3-byte Unicode value + 2-byte glyph ID
|
848
|
+
mapping_data = data.read(5)
|
849
|
+
unicode_value = struct.unpack(
|
850
|
+
">I", mapping_data[:3] + b"\x00"
|
851
|
+
)[0]
|
852
|
+
glyph_id = struct.unpack(">H", mapping_data[3:5])[0]
|
853
|
+
|
854
|
+
if 0 <= unicode_value <= 0x10FFFF:
|
855
|
+
# Store non-default variation sequence mapping
|
856
|
+
vs_key = (unicode_value, variation_selector)
|
857
|
+
self._variation_sequences[vs_key] = glyph_id
|
858
|
+
except (struct.error, IndexError) as e:
|
859
|
+
error_msg = f"Error processing non-default UVS table: {e}"
|
860
|
+
self._logger(error_msg)
|
861
|
+
|
862
|
+
# Return to position after variation selector record
|
863
|
+
data.seek(current_pos)
|
864
|
+
|
865
|
+
return True
|
866
|
+
except struct.error as e:
|
867
|
+
error_msg = f"Struct unpacking error in cmap format 14: {e}"
|
868
|
+
self._logger(error_msg)
|
869
|
+
return False # Don't crash, just return False
|
870
|
+
except Exception as e:
|
871
|
+
error_msg = f"Error parsing cmap format 14: {e}"
|
872
|
+
self._logger(error_msg)
|
873
|
+
return False # Don't crash, just return False
|
521
874
|
|
522
875
|
def parse_hhea(self):
|
523
876
|
data = self._raw_tables[b"hhea"]
|
@@ -544,18 +897,31 @@ class TrueTypeFont:
|
|
544
897
|
def parse_hmtx(self):
|
545
898
|
data = self._raw_tables[b"hmtx"]
|
546
899
|
count = self.number_of_long_hor_metrics
|
900
|
+
|
901
|
+
# Check if we have enough data for the long horizontal metrics
|
902
|
+
if len(data) < count * 4:
|
903
|
+
error_msg = f"Insufficient data in hmtx table: expected {count * 4} bytes, got {len(data)}"
|
904
|
+
self._logger(error_msg)
|
905
|
+
raise TTFParsingError(error_msg)
|
906
|
+
|
547
907
|
hm = struct.unpack(f">{'Hh' * count}", data[: count * 4])
|
548
908
|
self.horizontal_metrics = [
|
549
909
|
(hm[2 * i], hm[2 * i + 1]) for i in range(len(hm) // 2)
|
550
910
|
]
|
551
|
-
|
911
|
+
|
912
|
+
# Handle additional left side bearings for remaining glyphs
|
913
|
+
last_advance = hm[-2] if hm else 0
|
552
914
|
table_start = count * 4
|
553
915
|
if len(data) > table_start:
|
554
916
|
remaining = (len(data) - table_start) // 2
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
917
|
+
if remaining > 0:
|
918
|
+
left_bearings = struct.unpack(
|
919
|
+
f">{remaining}h", data[table_start : table_start + remaining * 2]
|
920
|
+
)
|
921
|
+
# Extend with tuples of (last_advance, left_bearing)
|
922
|
+
self.horizontal_metrics.extend(
|
923
|
+
[(last_advance, lb) for lb in left_bearings]
|
924
|
+
)
|
559
925
|
|
560
926
|
def parse_loca(self):
|
561
927
|
try:
|
@@ -564,10 +930,10 @@ class TrueTypeFont:
|
|
564
930
|
self._glyph_offsets = []
|
565
931
|
return
|
566
932
|
if self.index_to_loc_format == 0:
|
567
|
-
n =
|
933
|
+
n = len(data) // 2
|
568
934
|
self._glyph_offsets = [g * 2 for g in struct.unpack(f">{n}H", data)]
|
569
935
|
else:
|
570
|
-
n =
|
936
|
+
n = len(data) // 4
|
571
937
|
self._glyph_offsets = struct.unpack(f">{n}I", data)
|
572
938
|
|
573
939
|
def parse_glyf(self):
|
@@ -579,7 +945,7 @@ class TrueTypeFont:
|
|
579
945
|
start = self._glyph_offsets[index]
|
580
946
|
end = self._glyph_offsets[index + 1]
|
581
947
|
if start == end:
|
582
|
-
yield
|
948
|
+
yield []
|
583
949
|
return
|
584
950
|
yield from self._parse_glyph(BytesIO(data[start:end]))
|
585
951
|
|
@@ -669,7 +1035,6 @@ class TrueTypeFont:
|
|
669
1035
|
transform_dx, transform_dy = float(arg1), float(arg2)
|
670
1036
|
else:
|
671
1037
|
# Arguments are point indices for point matching
|
672
|
-
dest_point_index, src_point_index = arg1, arg2
|
673
1038
|
# Point matching not fully implemented - would need to find
|
674
1039
|
# matching points in already processed contours and source glyph
|
675
1040
|
transform_dx, transform_dy = 0.0, 0.0
|
@@ -716,45 +1081,123 @@ class TrueTypeFont:
|
|
716
1081
|
yield from all_contours
|
717
1082
|
|
718
1083
|
def _parse_simple_glyph(self, num_contours, data):
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
1084
|
+
try:
|
1085
|
+
# Check we have enough data for contour endpoints
|
1086
|
+
if len(data.getvalue()) - data.tell() < num_contours * 2:
|
1087
|
+
error_msg = "Insufficient data for contour endpoints"
|
1088
|
+
self._logger(error_msg)
|
1089
|
+
raise TTFParsingError(error_msg)
|
1090
|
+
|
1091
|
+
end_pts = struct.unpack(f">{num_contours}H", data.read(2 * num_contours))
|
1092
|
+
|
1093
|
+
# Check we have enough data for instruction length
|
1094
|
+
if len(data.getvalue()) - data.tell() < 2:
|
1095
|
+
error_msg = "Insufficient data for instruction length"
|
1096
|
+
self._logger(error_msg)
|
1097
|
+
raise TTFParsingError(error_msg)
|
1098
|
+
|
1099
|
+
inst_len = struct.unpack(">H", data.read(2))[0]
|
1100
|
+
|
1101
|
+
# Check we have enough data for instructions
|
1102
|
+
if len(data.getvalue()) - data.tell() < inst_len:
|
1103
|
+
error_msg = "Insufficient data for instructions"
|
1104
|
+
self._logger(error_msg)
|
1105
|
+
raise TTFParsingError(error_msg)
|
1106
|
+
|
1107
|
+
_ = data.read(inst_len) # Read instructions but don't store unused variable
|
1108
|
+
|
1109
|
+
if not end_pts:
|
1110
|
+
return
|
1111
|
+
|
1112
|
+
num_points = max(end_pts) + 1
|
1113
|
+
if num_points <= 0:
|
1114
|
+
return
|
1115
|
+
|
1116
|
+
# Read flags with bounds checking
|
1117
|
+
flags = []
|
1118
|
+
while len(flags) < num_points:
|
1119
|
+
if len(data.getvalue()) - data.tell() < 1:
|
1120
|
+
error_msg = "Insufficient data for flags"
|
1121
|
+
self._logger(error_msg)
|
1122
|
+
raise TTFParsingError(error_msg)
|
1123
|
+
|
1124
|
+
flag = ord(data.read(1))
|
1125
|
+
flags.append(flag)
|
1126
|
+
if flag & 0x8: # Repeat flag
|
1127
|
+
if len(data.getvalue()) - data.tell() < 1:
|
1128
|
+
error_msg = "Insufficient data for repeat count"
|
1129
|
+
self._logger(error_msg)
|
1130
|
+
raise TTFParsingError(error_msg)
|
1131
|
+
repeat_count = ord(data.read(1))
|
1132
|
+
flags.extend([flag] * repeat_count)
|
1133
|
+
|
1134
|
+
# Truncate flags if we read too many
|
1135
|
+
flags = flags[:num_points]
|
1136
|
+
|
1137
|
+
x_coords = list(self._read_coords(num_points, 0x2, 0x10, flags, data))
|
1138
|
+
y_coords = list(self._read_coords(num_points, 0x4, 0x20, flags, data))
|
1139
|
+
|
1140
|
+
start = 0
|
1141
|
+
for end in end_pts:
|
1142
|
+
if end >= num_points:
|
1143
|
+
error_msg = f"Invalid contour endpoint: {end} >= {num_points}"
|
1144
|
+
self._logger(error_msg)
|
1145
|
+
raise TTFParsingError(error_msg)
|
1146
|
+
yield list(
|
1147
|
+
zip(
|
1148
|
+
x_coords[start : end + 1],
|
1149
|
+
y_coords[start : end + 1],
|
1150
|
+
flags[start : end + 1],
|
1151
|
+
)
|
739
1152
|
)
|
740
|
-
|
741
|
-
|
1153
|
+
start = end + 1
|
1154
|
+
except struct.error as e:
|
1155
|
+
error_msg = f"Struct unpacking error in simple glyph: {e}"
|
1156
|
+
self._logger(error_msg)
|
1157
|
+
raise TTFParsingError(error_msg) from e
|
1158
|
+
except (IndexError, ValueError) as e:
|
1159
|
+
error_msg = f"Error parsing simple glyph: {e}"
|
1160
|
+
self._logger(error_msg)
|
1161
|
+
raise TTFParsingError(error_msg) from e
|
742
1162
|
|
743
1163
|
def _read_coords(self, num_points, bit_byte, bit_delta, flags, data):
|
744
1164
|
value = 0
|
745
1165
|
for i in range(num_points):
|
1166
|
+
if i >= len(flags):
|
1167
|
+
error_msg = f"Flag index {i} out of range (flags length: {len(flags)})"
|
1168
|
+
self._logger(error_msg)
|
1169
|
+
raise TTFParsingError(error_msg)
|
1170
|
+
|
746
1171
|
flag = flags[i]
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
1172
|
+
try:
|
1173
|
+
if flag & bit_byte:
|
1174
|
+
# Single byte coordinate
|
1175
|
+
if len(data.getvalue()) - data.tell() < 1:
|
1176
|
+
error_msg = "Insufficient data for single byte coordinate"
|
1177
|
+
self._logger(error_msg)
|
1178
|
+
raise TTFParsingError(
|
1179
|
+
"Insufficient data for single byte coordinate"
|
1180
|
+
)
|
1181
|
+
x = struct.unpack("B", data.read(1))[0]
|
1182
|
+
if flag & bit_delta:
|
1183
|
+
value += x
|
1184
|
+
else:
|
1185
|
+
value -= x
|
1186
|
+
elif ~flag & bit_delta:
|
1187
|
+
# Two byte coordinate
|
1188
|
+
if len(data.getvalue()) - data.tell() < 2:
|
1189
|
+
error_msg = "Insufficient data for two byte coordinate"
|
1190
|
+
self._logger(error_msg)
|
1191
|
+
raise TTFParsingError(
|
1192
|
+
"Insufficient data for two byte coordinate"
|
1193
|
+
)
|
1194
|
+
value += struct.unpack(">h", data.read(2))[0]
|
1195
|
+
# Coordinate unchanged from previous
|
1196
|
+
yield value
|
1197
|
+
except struct.error as e:
|
1198
|
+
error_msg = f"Struct unpacking error in coordinates: {e}"
|
1199
|
+
self._logger(error_msg)
|
1200
|
+
raise TTFParsingError(error_msg) from e
|
758
1201
|
|
759
1202
|
def parse_name(self):
|
760
1203
|
def decode(string):
|
@@ -788,12 +1231,249 @@ class TrueTypeFont:
|
|
788
1231
|
length,
|
789
1232
|
str_offset,
|
790
1233
|
) in records:
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
1234
|
+
try:
|
1235
|
+
if name_id == 1:
|
1236
|
+
self.font_family = decode(strings[str_offset : str_offset + length])
|
1237
|
+
elif name_id == 2:
|
1238
|
+
self.font_subfamily = decode(
|
1239
|
+
strings[str_offset : str_offset + length]
|
1240
|
+
)
|
1241
|
+
elif name_id == 3:
|
1242
|
+
# Unique Subfamily Name
|
1243
|
+
pass
|
1244
|
+
elif name_id == 4:
|
1245
|
+
self.font_name = decode(strings[str_offset : str_offset + length])
|
1246
|
+
except (IndexError, UnicodeDecodeError) as e:
|
1247
|
+
# Log error but continue parsing other name records
|
1248
|
+
warning_msg = f"Warning: Error decoding name record {name_id}: {e}"
|
1249
|
+
self._logger(warning_msg)
|
1250
|
+
continue
|
1251
|
+
|
1252
|
+
def get_variation_sequences(self):
    """
    Return the mapping of Unicode variation sequences to glyph IDs.

    Returns:
        dict: Keys are (base_char, variation_selector) tuples and values are
            glyph IDs, e.g. {(0x4E00, 0xFE00): 1234}. An empty dict is
            returned when the attribute has not been set on this object.
    """
    try:
        return self._variation_sequences
    except AttributeError:
        # Nothing stored on this object: report "no sequences".
        return {}
|
1262
|
+
|
1263
|
+
def has_variation_sequences(self):
    """
    Tell whether any Unicode variation sequence mapping is stored.

    Returns:
        bool: True when the stored mapping is non-empty, False when it is
            empty or was never set.
    """
    sequences = getattr(self, "_variation_sequences", None)
    if sequences:
        return True
    return False
|
1271
|
+
|
1272
|
+
def get_glyph_index(self, char, variation_selector=None):
    """
    Get the glyph index for a character, optionally with a variation selector.

    Args:
        char (str or int): The base character, or its Unicode code point.
        variation_selector (int, optional): Variation selector code point.
            When given, a matching variation sequence takes precedence over
            the regular character map.

    Returns:
        int: Glyph index, or 0 if the character is not mapped or the
            numeric code cannot be converted to a character.
    """
    if variation_selector is not None:
        # A variation sequence, if present, wins over the plain mapping.
        char_code = ord(char) if isinstance(char, str) else char
        vs_key = (char_code, variation_selector)
        if vs_key in self._variation_sequences:
            return self._variation_sequences[vs_key]

    # Fall back to the regular character map, which is keyed by characters.
    if isinstance(char, str):
        return self._character_map.get(char, 0)

    try:
        return self._character_map.get(chr(char), 0)
    except (ValueError, OverflowError):
        # chr() raises ValueError just above the Unicode range and
        # OverflowError for ints too large for a C int; both mean "no glyph".
        return 0
|
1299
|
+
|
1300
|
+
def has_variation_selector(self, char, variation_selector):
    """
    Report whether a base character has a mapping for a variation selector.

    Args:
        char (str or int): Base character, or its code point.
        variation_selector (int): Variation selector code point.

    Returns:
        bool: True if the (code point, selector) pair is a stored variation
            sequence, False otherwise.
    """
    if isinstance(char, str):
        code_point = ord(char)
    else:
        code_point = char
    return (code_point, variation_selector) in self._variation_sequences
|
1314
|
+
|
1315
|
+
def get_available_variation_selectors(self, char):
    """
    Collect every variation selector stored for a given base character.

    Args:
        char (str or int): Base character, or its code point.

    Returns:
        list: Variation selector code points whose stored sequence starts
            with this character, in the mapping's iteration order.
    """
    wanted = char if not isinstance(char, str) else ord(char)
    selectors = []
    for base, selector in self._variation_sequences:
        if base == wanted:
            selectors.append(selector)
    return selectors
|
1331
|
+
|
1332
|
+
def lookup_glyph_with_variation(self, base_char, variation_selector=None):
    """
    Resolve a glyph ID for a character, honouring an optional variation selector.

    Args:
        base_char (str or int): Base character, or its Unicode code point.
        variation_selector (int, optional): Variation selector code point.

    Returns:
        int: The glyph ID of the variation sequence when one is stored,
            otherwise the regular character-map glyph, or 0 if not found.
    """
    # Normalise the input to a code point.
    if isinstance(base_char, str):
        code_point = ord(base_char)
    else:
        code_point = base_char

    # A stored variation sequence takes priority.
    if variation_selector is not None:
        sequence = (code_point, variation_selector)
        if sequence in self._variation_sequences:
            return self._variation_sequences[sequence]

    # The regular map is keyed by characters, so turn the code point back
    # into one; an invalid code point simply has no glyph.
    try:
        return self._character_map.get(chr(code_point), 0)
    except (ValueError, OverflowError):
        return 0
|
1359
|
+
|
1360
|
+
def parse_text_with_variation_sequences(self, text):
    """
    Split text into base code points paired with their variation selectors.

    The text is first decoded into code points, merging any high/low
    surrogate pair into the single code point it represents. A variation
    selector (U+FE00-U+FE0F or U+E0100-U+E01EF) that directly follows a code
    point is attached to it rather than emitted on its own.

    Args:
        text (str): Input text that may contain variation sequences.

    Yields:
        tuple: (base_char_code, variation_selector); the selector is None
            when the code point is not followed by a variation selector.
    """
    # Pass 1: decode to code points, combining surrogate pairs.
    decoded = []
    length = len(text)
    pos = 0
    while pos < length:
        unit = ord(text[pos])
        if 0xD800 <= unit <= 0xDBFF and pos + 1 < length:
            trail = ord(text[pos + 1])
            if 0xDC00 <= trail <= 0xDFFF:
                decoded.append(
                    0x10000 + ((unit - 0xD800) << 10) + (trail - 0xDC00)
                )
                pos += 2  # both halves of the pair are consumed
                continue
        # BMP character, or a surrogate without a partner: keep as-is.
        decoded.append(unit)
        pos += 1

    # Pass 2: pair each code point with a directly following selector.
    total = len(decoded)
    cursor = 0
    while cursor < total:
        base = decoded[cursor]
        cursor += 1
        selector = None
        if cursor < total:
            candidate = decoded[cursor]
            if 0xFE00 <= candidate <= 0xFE0F or 0xE0100 <= candidate <= 0xE01EF:
                selector = candidate
                cursor += 1  # the selector is not yielded on its own
        yield (base, selector)
|
1426
|
+
|
1427
|
+
def debug_variation_sequences(self):
    """
    Build a human-readable summary of the stored variation sequences.

    Returns:
        str: A fixed message when no sequences are stored; otherwise a
            header line followed by one line per sequence, joined by
            newlines.
    """
    sequences = self._variation_sequences
    if not sequences:
        return "No variation sequences found in font"

    lines = [f"Found {len(sequences)} variation sequences:"]
    for key, glyph_id in sequences.items():
        base_char, vs = key
        try:
            if isinstance(base_char, int):
                base_char_str = chr(base_char)
            else:
                base_char_str = str(base_char)
            vs_str = f"U+{vs:04X}" if vs else "None"
            entry = f" {base_char_str} (U+{base_char:04X}) + {vs_str} -> glyph {glyph_id}"
        except (ValueError, TypeError):
            # Values that cannot be rendered as code points are shown raw.
            entry = f" {base_char} + {vs} -> glyph {glyph_id}"
        lines.append(entry)

    return "\n".join(lines)
|
1451
|
+
|
1452
|
+
def test_variation_sequence_lookup(self, base_char, variation_selector):
|
1453
|
+
"""
|
1454
|
+
Test method to check if a specific variation sequence is supported.
|
1455
|
+
|
1456
|
+
Args:
|
1457
|
+
base_char (str): The base character
|
1458
|
+
variation_selector (int): Unicode code point of variation selector
|
1459
|
+
|
1460
|
+
Returns:
|
1461
|
+
dict: Information about the lookup result
|
1462
|
+
"""
|
1463
|
+
base_char_code = ord(base_char) if isinstance(base_char, str) else base_char
|
1464
|
+
vs_key = (base_char_code, variation_selector)
|
1465
|
+
|
1466
|
+
regular_glyph = self._character_map.get(base_char, 0)
|
1467
|
+
variation_glyph = self.lookup_glyph_with_variation(
|
1468
|
+
base_char, variation_selector
|
1469
|
+
)
|
1470
|
+
|
1471
|
+
return {
|
1472
|
+
"base_char": base_char,
|
1473
|
+
"base_char_code": f"U+{base_char_code:04X}",
|
1474
|
+
"variation_selector": f"U+{variation_selector:04X}",
|
1475
|
+
"regular_glyph_id": regular_glyph,
|
1476
|
+
"variation_glyph_id": variation_glyph,
|
1477
|
+
"has_variation": vs_key in self._variation_sequences,
|
1478
|
+
"uses_different_glyph": regular_glyph != variation_glyph,
|
1479
|
+
}
|