meerk40t 0.9.7051__py2.py3-none-any.whl → 0.9.7910__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerk40t/balormk/controller.py +3 -3
- meerk40t/balormk/device.py +7 -0
- meerk40t/balormk/driver.py +23 -14
- meerk40t/balormk/galvo_commands.py +18 -3
- meerk40t/balormk/gui/balorconfig.py +6 -0
- meerk40t/balormk/livelightjob.py +36 -14
- meerk40t/camera/camera.py +1 -0
- meerk40t/camera/gui/camerapanel.py +154 -58
- meerk40t/camera/plugin.py +46 -5
- meerk40t/core/elements/branches.py +90 -20
- meerk40t/core/elements/elements.py +59 -37
- meerk40t/core/elements/trace.py +10 -6
- meerk40t/core/node/node.py +2 -0
- meerk40t/core/plotplanner.py +7 -4
- meerk40t/device/gui/defaultactions.py +78 -14
- meerk40t/dxf/dxf_io.py +42 -0
- meerk40t/grbl/controller.py +245 -35
- meerk40t/grbl/device.py +102 -26
- meerk40t/grbl/driver.py +8 -2
- meerk40t/grbl/gui/grblconfiguration.py +6 -0
- meerk40t/grbl/gui/grblcontroller.py +1 -1
- meerk40t/gui/about.py +7 -0
- meerk40t/gui/choicepropertypanel.py +20 -30
- meerk40t/gui/devicepanel.py +27 -16
- meerk40t/gui/help_assets/help_assets.py +126 -2
- meerk40t/gui/icons.py +15 -0
- meerk40t/gui/laserpanel.py +102 -54
- meerk40t/gui/materialtest.py +10 -0
- meerk40t/gui/mkdebug.py +268 -9
- meerk40t/gui/navigationpanels.py +74 -8
- meerk40t/gui/propertypanels/operationpropertymain.py +185 -91
- meerk40t/gui/scenewidgets/elementswidget.py +7 -1
- meerk40t/gui/scenewidgets/selectionwidget.py +24 -9
- meerk40t/gui/simulation.py +1 -1
- meerk40t/gui/statusbarwidgets/shapepropwidget.py +50 -40
- meerk40t/gui/statusbarwidgets/statusbar.py +2 -2
- meerk40t/gui/toolwidgets/toolmeasure.py +1 -1
- meerk40t/gui/toolwidgets/toolnodeedit.py +4 -1
- meerk40t/gui/toolwidgets/tooltabedit.py +9 -7
- meerk40t/gui/wxmeerk40t.py +45 -15
- meerk40t/gui/wxmmain.py +23 -9
- meerk40t/gui/wxmribbon.py +36 -0
- meerk40t/gui/wxutils.py +66 -42
- meerk40t/kernel/inhibitor.py +120 -0
- meerk40t/kernel/kernel.py +38 -0
- meerk40t/lihuiyu/controller.py +33 -3
- meerk40t/lihuiyu/device.py +99 -4
- meerk40t/lihuiyu/driver.py +65 -5
- meerk40t/lihuiyu/gui/lhycontrollergui.py +69 -24
- meerk40t/lihuiyu/gui/lhydrivergui.py +6 -0
- meerk40t/lihuiyu/laserspeed.py +17 -10
- meerk40t/lihuiyu/parser.py +23 -0
- meerk40t/main.py +2 -2
- meerk40t/moshi/gui/moshidrivergui.py +7 -0
- meerk40t/newly/controller.py +3 -2
- meerk40t/newly/device.py +23 -2
- meerk40t/newly/driver.py +8 -3
- meerk40t/newly/gui/newlyconfig.py +7 -0
- meerk40t/ruida/gui/ruidaconfig.py +7 -0
- meerk40t/tools/geomstr.py +142 -49
- meerk40t/tools/rasterplotter.py +0 -5
- meerk40t/tools/ttfparser.py +921 -168
- {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/METADATA +1 -1
- {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/RECORD +69 -68
- {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/LICENSE +0 -0
- {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/WHEEL +0 -0
- {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/entry_points.txt +0 -0
- {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/top_level.txt +0 -0
- {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/zip-safe +0 -0
meerk40t/tools/ttfparser.py
CHANGED
@@ -14,6 +14,26 @@ WE_HAVE_INSTRUCTIONS = 1 << 8
|
|
14
14
|
USE_MY_METRICS = 1 << 9
|
15
15
|
OVERLAP_COMPOUND = 1 << 10
|
16
16
|
|
17
|
+
_FLAG_NAMES = {
|
18
|
+
ON_CURVE_POINT: "ON_CURVE_POINT",
|
19
|
+
ARG_1_AND_2_ARE_WORDS: "ARG_1_AND_2_ARE_WORDS",
|
20
|
+
ARGS_ARE_XY_VALUES: "ARGS_ARE_XY_VALUES",
|
21
|
+
ROUND_XY_TO_GRID: "ROUND_XY_TO_GRID",
|
22
|
+
WE_HAVE_A_SCALE: "WE_HAVE_A_SCALE",
|
23
|
+
MORE_COMPONENTS: "MORE_COMPONENTS",
|
24
|
+
WE_HAVE_AN_X_AND_Y_SCALE: "WE_HAVE_AN_X_AND_Y_SCALE",
|
25
|
+
WE_HAVE_A_TWO_BY_TWO: "WE_HAVE_A_TWO_BY_TWO",
|
26
|
+
WE_HAVE_INSTRUCTIONS: "WE_HAVE_INSTRUCTIONS",
|
27
|
+
USE_MY_METRICS: "USE_MY_METRICS",
|
28
|
+
OVERLAP_COMPOUND: "OVERLAP_COMPOUND",
|
29
|
+
}
|
30
|
+
|
31
|
+
|
32
|
+
def flagname(flag):
    """Describe *flag* as the names of every known bit that is set in it.

    The names are taken from the module-level ``_FLAG_NAMES`` table, in that
    table's iteration order, and joined with ``" | "``.  When none of the
    known bits is set, the placeholder ``UNKNOWN_FLAG_<flag>`` is returned.
    """
    active = []
    for mask, label in _FLAG_NAMES.items():
        if flag & mask:
            active.append(label)
    if not active:
        return f"UNKNOWN_FLAG_{flag}"
    return " | ".join(active)
|
36
|
+
|
17
37
|
|
18
38
|
class TTFParsingError(ValueError):
|
19
39
|
"""Parsing error"""
|
@@ -27,7 +47,7 @@ class TrueTypeFont:
|
|
27
47
|
self.checksum_adjust = None
|
28
48
|
self.magic_number = None
|
29
49
|
self.flags = None
|
30
|
-
self.units_per_em =
|
50
|
+
self.units_per_em = 1000 # Default value, will be overwritten during parsing
|
31
51
|
self.created = None
|
32
52
|
self.modified = None
|
33
53
|
self.active = True
|
@@ -51,23 +71,29 @@ class TrueTypeFont:
|
|
51
71
|
self.caret_slope_run = None
|
52
72
|
self.caret_offset = None
|
53
73
|
self.metric_data_format = None
|
54
|
-
self.number_of_long_hor_metrics =
|
74
|
+
self.number_of_long_hor_metrics = (
|
75
|
+
0 # Default value, will be overwritten during parsing
|
76
|
+
)
|
55
77
|
|
56
78
|
self.font_family = None
|
57
79
|
self.font_subfamily = None
|
58
80
|
self.font_name = None
|
59
81
|
self._character_map = {}
|
60
|
-
self.
|
61
|
-
self.
|
82
|
+
self._variation_sequences = {} # Unicode variation sequences mapping
|
83
|
+
self._glyph_offsets = []
|
84
|
+
self.horizontal_metrics = []
|
62
85
|
|
63
86
|
self.is_okay = False
|
87
|
+
self.cmap_version = -1
|
64
88
|
self.parse_ttf(filename, require_checksum=require_checksum)
|
65
89
|
if (
|
66
90
|
b"CFF " in self._raw_tables
|
67
91
|
and b"glyf" not in self._raw_tables
|
68
92
|
and b"loca" not in self._raw_tables
|
69
93
|
):
|
70
|
-
|
94
|
+
error_msg = "Format CFF font file is not supported."
|
95
|
+
self._logger(error_msg)
|
96
|
+
raise TTFParsingError(error_msg)
|
71
97
|
try:
|
72
98
|
self.parse_head()
|
73
99
|
self.parse_hhea()
|
@@ -76,11 +102,18 @@ class TrueTypeFont:
|
|
76
102
|
self.parse_cmap()
|
77
103
|
self.parse_name()
|
78
104
|
except Exception as e:
|
79
|
-
|
80
|
-
|
105
|
+
error_msg = f"TTF init for {filename} crashed: {e}"
|
106
|
+
self._logger(error_msg)
|
107
|
+
raise TTFParsingError(error_msg) from e
|
81
108
|
self.glyph_data = list(self.parse_glyf())
|
82
109
|
self._line_information = []
|
83
110
|
|
111
|
+
def _logger(self, message):
    """Report *message* by printing it to standard output.

    Output is gated by a local switch that is currently always on; the body
    is the single place to swap in a real logging backend.
    """
    debug_enabled = True
    if not debug_enabled:
        return
    print(message)
|
116
|
+
|
84
117
|
def line_information(self):
|
85
118
|
return self._line_information
|
86
119
|
|
@@ -97,12 +130,14 @@ class TrueTypeFont:
|
|
97
130
|
f.seek(off)
|
98
131
|
string = f.read(length)
|
99
132
|
f.seek(location)
|
133
|
+
if string is None:
|
134
|
+
return ""
|
100
135
|
return string.decode("UTF-16BE")
|
101
136
|
except UnicodeDecodeError:
|
102
137
|
try:
|
103
|
-
return string.decode("UTF8")
|
138
|
+
return string.decode("UTF8") if string is not None else ""
|
104
139
|
except UnicodeDecodeError:
|
105
|
-
return string
|
140
|
+
return string if string is not None else ""
|
106
141
|
|
107
142
|
try:
|
108
143
|
with open(filename, "rb") as f:
|
@@ -152,13 +187,13 @@ class TrueTypeFont:
|
|
152
187
|
if name_id == 1:
|
153
188
|
font_family = get_string(f, pos, length)
|
154
189
|
elif name_id == 2:
|
155
|
-
|
190
|
+
font_subfamily = get_string(f, pos, length)
|
156
191
|
elif name_id == 4:
|
157
192
|
font_name = get_string(f, pos, length)
|
158
193
|
if font_family and font_subfamily and font_name:
|
159
194
|
break
|
160
195
|
return font_family, font_subfamily, font_name
|
161
|
-
except Exception
|
196
|
+
except Exception:
|
162
197
|
# Anything fishy
|
163
198
|
return None
|
164
199
|
|
@@ -185,8 +220,13 @@ class TrueTypeFont:
|
|
185
220
|
line_start_y = offset_y * scale
|
186
221
|
offset_x = offs
|
187
222
|
# print (f"{offset_x}, {offset_y}: '{text}', fs={font_size}, em:{self.units_per_em}")
|
188
|
-
for
|
189
|
-
|
223
|
+
for (
|
224
|
+
base_char_code,
|
225
|
+
variation_selector,
|
226
|
+
) in self.parse_text_with_variation_sequences(text):
|
227
|
+
index = self.lookup_glyph_with_variation(
|
228
|
+
base_char_code, variation_selector
|
229
|
+
)
|
190
230
|
if index >= len(self.glyph_data):
|
191
231
|
continue
|
192
232
|
if index >= len(self.horizontal_metrics):
|
@@ -209,26 +249,17 @@ class TrueTypeFont:
|
|
209
249
|
curr = contour[-1]
|
210
250
|
next = contour[0]
|
211
251
|
if curr[2] & ON_CURVE_POINT:
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
252
|
+
start_x = (offset_x + curr[0]) * scale
|
253
|
+
start_y = (offset_y + curr[1]) * scale
|
254
|
+
elif next[2] & ON_CURVE_POINT:
|
255
|
+
start_x = (offset_x + next[0]) * scale
|
256
|
+
start_y = (offset_y + next[1]) * scale
|
217
257
|
else:
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
(offset_y + next[1]) * scale,
|
223
|
-
)
|
224
|
-
else:
|
225
|
-
if self.active:
|
226
|
-
path.move(
|
227
|
-
(offset_x + (curr[0] + next[0]) / 2) * scale,
|
228
|
-
(offset_y + (curr[1] + next[1]) / 2) * scale,
|
229
|
-
)
|
258
|
+
start_x = (offset_x + (curr[0] + next[0]) / 2) * scale
|
259
|
+
start_y = (offset_y + (curr[1] + next[1]) / 2) * scale
|
260
|
+
if self.active:
|
261
|
+
path.move(start_x, start_y)
|
230
262
|
for i in range(len(contour)):
|
231
|
-
prev = curr
|
232
263
|
curr = next
|
233
264
|
next = contour[(i + 1) % len(contour)]
|
234
265
|
if curr[2] & ON_CURVE_POINT:
|
@@ -242,9 +273,10 @@ class TrueTypeFont:
|
|
242
273
|
else:
|
243
274
|
next2 = next
|
244
275
|
if not next[2] & ON_CURVE_POINT:
|
245
|
-
next2 = (
|
246
|
-
curr[
|
247
|
-
|
276
|
+
next2 = (
|
277
|
+
(curr[0] + next[0]) / 2,
|
278
|
+
(curr[1] + next[1]) / 2,
|
279
|
+
)
|
248
280
|
if self.active:
|
249
281
|
path.quad(
|
250
282
|
None,
|
@@ -305,7 +337,7 @@ class TrueTypeFont:
|
|
305
337
|
entry_selector,
|
306
338
|
range_shift,
|
307
339
|
) = struct.unpack(">LHHHH", header)
|
308
|
-
for
|
340
|
+
for _ in range(num_tables):
|
309
341
|
tag, checksum, offset, length = struct.unpack(">4sLLL", f.read(16))
|
310
342
|
p = f.tell()
|
311
343
|
f.seek(offset)
|
@@ -314,17 +346,15 @@ class TrueTypeFont:
|
|
314
346
|
if require_checksum:
|
315
347
|
for b, byte in enumerate(data):
|
316
348
|
checksum -= byte << 24 - (8 * (b % 4))
|
317
|
-
if tag == b"head":
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
)
|
349
|
+
if tag == b"head" and checksum % (1 << 32) != 0:
|
350
|
+
error_msg = f"Invalid checksum for table {tag.decode('ascii')}: {checksum % (1 << 32)} != 0"
|
351
|
+
self._logger(error_msg)
|
352
|
+
raise TTFParsingError(error_msg)
|
322
353
|
self._raw_tables[tag] = data
|
323
354
|
except Exception as e:
|
324
|
-
|
325
|
-
|
326
|
-
) from e
|
327
|
-
|
355
|
+
error_msg = f"Error parsing TTF file {font_path}: {e}"
|
356
|
+
self._logger(error_msg)
|
357
|
+
raise TTFParsingError(error_msg) from e
|
328
358
|
|
329
359
|
def parse_head(self):
|
330
360
|
data = self._raw_tables[b"head"]
|
@@ -381,6 +411,7 @@ class TrueTypeFont:
|
|
381
411
|
|
382
412
|
def _parse_cmap_table(self, data):
|
383
413
|
_fmt = struct.unpack(">H", data.read(2))[0]
|
414
|
+
self.cmap_version = _fmt
|
384
415
|
if _fmt == 0:
|
385
416
|
return self._parse_cmap_format_0(data)
|
386
417
|
elif _fmt == 2:
|
@@ -399,6 +430,7 @@ class TrueTypeFont:
|
|
399
430
|
return self._parse_cmap_format_13(data)
|
400
431
|
elif _fmt == 14:
|
401
432
|
return self._parse_cmap_format_14(data)
|
433
|
+
self.cmap_version = -1
|
402
434
|
return False
|
403
435
|
|
404
436
|
def _parse_cmap_format_0(self, data):
|
@@ -408,9 +440,75 @@ class TrueTypeFont:
|
|
408
440
|
return True
|
409
441
|
|
410
442
|
def _parse_cmap_format_2(self, data):
    """Parse a cmap format 2 subtable (high-byte mapping through table).

    Format 2 serves mixed 8/16-bit encodings (primarily legacy CJK).  Only
    the single-byte part is handled: every byte value whose subHeaderKeys
    entry is 0 is resolved through subheader 0.  Two-byte codes are ignored.

    Args:
        data: Seekable binary stream offering ``read``, ``tell`` and
            ``getvalue``, positioned directly after the subtable's 2-byte
            format field.

    Returns:
        True once the subtable has been processed.

    Raises:
        TTFParsingError: If the subtable is truncated or cannot be decoded.
    """
    try:
        _length, _language = struct.unpack(">HH", data.read(4))

        # subHeaderKeys: 256 entries, each holding (subheader index * 8).
        subheader_keys = struct.unpack(">256H", data.read(256 * 2))
        num_subheaders = (max(subheader_keys) // 8) + 1  # 8 bytes per subheader

        buffer = data.getvalue()
        subheaders_start = data.tell()
        remaining_data_size = len(buffer) - subheaders_start
        expected_subheader_size = num_subheaders * 8

        if remaining_data_size < expected_subheader_size:
            error_msg = f"Insufficient data for subheaders in cmap format 2: expected {expected_subheader_size} bytes, got {remaining_data_size} bytes"
            self._logger(error_msg)
            raise TTFParsingError(error_msg)

        # Consume every subheader so the stream ends up where it used to,
        # even though only subheader 0 is evaluated below.
        subheaders = [
            struct.unpack(">HHHH", data.read(8)) for _ in range(num_subheaders)
        ]
        first_code, entry_count, id_delta, id_range_offset = subheaders[0]

        # idRangeOffset is measured in bytes from the location of the
        # idRangeOffset field itself (6 bytes into the subheader) to the
        # glyphIndexArray element that belongs to first_code.
        glyph_array_start = subheaders_start + 6 + id_range_offset
        buffer_size = len(buffer)

        for byte_val in range(first_code, min(first_code + entry_count, 256)):
            if subheader_keys[byte_val] != 0:
                # This byte is the lead byte of a two-byte code.
                continue
            entry_pos = glyph_array_start + 2 * (byte_val - first_code)
            if entry_pos + 2 > buffer_size:
                break  # glyphIndexArray is truncated
            glyph_id = struct.unpack_from(">H", buffer, entry_pos)[0]
            if glyph_id == 0:
                continue  # 0 marks the missing glyph; idDelta is not applied
            # idDelta is added modulo 65536 to non-zero array entries only.
            self._character_map[chr(byte_val)] = (glyph_id + id_delta) & 0xFFFF

        return True
    except struct.error as e:
        error_msg = f"Struct unpacking error in cmap format 2: {e}"
        self._logger(error_msg)
        raise TTFParsingError(error_msg) from e
    except Exception as e:
        error_msg = f"Error parsing cmap format 2: {e}"
        self._logger(error_msg)
        raise TTFParsingError(error_msg) from e
|
414
512
|
|
415
513
|
def _parse_cmap_format_4(self, data):
|
416
514
|
(
|
@@ -426,7 +524,7 @@ class TrueTypeFont:
|
|
426
524
|
# We need to have an even amount of bytes for unpack
|
427
525
|
if len(data) % 2 == 1:
|
428
526
|
data = data[:-1]
|
429
|
-
data = struct.unpack(f">{
|
527
|
+
data = struct.unpack(f">{len(data)//2}H", data)
|
430
528
|
ends = data[:seg_count]
|
431
529
|
starts = data[seg_count + 1 : seg_count * 2 + 1]
|
432
530
|
deltas = data[seg_count * 2 + 1 : seg_count * 3 + 1]
|
@@ -436,7 +534,7 @@ class TrueTypeFont:
|
|
436
534
|
start = starts[seg]
|
437
535
|
delta = deltas[seg]
|
438
536
|
offset = offsets[seg]
|
439
|
-
if start == end
|
537
|
+
if start == end == 0xFFFF:
|
440
538
|
break
|
441
539
|
|
442
540
|
for c in range(start, end + 1):
|
@@ -456,16 +554,125 @@ class TrueTypeFont:
|
|
456
554
|
language,
|
457
555
|
first_code,
|
458
556
|
entry_count,
|
459
|
-
) = struct.unpack(">
|
460
|
-
|
461
|
-
|
557
|
+
) = struct.unpack(">HHHH", data.read(8))
|
558
|
+
glyph_indices = struct.unpack(f">{entry_count}H", data.read(entry_count * 2))
|
559
|
+
for i, glyph_index in enumerate(glyph_indices):
|
560
|
+
try:
|
561
|
+
char_code = i + first_code
|
562
|
+
if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
|
563
|
+
self._character_map[chr(char_code)] = glyph_index
|
564
|
+
except ValueError:
|
565
|
+
# Invalid Unicode character, skip
|
566
|
+
continue
|
462
567
|
return True
|
463
568
|
|
464
569
|
def _parse_cmap_format_8(self, data):
    """Parse a cmap format 8 subtable (mixed 16-bit and 32-bit coverage).

    Each group maps the consecutive character codes
    ``start_char_code..end_char_code`` to consecutive glyph ids beginning at
    ``start_glyph_id``.  Codes above U+10FFFF cannot be represented as a
    Python character and are dropped.

    Args:
        data: Seekable binary stream offering ``read``, ``tell`` and
            ``getvalue``, positioned directly after the subtable's 2-byte
            format field.

    Returns:
        True once every group has been processed.

    Raises:
        TTFParsingError: If the subtable is truncated or cannot be decoded.
    """
    max_code_point = 0x10FFFF
    try:
        _reserved, _length, _language = struct.unpack(">HII", data.read(10))

        # is32 array: 8192 bytes = 65536 bits, one bit per 16-bit code.
        is32_data = data.read(8192)
        if len(is32_data) < 8192:
            error_msg = "Insufficient data for is32 array in cmap format 8"
            self._logger(error_msg)
            raise TTFParsingError(error_msg)

        n_groups = struct.unpack(">I", data.read(4))[0]
        total_size = len(data.getvalue())

        for group_idx in range(n_groups):
            if total_size - data.tell() < 12:
                error_msg = (
                    f"Insufficient data for group {group_idx} in cmap format 8"
                )
                self._logger(error_msg)
                raise TTFParsingError(error_msg)

            start_char_code, end_char_code, start_glyph_id = struct.unpack(
                ">III", data.read(12)
            )

            if start_char_code > end_char_code:
                continue  # Skip invalid group

            # Clamp the upper bound before iterating: end_char_code is a
            # raw uint32, and walking up to 2**32 codes only to reject
            # everything past U+10FFFF would stall on a malformed font.
            last_code = min(end_char_code, max_code_point)
            for char_code in range(start_char_code, last_code + 1):
                self._character_map[chr(char_code)] = start_glyph_id + (
                    char_code - start_char_code
                )

        return True
    except struct.error as e:
        error_msg = f"Struct unpacking error in cmap format 8: {e}"
        self._logger(error_msg)
        raise TTFParsingError(error_msg) from e
    except Exception as e:
        error_msg = f"Error parsing cmap format 8: {e}"
        self._logger(error_msg)
        raise TTFParsingError(error_msg) from e
|
466
624
|
|
467
625
|
def _parse_cmap_format_10(self, data):
    """Parse a cmap format 10 subtable (trimmed array).

    The subtable names a first character code and a count, followed by one
    16-bit glyph id per consecutive character code.  Entries whose code
    falls above U+10FFFF are ignored.

    Args:
        data: Seekable binary stream offering ``read``, ``tell`` and
            ``getvalue``, positioned directly after the subtable's 2-byte
            format field.

    Returns:
        True once the table (possibly empty) has been processed.

    Raises:
        TTFParsingError: If the start code is out of range, the glyph array
            is truncated, or the data cannot be decoded.
    """
    highest_code_point = 0x10FFFF
    try:
        # Header fields: reserved, length, language, startCharCode, numChars.
        header = data.read(18)
        _reserved, _length, _language, start_char_code, num_chars = struct.unpack(
            ">HIIII", header
        )

        if not num_chars:
            # An empty table is valid and leaves the character map untouched.
            return True

        if start_char_code > highest_code_point:
            error_msg = (
                f"Invalid start character code in cmap format 10: {start_char_code}"
            )
            self._logger(error_msg)
            raise TTFParsingError(error_msg)

        # Two bytes per glyph id; make sure the array is fully present.
        expected_data_size = 2 * num_chars
        bytes_left = len(data.getvalue()) - data.tell()
        if bytes_left < expected_data_size:
            error_msg = f"Insufficient data for glyph array in cmap format 10: expected {expected_data_size} bytes"
            self._logger(error_msg)
            raise TTFParsingError(error_msg)

        glyph_ids = struct.unpack(f">{num_chars}H", data.read(expected_data_size))

        for char_code, glyph_id in enumerate(glyph_ids, start_char_code):
            if char_code > highest_code_point:
                continue
            self._character_map[chr(char_code)] = glyph_id

        return True
    except struct.error as e:
        error_msg = f"Struct unpacking error in cmap format 10: {e}"
        self._logger(error_msg)
        raise TTFParsingError(error_msg) from e
    except Exception as e:
        error_msg = f"Error parsing cmap format 10: {e}"
        self._logger(error_msg)
        raise TTFParsingError(error_msg) from e
|
469
676
|
|
470
677
|
def _parse_cmap_format_12(self, data):
|
471
678
|
(
|
@@ -474,13 +681,19 @@ class TrueTypeFont:
|
|
474
681
|
language,
|
475
682
|
n_groups,
|
476
683
|
) = struct.unpack(">HIII", data.read(14))
|
477
|
-
for
|
684
|
+
for _ in range(n_groups):
|
478
685
|
(start_char_code, end_char_code, start_glyph_code) = struct.unpack(
|
479
686
|
">III", data.read(12)
|
480
687
|
)
|
481
688
|
|
482
|
-
for
|
483
|
-
|
689
|
+
for char_code in range(start_char_code, end_char_code + 1):
|
690
|
+
try:
|
691
|
+
if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
|
692
|
+
glyph_index = start_glyph_code + (char_code - start_char_code)
|
693
|
+
self._character_map[chr(char_code)] = glyph_index
|
694
|
+
except ValueError:
|
695
|
+
# Invalid Unicode character, skip
|
696
|
+
continue
|
484
697
|
return True
|
485
698
|
|
486
699
|
def _parse_cmap_format_13(self, data):
|
@@ -490,17 +703,174 @@ class TrueTypeFont:
|
|
490
703
|
language,
|
491
704
|
n_groups,
|
492
705
|
) = struct.unpack(">HIII", data.read(14))
|
493
|
-
for
|
706
|
+
for _ in range(n_groups):
|
494
707
|
(start_char_code, end_char_code, glyph_code) = struct.unpack(
|
495
708
|
">III", data.read(12)
|
496
709
|
)
|
497
710
|
|
498
|
-
for
|
499
|
-
|
711
|
+
for char_code in range(start_char_code, end_char_code + 1):
|
712
|
+
try:
|
713
|
+
if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
|
714
|
+
self._character_map[chr(char_code)] = glyph_code
|
715
|
+
except ValueError:
|
716
|
+
# Invalid Unicode character, skip
|
717
|
+
continue
|
500
718
|
return True
|
501
719
|
|
502
720
|
def _parse_cmap_format_14(self, data):
    """Parse a cmap format 14 subtable (Unicode variation sequences).

    Every variation selector record may reference two tables:

    * a Default UVS table listing ranges of base characters whose variation
      sequence uses the glyph already present in ``self._character_map``;
    * a Non-Default UVS table listing explicit (base character, glyph id)
      pairs.

    Results are stored in ``self._variation_sequences`` under the key
    ``(base_code_point, variation_selector)``.

    Args:
        data: Seekable binary stream offering ``read``, ``tell``, ``seek``
            and ``getvalue``, positioned directly after the subtable's
            2-byte format field.

    Returns:
        True when the subtable was processed (individual UVS tables that
        fail to decode are logged and skipped), False when the subtable as
        a whole could not be decoded.  This method never raises.
    """
    # Upper bounds on the work done for a malformed font.
    MAX_VAR_SELECTOR_RECORDS = 100
    MAX_UNICODE_RANGES = 1000
    MAX_UVS_MAPPINGS = 10000
    max_code_point = 0x10FFFF

    try:
        # UVS table offsets are relative to the start of the subtable.  Only
        # the 2-byte format field has been consumed at this point.
        subtable_start = data.tell() - 2

        # Header: uint32 length, uint32 numVarSelectorRecords.
        _length, num_var_selector_records = struct.unpack(">II", data.read(8))
        total_size = len(data.getvalue())

        if num_var_selector_records > MAX_VAR_SELECTOR_RECORDS:
            warning_msg = f"Warning: Too many variation selector records ({num_var_selector_records}), limiting to {MAX_VAR_SELECTOR_RECORDS}"
            self._logger(warning_msg)
            num_var_selector_records = MAX_VAR_SELECTOR_RECORDS

        # Each record is 11 bytes: uint24 selector + two uint32 offsets.
        for record_idx in range(num_var_selector_records):
            if total_size - data.tell() < 11:
                error_msg = (
                    f"Insufficient data for variation selector record {record_idx}"
                )
                self._logger(error_msg)
                break  # Skip remaining records instead of crashing

            # uint24 values are big-endian; decode the three bytes directly
            # rather than padding them on the low-order side.
            variation_selector = int.from_bytes(data.read(3), "big")
            default_uvs_offset, non_default_uvs_offset = struct.unpack(
                ">II", data.read(8)
            )

            # Remember where the next record starts before jumping away.
            current_pos = data.tell()

            if default_uvs_offset != 0:
                try:
                    data.seek(subtable_start + default_uvs_offset)
                    num_unicode_ranges = struct.unpack(">I", data.read(4))[0]

                    if num_unicode_ranges > MAX_UNICODE_RANGES:
                        warning_msg = f"Warning: Too many Unicode ranges ({num_unicode_ranges}), limiting to {MAX_UNICODE_RANGES}"
                        self._logger(warning_msg)
                        num_unicode_ranges = MAX_UNICODE_RANGES

                    for _ in range(num_unicode_ranges):
                        if total_size - data.tell() < 4:
                            break  # Not enough data for this range

                        # 4 bytes: uint24 start code + uint8 additional count.
                        # The count is a single byte, so a range spans at
                        # most 256 code points and needs no extra cap.
                        range_data = data.read(4)
                        start_unicode_value = int.from_bytes(range_data[:3], "big")
                        additional_count = range_data[3]

                        for base_char in range(
                            start_unicode_value,
                            start_unicode_value + additional_count + 1,
                        ):
                            if base_char > max_code_point:
                                break
                            # Default UVS: reuse the base character's glyph,
                            # if the font maps that character at all.
                            glyph_id = self._character_map.get(chr(base_char))
                            if glyph_id is not None:
                                vs_key = (base_char, variation_selector)
                                self._variation_sequences[vs_key] = glyph_id
                except (struct.error, IndexError) as e:
                    error_msg = f"Error processing default UVS table: {e}"
                    self._logger(error_msg)

            if non_default_uvs_offset != 0:
                try:
                    data.seek(subtable_start + non_default_uvs_offset)
                    num_uvs_mappings = struct.unpack(">I", data.read(4))[0]

                    if num_uvs_mappings > MAX_UVS_MAPPINGS:
                        warning_msg = f"Warning: Too many UVS mappings ({num_uvs_mappings}), limiting to {MAX_UVS_MAPPINGS}"
                        self._logger(warning_msg)
                        num_uvs_mappings = MAX_UVS_MAPPINGS

                    for _ in range(num_uvs_mappings):
                        if total_size - data.tell() < 5:
                            break  # Not enough data for this mapping

                        # 5 bytes: uint24 Unicode value + uint16 glyph id.
                        mapping_data = data.read(5)
                        unicode_value = int.from_bytes(mapping_data[:3], "big")
                        glyph_id = struct.unpack(">H", mapping_data[3:5])[0]

                        if unicode_value <= max_code_point:
                            vs_key = (unicode_value, variation_selector)
                            self._variation_sequences[vs_key] = glyph_id
                except (struct.error, IndexError) as e:
                    error_msg = f"Error processing non-default UVS table: {e}"
                    self._logger(error_msg)

            # Return to the position after this variation selector record.
            data.seek(current_pos)

        return True
    except struct.error as e:
        error_msg = f"Struct unpacking error in cmap format 14: {e}"
        self._logger(error_msg)
        return False  # Don't crash, just return False
    except Exception as e:
        error_msg = f"Error parsing cmap format 14: {e}"
        self._logger(error_msg)
        return False  # Don't crash, just return False
|
504
874
|
|
505
875
|
def parse_hhea(self):
|
506
876
|
data = self._raw_tables[b"hhea"]
|
@@ -527,18 +897,31 @@ class TrueTypeFont:
|
|
527
897
|
def parse_hmtx(self):
    """Decode the ``hmtx`` table into ``self.horizontal_metrics``.

    The table starts with ``self.number_of_long_hor_metrics`` records of
    (unsigned 16-bit advance, signed 16-bit left side bearing).  Any signed
    16-bit values that follow are bare left side bearings; each is paired
    with the advance of the last full record (0 when there is none).

    Raises:
        TTFParsingError: If the table is shorter than the announced number
            of full records requires.
    """
    table = self._raw_tables[b"hmtx"]
    long_count = self.number_of_long_hor_metrics
    long_size = long_count * 4

    if len(table) < long_size:
        error_msg = f"Insufficient data in hmtx table: expected {long_size} bytes, got {len(table)}"
        self._logger(error_msg)
        raise TTFParsingError(error_msg)

    # Flat sequence advance0, bearing0, advance1, bearing1, ...
    flat = struct.unpack(f">{'Hh' * long_count}", table[:long_size])
    metrics = list(zip(flat[0::2], flat[1::2]))

    # Whatever follows the full records is a run of bearing-only entries.
    trailing_count = (len(table) - long_size) // 2
    if trailing_count > 0:
        shared_advance = flat[-2] if flat else 0
        bearings = struct.unpack(
            f">{trailing_count}h", table[long_size : long_size + trailing_count * 2]
        )
        metrics.extend((shared_advance, bearing) for bearing in bearings)

    self.horizontal_metrics = metrics
|
542
925
|
|
543
926
|
def parse_loca(self):
|
544
927
|
try:
|
@@ -547,10 +930,10 @@ class TrueTypeFont:
|
|
547
930
|
self._glyph_offsets = []
|
548
931
|
return
|
549
932
|
if self.index_to_loc_format == 0:
|
550
|
-
n =
|
933
|
+
n = len(data) // 2
|
551
934
|
self._glyph_offsets = [g * 2 for g in struct.unpack(f">{n}H", data)]
|
552
935
|
else:
|
553
|
-
n =
|
936
|
+
n = len(data) // 4
|
554
937
|
self._glyph_offsets = struct.unpack(f">{n}I", data)
|
555
938
|
|
556
939
|
def parse_glyf(self):
|
@@ -562,7 +945,7 @@ class TrueTypeFont:
|
|
562
945
|
start = self._glyph_offsets[index]
|
563
946
|
end = self._glyph_offsets[index + 1]
|
564
947
|
if start == end:
|
565
|
-
yield
|
948
|
+
yield []
|
566
949
|
return
|
567
950
|
yield from self._parse_glyph(BytesIO(data[start:end]))
|
568
951
|
|
@@ -576,112 +959,245 @@ class TrueTypeFont:
|
|
576
959
|
yield from self._parse_compound_glyph(data)
|
577
960
|
|
578
961
|
def _parse_compound_glyph(self, data):
    """
    Yield the contours of a compound glyph, one transformed contour at a time.

    Component records are read from ``data`` until a record without the
    MORE_COMPONENTS flag has been processed. Each record consists of:

    * a 16-bit flags word and a 16-bit glyph index;
    * two placement arguments, read as signed 16-bit values when
      ARG_1_AND_2_ARE_WORDS is set, otherwise as signed bytes when
      ARGS_ARE_XY_VALUES is set and as unsigned bytes when it is not;
    * an optional transform selected by WE_HAVE_A_SCALE (one value),
      WE_HAVE_AN_X_AND_Y_SCALE (two values) or WE_HAVE_A_TWO_BY_TWO
      (four values); every value is a signed 16-bit integer divided by 16384.

    With ARGS_ARE_XY_VALUES the arguments are used as the (dx, dy) offset.
    Without it they are point indices; point matching is not implemented, so
    the component is placed with a zero offset.

    When ROUND_XY_TO_GRID is set every transformed coordinate is passed
    through ``round()``.
    NOTE(review): the OpenType specification describes this flag as applying
    to the offset values only - confirm that rounding each point is intended.

    Args:
        data: Binary stream positioned at the first component record.

    Yields:
        list: A contour as a list of ``(x, y, flag)`` tuples.
    """
    fixed_one = 1 << 14  # Divisor for the 2.14 fixed point transform values
    collected = []

    component_flags = MORE_COMPONENTS
    while component_flags & MORE_COMPONENTS:
        component_flags, glyph_index = struct.unpack(">HH", data.read(4))
        offsets_given = component_flags & ARGS_ARE_XY_VALUES

        # Width and signedness of the two placement arguments
        if component_flags & ARG_1_AND_2_ARE_WORDS:
            arg_format, arg_size = ">hh", 4
        elif offsets_given:
            arg_format, arg_size = ">bb", 2
        else:
            arg_format, arg_size = ">BB", 2
        first_arg, second_arg = struct.unpack(arg_format, data.read(arg_size))

        if offsets_given:
            shift_x, shift_y = float(first_arg), float(second_arg)
        else:
            # Point indices: matching is not implemented, keep the component in place
            shift_x, shift_y = 0.0, 0.0

        # Linear part of the transform, identity unless a scale flag is present
        m_xx, m_xy, m_yx, m_yy = 1.0, 0.0, 0.0, 1.0
        if component_flags & WE_HAVE_A_SCALE:
            m_xx = m_yy = struct.unpack(">h", data.read(2))[0] / fixed_one
        elif component_flags & WE_HAVE_AN_X_AND_Y_SCALE:
            raw_x, raw_y = struct.unpack(">hh", data.read(4))
            m_xx = raw_x / fixed_one
            m_yy = raw_y / fixed_one
        elif component_flags & WE_HAVE_A_TWO_BY_TWO:
            raw_xx, raw_xy, raw_yx, raw_yy = struct.unpack(">hhhh", data.read(8))
            m_xx = raw_xx / fixed_one
            m_xy = raw_xy / fixed_one
            m_yx = raw_yx / fixed_one
            m_yy = raw_yy / fixed_one

        snap_to_grid = component_flags & ROUND_XY_TO_GRID

        # Materialise the referenced glyph before transforming it
        outlines = list(self._parse_glyph_index(glyph_index))
        for outline in outlines:
            moved = []
            for px, py, point_flag in outline:
                tx = m_xx * px + m_xy * py + shift_x
                ty = m_yx * px + m_yy * py + shift_y
                if snap_to_grid:
                    tx = round(tx)
                    ty = round(ty)
                moved.append((tx, ty, point_flag))
            collected.append(moved)

    # Contours are only handed out once every component has been read
    yield from collected
|
644
1082
|
|
645
1083
|
def _parse_simple_glyph(self, num_contours, data):
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
1084
|
+
try:
|
1085
|
+
# Check we have enough data for contour endpoints
|
1086
|
+
if len(data.getvalue()) - data.tell() < num_contours * 2:
|
1087
|
+
error_msg = "Insufficient data for contour endpoints"
|
1088
|
+
self._logger(error_msg)
|
1089
|
+
raise TTFParsingError(error_msg)
|
1090
|
+
|
1091
|
+
end_pts = struct.unpack(f">{num_contours}H", data.read(2 * num_contours))
|
1092
|
+
|
1093
|
+
# Check we have enough data for instruction length
|
1094
|
+
if len(data.getvalue()) - data.tell() < 2:
|
1095
|
+
error_msg = "Insufficient data for instruction length"
|
1096
|
+
self._logger(error_msg)
|
1097
|
+
raise TTFParsingError(error_msg)
|
1098
|
+
|
1099
|
+
inst_len = struct.unpack(">H", data.read(2))[0]
|
1100
|
+
|
1101
|
+
# Check we have enough data for instructions
|
1102
|
+
if len(data.getvalue()) - data.tell() < inst_len:
|
1103
|
+
error_msg = "Insufficient data for instructions"
|
1104
|
+
self._logger(error_msg)
|
1105
|
+
raise TTFParsingError(error_msg)
|
1106
|
+
|
1107
|
+
_ = data.read(inst_len) # Read instructions but don't store unused variable
|
1108
|
+
|
1109
|
+
if not end_pts:
|
1110
|
+
return
|
1111
|
+
|
1112
|
+
num_points = max(end_pts) + 1
|
1113
|
+
if num_points <= 0:
|
1114
|
+
return
|
1115
|
+
|
1116
|
+
# Read flags with bounds checking
|
1117
|
+
flags = []
|
1118
|
+
while len(flags) < num_points:
|
1119
|
+
if len(data.getvalue()) - data.tell() < 1:
|
1120
|
+
error_msg = "Insufficient data for flags"
|
1121
|
+
self._logger(error_msg)
|
1122
|
+
raise TTFParsingError(error_msg)
|
1123
|
+
|
1124
|
+
flag = ord(data.read(1))
|
1125
|
+
flags.append(flag)
|
1126
|
+
if flag & 0x8: # Repeat flag
|
1127
|
+
if len(data.getvalue()) - data.tell() < 1:
|
1128
|
+
error_msg = "Insufficient data for repeat count"
|
1129
|
+
self._logger(error_msg)
|
1130
|
+
raise TTFParsingError(error_msg)
|
1131
|
+
repeat_count = ord(data.read(1))
|
1132
|
+
flags.extend([flag] * repeat_count)
|
1133
|
+
|
1134
|
+
# Truncate flags if we read too many
|
1135
|
+
flags = flags[:num_points]
|
1136
|
+
|
1137
|
+
x_coords = list(self._read_coords(num_points, 0x2, 0x10, flags, data))
|
1138
|
+
y_coords = list(self._read_coords(num_points, 0x4, 0x20, flags, data))
|
1139
|
+
|
1140
|
+
start = 0
|
1141
|
+
for end in end_pts:
|
1142
|
+
if end >= num_points:
|
1143
|
+
error_msg = f"Invalid contour endpoint: {end} >= {num_points}"
|
1144
|
+
self._logger(error_msg)
|
1145
|
+
raise TTFParsingError(error_msg)
|
1146
|
+
yield list(
|
1147
|
+
zip(
|
1148
|
+
x_coords[start : end + 1],
|
1149
|
+
y_coords[start : end + 1],
|
1150
|
+
flags[start : end + 1],
|
1151
|
+
)
|
666
1152
|
)
|
667
|
-
|
668
|
-
|
1153
|
+
start = end + 1
|
1154
|
+
except struct.error as e:
|
1155
|
+
error_msg = f"Struct unpacking error in simple glyph: {e}"
|
1156
|
+
self._logger(error_msg)
|
1157
|
+
raise TTFParsingError(error_msg) from e
|
1158
|
+
except (IndexError, ValueError) as e:
|
1159
|
+
error_msg = f"Error parsing simple glyph: {e}"
|
1160
|
+
self._logger(error_msg)
|
1161
|
+
raise TTFParsingError(error_msg) from e
|
669
1162
|
|
670
1163
|
def _read_coords(self, num_points, bit_byte, bit_delta, flags, data):
|
671
1164
|
value = 0
|
672
1165
|
for i in range(num_points):
|
1166
|
+
if i >= len(flags):
|
1167
|
+
error_msg = f"Flag index {i} out of range (flags length: {len(flags)})"
|
1168
|
+
self._logger(error_msg)
|
1169
|
+
raise TTFParsingError(error_msg)
|
1170
|
+
|
673
1171
|
flag = flags[i]
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
1172
|
+
try:
|
1173
|
+
if flag & bit_byte:
|
1174
|
+
# Single byte coordinate
|
1175
|
+
if len(data.getvalue()) - data.tell() < 1:
|
1176
|
+
error_msg = "Insufficient data for single byte coordinate"
|
1177
|
+
self._logger(error_msg)
|
1178
|
+
raise TTFParsingError(
|
1179
|
+
"Insufficient data for single byte coordinate"
|
1180
|
+
)
|
1181
|
+
x = struct.unpack("B", data.read(1))[0]
|
1182
|
+
if flag & bit_delta:
|
1183
|
+
value += x
|
1184
|
+
else:
|
1185
|
+
value -= x
|
1186
|
+
elif ~flag & bit_delta:
|
1187
|
+
# Two byte coordinate
|
1188
|
+
if len(data.getvalue()) - data.tell() < 2:
|
1189
|
+
error_msg = "Insufficient data for two byte coordinate"
|
1190
|
+
self._logger(error_msg)
|
1191
|
+
raise TTFParsingError(
|
1192
|
+
"Insufficient data for two byte coordinate"
|
1193
|
+
)
|
1194
|
+
value += struct.unpack(">h", data.read(2))[0]
|
1195
|
+
# Coordinate unchanged from previous
|
1196
|
+
yield value
|
1197
|
+
except struct.error as e:
|
1198
|
+
error_msg = f"Struct unpacking error in coordinates: {e}"
|
1199
|
+
self._logger(error_msg)
|
1200
|
+
raise TTFParsingError(error_msg) from e
|
685
1201
|
|
686
1202
|
def parse_name(self):
|
687
1203
|
def decode(string):
|
@@ -715,12 +1231,249 @@ class TrueTypeFont:
|
|
715
1231
|
length,
|
716
1232
|
str_offset,
|
717
1233
|
) in records:
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
1234
|
+
try:
|
1235
|
+
if name_id == 1:
|
1236
|
+
self.font_family = decode(strings[str_offset : str_offset + length])
|
1237
|
+
elif name_id == 2:
|
1238
|
+
self.font_subfamily = decode(
|
1239
|
+
strings[str_offset : str_offset + length]
|
1240
|
+
)
|
1241
|
+
elif name_id == 3:
|
1242
|
+
# Unique Subfamily Name
|
1243
|
+
pass
|
1244
|
+
elif name_id == 4:
|
1245
|
+
self.font_name = decode(strings[str_offset : str_offset + length])
|
1246
|
+
except (IndexError, UnicodeDecodeError) as e:
|
1247
|
+
# Log error but continue parsing other name records
|
1248
|
+
warning_msg = f"Warning: Error decoding name record {name_id}: {e}"
|
1249
|
+
self._logger(warning_msg)
|
1250
|
+
continue
|
1251
|
+
|
1252
|
+
def get_variation_sequences(self):
    """
    Return the Unicode variation sequence table of this font.

    Returns:
        dict: Maps ``(base_char, variation_selector)`` tuples to glyph IDs,
            e.g. ``{(0x4E00, 0xFE00): 1234}``. An empty dict is returned
            when the font object carries no ``_variation_sequences``
            attribute.
    """
    try:
        return self._variation_sequences
    except AttributeError:
        # Attribute was never set on this instance
        return {}
|
1262
|
+
|
1263
|
+
def has_variation_sequences(self):
    """
    Tell whether any Unicode variation sequence mapping is available.

    Returns:
        bool: True when ``_variation_sequences`` exists and is non-empty,
            False otherwise.
    """
    sequences = getattr(self, "_variation_sequences", None)
    if sequences:
        return True
    return False
|
1271
|
+
|
1272
|
+
def get_glyph_index(self, char, variation_selector=None):
    """
    Get the glyph index for a character, optionally with a variation selector.

    Args:
        char (str or int): The base character or its Unicode code point.
        variation_selector (int, optional): Unicode variation selector code
            point (e.g., 0xFE00-0xFE0F).

    Returns:
        int: Glyph index of the variation sequence if one is registered,
            otherwise the glyph index from the regular character map, or 0
            if the character is unknown or not a valid code point.
    """
    if variation_selector is not None:
        # A registered variation sequence takes precedence
        char_code = ord(char) if isinstance(char, str) else char
        # The table is optional (see get_variation_sequences)
        sequences = getattr(self, "_variation_sequences", {})
        vs_key = (char_code, variation_selector)
        if vs_key in sequences:
            return sequences[vs_key]

    # Fall back to regular character mapping
    if isinstance(char, str):
        return self._character_map.get(char, 0)

    # Numeric code point: chr() raises ValueError outside the Unicode range
    # and OverflowError for integers that do not fit a C int.
    try:
        return self._character_map.get(chr(char), 0)
    except (ValueError, OverflowError):
        return 0
|
1299
|
+
|
1300
|
+
def has_variation_selector(self, char, variation_selector):
    """
    Check if a character has a specific variation selector mapping.

    Args:
        char (str or int): The base character (string) or character code (int)
        variation_selector (int): Unicode variation selector code point

    Returns:
        bool: True if the variation sequence exists, False otherwise
            (including when the font has no variation sequence table).
    """
    char_code = ord(char) if isinstance(char, str) else char
    # The table is optional, consistent with get_variation_sequences()
    sequences = getattr(self, "_variation_sequences", {})
    return (char_code, variation_selector) in sequences
|
1314
|
+
|
1315
|
+
def get_available_variation_selectors(self, char):
    """
    Get all variation selectors available for a given character.

    Args:
        char (str or int): The base character (string) or character code (int)

    Returns:
        list: Variation selector code points registered for this character,
            in table order; empty when there are none or when the font has
            no variation sequence table.
    """
    char_code = ord(char) if isinstance(char, str) else char
    # The table is optional, consistent with get_variation_sequences()
    sequences = getattr(self, "_variation_sequences", {})
    return [vs for base_char, vs in sequences if base_char == char_code]
|
1331
|
+
|
1332
|
+
def lookup_glyph_with_variation(self, base_char, variation_selector=None):
    """
    Look up a glyph ID for a character, optionally with a variation selector.

    Args:
        base_char (str or int): The base character (string) or Unicode code point (int)
        variation_selector (int, optional): Unicode code point of variation selector

    Returns:
        int: Glyph ID for the character/variation sequence, or 0 if not found.
            A string that is not exactly one character cannot form a
            variation sequence; it is looked up in the regular character map
            as-is (the same fallback get_glyph_index uses for strings).
    """
    if isinstance(base_char, str):
        if len(base_char) != 1:
            # ord() would raise TypeError here; honour the "0 if not found" contract
            return self._character_map.get(base_char, 0)
        base_char_code = ord(base_char)
    else:
        base_char_code = base_char

    if variation_selector is not None:
        # A registered variation sequence takes precedence. The table is
        # optional, consistent with get_variation_sequences().
        sequences = getattr(self, "_variation_sequences", {})
        vs_key = (base_char_code, variation_selector)
        if vs_key in sequences:
            return sequences[vs_key]

    # Fall back to the regular character map, which is keyed by character
    try:
        return self._character_map.get(chr(base_char_code), 0)
    except (ValueError, OverflowError):
        # Invalid Unicode code point
        return 0
|
1359
|
+
|
1360
|
+
def parse_text_with_variation_sequences(self, text):
    """
    Split text into base code points paired with an optional variation selector.

    The text is first normalised to a list of code points: a high surrogate
    (U+D800-U+DBFF) directly followed by a low surrogate (U+DC00-U+DFFF) is
    merged into the single code point it encodes, everything else is taken
    as-is (including unpaired surrogates). The list is then walked and a
    code point that is followed by a variation selector (U+FE00-U+FE0F or
    U+E0100-U+E01EF) consumes that selector.

    Args:
        text (str): Input text that may contain variation sequences

    Yields:
        tuple: ``(base_char_code, variation_selector)``; the selector is
            ``None`` when the code point is not followed by one.
    """
    # Pass 1: text -> code points, merging surrogate pairs
    scalars = []
    length = len(text)
    pos = 0
    while pos < length:
        unit = ord(text[pos])
        pos += 1
        if 0xD800 <= unit <= 0xDBFF and pos < length:
            trail = ord(text[pos])
            if 0xDC00 <= trail <= 0xDFFF:
                unit = 0x10000 + ((unit - 0xD800) << 10) + (trail - 0xDC00)
                pos += 1  # The low surrogate has been consumed as well
        scalars.append(unit)

    # Pass 2: attach a directly following variation selector to its base
    total = len(scalars)
    pos = 0
    while pos < total:
        base = scalars[pos]
        pos += 1
        selector = None
        if pos < total:
            follower = scalars[pos]
            if 0xFE00 <= follower <= 0xFE0F or 0xE0100 <= follower <= 0xE01EF:
                selector = follower
                pos += 1  # The selector is not reported as a base of its own
        yield (base, selector)
|
1426
|
+
|
1427
|
+
def debug_variation_sequences(self):
    """
    Build a human-readable report of the parsed variation sequences.

    Returns:
        str: A fixed message when the font has no variation sequences,
            otherwise a header line followed by one line per sequence.
            Entries that cannot be formatted as code points (non-integer
            or out-of-range keys) are listed in raw form instead.
    """
    # The table is optional, consistent with get_variation_sequences()
    sequences = getattr(self, "_variation_sequences", None)
    if not sequences:
        return "No variation sequences found in font"

    debug_info = [f"Found {len(sequences)} variation sequences:"]
    for (base_char, vs), glyph_id in sequences.items():
        try:
            base_char_str = (
                chr(base_char) if isinstance(base_char, int) else str(base_char)
            )
            vs_str = f"U+{vs:04X}" if vs else "None"
            debug_info.append(
                f" {base_char_str} (U+{base_char:04X}) + {vs_str} -> glyph {glyph_id}"
            )
        except (ValueError, TypeError, OverflowError):
            # chr() raises OverflowError for integers beyond a C int; the
            # hex format spec raises ValueError/TypeError for non-integers.
            debug_info.append(f" {base_char} + {vs} -> glyph {glyph_id}")

    return "\n".join(debug_info)
|
1451
|
+
|
1452
|
+
def test_variation_sequence_lookup(self, base_char, variation_selector):
|
1453
|
+
"""
|
1454
|
+
Test method to check if a specific variation sequence is supported.
|
1455
|
+
|
1456
|
+
Args:
|
1457
|
+
base_char (str): The base character
|
1458
|
+
variation_selector (int): Unicode code point of variation selector
|
1459
|
+
|
1460
|
+
Returns:
|
1461
|
+
dict: Information about the lookup result
|
1462
|
+
"""
|
1463
|
+
base_char_code = ord(base_char) if isinstance(base_char, str) else base_char
|
1464
|
+
vs_key = (base_char_code, variation_selector)
|
1465
|
+
|
1466
|
+
regular_glyph = self._character_map.get(base_char, 0)
|
1467
|
+
variation_glyph = self.lookup_glyph_with_variation(
|
1468
|
+
base_char, variation_selector
|
1469
|
+
)
|
1470
|
+
|
1471
|
+
return {
|
1472
|
+
"base_char": base_char,
|
1473
|
+
"base_char_code": f"U+{base_char_code:04X}",
|
1474
|
+
"variation_selector": f"U+{variation_selector:04X}",
|
1475
|
+
"regular_glyph_id": regular_glyph,
|
1476
|
+
"variation_glyph_id": variation_glyph,
|
1477
|
+
"has_variation": vs_key in self._variation_sequences,
|
1478
|
+
"uses_different_glyph": regular_glyph != variation_glyph,
|
1479
|
+
}
|