meerk40t 0.9.7051__py2.py3-none-any.whl → 0.9.7910__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. meerk40t/balormk/controller.py +3 -3
  2. meerk40t/balormk/device.py +7 -0
  3. meerk40t/balormk/driver.py +23 -14
  4. meerk40t/balormk/galvo_commands.py +18 -3
  5. meerk40t/balormk/gui/balorconfig.py +6 -0
  6. meerk40t/balormk/livelightjob.py +36 -14
  7. meerk40t/camera/camera.py +1 -0
  8. meerk40t/camera/gui/camerapanel.py +154 -58
  9. meerk40t/camera/plugin.py +46 -5
  10. meerk40t/core/elements/branches.py +90 -20
  11. meerk40t/core/elements/elements.py +59 -37
  12. meerk40t/core/elements/trace.py +10 -6
  13. meerk40t/core/node/node.py +2 -0
  14. meerk40t/core/plotplanner.py +7 -4
  15. meerk40t/device/gui/defaultactions.py +78 -14
  16. meerk40t/dxf/dxf_io.py +42 -0
  17. meerk40t/grbl/controller.py +245 -35
  18. meerk40t/grbl/device.py +102 -26
  19. meerk40t/grbl/driver.py +8 -2
  20. meerk40t/grbl/gui/grblconfiguration.py +6 -0
  21. meerk40t/grbl/gui/grblcontroller.py +1 -1
  22. meerk40t/gui/about.py +7 -0
  23. meerk40t/gui/choicepropertypanel.py +20 -30
  24. meerk40t/gui/devicepanel.py +27 -16
  25. meerk40t/gui/help_assets/help_assets.py +126 -2
  26. meerk40t/gui/icons.py +15 -0
  27. meerk40t/gui/laserpanel.py +102 -54
  28. meerk40t/gui/materialtest.py +10 -0
  29. meerk40t/gui/mkdebug.py +268 -9
  30. meerk40t/gui/navigationpanels.py +74 -8
  31. meerk40t/gui/propertypanels/operationpropertymain.py +185 -91
  32. meerk40t/gui/scenewidgets/elementswidget.py +7 -1
  33. meerk40t/gui/scenewidgets/selectionwidget.py +24 -9
  34. meerk40t/gui/simulation.py +1 -1
  35. meerk40t/gui/statusbarwidgets/shapepropwidget.py +50 -40
  36. meerk40t/gui/statusbarwidgets/statusbar.py +2 -2
  37. meerk40t/gui/toolwidgets/toolmeasure.py +1 -1
  38. meerk40t/gui/toolwidgets/toolnodeedit.py +4 -1
  39. meerk40t/gui/toolwidgets/tooltabedit.py +9 -7
  40. meerk40t/gui/wxmeerk40t.py +45 -15
  41. meerk40t/gui/wxmmain.py +23 -9
  42. meerk40t/gui/wxmribbon.py +36 -0
  43. meerk40t/gui/wxutils.py +66 -42
  44. meerk40t/kernel/inhibitor.py +120 -0
  45. meerk40t/kernel/kernel.py +38 -0
  46. meerk40t/lihuiyu/controller.py +33 -3
  47. meerk40t/lihuiyu/device.py +99 -4
  48. meerk40t/lihuiyu/driver.py +65 -5
  49. meerk40t/lihuiyu/gui/lhycontrollergui.py +69 -24
  50. meerk40t/lihuiyu/gui/lhydrivergui.py +6 -0
  51. meerk40t/lihuiyu/laserspeed.py +17 -10
  52. meerk40t/lihuiyu/parser.py +23 -0
  53. meerk40t/main.py +2 -2
  54. meerk40t/moshi/gui/moshidrivergui.py +7 -0
  55. meerk40t/newly/controller.py +3 -2
  56. meerk40t/newly/device.py +23 -2
  57. meerk40t/newly/driver.py +8 -3
  58. meerk40t/newly/gui/newlyconfig.py +7 -0
  59. meerk40t/ruida/gui/ruidaconfig.py +7 -0
  60. meerk40t/tools/geomstr.py +142 -49
  61. meerk40t/tools/rasterplotter.py +0 -5
  62. meerk40t/tools/ttfparser.py +921 -168
  63. {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/METADATA +1 -1
  64. {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/RECORD +69 -68
  65. {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/LICENSE +0 -0
  66. {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/WHEEL +0 -0
  67. {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/entry_points.txt +0 -0
  68. {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/top_level.txt +0 -0
  69. {meerk40t-0.9.7051.dist-info → meerk40t-0.9.7910.dist-info}/zip-safe +0 -0
@@ -14,6 +14,26 @@ WE_HAVE_INSTRUCTIONS = 1 << 8
14
14
  USE_MY_METRICS = 1 << 9
15
15
  OVERLAP_COMPOUND = 1 << 10
16
16
 
17
+ _FLAG_NAMES = {
18
+ ON_CURVE_POINT: "ON_CURVE_POINT",
19
+ ARG_1_AND_2_ARE_WORDS: "ARG_1_AND_2_ARE_WORDS",
20
+ ARGS_ARE_XY_VALUES: "ARGS_ARE_XY_VALUES",
21
+ ROUND_XY_TO_GRID: "ROUND_XY_TO_GRID",
22
+ WE_HAVE_A_SCALE: "WE_HAVE_A_SCALE",
23
+ MORE_COMPONENTS: "MORE_COMPONENTS",
24
+ WE_HAVE_AN_X_AND_Y_SCALE: "WE_HAVE_AN_X_AND_Y_SCALE",
25
+ WE_HAVE_A_TWO_BY_TWO: "WE_HAVE_A_TWO_BY_TWO",
26
+ WE_HAVE_INSTRUCTIONS: "WE_HAVE_INSTRUCTIONS",
27
+ USE_MY_METRICS: "USE_MY_METRICS",
28
+ OVERLAP_COMPOUND: "OVERLAP_COMPOUND",
29
+ }
30
+
31
+
32
+ def flagname(flag):
33
+ """Return all active flag names for the given flag value."""
34
+ names = [name for bit, name in _FLAG_NAMES.items() if flag & bit]
35
+ return " | ".join(names) if names else f"UNKNOWN_FLAG_{flag}"
36
+
17
37
 
18
38
  class TTFParsingError(ValueError):
19
39
  """Parsing error"""
@@ -27,7 +47,7 @@ class TrueTypeFont:
27
47
  self.checksum_adjust = None
28
48
  self.magic_number = None
29
49
  self.flags = None
30
- self.units_per_em = None
50
+ self.units_per_em = 1000 # Default value, will be overwritten during parsing
31
51
  self.created = None
32
52
  self.modified = None
33
53
  self.active = True
@@ -51,23 +71,29 @@ class TrueTypeFont:
51
71
  self.caret_slope_run = None
52
72
  self.caret_offset = None
53
73
  self.metric_data_format = None
54
- self.number_of_long_hor_metrics = None
74
+ self.number_of_long_hor_metrics = (
75
+ 0 # Default value, will be overwritten during parsing
76
+ )
55
77
 
56
78
  self.font_family = None
57
79
  self.font_subfamily = None
58
80
  self.font_name = None
59
81
  self._character_map = {}
60
- self._glyph_offsets = None
61
- self.horizontal_metrics = None
82
+ self._variation_sequences = {} # Unicode variation sequences mapping
83
+ self._glyph_offsets = []
84
+ self.horizontal_metrics = []
62
85
 
63
86
  self.is_okay = False
87
+ self.cmap_version = -1
64
88
  self.parse_ttf(filename, require_checksum=require_checksum)
65
89
  if (
66
90
  b"CFF " in self._raw_tables
67
91
  and b"glyf" not in self._raw_tables
68
92
  and b"loca" not in self._raw_tables
69
93
  ):
70
- raise TTFParsingError("Format CFF font file is not supported.")
94
+ error_msg = "Format CFF font file is not supported."
95
+ self._logger(error_msg)
96
+ raise TTFParsingError(error_msg)
71
97
  try:
72
98
  self.parse_head()
73
99
  self.parse_hhea()
@@ -76,11 +102,18 @@ class TrueTypeFont:
76
102
  self.parse_cmap()
77
103
  self.parse_name()
78
104
  except Exception as e:
79
- print (f"TTF init for {filename} crashed: {e}")
80
- raise TTFParsingError("Error while parsing data") from e
105
+ error_msg = f"TTF init for {filename} crashed: {e}"
106
+ self._logger(error_msg)
107
+ raise TTFParsingError(error_msg) from e
81
108
  self.glyph_data = list(self.parse_glyf())
82
109
  self._line_information = []
83
110
 
111
+ def _logger(self, message):
112
+ DEBUG = True
113
+ # This can be replaced with an actual logging implementation
114
+ if DEBUG:
115
+ print(message)
116
+
84
117
  def line_information(self):
85
118
  return self._line_information
86
119
 
@@ -97,12 +130,14 @@ class TrueTypeFont:
97
130
  f.seek(off)
98
131
  string = f.read(length)
99
132
  f.seek(location)
133
+ if string is None:
134
+ return ""
100
135
  return string.decode("UTF-16BE")
101
136
  except UnicodeDecodeError:
102
137
  try:
103
- return string.decode("UTF8")
138
+ return string.decode("UTF8") if string is not None else ""
104
139
  except UnicodeDecodeError:
105
- return string
140
+ return string if string is not None else ""
106
141
 
107
142
  try:
108
143
  with open(filename, "rb") as f:
@@ -152,13 +187,13 @@ class TrueTypeFont:
152
187
  if name_id == 1:
153
188
  font_family = get_string(f, pos, length)
154
189
  elif name_id == 2:
155
- font_family = get_string(f, pos, length)
190
+ font_subfamily = get_string(f, pos, length)
156
191
  elif name_id == 4:
157
192
  font_name = get_string(f, pos, length)
158
193
  if font_family and font_subfamily and font_name:
159
194
  break
160
195
  return font_family, font_subfamily, font_name
161
- except Exception as e:
196
+ except Exception:
162
197
  # Anything fishy
163
198
  return None
164
199
 
@@ -185,8 +220,13 @@ class TrueTypeFont:
185
220
  line_start_y = offset_y * scale
186
221
  offset_x = offs
187
222
  # print (f"{offset_x}, {offset_y}: '{text}', fs={font_size}, em:{self.units_per_em}")
188
- for c in text:
189
- index = self._character_map.get(c, 0)
223
+ for (
224
+ base_char_code,
225
+ variation_selector,
226
+ ) in self.parse_text_with_variation_sequences(text):
227
+ index = self.lookup_glyph_with_variation(
228
+ base_char_code, variation_selector
229
+ )
190
230
  if index >= len(self.glyph_data):
191
231
  continue
192
232
  if index >= len(self.horizontal_metrics):
@@ -209,26 +249,17 @@ class TrueTypeFont:
209
249
  curr = contour[-1]
210
250
  next = contour[0]
211
251
  if curr[2] & ON_CURVE_POINT:
212
- if self.active:
213
- path.move(
214
- (offset_x + curr[0]) * scale,
215
- (offset_y + curr[1]) * scale,
216
- )
252
+ start_x = (offset_x + curr[0]) * scale
253
+ start_y = (offset_y + curr[1]) * scale
254
+ elif next[2] & ON_CURVE_POINT:
255
+ start_x = (offset_x + next[0]) * scale
256
+ start_y = (offset_y + next[1]) * scale
217
257
  else:
218
- if next[2] & ON_CURVE_POINT:
219
- if self.active:
220
- path.move(
221
- (offset_x + next[0]) * scale,
222
- (offset_y + next[1]) * scale,
223
- )
224
- else:
225
- if self.active:
226
- path.move(
227
- (offset_x + (curr[0] + next[0]) / 2) * scale,
228
- (offset_y + (curr[1] + next[1]) / 2) * scale,
229
- )
258
+ start_x = (offset_x + (curr[0] + next[0]) / 2) * scale
259
+ start_y = (offset_y + (curr[1] + next[1]) / 2) * scale
260
+ if self.active:
261
+ path.move(start_x, start_y)
230
262
  for i in range(len(contour)):
231
- prev = curr
232
263
  curr = next
233
264
  next = contour[(i + 1) % len(contour)]
234
265
  if curr[2] & ON_CURVE_POINT:
@@ -242,9 +273,10 @@ class TrueTypeFont:
242
273
  else:
243
274
  next2 = next
244
275
  if not next[2] & ON_CURVE_POINT:
245
- next2 = (curr[0] + next[0]) / 2, (
246
- curr[1] + next[1]
247
- ) / 2
276
+ next2 = (
277
+ (curr[0] + next[0]) / 2,
278
+ (curr[1] + next[1]) / 2,
279
+ )
248
280
  if self.active:
249
281
  path.quad(
250
282
  None,
@@ -305,7 +337,7 @@ class TrueTypeFont:
305
337
  entry_selector,
306
338
  range_shift,
307
339
  ) = struct.unpack(">LHHHH", header)
308
- for i in range(num_tables):
340
+ for _ in range(num_tables):
309
341
  tag, checksum, offset, length = struct.unpack(">4sLLL", f.read(16))
310
342
  p = f.tell()
311
343
  f.seek(offset)
@@ -314,17 +346,15 @@ class TrueTypeFont:
314
346
  if require_checksum:
315
347
  for b, byte in enumerate(data):
316
348
  checksum -= byte << 24 - (8 * (b % 4))
317
- if tag == b"head":
318
- if checksum % (1 << 32) != 0:
319
- raise TTFParsingError(
320
- f"invalid checksum: {checksum % (1 << 32)} != 0"
321
- )
349
+ if tag == b"head" and checksum % (1 << 32) != 0:
350
+ error_msg = f"Invalid checksum for table {tag.decode('ascii')}: {checksum % (1 << 32)} != 0"
351
+ self._logger(error_msg)
352
+ raise TTFParsingError(error_msg)
322
353
  self._raw_tables[tag] = data
323
354
  except Exception as e:
324
- raise TTFParsingError(
325
- f"invalid format: {e}"
326
- ) from e
327
-
355
+ error_msg = f"Error parsing TTF file {font_path}: {e}"
356
+ self._logger(error_msg)
357
+ raise TTFParsingError(error_msg) from e
328
358
 
329
359
  def parse_head(self):
330
360
  data = self._raw_tables[b"head"]
@@ -381,6 +411,7 @@ class TrueTypeFont:
381
411
 
382
412
  def _parse_cmap_table(self, data):
383
413
  _fmt = struct.unpack(">H", data.read(2))[0]
414
+ self.cmap_version = _fmt
384
415
  if _fmt == 0:
385
416
  return self._parse_cmap_format_0(data)
386
417
  elif _fmt == 2:
@@ -399,6 +430,7 @@ class TrueTypeFont:
399
430
  return self._parse_cmap_format_13(data)
400
431
  elif _fmt == 14:
401
432
  return self._parse_cmap_format_14(data)
433
+ self.cmap_version = -1
402
434
  return False
403
435
 
404
436
  def _parse_cmap_format_0(self, data):
@@ -408,9 +440,75 @@ class TrueTypeFont:
408
440
  return True
409
441
 
410
442
  def _parse_cmap_format_2(self, data):
411
- length, language = struct.unpack(">HH", data.read(4))
412
- subheader_keys = struct.unpack(">256H", data.read(256 * 2))
413
- return False
443
+ """
444
+ Format 2: high-byte mapping through table
445
+ Used for mixed 8/16-bit encoding (primarily for CJK fonts)
446
+ This is a complex format - implementing basic support
447
+ """
448
+ try:
449
+ length, language = struct.unpack(">HH", data.read(4))
450
+
451
+ # Read subheader keys (256 entries, each 2 bytes)
452
+ subheader_keys = struct.unpack(">256H", data.read(256 * 2))
453
+
454
+ # Find the maximum subheader index to determine how many subheaders we have
455
+ max_subheader_index = max(subheader_keys)
456
+ num_subheaders = (max_subheader_index // 8) + 1 # Each subheader is 8 bytes
457
+
458
+ # Calculate remaining data size for validation
459
+ remaining_data_size = len(data.getvalue()) - data.tell()
460
+ expected_subheader_size = num_subheaders * 8
461
+
462
+ if remaining_data_size < expected_subheader_size:
463
+ error_msg = f"Insufficient data for subheaders in cmap format 2: expected {expected_subheader_size} bytes, got {remaining_data_size} bytes"
464
+ self._logger(error_msg)
465
+ raise TTFParsingError(error_msg)
466
+
467
+ # Read subheaders
468
+ subheaders = []
469
+ for _ in range(num_subheaders):
470
+ first_code, entry_count, id_delta, id_range_offset = struct.unpack(
471
+ ">HHHH", data.read(8)
472
+ )
473
+ subheaders.append((first_code, entry_count, id_delta, id_range_offset))
474
+
475
+ # For format 2, character mapping is complex and depends on:
476
+ # - High byte determining which subheader to use
477
+ # - Low byte being processed through that subheader
478
+ #
479
+ # This is primarily used for CJK encodings and requires careful handling
480
+ # For now, we'll implement basic single-byte mapping (subheader 0)
481
+
482
+ if subheaders:
483
+ first_code, entry_count, id_delta, id_range_offset = subheaders[0]
484
+
485
+ # For single-byte characters (using subheader 0)
486
+ for byte_val in range(256):
487
+ if (
488
+ subheader_keys[byte_val] == 0
489
+ and byte_val >= first_code
490
+ and byte_val < first_code + entry_count
491
+ ):
492
+ # This character has a mapping in subheader 0
493
+ try:
494
+ char_code = byte_val
495
+ if 0 <= char_code <= 0x10FFFF:
496
+ # Simple mapping for basic characters
497
+ glyph_id = (char_code + id_delta) & 0xFFFF
498
+ if glyph_id != 0: # 0 means missing glyph
499
+ self._character_map[chr(char_code)] = glyph_id
500
+ except ValueError:
501
+ continue
502
+
503
+ return True
504
+ except struct.error as e:
505
+ error_msg = f"Struct unpacking error in cmap format 2: {e}"
506
+ self._logger(error_msg)
507
+ raise TTFParsingError(error_msg) from e
508
+ except Exception as e:
509
+ error_msg = f"Error parsing cmap format 2: {e}"
510
+ self._logger(error_msg)
511
+ raise TTFParsingError(error_msg) from e
414
512
 
415
513
  def _parse_cmap_format_4(self, data):
416
514
  (
@@ -426,7 +524,7 @@ class TrueTypeFont:
426
524
  # We need to have an even amount of bytes for unpack
427
525
  if len(data) % 2 == 1:
428
526
  data = data[:-1]
429
- data = struct.unpack(f">{int(len(data)/2)}H", data)
527
+ data = struct.unpack(f">{len(data)//2}H", data)
430
528
  ends = data[:seg_count]
431
529
  starts = data[seg_count + 1 : seg_count * 2 + 1]
432
530
  deltas = data[seg_count * 2 + 1 : seg_count * 3 + 1]
@@ -436,7 +534,7 @@ class TrueTypeFont:
436
534
  start = starts[seg]
437
535
  delta = deltas[seg]
438
536
  offset = offsets[seg]
439
- if start == end and end == 0xFFFF:
537
+ if start == end == 0xFFFF:
440
538
  break
441
539
 
442
540
  for c in range(start, end + 1):
@@ -456,16 +554,125 @@ class TrueTypeFont:
456
554
  language,
457
555
  first_code,
458
556
  entry_count,
459
- ) = struct.unpack(">HHHHHH", data.read(12))
460
- for i, c in struct.unpack(f">{entry_count}H", data.read(entry_count * 2)):
461
- self._character_map[chr(i + 1 + first_code)] = c
557
+ ) = struct.unpack(">HHHH", data.read(8))
558
+ glyph_indices = struct.unpack(f">{entry_count}H", data.read(entry_count * 2))
559
+ for i, glyph_index in enumerate(glyph_indices):
560
+ try:
561
+ char_code = i + first_code
562
+ if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
563
+ self._character_map[chr(char_code)] = glyph_index
564
+ except ValueError:
565
+ # Invalid Unicode character, skip
566
+ continue
462
567
  return True
463
568
 
464
569
  def _parse_cmap_format_8(self, data):
465
- return False
570
+ """
571
+ Format 8: mixed 16-bit and 32-bit coverage
572
+ Used for Unicode variation sequences and supplementary characters
573
+ """
574
+ try:
575
+ # Read header
576
+ reserved, length, language = struct.unpack(">HII", data.read(10))
577
+
578
+ # Read is32 array (8192 bytes = 65536 bits, one bit per 16-bit code)
579
+ is32_data = data.read(8192)
580
+ if len(is32_data) < 8192:
581
+ error_msg = "Insufficient data for is32 array in cmap format 8"
582
+ self._logger(error_msg)
583
+ raise TTFParsingError(error_msg)
584
+
585
+ # Read number of groups
586
+ n_groups = struct.unpack(">I", data.read(4))[0]
587
+
588
+ # Process each group
589
+ for group_idx in range(n_groups):
590
+ if len(data.getvalue()) - data.tell() < 12:
591
+ error_msg = (
592
+ f"Insufficient data for group {group_idx} in cmap format 8"
593
+ )
594
+ self._logger(error_msg)
595
+ raise TTFParsingError(error_msg)
596
+
597
+ start_char_code, end_char_code, start_glyph_id = struct.unpack(
598
+ ">III", data.read(12)
599
+ )
600
+
601
+ # Validate group
602
+ if start_char_code > end_char_code:
603
+ continue # Skip invalid group
604
+
605
+ # Map characters in this group
606
+ for char_code in range(start_char_code, end_char_code + 1):
607
+ try:
608
+ if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
609
+ glyph_id = start_glyph_id + (char_code - start_char_code)
610
+ self._character_map[chr(char_code)] = glyph_id
611
+ except ValueError:
612
+ # Invalid Unicode character, skip
613
+ continue
614
+
615
+ return True
616
+ except struct.error as e:
617
+ error_msg = f"Struct unpacking error in cmap format 8: {e}"
618
+ self._logger(error_msg)
619
+ raise TTFParsingError(error_msg) from e
620
+ except Exception as e:
621
+ error_msg = f"Error parsing cmap format 8: {e}"
622
+ self._logger(error_msg)
623
+ raise TTFParsingError(error_msg) from e
466
624
 
467
625
  def _parse_cmap_format_10(self, data):
468
- return False
626
+ """
627
+ Format 10: trimmed table
628
+ Similar to format 6 but uses 32-bit character codes and glyph IDs
629
+ """
630
+ try:
631
+ # Read header (reserved, length, language, startCharCode, numChars)
632
+ reserved, length, language, start_char_code, num_chars = struct.unpack(
633
+ ">HIIII", data.read(18)
634
+ )
635
+
636
+ # Validate parameters
637
+ if num_chars == 0:
638
+ return True # Empty table is valid
639
+
640
+ if start_char_code > 0x10FFFF:
641
+ error_msg = (
642
+ f"Invalid start character code in cmap format 10: {start_char_code}"
643
+ )
644
+ self._logger(error_msg)
645
+ raise TTFParsingError(error_msg)
646
+
647
+ # Check we have enough data for the glyph array
648
+ expected_data_size = num_chars * 2 # 2 bytes per glyph ID
649
+ if len(data.getvalue()) - data.tell() < expected_data_size:
650
+ error_msg = f"Insufficient data for glyph array in cmap format 10: expected {expected_data_size} bytes"
651
+ self._logger(error_msg)
652
+ raise TTFParsingError(error_msg)
653
+
654
+ # Read glyph IDs
655
+ glyph_ids = struct.unpack(f">{num_chars}H", data.read(expected_data_size))
656
+
657
+ # Map characters to glyphs
658
+ for i, glyph_id in enumerate(glyph_ids):
659
+ char_code = start_char_code + i
660
+ try:
661
+ if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
662
+ self._character_map[chr(char_code)] = glyph_id
663
+ except ValueError:
664
+ # Invalid Unicode character, skip
665
+ continue
666
+
667
+ return True
668
+ except struct.error as e:
669
+ error_msg = f"Struct unpacking error in cmap format 10: {e}"
670
+ self._logger(error_msg)
671
+ raise TTFParsingError(error_msg) from e
672
+ except Exception as e:
673
+ error_msg = f"Error parsing cmap format 10: {e}"
674
+ self._logger(error_msg)
675
+ raise TTFParsingError(error_msg) from e
469
676
 
470
677
  def _parse_cmap_format_12(self, data):
471
678
  (
@@ -474,13 +681,19 @@ class TrueTypeFont:
474
681
  language,
475
682
  n_groups,
476
683
  ) = struct.unpack(">HIII", data.read(14))
477
- for seg in range(n_groups):
684
+ for _ in range(n_groups):
478
685
  (start_char_code, end_char_code, start_glyph_code) = struct.unpack(
479
686
  ">III", data.read(12)
480
687
  )
481
688
 
482
- for i, c in enumerate(range(start_char_code, end_char_code)):
483
- self._character_map[chr(c)] = start_glyph_code + i
689
+ for char_code in range(start_char_code, end_char_code + 1):
690
+ try:
691
+ if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
692
+ glyph_index = start_glyph_code + (char_code - start_char_code)
693
+ self._character_map[chr(char_code)] = glyph_index
694
+ except ValueError:
695
+ # Invalid Unicode character, skip
696
+ continue
484
697
  return True
485
698
 
486
699
  def _parse_cmap_format_13(self, data):
@@ -490,17 +703,174 @@ class TrueTypeFont:
490
703
  language,
491
704
  n_groups,
492
705
  ) = struct.unpack(">HIII", data.read(14))
493
- for seg in range(n_groups):
706
+ for _ in range(n_groups):
494
707
  (start_char_code, end_char_code, glyph_code) = struct.unpack(
495
708
  ">III", data.read(12)
496
709
  )
497
710
 
498
- for c in enumerate(range(start_char_code, end_char_code)):
499
- self._character_map[chr(c)] = glyph_code
711
+ for char_code in range(start_char_code, end_char_code + 1):
712
+ try:
713
+ if 0 <= char_code <= 0x10FFFF: # Valid Unicode range
714
+ self._character_map[chr(char_code)] = glyph_code
715
+ except ValueError:
716
+ # Invalid Unicode character, skip
717
+ continue
500
718
  return True
501
719
 
502
720
  def _parse_cmap_format_14(self, data):
503
- return False
721
+ """
722
+ Format 14: Unicode variation sequences
723
+ Maps variation selector sequences to glyphs
724
+ This format handles Unicode Variation Sequences (UVS) where a base character
725
+ combined with a variation selector can map to a specific glyph variant.
726
+
727
+ Performance optimized version to handle large ranges efficiently.
728
+ """
729
+ try:
730
+ # Store current position to calculate relative offsets
731
+ subtable_start = (
732
+ data.tell() - 6
733
+ ) # Subtract 6 for format and length already read
734
+
735
+ # Read header
736
+ length, num_var_selector_records = struct.unpack(">IH", data.read(6))
737
+
738
+ # Limit processing to avoid infinite loops on malformed fonts
739
+ MAX_VAR_SELECTOR_RECORDS = 100
740
+ MAX_UNICODE_RANGES = 1000
741
+ MAX_UVS_MAPPINGS = 10000
742
+ MAX_RANGE_SIZE = 10000 # Limit individual range processing
743
+
744
+ if num_var_selector_records > MAX_VAR_SELECTOR_RECORDS:
745
+ warning_msg = f"Warning: Too many variation selector records ({num_var_selector_records}), limiting to {MAX_VAR_SELECTOR_RECORDS}"
746
+ self._logger(warning_msg)
747
+ num_var_selector_records = MAX_VAR_SELECTOR_RECORDS
748
+
749
+ # Each variation selector record is 11 bytes
750
+ for record_idx in range(num_var_selector_records):
751
+ if len(data.getvalue()) - data.tell() < 11:
752
+ error_msg = (
753
+ f"Insufficient data for variation selector record {record_idx}"
754
+ )
755
+ self._logger(error_msg)
756
+ break # Skip remaining records instead of crashing
757
+
758
+ # Read variation selector record (24-bit variation selector + 2 offsets)
759
+ vs_bytes = data.read(3)
760
+ variation_selector = struct.unpack(">I", vs_bytes + b"\x00")[
761
+ 0
762
+ ] # Convert 24-bit to 32-bit
763
+ default_uvs_offset, non_default_uvs_offset = struct.unpack(
764
+ ">II", data.read(8)
765
+ )
766
+
767
+ # Save current position to return to after processing tables
768
+ current_pos = data.tell()
769
+
770
+ # Process Default UVS Table (if present) - OPTIMIZED
771
+ if default_uvs_offset != 0:
772
+ try:
773
+ # Seek to default UVS table (offset is from start of cmap subtable)
774
+ data.seek(subtable_start + default_uvs_offset)
775
+
776
+ # Read number of Unicode ranges
777
+ num_unicode_ranges = struct.unpack(">I", data.read(4))[0]
778
+
779
+ if num_unicode_ranges > MAX_UNICODE_RANGES:
780
+ warning_msg = f"Warning: Too many Unicode ranges ({num_unicode_ranges}), limiting to {MAX_UNICODE_RANGES}"
781
+ self._logger(warning_msg)
782
+ num_unicode_ranges = MAX_UNICODE_RANGES
783
+
784
+ # Process each Unicode range - WITH LIMITS
785
+ for _ in range(num_unicode_ranges):
786
+ if len(data.getvalue()) - data.tell() < 4:
787
+ break # Not enough data for this range
788
+
789
+ # Each range is 4 bytes: 3-byte start code + 1-byte additional count
790
+ range_data = data.read(4)
791
+ start_unicode_value = struct.unpack(
792
+ ">I", range_data[:3] + b"\x00"
793
+ )[0]
794
+ additional_count = range_data[3]
795
+
796
+ # Limit range size to prevent infinite loops
797
+ if additional_count > MAX_RANGE_SIZE:
798
+ warning_msg = f"Warning: Large range size ({additional_count}), limiting to {MAX_RANGE_SIZE}"
799
+ self._logger(warning_msg)
800
+ additional_count = MAX_RANGE_SIZE
801
+
802
+ # Pre-build character map for efficient lookup
803
+ char_map_keys = set(
804
+ ord(c) for c in self._character_map.keys()
805
+ )
806
+
807
+ # Map all characters in this range - OPTIMIZED
808
+ for offset in range(additional_count + 1):
809
+ base_char = start_unicode_value + offset
810
+ if (
811
+ 0 <= base_char <= 0x10FFFF
812
+ and base_char in char_map_keys
813
+ ):
814
+ try:
815
+ # For default UVS, use the default glyph mapping
816
+ base_char_obj = chr(base_char)
817
+ # Store variation sequence mapping
818
+ vs_key = (base_char, variation_selector)
819
+ self._variation_sequences[
820
+ vs_key
821
+ ] = self._character_map[base_char_obj]
822
+ except (ValueError, KeyError):
823
+ continue
824
+ except (struct.error, IndexError) as e:
825
+ error_msg = f"Error processing default UVS table: {e}"
826
+ self._logger(error_msg)
827
+
828
+ # Process Non-Default UVS Table (if present) - OPTIMIZED
829
+ if non_default_uvs_offset != 0:
830
+ try:
831
+ # Seek to non-default UVS table
832
+ data.seek(subtable_start + non_default_uvs_offset)
833
+
834
+ # Read number of UVS mappings
835
+ num_uvs_mappings = struct.unpack(">I", data.read(4))[0]
836
+
837
+ if num_uvs_mappings > MAX_UVS_MAPPINGS:
838
+ warning_msg = f"Warning: Too many UVS mappings ({num_uvs_mappings}), limiting to {MAX_UVS_MAPPINGS}"
839
+ self._logger(warning_msg)
840
+ num_uvs_mappings = MAX_UVS_MAPPINGS
841
+
842
+ # Process each UVS mapping
843
+ for _ in range(num_uvs_mappings):
844
+ if len(data.getvalue()) - data.tell() < 5:
845
+ break # Not enough data for this mapping
846
+
847
+ # Each mapping is 5 bytes: 3-byte Unicode value + 2-byte glyph ID
848
+ mapping_data = data.read(5)
849
+ unicode_value = struct.unpack(
850
+ ">I", mapping_data[:3] + b"\x00"
851
+ )[0]
852
+ glyph_id = struct.unpack(">H", mapping_data[3:5])[0]
853
+
854
+ if 0 <= unicode_value <= 0x10FFFF:
855
+ # Store non-default variation sequence mapping
856
+ vs_key = (unicode_value, variation_selector)
857
+ self._variation_sequences[vs_key] = glyph_id
858
+ except (struct.error, IndexError) as e:
859
+ error_msg = f"Error processing non-default UVS table: {e}"
860
+ self._logger(error_msg)
861
+
862
+ # Return to position after variation selector record
863
+ data.seek(current_pos)
864
+
865
+ return True
866
+ except struct.error as e:
867
+ error_msg = f"Struct unpacking error in cmap format 14: {e}"
868
+ self._logger(error_msg)
869
+ return False # Don't crash, just return False
870
+ except Exception as e:
871
+ error_msg = f"Error parsing cmap format 14: {e}"
872
+ self._logger(error_msg)
873
+ return False # Don't crash, just return False
504
874
 
505
875
  def parse_hhea(self):
506
876
  data = self._raw_tables[b"hhea"]
@@ -527,18 +897,31 @@ class TrueTypeFont:
527
897
  def parse_hmtx(self):
528
898
  data = self._raw_tables[b"hmtx"]
529
899
  count = self.number_of_long_hor_metrics
900
+
901
+ # Check if we have enough data for the long horizontal metrics
902
+ if len(data) < count * 4:
903
+ error_msg = f"Insufficient data in hmtx table: expected {count * 4} bytes, got {len(data)}"
904
+ self._logger(error_msg)
905
+ raise TTFParsingError(error_msg)
906
+
530
907
  hm = struct.unpack(f">{'Hh' * count}", data[: count * 4])
531
908
  self.horizontal_metrics = [
532
909
  (hm[2 * i], hm[2 * i + 1]) for i in range(len(hm) // 2)
533
910
  ]
534
- last_advance = hm[-2]
911
+
912
+ # Handle additional left side bearings for remaining glyphs
913
+ last_advance = hm[-2] if hm else 0
535
914
  table_start = count * 4
536
915
  if len(data) > table_start:
537
916
  remaining = (len(data) - table_start) // 2
538
- left_bearing = struct.unpack(
539
- f">{remaining}h", data[count * 4 : count * 4 + remaining * 2]
540
- )
541
- self.horizontal_metrics.extend((last_advance, left_bearing))
917
+ if remaining > 0:
918
+ left_bearings = struct.unpack(
919
+ f">{remaining}h", data[table_start : table_start + remaining * 2]
920
+ )
921
+ # Extend with tuples of (last_advance, left_bearing)
922
+ self.horizontal_metrics.extend(
923
+ [(last_advance, lb) for lb in left_bearings]
924
+ )
542
925
 
543
926
  def parse_loca(self):
544
927
  try:
@@ -547,10 +930,10 @@ class TrueTypeFont:
547
930
  self._glyph_offsets = []
548
931
  return
549
932
  if self.index_to_loc_format == 0:
550
- n = int(len(data) / 2)
933
+ n = len(data) // 2
551
934
  self._glyph_offsets = [g * 2 for g in struct.unpack(f">{n}H", data)]
552
935
  else:
553
- n = int(len(data) / 4)
936
+ n = len(data) // 4
554
937
  self._glyph_offsets = struct.unpack(f">{n}I", data)
555
938
 
556
939
  def parse_glyf(self):
@@ -562,7 +945,7 @@ class TrueTypeFont:
562
945
  start = self._glyph_offsets[index]
563
946
  end = self._glyph_offsets[index + 1]
564
947
  if start == end:
565
- yield list()
948
+ yield []
566
949
  return
567
950
  yield from self._parse_glyph(BytesIO(data[start:end]))
568
951
 
@@ -576,112 +959,245 @@ class TrueTypeFont:
576
959
  yield from self._parse_compound_glyph(data)
577
960
 
578
961
  def _parse_compound_glyph(self, data):
962
+ """
963
+ Parses a compound glyph, which can consist of multiple components.
964
+ Each component can have its own transformation matrix applied to it.
965
+ The transformation matrix can include scaling, translation, and rotation.
966
+ The flags indicate how the arguments are interpreted, whether they are
967
+ absolute coordinates or relative offsets, and whether the glyph is
968
+ transformed by a scale, x and y scale, or a two-by-two matrix.
969
+ The glyphs are returned as a list of contours, where each contour is a
970
+ list of points. Each point is a tuple of (x, y, flag), where
971
+ x and y are the coordinates of the point, and flag indicates whether
972
+ the point is an on-curve point or a control point.
973
+
974
+ The flags used in the compound glyphs are defined as follows:
975
+ - ON_CURVE_POINT: Indicates that the point is an on-curve point.
976
+ - ARG_1_AND_2_ARE_WORDS: Indicates that the first two arguments are
977
+ 16-bit signed integers instead of 8-bit unsigned integers.
978
+ - ARGS_ARE_XY_VALUES: Indicates that the arguments are interpreted as
979
+ x and y coordinates instead of relative offsets.
980
+ - ROUND_XY_TO_GRID: Indicates that the x and y coordinates should be
981
+ rounded to the nearest grid point.
982
+ - WE_HAVE_A_SCALE: Indicates that the glyph is transformed by a single
983
+ scale factor applied to both x and y coordinates.
984
+ - MORE_COMPONENTS: Indicates that there are more components in the
985
+ compound glyph. This flag is used to indicate that the glyph has
986
+ additional components that need to be processed.
987
+ - WE_HAVE_AN_X_AND_Y_SCALE: Indicates that the glyph is transformed by
988
+ separate scale factors for x and y coordinates.
989
+ - WE_HAVE_A_TWO_BY_TWO: Indicates that the glyph is transformed by a
990
+ two-by-two matrix, which allows for more complex transformations
991
+ including rotation and shearing.
992
+ - WE_HAVE_INSTRUCTIONS: Indicates that the glyph has instructions that
993
+ modify the rendering of the glyph. These instructions can include
994
+ additional transformations or adjustments to the glyph's shape.
995
+ - USE_MY_METRICS: Indicates that the glyph should use its own metrics
996
+ instead of the metrics defined in the font's horizontal metrics table.
997
+ - OVERLAP_COMPOUND: Indicates that the components of the compound glyph
998
+ may overlap. This flag is used to indicate that the components of the
999
+ compound glyph may overlap, which can affect how the glyph is rendered.
1000
+
1001
+ """
579
1002
  flags = MORE_COMPONENTS
580
- s = 1 << 14
1003
+ scale_factor = 1 << 14 # Fixed point scale factor (16384)
1004
+
1005
+ # Collect all contours from all components
1006
+ all_contours = []
1007
+
581
1008
  while flags & MORE_COMPONENTS:
582
- a, b, c, d, e, f = (
583
- 1.0,
584
- 0.0,
585
- 0.0,
586
- 1.0,
587
- 0.0,
588
- 0.0,
589
- )
590
- dest, src = -1, -1
1009
+ # Initialize transformation matrix as identity
1010
+ # Matrix format: [xx, xy, yx, yy, dx, dy]
1011
+ # Represents: [x'] = [xx xy] [x] + [dx]
1012
+ # [y'] [yx yy] [y] [dy]
1013
+ transform_xx, transform_xy, transform_yx, transform_yy = 1.0, 0.0, 0.0, 1.0
1014
+ transform_dx, transform_dy = 0.0, 0.0
1015
+
1016
+ # Read component header
591
1017
  flags, glyph_index = struct.unpack(">HH", data.read(4))
592
- if flags & ARGS_ARE_XY_VALUES:
593
- if flags & ARG_1_AND_2_ARE_WORDS:
594
- args1, args2 = struct.unpack(">hh", data.read(4))
595
- else:
596
- args1, args2 = struct.unpack(">bb", data.read(2))
597
- e, f = args1 / s, args2 / s
1018
+
1019
+ # Read arguments (either offsets or point indices)
1020
+ if flags & ARG_1_AND_2_ARE_WORDS:
1021
+ # 16-bit arguments
1022
+ arg1, arg2 = struct.unpack(">hh", data.read(4))
598
1023
  else:
599
- if flags & ARG_1_AND_2_ARE_WORDS:
600
- args1, args2 = struct.unpack(">HH", data.read(4))
1024
+ # 8-bit arguments
1025
+ if flags & ARGS_ARE_XY_VALUES:
1026
+ # Signed bytes for offsets
1027
+ arg1, arg2 = struct.unpack(">bb", data.read(2))
601
1028
  else:
602
- args1, args2 = struct.unpack(">BB", data.read(2))
603
- dest, src = args1, args2
1029
+ # Unsigned bytes for point indices
1030
+ arg1, arg2 = struct.unpack(">BB", data.read(2))
1031
+
1032
+ # Interpret arguments
1033
+ if flags & ARGS_ARE_XY_VALUES:
1034
+ # Arguments are x,y offsets
1035
+ transform_dx, transform_dy = float(arg1), float(arg2)
1036
+ else:
1037
+ # Arguments are point indices for point matching
1038
+ # Point matching not fully implemented - would need to find
1039
+ # matching points in already processed contours and source glyph
1040
+ transform_dx, transform_dy = 0.0, 0.0
1041
+
1042
+ # Read transformation matrix components
604
1043
  if flags & WE_HAVE_A_SCALE:
605
- a = struct.unpack(">h", data.read(2))[0] / s
606
- d = a
1044
+ # Single scale factor for both x and y
1045
+ scale = struct.unpack(">h", data.read(2))[0] / scale_factor
1046
+ transform_xx = transform_yy = scale
607
1047
  elif flags & WE_HAVE_AN_X_AND_Y_SCALE:
608
- a, d = struct.unpack(">hh", data.read(4))
609
- a, d = a / s, d / s
1048
+ # Separate scale factors for x and y
1049
+ scale_x, scale_y = struct.unpack(">hh", data.read(4))
1050
+ transform_xx = scale_x / scale_factor
1051
+ transform_yy = scale_y / scale_factor
610
1052
  elif flags & WE_HAVE_A_TWO_BY_TWO:
611
- a, b, c, d = struct.unpack(">hhhh", data.read(8))
612
- a, b, c, d = a / s, b / s, c / s, d / s
613
- original = data.tell()
614
- m = max(abs(a), abs(b))
615
- if abs(abs(a) - abs(c)) < 33.0 / s:
616
- m *= 2
617
- n = max(abs(c), abs(d))
618
- if abs(abs(b) - abs(c)) < 33.0 / s:
619
- n *= 2
620
- contours = list(self._parse_glyph_index(glyph_index))
621
- if src != -1 and dest != -1:
622
- pass # Not properly supported.
623
- if flags & ROUND_XY_TO_GRID:
624
- for contour in contours:
625
- yield [
626
- (
627
- round(m * (x * a / m + y * b / m + e)),
628
- round(n * (x * c / n + y * d / n + f)),
629
- flag,
630
- )
631
- for x, y, flag in contour
632
- ]
633
- else:
634
- for contour in contours:
635
- yield [
636
- (
637
- m * (x * a / m + y * b / m + e),
638
- n * (x * c / n + y * d / n + f),
639
- flag,
640
- )
641
- for x, y, flag in contour
642
- ]
643
- data.seek(original)
1053
+ # Full 2x2 transformation matrix
1054
+ xx, xy, yx, yy = struct.unpack(">hhhh", data.read(8))
1055
+ transform_xx = xx / scale_factor
1056
+ transform_xy = xy / scale_factor
1057
+ transform_yx = yx / scale_factor
1058
+ transform_yy = yy / scale_factor
1059
+
1060
+ # Get the component glyph's contours
1061
+ component_contours = list(self._parse_glyph_index(glyph_index))
1062
+
1063
+ # Apply transformation to each contour
1064
+ for contour in component_contours:
1065
+ transformed_contour = []
1066
+ for x, y, flag in contour:
1067
+ # Apply 2D transformation matrix
1068
+ new_x = transform_xx * x + transform_xy * y + transform_dx
1069
+ new_y = transform_yx * x + transform_yy * y + transform_dy
1070
+
1071
+ # Round to grid if requested
1072
+ if flags & ROUND_XY_TO_GRID:
1073
+ new_x = round(new_x)
1074
+ new_y = round(new_y)
1075
+
1076
+ transformed_contour.append((new_x, new_y, flag))
1077
+
1078
+ # Add transformed contour to our collection
1079
+ all_contours.append(transformed_contour)
1080
+ # Yield all collected contours
1081
+ yield from all_contours
644
1082
 
645
1083
  def _parse_simple_glyph(self, num_contours, data):
646
- end_pts = struct.unpack(f">{num_contours}H", data.read(2 * num_contours))
647
- inst_len = struct.unpack(">H", data.read(2))[0]
648
- instruction = data.read(inst_len)
649
- num_points = max(end_pts) + 1
650
- flags = []
651
- while len(flags) < num_points:
652
- flag = ord(data.read(1))
653
- flags.append(flag)
654
- if flag & 0x8:
655
- repeat_count = ord(data.read(1))
656
- flags.extend([flag] * repeat_count)
657
- x_coords = list(self._read_coords(num_points, 0x2, 0x10, flags, data))
658
- y_coords = list(self._read_coords(num_points, 0x4, 0x20, flags, data))
659
- start = 0
660
- for end in end_pts:
661
- yield list(
662
- zip(
663
- x_coords[start : end + 1],
664
- y_coords[start : end + 1],
665
- flags[start : end + 1],
1084
+ try:
1085
+ # Check we have enough data for contour endpoints
1086
+ if len(data.getvalue()) - data.tell() < num_contours * 2:
1087
+ error_msg = "Insufficient data for contour endpoints"
1088
+ self._logger(error_msg)
1089
+ raise TTFParsingError(error_msg)
1090
+
1091
+ end_pts = struct.unpack(f">{num_contours}H", data.read(2 * num_contours))
1092
+
1093
+ # Check we have enough data for instruction length
1094
+ if len(data.getvalue()) - data.tell() < 2:
1095
+ error_msg = "Insufficient data for instruction length"
1096
+ self._logger(error_msg)
1097
+ raise TTFParsingError(error_msg)
1098
+
1099
+ inst_len = struct.unpack(">H", data.read(2))[0]
1100
+
1101
+ # Check we have enough data for instructions
1102
+ if len(data.getvalue()) - data.tell() < inst_len:
1103
+ error_msg = "Insufficient data for instructions"
1104
+ self._logger(error_msg)
1105
+ raise TTFParsingError(error_msg)
1106
+
1107
+ _ = data.read(inst_len) # Read instructions but don't store unused variable
1108
+
1109
+ if not end_pts:
1110
+ return
1111
+
1112
+ num_points = max(end_pts) + 1
1113
+ if num_points <= 0:
1114
+ return
1115
+
1116
+ # Read flags with bounds checking
1117
+ flags = []
1118
+ while len(flags) < num_points:
1119
+ if len(data.getvalue()) - data.tell() < 1:
1120
+ error_msg = "Insufficient data for flags"
1121
+ self._logger(error_msg)
1122
+ raise TTFParsingError(error_msg)
1123
+
1124
+ flag = ord(data.read(1))
1125
+ flags.append(flag)
1126
+ if flag & 0x8: # Repeat flag
1127
+ if len(data.getvalue()) - data.tell() < 1:
1128
+ error_msg = "Insufficient data for repeat count"
1129
+ self._logger(error_msg)
1130
+ raise TTFParsingError(error_msg)
1131
+ repeat_count = ord(data.read(1))
1132
+ flags.extend([flag] * repeat_count)
1133
+
1134
+ # Truncate flags if we read too many
1135
+ flags = flags[:num_points]
1136
+
1137
+ x_coords = list(self._read_coords(num_points, 0x2, 0x10, flags, data))
1138
+ y_coords = list(self._read_coords(num_points, 0x4, 0x20, flags, data))
1139
+
1140
+ start = 0
1141
+ for end in end_pts:
1142
+ if end >= num_points:
1143
+ error_msg = f"Invalid contour endpoint: {end} >= {num_points}"
1144
+ self._logger(error_msg)
1145
+ raise TTFParsingError(error_msg)
1146
+ yield list(
1147
+ zip(
1148
+ x_coords[start : end + 1],
1149
+ y_coords[start : end + 1],
1150
+ flags[start : end + 1],
1151
+ )
666
1152
  )
667
- )
668
- start = end + 1
1153
+ start = end + 1
1154
+ except struct.error as e:
1155
+ error_msg = f"Struct unpacking error in simple glyph: {e}"
1156
+ self._logger(error_msg)
1157
+ raise TTFParsingError(error_msg) from e
1158
+ except (IndexError, ValueError) as e:
1159
+ error_msg = f"Error parsing simple glyph: {e}"
1160
+ self._logger(error_msg)
1161
+ raise TTFParsingError(error_msg) from e
669
1162
 
670
1163
  def _read_coords(self, num_points, bit_byte, bit_delta, flags, data):
671
1164
  value = 0
672
1165
  for i in range(num_points):
1166
+ if i >= len(flags):
1167
+ error_msg = f"Flag index {i} out of range (flags length: {len(flags)})"
1168
+ self._logger(error_msg)
1169
+ raise TTFParsingError(error_msg)
1170
+
673
1171
  flag = flags[i]
674
- if flag & bit_byte:
675
- x = struct.unpack("B", data.read(1))[0]
676
- if flag & bit_delta:
677
- value += x
678
- else:
679
- value -= x
680
- elif ~flag & bit_delta:
681
- value += struct.unpack(">h", data.read(2))[0]
682
- else:
683
- pass
684
- yield value
1172
+ try:
1173
+ if flag & bit_byte:
1174
+ # Single byte coordinate
1175
+ if len(data.getvalue()) - data.tell() < 1:
1176
+ error_msg = "Insufficient data for single byte coordinate"
1177
+ self._logger(error_msg)
1178
+ raise TTFParsingError(
1179
+ "Insufficient data for single byte coordinate"
1180
+ )
1181
+ x = struct.unpack("B", data.read(1))[0]
1182
+ if flag & bit_delta:
1183
+ value += x
1184
+ else:
1185
+ value -= x
1186
+ elif ~flag & bit_delta:
1187
+ # Two byte coordinate
1188
+ if len(data.getvalue()) - data.tell() < 2:
1189
+ error_msg = "Insufficient data for two byte coordinate"
1190
+ self._logger(error_msg)
1191
+ raise TTFParsingError(
1192
+ "Insufficient data for two byte coordinate"
1193
+ )
1194
+ value += struct.unpack(">h", data.read(2))[0]
1195
+ # Coordinate unchanged from previous
1196
+ yield value
1197
+ except struct.error as e:
1198
+ error_msg = f"Struct unpacking error in coordinates: {e}"
1199
+ self._logger(error_msg)
1200
+ raise TTFParsingError(error_msg) from e
685
1201
 
686
1202
  def parse_name(self):
687
1203
  def decode(string):
@@ -715,12 +1231,249 @@ class TrueTypeFont:
715
1231
  length,
716
1232
  str_offset,
717
1233
  ) in records:
718
- if name_id == 1:
719
- self.font_family = decode(strings[str_offset : str_offset + length])
720
- elif name_id == 2:
721
- self.font_subfamily = decode(strings[str_offset : str_offset + length])
722
- elif name_id == 3:
723
- # Unique Subfamily Name
724
- pass
725
- elif name_id == 4:
726
- self.font_name = decode(strings[str_offset : str_offset + length])
1234
+ try:
1235
+ if name_id == 1:
1236
+ self.font_family = decode(strings[str_offset : str_offset + length])
1237
+ elif name_id == 2:
1238
+ self.font_subfamily = decode(
1239
+ strings[str_offset : str_offset + length]
1240
+ )
1241
+ elif name_id == 3:
1242
+ # Unique Subfamily Name
1243
+ pass
1244
+ elif name_id == 4:
1245
+ self.font_name = decode(strings[str_offset : str_offset + length])
1246
+ except (IndexError, UnicodeDecodeError) as e:
1247
+ # Log error but continue parsing other name records
1248
+ warning_msg = f"Warning: Error decoding name record {name_id}: {e}"
1249
+ self._logger(warning_msg)
1250
+ continue
1251
+
1252
def get_variation_sequences(self):
    """
    Return the Unicode variation sequence mapping of this font.

    Returns:
        dict: Maps (base_char, variation_selector) tuples to glyph IDs,
            e.g. {(0x4E00, 0xFE00): 1234}. An empty dict is returned when
            no mapping has been stored on the font.
    """
    try:
        return self._variation_sequences
    except AttributeError:
        # No mapping has been stored on this font.
        return {}
1262
+
1263
def has_variation_sequences(self):
    """
    Tell whether the font carries any Unicode variation sequence mappings.

    Returns:
        bool: True if at least one variation sequence is stored on the
            font, False if the mapping is empty or missing.
    """
    try:
        sequences = self._variation_sequences
    except AttributeError:
        return False
    return True if sequences else False
1271
+
1272
def get_glyph_index(self, char, variation_selector=None):
    """
    Get the glyph index for a character, optionally with a variation selector.

    Args:
        char (str or int): The base character, or its Unicode code point.
        variation_selector (int, optional): Unicode variation selector
            code point (e.g., 0xFE00-0xFE0F).

    Returns:
        int: Glyph index of the variation sequence if one is registered,
            otherwise the glyph index of the plain character, or 0 if the
            character is not mapped or is not a valid code point.
    """
    if variation_selector is not None:
        # The mapping may be missing on the font; the accessor methods
        # treat that the same as an empty mapping.
        sequences = getattr(self, "_variation_sequences", {})
        # ord() only accepts single characters; longer strings cannot be
        # part of a variation sequence and go straight to the fallback.
        if not isinstance(char, str) or len(char) == 1:
            char_code = ord(char) if isinstance(char, str) else char
            vs_key = (char_code, variation_selector)
            if vs_key in sequences:
                return sequences[vs_key]

    # Fall back to the regular character map, which is keyed by strings.
    if isinstance(char, str):
        return self._character_map.get(char, 0)

    # Numeric character codes: chr() rejects values that are not valid
    # code points (ValueError / OverflowError) or not integers (TypeError).
    try:
        return self._character_map.get(chr(char), 0)
    except (TypeError, ValueError, OverflowError):
        return 0
1299
+
1300
def has_variation_selector(self, char, variation_selector):
    """
    Check if a character has a specific variation selector mapping.

    Args:
        char (str or int): The base character (string) or character code (int)
        variation_selector (int): Unicode variation selector code point

    Returns:
        bool: True if the variation sequence exists, False otherwise
            (including when the font stores no variation sequences or
            `char` is not a single character).
    """
    # The mapping may be missing on the font; treat that as empty, as the
    # accessor methods do.
    sequences = getattr(self, "_variation_sequences", {})
    if isinstance(char, str):
        if len(char) != 1:
            # ord() would raise; a multi-character string is never a base.
            return False
        char = ord(char)
    return (char, variation_selector) in sequences
1314
+
1315
def get_available_variation_selectors(self, char):
    """
    Get all variation selectors available for a given character.

    Args:
        char (str or int): The base character (string) or character code (int)

    Returns:
        list: Variation selector code points registered for this character,
            in the iteration order of the stored mapping. Empty when the
            font stores no variation sequences or `char` is not a single
            character.
    """
    # The mapping may be missing on the font; treat that as empty, as the
    # accessor methods do.
    sequences = getattr(self, "_variation_sequences", {})
    if isinstance(char, str):
        if len(char) != 1:
            # ord() would raise; a multi-character string is never a base.
            return []
        char = ord(char)
    return [vs for base_char, vs in sequences if base_char == char]
1331
+
1332
def lookup_glyph_with_variation(self, base_char, variation_selector=None):
    """
    Look up a glyph ID for a character, optionally with a variation selector.

    Args:
        base_char (str or int): The base character (string) or Unicode code point (int)
        variation_selector (int, optional): Unicode code point of variation selector

    Returns:
        int: Glyph ID for the character/variation sequence, or 0 if not found
    """
    if isinstance(base_char, str):
        if len(base_char) != 1:
            # ord() only accepts single characters; such a string cannot be
            # part of a variation sequence, so only the plain map applies.
            return self._character_map.get(base_char, 0)
        base_char_code = ord(base_char)
    else:
        base_char_code = base_char

    if variation_selector is not None:
        # The mapping may be missing on the font; treat that as empty, as
        # the accessor methods do.
        sequences = getattr(self, "_variation_sequences", {})
        vs_key = (base_char_code, variation_selector)
        if vs_key in sequences:
            return sequences[vs_key]

    # Fall back to the regular character map, which is keyed by strings.
    try:
        return self._character_map.get(chr(base_char_code), 0)
    except (TypeError, ValueError, OverflowError):
        # Not an integer, or not a valid Unicode code point.
        return 0
1359
+
1360
def parse_text_with_variation_sequences(self, text):
    """
    Split text into base characters and their optional variation selectors.

    The text is first turned into a list of code points; a high surrogate
    directly followed by a low surrogate is merged into the single code
    point it encodes, and unpaired surrogates are kept as they are. Each
    code point is then paired with the variation selector that directly
    follows it, if any.

    Args:
        text (str): Input text that may contain variation sequences

    Yields:
        tuple: (base_char_code, variation_selector) where variation_selector
            is None for regular characters or the selector's code point.
    """

    def is_selector(code_point):
        # Standardized selectors (U+FE00-U+FE0F) or the supplementary
        # block (U+E0100-U+E01EF).
        return (
            0xFE00 <= code_point <= 0xFE0F
            or 0xE0100 <= code_point <= 0xE01EF
        )

    units = [ord(ch) for ch in text]
    unit_count = len(units)

    # Pass 1: merge surrogate pairs into single code points.
    scalars = []
    pos = 0
    while pos < unit_count:
        lead = units[pos]
        trail = units[pos + 1] if pos + 1 < unit_count else None
        paired = (
            0xD800 <= lead <= 0xDBFF
            and trail is not None
            and 0xDC00 <= trail <= 0xDFFF
        )
        if paired:
            scalars.append(0x10000 + ((lead - 0xD800) << 10) + (trail - 0xDC00))
            pos += 2
        else:
            # BMP character or an unpaired surrogate: keep as is.
            scalars.append(lead)
            pos += 1

    # Pass 2: attach a directly following variation selector to its base.
    consumed = False
    for idx, base in enumerate(scalars):
        if consumed:
            # This code point was already emitted as a selector.
            consumed = False
            continue
        follower = scalars[idx + 1] if idx + 1 < len(scalars) else None
        if follower is not None and is_selector(follower):
            consumed = True
            yield (base, follower)
        else:
            yield (base, None)
1426
+
1427
def debug_variation_sequences(self):
    """
    Build a human-readable summary of the parsed variation sequences.

    Nothing is printed; the text is returned to the caller.

    Returns:
        str: One header line followed by one line per variation sequence,
            or a fixed message when the font stores no variation sequences.
    """
    # The mapping may be missing on the font; treat that as empty, as the
    # accessor methods do.
    sequences = getattr(self, "_variation_sequences", {})
    if not sequences:
        return "No variation sequences found in font"

    debug_info = [f"Found {len(sequences)} variation sequences:"]
    for (base_char, vs), glyph_id in sequences.items():
        try:
            base_char_str = (
                chr(base_char) if isinstance(base_char, int) else str(base_char)
            )
            vs_str = f"U+{vs:04X}" if vs else "None"
            debug_info.append(
                f" {base_char_str} (U+{base_char:04X}) + {vs_str} -> glyph {glyph_id}"
            )
        except (ValueError, TypeError):
            # Keys that cannot be rendered as code points are shown raw.
            debug_info.append(f" {base_char} + {vs} -> glyph {glyph_id}")

    return "\n".join(debug_info)
1451
+
1452
def test_variation_sequence_lookup(self, base_char, variation_selector):
    """
    Diagnostic helper: report how a specific variation sequence resolves.

    Args:
        base_char (str or int): The base character or its code point
        variation_selector (int or None): Unicode code point of variation selector

    Returns:
        dict: Information about the lookup result, with the keys
            "base_char", "base_char_code", "variation_selector",
            "regular_glyph_id", "variation_glyph_id", "has_variation" and
            "uses_different_glyph".
    """
    # The mapping may be missing on the font; treat that as empty, as the
    # accessor methods do.
    sequences = getattr(self, "_variation_sequences", {})
    base_char_code = ord(base_char) if isinstance(base_char, str) else base_char
    vs_key = (base_char_code, variation_selector)

    # Both lookups go through the same helper so that an integer code point
    # is converted to the string key the character map uses; looking the
    # raw integer up directly would always miss.
    regular_glyph = self.lookup_glyph_with_variation(base_char)
    variation_glyph = self.lookup_glyph_with_variation(
        base_char, variation_selector
    )

    return {
        "base_char": base_char,
        "base_char_code": f"U+{base_char_code:04X}",
        # A missing selector cannot be hex-formatted; report it as None.
        "variation_selector": (
            None if variation_selector is None else f"U+{variation_selector:04X}"
        ),
        "regular_glyph_id": regular_glyph,
        "variation_glyph_id": variation_glyph,
        "has_variation": vs_key in sequences,
        "uses_different_glyph": regular_glyph != variation_glyph,
    }
1479
+ }