mkv-episode-matcher 0.1.13__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mkv-episode-matcher might be problematic.

@@ -49,8 +49,6 @@
 
 import re
 
-from tld import get_tld
-
 from Libraries.SubZero.dictionaries.data import data
 from Libraries.SubZero.SubZero import (
     MultipleLineProcessor,
@@ -60,6 +58,7 @@ from Libraries.SubZero.SubZero import (
     SubtitleTextModification,
     WholeLineProcessor,
 )
+from tld import get_tld
 
 
 class CommonFixes(SubtitleTextModification):
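
The `get_tld` import moved below the vendored SubZero imports here; it is used by the `CM_punctuation_space2` processor further down to avoid inserting a space into domain names. A rough standalone sketch of that guard, using the same `get_tld(..., fail_silently=True, fix_protocol=True)` call that appears in the diff (the sample strings are made up):

```python
import re

from tld import get_tld


def add_space_after_punctuation(text):
    # Mirrors the CM_punctuation_space2 rule: insert a space after sentence
    # punctuation glued to the next word, but leave real domains untouched.
    pattern = re.compile(r"(?u)(([^\s]*)([!?.,:])([A-zÀ-ž]{2,}))")

    def repl(match):
        # get_tld() should return a TLD for "example.com" and None
        # (fail_silently=True) for ordinary words such as "Wait.Nobody".
        if get_tld(match.group(1), fail_silently=True, fix_protocol=True):
            return match.group(1)
        return f"{match.group(2)}{match.group(3)} {match.group(4)}"

    return pattern.sub(repl, text)


print(add_space_after_punctuation("Wait.Nobody moves."))      # "Wait. Nobody moves."
print(add_space_after_punctuation("Visit example.com now."))  # domain kept as-is
```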
@@ -72,105 +71,134 @@ class CommonFixes(SubtitleTextModification):
 
     processors = [
         # normalize hyphens
-        NReProcessor(re.compile(r'(?u)([‑‐﹘﹣])'), "-", name="CM_hyphens"),
-
+        NReProcessor(re.compile(r"(?u)([‑‐﹘﹣])"), "-", name="CM_hyphens"),
         # -- = em dash
-        NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), r"\1—", name="CM_multidash"),
-
+        NReProcessor(
+            re.compile(r"(?u)(\w|\b|\s|^)(-\s?-{1,2})"), r"\1—", name="CM_multidash"
+        ),
         # line = _/-/\s
-        NReProcessor(re.compile(r'(?u)(^\W*[-_.:<>~"\']+\W*$)'), "", name="CM_non_word_only"),
-
+        NReProcessor(
+            re.compile(r'(?u)(^\W*[-_.:<>~"\']+\W*$)'), "", name="CM_non_word_only"
+        ),
         # remove >>
-        NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
-
+        NReProcessor(re.compile(r"(?u)^\s?>>\s*"), "", name="CM_leading_crocodiles"),
         # line = : text
-        NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"),
-
+        NReProcessor(
+            re.compile(r"(?u)(^\W*:\s*(?=\w+))"), "", name="CM_empty_colon_start"
+        ),
         # fix music symbols
-        NReProcessor(re.compile(r'(?u)(^[-\s>~]*[*#¶]+\s+)|(\s*[*#¶]+\s*$)'),
-                     lambda x: "♪ " if x.group(1) else " ♪",
-                     name="CM_music_symbols"),
-
+        NReProcessor(
+            re.compile(r"(?u)(^[-\s>~]*[*#¶]+\s+)|(\s*[*#¶]+\s*$)"),
+            lambda x: "" if x.group(1) else " ♪",
+            name="CM_music_symbols",
+        ),
         # '' = "
-        NReProcessor(re.compile(r'(?u)([\'’ʼ❜‘‛][\'’ʼ❜‘‛]+)'), '"', name="CM_double_apostrophe"),
-
+        NReProcessor(
+            re.compile(r"(?u)([\'’ʼ❜‘‛][\'’ʼ❜‘‛]+)"), '"', name="CM_double_apostrophe"
+        ),
         # double quotes instead of single quotes inside words
-        NReProcessor(re.compile(r'(?u)([A-zÀ-ž])"([A-zÀ-ž])'), r"\1'\2", name="CM_double_as_single"),
-
+        NReProcessor(
+            re.compile(r'(?u)([A-zÀ-ž])"([A-zÀ-ž])'),
+            r"\1'\2",
+            name="CM_double_as_single",
+        ),
         # normalize quotes
-        NReProcessor(re.compile(r'(?u)(\s*["”“‟„])\s*(["”“‟„]["”“‟„\s]*)'),
-                     lambda match: '"' + (" " if match.group(2).endswith(" ") else ""),
-                     name="CM_normalize_quotes"),
-
+        NReProcessor(
+            re.compile(r'(?u)(\s*["”“‟„])\s*(["”“‟„]["”“‟„\s]*)'),
+            lambda match: '"' + (" " if match.group(2).endswith(" ") else ""),
+            name="CM_normalize_quotes",
+        ),
         # normalize single quotes
-        NReProcessor(re.compile(r'(?u)([\'’ʼ❜‘‛])'), "'", name="CM_normalize_squotes"),
-
+        NReProcessor(re.compile(r"(?u)([\'’ʼ❜‘‛])"), "'", name="CM_normalize_squotes"),
         # remove leading ...
-        NReProcessor(re.compile(r'(?u)^\.\.\.[\s]*'), "", name="CM_leading_ellipsis"),
-
+        NReProcessor(re.compile(r"(?u)^\.\.\.[\s]*"), "", name="CM_leading_ellipsis"),
         # remove "downloaded from" tags
-        NReProcessor(re.compile(r'(?ui).+downloaded\s+from.+'), "", name="CM_crap"),
-
+        NReProcessor(re.compile(r"(?ui).+downloaded\s+from.+"), "", name="CM_crap"),
         # no space after ellipsis
-        NReProcessor(re.compile(r'(?u)\.\.\.(?![\s.,!?\'"])(?!$)'), "... ", name="CM_ellipsis_no_space"),
-
+        NReProcessor(
+            re.compile(r'(?u)\.\.\.(?![\s.,!?\'"])(?!$)'),
+            "... ",
+            name="CM_ellipsis_no_space",
+        ),
         # no space before spaced ellipsis
-        NReProcessor(re.compile(r'(?u)(?<=[^\s])(?<!\s)\. \. \.'), " . . .", name="CM_ellipsis_no_space2"),
-
+        NReProcessor(
+            re.compile(r"(?u)(?<=[^\s])(?<!\s)\. \. \."),
+            " . . .",
+            name="CM_ellipsis_no_space2",
+        ),
         # multiple spaces
-        NReProcessor(re.compile(r'(?u)[\s]{2,}'), " ", name="CM_multiple_spaces"),
-
+        NReProcessor(re.compile(r"(?u)[\s]{2,}"), " ", name="CM_multiple_spaces"),
         # more than 3 dots
-        NReProcessor(re.compile(r'(?u)\.{3,}'), "...", name="CM_dots"),
-
+        NReProcessor(re.compile(r"(?u)\.{3,}"), "...", name="CM_dots"),
         # no space after starting dash
-        NReProcessor(re.compile(r'(?u)^-(?![\s-])'), "- ", name="CM_dash_space"),
-
+        NReProcessor(re.compile(r"(?u)^-(?![\s-])"), "- ", name="CM_dash_space"),
         # remove starting spaced dots (not matching ellipses)
-        NReProcessor(re.compile(r'(?u)^(?!\s?(\.\s\.\s\.)|(\s?\.{3}))(?=\.+\s+)[\s.]*'), "",
-                     name="CM_starting_spacedots"),
-
+        NReProcessor(
+            re.compile(r"(?u)^(?!\s?(\.\s\.\s\.)|(\s?\.{3}))(?=\.+\s+)[\s.]*"),
+            "",
+            name="CM_starting_spacedots",
+        ),
         # space missing before doublequote
-        ReProcessor(re.compile(r'(?u)(?<!^)(?<![\s(\["])("[^"]+")'), r' \1', name="CM_space_before_dblquote"),
-
+        ReProcessor(
+            re.compile(r'(?u)(?<!^)(?<![\s(\["])("[^"]+")'),
+            r" \1",
+            name="CM_space_before_dblquote",
+        ),
         # space missing after doublequote
-        ReProcessor(re.compile(r'(?u)("[^"\s][^"]+")([^\s.,!?)\]]+)'), r"\1 \2", name="CM_space_after_dblquote"),
-
+        ReProcessor(
+            re.compile(r'(?u)("[^"\s][^"]+")([^\s.,!?)\]]+)'),
+            r"\1 \2",
+            name="CM_space_after_dblquote",
+        ),
         # space before ending doublequote?
-
         # replace uppercase I with lowercase L in words
-        NReProcessor(re.compile(r'(?u)([a-zà-ž]+)(I+)'),
-                     lambda match: r'%s%s' % (match.group(1), "l" * len(match.group(2))),
-                     name="CM_uppercase_i_in_word"),
-
+        NReProcessor(
+            re.compile(r"(?u)([a-zà-ž]+)(I+)"),
+            lambda match: r"{}{}".format(match.group(1), "l" * len(match.group(2))),
+            name="CM_uppercase_i_in_word",
+        ),
         # fix spaces in numbers (allows for punctuation: ,.:' (comma/dot only fixed if after space, those may be
         # countdowns otherwise); don't break up ellipses
         NReProcessor(
-            re.compile(r'(?u)(\b[0-9]+[0-9:\']*(?<!\.\.)\s+(?!\.\.)[0-9,.:\'\s]*(?=[0-9]+)[0-9,.:\'])'),
-            lambda match: match.group(1).replace(" ", "") if match.group(1).count(" ") == 1 else match.group(1),
-            name="CM_spaces_in_numbers"),
-
+            re.compile(
+                r"(?u)(\b[0-9]+[0-9:\']*(?<!\.\.)\s+(?!\.\.)[0-9,.:\'\s]*(?=[0-9]+)[0-9,.:\'])"
+            ),
+            lambda match: match.group(1).replace(" ", "")
+            if match.group(1).count(" ") == 1
+            else match.group(1),
+            name="CM_spaces_in_numbers",
+        ),
         # uppercase after dot
         # NReProcessor(re.compile(r'(?u)((?<!(?=\s*[A-ZÀ-Ž-_0-9.]\s*))(?:[^.\s])+\.\s+)([a-zà-ž])'),
         #              lambda match: r'%s%s' % (match.group(1), match.group(2).upper()), name="CM_uppercase_after_dot"),
-
         # remove double interpunction
-        NReProcessor(re.compile(r'(?u)(\s*[,!?])\s*([,.!?][,.!?\s]*)'),
-                     lambda match: match.group(1).strip() + (" " if match.group(2).endswith(" ") else ""),
-                     name="CM_double_interpunct"),
-
+        NReProcessor(
+            re.compile(r"(?u)(\s*[,!?])\s*([,.!?][,.!?\s]*)"),
+            lambda match: match.group(1).strip()
+            + (" " if match.group(2).endswith(" ") else ""),
+            name="CM_double_interpunct",
+        ),
         # remove spaces before punctuation; don't break spaced ellipses
-        NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=\w)) +([!?.,](?![!?.,]| \.))'), r"\1", name="CM_punctuation_space"),
-
+        NReProcessor(
+            re.compile(r"(?u)(?:(?<=^)|(?<=\w)) +([!?.,](?![!?.,]| \.))"),
+            r"\1",
+            name="CM_punctuation_space",
+        ),
         # add space after punctuation
-        NReProcessor(re.compile(r'(?u)(([^\s]*)([!?.,:])([A-zÀ-ž]{2,}))'),
-                     lambda match: "%s%s %s" % (match.group(2), match.group(3), match.group(4)) if not get_tld(match.group(1), fail_silently=True, fix_protocol=True) else match.group(1),
-                     name="CM_punctuation_space2"),
-
+        NReProcessor(
+            re.compile(r"(?u)(([^\s]*)([!?.,:])([A-zÀ-ž]{2,}))"),
+            lambda match: f"{match.group(2)}{match.group(3)} {match.group(4)}"
+            if not get_tld(match.group(1), fail_silently=True, fix_protocol=True)
+            else match.group(1),
+            name="CM_punctuation_space2",
+        ),
         # fix lowercase I in english
-        NReProcessor(re.compile(r'(?u)(\b)i(\b)'), r"\1I\2", name="CM_EN_lowercase_i",
-                     # supported=lambda p: p.language == ENGLISH),
-                     ),
+        NReProcessor(
+            re.compile(r"(?u)(\b)i(\b)"),
+            r"\1I\2",
+            name="CM_EN_lowercase_i",
+            # supported=lambda p: p.language == ENGLISH),
+        ),
     ]
 
 
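
Each entry reformatted above is a named regex pass from the vendored SubZero `NReProcessor` machinery. A rough standalone approximation of a few of the rules touched in this hunk, using plain `re.sub` (the ordering mirrors the list above; `NReProcessor` itself adds naming and per-line handling not reproduced here):

```python
import re

# (pattern, replacement) pairs copied from the processors in the diff above.
COMMON_FIXES = [
    (re.compile(r"(?u)^\.\.\.[\s]*"), ""),   # CM_leading_ellipsis
    (re.compile(r"(?u)[\s]{2,}"), " "),      # CM_multiple_spaces
    (re.compile(r"(?u)\.{3,}"), "..."),      # CM_dots
    (re.compile(r"(?u)^-(?![\s-])"), "- "),  # CM_dash_space
]


def apply_common_fixes(line: str) -> str:
    # Run every fix over a single subtitle line, in order.
    for pattern, replacement in COMMON_FIXES:
        line = pattern.sub(replacement, line)
    return line


print(apply_common_fixes("...  Hello....   there"))  # -> "Hello... there"
```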
@@ -200,16 +228,33 @@ class FixOCR(SubtitleTextModification):
         return [
             # remove broken HI tag colons (ANNOUNCER'., ". instead of :) after at least 3 uppercase chars
             # don't modify stuff inside quotes
-            NReProcessor(re.compile(r'(?u)(^[^"\'’ʼ❜‘‛”“‟„]*(?<=[A-ZÀ-Ž]{3})[A-ZÀ-Ž-_\s0-9]+)'
-                                    r'(["\'’ʼ❜‘‛”“‟„]*[.,‚،⹁、;]+)(\s*)(?!["\'’ʼ❜‘‛”“‟„])'),
-                         r"\1:\3", name="OCR_fix_HI_colons"),
+            NReProcessor(
+                re.compile(
+                    r'(?u)(^[^"\'’ʼ❜‘‛”“‟„]*(?<=[A-ZÀ-Ž]{3})[A-ZÀ-Ž-_\s0-9]+)'
+                    r'(["\'’ʼ❜‘‛”“‟„]*[.,‚،⹁、;]+)(\s*)(?!["\'’ʼ❜‘‛”“‟„])'
+                ),
+                r"\1:\3",
+                name="OCR_fix_HI_colons",
+            ),
             # fix F'bla
-            NReProcessor(re.compile(r'(?u)(\bF)(\')([A-zÀ-ž]*\b)'), r"\1\3", name="OCR_fix_F"),
+            NReProcessor(
+                re.compile(r"(?u)(\bF)(\')([A-zÀ-ž]*\b)"), r"\1\3", name="OCR_fix_F"
+            ),
             WholeLineProcessor(self.data_dict["WholeLines"], name="OCR_replace_line"),
-            MultipleWordReProcessor(self.data_dict["WholeWords"], name="OCR_replace_word"),
-            MultipleWordReProcessor(self.data_dict["BeginLines"], name="OCR_replace_beginline"),
-            MultipleWordReProcessor(self.data_dict["EndLines"], name="OCR_replace_endline"),
-            MultipleWordReProcessor(self.data_dict["PartialLines"], name="OCR_replace_partialline"),
-            MultipleLineProcessor(self.data_dict["PartialWordsAlways"], name="OCR_replace_partialwordsalways")
+            MultipleWordReProcessor(
+                self.data_dict["WholeWords"], name="OCR_replace_word"
+            ),
+            MultipleWordReProcessor(
+                self.data_dict["BeginLines"], name="OCR_replace_beginline"
+            ),
+            MultipleWordReProcessor(
+                self.data_dict["EndLines"], name="OCR_replace_endline"
+            ),
+            MultipleWordReProcessor(
+                self.data_dict["PartialLines"], name="OCR_replace_partialline"
+            ),
+            MultipleLineProcessor(
+                self.data_dict["PartialWordsAlways"],
+                name="OCR_replace_partialwordsalways",
+            ),
         ]
-
@@ -3,7 +3,6 @@ from PIL import Image
 
 
 def read_rle_bytes(ods_bytes):
-
     pixels = []
     line_builder = []
 
@@ -41,12 +40,13 @@ def read_rle_bytes(ods_bytes):
         i += incr
 
     if line_builder:
-        print(f'Probably an error; hanging pixels: {line_builder}')
+        print(f"Probably an error; hanging pixels: {line_builder}")
 
     return pixels
 
+
 def ycbcr2rgb(ar):
-    xform = np.array([[1, 0, 1.402], [1, -0.34414, -.71414], [1, 1.772, 0]])
+    xform = np.array([[1, 0, 1.402], [1, -0.34414, -0.71414], [1, 1.772, 0]])
     rgb = ar.astype(float)
     # Subtracting by 128 the R and G channels
     rgb[:, [1, 2]] -= 128
@@ -58,6 +58,7 @@ def ycbcr2rgb(ar):
     np.putmask(rgb, rgb < 0, 0)
     return np.uint8(rgb)
 
+
 def px_rgb_a(ods, pds, swap):
     px = read_rle_bytes(ods.img_data)
     px = np.array([[255] * (ods.width - len(l)) + l for l in px], dtype=np.uint8)
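
The matrix corrected above (`-0.71414` written with a leading zero, same value) is the standard BT.601 YCbCr-to-RGB transform used by `ycbcr2rgb`. A minimal standalone sketch of the same conversion; the matrix, the 128 chroma offset, and the clamp are taken from the diff, while the dot-product step (not visible in these hunks) is assumed:

```python
import numpy as np


def ycbcr_to_rgb(ycbcr):
    """Convert an (..., 3) YCbCr array to uint8 RGB (BT.601, full range)."""
    xform = np.array([[1, 0, 1.402], [1, -0.34414, -0.71414], [1, 1.772, 0]])
    rgb = ycbcr.astype(float)
    rgb[..., 1:] -= 128          # Cb/Cr are stored with a +128 offset
    rgb = rgb @ xform.T          # assumed: the multiply not shown in the hunk
    np.putmask(rgb, rgb > 255, 255)
    np.putmask(rgb, rgb < 0, 0)  # clamp to the displayable range, as in the diff
    return np.uint8(rgb)


# A neutral pixel (Y=128, Cb=128, Cr=128) maps to mid grey.
print(ycbcr_to_rgb(np.array([[128, 128, 128]])))  # [[128 128 128]]
```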
@@ -78,10 +79,11 @@ def px_rgb_a(ods, pds, swap):
 
     return px, rgb, a
 
+
 def make_image(ods, pds, swap=False):
     px, rgb, a = px_rgb_a(ods, pds, swap)
-    alpha = Image.fromarray(a, mode='L')
-    img = Image.fromarray(px, mode='P')
+    alpha = Image.fromarray(a, mode="L")
+    img = Image.fromarray(px, mode="P")
     img.putalpha(alpha)
     img.putpalette(rgb)
     return img
@@ -6,21 +6,36 @@ from datetime import datetime, timedelta
 
 import pytesseract
 from imagemaker import make_image
+from Libraries.SubZero.post_processing import CommonFixes, FixOCR
 from pgsreader import PGSReader
 from PIL import Image, ImageOps
 
-from Libraries.SubZero.post_processing import CommonFixes, FixOCR
-
-parser = argparse.ArgumentParser(description='Convert PGS subtitles to SubRip format.')
-
-parser.add_argument('input', type=str, help="The input file (a .sup file).")
-parser.add_argument('--output', type=str, help="The output file (a .srt file).")
-parser.add_argument('--oem', type=int, help="The OCR Engine Mode to use (Default: 1).", default=1, choices=range(4))
-parser.add_argument('--language', type=str, help="The language to use (Default: eng).", default='eng')
-parser.add_argument('--fix_common', help='Fixes common whitespace/punctuation issues.',
-                    dest='fix_common', action='store_true')
-parser.add_argument('--fix_common_ocr', help='Fixes common OCR issues for supported languages.',
-                    dest='fix_ocr', action='store_true')
+parser = argparse.ArgumentParser(description="Convert PGS subtitles to SubRip format.")
+
+parser.add_argument("input", type=str, help="The input file (a .sup file).")
+parser.add_argument("--output", type=str, help="The output file (a .srt file).")
+parser.add_argument(
+    "--oem",
+    type=int,
+    help="The OCR Engine Mode to use (Default: 1).",
+    default=1,
+    choices=range(4),
+)
+parser.add_argument(
+    "--language", type=str, help="The language to use (Default: eng).", default="eng"
+)
+parser.add_argument(
+    "--fix_common",
+    help="Fixes common whitespace/punctuation issues.",
+    dest="fix_common",
+    action="store_true",
+)
+parser.add_argument(
+    "--fix_common_ocr",
+    help="Fixes common OCR issues for supported languages.",
+    dest="fix_ocr",
+    action="store_true",
+)
 
 args = parser.parse_args()
 
@@ -46,7 +61,11 @@ tesseract_config = f"-c tessedit_char_blacklist=[] --psm 6 --oem {args.oem}"
 # If an output file for the subrip output is provided, use that.
 # Otherwise remove the ".sup" extension from the input and append
 # ".srt".
-output_file = args.output if args.output is not None else (args.input.replace('.sup', '') + '.srt')
+output_file = (
+    args.output
+    if args.output is not None
+    else (args.input.replace(".sup", "") + ".srt")
+)
 
 # SubRip output
 output = ""
@@ -66,7 +85,7 @@ for ds in pgs.iter_displaysets():
 
     if pds and ods:
         # Create and show the bitmap image and convert it to RGBA
-        src = make_image(ods, pds).convert('RGBA')
+        src = make_image(ods, pds).convert("RGBA")
 
         # Create grayscale image with black background
         img = Image.new("L", src.size, "BLACK")
@@ -76,13 +95,15 @@ for ds in pgs.iter_displaysets():
         img = ImageOps.invert(img)
 
         # Parse the image with tesesract
-        text = pytesseract.image_to_string(img, lang=tesseract_lang, config=tesseract_config).strip()
+        text = pytesseract.image_to_string(
+            img, lang=tesseract_lang, config=tesseract_config
+        ).strip()
 
         # Replace "|" with "I"
         # Works better than blacklisting "|" in Tesseract,
         # which results in I becoming "!" "i" and "1"
-        text = re.sub(r'[|/\\]', 'I', text)
-        text = re.sub(r'[_]', 'L', text)
+        text = re.sub(r"[|/\\]", "I", text)
+        text = re.sub(r"[_]", "L", text)
 
         if args.fix_common:
             text = fix_common.process(text)
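
The reflowed call above is a plain `pytesseract.image_to_string` invocation followed by the character substitutions shown in the diff. For context, a small sketch of that OCR step in isolation, assuming a pre-rendered grayscale bitmap on disk (the filename is hypothetical):

```python
import re

import pytesseract
from PIL import Image

# Same knobs as the script: OCR engine mode 1 and page segmentation mode 6
# ("assume a single uniform block of text").
tesseract_lang = "eng"
tesseract_config = "-c tessedit_char_blacklist=[] --psm 6 --oem 1"

img = Image.open("subtitle_frame.png")  # hypothetical input bitmap

text = pytesseract.image_to_string(
    img, lang=tesseract_lang, config=tesseract_config
).strip()

# Post-OCR cleanup from the diff: bars/slashes are usually a misread "I",
# underscores a misread "L".
text = re.sub(r"[|/\\]", "I", text)
text = re.sub(r"[_]", "L", text)
print(text)
```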
@@ -100,11 +121,19 @@ for ds in pgs.iter_displaysets():
         end = datetime.fromtimestamp(pcs.presentation_timestamp / 1000)
         end = end + timedelta(hours=-1)
 
-        if isinstance(start, datetime) and isinstance(end, datetime) and len(text):
+        if (
+            isinstance(start, datetime)
+            and isinstance(end, datetime)
+            and len(text)
+        ):
             si = si + 1
             sub_output = str(si) + "\n"
-            sub_output += start.strftime("%H:%M:%S,%f")[0:12] + \
-                " --> " + end.strftime("%H:%M:%S,%f")[0:12] + "\n"
+            sub_output += (
+                start.strftime("%H:%M:%S,%f")[0:12]
+                + " --> "
+                + end.strftime("%H:%M:%S,%f")[0:12]
+                + "\n"
+            )
             sub_output += text + "\n\n"
 
             output += sub_output
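
The block above assembles one SubRip cue per display set; the key detail is the timestamp formatting: `strftime("%H:%M:%S,%f")` produces microseconds, and slicing to 12 characters truncates to the millisecond precision SRT expects. A minimal sketch with made-up cue timings:

```python
from datetime import datetime, timedelta


def srt_cue(index, start, end, text):
    def stamp(t):
        # "%f" gives six digits of microseconds; [0:12] keeps "HH:MM:SS,mmm".
        return t.strftime("%H:%M:%S,%f")[0:12]

    return f"{index}\n{stamp(start)} --> {stamp(end)}\n{text}\n\n"


start = datetime(2024, 1, 1, 0, 0, 1, 500000)  # hypothetical cue timing
end = start + timedelta(seconds=2, milliseconds=250)
print(srt_cue(1, start, end, "Hello."), end="")
# 1
# 00:00:01,500 --> 00:00:03,750
# Hello.
```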
@@ -4,27 +4,26 @@ from collections import namedtuple
 from os.path import split as pathsplit
 
 # Constants for Segments
-PDS = int('0x14', 16)
-ODS = int('0x15', 16)
-PCS = int('0x16', 16)
-WDS = int('0x17', 16)
-END = int('0x80', 16)
+PDS = int("0x14", 16)
+ODS = int("0x15", 16)
+PCS = int("0x16", 16)
+WDS = int("0x17", 16)
+END = int("0x80", 16)
 
 # Named tuple access for static PDS palettes
-Palette = namedtuple('Palette', "Y Cr Cb Alpha")
+Palette = namedtuple("Palette", "Y Cr Cb Alpha")
+
 
 class InvalidSegmentError(Exception):
-    '''Raised when a segment does not match PGS specification'''
+    """Raised when a segment does not match PGS specification"""
 
 
 class PGSReader:
-
     def __init__(self, filepath):
         self.filedir, self.file = pathsplit(filepath)
-        with open(filepath, 'rb') as f:
+        with open(filepath, "rb") as f:
             self.bytes = f.read()
 
-
     def make_segment(self, bytes_):
         cls = SEGMENT_TYPE[bytes_[10]]
         return cls(bytes_)
@@ -40,35 +39,29 @@ class PGSReader:
         ds = []
         for s in self.iter_segments():
             ds.append(s)
-            if s.type == 'END':
+            if s.type == "END":
                 yield DisplaySet(ds)
                 ds = []
 
     @property
     def segments(self):
-        if not hasattr(self, '_segments'):
+        if not hasattr(self, "_segments"):
             self._segments = list(self.iter_segments())
         return self._segments
 
     @property
     def displaysets(self):
-        if not hasattr(self, '_displaysets'):
+        if not hasattr(self, "_displaysets"):
             self._displaysets = list(self.iter_displaysets())
         return self._displaysets
 
-class BaseSegment:
 
-    SEGMENT = {
-        PDS: 'PDS',
-        ODS: 'ODS',
-        PCS: 'PCS',
-        WDS: 'WDS',
-        END: 'END'
-    }
+class BaseSegment:
+    SEGMENT = {PDS: "PDS", ODS: "ODS", PCS: "PCS", WDS: "WDS", END: "END"}
 
     def __init__(self, bytes_):
         self.bytes = bytes_
-        if bytes_[:2] != b'PG':
+        if bytes_[:2] != b"PG":
             raise InvalidSegmentError
         self.pts = int(bytes_[2:6].hex(), base=16) / 90
         self.dts = int(bytes_[6:10].hex(), base=16) / 90
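
The context lines above show the segment-header contract: a `b"PG"` magic, presentation and decoding timestamps stored as big-endian counts of a 90 kHz clock (hence the division by 90 to get milliseconds), and the segment type at byte 10 (used by `make_segment` via `SEGMENT_TYPE`). A small sketch decoding a hand-built header under those assumptions:

```python
# Build a fake 11-byte header: magic, PTS, DTS, segment type (0x16 = PCS).
pts_ticks = 900_000  # 90 kHz ticks -> 10 000 ms
dts_ticks = 0
header = (
    b"PG"
    + pts_ticks.to_bytes(4, "big")
    + dts_ticks.to_bytes(4, "big")
    + bytes([0x16])
)

assert header[:2] == b"PG"  # the reader raises InvalidSegmentError otherwise
pts = int(header[2:6].hex(), base=16) / 90  # same decoding as BaseSegment
dts = int(header[6:10].hex(), base=16) / 90
segment = {0x14: "PDS", 0x15: "ODS", 0x16: "PCS", 0x17: "WDS", 0x80: "END"}[header[10]]
print(pts, dts, segment)  # 10000.0 0.0 PCS
```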
@@ -80,18 +73,20 @@ class BaseSegment:
         return self.size
 
     @property
-    def presentation_timestamp(self): return self.pts
+    def presentation_timestamp(self):
+        return self.pts
 
     @property
-    def decoding_timestamp(self): return self.dts
+    def decoding_timestamp(self):
+        return self.dts
 
     @property
-    def segment_type(self): return self.type
+    def segment_type(self):
+        return self.type
 
-class PresentationCompositionSegment(BaseSegment):
 
+class PresentationCompositionSegment(BaseSegment):
     class CompositionObject:
-
         def __init__(self, bytes_):
             self.bytes = bytes_
             self.object_id = int(bytes_[0:2].hex(), base=16)
@@ -106,9 +101,9 @@ class PresentationCompositionSegment(BaseSegment):
             self.crop_height = int(bytes_[14:16].hex(), base=16)
 
     STATE = {
-        int('0x00', base=16): 'Normal',
-        int('0x40', base=16): 'Acquisition Point',
-        int('0x80', base=16): 'Epoch Start'
+        int("0x00", base=16): "Normal",
+        int("0x40", base=16): "Acquisition Point",
+        int("0x80", base=16): "Epoch Start",
     }
 
     def __init__(self, bytes_):
@@ -123,18 +118,22 @@ class PresentationCompositionSegment(BaseSegment):
         self._num_comps = self.data[10]
 
     @property
-    def composition_number(self): return self._num
+    def composition_number(self):
+        return self._num
 
     @property
-    def composition_state(self): return self._state
+    def composition_state(self):
+        return self._state
 
     @property
     def composition_objects(self):
-        if not hasattr(self, '_composition_objects'):
+        if not hasattr(self, "_composition_objects"):
             self._composition_objects = self.get_composition_objects()
             if len(self._composition_objects) != self._num_comps:
-                print('Warning: Number of composition objects asserted '
-                      'does not match the amount found.')
+                print(
+                    "Warning: Number of composition objects asserted "
+                    "does not match the amount found."
+                )
         return self._composition_objects
 
     def get_composition_objects(self):
@@ -146,8 +145,8 @@ class PresentationCompositionSegment(BaseSegment):
             bytes_ = bytes_[length:]
         return comps
 
-class WindowDefinitionSegment(BaseSegment):
 
+class WindowDefinitionSegment(BaseSegment):
     def __init__(self, bytes_):
         BaseSegment.__init__(self, bytes_)
         self.num_windows = self.data[0]
@@ -157,8 +156,8 @@ class WindowDefinitionSegment(BaseSegment):
         self.width = int(self.data[6:8].hex(), base=16)
         self.height = int(self.data[8:10].hex(), base=16)
 
-class PaletteDefinitionSegment(BaseSegment):
 
+class PaletteDefinitionSegment(BaseSegment):
     def __init__(self, bytes_):
         BaseSegment.__init__(self, bytes_)
         self.palette_id = self.data[0]
@@ -168,14 +167,14 @@ class PaletteDefinitionSegment(BaseSegment):
         # Iterate entries. Explode the 5 bytes into namedtuple Palette. Must be exploded
         for entry in range(len(self.data[2:]) // 5):
             i = 2 + entry * 5
-            self.palette[self.data[i]] = Palette(*self.data[i + 1:i + 5])
+            self.palette[self.data[i]] = Palette(*self.data[i + 1 : i + 5])
 
-class ObjectDefinitionSegment(BaseSegment):
 
+class ObjectDefinitionSegment(BaseSegment):
     SEQUENCE = {
-        int('0x40', base=16): 'Last',
-        int('0x80', base=16): 'First',
-        int('0xc0', base=16): 'First and last'
+        int("0x40", base=16): "Last",
+        int("0x80", base=16): "First",
+        int("0xc0", base=16): "First and last",
     }
 
     def __init__(self, bytes_):
@@ -188,13 +187,15 @@ class ObjectDefinitionSegment(BaseSegment):
         self.height = int(self.data[9:11].hex(), base=16)
         self.img_data = self.data[11:]
         if len(self.img_data) != self.data_len - 4:
-            print('Warning: Image data length asserted does not match the '
-                  'length found.')
+            print(
+                "Warning: Image data length asserted does not match the length found."
+            )
 
-class EndSegment(BaseSegment):
 
+class EndSegment(BaseSegment):
     @property
-    def is_end(self): return True
+    def is_end(self):
+        return True
 
 
 SEGMENT_TYPE = {
@@ -202,20 +203,23 @@ SEGMENT_TYPE = {
     ODS: ObjectDefinitionSegment,
     PCS: PresentationCompositionSegment,
     WDS: WindowDefinitionSegment,
-    END: EndSegment
+    END: EndSegment,
 }
 
-class DisplaySet:
 
+class DisplaySet:
     def __init__(self, segments):
        self.segments = segments
        self.segment_types = [s.type for s in segments]
-        self.has_image = 'ODS' in self.segment_types
+        self.has_image = "ODS" in self.segment_types
+
 
 
 def segment_by_type_getter(type_):
     def f(self):
         return [s for s in self.segments if s.type == type_]
+
     return f
+
 for type_ in BaseSegment.SEGMENT.values():
     setattr(DisplaySet, type_.lower(), property(segment_by_type_getter(type_)))
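
The final hunk only moves blank lines around `segment_by_type_getter`, but the pattern it closes out is worth spelling out: a getter factory plus `setattr(..., property(...))` gives `DisplaySet` one filtered-list property per segment type (`ds.pds`, `ds.ods`, and so on). A self-contained sketch of that pattern, mirroring the loop above with stand-in classes:

```python
class Segment:
    def __init__(self, type_):
        self.type = type_


class DisplaySet:
    def __init__(self, segments):
        self.segments = segments


def segment_by_type_getter(type_):
    def f(self):
        return [s for s in self.segments if s.type == type_]

    return f


# One read-only property per segment type, as in the module's trailing loop.
for type_ in ("PDS", "ODS", "PCS", "WDS", "END"):
    setattr(DisplaySet, type_.lower(), property(segment_by_type_getter(type_)))

ds = DisplaySet([Segment("PCS"), Segment("ODS"), Segment("END")])
print(len(ds.ods), len(ds.pcs))  # 1 1
```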