mkv-episode-matcher 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -0,0 +1,215 @@
+ # MIT License
+ #
+ # Copyright (c) 2018 Hannes Tismer
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in all
+ # copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ # SOFTWARE.
+ #
+ #
+ # Copyright for portions of project Sub-Zero are held by Bram Walet, 2014 as part of project Subliminal.bundle.
+ # The original license is supplied below.
+ #
+ # The MIT License (MIT)
+ #
+ # Copyright (c) 2014 Bram Walet
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in all
+ # copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ # SOFTWARE.
+
+
+ import re
+
+ from tld import get_tld
+
+ from Libraries.SubZero.dictionaries.data import data
+ from Libraries.SubZero.SubZero import (
+     MultipleLineProcessor,
+     MultipleWordReProcessor,
+     NReProcessor,
+     ReProcessor,
+     SubtitleTextModification,
+     WholeLineProcessor,
+ )
+
+
+ class CommonFixes(SubtitleTextModification):
+     identifier = "common"
+     description = "Basic common fixes"
+     exclusive = True
+     order = 40
+
+     long_description = "Fix common and whitespace/punctuation issues in subtitles"
+
+     processors = [
+         # normalize hyphens
+         NReProcessor(re.compile(r'(?u)([‑‐﹘﹣])'), "-", name="CM_hyphens"),
+
+         # -- = em dash
+         NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), r"\1—", name="CM_multidash"),
+
+         # line = _/-/\s
+         NReProcessor(re.compile(r'(?u)(^\W*[-_.:<>~"\']+\W*$)'), "", name="CM_non_word_only"),
+
+         # remove >>
+         NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
+
+         # line = : text
+         NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"),
+
+         # fix music symbols
+         NReProcessor(re.compile(r'(?u)(^[-\s>~]*[*#¶]+\s+)|(\s*[*#¶]+\s*$)'),
+                      lambda x: "♪ " if x.group(1) else " ♪",
+                      name="CM_music_symbols"),
+
+         # '' = "
+         NReProcessor(re.compile(r'(?u)([\'’ʼ❜‘‛][\'’ʼ❜‘‛]+)'), '"', name="CM_double_apostrophe"),
+
+         # double quotes instead of single quotes inside words
+         NReProcessor(re.compile(r'(?u)([A-zÀ-ž])"([A-zÀ-ž])'), r"\1'\2", name="CM_double_as_single"),
+
+         # normalize quotes
+         NReProcessor(re.compile(r'(?u)(\s*["”“‟„])\s*(["”“‟„]["”“‟„\s]*)'),
+                      lambda match: '"' + (" " if match.group(2).endswith(" ") else ""),
+                      name="CM_normalize_quotes"),
+
+         # normalize single quotes
+         NReProcessor(re.compile(r'(?u)([\'’ʼ❜‘‛])'), "'", name="CM_normalize_squotes"),
+
+         # remove leading ...
+         NReProcessor(re.compile(r'(?u)^\.\.\.[\s]*'), "", name="CM_leading_ellipsis"),
+
+         # remove "downloaded from" tags
+         NReProcessor(re.compile(r'(?ui).+downloaded\s+from.+'), "", name="CM_crap"),
+
+         # no space after ellipsis
+         NReProcessor(re.compile(r'(?u)\.\.\.(?![\s.,!?\'"])(?!$)'), "... ", name="CM_ellipsis_no_space"),
+
+         # no space before spaced ellipsis
+         NReProcessor(re.compile(r'(?u)(?<=[^\s])(?<!\s)\. \. \.'), " . . .", name="CM_ellipsis_no_space2"),
+
+         # multiple spaces
+         NReProcessor(re.compile(r'(?u)[\s]{2,}'), " ", name="CM_multiple_spaces"),
+
+         # more than 3 dots
+         NReProcessor(re.compile(r'(?u)\.{3,}'), "...", name="CM_dots"),
+
+         # no space after starting dash
+         NReProcessor(re.compile(r'(?u)^-(?![\s-])'), "- ", name="CM_dash_space"),
+
+         # remove starting spaced dots (not matching ellipses)
+         NReProcessor(re.compile(r'(?u)^(?!\s?(\.\s\.\s\.)|(\s?\.{3}))(?=\.+\s+)[\s.]*'), "",
+                      name="CM_starting_spacedots"),
+
+         # space missing before doublequote
+         ReProcessor(re.compile(r'(?u)(?<!^)(?<![\s(\["])("[^"]+")'), r' \1', name="CM_space_before_dblquote"),
+
+         # space missing after doublequote
+         ReProcessor(re.compile(r'(?u)("[^"\s][^"]+")([^\s.,!?)\]]+)'), r"\1 \2", name="CM_space_after_dblquote"),
+
+         # space before ending doublequote?
+
+         # replace uppercase I with lowercase L in words
+         NReProcessor(re.compile(r'(?u)([a-zà-ž]+)(I+)'),
+                      lambda match: r'%s%s' % (match.group(1), "l" * len(match.group(2))),
+                      name="CM_uppercase_i_in_word"),
+
+         # fix spaces in numbers (allows for punctuation: ,.:' (comma/dot only fixed if after space, those may be
+         # countdowns otherwise); don't break up ellipses
+         NReProcessor(
+             re.compile(r'(?u)(\b[0-9]+[0-9:\']*(?<!\.\.)\s+(?!\.\.)[0-9,.:\'\s]*(?=[0-9]+)[0-9,.:\'])'),
+             lambda match: match.group(1).replace(" ", "") if match.group(1).count(" ") == 1 else match.group(1),
+             name="CM_spaces_in_numbers"),
+
+         # uppercase after dot
+         # NReProcessor(re.compile(r'(?u)((?<!(?=\s*[A-ZÀ-Ž-_0-9.]\s*))(?:[^.\s])+\.\s+)([a-zà-ž])'),
+         #              lambda match: r'%s%s' % (match.group(1), match.group(2).upper()), name="CM_uppercase_after_dot"),
+
+         # remove double interpunction
+         NReProcessor(re.compile(r'(?u)(\s*[,!?])\s*([,.!?][,.!?\s]*)'),
+                      lambda match: match.group(1).strip() + (" " if match.group(2).endswith(" ") else ""),
+                      name="CM_double_interpunct"),
+
+         # remove spaces before punctuation; don't break spaced ellipses
+         NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=\w)) +([!?.,](?![!?.,]| \.))'), r"\1", name="CM_punctuation_space"),
+
+         # add space after punctuation
+         NReProcessor(re.compile(r'(?u)(([^\s]*)([!?.,:])([A-zÀ-ž]{2,}))'),
+                      lambda match: "%s%s %s" % (match.group(2), match.group(3), match.group(4)) if not get_tld(match.group(1), fail_silently=True, fix_protocol=True) else match.group(1),
+                      name="CM_punctuation_space2"),
+
+         # fix lowercase I in english
+         NReProcessor(re.compile(r'(?u)(\b)i(\b)'), r"\1I\2", name="CM_EN_lowercase_i",
+                      # supported=lambda p: p.language == ENGLISH),
+                      ),
+     ]
+
+
+ class FixOCR(SubtitleTextModification):
+     identifier = "OCR_fixes"
+     description = "Fix common OCR issues"
+     exclusive = True
+     order = 10
+     data_dict = None
+
+     long_description = "Fix issues that happen when a subtitle gets converted from bitmap to text through OCR"
+
+     def __init__(self, language):
+         super(FixOCR, self).__init__()
+         data_dict = data.get(language)
+         if not data_dict:
+             # logger.debug("No SnR-data available for language %s", parent.language)
+             return
+
+         self.data_dict = data_dict
+         self.processors = self.get_processors()
+
+     def get_processors(self):
+         if not self.data_dict:
+             return []
+
+         return [
+             # remove broken HI tag colons (ANNOUNCER'., ". instead of :) after at least 3 uppercase chars
+             # don't modify stuff inside quotes
+             NReProcessor(re.compile(r'(?u)(^[^"\'’ʼ❜‘‛”“‟„]*(?<=[A-ZÀ-Ž]{3})[A-ZÀ-Ž-_\s0-9]+)'
+                                     r'(["\'’ʼ❜‘‛”“‟„]*[.,‚،⹁、;]+)(\s*)(?!["\'’ʼ❜‘‛”“‟„])'),
+                          r"\1:\3", name="OCR_fix_HI_colons"),
+             # fix F'bla
+             NReProcessor(re.compile(r'(?u)(\bF)(\')([A-zÀ-ž]*\b)'), r"\1\3", name="OCR_fix_F"),
+             WholeLineProcessor(self.data_dict["WholeLines"], name="OCR_replace_line"),
+             MultipleWordReProcessor(self.data_dict["WholeWords"], name="OCR_replace_word"),
+             MultipleWordReProcessor(self.data_dict["BeginLines"], name="OCR_replace_beginline"),
+             MultipleWordReProcessor(self.data_dict["EndLines"], name="OCR_replace_endline"),
+             MultipleWordReProcessor(self.data_dict["PartialLines"], name="OCR_replace_partialline"),
+             MultipleLineProcessor(self.data_dict["PartialWordsAlways"], name="OCR_replace_partialwordsalways")
+         ]
+
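To make the intent of these processors concrete, here is a minimal sketch (not part of the packaged code) that applies two of the patterns listed above, CM_dots and CM_multiple_spaces, with plain `re.sub`; the sample line is invented, and the real module wraps these patterns in NReProcessor objects rather than calling `re.sub` directly.

```python
import re

line = "Wait....   it's  over there"
# CM_dots: collapse runs of three or more dots into a plain ellipsis
line = re.sub(r'(?u)\.{3,}', "...", line)
# CM_multiple_spaces: squeeze repeated whitespace into a single space
line = re.sub(r'(?u)[\s]{2,}', " ", line)
print(line)  # Wait... it's over there
```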
@@ -0,0 +1,26 @@
+ # pgs2srt
+
+ Uses [pgsreader](https://github.com/EzraBC/pgsreader) and [pytesseract](https://pypi.org/project/pytesseract/) to convert image-based PGS subtitle files (.sup) to text-based SubRip (.srt) files.
+
+ ## Requirements
+ Python3, pip3, and Tesseract
+
+ ## Installation
+ * Run ```git clone https://github.com/PimvanderLoos/pgs2srt.git```
+ * Inside the repo folder, run ```pip3 install -r requirements.txt```
+ * In your .bashrc or .zshrc add ```alias pgs2srt='<absolute path to repo>/pgs2srt.py'```
+
+ ## How to run
+
+ pgs2srt <pgs filename>.sup
+
+ ## Improving accuracy
+ On Debian and Ubuntu, the default trained model files for Tesseract come from the [fast](https://github.com/tesseract-ocr/tessdata_fast) set. While these are a bit faster than the other options, that speed comes at the cost of accuracy. If you want higher accuracy, I'd recommend using either the [legacy](https://github.com/tesseract-ocr/tessdata) or the [best](https://github.com/tesseract-ocr/tessdata_best) trained models. Note that the fast and best options only support the LSTM OCR Engine Mode (oem 1).
+
+ ## Caveats
+
+ This is in no way a perfect converter, and Tesseract will misread some characters. This project is extremely alpha; issues, pull requests, and suggestions are welcome!
+
+
+ ## Credits
+ This project uses the common + OCR fixes developed by [Sub-Zero.bundle](https://github.com/pannal/Sub-Zero.bundle).
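The "Improving accuracy" note above maps directly onto the Tesseract options that pgs2srt.py passes through pytesseract. A minimal sketch, assuming the "best" trained data has been downloaded to a hypothetical local directory; the image path is a placeholder, while `--tessdata-dir`, `--psm`, and `--oem` are standard Tesseract options:

```python
import pytesseract
from PIL import Image

# Hypothetical path: point Tesseract at the "best" trained data and force the LSTM engine (oem 1)
config = "--tessdata-dir /home/user/tessdata_best --psm 6 --oem 1"
text = pytesseract.image_to_string(Image.open("subtitle_frame.png"), lang="eng", config=config)
print(text)
```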
@@ -0,0 +1,87 @@
+ import numpy as np
+ from PIL import Image
+
+
+ def read_rle_bytes(ods_bytes):
+
+     pixels = []
+     line_builder = []
+
+     i = 0
+     while i < len(ods_bytes):
+         if ods_bytes[i]:
+             incr = 1
+             color = ods_bytes[i]
+             length = 1
+         else:
+             check = ods_bytes[i + 1]
+             if check == 0:
+                 incr = 2
+                 color = 0
+                 length = 0
+                 pixels.append(line_builder)
+                 line_builder = []
+             elif check < 64:
+                 incr = 2
+                 color = 0
+                 length = check
+             elif check < 128:
+                 incr = 3
+                 color = 0
+                 length = ((check - 64) << 8) + ods_bytes[i + 2]
+             elif check < 192:
+                 incr = 3
+                 color = ods_bytes[i + 2]
+                 length = check - 128
+             else:
+                 incr = 4
+                 color = ods_bytes[i + 3]
+                 length = ((check - 192) << 8) + ods_bytes[i + 2]
+         line_builder.extend([color] * length)
+         i += incr
+
+     if line_builder:
+         print(f'Probably an error; hanging pixels: {line_builder}')
+
+     return pixels
+
+ def ycbcr2rgb(ar):
+     xform = np.array([[1, 0, 1.402], [1, -0.34414, -.71414], [1, 1.772, 0]])
+     rgb = ar.astype(float)
+     # Subtract 128 from the Cb and Cr channels (columns 1 and 2)
+     rgb[:, [1, 2]] -= 128
+     # .dot is multiplication of the matrices and xform.T is a transpose of the array axes
+     rgb = rgb.dot(xform.T)
+     # Clamp any pixel value greater than 255 to 255 (max for the RGB colorspace)
+     np.putmask(rgb, rgb > 255, 255)
+     # Clamp any pixel value less than 0 to 0 (min for the RGB colorspace)
+     np.putmask(rgb, rgb < 0, 0)
+     return np.uint8(rgb)
+
+ def px_rgb_a(ods, pds, swap):
+     px = read_rle_bytes(ods.img_data)
+     px = np.array([[255] * (ods.width - len(l)) + l for l in px], dtype=np.uint8)
+
+     # Extract the YCbCrA palette data, swapping channels if requested.
+     if swap:
+         ycbcr = np.array([(entry.Y, entry.Cb, entry.Cr) for entry in pds.palette])
+     else:
+         ycbcr = np.array([(entry.Y, entry.Cr, entry.Cb) for entry in pds.palette])
+     try:
+         rgb = ycbcr2rgb(ycbcr)
+     except AttributeError:
+         print("Error: The image is not in YCbCr format.")
+         exit(1)
+     # Separate the Alpha channel from the YCbCr palette data
+     a = [entry.Alpha for entry in pds.palette]
+     a = np.array([[a[x] for x in l] for l in px], dtype=np.uint8)
+
+     return px, rgb, a
+
+ def make_image(ods, pds, swap=False):
+     px, rgb, a = px_rgb_a(ods, pds, swap)
+     alpha = Image.fromarray(a, mode='L')
+     img = Image.fromarray(px, mode='P')
+     img.putalpha(alpha)
+     img.putpalette(rgb)
+     return img
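As a quick usage sketch (not part of the release), `make_image` can be combined with `PGSReader` from pgsreader.py below to dump each subtitle bitmap to a PNG for inspection; the input path and output filenames are illustrative, and the RGBA conversion mirrors what pgs2srt.py does:

```python
from pgsreader import PGSReader
from imagemaker import make_image

# Placeholder input path; pgsreader.py and imagemaker.py must be importable from the working directory
pgs = PGSReader("movie.sup")
for n, ds in enumerate(pgs.iter_displaysets()):
    if ds.has_image:
        # Use the first palette (PDS) and object (ODS) segments of the display set, as pgs2srt.py does
        img = make_image(ds.ods[0], ds.pds[0]).convert("RGBA")
        img.save(f"subtitle_{n:04d}.png")
```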
@@ -0,0 +1,121 @@
+ #!/usr/bin/env python3
+
+ import argparse
+ import re
+ from datetime import datetime, timedelta
+
+ import pytesseract
+ from imagemaker import make_image
+ from pgsreader import PGSReader
+ from PIL import Image, ImageOps
+
+ from Libraries.SubZero.post_processing import CommonFixes, FixOCR
+
+ parser = argparse.ArgumentParser(description='Convert PGS subtitles to SubRip format.')
+
+ parser.add_argument('input', type=str, help="The input file (a .sup file).")
+ parser.add_argument('--output', type=str, help="The output file (a .srt file).")
+ parser.add_argument('--oem', type=int, help="The OCR Engine Mode to use (Default: 1).", default=1, choices=range(4))
+ parser.add_argument('--language', type=str, help="The language to use (Default: eng).", default='eng')
+ parser.add_argument('--fix_common', help='Fixes common whitespace/punctuation issues.',
+                     dest='fix_common', action='store_true')
+ parser.add_argument('--fix_common_ocr', help='Fixes common OCR issues for supported languages.',
+                     dest='fix_ocr', action='store_true')
+
+ args = parser.parse_args()
+
+ assert args.input is not None
+
+ # Unescape escaped spaces
+ file = args.input.replace("\\ ", " ")
+
+ print(f"Parsing: {file}")
+
+ # Load a PGS/SUP file.
+ pgs = PGSReader(file)
+
+ # Set index
+ i = 0
+
+ # Complete subtitle track index
+ si = 0
+
+ tesseract_lang = args.language
+ tesseract_config = f"-c tessedit_char_blacklist=[] --psm 6 --oem {args.oem}"
+
+ # If an output file for the subrip output is provided, use that.
+ # Otherwise remove the ".sup" extension from the input and append
+ # ".srt".
+ output_file = args.output if args.output is not None else (args.input.replace('.sup', '') + '.srt')
+
+ # SubRip output
+ output = ""
+
+ fix_common = CommonFixes() if args.fix_common else None
+ fix_ocr = FixOCR(args.language) if args.fix_ocr else None
+
+ # Iterate the pgs generator
+ for ds in pgs.iter_displaysets():
+     try:
+         # If set has image, parse the image
+         if ds.has_image:
+             # Get Palette Display Segment
+             pds = ds.pds[0]
+             # Get Object Display Segment
+             ods = ds.ods[0]
+
+             if pds and ods:
+                 # Create the bitmap image and convert it to RGBA
+                 src = make_image(ods, pds).convert('RGBA')
+
+                 # Create grayscale image with black background
+                 img = Image.new("L", src.size, "BLACK")
+                 # Paste the subtitle bitmap
+                 img.paste(src, (0, 0), src)
+                 # Invert the image so the text is readable by Tesseract
+                 img = ImageOps.invert(img)
+
+                 # Parse the image with Tesseract
+                 text = pytesseract.image_to_string(img, lang=tesseract_lang, config=tesseract_config).strip()
+
+                 # Replace "|" with "I"
+                 # Works better than blacklisting "|" in Tesseract,
+                 # which results in I becoming "!" "i" and "1"
+                 text = re.sub(r'[|/\\]', 'I', text)
+                 text = re.sub(r'[_]', 'L', text)
+
+                 if args.fix_common:
+                     text = fix_common.process(text)
+                 if args.fix_ocr:
+                     text = fix_ocr.modify(text)
+
+                 start = datetime.fromtimestamp(ods.presentation_timestamp / 1000)
+                 start = start + timedelta(hours=-1)
+
+         else:
+             # Get Presentation Composition Segment
+             pcs = ds.pcs[0]
+
+             if pcs:
+                 end = datetime.fromtimestamp(pcs.presentation_timestamp / 1000)
+                 end = end + timedelta(hours=-1)
+
+                 if isinstance(start, datetime) and isinstance(end, datetime) and len(text):
+                     si = si + 1
+                     sub_output = str(si) + "\n"
+                     sub_output += start.strftime("%H:%M:%S,%f")[0:12] + \
+                         " --> " + end.strftime("%H:%M:%S,%f")[0:12] + "\n"
+                     sub_output += text + "\n\n"
+
+                     output += sub_output
+                     start = end = text = None
+                     i = i + 1
+
+     except Exception as e:
+         print(e)
+         exit(1)
+
+ f = open(output_file, "w")
+ f.write(output)
+ f.close()
+ print(f"Saved to: {output_file}")
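For reference, the `strftime("%H:%M:%S,%f")[0:12]` slice above yields millisecond-precision SubRip timestamps. A small sketch of the block format the loop assembles; the index, times, and text here are made up:

```python
from datetime import datetime

start = datetime(1900, 1, 1, 0, 1, 2, 345000)
end = datetime(1900, 1, 1, 0, 1, 4, 500000)
block = "1\n"
block += start.strftime("%H:%M:%S,%f")[0:12] + " --> " + end.strftime("%H:%M:%S,%f")[0:12] + "\n"
block += "Example subtitle line\n\n"
print(block)
# 1
# 00:01:02,345 --> 00:01:04,500
# Example subtitle line
```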
@@ -0,0 +1,221 @@
+ #!/usr/bin/env python3
+
+ from collections import namedtuple
+ from os.path import split as pathsplit
+
+ # Constants for Segments
+ PDS = int('0x14', 16)
+ ODS = int('0x15', 16)
+ PCS = int('0x16', 16)
+ WDS = int('0x17', 16)
+ END = int('0x80', 16)
+
+ # Named tuple access for static PDS palettes
+ Palette = namedtuple('Palette', "Y Cr Cb Alpha")
+
+ class InvalidSegmentError(Exception):
+     '''Raised when a segment does not match PGS specification'''
+
+
+ class PGSReader:
+
+     def __init__(self, filepath):
+         self.filedir, self.file = pathsplit(filepath)
+         with open(filepath, 'rb') as f:
+             self.bytes = f.read()
+
+
+     def make_segment(self, bytes_):
+         cls = SEGMENT_TYPE[bytes_[10]]
+         return cls(bytes_)
+
+     def iter_segments(self):
+         bytes_ = self.bytes[:]
+         while bytes_:
+             size = 13 + int(bytes_[11:13].hex(), 16)
+             yield self.make_segment(bytes_[:size])
+             bytes_ = bytes_[size:]
+
+     def iter_displaysets(self):
+         ds = []
+         for s in self.iter_segments():
+             ds.append(s)
+             if s.type == 'END':
+                 yield DisplaySet(ds)
+                 ds = []
+
+     @property
+     def segments(self):
+         if not hasattr(self, '_segments'):
+             self._segments = list(self.iter_segments())
+         return self._segments
+
+     @property
+     def displaysets(self):
+         if not hasattr(self, '_displaysets'):
+             self._displaysets = list(self.iter_displaysets())
+         return self._displaysets
+
+ class BaseSegment:
+
+     SEGMENT = {
+         PDS: 'PDS',
+         ODS: 'ODS',
+         PCS: 'PCS',
+         WDS: 'WDS',
+         END: 'END'
+     }
+
+     def __init__(self, bytes_):
+         self.bytes = bytes_
+         if bytes_[:2] != b'PG':
+             raise InvalidSegmentError
+         self.pts = int(bytes_[2:6].hex(), base=16) / 90
+         self.dts = int(bytes_[6:10].hex(), base=16) / 90
+         self.type = self.SEGMENT[bytes_[10]]
+         self.size = int(bytes_[11:13].hex(), base=16)
+         self.data = bytes_[13:]
+
+     def __len__(self):
+         return self.size
+
+     @property
+     def presentation_timestamp(self): return self.pts
+
+     @property
+     def decoding_timestamp(self): return self.dts
+
+     @property
+     def segment_type(self): return self.type
+
+ class PresentationCompositionSegment(BaseSegment):
+
+     class CompositionObject:
+
+         def __init__(self, bytes_):
+             self.bytes = bytes_
+             self.object_id = int(bytes_[0:2].hex(), base=16)
+             self.window_id = bytes_[2]
+             self.cropped = bool(bytes_[3])
+             self.x_offset = int(bytes_[4:6].hex(), base=16)
+             self.y_offset = int(bytes_[6:8].hex(), base=16)
+             if self.cropped:
+                 self.crop_x_offset = int(bytes_[8:10].hex(), base=16)
+                 self.crop_y_offset = int(bytes_[10:12].hex(), base=16)
+                 self.crop_width = int(bytes_[12:14].hex(), base=16)
+                 self.crop_height = int(bytes_[14:16].hex(), base=16)
+
+     STATE = {
+         int('0x00', base=16): 'Normal',
+         int('0x40', base=16): 'Acquisition Point',
+         int('0x80', base=16): 'Epoch Start'
+     }
+
+     def __init__(self, bytes_):
+         BaseSegment.__init__(self, bytes_)
+         self.width = int(self.data[0:2].hex(), base=16)
+         self.height = int(self.data[2:4].hex(), base=16)
+         self.frame_rate = self.data[4]
+         self._num = int(self.data[5:7].hex(), base=16)
+         self._state = self.STATE[self.data[7]]
+         self.palette_update = bool(self.data[8])
+         self.palette_id = self.data[9]
+         self._num_comps = self.data[10]
+
+     @property
+     def composition_number(self): return self._num
+
+     @property
+     def composition_state(self): return self._state
+
+     @property
+     def composition_objects(self):
+         if not hasattr(self, '_composition_objects'):
+             self._composition_objects = self.get_composition_objects()
+             if len(self._composition_objects) != self._num_comps:
+                 print('Warning: Number of composition objects asserted '
+                       'does not match the amount found.')
+         return self._composition_objects
+
+     def get_composition_objects(self):
+         bytes_ = self.data[11:]
+         comps = []
+         while bytes_:
+             length = 8 * (1 + bool(bytes_[3]))
+             comps.append(self.CompositionObject(bytes_[:length]))
+             bytes_ = bytes_[length:]
+         return comps
+
+ class WindowDefinitionSegment(BaseSegment):
+
+     def __init__(self, bytes_):
+         BaseSegment.__init__(self, bytes_)
+         self.num_windows = self.data[0]
+         self.window_id = self.data[1]
+         self.x_offset = int(self.data[2:4].hex(), base=16)
+         self.y_offset = int(self.data[4:6].hex(), base=16)
+         self.width = int(self.data[6:8].hex(), base=16)
+         self.height = int(self.data[8:10].hex(), base=16)
+
+ class PaletteDefinitionSegment(BaseSegment):
+
+     def __init__(self, bytes_):
+         BaseSegment.__init__(self, bytes_)
+         self.palette_id = self.data[0]
+         self.version = self.data[1]
+         self.palette = [Palette(0, 0, 0, 0)] * 256
+         # Slice from byte 2 til end of segment. Divide by 5 to determine number of palette entries
+         # Iterate entries. Explode the 5 bytes into namedtuple Palette. Must be exploded
+         for entry in range(len(self.data[2:]) // 5):
+             i = 2 + entry * 5
+             self.palette[self.data[i]] = Palette(*self.data[i + 1:i + 5])
+
+ class ObjectDefinitionSegment(BaseSegment):
+
+     SEQUENCE = {
+         int('0x40', base=16): 'Last',
+         int('0x80', base=16): 'First',
+         int('0xc0', base=16): 'First and last'
+     }
+
+     def __init__(self, bytes_):
+         BaseSegment.__init__(self, bytes_)
+         self.id = int(self.data[0:2].hex(), base=16)
+         self.version = self.data[2]
+         self.in_sequence = self.SEQUENCE[self.data[3]]
+         self.data_len = int(self.data[4:7].hex(), base=16)
+         self.width = int(self.data[7:9].hex(), base=16)
+         self.height = int(self.data[9:11].hex(), base=16)
+         self.img_data = self.data[11:]
+         if len(self.img_data) != self.data_len - 4:
+             print('Warning: Image data length asserted does not match the '
+                   'length found.')
+
+ class EndSegment(BaseSegment):
+
+     @property
+     def is_end(self): return True
+
+
+ SEGMENT_TYPE = {
+     PDS: PaletteDefinitionSegment,
+     ODS: ObjectDefinitionSegment,
+     PCS: PresentationCompositionSegment,
+     WDS: WindowDefinitionSegment,
+     END: EndSegment
+ }
+
+ class DisplaySet:
+
+     def __init__(self, segments):
+         self.segments = segments
+         self.segment_types = [s.type for s in segments]
+         self.has_image = 'ODS' in self.segment_types
+
+ def segment_by_type_getter(type_):
+     def f(self):
+         return [s for s in self.segments if s.type == type_]
+     return f
+
+ for type_ in BaseSegment.SEGMENT.values():
+     setattr(DisplaySet, type_.lower(), property(segment_by_type_getter(type_)))
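Every segment starts with the same 13-byte header that BaseSegment decodes field by field via `.hex()`. A hypothetical, equivalent reading of that header with `struct`, shown only to document the layout this reader assumes:

```python
import struct

def parse_header(bytes_):
    # 2-byte magic 'PG', 32-bit PTS, 32-bit DTS, 1-byte segment type, 16-bit payload size (big-endian)
    magic, pts, dts, seg_type, size = struct.unpack(">2sIIBH", bytes_[:13])
    assert magic == b"PG"
    # PTS/DTS are 90 kHz ticks; BaseSegment divides by 90 to get milliseconds
    return pts / 90, dts / 90, seg_type, size
```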
@@ -0,0 +1,4 @@
+ pytesseract==0.3.7
+ numpy==1.19.4
+ Pillow==8.2.0
+ tld~=0.12.3
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: mkv-episode-matcher
- Version: 0.1.1
+ Version: 0.1.3
  Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
  Project-URL: Documentation, https://github.com/Jsakkos/mkv-episode-matcher#readme
  Project-URL: Issues, https://github.com/Jsakkos/mkv-episode-matcher/issues