RubyGems - html-to-markdown - Versions diffs - 2.24.6 → 2.25.0 - Mend

html-to-markdown 2.24.6 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (203)

data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi DELETED Viewed

@@ -1,83 +0,0 @@
-from typing import (
-    Optional,
-    Generic,
-    TypeVar,
-    Union,
-    Callable,
-    Tuple,
-    Sequence,
-    Any,
-    List,
-    Text,
-    overload,
-)
-from funcparserlib.lexer import Token
-_A = TypeVar("_A")
-_B = TypeVar("_B")
-_C = TypeVar("_C")
-_D = TypeVar("_D")
-class State:
-    pos: int
-    max: int
-    parser: Union[Parser, _ParserCallable, None]
-    def __init__(
-        self,
-        pos: int,
-        max: int,
-        parser: Union[Parser, _ParserCallable, None] = ...,
-    ) -> None: ...
-_ParserCallable = Callable[[_A, State], Tuple[_B, State]]
-class Parser(Generic[_A, _B]):
-    name: Text
-    def __init__(self, p: Union[Parser[_A, _B], _ParserCallable]) -> None: ...
-    def named(self, name: Text) -> Parser[_A, _B]: ...
-    def define(self, p: Union[Parser[_A, _B], _ParserCallable]) -> None: ...
-    def run(self, tokens: Sequence[_A], s: State) -> Tuple[_B, State]: ...
-    def parse(self, tokens: Sequence[_A]) -> _B: ...
-    @overload
-    def __add__(  # type: ignore[misc]
-        self, other: _IgnoredParser[_A]
-    ) -> Parser[_A, _B]: ...
-    @overload
-    def __add__(self, other: Parser[_A, _C]) -> _TupleParser[_A, Tuple[_B, _C]]: ...
-    def __or__(self, other: Parser[_A, _C]) -> Parser[_A, Union[_B, _C]]: ...
-    def __rshift__(self, f: Callable[[_B], _C]) -> Parser[_A, _C]: ...
-    def bind(self, f: Callable[[_B], Parser[_A, _C]]) -> Parser[_A, _C]: ...
-    def __neg__(self) -> _IgnoredParser[_A]: ...
-class _Ignored:
-    value: Any
-    def __init__(self, value: Any) -> None: ...
-class _IgnoredParser(Parser[_A, _Ignored]):
-    @overload  # type: ignore[override]
-    def __add__(self, other: _IgnoredParser[_A]) -> _IgnoredParser[_A]: ...
-    @overload  # type: ignore[override]
-    def __add__(self, other: Parser[_A, _C]) -> Parser[_A, _C]: ...
-class _TupleParser(Parser[_A, _B]):
-    @overload  # type: ignore[override]
-    def __add__(self, other: _IgnoredParser[_A]) -> _TupleParser[_A, _B]: ...
-    @overload
-    def __add__(self, other: Parser[_A, Any]) -> Parser[_A, Any]: ...
-finished: Parser[Any, None]
-def many(p: Parser[_A, _B]) -> Parser[_A, List[_B]]: ...
-def some(pred: Callable[[_A], bool]) -> Parser[_A, _A]: ...
-def a(value: _A) -> Parser[_A, _A]: ...
-def tok(type: Text, value: Optional[Text] = ...) -> Parser[Token, Text]: ...
-def pure(x: _A) -> Parser[_A, _A]: ...
-def maybe(p: Parser[_A, _B]) -> Parser[_A, Optional[_B]]: ...
-def skip(p: Parser[_A, Any]) -> _IgnoredParser[_A]: ...
-def oneplus(p: Parser[_A, _B]) -> Parser[_A, List[_B]]: ...
-def forward_decl() -> Parser[Any, Any]: ...
-class NoParseError(Exception):
-    msg: Text
-    state: State
-    def __init__(self, msg: Text, state: State) -> None: ...

data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed DELETED Viewed

File without changes

data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py DELETED Viewed

@@ -1,72 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright © 2009/2021 Andrey Vlasovskikh
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this
-# software and associated documentation files (the "Software"), to deal in the Software
-# without restriction, including without limitation the rights to use, copy, modify,
-# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be included in all copies
-# or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
-# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
-# PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
-# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-from __future__ import unicode_literals
-def pretty_tree(x, kids, show):
-    """Return a pseudo-graphic tree representation of the object `x` similar to the
-    `tree` command in Unix.
-    Type: `(T, Callable[[T], List[T]], Callable[[T], str]) -> str`
-    It applies the parameter `show` (which is a function of type `(T) -> str`) to get a
-    textual representation of the objects to show.
-    It applies the parameter `kids` (which is a function of type `(T) -> List[T]`) to
-    list the children of the object to show.
-    Examples:
-    ```pycon
-    >>> print(pretty_tree(
-    ...     ["foo", ["bar", "baz"], "quux"],
-    ...     lambda obj: obj if isinstance(obj, list) else [],
-    ...     lambda obj: "[]" if isinstance(obj, list) else str(obj),
-    ... ))
-    []
-    |-- foo
-    |-- []
-    |   |-- bar
-    |   `-- baz
-    `-- quux
-    ```
-    """
-    (MID, END, CONT, LAST, ROOT) = ("|-- ", "`-- ", "|   ", "    ", "")
-    def rec(obj, indent, sym):
-        line = indent + sym + show(obj)
-        obj_kids = kids(obj)
-        if len(obj_kids) == 0:
-            return line
-        else:
-            if sym == MID:
-                next_indent = indent + CONT
-            elif sym == ROOT:
-                next_indent = indent + ROOT
-            else:
-                next_indent = indent + LAST
-            chars = [MID] * (len(obj_kids) - 1) + [END]
-            lines = [rec(kid, next_indent, sym) for kid, sym in zip(obj_kids, chars)]
-            return "\n".join([line] + lines)
-    return rec(x, "", ROOT)

data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi DELETED Viewed

@@ -1,7 +0,0 @@
-from typing import TypeVar, Callable, List, Text
-_A = TypeVar("_A")
-def pretty_tree(
-    x: _A, kids: Callable[[_A], List[_A]], show: Callable[[_A], Text]
-) -> Text: ...

data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt DELETED Viewed

@@ -1 +0,0 @@
-funcparserlib==1.0.1

data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch DELETED Viewed

@@ -1,24 +0,0 @@
-diff --git a/lint_lib/_vendor/funcparserlib/parser.py b/lint_lib/_vendor/funcparserlib/parser.py
-index eb2f53f..0f86e6c 100644
---- a/lint_lib/_vendor/funcparserlib/parser.py
-+++ b/lint_lib/_vendor/funcparserlib/parser.py
-@@ -137,19 +137,6 @@ class Parser(object):
-         "('x', 'y')"
-         ```
--
--        !!! Note
--
--            You can enable the parsing log this way:
--
--            ```python
--            import logging
--            logging.basicConfig(level=logging.DEBUG)
--            import funcparserlib.parser
--            funcparserlib.parser.debug = True
--            ```
--
--            The way to enable the parsing log may be changed in future versions.
-         """
-         self.name = name
-         return self

data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py DELETED Viewed

@@ -1,280 +0,0 @@
-import codecs
-import contextlib
-import io
-import json
-import os
-import re
-import sys
-from collections import Counter
-from os.path import dirname, join, pardir, relpath
-from typing import Any, Dict, List, Optional, Set, TypeVar
-from . import parser
-from ._vendor.funcparserlib.parser import NoParseError
-text_type = str
-binary_type = bytes
-StringLike = TypeVar("StringLike", str, bytes)
-base = join(dirname(__file__), pardir)
-_surrogateRe = re.compile(r"\\u([0-9A-Fa-f]{4})(?:\\u([0-9A-Fa-f]{4}))?")
-def clean_path(path: str) -> str:
-    return relpath(path, base)
-def is_subsequence(l1: List[StringLike], l2: List[StringLike]) -> bool:
-    """checks if l1 is a subsequence of l2"""
-    i = 0
-    for x in l2:
-        if l1[i] == x:
-            i += 1
-            if i == len(l1):
-                return True
-    return False
-def unescape_json(obj: Any) -> Any:
-    def decode_str(inp):
-        """Decode \\uXXXX escapes
-        This decodes \\uXXXX escapes, possibly into non-BMP characters when
-        two surrogate character escapes are adjacent to each other.
-        """
-        # This cannot be implemented using the unicode_escape codec
-        # because that requires its input be ISO-8859-1, and we need
-        # arbitrary unicode as input.
-        def repl(m):
-            if m.group(2) is not None:
-                high = int(m.group(1), 16)
-                low = int(m.group(2), 16)
-                if (
-                    0xD800 <= high <= 0xDBFF
-                    and 0xDC00 <= low <= 0xDFFF
-                    and sys.maxunicode == 0x10FFFF
-                ):
-                    cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000
-                    return chr(cp)
-                else:
-                    return chr(high) + chr(low)
-            else:
-                return chr(int(m.group(1), 16))
-        return _surrogateRe.sub(repl, inp)
-    if isinstance(obj, dict):
-        return {decode_str(k): unescape_json(v) for k, v in obj.items()}
-    elif isinstance(obj, list):
-        return [unescape_json(x) for x in obj]
-    elif isinstance(obj, text_type):
-        return decode_str(obj)
-    else:
-        return obj
-def lint_dat_format(
-    path: str,
-    encoding: Optional[str],
-    first_header: StringLike,
-    expected_headers: Optional[List[StringLike]] = None,
-    input_headers: Optional[Set[StringLike]] = None,
-) -> List[Dict[StringLike, StringLike]]:
-    if expected_headers is not None and first_header not in expected_headers:
-        raise ValueError("First header must be an expected header. (lint config error)")
-    if (
-        input_headers is not None
-        and expected_headers is not None
-        and not (set(input_headers) < set(expected_headers))
-    ):
-        raise ValueError(
-            "Input header must be a subset of expected headers. (lint config error)"
-        )
-    if expected_headers is not None and len(set(expected_headers)) < len(
-        expected_headers
-    ):
-        raise ValueError(
-            "Can't expect a single header multiple times. (lint config error)"
-        )
-    if input_headers is None:
-        input_headers = set(expected_headers)
-    try:
-        if encoding is not None:
-            with codecs.open(path, "r", encoding=encoding) as fp:
-                dat = fp.read()
-                parsed = parser.parse(dat, first_header)
-        else:
-            with open(path, "rb") as fp:
-                dat = fp.read()
-                parsed = parser.parse(dat, first_header)
-    except NoParseError as e:
-        print("Parse error in {}, {}".format(path, e))
-        return
-    seen_items = {}
-    for item in parsed:
-        # Check we don't have duplicate headers within one item.
-        headers = Counter(x[0] for x in item.data)
-        headers.subtract(set(headers.elements()))  # remove one instance of each
-        for header in set(headers.elements()):
-            c = headers[header]
-            print(
-                f"Duplicate header {header!r} occurs {c+1} times in one item in {path} at line {item.lineno}"
-            )
-        item_dict = dict(item.data)
-        # Check we only have expected headers.
-        if expected_headers is not None:
-            if not is_subsequence(
-                list(item_dict.keys()),
-                expected_headers,
-            ):
-                unexpected = item_dict.keys()
-                print(
-                    f"Unexpected item headings in {list(unexpected)!r} in {path} at line {item.lineno}"
-                )
-        # Check for duplicated items.
-        if input_headers is not None:
-            found_input = set()
-            for input_header in input_headers:
-                found_input.add((input_header, item_dict.get(input_header)))
-        else:
-            found_input = set(item_dict.items())
-        first_line = seen_items.setdefault(frozenset(found_input), item.lineno)
-        if first_line is not None and first_line != item.lineno:
-            print(
-                f"Duplicate item in {path} at line {item.lineno} previously seen on line {first_line}"
-            )
-    return [dict(x.data) for x in parsed]
-def lint_encoding_test(path: str) -> None:
-    parsed = lint_dat_format(
-        path,
-        None,
-        b"data",
-        expected_headers=[b"data", b"encoding"],
-        input_headers={b"data"},
-    )
-    if not parsed:
-        # We'll already have output if there's a parse error.
-        return
-    # We'd put extra linting here, if we ever have anything specific to the
-    # encoding tests here.
-def lint_encoding_tests(path: str) -> None:
-    for root, dirs, files in os.walk(path):
-        for file in sorted(files):
-            if not file.endswith(".dat"):
-                continue
-            lint_encoding_test(clean_path(join(root, file)))
-def lint_tokenizer_test(path: str) -> None:
-    all_keys = {
-        "description",
-        "input",
-        "output",
-        "initialStates",
-        "lastStartTag",
-        "ignoreErrorOrder",
-        "doubleEscaped",
-        "errors",
-    }
-    required = {"input", "output"}
-    with codecs.open(path, "r", "utf-8") as fp:
-        parsed = json.load(fp)
-    if not parsed:
-        return
-    if not isinstance(parsed, dict):
-        print("Top-level must be an object in %s" % path)
-        return
-    for test_group in parsed.values():
-        if not isinstance(test_group, list):
-            print("Test groups must be a lists in %s" % path)
-            continue
-        for test in test_group:
-            if "doubleEscaped" in test and test["doubleEscaped"] is True:
-                test = unescape_json(test)
-            keys = set(test.keys())
-            if not (required <= keys):
-                print(
-                    "missing test properties {!r} in {}".format(required - keys, path)
-                )
-            if not (keys <= all_keys):
-                print(
-                    "unknown test properties {!r} in {}".format(keys - all_keys, path)
-                )
-def lint_tokenizer_tests(path: str) -> None:
-    for root, dirs, files in os.walk(path):
-        for file in sorted(files):
-            if not file.endswith(".test"):
-                continue
-            lint_tokenizer_test(clean_path(join(root, file)))
-def lint_tree_construction_test(path: str) -> None:
-    parsed = lint_dat_format(
-        path,
-        "utf-8",
-        "data",
-        expected_headers=[
-            "data",
-            "errors",
-            "new-errors",
-            "document-fragment",
-            "script-off",
-            "script-on",
-            "document",
-        ],
-        input_headers={
-            "data",
-            "document-fragment",
-            "script-on",
-            "script-off",
-        },
-    )
-    if not parsed:
-        # We'll already have output if there's a parse error.
-        return
-    # We'd put extra linting here, if we ever have anything specific to the
-    # tree construction tests here.
-def lint_tree_construction_tests(path: str) -> None:
-    for root, dirs, files in os.walk(path):
-        for file in sorted(files):
-            if not file.endswith(".dat"):
-                continue
-            lint_tree_construction_test(clean_path(join(root, file)))
-def main() -> int:
-    with contextlib.redirect_stdout(io.StringIO()) as f:
-        lint_encoding_tests(join(base, "encoding"))
-        lint_tokenizer_tests(join(base, "tokenizer"))
-        lint_tree_construction_tests(join(base, "tree-construction"))
-    print(f.getvalue(), end="")
-    return 0 if f.getvalue() == "" else 1
-if __name__ == "__main__":
-    sys.exit(main())

data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py DELETED Viewed

@@ -1,177 +0,0 @@
-import re
-from typing import Callable, List, Optional, Tuple, Type, TypeVar, Union
-from ._vendor.funcparserlib.lexer import LexerError, Token
-from ._vendor.funcparserlib.parser import (
-    NoParseError,
-    Parser,
-    _Tuple,
-    finished,
-    many,
-    pure,
-    skip,
-    some,
-    tok,
-)
-StringLike = TypeVar("StringLike", str, bytes)
-class Test:
-    def __init__(
-        self, data: List[Tuple[StringLike, StringLike]], lineno: Optional[int] = None
-    ) -> None:
-        self.data = data
-        self.lineno = lineno
-def _make_tokenizer(specs: List[Tuple[str, Tuple[StringLike]]]) -> Callable:
-    # Forked from upstream funcparserlib.lexer to fix #46
-    def compile_spec(spec):
-        name, args = spec
-        return name, re.compile(*args)
-    compiled = [compile_spec(s) for s in specs]
-    def match_specs(specs, s, i, position):
-        if isinstance(s, str):
-            lf = "\n"
-        else:
-            lf = b"\n"
-        line, pos = position
-        for type, regexp in specs:
-            m = regexp.match(s, i)
-            if m is not None:
-                value = m.group()
-                nls = value.count(lf)
-                n_line = line + nls
-                if nls == 0:
-                    n_pos = pos + len(value)
-                else:
-                    n_pos = len(value) - value.rfind(lf) - 1
-                return Token(type, value, (line, pos + 1), (n_line, n_pos))
-        else:
-            errline = s.splitlines()[line - 1]
-            raise LexerError((line, pos + 1), errline)
-    def f(s):
-        length = len(s)
-        line, pos = 1, 0
-        i = 0
-        while i < length:
-            t = match_specs(compiled, s, i, (line, pos))
-            yield t
-            line, pos = t.end
-            i += len(t.value)
-    return f
-_token_specs_u = [
-    ("HEADER", (r"[ \t]*#[^\n]*",)),
-    ("BODY", (r"[^#\n][^\n]*",)),
-    ("EOL", (r"\n",)),
-]
-_token_specs_b = [
-    (name, (regexp.encode("ascii"),)) for (name, (regexp,)) in _token_specs_u
-]
-_tokenizer_u = _make_tokenizer(_token_specs_u)
-_tokenizer_b = _make_tokenizer(_token_specs_b)
-def _many_merge(toks: _Tuple) -> List[Test]:
-    x, xs = toks
-    return [x] + xs
-def _notFollowedBy(p: Parser) -> Parser:
-    @Parser
-    def __notFollowedBy(tokens, s):
-        try:
-            p.run(tokens, s)
-        except NoParseError:
-            return skip(pure(None)).run(tokens, s)
-        else:
-            raise NoParseError("is followed by", s)
-    __notFollowedBy.name = "(notFollowedBy {})".format(p)
-    return __notFollowedBy
-def _trim_prefix(s: StringLike, prefix: StringLike) -> StringLike:
-    if s.startswith(prefix):
-        return s[len(prefix) :]
-    else:
-        return s
-def _make_test(result: _Tuple) -> Test:
-    first, rest = result
-    (first_header, first_lineno), first_body = first
-    return Test([(first_header, first_body)] + rest, lineno=first_lineno)
-def _parser(
-    tokens: List[Token],
-    new_test_header: StringLike,
-    tok_type: Union[Type[str], Type[bytes]],
-) -> List[Test]:
-    if tok_type is str:
-        header_prefix = "#"
-    elif tok_type is bytes:
-        header_prefix = b"#"
-    else:
-        assert False, "unreachable"
-    first_header = (
-        some(
-            lambda tok: tok.type == "HEADER"
-            and tok.value == header_prefix + new_test_header
-        )
-        >> (
-            lambda x: (
-                _trim_prefix(x.value, header_prefix),
-                x.start[0] if x.start is not None else None,
-            )
-        )
-    ) + skip(tok("EOL"))
-    header = (
-        some(
-            lambda tok: tok.type == "HEADER"
-            and tok.value != header_prefix + new_test_header
-        )
-        >> (lambda x: _trim_prefix(x.value, header_prefix))
-    ) + skip(tok("EOL"))
-    body = tok("BODY") + tok("EOL") >> (lambda x: x[0] + x[1])
-    empty = tok("EOL")
-    actual_body = many(body | (empty + skip(_notFollowedBy(first_header)))) >> (
-        lambda xs: tok_type().join(xs)[:-1]
-    )
-    first_segment = first_header + actual_body >> tuple
-    rest_segment = header + actual_body >> tuple
-    test = first_segment + many(rest_segment) >> _make_test
-    tests = (test + many(skip(empty) + test)) >> _many_merge
-    toplevel = tests + skip(finished)
-    return toplevel.parse(tokens)
-def parse(s: StringLike, new_test_header: StringLike) -> List[Test]:
-    if type(s) != type(new_test_header):
-        raise TypeError("s and new_test_header must have same type")
-    if isinstance(s, str):
-        return _parser(list(_tokenizer_u(s)), new_test_header, str)
-    elif isinstance(s, bytes):
-        return _parser(list(_tokenizer_b(s)), new_test_header, bytes)
-    else:
-        raise TypeError("s must be unicode or bytes object")

data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml DELETED Viewed

@@ -1,7 +0,0 @@
-[tool.vendoring]
-destination = "lint_lib/_vendor/"
-requirements = "lint_lib/_vendor/vendor.txt"
-namespace = "lint_lib._vendor"
-protected-files = ["__init__.py", "vendor.txt"]
-patches-dir = "lint_lib/_vendor-patches"