html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,83 +0,0 @@
1
- from typing import (
2
- Optional,
3
- Generic,
4
- TypeVar,
5
- Union,
6
- Callable,
7
- Tuple,
8
- Sequence,
9
- Any,
10
- List,
11
- Text,
12
- overload,
13
- )
14
- from funcparserlib.lexer import Token
15
-
16
- _A = TypeVar("_A")
17
- _B = TypeVar("_B")
18
- _C = TypeVar("_C")
19
- _D = TypeVar("_D")
20
-
21
- class State:
22
- pos: int
23
- max: int
24
- parser: Union[Parser, _ParserCallable, None]
25
- def __init__(
26
- self,
27
- pos: int,
28
- max: int,
29
- parser: Union[Parser, _ParserCallable, None] = ...,
30
- ) -> None: ...
31
-
32
- _ParserCallable = Callable[[_A, State], Tuple[_B, State]]
33
-
34
- class Parser(Generic[_A, _B]):
35
- name: Text
36
- def __init__(self, p: Union[Parser[_A, _B], _ParserCallable]) -> None: ...
37
- def named(self, name: Text) -> Parser[_A, _B]: ...
38
- def define(self, p: Union[Parser[_A, _B], _ParserCallable]) -> None: ...
39
- def run(self, tokens: Sequence[_A], s: State) -> Tuple[_B, State]: ...
40
- def parse(self, tokens: Sequence[_A]) -> _B: ...
41
- @overload
42
- def __add__( # type: ignore[misc]
43
- self, other: _IgnoredParser[_A]
44
- ) -> Parser[_A, _B]: ...
45
- @overload
46
- def __add__(self, other: Parser[_A, _C]) -> _TupleParser[_A, Tuple[_B, _C]]: ...
47
- def __or__(self, other: Parser[_A, _C]) -> Parser[_A, Union[_B, _C]]: ...
48
- def __rshift__(self, f: Callable[[_B], _C]) -> Parser[_A, _C]: ...
49
- def bind(self, f: Callable[[_B], Parser[_A, _C]]) -> Parser[_A, _C]: ...
50
- def __neg__(self) -> _IgnoredParser[_A]: ...
51
-
52
- class _Ignored:
53
- value: Any
54
- def __init__(self, value: Any) -> None: ...
55
-
56
- class _IgnoredParser(Parser[_A, _Ignored]):
57
- @overload # type: ignore[override]
58
- def __add__(self, other: _IgnoredParser[_A]) -> _IgnoredParser[_A]: ...
59
- @overload # type: ignore[override]
60
- def __add__(self, other: Parser[_A, _C]) -> Parser[_A, _C]: ...
61
-
62
- class _TupleParser(Parser[_A, _B]):
63
- @overload # type: ignore[override]
64
- def __add__(self, other: _IgnoredParser[_A]) -> _TupleParser[_A, _B]: ...
65
- @overload
66
- def __add__(self, other: Parser[_A, Any]) -> Parser[_A, Any]: ...
67
-
68
- finished: Parser[Any, None]
69
-
70
- def many(p: Parser[_A, _B]) -> Parser[_A, List[_B]]: ...
71
- def some(pred: Callable[[_A], bool]) -> Parser[_A, _A]: ...
72
- def a(value: _A) -> Parser[_A, _A]: ...
73
- def tok(type: Text, value: Optional[Text] = ...) -> Parser[Token, Text]: ...
74
- def pure(x: _A) -> Parser[_A, _A]: ...
75
- def maybe(p: Parser[_A, _B]) -> Parser[_A, Optional[_B]]: ...
76
- def skip(p: Parser[_A, Any]) -> _IgnoredParser[_A]: ...
77
- def oneplus(p: Parser[_A, _B]) -> Parser[_A, List[_B]]: ...
78
- def forward_decl() -> Parser[Any, Any]: ...
79
-
80
- class NoParseError(Exception):
81
- msg: Text
82
- state: State
83
- def __init__(self, msg: Text, state: State) -> None: ...
@@ -1,72 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # Copyright © 2009/2021 Andrey Vlasovskikh
4
- #
5
- # Permission is hereby granted, free of charge, to any person obtaining a copy of this
6
- # software and associated documentation files (the "Software"), to deal in the Software
7
- # without restriction, including without limitation the rights to use, copy, modify,
8
- # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to the following
10
- # conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be included in all copies
13
- # or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16
- # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
17
- # PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18
- # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
- # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
20
- # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
-
22
- from __future__ import unicode_literals
23
-
24
-
25
- def pretty_tree(x, kids, show):
26
- """Return a pseudo-graphic tree representation of the object `x` similar to the
27
- `tree` command in Unix.
28
-
29
- Type: `(T, Callable[[T], List[T]], Callable[[T], str]) -> str`
30
-
31
- It applies the parameter `show` (which is a function of type `(T) -> str`) to get a
32
- textual representation of the objects to show.
33
-
34
- It applies the parameter `kids` (which is a function of type `(T) -> List[T]`) to
35
- list the children of the object to show.
36
-
37
- Examples:
38
-
39
- ```pycon
40
- >>> print(pretty_tree(
41
- ... ["foo", ["bar", "baz"], "quux"],
42
- ... lambda obj: obj if isinstance(obj, list) else [],
43
- ... lambda obj: "[]" if isinstance(obj, list) else str(obj),
44
- ... ))
45
- []
46
- |-- foo
47
- |-- []
48
- | |-- bar
49
- | `-- baz
50
- `-- quux
51
-
52
- ```
53
- """
54
- (MID, END, CONT, LAST, ROOT) = ("|-- ", "`-- ", "| ", " ", "")
55
-
56
- def rec(obj, indent, sym):
57
- line = indent + sym + show(obj)
58
- obj_kids = kids(obj)
59
- if len(obj_kids) == 0:
60
- return line
61
- else:
62
- if sym == MID:
63
- next_indent = indent + CONT
64
- elif sym == ROOT:
65
- next_indent = indent + ROOT
66
- else:
67
- next_indent = indent + LAST
68
- chars = [MID] * (len(obj_kids) - 1) + [END]
69
- lines = [rec(kid, next_indent, sym) for kid, sym in zip(obj_kids, chars)]
70
- return "\n".join([line] + lines)
71
-
72
- return rec(x, "", ROOT)
@@ -1,7 +0,0 @@
1
- from typing import TypeVar, Callable, List, Text
2
-
3
- _A = TypeVar("_A")
4
-
5
- def pretty_tree(
6
- x: _A, kids: Callable[[_A], List[_A]], show: Callable[[_A], Text]
7
- ) -> Text: ...
@@ -1,24 +0,0 @@
1
- diff --git a/lint_lib/_vendor/funcparserlib/parser.py b/lint_lib/_vendor/funcparserlib/parser.py
2
- index eb2f53f..0f86e6c 100644
3
- --- a/lint_lib/_vendor/funcparserlib/parser.py
4
- +++ b/lint_lib/_vendor/funcparserlib/parser.py
5
- @@ -137,19 +137,6 @@ class Parser(object):
6
- "('x', 'y')"
7
-
8
- ```
9
- -
10
- - !!! Note
11
- -
12
- - You can enable the parsing log this way:
13
- -
14
- - ```python
15
- - import logging
16
- - logging.basicConfig(level=logging.DEBUG)
17
- - import funcparserlib.parser
18
- - funcparserlib.parser.debug = True
19
- - ```
20
- -
21
- - The way to enable the parsing log may be changed in future versions.
22
- """
23
- self.name = name
24
- return self
@@ -1,280 +0,0 @@
1
- import codecs
2
- import contextlib
3
- import io
4
- import json
5
- import os
6
- import re
7
- import sys
8
- from collections import Counter
9
- from os.path import dirname, join, pardir, relpath
10
- from typing import Any, Dict, List, Optional, Set, TypeVar
11
-
12
- from . import parser
13
- from ._vendor.funcparserlib.parser import NoParseError
14
-
15
- text_type = str
16
- binary_type = bytes
17
-
18
- StringLike = TypeVar("StringLike", str, bytes)
19
-
20
- base = join(dirname(__file__), pardir)
21
-
22
- _surrogateRe = re.compile(r"\\u([0-9A-Fa-f]{4})(?:\\u([0-9A-Fa-f]{4}))?")
23
-
24
-
25
- def clean_path(path: str) -> str:
26
- return relpath(path, base)
27
-
28
-
29
- def is_subsequence(l1: List[StringLike], l2: List[StringLike]) -> bool:
30
- """checks if l1 is a subsequence of l2"""
31
- i = 0
32
- for x in l2:
33
- if l1[i] == x:
34
- i += 1
35
- if i == len(l1):
36
- return True
37
- return False
38
-
39
-
40
- def unescape_json(obj: Any) -> Any:
41
- def decode_str(inp):
42
- """Decode \\uXXXX escapes
43
-
44
- This decodes \\uXXXX escapes, possibly into non-BMP characters when
45
- two surrogate character escapes are adjacent to each other.
46
- """
47
-
48
- # This cannot be implemented using the unicode_escape codec
49
- # because that requires its input be ISO-8859-1, and we need
50
- # arbitrary unicode as input.
51
- def repl(m):
52
- if m.group(2) is not None:
53
- high = int(m.group(1), 16)
54
- low = int(m.group(2), 16)
55
- if (
56
- 0xD800 <= high <= 0xDBFF
57
- and 0xDC00 <= low <= 0xDFFF
58
- and sys.maxunicode == 0x10FFFF
59
- ):
60
- cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000
61
- return chr(cp)
62
- else:
63
- return chr(high) + chr(low)
64
- else:
65
- return chr(int(m.group(1), 16))
66
-
67
- return _surrogateRe.sub(repl, inp)
68
-
69
- if isinstance(obj, dict):
70
- return {decode_str(k): unescape_json(v) for k, v in obj.items()}
71
- elif isinstance(obj, list):
72
- return [unescape_json(x) for x in obj]
73
- elif isinstance(obj, text_type):
74
- return decode_str(obj)
75
- else:
76
- return obj
77
-
78
-
79
- def lint_dat_format(
80
- path: str,
81
- encoding: Optional[str],
82
- first_header: StringLike,
83
- expected_headers: Optional[List[StringLike]] = None,
84
- input_headers: Optional[Set[StringLike]] = None,
85
- ) -> List[Dict[StringLike, StringLike]]:
86
- if expected_headers is not None and first_header not in expected_headers:
87
- raise ValueError("First header must be an expected header. (lint config error)")
88
-
89
- if (
90
- input_headers is not None
91
- and expected_headers is not None
92
- and not (set(input_headers) < set(expected_headers))
93
- ):
94
- raise ValueError(
95
- "Input header must be a subset of expected headers. (lint config error)"
96
- )
97
-
98
- if expected_headers is not None and len(set(expected_headers)) < len(
99
- expected_headers
100
- ):
101
- raise ValueError(
102
- "Can't expect a single header multiple times. (lint config error)"
103
- )
104
-
105
- if input_headers is None:
106
- input_headers = set(expected_headers)
107
-
108
- try:
109
- if encoding is not None:
110
- with codecs.open(path, "r", encoding=encoding) as fp:
111
- dat = fp.read()
112
- parsed = parser.parse(dat, first_header)
113
- else:
114
- with open(path, "rb") as fp:
115
- dat = fp.read()
116
- parsed = parser.parse(dat, first_header)
117
- except NoParseError as e:
118
- print("Parse error in {}, {}".format(path, e))
119
- return
120
-
121
- seen_items = {}
122
-
123
- for item in parsed:
124
- # Check we don't have duplicate headers within one item.
125
- headers = Counter(x[0] for x in item.data)
126
- headers.subtract(set(headers.elements())) # remove one instance of each
127
- for header in set(headers.elements()):
128
- c = headers[header]
129
- print(
130
- f"Duplicate header {header!r} occurs {c+1} times in one item in {path} at line {item.lineno}"
131
- )
132
-
133
- item_dict = dict(item.data)
134
-
135
- # Check we only have expected headers.
136
- if expected_headers is not None:
137
- if not is_subsequence(
138
- list(item_dict.keys()),
139
- expected_headers,
140
- ):
141
- unexpected = item_dict.keys()
142
- print(
143
- f"Unexpected item headings in {list(unexpected)!r} in {path} at line {item.lineno}"
144
- )
145
-
146
- # Check for duplicated items.
147
- if input_headers is not None:
148
- found_input = set()
149
- for input_header in input_headers:
150
- found_input.add((input_header, item_dict.get(input_header)))
151
- else:
152
- found_input = set(item_dict.items())
153
-
154
- first_line = seen_items.setdefault(frozenset(found_input), item.lineno)
155
- if first_line is not None and first_line != item.lineno:
156
- print(
157
- f"Duplicate item in {path} at line {item.lineno} previously seen on line {first_line}"
158
- )
159
-
160
- return [dict(x.data) for x in parsed]
161
-
162
-
163
- def lint_encoding_test(path: str) -> None:
164
- parsed = lint_dat_format(
165
- path,
166
- None,
167
- b"data",
168
- expected_headers=[b"data", b"encoding"],
169
- input_headers={b"data"},
170
- )
171
- if not parsed:
172
- # We'll already have output if there's a parse error.
173
- return
174
-
175
- # We'd put extra linting here, if we ever have anything specific to the
176
- # encoding tests here.
177
-
178
-
179
- def lint_encoding_tests(path: str) -> None:
180
- for root, dirs, files in os.walk(path):
181
- for file in sorted(files):
182
- if not file.endswith(".dat"):
183
- continue
184
- lint_encoding_test(clean_path(join(root, file)))
185
-
186
-
187
- def lint_tokenizer_test(path: str) -> None:
188
- all_keys = {
189
- "description",
190
- "input",
191
- "output",
192
- "initialStates",
193
- "lastStartTag",
194
- "ignoreErrorOrder",
195
- "doubleEscaped",
196
- "errors",
197
- }
198
- required = {"input", "output"}
199
- with codecs.open(path, "r", "utf-8") as fp:
200
- parsed = json.load(fp)
201
- if not parsed:
202
- return
203
- if not isinstance(parsed, dict):
204
- print("Top-level must be an object in %s" % path)
205
- return
206
- for test_group in parsed.values():
207
- if not isinstance(test_group, list):
208
- print("Test groups must be a lists in %s" % path)
209
- continue
210
- for test in test_group:
211
- if "doubleEscaped" in test and test["doubleEscaped"] is True:
212
- test = unescape_json(test)
213
- keys = set(test.keys())
214
- if not (required <= keys):
215
- print(
216
- "missing test properties {!r} in {}".format(required - keys, path)
217
- )
218
- if not (keys <= all_keys):
219
- print(
220
- "unknown test properties {!r} in {}".format(keys - all_keys, path)
221
- )
222
-
223
-
224
- def lint_tokenizer_tests(path: str) -> None:
225
- for root, dirs, files in os.walk(path):
226
- for file in sorted(files):
227
- if not file.endswith(".test"):
228
- continue
229
- lint_tokenizer_test(clean_path(join(root, file)))
230
-
231
-
232
- def lint_tree_construction_test(path: str) -> None:
233
- parsed = lint_dat_format(
234
- path,
235
- "utf-8",
236
- "data",
237
- expected_headers=[
238
- "data",
239
- "errors",
240
- "new-errors",
241
- "document-fragment",
242
- "script-off",
243
- "script-on",
244
- "document",
245
- ],
246
- input_headers={
247
- "data",
248
- "document-fragment",
249
- "script-on",
250
- "script-off",
251
- },
252
- )
253
- if not parsed:
254
- # We'll already have output if there's a parse error.
255
- return
256
-
257
- # We'd put extra linting here, if we ever have anything specific to the
258
- # tree construction tests here.
259
-
260
-
261
- def lint_tree_construction_tests(path: str) -> None:
262
- for root, dirs, files in os.walk(path):
263
- for file in sorted(files):
264
- if not file.endswith(".dat"):
265
- continue
266
- lint_tree_construction_test(clean_path(join(root, file)))
267
-
268
-
269
- def main() -> int:
270
- with contextlib.redirect_stdout(io.StringIO()) as f:
271
- lint_encoding_tests(join(base, "encoding"))
272
- lint_tokenizer_tests(join(base, "tokenizer"))
273
- lint_tree_construction_tests(join(base, "tree-construction"))
274
-
275
- print(f.getvalue(), end="")
276
- return 0 if f.getvalue() == "" else 1
277
-
278
-
279
- if __name__ == "__main__":
280
- sys.exit(main())
@@ -1,177 +0,0 @@
1
- import re
2
- from typing import Callable, List, Optional, Tuple, Type, TypeVar, Union
3
-
4
- from ._vendor.funcparserlib.lexer import LexerError, Token
5
- from ._vendor.funcparserlib.parser import (
6
- NoParseError,
7
- Parser,
8
- _Tuple,
9
- finished,
10
- many,
11
- pure,
12
- skip,
13
- some,
14
- tok,
15
- )
16
-
17
- StringLike = TypeVar("StringLike", str, bytes)
18
-
19
-
20
- class Test:
21
- def __init__(
22
- self, data: List[Tuple[StringLike, StringLike]], lineno: Optional[int] = None
23
- ) -> None:
24
- self.data = data
25
- self.lineno = lineno
26
-
27
-
28
- def _make_tokenizer(specs: List[Tuple[str, Tuple[StringLike]]]) -> Callable:
29
- # Forked from upstream funcparserlib.lexer to fix #46
30
- def compile_spec(spec):
31
- name, args = spec
32
- return name, re.compile(*args)
33
-
34
- compiled = [compile_spec(s) for s in specs]
35
-
36
- def match_specs(specs, s, i, position):
37
- if isinstance(s, str):
38
- lf = "\n"
39
- else:
40
- lf = b"\n"
41
- line, pos = position
42
- for type, regexp in specs:
43
- m = regexp.match(s, i)
44
- if m is not None:
45
- value = m.group()
46
- nls = value.count(lf)
47
- n_line = line + nls
48
- if nls == 0:
49
- n_pos = pos + len(value)
50
- else:
51
- n_pos = len(value) - value.rfind(lf) - 1
52
- return Token(type, value, (line, pos + 1), (n_line, n_pos))
53
- else:
54
- errline = s.splitlines()[line - 1]
55
- raise LexerError((line, pos + 1), errline)
56
-
57
- def f(s):
58
- length = len(s)
59
- line, pos = 1, 0
60
- i = 0
61
- while i < length:
62
- t = match_specs(compiled, s, i, (line, pos))
63
- yield t
64
- line, pos = t.end
65
- i += len(t.value)
66
-
67
- return f
68
-
69
-
70
- _token_specs_u = [
71
- ("HEADER", (r"[ \t]*#[^\n]*",)),
72
- ("BODY", (r"[^#\n][^\n]*",)),
73
- ("EOL", (r"\n",)),
74
- ]
75
-
76
- _token_specs_b = [
77
- (name, (regexp.encode("ascii"),)) for (name, (regexp,)) in _token_specs_u
78
- ]
79
-
80
- _tokenizer_u = _make_tokenizer(_token_specs_u)
81
- _tokenizer_b = _make_tokenizer(_token_specs_b)
82
-
83
-
84
- def _many_merge(toks: _Tuple) -> List[Test]:
85
- x, xs = toks
86
- return [x] + xs
87
-
88
-
89
- def _notFollowedBy(p: Parser) -> Parser:
90
- @Parser
91
- def __notFollowedBy(tokens, s):
92
- try:
93
- p.run(tokens, s)
94
- except NoParseError:
95
- return skip(pure(None)).run(tokens, s)
96
- else:
97
- raise NoParseError("is followed by", s)
98
-
99
- __notFollowedBy.name = "(notFollowedBy {})".format(p)
100
- return __notFollowedBy
101
-
102
-
103
- def _trim_prefix(s: StringLike, prefix: StringLike) -> StringLike:
104
- if s.startswith(prefix):
105
- return s[len(prefix) :]
106
- else:
107
- return s
108
-
109
-
110
- def _make_test(result: _Tuple) -> Test:
111
- first, rest = result
112
- (first_header, first_lineno), first_body = first
113
- return Test([(first_header, first_body)] + rest, lineno=first_lineno)
114
-
115
-
116
- def _parser(
117
- tokens: List[Token],
118
- new_test_header: StringLike,
119
- tok_type: Union[Type[str], Type[bytes]],
120
- ) -> List[Test]:
121
- if tok_type is str:
122
- header_prefix = "#"
123
- elif tok_type is bytes:
124
- header_prefix = b"#"
125
- else:
126
- assert False, "unreachable"
127
-
128
- first_header = (
129
- some(
130
- lambda tok: tok.type == "HEADER"
131
- and tok.value == header_prefix + new_test_header
132
- )
133
- >> (
134
- lambda x: (
135
- _trim_prefix(x.value, header_prefix),
136
- x.start[0] if x.start is not None else None,
137
- )
138
- )
139
- ) + skip(tok("EOL"))
140
-
141
- header = (
142
- some(
143
- lambda tok: tok.type == "HEADER"
144
- and tok.value != header_prefix + new_test_header
145
- )
146
- >> (lambda x: _trim_prefix(x.value, header_prefix))
147
- ) + skip(tok("EOL"))
148
-
149
- body = tok("BODY") + tok("EOL") >> (lambda x: x[0] + x[1])
150
- empty = tok("EOL")
151
-
152
- actual_body = many(body | (empty + skip(_notFollowedBy(first_header)))) >> (
153
- lambda xs: tok_type().join(xs)[:-1]
154
- )
155
-
156
- first_segment = first_header + actual_body >> tuple
157
- rest_segment = header + actual_body >> tuple
158
-
159
- test = first_segment + many(rest_segment) >> _make_test
160
-
161
- tests = (test + many(skip(empty) + test)) >> _many_merge
162
-
163
- toplevel = tests + skip(finished)
164
-
165
- return toplevel.parse(tokens)
166
-
167
-
168
- def parse(s: StringLike, new_test_header: StringLike) -> List[Test]:
169
- if type(s) != type(new_test_header):
170
- raise TypeError("s and new_test_header must have same type")
171
-
172
- if isinstance(s, str):
173
- return _parser(list(_tokenizer_u(s)), new_test_header, str)
174
- elif isinstance(s, bytes):
175
- return _parser(list(_tokenizer_b(s)), new_test_header, bytes)
176
- else:
177
- raise TypeError("s must be unicode or bytes object")
@@ -1,7 +0,0 @@
1
- [tool.vendoring]
2
- destination = "lint_lib/_vendor/"
3
- requirements = "lint_lib/_vendor/vendor.txt"
4
- namespace = "lint_lib._vendor"
5
-
6
- protected-files = ["__init__.py", "vendor.txt"]
7
- patches-dir = "lint_lib/_vendor-patches"