html-to-markdown 2.24.6 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/html-to-markdown-rb/native/Cargo.lock +9 -32
  5. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  6. data/lib/html_to_markdown/version.rb +1 -1
  7. data/rust-vendor/html-to-markdown-rs/Cargo.toml +0 -1
  8. data/rust-vendor/html-to-markdown-rs/src/converter/main_helpers.rs +1 -1
  9. data/rust-vendor/html-to-markdown-rs/src/hocr/converter/hierarchy.rs +20 -5
  10. data/rust-vendor/html-to-markdown-rs/src/lib.rs +1 -0
  11. data/rust-vendor/{markup5ever_rcdom/lib.rs → html-to-markdown-rs/src/rcdom.rs} +56 -91
  12. data/rust-vendor/html-to-markdown-rs/tests/hocr_compliance_test.rs +157 -0
  13. data/rust-vendor/memmap2/.cargo-checksum.json +1 -1
  14. data/rust-vendor/memmap2/.cargo_vcs_info.json +1 -1
  15. data/rust-vendor/memmap2/CHANGELOG.md +8 -0
  16. data/rust-vendor/memmap2/Cargo.lock +1 -1
  17. data/rust-vendor/memmap2/Cargo.toml +2 -1
  18. data/rust-vendor/memmap2/Cargo.toml.orig +2 -1
  19. data/rust-vendor/memmap2/src/lib.rs +25 -1
  20. data/rust-vendor/memmap2/src/stub.rs +1 -4
  21. data/rust-vendor/memmap2/src/unix.rs +14 -1
  22. data/rust-vendor/png/.cargo-checksum.json +1 -1
  23. data/rust-vendor/png/.cargo_vcs_info.json +1 -1
  24. data/rust-vendor/png/CHANGES.md +44 -0
  25. data/rust-vendor/png/Cargo.lock +124 -171
  26. data/rust-vendor/png/Cargo.toml +1 -1
  27. data/rust-vendor/png/Cargo.toml.orig +1 -1
  28. data/rust-vendor/png/benches/expand_paletted.rs +5 -5
  29. data/rust-vendor/png/benches/unfilter.rs +3 -3
  30. data/rust-vendor/png/src/adam7.rs +17 -10
  31. data/rust-vendor/png/src/common.rs +8 -8
  32. data/rust-vendor/png/src/decoder/mod.rs +53 -20
  33. data/rust-vendor/png/src/decoder/stream.rs +263 -78
  34. data/rust-vendor/png/src/decoder/unfiltering_buffer.rs +210 -53
  35. data/rust-vendor/png/src/decoder/zlib.rs +130 -90
  36. data/rust-vendor/png/src/encoder.rs +4 -2
  37. data/rust-vendor/png/src/{filter.rs → filter/mod.rs} +100 -367
  38. data/rust-vendor/png/src/filter/optimization-notes.md +104 -0
  39. data/rust-vendor/png/src/filter/paeth.rs +398 -0
  40. data/rust-vendor/png/src/filter/simd.rs +308 -0
  41. data/rust-vendor/png/src/lib.rs +1 -0
  42. data/rust-vendor/syn/.cargo-checksum.json +1 -1
  43. data/rust-vendor/syn/.cargo_vcs_info.json +1 -1
  44. data/rust-vendor/syn/Cargo.lock +40 -41
  45. data/rust-vendor/syn/Cargo.toml +1 -1
  46. data/rust-vendor/syn/Cargo.toml.orig +1 -1
  47. data/rust-vendor/syn/src/item.rs +61 -40
  48. data/rust-vendor/syn/src/lib.rs +2 -1
  49. data/rust-vendor/syn/tests/test_item.rs +54 -0
  50. data/rust-vendor/unicode-ident/.cargo-checksum.json +1 -1
  51. data/rust-vendor/unicode-ident/.cargo_vcs_info.json +1 -1
  52. data/rust-vendor/unicode-ident/Cargo.lock +21 -21
  53. data/rust-vendor/unicode-ident/Cargo.toml +1 -1
  54. data/rust-vendor/unicode-ident/Cargo.toml.orig +1 -1
  55. data/rust-vendor/unicode-ident/src/lib.rs +1 -1
  56. data/rust-vendor/unicode-ident/src/tables.rs +87 -97
  57. data/rust-vendor/unicode-ident/tests/static_size.rs +1 -1
  58. metadata +7 -177
  59. data/rust-vendor/markup5ever_rcdom/.cargo-checksum.json +0 -1
  60. data/rust-vendor/markup5ever_rcdom/.cargo_vcs_info.json +0 -7
  61. data/rust-vendor/markup5ever_rcdom/Cargo.lock +0 -658
  62. data/rust-vendor/markup5ever_rcdom/Cargo.toml +0 -109
  63. data/rust-vendor/markup5ever_rcdom/Cargo.toml.orig +0 -42
  64. data/rust-vendor/markup5ever_rcdom/LICENSE-APACHE +0 -201
  65. data/rust-vendor/markup5ever_rcdom/LICENSE-MIT +0 -25
  66. data/rust-vendor/markup5ever_rcdom/README.md +0 -7
  67. data/rust-vendor/markup5ever_rcdom/custom-html5lib-tokenizer-tests/regression.test +0 -69
  68. data/rust-vendor/markup5ever_rcdom/data/test/ignore +0 -1
  69. data/rust-vendor/markup5ever_rcdom/examples/hello_xml.rs +0 -39
  70. data/rust-vendor/markup5ever_rcdom/examples/html2html.rs +0 -51
  71. data/rust-vendor/markup5ever_rcdom/examples/print-rcdom.rs +0 -78
  72. data/rust-vendor/markup5ever_rcdom/examples/xml_tree_printer.rs +0 -67
  73. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitattributes +0 -2
  74. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/downstream.yml +0 -76
  75. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.github/workflows/lint.yml +0 -25
  76. data/rust-vendor/markup5ever_rcdom/html5lib-tests/.gitignore +0 -79
  77. data/rust-vendor/markup5ever_rcdom/html5lib-tests/AUTHORS.rst +0 -34
  78. data/rust-vendor/markup5ever_rcdom/html5lib-tests/LICENSE +0 -21
  79. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/chardet/test_big5.txt +0 -51
  80. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/scripted/tests1.dat +0 -5
  81. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/test-yahoo-jp.dat +0 -10
  82. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests1.dat +0 -388
  83. data/rust-vendor/markup5ever_rcdom/html5lib-tests/encoding/tests2.dat +0 -115
  84. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint +0 -6
  85. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/__init__.py +0 -0
  86. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/__init__.py +0 -0
  87. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/LICENSE +0 -18
  88. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/__init__.py +0 -0
  89. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.py +0 -211
  90. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/lexer.pyi +0 -34
  91. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.py +0 -872
  92. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/parser.pyi +0 -83
  93. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/py.typed +0 -0
  94. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.py +0 -72
  95. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/funcparserlib/util.pyi +0 -7
  96. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor/vendor.txt +0 -1
  97. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/_vendor-patches/funcparserlib.patch +0 -24
  98. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/lint.py +0 -280
  99. data/rust-vendor/markup5ever_rcdom/html5lib-tests/lint_lib/parser.py +0 -177
  100. data/rust-vendor/markup5ever_rcdom/html5lib-tests/pyproject.toml +0 -7
  101. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/core.test +0 -125
  102. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/injectmeta.test +0 -66
  103. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/optionaltags.test +0 -965
  104. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/options.test +0 -60
  105. data/rust-vendor/markup5ever_rcdom/html5lib-tests/serializer/whitespace.test +0 -51
  106. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/README.md +0 -107
  107. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/contentModelFlags.test +0 -93
  108. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/domjs.test +0 -335
  109. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/entities.test +0 -542
  110. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/escapeFlag.test +0 -36
  111. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/namedEntities.test +0 -42422
  112. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/numericEntities.test +0 -1677
  113. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/pendingSpecChanges.test +0 -9
  114. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test1.test +0 -353
  115. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test2.test +0 -275
  116. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test3.test +0 -11233
  117. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/test4.test +0 -532
  118. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeChars.test +0 -1577
  119. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/unicodeCharsProblematic.test +0 -41
  120. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tokenizer/xmlViolation.test +0 -20
  121. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/README.md +0 -108
  122. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption01.dat +0 -354
  123. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/adoption02.dat +0 -39
  124. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/blocks.dat +0 -695
  125. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/comments01.dat +0 -217
  126. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/doctype01.dat +0 -474
  127. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/domjs-unsafe.dat +0 -0
  128. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities01.dat +0 -943
  129. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/entities02.dat +0 -309
  130. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/foreign-fragment.dat +0 -645
  131. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/html5test-com.dat +0 -301
  132. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/inbody01.dat +0 -54
  133. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/isindex.dat +0 -49
  134. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/main-element.dat +0 -46
  135. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/math.dat +0 -104
  136. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/menuitem-element.dat +0 -240
  137. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/namespace-sensitivity.dat +0 -22
  138. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/noscript01.dat +0 -237
  139. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes-plain-text-unsafe.dat +0 -0
  140. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/pending-spec-changes.dat +0 -46
  141. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/plain-text-unsafe.dat +0 -0
  142. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/quirks01.dat +0 -53
  143. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/ruby.dat +0 -302
  144. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scriptdata01.dat +0 -372
  145. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/adoption01.dat +0 -16
  146. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/ark.dat +0 -27
  147. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/scripted/webkit01.dat +0 -30
  148. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/search-element.dat +0 -46
  149. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/svg.dat +0 -104
  150. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tables01.dat +0 -322
  151. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/template.dat +0 -1673
  152. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests1.dat +0 -1956
  153. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests10.dat +0 -849
  154. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests11.dat +0 -523
  155. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests12.dat +0 -62
  156. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests14.dat +0 -75
  157. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests15.dat +0 -216
  158. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests16.dat +0 -2602
  159. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests17.dat +0 -179
  160. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests18.dat +0 -558
  161. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests19.dat +0 -1398
  162. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests2.dat +0 -831
  163. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests20.dat +0 -842
  164. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests21.dat +0 -306
  165. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests22.dat +0 -190
  166. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests23.dat +0 -168
  167. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests24.dat +0 -79
  168. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests25.dat +0 -288
  169. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests26.dat +0 -453
  170. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests3.dat +0 -305
  171. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests4.dat +0 -74
  172. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests5.dat +0 -210
  173. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests6.dat +0 -663
  174. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests7.dat +0 -453
  175. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests8.dat +0 -165
  176. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests9.dat +0 -472
  177. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tests_innerHTML_1.dat +0 -843
  178. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/tricky01.dat +0 -336
  179. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit01.dat +0 -785
  180. data/rust-vendor/markup5ever_rcdom/html5lib-tests/tree-construction/webkit02.dat +0 -554
  181. data/rust-vendor/markup5ever_rcdom/tests/foreach_html5lib_test/mod.rs +0 -41
  182. data/rust-vendor/markup5ever_rcdom/tests/html-driver.rs +0 -29
  183. data/rust-vendor/markup5ever_rcdom/tests/html-serializer.rs +0 -265
  184. data/rust-vendor/markup5ever_rcdom/tests/html-tokenizer.rs +0 -487
  185. data/rust-vendor/markup5ever_rcdom/tests/html-tree-builder.rs +0 -298
  186. data/rust-vendor/markup5ever_rcdom/tests/html-tree-sink.rs +0 -141
  187. data/rust-vendor/markup5ever_rcdom/tests/util/find_tests.rs +0 -34
  188. data/rust-vendor/markup5ever_rcdom/tests/util/runner.rs +0 -48
  189. data/rust-vendor/markup5ever_rcdom/tests/xml-driver.rs +0 -101
  190. data/rust-vendor/markup5ever_rcdom/tests/xml-tokenizer.rs +0 -374
  191. data/rust-vendor/markup5ever_rcdom/tests/xml-tree-builder.rs +0 -237
  192. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/AUTHORS.rst +0 -9
  193. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/LICENSE +0 -21
  194. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/README.md +0 -92
  195. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/comments.test +0 -274
  196. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/doctype.test +0 -3232
  197. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/entities.test +0 -283
  198. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/eof.test +0 -113
  199. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/namedEntities.test +0 -42210
  200. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/numericEntities.test +0 -1349
  201. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test1.test +0 -162
  202. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/test2.test +0 -64
  203. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tokenizer/unicodeChars.test +0 -1295
  204. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/README.md +0 -104
  205. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/namespace.dat +0 -119
  206. data/rust-vendor/markup5ever_rcdom/xml5lib-tests/tree-construction/test1.dat +0 -124
  207. data/rust-vendor/xml5ever/.cargo-checksum.json +0 -1
  208. data/rust-vendor/xml5ever/.cargo_vcs_info.json +0 -6
  209. data/rust-vendor/xml5ever/Cargo.lock +0 -752
  210. data/rust-vendor/xml5ever/Cargo.toml +0 -69
  211. data/rust-vendor/xml5ever/Cargo.toml.orig +0 -29
  212. data/rust-vendor/xml5ever/LICENSE-APACHE +0 -201
  213. data/rust-vendor/xml5ever/LICENSE-MIT +0 -25
  214. data/rust-vendor/xml5ever/README.md +0 -72
  215. data/rust-vendor/xml5ever/benches/xml5ever.rs +0 -77
  216. data/rust-vendor/xml5ever/data/bench/strong.xml +0 -1
  217. data/rust-vendor/xml5ever/examples/README.md +0 -223
  218. data/rust-vendor/xml5ever/examples/example.xml +0 -3
  219. data/rust-vendor/xml5ever/examples/simple_xml_tokenizer.rs +0 -81
  220. data/rust-vendor/xml5ever/examples/xml_tokenizer.rs +0 -115
  221. data/rust-vendor/xml5ever/src/driver.rs +0 -90
  222. data/rust-vendor/xml5ever/src/lib.rs +0 -47
  223. data/rust-vendor/xml5ever/src/macros.rs +0 -18
  224. data/rust-vendor/xml5ever/src/serialize/mod.rs +0 -216
  225. data/rust-vendor/xml5ever/src/tokenizer/char_ref/mod.rs +0 -456
  226. data/rust-vendor/xml5ever/src/tokenizer/interface.rs +0 -116
  227. data/rust-vendor/xml5ever/src/tokenizer/mod.rs +0 -1344
  228. data/rust-vendor/xml5ever/src/tokenizer/qname.rs +0 -84
  229. data/rust-vendor/xml5ever/src/tokenizer/states.rs +0 -167
  230. data/rust-vendor/xml5ever/src/tree_builder/mod.rs +0 -774
  231. data/rust-vendor/xml5ever/src/tree_builder/types.rs +0 -37
@@ -1,872 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # Copyright © 2009/2021 Andrey Vlasovskikh
4
- #
5
- # Permission is hereby granted, free of charge, to any person obtaining a copy of this
6
- # software and associated documentation files (the "Software"), to deal in the Software
7
- # without restriction, including without limitation the rights to use, copy, modify,
8
- # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
9
- # permit persons to whom the Software is furnished to do so, subject to the following
10
- # conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be included in all copies
13
- # or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16
- # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
17
- # PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18
- # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19
- # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
20
- # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
-
22
- """Functional parsing combinators.
23
-
24
- Parsing combinators define an internal domain-specific language (DSL) for describing
25
- the parsing rules of a grammar. The DSL allows you to start with a few primitive
26
- parsers, then combine your parsers to get more complex ones, and finally cover
27
- the whole grammar you want to parse.
28
-
29
- The structure of the language:
30
-
31
- * Class `Parser`
32
- * All the primitives and combinators of the language return `Parser` objects
33
- * It defines the main `Parser.parse(tokens)` method
34
- * Primitive parsers
35
- * `tok(type, value)`, `a(value)`, `some(pred)`, `forward_decl()`, `finished`
36
- * Parser combinators
37
- * `p1 + p2`, `p1 | p2`, `p >> f`, `-p`, `maybe(p)`, `many(p)`, `oneplus(p)`,
38
- `skip(p)`
39
- * Abstraction
40
- * Use regular Python variables `p = ... # Expression of type Parser` to define new
41
- rules (non-terminals) of your grammar
42
-
43
- Every time you apply one of the combinators, you get a new `Parser` object. In other
44
- words, the set of `Parser` objects is closed under the means of combination.
45
-
46
- !!! Note
47
-
48
- We took the parsing combinators language from the book [Introduction to Functional
49
- Programming][1] and translated it from ML into Python.
50
-
51
- [1]: https://www.cl.cam.ac.uk/teaching/Lectures/funprog-jrh-1996/
52
- """
53
-
54
- from __future__ import unicode_literals
55
-
56
- __all__ = [
57
- "some",
58
- "a",
59
- "tok",
60
- "many",
61
- "pure",
62
- "finished",
63
- "maybe",
64
- "skip",
65
- "oneplus",
66
- "forward_decl",
67
- "NoParseError",
68
- "Parser",
69
- ]
70
-
71
- import sys
72
- import logging
73
- import warnings
74
-
75
- from lint_lib._vendor.funcparserlib.lexer import Token
76
-
77
- log = logging.getLogger("funcparserlib")
78
-
79
- debug = False
80
- if sys.version_info < (3,):
81
- string_types = (str, unicode) # noqa
82
- else:
83
- string_types = str
84
-
85
-
86
- class Parser(object):
87
- """A parser object that can parse a sequence of tokens or can be combined with
88
- other parsers using `+`, `|`, `>>`, `many()`, and other parsing combinators.
89
-
90
- Type: `Parser[A, B]`
91
-
92
- The generic variables in the type are: `A` — the type of the tokens in the
93
- sequence to parse,`B` — the type of the parsed value.
94
-
95
- In order to define a parser for your grammar:
96
-
97
- 1. You start with primitive parsers by calling `a(value)`, `some(pred)`,
98
- `forward_decl()`, `finished`
99
- 2. You use parsing combinators `p1 + p2`, `p1 | p2`, `p >> f`, `many(p)`, and
100
- others to combine parsers into a more complex parser
101
- 3. You can assign complex parsers to variables to define names that correspond to
102
- the rules of your grammar
103
-
104
- !!! Note
105
-
106
- The constructor `Parser.__init__()` is considered **internal** and may be
107
- changed in future versions. Use primitive parsers and parsing combinators to
108
- construct new parsers.
109
- """
110
-
111
- def __init__(self, p):
112
- """Wrap the parser function `p` into a `Parser` object."""
113
- self.name = ""
114
- self.define(p)
115
-
116
- def named(self, name):
117
- # noinspection GrazieInspection
118
- """Specify the name of the parser for easier debugging.
119
-
120
- Type: `(str) -> Parser[A, B]`
121
-
122
- This name is used in the debug-level parsing log. You can also get it via the
123
- `Parser.name` attribute.
124
-
125
- Examples:
126
-
127
- ```pycon
128
- >>> expr = (a("x") + a("y")).named("expr")
129
- >>> expr.name
130
- 'expr'
131
-
132
- ```
133
-
134
- ```pycon
135
- >>> expr = a("x") + a("y")
136
- >>> expr.name
137
- "('x', 'y')"
138
-
139
- ```
140
- """
141
- self.name = name
142
- return self
143
-
144
- def define(self, p):
145
- """Define the parser created earlier as a forward declaration.
146
-
147
- Type: `(Parser[A, B]) -> None`
148
-
149
- Use `p = forward_decl()` in combination with `p.define(...)` to define
150
- recursive parsers.
151
-
152
- See the examples in the docs for `forward_decl()`.
153
- """
154
- f = getattr(p, "run", p)
155
- if debug:
156
- setattr(self, "_run", f)
157
- else:
158
- setattr(self, "run", f)
159
- self.named(getattr(p, "name", p.__doc__))
160
-
161
- def run(self, tokens, s):
162
- """Run the parser against the tokens with the specified parsing state.
163
-
164
- Type: `(Sequence[A], State) -> Tuple[B, State]`
165
-
166
- The parsing state includes the current position in the sequence being parsed,
167
- and the position of the rightmost token that has been consumed while parsing for
168
- better error messages.
169
-
170
- If the parser fails to parse the tokens, it raises `NoParseError`.
171
-
172
- !!! Warning
173
-
174
- This is method is **internal** and may be changed in future versions. Use
175
- `Parser.parse(tokens)` instead and let the parser object take care of
176
- updating the parsing state.
177
- """
178
- if debug:
179
- log.debug("trying %s" % self.name)
180
- return self._run(tokens, s) # noqa
181
-
182
- def _run(self, tokens, s):
183
- raise NotImplementedError("you must define() a parser")
184
-
185
- def parse(self, tokens):
186
- """Parse the sequence of tokens and return the parsed value.
187
-
188
- Type: `(Sequence[A]) -> B`
189
-
190
- It takes a sequence of tokens of arbitrary type `A` and returns the parsed value
191
- of arbitrary type `B`.
192
-
193
- If the parser fails to parse the tokens, it raises `NoParseError`.
194
-
195
- !!! Note
196
-
197
- Although `Parser.parse()` can parse sequences of any objects (including
198
- `str` which is a sequence of `str` chars), **the recommended way** is
199
- parsing sequences of `Token` objects.
200
-
201
- You **should** use a regexp-based tokenizer `make_tokenizer()` defined in
202
- `funcparserlib.lexer` to convert your text into a sequence of `Token`
203
- objects before parsing it. You will get more readable parsing error messages
204
- (as `Token` objects contain their position in the source file) and good
205
- separation of the lexical and syntactic levels of the grammar.
206
- """
207
- try:
208
- (tree, _) = self.run(tokens, State(0, 0, None))
209
- return tree
210
- except NoParseError as e:
211
- max = e.state.max
212
- if len(tokens) > max:
213
- t = tokens[max]
214
- if isinstance(t, Token):
215
- if t.start is None or t.end is None:
216
- loc = ""
217
- else:
218
- s_line, s_pos = t.start
219
- e_line, e_pos = t.end
220
- loc = "%d,%d-%d,%d: " % (s_line, s_pos, e_line, e_pos)
221
- msg = "%s%s: %r" % (loc, e.msg, t.value)
222
- elif isinstance(t, string_types):
223
- msg = "%s: %r" % (e.msg, t)
224
- else:
225
- msg = "%s: %s" % (e.msg, t)
226
- else:
227
- msg = "got unexpected end of input"
228
- if e.state.parser is not None:
229
- msg = "%s, expected: %s" % (msg, e.state.parser.name)
230
- e.msg = msg
231
- raise
232
-
233
- def __add__(self, other):
234
- """Sequential combination of parsers. It runs this parser, then the other
235
- parser.
236
-
237
- The return value of the resulting parser is a tuple of each parsed value in
238
- the sum of parsers. We merge all parsing results of `p1 + p2 + ... + pN` into a
239
- single tuple. It means that the parsing result may be a 2-tuple, a 3-tuple,
240
- a 4-tuple, etc. of parsed values. You avoid this by transforming the parsed
241
- pair into a new value using the `>>` combinator.
242
-
243
- You can also skip some parsing results in the resulting parsers by using `-p`
244
- or `skip(p)` for some parsers in your sum of parsers. It means that the parsing
245
- result might be a single value, not a tuple of parsed values. See the docs
246
- for `Parser.__neg__()` for more examples.
247
-
248
- Overloaded types (lots of them to provide stricter checking for the quite
249
- dynamic return type of this method):
250
-
251
- * `(self: Parser[A, B], _IgnoredParser[A]) -> Parser[A, B]`
252
- * `(self: Parser[A, B], Parser[A, C]) -> _TupleParser[A, Tuple[B, C]]`
253
- * `(self: _TupleParser[A, B], _IgnoredParser[A]) -> _TupleParser[A, B]`
254
- * `(self: _TupleParser[A, B], Parser[A, Any]) -> Parser[A, Any]`
255
- * `(self: _IgnoredParser[A], _IgnoredParser[A]) -> _IgnoredParser[A]`
256
- * `(self: _IgnoredParser[A], Parser[A, C]) -> Parser[A, C]`
257
-
258
- Examples:
259
-
260
- ```pycon
261
- >>> expr = a("x") + a("y")
262
- >>> expr.parse("xy")
263
- ('x', 'y')
264
-
265
- ```
266
-
267
- ```pycon
268
- >>> expr = a("x") + a("y") + a("z")
269
- >>> expr.parse("xyz")
270
- ('x', 'y', 'z')
271
-
272
- ```
273
-
274
- ```pycon
275
- >>> expr = a("x") + a("y")
276
- >>> expr.parse("xz")
277
- Traceback (most recent call last):
278
- ...
279
- parser.NoParseError: got unexpected token: 'z', expected: 'y'
280
-
281
- ```
282
- """
283
-
284
- def magic(v1, v2):
285
- if isinstance(v1, _Tuple):
286
- return _Tuple(v1 + (v2,))
287
- else:
288
- return _Tuple((v1, v2))
289
-
290
- @_TupleParser
291
- def _add(tokens, s):
292
- (v1, s2) = self.run(tokens, s)
293
- (v2, s3) = other.run(tokens, s2)
294
- return magic(v1, v2), s3
295
-
296
- @Parser
297
- def ignored_right(tokens, s):
298
- v, s2 = self.run(tokens, s)
299
- _, s3 = other.run(tokens, s2)
300
- return v, s3
301
-
302
- name = "(%s, %s)" % (self.name, other.name)
303
- if isinstance(other, _IgnoredParser):
304
- return ignored_right.named(name)
305
- else:
306
- return _add.named(name)
307
-
308
- def __or__(self, other):
309
- """Choice combination of parsers.
310
-
311
- It runs this parser and returns its result. If the parser fails, it runs the
312
- other parser.
313
-
314
- Examples:
315
-
316
- ```pycon
317
- >>> expr = a("x") | a("y")
318
- >>> expr.parse("x")
319
- 'x'
320
- >>> expr.parse("y")
321
- 'y'
322
- >>> expr.parse("z")
323
- Traceback (most recent call last):
324
- ...
325
- parser.NoParseError: got unexpected token: 'z', expected: 'x' or 'y'
326
-
327
- ```
328
- """
329
-
330
- @Parser
331
- def _or(tokens, s):
332
- try:
333
- return self.run(tokens, s)
334
- except NoParseError as e:
335
- state = e.state
336
- try:
337
- return other.run(tokens, State(s.pos, state.max, state.parser))
338
- except NoParseError as e:
339
- if s.pos == e.state.max:
340
- e.state = State(e.state.pos, e.state.max, _or)
341
- raise
342
-
343
- _or.name = "%s or %s" % (self.name, other.name)
344
- return _or
345
-
346
- def __rshift__(self, f):
347
- """Transform the parsing result by applying the specified function.
348
-
349
- Type: `(Callable[[B], C]) -> Parser[A, C]`
350
-
351
- You can use it for transforming the parsed value into another value before
352
- including it into the parse tree (the AST).
353
-
354
- Examples:
355
-
356
- ```pycon
357
- >>> def make_canonical_name(s):
358
- ... return s.lower()
359
- >>> expr = (a("D") | a("d")) >> make_canonical_name
360
- >>> expr.parse("D")
361
- 'd'
362
- >>> expr.parse("d")
363
- 'd'
364
-
365
- ```
366
- """
367
-
368
- @Parser
369
- def _shift(tokens, s):
370
- (v, s2) = self.run(tokens, s)
371
- return f(v), s2
372
-
373
- return _shift.named(self.name)
374
-
375
- def bind(self, f):
376
- """Bind the parser to a monadic function that returns a new parser.
377
-
378
- Type: `(Callable[[B], Parser[A, C]]) -> Parser[A, C]`
379
-
380
- Also known as `>>=` in Haskell.
381
-
382
- !!! Note
383
-
384
- You can parse any context-free grammar without resorting to `bind`. Due
385
- to its poor performance please use it only when you really need it.
386
- """
387
-
388
- @Parser
389
- def _bind(tokens, s):
390
- (v, s2) = self.run(tokens, s)
391
- return f(v).run(tokens, s2)
392
-
393
- _bind.name = "(%s >>=)" % (self.name,)
394
- return _bind
395
-
396
def __neg__(self):
    """Wrap this parser so that the sequential `+` combinator drops its result.

    Type: `(Parser[A, B]) -> _IgnoredParser[A]`

    The returned parser parses the same tokens as this one. Use it for throwing
    away elements of concrete syntax (e.g. `","`, `";"`).

    Examples:

    ```pycon
    >>> expr = -a("x") + a("y")
    >>> expr.parse("xy")
    'y'

    ```

    ```pycon
    >>> expr = a("x") + -a("y")
    >>> expr.parse("xy")
    'x'

    ```

    ```pycon
    >>> expr = a("x") + -a("y") + a("z")
    >>> expr.parse("xyz")
    ('x', 'z')

    ```

    ```pycon
    >>> expr = -a("x") + a("y") + -a("z")
    >>> expr.parse("xyz")
    'y'

    ```

    ```pycon
    >>> expr = -a("x") + a("y")
    >>> expr.parse("yz")
    Traceback (most recent call last):
        ...
    parser.NoParseError: got unexpected token: 'y', expected: 'x'

    ```

    ```pycon
    >>> expr = a("x") + -a("y")
    >>> expr.parse("xz")
    Traceback (most recent call last):
        ...
    parser.NoParseError: got unexpected token: 'z', expected: 'y'

    ```

    !!! Note

        You **should not** pass the resulting parser to any combinators other than
        `+`. You **should** have at least one non-skipped value in your
        `p1 + p2 + ... + pN`. The parsed value of `-p` is an **internal** `_Ignored`
        object, not intended for actual use.
    """
    ignoring = _IgnoredParser(self)
    return ignoring
461
-
462
- def __class_getitem__(cls, key):
463
- return cls
464
-
465
-
466
class State(object):
    """Parsing state that is maintained basically for error reporting.

    Attributes set by the constructor:

    * `pos` -- the current position in the sequence being parsed.
    * `max` -- the position of the rightmost token that has been consumed
      while parsing.
    * `parser` -- an optional parser stored alongside the positions (`None` by
      default); it is not included in `str()` or `repr()` output.
    """

    def __init__(self, pos, max, parser=None):
        self.pos, self.max, self.parser = pos, max, parser

    def __str__(self):
        # Same text as str() of the (pos, max) tuple.
        return "(%r, %r)" % (self.pos, self.max)

    def __repr__(self):
        return "State({!r}, {!r})".format(self.pos, self.max)
483
-
484
-
485
class NoParseError(Exception):
    """Exception raised by parsers when the input cannot be parsed.

    `msg` is the text returned by `str()`; `state` is the object passed in by
    the raising parser (the parsers in this module pass a `State`).
    """

    def __init__(self, msg, state):
        self.msg, self.state = msg, state

    def __str__(self):
        return self.msg
492
-
493
-
494
- class _Tuple(tuple):
495
- pass
496
-
497
-
498
class _TupleParser(Parser):
    """Internal `Parser` subclass that adds no behaviour of its own.

    NOTE(review): presumably marks parsers whose result is a `_Tuple` --
    confirm against the code that instantiates it.
    """
500
-
501
-
502
- class _Ignored(object):
503
- def __init__(self, value):
504
- self.value = value
505
-
506
- def __repr__(self):
507
- return "_Ignored(%s)" % repr(self.value)
508
-
509
- def __eq__(self, other):
510
- return isinstance(other, _Ignored) and self.value == other.value
511
-
512
-
513
@Parser
def finished(tokens, s):
    """A parser that succeeds only at the end of the token sequence.

    It returns `None` without changing the state when no tokens are left, and
    raises `NoParseError` if there are any unparsed tokens."""
    if s.pos < len(tokens):
        # Report this parser as the expected one only when the failure is at
        # the rightmost position reached so far.
        expected = finished if s.pos == s.max else s.parser
        raise NoParseError("got unexpected token", State(s.pos, s.max, expected))
    return None, s


finished.name = "end of input"
525
-
526
-
527
def many(p):
    """Return a parser that applies the parser `p` zero or more times.

    The returned parser keeps applying `p` to the input sequence of tokens for
    as long as it succeeds. The parsed value is a list of the sequentially
    parsed values (empty when `p` fails straight away).

    Examples:

    ```pycon
    >>> expr = many(a("x"))
    >>> expr.parse("x")
    ['x']
    >>> expr.parse("xx")
    ['x', 'x']
    >>> expr.parse("xxxy")  # noqa
    ['x', 'x', 'x']
    >>> expr.parse("y")
    []

    ```
    """

    @Parser
    def _many(tokens, s):
        collected = []
        current = s
        try:
            while True:
                value, current = p.run(tokens, current)
                collected.append(value)
        except NoParseError as failure:
            # Stop at the last state that parsed successfully, but keep the
            # rightmost position and parser recorded by the failed attempt.
            stopped = State(current.pos, failure.state.max, failure.state.parser)
            if debug:
                log.debug(
                    "*matched* %d instances of %s, new state = %s"
                    % (len(collected), _many.name, stopped)
                )
            return collected, stopped

    _many.name = "{ %s }" % p.name
    return _many
569
-
570
-
571
def some(pred):
    """Return a parser that accepts one token if `pred` holds for it.

    Type: `(Callable[[A], bool]) -> Parser[A, A]`

    The parsed value is the token itself. The parser raises `NoParseError` at
    the end of input or when `pred` rejects the token.

    Examples:

    ```pycon
    >>> expr = some(lambda s: s.isalpha()).named('alpha')
    >>> expr.parse("x")
    'x'
    >>> expr.parse("y")
    'y'
    >>> expr.parse("1")
    Traceback (most recent call last):
        ...
    parser.NoParseError: got unexpected token: '1', expected: alpha

    ```

    !!! Warning

        The `some()` combinator is quite slow and may be changed or removed in future
        versions. If you need a parser for a token by its type (e.g. any identifier)
        and maybe its value, use `tok(type[, value])` instead. You should use
        `make_tokenizer()` from `funcparserlib.lexer` to tokenize your text first.
    """

    @Parser
    def _some(tokens, s):
        position = s.pos

        # Nothing left to read.
        if position >= len(tokens):
            expected = _some if position == s.max else s.parser
            raise NoParseError(
                "got unexpected end of input", State(position, s.max, expected)
            )

        token = tokens[position]

        # The token is there but the predicate rejects it.
        if not pred(token):
            expected = _some if position == s.max else s.parser
            failed = State(position, s.max, expected)
            if debug:
                log.debug(
                    "failed %r, state = %s, expected = %s"
                    % (token, failed, failed.parser.name)
                )
            raise NoParseError("got unexpected token", failed)

        # Success: consume the token and advance the rightmost position.
        advanced = position + 1
        matched = State(advanced, max(advanced, s.max), s.parser)
        if debug:
            log.debug("*matched* %r, new state = %s" % (token, matched))
        return token, matched

    _some.name = "some(...)"
    return _some
622
-
623
-
624
def a(value):
    """Return a parser that accepts one token equal to `value`.

    Type: `(A) -> Parser[A, A]`

    The parser is named after `repr()` of `value.name` when `value` has a
    `name` attribute, and after `repr(value)` otherwise.

    Examples:

    ```pycon
    >>> expr = a("x")
    >>> expr.parse("x")
    'x'
    >>> expr.parse("y")
    Traceback (most recent call last):
        ...
    parser.NoParseError: got unexpected token: 'y', expected: 'x'

    ```

    !!! Note

        Although `Parser.parse()` can parse sequences of any objects (including
        `str` which is a sequence of `str` chars), **the recommended way** is
        parsing sequences of `Token` objects.

        You **should** use a regexp-based tokenizer `make_tokenizer()` defined in
        `funcparserlib.lexer` to convert your text into a sequence of `Token` objects
        before parsing it. You will get more readable parsing error messages (as `Token`
        objects contain their position in the source file) and good separation of the
        lexical and syntactic levels of the grammar.
    """
    label = getattr(value, "name", value)

    def _equals_value(token):
        return token == value

    matcher = some(_equals_value)
    return matcher.named(repr(label))
656
-
657
-
658
def tok(type, value=None):
    """Return a parser that accepts a `Token` and yields the token's `value`.

    Type: `(str, Optional[str]) -> Parser[Token, str]`

    With only `type` given, any token of that type matches. With both `type`
    and `value` given, only a token equal to `Token(type, value)` matches.

    Examples:

    ```pycon
    >>> expr = tok("expr")
    >>> expr.parse([Token("expr", "foo")])
    'foo'
    >>> expr.parse([Token("expr", "bar")])
    'bar'
    >>> expr.parse([Token("op", "=")])
    Traceback (most recent call last):
        ...
    parser.NoParseError: got unexpected token: '=', expected: expr

    ```

    ```pycon
    >>> expr = tok("op", "=")
    >>> expr.parse([Token("op", "=")])
    '='
    >>> expr.parse([Token("op", "+")])
    Traceback (most recent call last):
        ...
    parser.NoParseError: got unexpected token: '+', expected: '='

    ```

    !!! Note

        In order to convert your text to parse into a sequence of `Token` objects,
        use a regexp-based tokenizer `make_tokenizer()` defined in
        `funcparserlib.lexer`. You will get more readable parsing error messages (as
        `Token` objects contain their position in the source file) and good separation
        of the lexical and syntactic levels of the grammar.
    """
    if value is None:
        # Match on the token type alone.
        p = some(lambda t: t.type == type).named(type)
    else:
        # Match one specific token.
        p = a(Token(type, value))

    # Unwrap the token's value, but keep the matcher's name for error messages.
    unwrapped = p >> (lambda t: t.value)
    return unwrapped.named(p.name)
705
-
706
-
707
def pure(x):
    """Wrap any object into a parser.

    Type: `(A) -> Parser[A, A]`

    The resulting parser never looks at the tokens: it returns `x` together
    with the state it was given, unchanged.

    Also known as `return` in Haskell.
    """

    @Parser
    def _pure(_, s):
        # Consume nothing; yield the wrapped value as the parsing result.
        result = (x, s)
        return result

    _pure.name = "(pure %r)" % (x,)
    return _pure
724
-
725
-
726
def maybe(p):
    """Return a parser that yields `None` instead of failing when `p` fails.

    It is built as the alternative `p | pure(None)`.

    Examples:

    ```pycon
    >>> expr = maybe(a("x"))
    >>> expr.parse("x")
    'x'
    >>> expr.parse("y") is None
    True

    ```
    """
    optional = p | pure(None)
    return optional.named("[ %s ]" % (p.name,))
741
-
742
-
743
def skip(p):
    """Return `-p`; a functional spelling of the unary minus on a parser.

    See also the docs for `Parser.__neg__()`.
    """
    negated = -p
    return negated
749
-
750
-
751
class _IgnoredParser(Parser):
    """Parser wrapper whose parsed value is always an `_Ignored` object.

    Adding another parser to it with `+` runs both in sequence and keeps only
    the right-hand result.
    """

    def __init__(self, p):
        super(_IgnoredParser, self).__init__(p)
        # Captured before `self.define(ignored)` below.
        # NOTE(review): presumably define() rebinds the run callable, which is
        # why the original one is saved first -- confirm against Parser.define.
        inner_run = self._run if debug else self.run

        def ignored(tokens, s):
            value, state = inner_run(tokens, s)
            # Wrap the value unless it is already marked as ignored.
            if not isinstance(value, _Ignored):
                value = _Ignored(value)
            return value, state

        self.define(ignored)
        self.name = getattr(p, "name", p.__doc__)

    def __add__(self, other):
        def ignored_left(tokens, s):
            # Run this parser only to advance the state; drop its value.
            _, after_self = self.run(tokens, s)
            value, after_other = other.run(tokens, after_self)
            return value, after_other

        label = "(%s, %s)" % (self.name, other.name)
        # Two ignored parsers in a row stay ignored; otherwise the result is a
        # regular parser.
        if isinstance(other, _IgnoredParser):
            combined_type = _IgnoredParser
        else:
            combined_type = Parser
        return combined_type(ignored_left).named(label)
775
-
776
-
777
def oneplus(p):
    """Return a parser that applies the parser `p` one or more times.

    A similar parser combinator `many(p)` means apply `p` zero or more times, whereas
    `oneplus(p)` means apply `p` one or more times. The parsed value is the list
    of all sequentially parsed values; the parser fails if the very first
    application of `p` fails.

    Examples:

    ```pycon
    >>> expr = oneplus(a("x"))
    >>> expr.parse("x")
    ['x']
    >>> expr.parse("xx")
    ['x', 'x']
    >>> expr.parse("y")
    Traceback (most recent call last):
        ...
    parser.NoParseError: got unexpected token: 'y', expected: 'x'

    ```
    """
    # Build the "zero or more" tail once per combinator instead of allocating a
    # fresh `many(p)` parser on every parse attempt.
    rest = many(p)

    @Parser
    def _oneplus(tokens, s):
        # The first application is mandatory; its failure propagates.
        (v1, s2) = p.run(tokens, s)
        # The remaining applications are optional.
        (v2, s3) = rest.run(tokens, s2)
        return [v1] + v2, s3

    _oneplus.name = "(%s, { %s })" % (p.name, p.name)
    return _oneplus
807
-
808
-
809
def with_forward_decls(suspension):
    """Deprecated: return a parser that calls `suspension()` on every run.

    `suspension` is a zero-argument callable returning a parser; the returned
    parser evaluates it lazily each time it runs and delegates to the result.
    Calling this function emits a `DeprecationWarning` that points to
    `forward_decl()` as the replacement.
    """
    warnings.warn(
        "Use forward_decl() instead:\n"
        "\n"
        "    p = forward_decl()\n"
        "    ...\n"
        "    p.define(parser_value)\n",
        DeprecationWarning,
        # Attribute the warning to the caller's line, not to this module, so
        # that it is reported where the deprecated call actually happens.
        stacklevel=2,
    )

    @Parser
    def f(tokens, s):
        return suspension().run(tokens, s)

    return f
824
-
825
-
826
def forward_decl():
    """Return an undefined parser that can be used as a forward declaration.

    Type: `Parser[Any, Any]`

    Use `p = forward_decl()` in combination with `p.define(...)` to define recursive
    parsers. Running the parser before it has been defined raises
    `NotImplementedError`.


    Examples:

    ```pycon
    >>> expr = forward_decl()
    >>> expr.define(a("x") + maybe(expr) + a("y"))
    >>> expr.parse("xxyy")  # noqa
    ('x', ('x', None, 'y'), 'y')
    >>> expr.parse("xxy")
    Traceback (most recent call last):
        ...
    parser.NoParseError: got unexpected end of input, expected: 'y'

    ```

    !!! Note

        If you care about static types, you should add a type hint for your forward
        declaration, so that your type checker can check types in `p.define(...)` later:

        ```python
        p: Parser[str, int] = forward_decl()
        p.define(a("x"))  # Type checker error
        p.define(a("1") >> int)  # OK
        ```
    """

    @Parser
    def f(_tokens, _s):
        # Placeholder body used until the caller define()s the real parser.
        raise NotImplementedError("you must define() a forward_decl somewhere")

    placeholder = f
    placeholder.name = "forward_decl()"
    return placeholder
867
-
868
-
869
if __name__ == "__main__":
    # When executed as a script, run the doctest examples embedded in the
    # docstrings of this module.
    import doctest

    doctest.testmod()