parse_qwantz 2026.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. parse_qwantz-2026.5.4/LICENSE +19 -0
  2. parse_qwantz-2026.5.4/MANIFEST.in +3 -0
  3. parse_qwantz-2026.5.4/PKG-INFO +107 -0
  4. parse_qwantz-2026.5.4/README.md +82 -0
  5. parse_qwantz-2026.5.4/parse_qwantz/__init__.py +2 -0
  6. parse_qwantz-2026.5.4/parse_qwantz/__main__.py +5 -0
  7. parse_qwantz-2026.5.4/parse_qwantz/box.py +103 -0
  8. parse_qwantz-2026.5.4/parse_qwantz/char_variants.py +871 -0
  9. parse_qwantz-2026.5.4/parse_qwantz/cli.py +82 -0
  10. parse_qwantz-2026.5.4/parse_qwantz/color_logs.py +39 -0
  11. parse_qwantz-2026.5.4/parse_qwantz/colors.py +29 -0
  12. parse_qwantz-2026.5.4/parse_qwantz/data/panel_overrides.json +5501 -0
  13. parse_qwantz-2026.5.4/parse_qwantz/detect_thought.py +51 -0
  14. parse_qwantz-2026.5.4/parse_qwantz/dict/html-words.txt +23012 -0
  15. parse_qwantz-2026.5.4/parse_qwantz/dict/manual-additions.txt +327 -0
  16. parse_qwantz-2026.5.4/parse_qwantz/dict/manual-removed.txt +13 -0
  17. parse_qwantz-2026.5.4/parse_qwantz/dict/unambiguous-qwantz.txt +26052 -0
  18. parse_qwantz-2026.5.4/parse_qwantz/elements.py +82 -0
  19. parse_qwantz-2026.5.4/parse_qwantz/fonts.py +469 -0
  20. parse_qwantz-2026.5.4/parse_qwantz/hyphens.py +54 -0
  21. parse_qwantz-2026.5.4/parse_qwantz/image_viewer.py +12 -0
  22. parse_qwantz-2026.5.4/parse_qwantz/img/ask-professor-science.svg +90 -0
  23. parse_qwantz-2026.5.4/parse_qwantz/img/batman-left.svg +33 -0
  24. parse_qwantz-2026.5.4/parse_qwantz/img/batman-right.svg +33 -0
  25. parse_qwantz-2026.5.4/parse_qwantz/img/blank.svg +7388 -0
  26. parse_qwantz-2026.5.4/parse_qwantz/img/italic13.png +0 -0
  27. parse_qwantz-2026.5.4/parse_qwantz/img/mask.png +0 -0
  28. parse_qwantz-2026.5.4/parse_qwantz/img/regular10.png +0 -0
  29. parse_qwantz-2026.5.4/parse_qwantz/img/regular11.png +0 -0
  30. parse_qwantz-2026.5.4/parse_qwantz/img/regular12.png +0 -0
  31. parse_qwantz-2026.5.4/parse_qwantz/img/regular13.png +0 -0
  32. parse_qwantz-2026.5.4/parse_qwantz/img/regular8.png +0 -0
  33. parse_qwantz-2026.5.4/parse_qwantz/img/regular9.png +0 -0
  34. parse_qwantz-2026.5.4/parse_qwantz/img/serif13.png +0 -0
  35. parse_qwantz-2026.5.4/parse_qwantz/lines.py +55 -0
  36. parse_qwantz-2026.5.4/parse_qwantz/main.py +86 -0
  37. parse_qwantz-2026.5.4/parse_qwantz/match_blocks.py +108 -0
  38. parse_qwantz-2026.5.4/parse_qwantz/match_lines.py +424 -0
  39. parse_qwantz-2026.5.4/parse_qwantz/match_thought.py +16 -0
  40. parse_qwantz-2026.5.4/parse_qwantz/panel_overrides.py +16 -0
  41. parse_qwantz-2026.5.4/parse_qwantz/panels.py +59 -0
  42. parse_qwantz-2026.5.4/parse_qwantz/parser.py +229 -0
  43. parse_qwantz-2026.5.4/parse_qwantz/pixels.py +80 -0
  44. parse_qwantz-2026.5.4/parse_qwantz/prepare_image.py +58 -0
  45. parse_qwantz-2026.5.4/parse_qwantz/shape.py +50 -0
  46. parse_qwantz-2026.5.4/parse_qwantz/simple_image.py +32 -0
  47. parse_qwantz-2026.5.4/parse_qwantz/svg_gen.py +171 -0
  48. parse_qwantz-2026.5.4/parse_qwantz/text_blocks.py +288 -0
  49. parse_qwantz-2026.5.4/parse_qwantz/text_lines.py +223 -0
  50. parse_qwantz-2026.5.4/parse_qwantz.egg-info/PKG-INFO +107 -0
  51. parse_qwantz-2026.5.4/parse_qwantz.egg-info/SOURCES.txt +55 -0
  52. parse_qwantz-2026.5.4/parse_qwantz.egg-info/dependency_links.txt +1 -0
  53. parse_qwantz-2026.5.4/parse_qwantz.egg-info/entry_points.txt +2 -0
  54. parse_qwantz-2026.5.4/parse_qwantz.egg-info/requires.txt +5 -0
  55. parse_qwantz-2026.5.4/parse_qwantz.egg-info/top_level.txt +1 -0
  56. parse_qwantz-2026.5.4/pyproject.toml +38 -0
  57. parse_qwantz-2026.5.4/setup.cfg +4 -0
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2023 Jan Szejko
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -0,0 +1,3 @@
1
+ include parse_qwantz/img/*
2
+ include parse_qwantz/data/*
3
+ include parse_qwantz/dict/*
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.4
2
+ Name: parse_qwantz
3
+ Version: 2026.5.4
4
+ Summary: Transcript generator for Dinosaur Comics
5
+ Author-email: Jan Szejko <jan.szejko@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://github.com/janek37/parse_qwantz
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: End Users/Desktop
12
+ Classifier: Natural Language :: English
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Topic :: Multimedia :: Graphics :: Graphics Conversion
15
+ Classifier: Topic :: Scientific/Engineering :: Image Recognition
16
+ Classifier: Typing :: Typed
17
+ Requires-Python: >=3.10
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: Pillow
21
+ Requires-Dist: typer
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest; extra == "dev"
24
+ Dynamic: license-file
25
+
26
+ # Dinosaur Comic Parser
27
+
28
+ A transcript generator for [Ryan North](https://www.ryannorth.ca/)'s [Dinosaur Comics](https://qwantz.com)
29
+
30
+ ## Installation
31
+
32
+ Install `parse-qwantz` with `pip`
33
+
34
+ ```bash
35
+ pip install parse-qwantz
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ You need to download the image file for the comic you want transcribed, for example https://qwantz.com/comics/comic2-02.png. Then run `parse-qwantz`:
41
+
42
+ ```
43
+ $ parse-qwantz comic2-02.png
44
+ T-Rex: Today is a beautiful day to be stomping on things! As a dinosaur, stomping is the best part of my day indeed!
45
+
46
+ T-Rex: *gasp*
47
+
48
+ T-Rex: What's that, little house? You wish you were back in your own time? THAT IS TOO BAD FOR YOU
49
+
50
+ T-Rex: Perhaps you too will get a stomping, little girl!
51
+ Utahraptor: WAIT!
52
+
53
+ Utahraptor: Is stomping really the answer to your problem(s)?
54
+ T-Rex: Problem(s)?
55
+
56
+ T-Rex: My only problem(s) have to do with you interrupting my stomping!
57
+ T-Rex: 〚small〛 crazy utahraptor!
58
+ ```
59
+
60
+ You can also call it with
61
+ ```bash
62
+ python -m parse_qwantz
63
+ ```
64
+
65
+ The argument can also be a directory path instead of a file path. In that case, the program will run on all files in the specified directory.
66
+
67
+ ## Options
68
+
69
+ ### `--output-dir`
70
+
71
+ By default, the program outputs to stdout and logs to stderr. With this option, when processing file `image_name.png` it will output to `OUTPUT_DIR/image_name.png.txt` and log to `OUTPUT_DIR/image_name.log`.
72
+
73
+ ### `--generate-svg`
74
+
75
+ Instead of transcribing the comic, generate a vectorized version in the SVG format and print it to the standard output.
76
+
77
+ ### `--parse-footer`
78
+
79
+ Instead of transcribing the comic, transcribe just the footer.
80
+
81
+ ## Conventions
82
+
83
+ Bold and italics are marked with "◖◗" and "▹◃" respectively. This is to avoid ambiguity which may result from using characters like "*" or "_".
84
+
85
+ All descriptions are in "〚〛" brackets. Each line that isn't just a description starts with a "character" name followed by a colon. That "character" might be one of the actual characters, but also "Narrator", "Off panel", "Banner", "Book cover", etc.
86
+
87
+ When some text in a panel is obscured but can be reconstructed, it's in "⦃⦄" braces. So far this applies only to 2 comics: #59 and #61.
88
+
89
+ When some text in a panel is obscured and not reconstructed, it's replaced either by the special "…" character, or a description of how it's obscured in "〚〛" brackets.
90
+
91
+ ## Notes
92
+
93
+ This program still does not work on all DC strips, but at this point it should work correctly on pretty much all "standard" strips and some less-standard ones (thanks to the system of overrides). Eventually all existing strips should work, including the guest comics, with updates for new comics coming out regularly.
94
+
95
+ After all comics are working, I might add some other features, like generating SVG images.
96
+
97
+ ## Running Tests
98
+
99
+ To run tests, run the following command:
100
+
101
+ ```bash
102
+ pytest test/
103
+ ```
104
+
105
+ ## Acknowledgments
106
+
107
+ This program would not be possible without the wonderful comics by Ryan North! Thanks, Ryan, and congratulations on the 20th anniversary of your comics! Btw [the anniversary comic](https://qwantz.com/?comic=4005) will totally not work with this script, haha! (at least until I add an override)
@@ -0,0 +1,82 @@
1
+ # Dinosaur Comic Parser
2
+
3
+ A transcript generator for [Ryan North](https://www.ryannorth.ca/)'s [Dinosaur Comics](https://qwantz.com)
4
+
5
+ ## Installation
6
+
7
+ Install `parse-qwantz` with `pip`
8
+
9
+ ```bash
10
+ pip install parse-qwantz
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ You need to download the image file for the comic you want transcribed, for example https://qwantz.com/comics/comic2-02.png. Then run `parse-qwantz`:
16
+
17
+ ```
18
+ $ parse-qwantz comic2-02.png
19
+ T-Rex: Today is a beautiful day to be stomping on things! As a dinosaur, stomping is the best part of my day indeed!
20
+
21
+ T-Rex: *gasp*
22
+
23
+ T-Rex: What's that, little house? You wish you were back in your own time? THAT IS TOO BAD FOR YOU
24
+
25
+ T-Rex: Perhaps you too will get a stomping, little girl!
26
+ Utahraptor: WAIT!
27
+
28
+ Utahraptor: Is stomping really the answer to your problem(s)?
29
+ T-Rex: Problem(s)?
30
+
31
+ T-Rex: My only problem(s) have to do with you interrupting my stomping!
32
+ T-Rex: 〚small〛 crazy utahraptor!
33
+ ```
34
+
35
+ You can also call it with
36
+ ```bash
37
+ python -m parse_qwantz
38
+ ```
39
+
40
+ The argument can also be a directory path instead of a file path. In that case, the program will run on all files in the specified directory.
41
+
42
+ ## Options
43
+
44
+ ### `--output-dir`
45
+
46
+ By default, the program outputs to stdout and logs to stderr. With this option, when processing file `image_name.png` it will output to `OUTPUT_DIR/image_name.png.txt` and log to `OUTPUT_DIR/image_name.log`.
47
+
48
+ ### `--generate-svg`
49
+
50
+ Instead of transcribing the comic, generate a vectorized version in the SVG format and print it to the standard output.
51
+
52
+ ### `--parse-footer`
53
+
54
+ Instead of transcribing the comic, transcribe just the footer.
55
+
56
+ ## Conventions
57
+
58
+ Bold and italics are marked with "◖◗" and "▹◃" respectively. This is to avoid ambiguity which may result from using characters like "*" or "_".
59
+
60
+ All descriptions are in "〚〛" brackets. Each line that isn't just a description starts with a "character" name followed by a colon. That "character" might be one of the actual characters, but also "Narrator", "Off panel", "Banner", "Book cover", etc.
61
+
62
+ When some text in a panel is obscured but can be reconstructed, it's in "⦃⦄" braces. So far this applies only to 2 comics: #59 and #61.
63
+
64
+ When some text in a panel is obscured and not reconstructed, it's replaced either by the special "…" character, or a description of how it's obscured in "〚〛" brackets.
65
+
66
+ ## Notes
67
+
68
+ This program still does not work on all DC strips, but at this point it should work correctly on pretty much all "standard" strips and some less-standard ones (thanks to the system of overrides). Eventually all existing strips should work, including the guest comics, with updates for new comics coming out regularly.
69
+
70
+ After all comics are working, I might add some other features, like generating SVG images.
71
+
72
+ ## Running Tests
73
+
74
+ To run tests, run the following command:
75
+
76
+ ```bash
77
+ pytest test/
78
+ ```
79
+
80
+ ## Acknowledgments
81
+
82
+ This program would not be possible without the wonderful comics by Ryan North! Thanks, Ryan, and congratulations on the 20th anniversary of your comics! Btw [the anniversary comic](https://qwantz.com/?comic=4005) will totally not work with this script, haha! (at least until I add an override)
@@ -0,0 +1,2 @@
1
+ from parse_qwantz.cli import app
2
+ from parse_qwantz.main import main
@@ -0,0 +1,5 @@
1
+ from parse_qwantz.cli import app
2
+
3
+
4
# Run the command-line application when the package is executed as a script
# (for example with ``python -m parse_qwantz``).
if __name__ == "__main__":
    app()
@@ -0,0 +1,103 @@
1
+ import math
2
+ from typing import NamedTuple
3
+
4
+ from parse_qwantz.pixels import Pixel
5
+
6
+
7
class Box(NamedTuple):
    """Axis-aligned rectangle described by two corner pixels.

    ``includes`` treats the right and bottom coordinates as exclusive bounds
    (``left <= x < right`` and ``top <= y < bottom``).  ``inactive_sides``
    lists side names ("top", "bottom", "left", "right") from which
    ``distance`` declines to measure.
    """

    top_left: Pixel
    bottom_right: Pixel
    inactive_sides: tuple[str, ...] = ()

    def __repr__(self):
        # Only the two corners are rendered; inactive_sides is not shown.
        return f"Box(top_left={self.top_left!r}, bottom_right={self.bottom_right!r})"

    @property
    def top_right(self) -> Pixel:
        """Corner combining the right x coordinate with the top y coordinate."""
        return Pixel(self.bottom_right.x, self.top_left.y)

    @property
    def bottom_left(self) -> Pixel:
        """Corner combining the left x coordinate with the bottom y coordinate."""
        return Pixel(self.top_left.x, self.bottom_right.y)

    @property
    def left(self) -> int:
        """X coordinate of ``top_left``."""
        return self.top_left.x

    @property
    def right(self) -> int:
        """X coordinate of ``bottom_right``."""
        return self.bottom_right.x

    @property
    def top(self) -> int:
        """Y coordinate of ``top_left``."""
        return self.top_left.y

    @property
    def bottom(self) -> int:
        """Y coordinate of ``bottom_right``."""
        return self.bottom_right.y

    @property
    def width(self) -> int:
        """Horizontal extent: ``right - left``."""
        return self.bottom_right.x - self.top_left.x

    @property
    def height(self) -> int:
        """Vertical extent: ``bottom - top``."""
        return self.bottom_right.y - self.top_left.y

    def includes(self, pixel: Pixel) -> bool:
        """Return whether ``pixel`` lies inside the half-open rectangle."""
        if not (self.left <= pixel.x < self.right):
            return False
        return self.top <= pixel.y < self.bottom

    def contains(self, box: "Box") -> bool:
        """Return whether both defining corners of ``box`` are included here.

        Because ``includes`` is exclusive on the right and bottom, a box whose
        ``bottom_right`` touches this box's right or bottom bound is rejected.
        """
        return all(
            self.includes(corner) for corner in (box.top_left, box.bottom_right)
        )

    def distance_squared(self, box: "Box") -> int:
        """Return the squared gap between this box and ``box``.

        The horizontal and vertical gaps are computed independently on the
        boxes' x and y intervals and then combined as a sum of squares.
        """
        gap_x = get_interval_distance((self.left, self.right), (box.left, box.right))
        gap_y = get_interval_distance((self.top, self.bottom), (box.top, box.bottom))
        return gap_x**2 + gap_y**2

    def distance(self, pixel: Pixel) -> float | None:
        """Return the distance from ``pixel`` to this box, or ``None``.

        * 0 when the pixel is included in the box.
        * When the pixel is within the box's column or row range, the
          distance to the facing side, or ``None`` if that side is inactive.
        * Otherwise the distance to the nearest corner, provided at least one
          of the two sides meeting at that corner is active; else ``None``.
        """
        if self.includes(pixel):
            return 0

        def active(*sides: str) -> bool:
            # True when at least one of the named sides is not inactive.
            return any(side not in self.inactive_sides for side in sides)

        x, y = pixel.x, pixel.y

        # Pixel is in the box's column range: measure to the top/bottom side.
        if self.left <= x < self.right:
            if y < self.top:
                return self.top - y if active("top") else None
            return y - self.bottom + 1 if active("bottom") else None

        # Pixel is in the box's row range: measure to the left/right side.
        if self.top <= y < self.bottom:
            if x < self.left:
                return self.left - x if active("left") else None
            return x - self.right + 1 if active("right") else None

        # Diagonal cases, tried in a fixed order; the first match wins.
        right_of, left_of = x >= self.right, x < self.left
        below, above = y >= self.bottom, y < self.top
        corner_checks = (
            (right_of and below, ("right", "bottom"), "bottom_right"),
            (right_of and above, ("right", "top"), "top_right"),
            (left_of and below, ("left", "bottom"), "bottom_left"),
            (left_of and above, ("left", "top"), "top_left"),
        )
        for in_quadrant, sides, corner_name in corner_checks:
            if in_quadrant and active(*sides):
                return get_distance(pixel, getattr(self, corner_name))
        return None

    def with_margin(self, margin_x: int, margin_y: int):
        """Return a new box grown by the given margins on every side.

        The result is built from the two corners only, so it carries the
        default (empty) ``inactive_sides``.
        """
        grown_top_left = Pixel(self.left - margin_x, self.top - margin_y)
        grown_bottom_right = Pixel(self.right + margin_x, self.bottom + margin_y)
        return Box(grown_top_left, grown_bottom_right)

    @classmethod
    def dummy(cls) -> "Box":
        """Return a zero-sized box whose corners are both at (0, 0)."""
        return cls(top_left=Pixel(0, 0), bottom_right=Pixel(0, 0))
95
+
96
+
97
def get_interval_distance(interval1: tuple[int, int], interval2: tuple[int, int]) -> int:
    """Return the gap between two ``(start, end)`` intervals.

    The intervals are ordered by tuple comparison; the gap is the start of
    the later one minus the end of the earlier one, clamped to 0 when they
    touch or overlap.
    """
    if interval2 < interval1:
        earlier, later = interval2, interval1
    else:
        earlier, later = interval1, interval2
    gap = later[0] - earlier[1]
    return gap if gap > 0 else 0
100
+
101
+
102
def get_distance(p1: Pixel, p2: Pixel) -> float:
    """Return the Euclidean distance between two points with ``x``/``y`` attributes."""
    delta_x = p1.x - p2.x
    delta_y = p1.y - p2.y
    squared_length = delta_x**2 + delta_y**2
    return math.sqrt(squared_length)