parse_qwantz 2026.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parse_qwantz-2026.5.4/LICENSE +19 -0
- parse_qwantz-2026.5.4/MANIFEST.in +3 -0
- parse_qwantz-2026.5.4/PKG-INFO +107 -0
- parse_qwantz-2026.5.4/README.md +82 -0
- parse_qwantz-2026.5.4/parse_qwantz/__init__.py +2 -0
- parse_qwantz-2026.5.4/parse_qwantz/__main__.py +5 -0
- parse_qwantz-2026.5.4/parse_qwantz/box.py +103 -0
- parse_qwantz-2026.5.4/parse_qwantz/char_variants.py +871 -0
- parse_qwantz-2026.5.4/parse_qwantz/cli.py +82 -0
- parse_qwantz-2026.5.4/parse_qwantz/color_logs.py +39 -0
- parse_qwantz-2026.5.4/parse_qwantz/colors.py +29 -0
- parse_qwantz-2026.5.4/parse_qwantz/data/panel_overrides.json +5501 -0
- parse_qwantz-2026.5.4/parse_qwantz/detect_thought.py +51 -0
- parse_qwantz-2026.5.4/parse_qwantz/dict/html-words.txt +23012 -0
- parse_qwantz-2026.5.4/parse_qwantz/dict/manual-additions.txt +327 -0
- parse_qwantz-2026.5.4/parse_qwantz/dict/manual-removed.txt +13 -0
- parse_qwantz-2026.5.4/parse_qwantz/dict/unambiguous-qwantz.txt +26052 -0
- parse_qwantz-2026.5.4/parse_qwantz/elements.py +82 -0
- parse_qwantz-2026.5.4/parse_qwantz/fonts.py +469 -0
- parse_qwantz-2026.5.4/parse_qwantz/hyphens.py +54 -0
- parse_qwantz-2026.5.4/parse_qwantz/image_viewer.py +12 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/ask-professor-science.svg +90 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/batman-left.svg +33 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/batman-right.svg +33 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/blank.svg +7388 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/italic13.png +0 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/mask.png +0 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/regular10.png +0 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/regular11.png +0 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/regular12.png +0 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/regular13.png +0 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/regular8.png +0 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/regular9.png +0 -0
- parse_qwantz-2026.5.4/parse_qwantz/img/serif13.png +0 -0
- parse_qwantz-2026.5.4/parse_qwantz/lines.py +55 -0
- parse_qwantz-2026.5.4/parse_qwantz/main.py +86 -0
- parse_qwantz-2026.5.4/parse_qwantz/match_blocks.py +108 -0
- parse_qwantz-2026.5.4/parse_qwantz/match_lines.py +424 -0
- parse_qwantz-2026.5.4/parse_qwantz/match_thought.py +16 -0
- parse_qwantz-2026.5.4/parse_qwantz/panel_overrides.py +16 -0
- parse_qwantz-2026.5.4/parse_qwantz/panels.py +59 -0
- parse_qwantz-2026.5.4/parse_qwantz/parser.py +229 -0
- parse_qwantz-2026.5.4/parse_qwantz/pixels.py +80 -0
- parse_qwantz-2026.5.4/parse_qwantz/prepare_image.py +58 -0
- parse_qwantz-2026.5.4/parse_qwantz/shape.py +50 -0
- parse_qwantz-2026.5.4/parse_qwantz/simple_image.py +32 -0
- parse_qwantz-2026.5.4/parse_qwantz/svg_gen.py +171 -0
- parse_qwantz-2026.5.4/parse_qwantz/text_blocks.py +288 -0
- parse_qwantz-2026.5.4/parse_qwantz/text_lines.py +223 -0
- parse_qwantz-2026.5.4/parse_qwantz.egg-info/PKG-INFO +107 -0
- parse_qwantz-2026.5.4/parse_qwantz.egg-info/SOURCES.txt +55 -0
- parse_qwantz-2026.5.4/parse_qwantz.egg-info/dependency_links.txt +1 -0
- parse_qwantz-2026.5.4/parse_qwantz.egg-info/entry_points.txt +2 -0
- parse_qwantz-2026.5.4/parse_qwantz.egg-info/requires.txt +5 -0
- parse_qwantz-2026.5.4/parse_qwantz.egg-info/top_level.txt +1 -0
- parse_qwantz-2026.5.4/pyproject.toml +38 -0
- parse_qwantz-2026.5.4/setup.cfg +4 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright (c) 2023 Jan Szejko
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: parse_qwantz
|
|
3
|
+
Version: 2026.5.4
|
|
4
|
+
Summary: Transcript generator for Dinosaur Comics
|
|
5
|
+
Author-email: Jan Szejko <jan.szejko@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: homepage, https://github.com/janek37/parse_qwantz
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
12
|
+
Classifier: Natural Language :: English
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Topic :: Multimedia :: Graphics :: Graphics Conversion
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
16
|
+
Classifier: Typing :: Typed
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: Pillow
|
|
21
|
+
Requires-Dist: typer
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest; extra == "dev"
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# Dinosaur Comic Parser
|
|
27
|
+
|
|
28
|
+
A transcript generator for [Ryan North](https://www.ryannorth.ca/)'s [Dinosaur Comics](https://qwantz.com)
|
|
29
|
+
|
|
30
|
+
## Installation
|
|
31
|
+
|
|
32
|
+
Install `parse-qwantz` with `pip`
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install parse-qwantz
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Usage
|
|
39
|
+
|
|
40
|
+
You need to download the image file for the comic you want transcribed, for example https://qwantz.com/comics/comic2-02.png. Then run `parse-qwantz`:
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
$ parse-qwantz comic2-02.png
|
|
44
|
+
T-Rex: Today is a beautiful day to be stomping on things! As a dinosaur, stomping is the best part of my day indeed!
|
|
45
|
+
|
|
46
|
+
T-Rex: *gasp*
|
|
47
|
+
|
|
48
|
+
T-Rex: What's that, little house? You wish you were back in your own time? THAT IS TOO BAD FOR YOU
|
|
49
|
+
|
|
50
|
+
T-Rex: Perhaps you too will get a stomping, little girl!
|
|
51
|
+
Utahraptor: WAIT!
|
|
52
|
+
|
|
53
|
+
Utahraptor: Is stomping really the answer to your problem(s)?
|
|
54
|
+
T-Rex: Problem(s)?
|
|
55
|
+
|
|
56
|
+
T-Rex: My only problem(s) have to do with you interrupting my stomping!
|
|
57
|
+
T-Rex: 〚small〛 crazy utahraptor!
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
You can also call it with
|
|
61
|
+
```bash
|
|
62
|
+
python -m parse_qwantz
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
The argument can also be a directory path instead of a file path. In that case, the program will run on all files in the specified directory.
|
|
66
|
+
|
|
67
|
+
## Options
|
|
68
|
+
|
|
69
|
+
### `--output-dir`
|
|
70
|
+
|
|
71
|
+
By default, the program outputs to stdout and logs to stderr. With this option, when processing file `image_name.png` it will output to `OUTPUT_DIR/image_name.png.txt` and log to `OUTPUT_DIR/image_name.log`.
|
|
72
|
+
|
|
73
|
+
### `--generate-svg`
|
|
74
|
+
|
|
75
|
+
Instead of transcribing the comic, generate a vectorized version in the SVG format and print it to the standard output.
|
|
76
|
+
|
|
77
|
+
### `--parse-footer`
|
|
78
|
+
|
|
79
|
+
Instead of transcribing the comic, transcribe just the footer.
|
|
80
|
+
|
|
81
|
+
## Conventions
|
|
82
|
+
|
|
83
|
+
Bold and italics are marked with "◖◗" and "▹◃" respectively. This is to avoid ambiguity which may result from using characters like "*" or "_".
|
|
84
|
+
|
|
85
|
+
All descriptions are in "〚〛" brackets. Each line that isn't just a description starts with a "character" name followed by a colon. That "character" might be one of the actual characters, but it can also be "Narrator", "Off panel", "Banner", "Book cover", etc.
|
|
86
|
+
|
|
87
|
+
When some text in a panel is obscured but can be reconstructed, it's in "⦃⦄" braces. So far this applies only to 2 comics: #59 and #61.
|
|
88
|
+
|
|
89
|
+
When some text in a panel is obscured and not reconstructed, it's replaced either by the special "…" character, or a description of how it's obscured in "〚〛" brackets.
|
|
90
|
+
|
|
91
|
+
## Notes
|
|
92
|
+
|
|
93
|
+
This program still does not work on all DC strips, but at this point it should work correctly on pretty much all "standard" strips and some less-standard ones (thanks to the system of overrides). Eventually all existing strips should work, including the guest comics, with updates for new comics coming out regularly.
|
|
94
|
+
|
|
95
|
+
After all comics are working, I might add some other features, like generating SVG images.
|
|
96
|
+
|
|
97
|
+
## Running Tests
|
|
98
|
+
|
|
99
|
+
To run tests, run the following command:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
pytest test/
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Acknowledgments
|
|
106
|
+
|
|
107
|
+
This program would not be possible without the wonderful comics by Ryan North! Thanks, Ryan, and congratulations on the 20th anniversary of your comics! Btw [the anniversary comic](https://qwantz.com/?comic=4005) will totally not work with this script, haha! (at least until I add an override)
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Dinosaur Comic Parser
|
|
2
|
+
|
|
3
|
+
A transcript generator for [Ryan North](https://www.ryannorth.ca/)'s [Dinosaur Comics](https://qwantz.com)
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Install `parse-qwantz` with `pip`
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install parse-qwantz
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
You need to download the image file for the comic you want transcribed, for example https://qwantz.com/comics/comic2-02.png. Then run `parse-qwantz`:
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
$ parse-qwantz comic2-02.png
|
|
19
|
+
T-Rex: Today is a beautiful day to be stomping on things! As a dinosaur, stomping is the best part of my day indeed!
|
|
20
|
+
|
|
21
|
+
T-Rex: *gasp*
|
|
22
|
+
|
|
23
|
+
T-Rex: What's that, little house? You wish you were back in your own time? THAT IS TOO BAD FOR YOU
|
|
24
|
+
|
|
25
|
+
T-Rex: Perhaps you too will get a stomping, little girl!
|
|
26
|
+
Utahraptor: WAIT!
|
|
27
|
+
|
|
28
|
+
Utahraptor: Is stomping really the answer to your problem(s)?
|
|
29
|
+
T-Rex: Problem(s)?
|
|
30
|
+
|
|
31
|
+
T-Rex: My only problem(s) have to do with you interrupting my stomping!
|
|
32
|
+
T-Rex: 〚small〛 crazy utahraptor!
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
You can also call it with
|
|
36
|
+
```bash
|
|
37
|
+
python -m parse_qwantz
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
The argument can also be a directory path instead of a file path. In that case, the program will run on all files in the specified directory.
|
|
41
|
+
|
|
42
|
+
## Options
|
|
43
|
+
|
|
44
|
+
### `--output-dir`
|
|
45
|
+
|
|
46
|
+
By default, the program outputs to stdout and logs to stderr. With this option, when processing file `image_name.png` it will output to `OUTPUT_DIR/image_name.png.txt` and log to `OUTPUT_DIR/image_name.log`.
|
|
47
|
+
|
|
48
|
+
### `--generate-svg`
|
|
49
|
+
|
|
50
|
+
Instead of transcribing the comic, generate a vectorized version in the SVG format and print it to the standard output.
|
|
51
|
+
|
|
52
|
+
### `--parse-footer`
|
|
53
|
+
|
|
54
|
+
Instead of transcribing the comic, transcribe just the footer.
|
|
55
|
+
|
|
56
|
+
## Conventions
|
|
57
|
+
|
|
58
|
+
Bold and italics are marked with "◖◗" and "▹◃" respectively. This is to avoid ambiguity which may result from using characters like "*" or "_".
|
|
59
|
+
|
|
60
|
+
All descriptions are in "〚〛" brackets. Each line that isn't just a description starts with a "character" name followed by a colon. That "character" might be one of the actual characters, but it can also be "Narrator", "Off panel", "Banner", "Book cover", etc.
|
|
61
|
+
|
|
62
|
+
When some text in a panel is obscured but can be reconstructed, it's in "⦃⦄" braces. So far this applies only to 2 comics: #59 and #61.
|
|
63
|
+
|
|
64
|
+
When some text in a panel is obscured and not reconstructed, it's replaced either by the special "…" character, or a description of how it's obscured in "〚〛" brackets.
|
|
65
|
+
|
|
66
|
+
## Notes
|
|
67
|
+
|
|
68
|
+
This program still does not work on all DC strips, but at this point it should work correctly on pretty much all "standard" strips and some less-standard ones (thanks to the system of overrides). Eventually all existing strips should work, including the guest comics, with updates for new comics coming out regularly.
|
|
69
|
+
|
|
70
|
+
After all comics are working, I might add some other features, like generating SVG images.
|
|
71
|
+
|
|
72
|
+
## Running Tests
|
|
73
|
+
|
|
74
|
+
To run tests, run the following command:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pytest test/
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Acknowledgments
|
|
81
|
+
|
|
82
|
+
This program would not be possible without the wonderful comics by Ryan North! Thanks, Ryan, and congratulations on the 20th anniversary of your comics! Btw [the anniversary comic](https://qwantz.com/?comic=4005) will totally not work with this script, haha! (at least until I add an override)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from typing import NamedTuple
|
|
3
|
+
|
|
4
|
+
from parse_qwantz.pixels import Pixel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Box(NamedTuple):
    """Axis-aligned rectangle described by two corner pixels.

    ``includes`` treats the rectangle as half-open: the left and top edges
    belong to the box, while the right and bottom edges do not.
    """

    # Corner supplying ``left`` (x) and ``top`` (y).
    top_left: Pixel
    # Corner supplying ``right`` (x) and ``bottom`` (y); excluded by ``includes``.
    bottom_right: Pixel
    # Side names ("top", "bottom", "left", "right") that ``distance`` must not
    # measure from. Not shown by ``__repr__``.
    inactive_sides: tuple[str, ...] = ()

    def __repr__(self):
        """Return a short representation showing only the two corners."""
        return f"Box(top_left={self.top_left!r}, bottom_right={self.bottom_right!r})"

    @property
    def top_right(self) -> Pixel:
        """Pixel at (``right``, ``top``)."""
        return Pixel(self.bottom_right.x, self.top_left.y)

    @property
    def bottom_left(self) -> Pixel:
        """Pixel at (``left``, ``bottom``)."""
        return Pixel(self.top_left.x, self.bottom_right.y)

    @property
    def left(self) -> int:
        """X coordinate of the left edge."""
        return self.top_left.x

    @property
    def right(self) -> int:
        """X coordinate of the right edge (exclusive for ``includes``)."""
        return self.bottom_right.x

    @property
    def top(self) -> int:
        """Y coordinate of the top edge."""
        return self.top_left.y

    @property
    def bottom(self) -> int:
        """Y coordinate of the bottom edge (exclusive for ``includes``)."""
        return self.bottom_right.y

    @property
    def width(self) -> int:
        """Horizontal extent, ``right - left``."""
        return self.right - self.left

    @property
    def height(self) -> int:
        """Vertical extent, ``bottom - top``."""
        return self.bottom - self.top

    def includes(self, pixel: Pixel) -> bool:
        """Return whether ``pixel`` lies in the half-open rectangle."""
        return self.left <= pixel.x < self.right and self.top <= pixel.y < self.bottom

    def contains(self, box: "Box") -> bool:
        """Return whether both corners of ``box`` are included in this box.

        Because ``includes`` is half-open, ``box.bottom_right`` must lie
        strictly inside; in particular a box does not contain itself.
        """
        return self.includes(box.top_left) and self.includes(box.bottom_right)

    def distance_squared(self, box: "Box") -> int:
        """Return the squared gap between this box and ``box``.

        The horizontal and vertical gaps are computed independently with
        ``get_interval_distance`` and combined as a sum of squares; the
        result is 0 when the boxes overlap or touch on both axes.
        """
        return (
            get_interval_distance((self.left, self.right), (box.left, box.right))**2
            + get_interval_distance((self.top, self.bottom), (box.top, box.bottom))**2
        )

    def distance(self, pixel: Pixel) -> float | None:
        """Return the distance from this box to ``pixel``.

        Returns 0 when the pixel is included in the box. A pixel directly
        above, below, left or right of the box is measured perpendicular to
        that side, or yields ``None`` if the side is listed in
        ``inactive_sides``. A pixel diagonal to the box is measured to the
        nearest corner pixel, and yields ``None`` only when both sides
        meeting at that corner are inactive.
        """
        if self.includes(pixel):
            return 0
        top_active = "top" not in self.inactive_sides
        bottom_active = "bottom" not in self.inactive_sides
        left_active = "left" not in self.inactive_sides
        right_active = "right" not in self.inactive_sides
        # Pixel is within the horizontal span: it is straight above or below.
        if self.left <= pixel.x < self.right:
            if pixel.y < self.top:
                return self.top - pixel.y if top_active else None
            else:
                # ``bottom`` is exclusive, so the last included row is
                # ``bottom - 1``; hence the ``+ 1``.
                return pixel.y - self.bottom + 1 if bottom_active else None
        # Pixel is within the vertical span: it is straight left or right.
        if self.top <= pixel.y < self.bottom:
            if pixel.x < self.left:
                return self.left - pixel.x if left_active else None
            else:
                # ``right`` is exclusive, mirroring the bottom case above.
                return pixel.x - self.right + 1 if right_active else None
        # Diagonal cases: Euclidean distance to the corresponding corner,
        # allowed as long as at least one adjoining side is active.
        if pixel.x >= self.right and pixel.y >= self.bottom and (right_active or bottom_active):
            return get_distance(pixel, self.bottom_right)
        if pixel.x >= self.right and pixel.y < self.top and (right_active or top_active):
            return get_distance(pixel, self.top_right)
        if pixel.x < self.left and pixel.y >= self.bottom and (left_active or bottom_active):
            return get_distance(pixel, self.bottom_left)
        if pixel.x < self.left and pixel.y < self.top and (left_active or top_active):
            return get_distance(pixel, self.top_left)
        return None

    def with_margin(self, margin_x: int, margin_y: int) -> "Box":
        """Return a new box grown by ``margin_x`` / ``margin_y`` on every side.

        NOTE(review): the returned box has the default (empty)
        ``inactive_sides`` rather than a copy of this box's value - confirm
        that this is intended.
        """
        return Box(
            Pixel(self.left - margin_x, self.top - margin_y),
            Pixel(self.right + margin_x, self.bottom + margin_y),
        )

    @classmethod
    def dummy(cls) -> "Box":
        """Return a zero-sized placeholder box located at the origin."""
        return cls(Pixel(0, 0), Pixel(0, 0))
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def get_interval_distance(interval1: tuple[int, int], interval2: tuple[int, int]) -> int:
    """Return the gap between two ``(start, end)`` intervals.

    The intervals may be passed in either order. The result is 0 when they
    overlap or touch, otherwise the distance from the end of the earlier
    interval to the start of the later one.
    """
    # Tuple sorting puts the interval with the smaller start first.
    first, second = sorted((interval1, interval2))
    return max(0, second[0] - first[1])
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def get_distance(p1: Pixel, p2: Pixel) -> float:
    """Return the Euclidean distance between ``p1`` and ``p2``.

    Only the ``x`` and ``y`` attributes of the two pixels are read.
    """
    delta_x = p1.x - p2.x
    delta_y = p1.y - p2.y
    return math.sqrt(delta_x**2 + delta_y**2)
|