rawmaker 2.40.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. letty/__init__.py +46 -0
  2. letty/cli.py +63 -0
  3. letty/optimizer.py +138 -0
  4. letty/quality/__init__.py +8 -0
  5. letty/quality/whitespace.py +50 -0
  6. letty/strategy.py +8 -0
  7. rawmaker/__init__.py +29 -0
  8. rawmaker/__main__.py +13 -0
  9. rawmaker/__patch__.py +36 -0
  10. rawmaker/cli.py +206 -0
  11. rawmaker/cli_automate.py +69 -0
  12. rawmaker/converter/__init__.py +8 -0
  13. rawmaker/converter/basic.py +174 -0
  14. rawmaker/converter/images.py +168 -0
  15. rawmaker/date.py +83 -0
  16. rawmaker/destination.py +202 -0
  17. rawmaker/error.py +34 -0
  18. rawmaker/features/__init__.py +138 -0
  19. rawmaker/features/annotation.py +254 -0
  20. rawmaker/features/border.py +172 -0
  21. rawmaker/features/boxes.py +153 -0
  22. rawmaker/features/figures.py +24 -0
  23. rawmaker/features/fonts.py +229 -0
  24. rawmaker/features/formula.py +16 -0
  25. rawmaker/features/horizontals.py +132 -0
  26. rawmaker/features/images.py +155 -0
  27. rawmaker/features/line.py +337 -0
  28. rawmaker/features/outlines.py +123 -0
  29. rawmaker/features/text.py +91 -0
  30. rawmaker/fonts/__init__.py +8 -0
  31. rawmaker/fonts/parser.py +354 -0
  32. rawmaker/images/__init__.py +8 -0
  33. rawmaker/images/info.py +35 -0
  34. rawmaker/miner/__init__.py +8 -0
  35. rawmaker/miner/char.py +42 -0
  36. rawmaker/miner/colorspace.py +75 -0
  37. rawmaker/miner/images.py +448 -0
  38. rawmaker/miner/position.py +121 -0
  39. rawmaker/miner/rawchar.py +207 -0
  40. rawmaker/miner/text.py +833 -0
  41. rawmaker/miner/underline.py +66 -0
  42. rawmaker/parameter.py +130 -0
  43. rawmaker/patch/__init__.py +8 -0
  44. rawmaker/patch/ltchar.py +79 -0
  45. rawmaker/reader.py +97 -0
  46. rawmaker/text/__init__.py +8 -0
  47. rawmaker/text/chars.py +24 -0
  48. rawmaker/text/data.py +47 -0
  49. rawmaker/text/superfast.py +91 -0
  50. rawmaker/text/wordbox.py +95 -0
  51. rawmaker/utils.py +44 -0
  52. rawmaker-2.40.3.dist-info/METADATA +51 -0
  53. rawmaker-2.40.3.dist-info/RECORD +63 -0
  54. rawmaker-2.40.3.dist-info/WHEEL +5 -0
  55. rawmaker-2.40.3.dist-info/entry_points.txt +6 -0
  56. rawmaker-2.40.3.dist-info/licenses/LICENSE +21 -0
  57. rawmaker-2.40.3.dist-info/top_level.txt +3 -0
  58. spacestation/__init__.py +18 -0
  59. spacestation/cli.py +51 -0
  60. spacestation/features/__init__.py +8 -0
  61. spacestation/features/chardist.py +85 -0
  62. spacestation/features/worddist.py +57 -0
  63. spacestation/features/wspace.py +130 -0
@@ -0,0 +1,207 @@
1
+ # =============================================================================
2
+ # C O P Y R I G H T
3
+ # -----------------------------------------------------------------------------
4
+ # Copyright (c) 2020-2023 by Helmut Konrad Schewe. All rights reserved.
5
+ # This file is property of Helmut Konrad Schewe. Any unauthorized copy,
6
+ # use or distribution is an offensive act against international law and may
7
+ # be prosecuted under federal law. Its content is company confidential.
8
+ # =============================================================================
9
+ """RawCharacter
10
+ ============
11
+
12
+ The concept of `RawItem`s is to keep the full PDF information attached to
13
+ the current items so that it can be used for further analysis. This
14
+ information is `rawmaker`-internal and is removed before serializing the data.
15
+ """
16
+
17
+ import contextlib
18
+
19
+ import iamraw
20
+ import pdfminer.layout
21
+
22
+
23
class RawChar(iamraw.Char):
    """An `iamraw.Char` that also carries its pdfminer source character.

    `ltchar` is stored on the instance as `self.ltchar`. All remaining
    keyword arguments are passed on unchanged to `iamraw.Char`.
    """

    def __init__(self, ltchar: pdfminer.layout.LTChar, **kwargs):
        # The base class receives everything except the pdfminer item.
        super().__init__(**kwargs)
        # Keep the originating pdfminer character for further analysis.
        self.ltchar = ltchar
28
+
29
+
30
class RawUnicodeChar(iamraw.UnicodeChar):
    """An `iamraw.UnicodeChar` that also carries its pdfminer source character.

    `ltchar` is stored on the instance as `self.ltchar`. All remaining
    keyword arguments are passed on unchanged to `iamraw.UnicodeChar`.
    """

    def __init__(self, ltchar: pdfminer.layout.LTChar, **kwargs):
        # The base class receives everything except the pdfminer item.
        super().__init__(**kwargs)
        # Keep the originating pdfminer character for further analysis.
        self.ltchar = ltchar
35
+
36
+
37
def special_char(item: str, fontname: str = None) -> str:
    """Return the replacement for a single extracted character.

    An `item` whose first character has a code point from 27 up to and
    including 128 is returned unchanged. Any other `item` is looked up in
    the replacement tables, in this order:

    1. ``SPECIAL_CHARS_LMMath`` if `fontname` contains ``LMMath``
    2. ``SPECIAL_CHARS_NPPQGQ`` if `fontname` contains ``NPPQGQ``
    3. ``SPECIAL_CHARS_TABLE``

    The first table that knows `item` wins. `None` is returned for an empty
    `item` and for an `item` that no table knows.

    >>> special_char('š')
    's'
    >>> special_char('é')
    'e'
    >>> special_char('∗')
    '*'
    >>> special_char('ff')
    'ff'
    """
    if not item:
        return None
    if 27 <= ord(item[0]) <= 128:
        return item

    # Font specific tables take precedence over the general one.
    lookup_order = []
    if fontname:
        if 'LMMath' in fontname:
            lookup_order.append(SPECIAL_CHARS_LMMath)
        if 'NPPQGQ' in fontname:
            lookup_order.append(SPECIAL_CHARS_NPPQGQ)
    lookup_order.append(SPECIAL_CHARS_TABLE)

    for replacements in lookup_order:
        replacement = replacements.get(item)
        if replacement is not None:
            return replacement
    return None
61
+
62
+
63
def special_chars(text: str) -> str:
    """Run `special_char` over every character of `text`.

    Characters for which `special_char` returns `None` are dropped. The
    remaining replacements are joined in their original order. No font
    name is passed on, so the font specific tables are not consulted.

    >>> special_chars('Řůř')
    'Rur'
    >>> special_chars('öäüÖÄÜ')
    'öäüÖÄÜ'
    """
    replaced = (special_char(single) for single in text)
    return ''.join(hit for hit in replaced if hit is not None)
78
+
79
+
80
def parse_special_chars(table: str) -> dict:
    """Parse a whitespace separated replacement table into a dictionary.

    Every line that is neither blank nor a comment (first token starting
    with ``#``) contributes one entry: the first token is the key and the
    second token is the value. Any further tokens, e.g. a trailing note,
    are ignored. If a key occurs more than once, the last line wins.

    Args:
        table: multi-line text with one ``<key> <value> [note]`` row per line.

    Returns:
        Mapping of key token to value token.

    Raises:
        IndexError: if a data row consists of a single token only.
    """
    result = {}
    for line in table.splitlines():
        # Split once; an empty token list covers empty and whitespace-only
        # lines, which previously crashed on `line.split()[0]`.
        tokens = line.split()
        if not tokens or tokens[0].startswith('#'):
            continue
        result[tokens[0]] = tokens[1]
    return result
87
+
88
+
89
# Replacement table that `special_char` consults first when the font name
# contains 'LMMath'. The active rows appear to map each symbol onto itself,
# which keeps these symbols instead of letting them fall through to the
# general table. The rows starting with `#` are skipped by the parser.
SPECIAL_CHARS_LMMath = parse_special_chars("""
\u03B1 α # alpha
\u03B2 β # beta
# \u2211 − # minus
\u2206 ∆
\u223c ∼
\u2212 − # minus
\u03c0 π
\u03c6 φ
\u03c9 ω
\u25e6 ◦
\u03c4 τ
\u03c1 ρ
\xb7 ·
\xb5 µ
# \u03B1 a # alpha
# \u03B2 b # beta
# \u2212 - # minus
# \u03c0 p
# \u03c6 o
# \u03c9 w
""")
111
+
112
# TODO: HC_DISS_171_P9
# TODO: CHECK CRAZY FONT
# Replacement table that `special_char` consults when the font name contains
# 'NPPQGQ'. The parsed table text is empty; the only entry is added below.
SPECIAL_CHARS_NPPQGQ = parse_special_chars("""
""")
# 11 is VT VERTICAL TAB
# Presumably assigned directly because `str.split()` treats a vertical tab
# as whitespace, so the table parser could never yield it as a key.
SPECIAL_CHARS_NPPQGQ['\x0b'] = 'ff'
118
+
119
# TODO: REQUIRE BETTER APPROACH OF REPLACING `LIGATURES`
# General replacement table, consulted by `special_char` after any font
# specific table. Row layout: `<character> <replacement> [note]`; rows
# starting with `#` are skipped by the parser. Umlauts map onto themselves
# and are therefore kept, most other accented letters are reduced to their
# base letter.
SPECIAL_CHARS_TABLE = parse_special_chars("""
# legiaturen
\uFB00 ff
\uFB01 fi
\uFB02 fl
\uFB03 ffi

\u2217 * # hcdiss171p9
\x03 * # hcdiss171p9

\xA8 ¨

# umlaute
\xC4 Ä
\xD6 Ö
\xDC Ü
\xE4 ä
\xF6 ö
\xFC ü

\u0161 s š
\xE9 e é

\xa1 i ¡
\xc0 A À
\xc1 A Á
\xc2 A Â
\xc3 A Ã
# \xc4 A Ä
\xc5 A Å
\xc6 A Æ
\xc7 C Ç
\xc8 E È
\xc9 E É
\xca E Ê
\xcb E Ë
\xcc I Ì
\xcd I Í
\xce I Î
\xcf I Ï
\xd0 D Ð
\xd1 N Ñ
\xd2 O Ò
\xd3 O Ó
\xd4 O Ô
\xd5 O Õ
# \xd6 O Ö
\xd8 O Ø
\xd9 U Ù
\xda U Ú
\xdb U Û
# \xdc U Ü
\xdd Y Ý
\xe0 a à
\xe1 a á
\xe2 a â
\xe3 a ã
# \xe4 a ä
\xe5 a å
\xe6 a æ
\xe7 c ç
\xe8 e è
\xe9 e é
\xea e ê
\xeb e ë
\xec i ì
\xed i í
\xee i î
\xef i ï
\xf0 o ð
\xf1 n ñ
\xf2 o ò
\xf3 o ó
\xf4 o ô
\xf5 o õ
# \xf6 ö ö
\xf8 o ø
\xf9 u ù
\xfa u ú
\xfb u û
# \xfc ü ü
\xfd y ý
\xff y ÿ
Ř R
ř r
ů u
Ů U
""")