ebcdic 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ebcdic-2.0.0/LICENSE.txt +24 -0
- ebcdic-2.0.0/PKG-INFO +128 -0
- ebcdic-2.0.0/README.md +101 -0
- ebcdic-2.0.0/ebcdic/__init__.py +99 -0
- ebcdic-2.0.0/ebcdic/_version.py +2 -0
- ebcdic-2.0.0/ebcdic/cp037.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1025.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1047.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1097.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1112.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1122.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1123.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1140.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1141.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1142.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1143.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1144.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1145.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1146.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1147.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1148.py +311 -0
- ebcdic-2.0.0/ebcdic/cp1148ms.py +314 -0
- ebcdic-2.0.0/ebcdic/cp1149.py +311 -0
- ebcdic-2.0.0/ebcdic/cp273.py +311 -0
- ebcdic-2.0.0/ebcdic/cp277.py +311 -0
- ebcdic-2.0.0/ebcdic/cp278.py +311 -0
- ebcdic-2.0.0/ebcdic/cp280.py +311 -0
- ebcdic-2.0.0/ebcdic/cp284.py +311 -0
- ebcdic-2.0.0/ebcdic/cp285.py +311 -0
- ebcdic-2.0.0/ebcdic/cp290.py +311 -0
- ebcdic-2.0.0/ebcdic/cp297.py +311 -0
- ebcdic-2.0.0/ebcdic/cp420.py +311 -0
- ebcdic-2.0.0/ebcdic/cp424.py +311 -0
- ebcdic-2.0.0/ebcdic/cp500.py +311 -0
- ebcdic-2.0.0/ebcdic/cp500ms.py +315 -0
- ebcdic-2.0.0/ebcdic/cp833.py +311 -0
- ebcdic-2.0.0/ebcdic/cp838.py +311 -0
- ebcdic-2.0.0/ebcdic/cp870.py +311 -0
- ebcdic-2.0.0/ebcdic/cp871.py +311 -0
- ebcdic-2.0.0/ebcdic/cp875.py +311 -0
- ebcdic-2.0.0/ebcdic/examples/convert.py +19 -0
- ebcdic-2.0.0/ebcdic/test/__init__.py +1 -0
- ebcdic-2.0.0/ebcdic/test/test_ebcdic.py +59 -0
- ebcdic-2.0.0/pyproject.toml +44 -0
ebcdic-2.0.0/LICENSE.txt
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Copyright (c) 2013 - 2026, Thomas Aglassinger
|
|
2
|
+
All rights reserved.
|
|
3
|
+
|
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
|
6
|
+
|
|
7
|
+
* Redistributions of source code must retain the above copyright notice,
|
|
8
|
+
this list of conditions and the following disclaimer.
|
|
9
|
+
|
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
|
12
|
+
and/or other materials provided with the distribution.
|
|
13
|
+
|
|
14
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
15
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
16
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
17
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
18
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
19
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
20
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
21
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
22
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
23
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
24
|
+
POSSIBILITY OF SUCH DAMAGE.
|
ebcdic-2.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ebcdic
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: Additional EBCDIC codecs
|
|
5
|
+
Keywords: codec,text,unicode,ebcdic
|
|
6
|
+
Author-email: Thomas Aglassinger <roskakori@users.sourceforge.net>
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-Expression: BSD-2-Clause
|
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Topic :: Text Processing
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
License-File: LICENSE.txt
|
|
20
|
+
Project-URL: Changes, https://github.com/roskakori/CodecMapper/blob/master/CHANGES.md
|
|
21
|
+
Project-URL: Documentation, https://github.com/roskakori/CodecMapper/blob/master/ebcdic/README.md
|
|
22
|
+
Project-URL: Donate, https://roskakori.at/donate/donate-python-ebcdic/
|
|
23
|
+
Project-URL: Homepage, https://pypi.python.org/pypi/ebcdic
|
|
24
|
+
Project-URL: Issue Tracker, https://github.com/roskakori/CodecMapper/issues
|
|
25
|
+
Project-URL: Source, https://github.com/roskakori/CodecMapper.git
|
|
26
|
+
|
|
27
|
+
# ebcdic
|
|
28
|
+
|
|
29
|
+
`ebcdic` is a Python package adding additional EBCDIC codecs for data exchange with legacy systems.
|
|
30
|
+
|
|
31
|
+
[EBCDIC](https://en.wikipedia.org/wiki/EBCDIC) is short for "Extended Binary Coded Decimal Interchange Code" and is a family of character encodings that is mainly used on mainframe computers. There is no real point in using it unless you have to exchange data with legacy systems.
|
|
32
|
+
|
|
33
|
+
This package requires Python 3.9 or later.
|
|
34
|
+
|
|
35
|
+
For compatibility with Python 2.7 to 3.3, use [version 1.1.1](https://pypi.org/project/ebcdic/1.1.1/).
|
|
36
|
+
|
|
37
|
+
For compatibility with Python 2.6 to 3.2, use [version 1.0.0](https://pypi.org/project/ebcdic/1.0.0/).
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
The `ebcdic` package is available from <https://pypi.python.org/pypi/ebcdic> and can be installed using pip:
|
|
42
|
+
|
|
43
|
+
```console
|
|
44
|
+
pip install ebcdic
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Example usage
|
|
48
|
+
|
|
49
|
+
To encode `'hello world'` on EBCDIC systems in German-speaking countries, use:
|
|
50
|
+
|
|
51
|
+
```pycon
|
|
52
|
+
>>> import ebcdic
|
|
53
|
+
>>> 'hello world'.encode('cp1141')
|
|
54
|
+
b'\x88\x85\x93\x93\x96@\xa6\x96\x99\x93\x84O'
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Supported codecs
|
|
58
|
+
|
|
59
|
+
The `ebcdic` package includes EBCDIC codecs for the following regions:
|
|
60
|
+
|
|
61
|
+
- cp290 - Japan (Katakana)
|
|
62
|
+
- cp420 - Arabic bilingual
|
|
63
|
+
- cp424 - Israel (Hebrew)
|
|
64
|
+
- cp833 - Korea Extended (single byte)
|
|
65
|
+
- cp838 - Thailand
|
|
66
|
+
- cp870 - Eastern Europe (Poland, Hungary, Czech, Slovakia, Slovenia, Croatian, Serbia, Bulgarian); represents Latin-2
|
|
67
|
+
- cp1097 - Iran (Farsi)
|
|
68
|
+
- cp1140 - Australia, Brazil, Canada, New Zealand, Portugal, South Africa, USA
|
|
69
|
+
- cp1141 - Austria, Germany, Switzerland
|
|
70
|
+
- cp1142 - Denmark, Norway
|
|
71
|
+
- cp1143 - Finland, Sweden
|
|
72
|
+
- cp1144 - Italy
|
|
73
|
+
- cp1145 - Latin America, Spain
|
|
74
|
+
- cp1146 - Great Britain, Ireland, North Ireland
|
|
75
|
+
- cp1147 - France
|
|
76
|
+
- cp1148 - International
|
|
77
|
+
- cp1148ms - International, Microsoft interpretation; similar to cp1148 except that 0x15 is mapped to 0x85 ("next line") instead of 0x0a ("linefeed")
|
|
78
|
+
- cp1149 - Iceland
|
|
79
|
+
|
|
80
|
+
It also includes legacy codecs:
|
|
81
|
+
|
|
82
|
+
- cp037 - Australia, Brazil, Canada, New Zealand, Portugal, South Africa; similar to cp1140 but without Euro sign
|
|
83
|
+
- cp273 - Austria, Germany, Switzerland; similar to cp1141 but without Euro sign
|
|
84
|
+
- cp277 - Denmark, Norway; similar to cp1142 but without Euro sign
|
|
85
|
+
- cp278 - Finland, Sweden; similar to cp1143 but without Euro sign
|
|
86
|
+
- cp280 - Italy; similar to cp1144 but without Euro sign
|
|
87
|
+
- cp284 - Latin America, Spain; similar to cp1145 but without Euro sign
|
|
88
|
+
- cp285 - Great Britain, Ireland, North Ireland; similar to cp1146 but without Euro sign
|
|
89
|
+
- cp297 - France; similar to cp1147 but without Euro sign
|
|
90
|
+
- cp500 - International; similar to cp1148 but without Euro sign
|
|
91
|
+
- cp500ms - International, Microsoft interpretation; identical to codecs.cp500; similar to ebcdic.cp500 except that 0x15 is mapped to 0x85 ("next line") instead of 0x0a ("linefeed")
|
|
92
|
+
- cp871 - Iceland; similar to cp1149 but without Euro sign
|
|
93
|
+
- cp875 - Greece; similar to cp9067 but without Euro sign and a few other characters
|
|
94
|
+
- cp1025 - Cyrillic
|
|
95
|
+
- cp1047 - Open Systems (MVS C compiler)
|
|
96
|
+
- cp1112 - Estonia, Latvia, Lithuania (Baltic)
|
|
97
|
+
- cp1122 - Estonia; similar to cp1157 but without Euro sign
|
|
98
|
+
- cp1123 - Ukraine; similar to cp1158 but without Euro sign
|
|
99
|
+
|
|
100
|
+
Codecs in the standard library overrule some of these codecs. At the time of this writing this concerns cp037, cp273 (since 3.4), cp424, cp500, cp875 and cp1140.
|
|
101
|
+
|
|
102
|
+
To get a list of EBCDIC codecs that are already provided by different sources, use `ebcdic.ignored_codec_names()`. For example, with Python 3.13 the result is:
|
|
103
|
+
|
|
104
|
+
```pycon
|
|
105
|
+
>>> ebcdic.ignored_codec_names()
|
|
106
|
+
['cp037', 'cp1140', 'cp273', 'cp424', 'cp500', 'cp875']
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Unsupported codecs
|
|
110
|
+
|
|
111
|
+
According to a [comprehensive list of code pages](https://www.aivosto.com/articles/charsets-codepages.html), there are additional codecs this package does not support yet. Possible reasons and solutions are:
|
|
112
|
+
|
|
113
|
+
1. It's a double byte codec e.g., cp834 (Korea). Technically `CodecMapper` can support them by increasing the mapping size from 256 to 65536. Due to lack of test data and access to Asian mainframes this was deemed too experimental for now.
|
|
114
|
+
2. The codec contains combining characters e.g., cp1132 (Lao) which allows representing more than 256 characters by combining several characters.
|
|
115
|
+
3. Java does not include a mapping for the respective code page e.g., cp410/880 (Cyrillic). You can add such a codec based on the information found at the link above and submit an enhancement request for the Java standard library. Once it is released, add the new codec to the `build.xml` as described below.
|
|
116
|
+
4. I missed a codec. Open an issue on GitHub at <https://github.com/roskakori/CodecMapper/issues>, and it will be added with the next version.
|
|
117
|
+
|
|
118
|
+
## Source code
|
|
119
|
+
|
|
120
|
+
These codecs have been generated using CodecMapper, available from <https://github.com/roskakori/CodecMapper>. Read the [CONTRIBUTING.md](https://github.com/roskakori/CodecMapper/blob/master/CONTRIBUTING.md) to build the ebcdic package from the source and learn how to add more codecs.
|
|
121
|
+
|
|
122
|
+
## License
|
|
123
|
+
|
|
124
|
+
Copyright (c) 2013 - 2026, Thomas Aglassinger
|
|
125
|
+
All rights reserved.
|
|
126
|
+
|
|
127
|
+
Distributed under the BSD license, see [LICENSE.txt](https://github.com/roskakori/CodecMapper/blob/master/LICENSE.txt) for more information.
|
|
128
|
+
|
ebcdic-2.0.0/README.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# ebcdic
|
|
2
|
+
|
|
3
|
+
`ebcdic` is a Python package adding additional EBCDIC codecs for data exchange with legacy systems.
|
|
4
|
+
|
|
5
|
+
[EBCDIC](https://en.wikipedia.org/wiki/EBCDIC) is short for "Extended Binary Coded Decimal Interchange Code" and is a family of character encodings that is mainly used on mainframe computers. There is no real point in using it unless you have to exchange data with legacy systems.
|
|
6
|
+
|
|
7
|
+
This package requires Python 3.9 or later.
|
|
8
|
+
|
|
9
|
+
For compatibility with Python 2.7 to 3.3, use [version 1.1.1](https://pypi.org/project/ebcdic/1.1.1/).
|
|
10
|
+
|
|
11
|
+
For compatibility with Python 2.6 to 3.2, use [version 1.0.0](https://pypi.org/project/ebcdic/1.0.0/).
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
The `ebcdic` package is available from <https://pypi.python.org/pypi/ebcdic> and can be installed using pip:
|
|
16
|
+
|
|
17
|
+
```console
|
|
18
|
+
pip install ebcdic
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Example usage
|
|
22
|
+
|
|
23
|
+
To encode `'hello world'` on EBCDIC systems in German-speaking countries, use:
|
|
24
|
+
|
|
25
|
+
```pycon
|
|
26
|
+
>>> import ebcdic
|
|
27
|
+
>>> 'hello world'.encode('cp1141')
|
|
28
|
+
b'\x88\x85\x93\x93\x96@\xa6\x96\x99\x93\x84O'
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Supported codecs
|
|
32
|
+
|
|
33
|
+
The `ebcdic` package includes EBCDIC codecs for the following regions:
|
|
34
|
+
|
|
35
|
+
- cp290 - Japan (Katakana)
|
|
36
|
+
- cp420 - Arabic bilingual
|
|
37
|
+
- cp424 - Israel (Hebrew)
|
|
38
|
+
- cp833 - Korea Extended (single byte)
|
|
39
|
+
- cp838 - Thailand
|
|
40
|
+
- cp870 - Eastern Europe (Poland, Hungary, Czech, Slovakia, Slovenia, Croatian, Serbia, Bulgarian); represents Latin-2
|
|
41
|
+
- cp1097 - Iran (Farsi)
|
|
42
|
+
- cp1140 - Australia, Brazil, Canada, New Zealand, Portugal, South Africa, USA
|
|
43
|
+
- cp1141 - Austria, Germany, Switzerland
|
|
44
|
+
- cp1142 - Denmark, Norway
|
|
45
|
+
- cp1143 - Finland, Sweden
|
|
46
|
+
- cp1144 - Italy
|
|
47
|
+
- cp1145 - Latin America, Spain
|
|
48
|
+
- cp1146 - Great Britain, Ireland, North Ireland
|
|
49
|
+
- cp1147 - France
|
|
50
|
+
- cp1148 - International
|
|
51
|
+
- cp1148ms - International, Microsoft interpretation; similar to cp1148 except that 0x15 is mapped to 0x85 ("next line") instead of 0x0a ("linefeed")
|
|
52
|
+
- cp1149 - Iceland
|
|
53
|
+
|
|
54
|
+
It also includes legacy codecs:
|
|
55
|
+
|
|
56
|
+
- cp037 - Australia, Brazil, Canada, New Zealand, Portugal, South Africa; similar to cp1140 but without Euro sign
|
|
57
|
+
- cp273 - Austria, Germany, Switzerland; similar to cp1141 but without Euro sign
|
|
58
|
+
- cp277 - Denmark, Norway; similar to cp1142 but without Euro sign
|
|
59
|
+
- cp278 - Finland, Sweden; similar to cp1143 but without Euro sign
|
|
60
|
+
- cp280 - Italy; similar to cp1144 but without Euro sign
|
|
61
|
+
- cp284 - Latin America, Spain; similar to cp1145 but without Euro sign
|
|
62
|
+
- cp285 - Great Britain, Ireland, North Ireland; similar to cp1146 but without Euro sign
|
|
63
|
+
- cp297 - France; similar to cp1147 but without Euro sign
|
|
64
|
+
- cp500 - International; similar to cp1148 but without Euro sign
|
|
65
|
+
- cp500ms - International, Microsoft interpretation; identical to codecs.cp500; similar to ebcdic.cp500 except that 0x15 is mapped to 0x85 ("next line") instead of 0x0a ("linefeed")
|
|
66
|
+
- cp871 - Iceland; similar to cp1149 but without Euro sign
|
|
67
|
+
- cp875 - Greece; similar to cp9067 but without Euro sign and a few other characters
|
|
68
|
+
- cp1025 - Cyrillic
|
|
69
|
+
- cp1047 - Open Systems (MVS C compiler)
|
|
70
|
+
- cp1112 - Estonia, Latvia, Lithuania (Baltic)
|
|
71
|
+
- cp1122 - Estonia; similar to cp1157 but without Euro sign
|
|
72
|
+
- cp1123 - Ukraine; similar to cp1158 but without Euro sign
|
|
73
|
+
|
|
74
|
+
Codecs in the standard library overrule some of these codecs. At the time of this writing this concerns cp037, cp273 (since 3.4), cp424, cp500, cp875 and cp1140.
|
|
75
|
+
|
|
76
|
+
To get a list of EBCDIC codecs that are already provided by different sources, use `ebcdic.ignored_codec_names()`. For example, with Python 3.13 the result is:
|
|
77
|
+
|
|
78
|
+
```pycon
|
|
79
|
+
>>> ebcdic.ignored_codec_names()
|
|
80
|
+
['cp037', 'cp1140', 'cp273', 'cp424', 'cp500', 'cp875']
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Unsupported codecs
|
|
84
|
+
|
|
85
|
+
According to a [comprehensive list of code pages](https://www.aivosto.com/articles/charsets-codepages.html), there are additional codecs this package does not support yet. Possible reasons and solutions are:
|
|
86
|
+
|
|
87
|
+
1. It's a double byte codec e.g., cp834 (Korea). Technically `CodecMapper` can support them by increasing the mapping size from 256 to 65536. Due to lack of test data and access to Asian mainframes this was deemed too experimental for now.
|
|
88
|
+
2. The codec contains combining characters e.g., cp1132 (Lao) which allows representing more than 256 characters by combining several characters.
|
|
89
|
+
3. Java does not include a mapping for the respective code page e.g., cp410/880 (Cyrillic). You can add such a codec based on the information found at the link above and submit an enhancement request for the Java standard library. Once it is released, add the new codec to the `build.xml` as described below.
|
|
90
|
+
4. I missed a codec. Open an issue on GitHub at <https://github.com/roskakori/CodecMapper/issues>, and it will be added with the next version.
|
|
91
|
+
|
|
92
|
+
## Source code
|
|
93
|
+
|
|
94
|
+
These codecs have been generated using CodecMapper, available from <https://github.com/roskakori/CodecMapper>. Read the [CONTRIBUTING.md](https://github.com/roskakori/CodecMapper/blob/master/CONTRIBUTING.md) to build the ebcdic package from the source and learn how to add more codecs.
|
|
95
|
+
|
|
96
|
+
## License
|
|
97
|
+
|
|
98
|
+
Copyright (c) 2013 - 2026, Thomas Aglassinger
|
|
99
|
+
All rights reserved.
|
|
100
|
+
|
|
101
|
+
Distributed under the BSD license, see [LICENSE.txt](https://github.com/roskakori/CodecMapper/blob/master/LICENSE.txt) for more information.
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EBCDIC codecs for data exchange with legacy systems.
|
|
3
|
+
|
|
4
|
+
For more information, visit <https://pypi.python.org/pypi/ebcdic/>.
|
|
5
|
+
"""
|
|
6
|
+
# Copyright (c) 2013 - 2026, Thomas Aglassinger
|
|
7
|
+
# All rights reserved.
|
|
8
|
+
#
|
|
9
|
+
# Redistribution and use in source and binary forms, with or without
|
|
10
|
+
# modification, are permitted provided that the following conditions are met:
|
|
11
|
+
#
|
|
12
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
13
|
+
# this list of conditions and the following disclaimer.
|
|
14
|
+
#
|
|
15
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
16
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
17
|
+
# and/or other materials provided with the distribution.
|
|
18
|
+
#
|
|
19
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
22
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
23
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
24
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
25
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
26
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
27
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
28
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
29
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
30
|
+
|
|
31
|
+
import codecs
|
|
32
|
+
|
|
33
|
+
from ._version import __version__, __version_info__
|
|
34
|
+
|
|
35
|
+
# Names exported by `from ebcdic import *`; this is the public API of the
# package.
__all__ = [
    "codec_names",
    "ignored_codec_names",
    "lookup",
    "__version__",
    "__version_info__",
]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _codec_names():
    """
    Yield the name of every codec module shipped with the ebcdic package.

    A codec module is any file matching ``cp*.py`` in the folder containing
    this file. The yielded name is the file name without its ``.py``
    suffix. No ordering is applied here; names come in whatever order
    ``glob.glob()`` reports the files.
    """
    import glob
    import os.path

    codec_pattern = os.path.join(os.path.dirname(__file__), "cp*.py")
    for module_path in glob.glob(codec_pattern):
        module_file_name = os.path.basename(module_path)
        # Drop the ".py" suffix to obtain the codec name.
        yield os.path.splitext(module_file_name)[0]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _create_codec_name_to_info_map():
    """
    Build the mapping from codec name to its `codecs.CodecInfo`.

    For every name in the module level `codec_names`, import the submodule
    ``ebcdic.<name>`` and store the result of its ``getregentry()`` under
    that name.
    """
    # Imported locally so the module's top-level import block stays as is.
    import importlib

    # `importlib.import_module()` returns the submodule itself, so the
    # non-empty `fromlist` workaround needed with `__import__()` goes away.
    return {
        codec_name: importlib.import_module("ebcdic." + codec_name).getregentry()
        for codec_name in codec_names
    }
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _find_ebcdic_codec(code_name):
    """
    Return the `codecs.CodecInfo` stored for `code_name`, or `None`.

    The name is looked up as is in the map of codecs belonging to this
    package; `None` is returned when the map has no entry for it.
    """
    codec_info = _codec_name_to_info_map.get(code_name)
    return codec_info
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def ignored_codec_names():
    """
    Return the sorted names of codecs in this package that are not in effect.

    A codec counts as ignored when `codecs.lookup()` resolves its name to a
    `codecs.CodecInfo` that differs from the one this package created for
    it, meaning something else (e.g. the standard library) answers for
    that name.
    """
    ignored_names = []
    # Codec names are unique, so sorting the keys yields the same order as
    # sorting the (name, info) pairs.
    for name in sorted(_codec_name_to_info_map):
        own_info = _codec_name_to_info_map[name]
        if own_info != codecs.lookup(name):
            ignored_names.append(name)
    return ignored_names
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def lookup(codec_name):
    """
    Return the `codecs.CodecInfo` of the EBCDIC codec named `codec_name`.

    Only the codecs of this package are considered, and the name must
    match one of `codec_names` exactly. Any other name raises a
    `LookupError` that lists the available names.
    """
    codec_info = _find_ebcdic_codec(codec_name)
    if codec_info is not None:
        return codec_info
    raise LookupError(f"EBCDIC codec is {codec_name!r} but must be one of: {codec_names}")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Names of the codecs included in the ebcdic package, as a sorted list.
codec_names = sorted(_codec_names())
# Maps each codec name to the `codecs.CodecInfo` returned by the
# `getregentry()` of its module. This must come after `codec_names` is
# assigned because `_create_codec_name_to_info_map()` iterates over it.
_codec_name_to_info_map = _create_codec_name_to_info_map()
# Register the package's search function with the codec registry, so that
# importing this package is enough to make its codecs available to
# `codecs.lookup()`. This must come last because the search function reads
# `_codec_name_to_info_map`.
codecs.register(_find_ebcdic_codec)
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Python Character Mapping Codec cp037 generated from 'temp/cp037.txt' with gencodec.py.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import codecs
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Codec APIs
|
|
9
|
+
class Codec(codecs.Codec):
    """Stateless cp037 codec backed by this module's charmap tables."""

    def encode(self, text, errors='strict'):
        """Encode `text` with `encoding_table`, handling errors per `errors`."""
        encoded = codecs.charmap_encode(text, errors, encoding_table)
        return encoded

    def decode(self, data, errors='strict'):
        """Decode `data` with `decoding_table`, handling errors per `errors`."""
        decoded = codecs.charmap_decode(data, errors, decoding_table)
        return decoded
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental cp037 encoder; every call encodes its input on its own."""

    def encode(self, text, final=False):
        """Return the encoded form of `text`; `final` is not used."""
        # Only the encoded output is of interest, not the second item of
        # the result.
        encoded, _ = codecs.charmap_encode(text, self.errors, encoding_table)
        return encoded
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental cp037 decoder; every call decodes its input on its own."""

    def decode(self, data, final=False):
        """Return the decoded form of `data`; `final` is not used."""
        # Only the decoded output is of interest, not the second item of
        # the result.
        decoded, _ = codecs.charmap_decode(data, self.errors, decoding_table)
        return decoded
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class StreamWriter(Codec, codecs.StreamWriter):
    """
    Stream writer for cp037.

    Adds nothing of its own: it only combines `Codec` with
    `codecs.StreamWriter`.
    """
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class StreamReader(Codec, codecs.StreamReader):
    """
    Stream reader for cp037.

    Adds nothing of its own: it only combines `Codec` with
    `codecs.StreamReader`.
    """
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# encodings module API
|
|
36
|
+
def getregentry():
    """Return the `codecs.CodecInfo` that describes the cp037 codec."""
    # The encode and decode functions are bound to separate `Codec`
    # instances.
    encoder = Codec().encode
    decoder = Codec().decode
    return codecs.CodecInfo(
        name='cp037',
        encode=encoder,
        decode=decoder,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Decoding Table
|
|
49
|
+
|
|
50
|
+
decoding_table = (
|
|
51
|
+
'\x00' # 0x00 -> NULL
|
|
52
|
+
'\x01' # 0x01 -> START OF HEADING
|
|
53
|
+
'\x02' # 0x02 -> START OF TEXT
|
|
54
|
+
'\x03' # 0x03 -> END OF TEXT
|
|
55
|
+
'\x9c' # 0x04 -> STRING TERMINATOR
|
|
56
|
+
'\t' # 0x05 -> CHARACTER TABULATION
|
|
57
|
+
'\x86' # 0x06 -> START OF SELECTED AREA
|
|
58
|
+
'\x7f' # 0x07 -> DELETE
|
|
59
|
+
'\x97' # 0x08 -> END OF GUARDED AREA
|
|
60
|
+
'\x8d' # 0x09 -> REVERSE LINE FEED
|
|
61
|
+
'\x8e' # 0x0A -> SINGLE SHIFT TWO
|
|
62
|
+
'\x0b' # 0x0B -> LINE TABULATION
|
|
63
|
+
'\x0c' # 0x0C -> FORM FEED (FF)
|
|
64
|
+
'\r' # 0x0D -> CARRIAGE RETURN (CR)
|
|
65
|
+
'\x0e' # 0x0E -> SHIFT OUT
|
|
66
|
+
'\x0f' # 0x0F -> SHIFT IN
|
|
67
|
+
'\x10' # 0x10 -> DATA LINK ESCAPE
|
|
68
|
+
'\x11' # 0x11 -> DEVICE CONTROL ONE
|
|
69
|
+
'\x12' # 0x12 -> DEVICE CONTROL TWO
|
|
70
|
+
'\x13' # 0x13 -> DEVICE CONTROL THREE
|
|
71
|
+
'\x9d' # 0x14 -> OPERATING SYSTEM COMMAND
|
|
72
|
+
'\n' # 0x15 -> LINE FEED (LF)
|
|
73
|
+
'\x08' # 0x16 -> BACKSPACE
|
|
74
|
+
'\x87' # 0x17 -> END OF SELECTED AREA
|
|
75
|
+
'\x18' # 0x18 -> CANCEL
|
|
76
|
+
'\x19' # 0x19 -> END OF MEDIUM
|
|
77
|
+
'\x92' # 0x1A -> PRIVATE USE TWO
|
|
78
|
+
'\x8f' # 0x1B -> SINGLE SHIFT THREE
|
|
79
|
+
'\x1c' # 0x1C -> INFORMATION SEPARATOR FOUR
|
|
80
|
+
'\x1d' # 0x1D -> INFORMATION SEPARATOR THREE
|
|
81
|
+
'\x1e' # 0x1E -> INFORMATION SEPARATOR TWO
|
|
82
|
+
'\x1f' # 0x1F -> INFORMATION SEPARATOR ONE
|
|
83
|
+
'\x80' # 0x20 -> PADDING CHARACTER
|
|
84
|
+
'\x81' # 0x21 -> HIGH OCTET PRESET
|
|
85
|
+
'\x82' # 0x22 -> BREAK PERMITTED HERE
|
|
86
|
+
'\x83' # 0x23 -> NO BREAK HERE
|
|
87
|
+
'\x84' # 0x24 -> LATIN 1 SUPPLEMENT 84
|
|
88
|
+
'\n' # 0x25 -> LINE FEED (LF)
|
|
89
|
+
'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
|
|
90
|
+
'\x1b' # 0x27 -> ESCAPE
|
|
91
|
+
'\x88' # 0x28 -> CHARACTER TABULATION SET
|
|
92
|
+
'\x89' # 0x29 -> CHARACTER TABULATION WITH JUSTIFICATION
|
|
93
|
+
'\x8a' # 0x2A -> LINE TABULATION SET
|
|
94
|
+
'\x8b' # 0x2B -> PARTIAL LINE FORWARD
|
|
95
|
+
'\x8c' # 0x2C -> PARTIAL LINE BACKWARD
|
|
96
|
+
'\x05' # 0x2D -> ENQUIRY
|
|
97
|
+
'\x06' # 0x2E -> ACKNOWLEDGE
|
|
98
|
+
'\x07' # 0x2F -> BEL
|
|
99
|
+
'\x90' # 0x30 -> DEVICE CONTROL STRING
|
|
100
|
+
'\x91' # 0x31 -> PRIVATE USE ONE
|
|
101
|
+
'\x16' # 0x32 -> SYNCHRONOUS IDLE
|
|
102
|
+
'\x93' # 0x33 -> SET TRANSMIT STATE
|
|
103
|
+
'\x94' # 0x34 -> CANCEL CHARACTER
|
|
104
|
+
'\x95' # 0x35 -> MESSAGE WAITING
|
|
105
|
+
'\x96' # 0x36 -> START OF GUARDED AREA
|
|
106
|
+
'\x04' # 0x37 -> END OF TRANSMISSION
|
|
107
|
+
'\x98' # 0x38 -> START OF STRING
|
|
108
|
+
'\x99' # 0x39 -> SINGLE GRAPHIC CHARACTER INTRODUCER
|
|
109
|
+
'\x9a' # 0x3A -> SINGLE CHARACTER INTRODUCER
|
|
110
|
+
'\x9b' # 0x3B -> CONTROL SEQUENCE INTRODUCER
|
|
111
|
+
'\x14' # 0x3C -> DEVICE CONTROL FOUR
|
|
112
|
+
'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
|
|
113
|
+
'\x9e' # 0x3E -> PRIVACY MESSAGE
|
|
114
|
+
'\x1a' # 0x3F -> SUBSTITUTE
|
|
115
|
+
' ' # 0x40 -> SPACE
|
|
116
|
+
'\xa0' # 0x41 -> NO-BREAK SPACE
|
|
117
|
+
'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
|
|
118
|
+
'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
|
|
119
|
+
'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
|
|
120
|
+
'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
|
|
121
|
+
'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
|
|
122
|
+
'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
|
|
123
|
+
'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
|
|
124
|
+
'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
|
|
125
|
+
'\xa2' # 0x4A -> CENT SIGN
|
|
126
|
+
'.' # 0x4B -> FULL STOP
|
|
127
|
+
'<' # 0x4C -> LESS-THAN SIGN
|
|
128
|
+
'(' # 0x4D -> LEFT PARENTHESIS
|
|
129
|
+
'+' # 0x4E -> PLUS SIGN
|
|
130
|
+
'|' # 0x4F -> VERTICAL LINE
|
|
131
|
+
'&' # 0x50 -> AMPERSAND
|
|
132
|
+
'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
|
|
133
|
+
'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
|
|
134
|
+
'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
|
|
135
|
+
'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
|
|
136
|
+
'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
|
|
137
|
+
'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
|
|
138
|
+
'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
|
|
139
|
+
'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
|
|
140
|
+
'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S
|
|
141
|
+
'!' # 0x5A -> EXCLAMATION MARK
|
|
142
|
+
'$' # 0x5B -> DOLLAR SIGN
|
|
143
|
+
'*' # 0x5C -> ASTERISK
|
|
144
|
+
')' # 0x5D -> RIGHT PARENTHESIS
|
|
145
|
+
';' # 0x5E -> SEMICOLON
|
|
146
|
+
'\xac' # 0x5F -> NOT SIGN
|
|
147
|
+
'-' # 0x60 -> HYPHEN-MINUS
|
|
148
|
+
'/' # 0x61 -> SOLIDUS
|
|
149
|
+
'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
|
150
|
+
'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
|
|
151
|
+
'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
|
|
152
|
+
'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
|
|
153
|
+
'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
|
|
154
|
+
'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
|
|
155
|
+
'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
|
|
156
|
+
'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
|
|
157
|
+
'\xa6' # 0x6A -> BROKEN BAR
|
|
158
|
+
',' # 0x6B -> COMMA
|
|
159
|
+
'%' # 0x6C -> PERCENT SIGN
|
|
160
|
+
'_' # 0x6D -> LOW LINE
|
|
161
|
+
'>' # 0x6E -> GREATER-THAN SIGN
|
|
162
|
+
'?' # 0x6F -> QUESTION MARK
|
|
163
|
+
'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
|
|
164
|
+
'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
|
|
165
|
+
'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
|
166
|
+
'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
|
|
167
|
+
'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
|
|
168
|
+
'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
|
|
169
|
+
'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
|
170
|
+
'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
|
|
171
|
+
'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
|
|
172
|
+
'`' # 0x79 -> GRAVE ACCENT
|
|
173
|
+
':' # 0x7A -> COLON
|
|
174
|
+
'#' # 0x7B -> NUMBER SIGN
|
|
175
|
+
'@' # 0x7C -> COMMERCIAL AT
|
|
176
|
+
"'" # 0x7D -> APOSTROPHE
|
|
177
|
+
'=' # 0x7E -> EQUALS SIGN
|
|
178
|
+
'"' # 0x7F -> QUOTATION MARK
|
|
179
|
+
'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
|
|
180
|
+
'a' # 0x81 -> LATIN SMALL LETTER A
|
|
181
|
+
'b' # 0x82 -> LATIN SMALL LETTER B
|
|
182
|
+
'c' # 0x83 -> LATIN SMALL LETTER C
|
|
183
|
+
'd' # 0x84 -> LATIN SMALL LETTER D
|
|
184
|
+
'e' # 0x85 -> LATIN SMALL LETTER E
|
|
185
|
+
'f' # 0x86 -> LATIN SMALL LETTER F
|
|
186
|
+
'g' # 0x87 -> LATIN SMALL LETTER G
|
|
187
|
+
'h' # 0x88 -> LATIN SMALL LETTER H
|
|
188
|
+
'i' # 0x89 -> LATIN SMALL LETTER I
|
|
189
|
+
'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
|
190
|
+
'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
|
191
|
+
'\xf0' # 0x8C -> LATIN SMALL LETTER ETH
|
|
192
|
+
'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
|
|
193
|
+
'\xfe' # 0x8E -> LATIN SMALL LETTER THORN
|
|
194
|
+
'\xb1' # 0x8F -> PLUS-MINUS SIGN
|
|
195
|
+
'\xb0' # 0x90 -> DEGREE SIGN
|
|
196
|
+
'j' # 0x91 -> LATIN SMALL LETTER J
|
|
197
|
+
'k' # 0x92 -> LATIN SMALL LETTER K
|
|
198
|
+
'l' # 0x93 -> LATIN SMALL LETTER L
|
|
199
|
+
'm' # 0x94 -> LATIN SMALL LETTER M
|
|
200
|
+
'n' # 0x95 -> LATIN SMALL LETTER N
|
|
201
|
+
'o' # 0x96 -> LATIN SMALL LETTER O
|
|
202
|
+
'p' # 0x97 -> LATIN SMALL LETTER P
|
|
203
|
+
'q' # 0x98 -> LATIN SMALL LETTER Q
|
|
204
|
+
'r' # 0x99 -> LATIN SMALL LETTER R
|
|
205
|
+
'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
|
|
206
|
+
'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
|
|
207
|
+
'\xe6' # 0x9C -> LATIN SMALL LETTER AE
|
|
208
|
+
'\xb8' # 0x9D -> CEDILLA
|
|
209
|
+
'\xc6' # 0x9E -> LATIN CAPITAL LETTER AE
|
|
210
|
+
'\xa4' # 0x9F -> CURRENCY SIGN
|
|
211
|
+
'\xb5' # 0xA0 -> MICRO SIGN
|
|
212
|
+
'~' # 0xA1 -> TILDE
|
|
213
|
+
's' # 0xA2 -> LATIN SMALL LETTER S
|
|
214
|
+
't' # 0xA3 -> LATIN SMALL LETTER T
|
|
215
|
+
'u' # 0xA4 -> LATIN SMALL LETTER U
|
|
216
|
+
'v' # 0xA5 -> LATIN SMALL LETTER V
|
|
217
|
+
'w' # 0xA6 -> LATIN SMALL LETTER W
|
|
218
|
+
'x' # 0xA7 -> LATIN SMALL LETTER X
|
|
219
|
+
'y' # 0xA8 -> LATIN SMALL LETTER Y
|
|
220
|
+
'z' # 0xA9 -> LATIN SMALL LETTER Z
|
|
221
|
+
'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
|
|
222
|
+
'\xbf' # 0xAB -> INVERTED QUESTION MARK
|
|
223
|
+
'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH
|
|
224
|
+
'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
|
|
225
|
+
'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN
|
|
226
|
+
'\xae' # 0xAF -> REGISTERED SIGN
|
|
227
|
+
'^' # 0xB0 -> CIRCUMFLEX ACCENT
|
|
228
|
+
'\xa3' # 0xB1 -> POUND SIGN
|
|
229
|
+
'\xa5' # 0xB2 -> YEN SIGN
|
|
230
|
+
'\xb7' # 0xB3 -> MIDDLE DOT
|
|
231
|
+
'\xa9' # 0xB4 -> COPYRIGHT SIGN
|
|
232
|
+
'\xa7' # 0xB5 -> SECTION SIGN
|
|
233
|
+
'\xb6' # 0xB6 -> PILCROW SIGN
|
|
234
|
+
'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
|
|
235
|
+
'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
|
|
236
|
+
'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
|
|
237
|
+
'[' # 0xBA -> LEFT SQUARE BRACKET
|
|
238
|
+
']' # 0xBB -> RIGHT SQUARE BRACKET
|
|
239
|
+
'\xaf' # 0xBC -> MACRON
|
|
240
|
+
'\xa8' # 0xBD -> DIAERESIS
|
|
241
|
+
'\xb4' # 0xBE -> ACUTE ACCENT
|
|
242
|
+
'\xd7' # 0xBF -> MULTIPLICATION SIGN
|
|
243
|
+
'{' # 0xC0 -> LEFT CURLY BRACKET
|
|
244
|
+
'A' # 0xC1 -> LATIN CAPITAL LETTER A
|
|
245
|
+
'B' # 0xC2 -> LATIN CAPITAL LETTER B
|
|
246
|
+
'C' # 0xC3 -> LATIN CAPITAL LETTER C
|
|
247
|
+
'D' # 0xC4 -> LATIN CAPITAL LETTER D
|
|
248
|
+
'E' # 0xC5 -> LATIN CAPITAL LETTER E
|
|
249
|
+
'F' # 0xC6 -> LATIN CAPITAL LETTER F
|
|
250
|
+
'G' # 0xC7 -> LATIN CAPITAL LETTER G
|
|
251
|
+
'H' # 0xC8 -> LATIN CAPITAL LETTER H
|
|
252
|
+
'I' # 0xC9 -> LATIN CAPITAL LETTER I
|
|
253
|
+
'\xad' # 0xCA -> SOFT HYPHEN
|
|
254
|
+
'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
|
|
255
|
+
'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
|
|
256
|
+
'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
|
|
257
|
+
'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
|
|
258
|
+
'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
|
|
259
|
+
'}' # 0xD0 -> RIGHT CURLY BRACKET
|
|
260
|
+
'J' # 0xD1 -> LATIN CAPITAL LETTER J
|
|
261
|
+
'K' # 0xD2 -> LATIN CAPITAL LETTER K
|
|
262
|
+
'L' # 0xD3 -> LATIN CAPITAL LETTER L
|
|
263
|
+
'M' # 0xD4 -> LATIN CAPITAL LETTER M
|
|
264
|
+
'N' # 0xD5 -> LATIN CAPITAL LETTER N
|
|
265
|
+
'O' # 0xD6 -> LATIN CAPITAL LETTER O
|
|
266
|
+
'P' # 0xD7 -> LATIN CAPITAL LETTER P
|
|
267
|
+
'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
|
|
268
|
+
'R' # 0xD9 -> LATIN CAPITAL LETTER R
|
|
269
|
+
'\xb9' # 0xDA -> SUPERSCRIPT ONE
|
|
270
|
+
'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
|
|
271
|
+
'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
|
|
272
|
+
'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
|
|
273
|
+
'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
|
|
274
|
+
'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
|
275
|
+
'\\' # 0xE0 -> REVERSE SOLIDUS
|
|
276
|
+
'\xf7' # 0xE1 -> DIVISION SIGN
|
|
277
|
+
'S' # 0xE2 -> LATIN CAPITAL LETTER S
|
|
278
|
+
'T' # 0xE3 -> LATIN CAPITAL LETTER T
|
|
279
|
+
'U' # 0xE4 -> LATIN CAPITAL LETTER U
|
|
280
|
+
'V' # 0xE5 -> LATIN CAPITAL LETTER V
|
|
281
|
+
'W' # 0xE6 -> LATIN CAPITAL LETTER W
|
|
282
|
+
'X' # 0xE7 -> LATIN CAPITAL LETTER X
|
|
283
|
+
'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
|
|
284
|
+
'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
|
|
285
|
+
'\xb2' # 0xEA -> SUPERSCRIPT TWO
|
|
286
|
+
'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
|
287
|
+
'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
|
|
288
|
+
'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
|
|
289
|
+
'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
|
|
290
|
+
'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
|
|
291
|
+
'0' # 0xF0 -> DIGIT ZERO
|
|
292
|
+
'1' # 0xF1 -> DIGIT ONE
|
|
293
|
+
'2' # 0xF2 -> DIGIT TWO
|
|
294
|
+
'3' # 0xF3 -> DIGIT THREE
|
|
295
|
+
'4' # 0xF4 -> DIGIT FOUR
|
|
296
|
+
'5' # 0xF5 -> DIGIT FIVE
|
|
297
|
+
'6' # 0xF6 -> DIGIT SIX
|
|
298
|
+
'7' # 0xF7 -> DIGIT SEVEN
|
|
299
|
+
'8' # 0xF8 -> DIGIT EIGHT
|
|
300
|
+
'9' # 0xF9 -> DIGIT NINE
|
|
301
|
+
'\xb3' # 0xFA -> SUPERSCRIPT THREE
|
|
302
|
+
'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
|
303
|
+
'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
|
|
304
|
+
'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
|
|
305
|
+
'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
|
|
306
|
+
'\x9f' # 0xFF -> APPLICATION PROGRAM COMMAND
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# Encoding table
|
|
311
|
+
# Derived from decoding_table via codecs.charmap_build rather than written out
# by hand, so the two tables cannot drift apart.
# NOTE(review): presumably consumed by this module's charmap encode calls,
# which are outside this chunk -- confirm against the codec classes above.
encoding_table = codecs.charmap_build(decoding_table)
|