pyxllib 0.3.197__py3-none-any.whl → 0.3.200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. pyxllib/__init__.py +21 -21
  2. pyxllib/algo/__init__.py +8 -8
  3. pyxllib/algo/disjoint.py +54 -54
  4. pyxllib/algo/geo.py +541 -541
  5. pyxllib/algo/intervals.py +964 -964
  6. pyxllib/algo/matcher.py +389 -389
  7. pyxllib/algo/newbie.py +166 -166
  8. pyxllib/algo/pupil.py +629 -629
  9. pyxllib/algo/shapelylib.py +67 -67
  10. pyxllib/algo/specialist.py +241 -241
  11. pyxllib/algo/stat.py +494 -494
  12. pyxllib/algo/treelib.py +149 -149
  13. pyxllib/algo/unitlib.py +66 -66
  14. pyxllib/autogui/__init__.py +5 -5
  15. pyxllib/autogui/activewin.py +246 -246
  16. pyxllib/autogui/all.py +9 -9
  17. pyxllib/autogui/autogui.py +852 -852
  18. pyxllib/autogui/uiautolib.py +362 -362
  19. pyxllib/autogui/virtualkey.py +102 -102
  20. pyxllib/autogui/wechat.py +827 -827
  21. pyxllib/autogui/wechat_msg.py +421 -421
  22. pyxllib/autogui/wxautolib.py +84 -84
  23. pyxllib/cv/__init__.py +5 -5
  24. pyxllib/cv/expert.py +267 -267
  25. pyxllib/cv/imfile.py +159 -159
  26. pyxllib/cv/imhash.py +39 -39
  27. pyxllib/cv/pupil.py +9 -9
  28. pyxllib/cv/rgbfmt.py +1525 -1525
  29. pyxllib/cv/slidercaptcha.py +137 -137
  30. pyxllib/cv/trackbartools.py +251 -251
  31. pyxllib/cv/xlcvlib.py +1040 -1040
  32. pyxllib/cv/xlpillib.py +423 -423
  33. pyxllib/data/echarts.py +240 -240
  34. pyxllib/data/jsonlib.py +89 -89
  35. pyxllib/data/oss.py +72 -72
  36. pyxllib/data/pglib.py +1127 -1127
  37. pyxllib/data/sqlite.py +568 -568
  38. pyxllib/data/sqllib.py +297 -297
  39. pyxllib/ext/JLineViewer.py +505 -505
  40. pyxllib/ext/__init__.py +6 -6
  41. pyxllib/ext/demolib.py +246 -246
  42. pyxllib/ext/drissionlib.py +277 -277
  43. pyxllib/ext/kq5034lib.py +12 -12
  44. pyxllib/ext/old.py +663 -663
  45. pyxllib/ext/qt.py +449 -449
  46. pyxllib/ext/robustprocfile.py +497 -497
  47. pyxllib/ext/seleniumlib.py +76 -76
  48. pyxllib/ext/tk.py +173 -173
  49. pyxllib/ext/unixlib.py +827 -827
  50. pyxllib/ext/utools.py +351 -351
  51. pyxllib/ext/webhook.py +124 -119
  52. pyxllib/ext/win32lib.py +40 -40
  53. pyxllib/ext/wjxlib.py +88 -88
  54. pyxllib/ext/wpsapi.py +124 -124
  55. pyxllib/ext/xlwork.py +9 -9
  56. pyxllib/ext/yuquelib.py +1105 -1105
  57. pyxllib/file/__init__.py +17 -17
  58. pyxllib/file/docxlib.py +761 -761
  59. pyxllib/file/gitlib.py +309 -309
  60. pyxllib/file/libreoffice.py +165 -165
  61. pyxllib/file/movielib.py +148 -148
  62. pyxllib/file/newbie.py +10 -10
  63. pyxllib/file/onenotelib.py +1469 -1469
  64. pyxllib/file/packlib/__init__.py +330 -330
  65. pyxllib/file/packlib/zipfile.py +2441 -2441
  66. pyxllib/file/pdflib.py +426 -426
  67. pyxllib/file/pupil.py +185 -185
  68. pyxllib/file/specialist/__init__.py +685 -685
  69. pyxllib/file/specialist/dirlib.py +799 -799
  70. pyxllib/file/specialist/download.py +193 -193
  71. pyxllib/file/specialist/filelib.py +2829 -2829
  72. pyxllib/file/xlsxlib.py +3131 -3131
  73. pyxllib/file/xlsyncfile.py +341 -341
  74. pyxllib/prog/__init__.py +5 -5
  75. pyxllib/prog/cachetools.py +64 -64
  76. pyxllib/prog/deprecatedlib.py +233 -233
  77. pyxllib/prog/filelock.py +42 -42
  78. pyxllib/prog/ipyexec.py +253 -253
  79. pyxllib/prog/multiprogs.py +940 -940
  80. pyxllib/prog/newbie.py +451 -451
  81. pyxllib/prog/pupil.py +1197 -1197
  82. pyxllib/prog/sitepackages.py +33 -33
  83. pyxllib/prog/specialist/__init__.py +391 -391
  84. pyxllib/prog/specialist/bc.py +203 -203
  85. pyxllib/prog/specialist/browser.py +497 -497
  86. pyxllib/prog/specialist/common.py +347 -347
  87. pyxllib/prog/specialist/datetime.py +198 -198
  88. pyxllib/prog/specialist/tictoc.py +240 -240
  89. pyxllib/prog/specialist/xllog.py +180 -180
  90. pyxllib/prog/xlosenv.py +108 -108
  91. pyxllib/stdlib/__init__.py +17 -17
  92. pyxllib/stdlib/tablepyxl/__init__.py +10 -10
  93. pyxllib/stdlib/tablepyxl/style.py +303 -303
  94. pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
  95. pyxllib/text/__init__.py +8 -8
  96. pyxllib/text/ahocorasick.py +39 -39
  97. pyxllib/text/airscript.js +744 -744
  98. pyxllib/text/charclasslib.py +121 -121
  99. pyxllib/text/jiebalib.py +267 -267
  100. pyxllib/text/jinjalib.py +32 -32
  101. pyxllib/text/jsa_ai_prompt.md +271 -271
  102. pyxllib/text/jscode.py +922 -922
  103. pyxllib/text/latex/__init__.py +158 -158
  104. pyxllib/text/levenshtein.py +303 -303
  105. pyxllib/text/nestenv.py +1215 -1215
  106. pyxllib/text/newbie.py +300 -300
  107. pyxllib/text/pupil/__init__.py +8 -8
  108. pyxllib/text/pupil/common.py +1121 -1121
  109. pyxllib/text/pupil/xlalign.py +326 -326
  110. pyxllib/text/pycode.py +47 -47
  111. pyxllib/text/specialist/__init__.py +8 -8
  112. pyxllib/text/specialist/common.py +112 -112
  113. pyxllib/text/specialist/ptag.py +186 -186
  114. pyxllib/text/spellchecker.py +172 -172
  115. pyxllib/text/templates/echart_base.html +10 -10
  116. pyxllib/text/templates/highlight_code.html +16 -16
  117. pyxllib/text/templates/latex_editor.html +102 -102
  118. pyxllib/text/vbacode.py +17 -17
  119. pyxllib/text/xmllib.py +747 -747
  120. pyxllib/xl.py +42 -39
  121. pyxllib/xlcv.py +17 -17
  122. {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/METADATA +1 -1
  123. pyxllib-0.3.200.dist-info/RECORD +126 -0
  124. {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/licenses/LICENSE +190 -190
  125. pyxllib-0.3.197.dist-info/RECORD +0 -126
  126. {pyxllib-0.3.197.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +0 -0
@@ -1,2441 +1,2441 @@
1
- """
2
- Read and write ZIP files.
3
-
4
- XXX references to utf-8 need further investigation.
5
- """
6
- import binascii
7
- import functools
8
- import importlib.util
9
- import io
10
- import itertools
11
- import os
12
- import posixpath
13
- import shutil
14
- import stat
15
- import struct
16
- import sys
17
- import threading
18
- import time
19
- import contextlib
20
-
21
- try:
22
- import zlib # We may need its compression method
23
- crc32 = zlib.crc32
24
- except ImportError:
25
- zlib = None
26
- crc32 = binascii.crc32
27
-
28
- try:
29
- import bz2 # We may need its compression method
30
- except ImportError:
31
- bz2 = None
32
-
33
- try:
34
- import lzma # We may need its compression method
35
- except ImportError:
36
- lzma = None
37
-
38
- __all__ = ["BadZipFile", "BadZipfile", "error",
39
- "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
40
- "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
41
- "Path"]
42
-
43
- class BadZipFile(Exception):
44
- pass
45
-
46
-
47
- class LargeZipFile(Exception):
48
- """
49
- Raised when writing a zipfile, the zipfile requires ZIP64 extensions
50
- and those extensions are disabled.
51
- """
52
-
53
- error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
54
-
55
-
56
- ZIP64_LIMIT = (1 << 31) - 1
57
- ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
58
- ZIP_MAX_COMMENT = (1 << 16) - 1
59
-
60
- # constants for Zip file compression methods
61
- ZIP_STORED = 0
62
- ZIP_DEFLATED = 8
63
- ZIP_BZIP2 = 12
64
- ZIP_LZMA = 14
65
- # Other ZIP compression methods not supported
66
-
67
- DEFAULT_VERSION = 20
68
- ZIP64_VERSION = 45
69
- BZIP2_VERSION = 46
70
- LZMA_VERSION = 63
71
- # we recognize (but not necessarily support) all features up to that version
72
- MAX_EXTRACT_VERSION = 63
73
-
74
- # Below are some formats and associated data for reading/writing headers using
75
- # the struct module. The names and structures of headers/records are those used
76
- # in the PKWARE description of the ZIP file format:
77
- # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
78
- # (URL valid as of January 2008)
79
-
80
- # The "end of central directory" structure, magic number, size, and indices
81
- # (section V.I in the format document)
82
- structEndArchive = b"<4s4H2LH"
83
- stringEndArchive = b"PK\005\006"
84
- sizeEndCentDir = struct.calcsize(structEndArchive)
85
-
86
- _ECD_SIGNATURE = 0
87
- _ECD_DISK_NUMBER = 1
88
- _ECD_DISK_START = 2
89
- _ECD_ENTRIES_THIS_DISK = 3
90
- _ECD_ENTRIES_TOTAL = 4
91
- _ECD_SIZE = 5
92
- _ECD_OFFSET = 6
93
- _ECD_COMMENT_SIZE = 7
94
- # These last two indices are not part of the structure as defined in the
95
- # spec, but they are used internally by this module as a convenience
96
- _ECD_COMMENT = 8
97
- _ECD_LOCATION = 9
98
-
99
- # The "central directory" structure, magic number, size, and indices
100
- # of entries in the structure (section V.F in the format document)
101
- structCentralDir = "<4s4B4HL2L5H2L"
102
- stringCentralDir = b"PK\001\002"
103
- sizeCentralDir = struct.calcsize(structCentralDir)
104
-
105
- # indexes of entries in the central directory structure
106
- _CD_SIGNATURE = 0
107
- _CD_CREATE_VERSION = 1
108
- _CD_CREATE_SYSTEM = 2
109
- _CD_EXTRACT_VERSION = 3
110
- _CD_EXTRACT_SYSTEM = 4
111
- _CD_FLAG_BITS = 5
112
- _CD_COMPRESS_TYPE = 6
113
- _CD_TIME = 7
114
- _CD_DATE = 8
115
- _CD_CRC = 9
116
- _CD_COMPRESSED_SIZE = 10
117
- _CD_UNCOMPRESSED_SIZE = 11
118
- _CD_FILENAME_LENGTH = 12
119
- _CD_EXTRA_FIELD_LENGTH = 13
120
- _CD_COMMENT_LENGTH = 14
121
- _CD_DISK_NUMBER_START = 15
122
- _CD_INTERNAL_FILE_ATTRIBUTES = 16
123
- _CD_EXTERNAL_FILE_ATTRIBUTES = 17
124
- _CD_LOCAL_HEADER_OFFSET = 18
125
-
126
- # The "local file header" structure, magic number, size, and indices
127
- # (section V.A in the format document)
128
- structFileHeader = "<4s2B4HL2L2H"
129
- stringFileHeader = b"PK\003\004"
130
- sizeFileHeader = struct.calcsize(structFileHeader)
131
-
132
- _FH_SIGNATURE = 0
133
- _FH_EXTRACT_VERSION = 1
134
- _FH_EXTRACT_SYSTEM = 2
135
- _FH_GENERAL_PURPOSE_FLAG_BITS = 3
136
- _FH_COMPRESSION_METHOD = 4
137
- _FH_LAST_MOD_TIME = 5
138
- _FH_LAST_MOD_DATE = 6
139
- _FH_CRC = 7
140
- _FH_COMPRESSED_SIZE = 8
141
- _FH_UNCOMPRESSED_SIZE = 9
142
- _FH_FILENAME_LENGTH = 10
143
- _FH_EXTRA_FIELD_LENGTH = 11
144
-
145
- # The "Zip64 end of central directory locator" structure, magic number, and size
146
- structEndArchive64Locator = "<4sLQL"
147
- stringEndArchive64Locator = b"PK\x06\x07"
148
- sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
149
-
150
- # The "Zip64 end of central directory" record, magic number, size, and indices
151
- # (section V.G in the format document)
152
- structEndArchive64 = "<4sQ2H2L4Q"
153
- stringEndArchive64 = b"PK\x06\x06"
154
- sizeEndCentDir64 = struct.calcsize(structEndArchive64)
155
-
156
- _CD64_SIGNATURE = 0
157
- _CD64_DIRECTORY_RECSIZE = 1
158
- _CD64_CREATE_VERSION = 2
159
- _CD64_EXTRACT_VERSION = 3
160
- _CD64_DISK_NUMBER = 4
161
- _CD64_DISK_NUMBER_START = 5
162
- _CD64_NUMBER_ENTRIES_THIS_DISK = 6
163
- _CD64_NUMBER_ENTRIES_TOTAL = 7
164
- _CD64_DIRECTORY_SIZE = 8
165
- _CD64_OFFSET_START_CENTDIR = 9
166
-
167
- _DD_SIGNATURE = 0x08074b50
168
-
169
- _EXTRA_FIELD_STRUCT = struct.Struct('<HH')
170
-
171
- def _strip_extra(extra, xids):
172
- # Remove Extra Fields with specified IDs.
173
- unpack = _EXTRA_FIELD_STRUCT.unpack
174
- modified = False
175
- buffer = []
176
- start = i = 0
177
- while i + 4 <= len(extra):
178
- xid, xlen = unpack(extra[i : i + 4])
179
- j = i + 4 + xlen
180
- if xid in xids:
181
- if i != start:
182
- buffer.append(extra[start : i])
183
- start = j
184
- modified = True
185
- i = j
186
- if not modified:
187
- return extra
188
- return b''.join(buffer)
189
-
190
- def _check_zipfile(fp):
191
- try:
192
- if _EndRecData(fp):
193
- return True # file has correct magic number
194
- except OSError:
195
- pass
196
- return False
197
-
198
- def is_zipfile(filename):
199
- """Quickly see if a file is a ZIP file by checking the magic number.
200
-
201
- The filename argument may be a file or file-like object too.
202
- """
203
- result = False
204
- try:
205
- if hasattr(filename, "read"):
206
- result = _check_zipfile(fp=filename)
207
- else:
208
- with open(filename, "rb") as fp:
209
- result = _check_zipfile(fp)
210
- except OSError:
211
- pass
212
- return result
213
-
214
- def _EndRecData64(fpin, offset, endrec):
215
- """
216
- Read the ZIP64 end-of-archive records and use that to update endrec
217
- """
218
- try:
219
- fpin.seek(offset - sizeEndCentDir64Locator, 2)
220
- except OSError:
221
- # If the seek fails, the file is not large enough to contain a ZIP64
222
- # end-of-archive record, so just return the end record we were given.
223
- return endrec
224
-
225
- data = fpin.read(sizeEndCentDir64Locator)
226
- if len(data) != sizeEndCentDir64Locator:
227
- return endrec
228
- sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
229
- if sig != stringEndArchive64Locator:
230
- return endrec
231
-
232
- if diskno != 0 or disks > 1:
233
- raise BadZipFile("zipfiles that span multiple disks are not supported")
234
-
235
- # Assume no 'zip64 extensible data'
236
- fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
237
- data = fpin.read(sizeEndCentDir64)
238
- if len(data) != sizeEndCentDir64:
239
- return endrec
240
- sig, sz, create_version, read_version, disk_num, disk_dir, \
241
- dircount, dircount2, dirsize, diroffset = \
242
- struct.unpack(structEndArchive64, data)
243
- if sig != stringEndArchive64:
244
- return endrec
245
-
246
- # Update the original endrec using data from the ZIP64 record
247
- endrec[_ECD_SIGNATURE] = sig
248
- endrec[_ECD_DISK_NUMBER] = disk_num
249
- endrec[_ECD_DISK_START] = disk_dir
250
- endrec[_ECD_ENTRIES_THIS_DISK] = dircount
251
- endrec[_ECD_ENTRIES_TOTAL] = dircount2
252
- endrec[_ECD_SIZE] = dirsize
253
- endrec[_ECD_OFFSET] = diroffset
254
- return endrec
255
-
256
-
257
- def _EndRecData(fpin):
258
- """Return data from the "End of Central Directory" record, or None.
259
-
260
- The data is a list of the nine items in the ZIP "End of central dir"
261
- record followed by a tenth item, the file seek offset of this record."""
262
-
263
- # Determine file size
264
- fpin.seek(0, 2)
265
- filesize = fpin.tell()
266
-
267
- # Check to see if this is ZIP file with no archive comment (the
268
- # "end of central directory" structure should be the last item in the
269
- # file if this is the case).
270
- try:
271
- fpin.seek(-sizeEndCentDir, 2)
272
- except OSError:
273
- return None
274
- data = fpin.read()
275
- if (len(data) == sizeEndCentDir and
276
- data[0:4] == stringEndArchive and
277
- data[-2:] == b"\000\000"):
278
- # the signature is correct and there's no comment, unpack structure
279
- endrec = struct.unpack(structEndArchive, data)
280
- endrec=list(endrec)
281
-
282
- # Append a blank comment and record start offset
283
- endrec.append(b"")
284
- endrec.append(filesize - sizeEndCentDir)
285
-
286
- # Try to read the "Zip64 end of central directory" structure
287
- return _EndRecData64(fpin, -sizeEndCentDir, endrec)
288
-
289
- # Either this is not a ZIP file, or it is a ZIP file with an archive
290
- # comment. Search the end of the file for the "end of central directory"
291
- # record signature. The comment is the last item in the ZIP file and may be
292
- # up to 64K long. It is assumed that the "end of central directory" magic
293
- # number does not appear in the comment.
294
- maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
295
- fpin.seek(maxCommentStart, 0)
296
- data = fpin.read()
297
- start = data.rfind(stringEndArchive)
298
- if start >= 0:
299
- # found the magic number; attempt to unpack and interpret
300
- recData = data[start:start+sizeEndCentDir]
301
- if len(recData) != sizeEndCentDir:
302
- # Zip file is corrupted.
303
- return None
304
- endrec = list(struct.unpack(structEndArchive, recData))
305
- commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
306
- comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
307
- endrec.append(comment)
308
- endrec.append(maxCommentStart + start)
309
-
310
- # Try to read the "Zip64 end of central directory" structure
311
- return _EndRecData64(fpin, maxCommentStart + start - filesize,
312
- endrec)
313
-
314
- # Unable to find a valid end of central directory structure
315
- return None
316
-
317
-
318
- class ZipInfo (object):
319
- """Class with attributes describing each file in the ZIP archive."""
320
-
321
- __slots__ = (
322
- 'orig_filename',
323
- 'filename',
324
- 'date_time',
325
- 'compress_type',
326
- '_compresslevel',
327
- 'comment',
328
- 'extra',
329
- 'create_system',
330
- 'create_version',
331
- 'extract_version',
332
- 'reserved',
333
- 'flag_bits',
334
- 'volume',
335
- 'internal_attr',
336
- 'external_attr',
337
- 'header_offset',
338
- 'CRC',
339
- 'compress_size',
340
- 'file_size',
341
- '_raw_time',
342
- )
343
-
344
- def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
345
- self.orig_filename = filename # Original file name in archive
346
-
347
- # Terminate the file name at the first null byte. Null bytes in file
348
- # names are used as tricks by viruses in archives.
349
- null_byte = filename.find(chr(0))
350
- if null_byte >= 0:
351
- filename = filename[0:null_byte]
352
- # This is used to ensure paths in generated ZIP files always use
353
- # forward slashes as the directory separator, as required by the
354
- # ZIP format specification.
355
- if os.sep != "/" and os.sep in filename:
356
- filename = filename.replace(os.sep, "/")
357
-
358
- self.filename = filename # Normalized file name
359
- self.date_time = date_time # year, month, day, hour, min, sec
360
-
361
- if date_time[0] < 1980:
362
- raise ValueError('ZIP does not support timestamps before 1980')
363
-
364
- # Standard values:
365
- self.compress_type = ZIP_STORED # Type of compression for the file
366
- self._compresslevel = None # Level for the compressor
367
- self.comment = b"" # Comment for each file
368
- self.extra = b"" # ZIP extra data
369
- if sys.platform == 'win32':
370
- self.create_system = 0 # System which created ZIP archive
371
- else:
372
- # Assume everything else is unix-y
373
- self.create_system = 3 # System which created ZIP archive
374
- self.create_version = DEFAULT_VERSION # Version which created ZIP archive
375
- self.extract_version = DEFAULT_VERSION # Version needed to extract archive
376
- self.reserved = 0 # Must be zero
377
- self.flag_bits = 0 # ZIP flag bits
378
- self.volume = 0 # Volume number of file header
379
- self.internal_attr = 0 # Internal attributes
380
- self.external_attr = 0 # External file attributes
381
- # Other attributes are set by class ZipFile:
382
- # header_offset Byte offset to the file header
383
- # CRC CRC-32 of the uncompressed file
384
- # compress_size Size of the compressed file
385
- # file_size Size of the uncompressed file
386
-
387
- def __repr__(self):
388
- result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
389
- if self.compress_type != ZIP_STORED:
390
- result.append(' compress_type=%s' %
391
- compressor_names.get(self.compress_type,
392
- self.compress_type))
393
- hi = self.external_attr >> 16
394
- lo = self.external_attr & 0xFFFF
395
- if hi:
396
- result.append(' filemode=%r' % stat.filemode(hi))
397
- if lo:
398
- result.append(' external_attr=%#x' % lo)
399
- isdir = self.is_dir()
400
- if not isdir or self.file_size:
401
- result.append(' file_size=%r' % self.file_size)
402
- if ((not isdir or self.compress_size) and
403
- (self.compress_type != ZIP_STORED or
404
- self.file_size != self.compress_size)):
405
- result.append(' compress_size=%r' % self.compress_size)
406
- result.append('>')
407
- return ''.join(result)
408
-
409
- def FileHeader(self, zip64=None):
410
- """Return the per-file header as a bytes object."""
411
- dt = self.date_time
412
- dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
413
- dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
414
- if self.flag_bits & 0x08:
415
- # Set these to zero because we write them after the file data
416
- CRC = compress_size = file_size = 0
417
- else:
418
- CRC = self.CRC
419
- compress_size = self.compress_size
420
- file_size = self.file_size
421
-
422
- extra = self.extra
423
-
424
- min_version = 0
425
- if zip64 is None:
426
- zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
427
- if zip64:
428
- fmt = '<HHQQ'
429
- extra = extra + struct.pack(fmt,
430
- 1, struct.calcsize(fmt)-4, file_size, compress_size)
431
- if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
432
- if not zip64:
433
- raise LargeZipFile("Filesize would require ZIP64 extensions")
434
- # File is larger than what fits into a 4 byte integer,
435
- # fall back to the ZIP64 extension
436
- file_size = 0xffffffff
437
- compress_size = 0xffffffff
438
- min_version = ZIP64_VERSION
439
-
440
- if self.compress_type == ZIP_BZIP2:
441
- min_version = max(BZIP2_VERSION, min_version)
442
- elif self.compress_type == ZIP_LZMA:
443
- min_version = max(LZMA_VERSION, min_version)
444
-
445
- self.extract_version = max(min_version, self.extract_version)
446
- self.create_version = max(min_version, self.create_version)
447
- filename, flag_bits = self._encodeFilenameFlags()
448
- header = struct.pack(structFileHeader, stringFileHeader,
449
- self.extract_version, self.reserved, flag_bits,
450
- self.compress_type, dostime, dosdate, CRC,
451
- compress_size, file_size,
452
- len(filename), len(extra))
453
- return header + filename + extra
454
-
455
- def _encodeFilenameFlags(self):
456
- try:
457
- return self.filename.encode('ascii'), self.flag_bits
458
- except UnicodeEncodeError:
459
- return self.filename.encode('utf-8'), self.flag_bits | 0x800
460
-
461
- def _decodeExtra(self):
462
- # Try to decode the extra field.
463
- extra = self.extra
464
- unpack = struct.unpack
465
- while len(extra) >= 4:
466
- tp, ln = unpack('<HH', extra[:4])
467
- if ln+4 > len(extra):
468
- raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
469
- if tp == 0x0001:
470
- if ln >= 24:
471
- counts = unpack('<QQQ', extra[4:28])
472
- elif ln == 16:
473
- counts = unpack('<QQ', extra[4:20])
474
- elif ln == 8:
475
- counts = unpack('<Q', extra[4:12])
476
- elif ln == 0:
477
- counts = ()
478
- else:
479
- raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
480
-
481
- idx = 0
482
-
483
- # ZIP64 extension (large files and/or large archives)
484
- if self.file_size in (0xffffffffffffffff, 0xffffffff):
485
- if len(counts) <= idx:
486
- raise BadZipFile(
487
- "Corrupt zip64 extra field. File size not found."
488
- )
489
- self.file_size = counts[idx]
490
- idx += 1
491
-
492
- if self.compress_size == 0xFFFFFFFF:
493
- if len(counts) <= idx:
494
- raise BadZipFile(
495
- "Corrupt zip64 extra field. Compress size not found."
496
- )
497
- self.compress_size = counts[idx]
498
- idx += 1
499
-
500
- if self.header_offset == 0xffffffff:
501
- if len(counts) <= idx:
502
- raise BadZipFile(
503
- "Corrupt zip64 extra field. Header offset not found."
504
- )
505
- old = self.header_offset
506
- self.header_offset = counts[idx]
507
- idx+=1
508
-
509
- extra = extra[ln+4:]
510
-
511
- @classmethod
512
- def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
513
- """Construct an appropriate ZipInfo for a file on the filesystem.
514
-
515
- filename should be the path to a file or directory on the filesystem.
516
-
517
- arcname is the name which it will have within the archive (by default,
518
- this will be the same as filename, but without a drive letter and with
519
- leading path separators removed).
520
- """
521
- if isinstance(filename, os.PathLike):
522
- filename = os.fspath(filename)
523
- st = os.stat(filename)
524
- isdir = stat.S_ISDIR(st.st_mode)
525
- mtime = time.localtime(st.st_mtime)
526
- date_time = mtime[0:6]
527
- if not strict_timestamps and date_time[0] < 1980:
528
- date_time = (1980, 1, 1, 0, 0, 0)
529
- elif not strict_timestamps and date_time[0] > 2107:
530
- date_time = (2107, 12, 31, 23, 59, 59)
531
- # Create ZipInfo instance to store file information
532
- if arcname is None:
533
- arcname = filename
534
- arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
535
- while arcname[0] in (os.sep, os.altsep):
536
- arcname = arcname[1:]
537
- if isdir:
538
- arcname += '/'
539
- zinfo = cls(arcname, date_time)
540
- zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes
541
- if isdir:
542
- zinfo.file_size = 0
543
- zinfo.external_attr |= 0x10 # MS-DOS directory flag
544
- else:
545
- zinfo.file_size = st.st_size
546
-
547
- return zinfo
548
-
549
- def is_dir(self):
550
- """Return True if this archive member is a directory."""
551
- return self.filename[-1] == '/'
552
-
553
-
554
- # ZIP encryption uses the CRC32 one-byte primitive for scrambling some
555
- # internal keys. We noticed that a direct implementation is faster than
556
- # relying on binascii.crc32().
557
-
558
- _crctable = None
559
- def _gen_crc(crc):
560
- for j in range(8):
561
- if crc & 1:
562
- crc = (crc >> 1) ^ 0xEDB88320
563
- else:
564
- crc >>= 1
565
- return crc
566
-
567
- # ZIP supports a password-based form of encryption. Even though known
568
- # plaintext attacks have been found against it, it is still useful
569
- # to be able to get data out of such a file.
570
- #
571
- # Usage:
572
- # zd = _ZipDecrypter(mypwd)
573
- # plain_bytes = zd(cypher_bytes)
574
-
575
- def _ZipDecrypter(pwd):
576
- key0 = 305419896
577
- key1 = 591751049
578
- key2 = 878082192
579
-
580
- global _crctable
581
- if _crctable is None:
582
- _crctable = list(map(_gen_crc, range(256)))
583
- crctable = _crctable
584
-
585
- def crc32(ch, crc):
586
- """Compute the CRC32 primitive on one byte."""
587
- return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
588
-
589
- def update_keys(c):
590
- nonlocal key0, key1, key2
591
- key0 = crc32(c, key0)
592
- key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
593
- key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
594
- key2 = crc32(key1 >> 24, key2)
595
-
596
- for p in pwd:
597
- update_keys(p)
598
-
599
- def decrypter(data):
600
- """Decrypt a bytes object."""
601
- result = bytearray()
602
- append = result.append
603
- for c in data:
604
- k = key2 | 2
605
- c ^= ((k * (k^1)) >> 8) & 0xFF
606
- update_keys(c)
607
- append(c)
608
- return bytes(result)
609
-
610
- return decrypter
611
-
612
-
613
- class LZMACompressor:
614
-
615
- def __init__(self):
616
- self._comp = None
617
-
618
- def _init(self):
619
- props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
620
- self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
621
- lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
622
- ])
623
- return struct.pack('<BBH', 9, 4, len(props)) + props
624
-
625
- def compress(self, data):
626
- if self._comp is None:
627
- return self._init() + self._comp.compress(data)
628
- return self._comp.compress(data)
629
-
630
- def flush(self):
631
- if self._comp is None:
632
- return self._init() + self._comp.flush()
633
- return self._comp.flush()
634
-
635
-
636
- class LZMADecompressor:
637
-
638
- def __init__(self):
639
- self._decomp = None
640
- self._unconsumed = b''
641
- self.eof = False
642
-
643
- def decompress(self, data):
644
- if self._decomp is None:
645
- self._unconsumed += data
646
- if len(self._unconsumed) <= 4:
647
- return b''
648
- psize, = struct.unpack('<H', self._unconsumed[2:4])
649
- if len(self._unconsumed) <= 4 + psize:
650
- return b''
651
-
652
- self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
653
- lzma._decode_filter_properties(lzma.FILTER_LZMA1,
654
- self._unconsumed[4:4 + psize])
655
- ])
656
- data = self._unconsumed[4 + psize:]
657
- del self._unconsumed
658
-
659
- result = self._decomp.decompress(data)
660
- self.eof = self._decomp.eof
661
- return result
662
-
663
-
664
- compressor_names = {
665
- 0: 'store',
666
- 1: 'shrink',
667
- 2: 'reduce',
668
- 3: 'reduce',
669
- 4: 'reduce',
670
- 5: 'reduce',
671
- 6: 'implode',
672
- 7: 'tokenize',
673
- 8: 'deflate',
674
- 9: 'deflate64',
675
- 10: 'implode',
676
- 12: 'bzip2',
677
- 14: 'lzma',
678
- 18: 'terse',
679
- 19: 'lz77',
680
- 97: 'wavpack',
681
- 98: 'ppmd',
682
- }
683
-
684
- def _check_compression(compression):
685
- if compression == ZIP_STORED:
686
- pass
687
- elif compression == ZIP_DEFLATED:
688
- if not zlib:
689
- raise RuntimeError(
690
- "Compression requires the (missing) zlib module")
691
- elif compression == ZIP_BZIP2:
692
- if not bz2:
693
- raise RuntimeError(
694
- "Compression requires the (missing) bz2 module")
695
- elif compression == ZIP_LZMA:
696
- if not lzma:
697
- raise RuntimeError(
698
- "Compression requires the (missing) lzma module")
699
- else:
700
- raise NotImplementedError("That compression method is not supported")
701
-
702
-
703
- def _get_compressor(compress_type, compresslevel=None):
704
- if compress_type == ZIP_DEFLATED:
705
- if compresslevel is not None:
706
- return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
707
- return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
708
- elif compress_type == ZIP_BZIP2:
709
- if compresslevel is not None:
710
- return bz2.BZ2Compressor(compresslevel)
711
- return bz2.BZ2Compressor()
712
- # compresslevel is ignored for ZIP_LZMA
713
- elif compress_type == ZIP_LZMA:
714
- return LZMACompressor()
715
- else:
716
- return None
717
-
718
-
719
- def _get_decompressor(compress_type):
720
- _check_compression(compress_type)
721
- if compress_type == ZIP_STORED:
722
- return None
723
- elif compress_type == ZIP_DEFLATED:
724
- return zlib.decompressobj(-15)
725
- elif compress_type == ZIP_BZIP2:
726
- return bz2.BZ2Decompressor()
727
- elif compress_type == ZIP_LZMA:
728
- return LZMADecompressor()
729
- else:
730
- descr = compressor_names.get(compress_type)
731
- if descr:
732
- raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
733
- else:
734
- raise NotImplementedError("compression type %d" % (compress_type,))
735
-
736
-
737
- class _SharedFile:
738
- def __init__(self, file, pos, close, lock, writing):
739
- self._file = file
740
- self._pos = pos
741
- self._close = close
742
- self._lock = lock
743
- self._writing = writing
744
- self.seekable = file.seekable
745
- self.tell = file.tell
746
-
747
- def seek(self, offset, whence=0):
748
- with self._lock:
749
- if self._writing():
750
- raise ValueError("Can't reposition in the ZIP file while "
751
- "there is an open writing handle on it. "
752
- "Close the writing handle before trying to read.")
753
- self._file.seek(offset, whence)
754
- self._pos = self._file.tell()
755
- return self._pos
756
-
757
- def read(self, n=-1):
758
- with self._lock:
759
- if self._writing():
760
- raise ValueError("Can't read from the ZIP file while there "
761
- "is an open writing handle on it. "
762
- "Close the writing handle before trying to read.")
763
- self._file.seek(self._pos)
764
- data = self._file.read(n)
765
- self._pos = self._file.tell()
766
- return data
767
-
768
- def close(self):
769
- if self._file is not None:
770
- fileobj = self._file
771
- self._file = None
772
- self._close(fileobj)
773
-
774
- # Provide the tell method for unseekable stream
775
- class _Tellable:
776
- def __init__(self, fp):
777
- self.fp = fp
778
- self.offset = 0
779
-
780
- def write(self, data):
781
- n = self.fp.write(data)
782
- self.offset += n
783
- return n
784
-
785
- def tell(self):
786
- return self.offset
787
-
788
- def flush(self):
789
- self.fp.flush()
790
-
791
- def close(self):
792
- self.fp.close()
793
-
794
-
795
- class ZipExtFile(io.BufferedIOBase):
796
- """File-like object for reading an archive member.
797
- Is returned by ZipFile.open().
798
- """
799
-
800
- # Max size supported by decompressor.
801
- MAX_N = 1 << 31 - 1
802
-
803
- # Read from compressed files in 4k blocks.
804
- MIN_READ_SIZE = 4096
805
-
806
- # Chunk size to read during seek
807
- MAX_SEEK_READ = 1 << 24
808
-
809
- def __init__(self, fileobj, mode, zipinfo, pwd=None,
810
- close_fileobj=False):
811
- self._fileobj = fileobj
812
- self._pwd = pwd
813
- self._close_fileobj = close_fileobj
814
-
815
- self._compress_type = zipinfo.compress_type
816
- self._compress_left = zipinfo.compress_size
817
- self._left = zipinfo.file_size
818
-
819
- self._decompressor = _get_decompressor(self._compress_type)
820
-
821
- self._eof = False
822
- self._readbuffer = b''
823
- self._offset = 0
824
-
825
- self.newlines = None
826
-
827
- self.mode = mode
828
- self.name = zipinfo.filename
829
-
830
- if hasattr(zipinfo, 'CRC'):
831
- self._expected_crc = zipinfo.CRC
832
- self._running_crc = crc32(b'')
833
- else:
834
- self._expected_crc = None
835
-
836
- self._seekable = False
837
- try:
838
- if fileobj.seekable():
839
- self._orig_compress_start = fileobj.tell()
840
- self._orig_compress_size = zipinfo.compress_size
841
- self._orig_file_size = zipinfo.file_size
842
- self._orig_start_crc = self._running_crc
843
- self._seekable = True
844
- except AttributeError:
845
- pass
846
-
847
- self._decrypter = None
848
- if pwd:
849
- if zipinfo.flag_bits & 0x8:
850
- # compare against the file type from extended local headers
851
- check_byte = (zipinfo._raw_time >> 8) & 0xff
852
- else:
853
- # compare against the CRC otherwise
854
- check_byte = (zipinfo.CRC >> 24) & 0xff
855
- h = self._init_decrypter()
856
- if h != check_byte:
857
- raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
858
-
859
-
860
- def _init_decrypter(self):
861
- self._decrypter = _ZipDecrypter(self._pwd)
862
- # The first 12 bytes in the cypher stream is an encryption header
863
- # used to strengthen the algorithm. The first 11 bytes are
864
- # completely random, while the 12th contains the MSB of the CRC,
865
- # or the MSB of the file time depending on the header type
866
- # and is used to check the correctness of the password.
867
- header = self._fileobj.read(12)
868
- self._compress_left -= 12
869
- return self._decrypter(header)[11]
870
-
871
- def __repr__(self):
872
- result = ['<%s.%s' % (self.__class__.__module__,
873
- self.__class__.__qualname__)]
874
- if not self.closed:
875
- result.append(' name=%r mode=%r' % (self.name, self.mode))
876
- if self._compress_type != ZIP_STORED:
877
- result.append(' compress_type=%s' %
878
- compressor_names.get(self._compress_type,
879
- self._compress_type))
880
- else:
881
- result.append(' [closed]')
882
- result.append('>')
883
- return ''.join(result)
884
-
885
- def readline(self, limit=-1):
886
- """Read and return a line from the stream.
887
-
888
- If limit is specified, at most limit bytes will be read.
889
- """
890
-
891
- if limit < 0:
892
- # Shortcut common case - newline found in buffer.
893
- i = self._readbuffer.find(b'\n', self._offset) + 1
894
- if i > 0:
895
- line = self._readbuffer[self._offset: i]
896
- self._offset = i
897
- return line
898
-
899
- return io.BufferedIOBase.readline(self, limit)
900
-
901
- def peek(self, n=1):
902
- """Returns buffered bytes without advancing the position."""
903
- if n > len(self._readbuffer) - self._offset:
904
- chunk = self.read(n)
905
- if len(chunk) > self._offset:
906
- self._readbuffer = chunk + self._readbuffer[self._offset:]
907
- self._offset = 0
908
- else:
909
- self._offset -= len(chunk)
910
-
911
- # Return up to 512 bytes to reduce allocation overhead for tight loops.
912
- return self._readbuffer[self._offset: self._offset + 512]
913
-
914
- def readable(self):
915
- return True
916
-
917
- def read(self, n=-1):
918
- """Read and return up to n bytes.
919
- If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
920
- """
921
- if n is None or n < 0:
922
- buf = self._readbuffer[self._offset:]
923
- self._readbuffer = b''
924
- self._offset = 0
925
- while not self._eof:
926
- buf += self._read1(self.MAX_N)
927
- return buf
928
-
929
- end = n + self._offset
930
- if end < len(self._readbuffer):
931
- buf = self._readbuffer[self._offset:end]
932
- self._offset = end
933
- return buf
934
-
935
- n = end - len(self._readbuffer)
936
- buf = self._readbuffer[self._offset:]
937
- self._readbuffer = b''
938
- self._offset = 0
939
- while n > 0 and not self._eof:
940
- data = self._read1(n)
941
- if n < len(data):
942
- self._readbuffer = data
943
- self._offset = n
944
- buf += data[:n]
945
- break
946
- buf += data
947
- n -= len(data)
948
- return buf
949
-
950
- def _update_crc(self, newdata):
951
- # Update the CRC using the given data.
952
- if self._expected_crc is None:
953
- # No need to compute the CRC if we don't have a reference value
954
- return
955
- self._running_crc = crc32(newdata, self._running_crc)
956
- # Check the CRC if we're at the end of the file
957
- if self._eof and self._running_crc != self._expected_crc:
958
- raise BadZipFile("Bad CRC-32 for file %r" % self.name)
959
-
960
- def read1(self, n):
961
- """Read up to n bytes with at most one read() system call."""
962
-
963
- if n is None or n < 0:
964
- buf = self._readbuffer[self._offset:]
965
- self._readbuffer = b''
966
- self._offset = 0
967
- while not self._eof:
968
- data = self._read1(self.MAX_N)
969
- if data:
970
- buf += data
971
- break
972
- return buf
973
-
974
- end = n + self._offset
975
- if end < len(self._readbuffer):
976
- buf = self._readbuffer[self._offset:end]
977
- self._offset = end
978
- return buf
979
-
980
- n = end - len(self._readbuffer)
981
- buf = self._readbuffer[self._offset:]
982
- self._readbuffer = b''
983
- self._offset = 0
984
- if n > 0:
985
- while not self._eof:
986
- data = self._read1(n)
987
- if n < len(data):
988
- self._readbuffer = data
989
- self._offset = n
990
- buf += data[:n]
991
- break
992
- if data:
993
- buf += data
994
- break
995
- return buf
996
-
997
- def _read1(self, n):
998
- # Read up to n compressed bytes with at most one read() system call,
999
- # decrypt and decompress them.
1000
- if self._eof or n <= 0:
1001
- return b''
1002
-
1003
- # Read from file.
1004
- if self._compress_type == ZIP_DEFLATED:
1005
- ## Handle unconsumed data.
1006
- data = self._decompressor.unconsumed_tail
1007
- if n > len(data):
1008
- data += self._read2(n - len(data))
1009
- else:
1010
- data = self._read2(n)
1011
-
1012
- if self._compress_type == ZIP_STORED:
1013
- self._eof = self._compress_left <= 0
1014
- elif self._compress_type == ZIP_DEFLATED:
1015
- n = max(n, self.MIN_READ_SIZE)
1016
- data = self._decompressor.decompress(data, n)
1017
- self._eof = (self._decompressor.eof or
1018
- self._compress_left <= 0 and
1019
- not self._decompressor.unconsumed_tail)
1020
- if self._eof:
1021
- data += self._decompressor.flush()
1022
- else:
1023
- data = self._decompressor.decompress(data)
1024
- self._eof = self._decompressor.eof or self._compress_left <= 0
1025
-
1026
- data = data[:self._left]
1027
- self._left -= len(data)
1028
- if self._left <= 0:
1029
- self._eof = True
1030
- self._update_crc(data)
1031
- return data
1032
-
1033
- def _read2(self, n):
1034
- if self._compress_left <= 0:
1035
- return b''
1036
-
1037
- n = max(n, self.MIN_READ_SIZE)
1038
- n = min(n, self._compress_left)
1039
-
1040
- data = self._fileobj.read(n)
1041
- self._compress_left -= len(data)
1042
- if not data:
1043
- raise EOFError
1044
-
1045
- if self._decrypter is not None:
1046
- data = self._decrypter(data)
1047
- return data
1048
-
1049
- def close(self):
1050
- try:
1051
- if self._close_fileobj:
1052
- self._fileobj.close()
1053
- finally:
1054
- super().close()
1055
-
1056
- def seekable(self):
1057
- return self._seekable
1058
-
1059
- def seek(self, offset, whence=0):
1060
- if not self._seekable:
1061
- raise io.UnsupportedOperation("underlying stream is not seekable")
1062
- curr_pos = self.tell()
1063
- if whence == 0: # Seek from start of file
1064
- new_pos = offset
1065
- elif whence == 1: # Seek from current position
1066
- new_pos = curr_pos + offset
1067
- elif whence == 2: # Seek from EOF
1068
- new_pos = self._orig_file_size + offset
1069
- else:
1070
- raise ValueError("whence must be os.SEEK_SET (0), "
1071
- "os.SEEK_CUR (1), or os.SEEK_END (2)")
1072
-
1073
- if new_pos > self._orig_file_size:
1074
- new_pos = self._orig_file_size
1075
-
1076
- if new_pos < 0:
1077
- new_pos = 0
1078
-
1079
- read_offset = new_pos - curr_pos
1080
- buff_offset = read_offset + self._offset
1081
-
1082
- if buff_offset >= 0 and buff_offset < len(self._readbuffer):
1083
- # Just move the _offset index if the new position is in the _readbuffer
1084
- self._offset = buff_offset
1085
- read_offset = 0
1086
- elif read_offset < 0:
1087
- # Position is before the current position. Reset the ZipExtFile
1088
- self._fileobj.seek(self._orig_compress_start)
1089
- self._running_crc = self._orig_start_crc
1090
- self._compress_left = self._orig_compress_size
1091
- self._left = self._orig_file_size
1092
- self._readbuffer = b''
1093
- self._offset = 0
1094
- self._decompressor = _get_decompressor(self._compress_type)
1095
- self._eof = False
1096
- read_offset = new_pos
1097
- if self._decrypter is not None:
1098
- self._init_decrypter()
1099
-
1100
- while read_offset > 0:
1101
- read_len = min(self.MAX_SEEK_READ, read_offset)
1102
- self.read(read_len)
1103
- read_offset -= read_len
1104
-
1105
- return self.tell()
1106
-
1107
- def tell(self):
1108
- if not self._seekable:
1109
- raise io.UnsupportedOperation("underlying stream is not seekable")
1110
- filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1111
- return filepos
1112
-
1113
-
1114
- class _ZipWriteFile(io.BufferedIOBase):
1115
- def __init__(self, zf, zinfo, zip64):
1116
- self._zinfo = zinfo
1117
- self._zip64 = zip64
1118
- self._zipfile = zf
1119
- self._compressor = _get_compressor(zinfo.compress_type,
1120
- zinfo._compresslevel)
1121
- self._file_size = 0
1122
- self._compress_size = 0
1123
- self._crc = 0
1124
-
1125
- @property
1126
- def _fileobj(self):
1127
- return self._zipfile.fp
1128
-
1129
- def writable(self):
1130
- return True
1131
-
1132
- def write(self, data):
1133
- if self.closed:
1134
- raise ValueError('I/O operation on closed file.')
1135
- nbytes = len(data)
1136
- self._file_size += nbytes
1137
- self._crc = crc32(data, self._crc)
1138
- if self._compressor:
1139
- data = self._compressor.compress(data)
1140
- self._compress_size += len(data)
1141
- self._fileobj.write(data)
1142
- return nbytes
1143
-
1144
- def close(self):
1145
- if self.closed:
1146
- return
1147
- try:
1148
- super().close()
1149
- # Flush any data from the compressor, and update header info
1150
- if self._compressor:
1151
- buf = self._compressor.flush()
1152
- self._compress_size += len(buf)
1153
- self._fileobj.write(buf)
1154
- self._zinfo.compress_size = self._compress_size
1155
- else:
1156
- self._zinfo.compress_size = self._file_size
1157
- self._zinfo.CRC = self._crc
1158
- self._zinfo.file_size = self._file_size
1159
-
1160
- # Write updated header info
1161
- if self._zinfo.flag_bits & 0x08:
1162
- # Write CRC and file sizes after the file data
1163
- fmt = '<LLQQ' if self._zip64 else '<LLLL'
1164
- self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1165
- self._zinfo.compress_size, self._zinfo.file_size))
1166
- self._zipfile.start_dir = self._fileobj.tell()
1167
- else:
1168
- if not self._zip64:
1169
- if self._file_size > ZIP64_LIMIT:
1170
- raise RuntimeError(
1171
- 'File size unexpectedly exceeded ZIP64 limit')
1172
- if self._compress_size > ZIP64_LIMIT:
1173
- raise RuntimeError(
1174
- 'Compressed size unexpectedly exceeded ZIP64 limit')
1175
- # Seek backwards and write file header (which will now include
1176
- # correct CRC and file sizes)
1177
-
1178
- # Preserve current position in file
1179
- self._zipfile.start_dir = self._fileobj.tell()
1180
- self._fileobj.seek(self._zinfo.header_offset)
1181
- self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1182
- self._fileobj.seek(self._zipfile.start_dir)
1183
-
1184
- # Successfully written: Add file to our caches
1185
- self._zipfile.filelist.append(self._zinfo)
1186
- self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1187
- finally:
1188
- self._zipfile._writing = False
1189
-
1190
-
1191
-
1192
- class ZipFile:
1193
- """ Class with methods to open, read, write, close, list zip files.
1194
-
1195
- z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1196
- compresslevel=None)
1197
-
1198
- file: Either the path to the file, or a file-like object.
1199
- If it is a path, the file will be opened and closed by ZipFile.
1200
- mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1201
- or append 'a'.
1202
- compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1203
- ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1204
- allowZip64: if True ZipFile will create files with ZIP64 extensions when
1205
- needed, otherwise it will raise an exception when this would
1206
- be necessary.
1207
- compresslevel: None (default for the given compression type) or an integer
1208
- specifying the level to pass to the compressor.
1209
- When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1210
- When using ZIP_DEFLATED integers 0 through 9 are accepted.
1211
- When using ZIP_BZIP2 integers 1 through 9 are accepted.
1212
-
1213
- """
1214
-
1215
- fp = None # Set here since __del__ checks it
1216
- _windows_illegal_name_trans_table = None
1217
-
1218
- def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
1219
- compresslevel=None, *, strict_timestamps=True):
1220
- """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1221
- or append 'a'."""
1222
- if mode not in ('r', 'w', 'x', 'a'):
1223
- raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
1224
-
1225
- _check_compression(compression)
1226
-
1227
- self._allowZip64 = allowZip64
1228
- self._didModify = False
1229
- self.debug = 0 # Level of printing: 0 through 3
1230
- self.NameToInfo = {} # Find file info given name
1231
- self.filelist = [] # List of ZipInfo instances for archive
1232
- self.compression = compression # Method of compression
1233
- self.compresslevel = compresslevel
1234
- self.mode = mode
1235
- self.pwd = None
1236
- self._comment = b''
1237
- self._strict_timestamps = strict_timestamps
1238
-
1239
- # Check if we were passed a file-like object
1240
- if isinstance(file, os.PathLike):
1241
- file = os.fspath(file)
1242
- if isinstance(file, str):
1243
- # No, it's a filename
1244
- self._filePassed = 0
1245
- self.filename = file
1246
- modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1247
- 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
1248
- filemode = modeDict[mode]
1249
- while True:
1250
- try:
1251
- self.fp = io.open(file, filemode)
1252
- except OSError:
1253
- if filemode in modeDict:
1254
- filemode = modeDict[filemode]
1255
- continue
1256
- raise
1257
- break
1258
- else:
1259
- self._filePassed = 1
1260
- self.fp = file
1261
- self.filename = getattr(file, 'name', None)
1262
- self._fileRefCnt = 1
1263
- self._lock = threading.RLock()
1264
- self._seekable = True
1265
- self._writing = False
1266
-
1267
- try:
1268
- if mode == 'r':
1269
- self._RealGetContents()
1270
- elif mode in ('w', 'x'):
1271
- # set the modified flag so central directory gets written
1272
- # even if no files are added to the archive
1273
- self._didModify = True
1274
- try:
1275
- self.start_dir = self.fp.tell()
1276
- except (AttributeError, OSError):
1277
- self.fp = _Tellable(self.fp)
1278
- self.start_dir = 0
1279
- self._seekable = False
1280
- else:
1281
- # Some file-like objects can provide tell() but not seek()
1282
- try:
1283
- self.fp.seek(self.start_dir)
1284
- except (AttributeError, OSError):
1285
- self._seekable = False
1286
- elif mode == 'a':
1287
- try:
1288
- # See if file is a zip file
1289
- self._RealGetContents()
1290
- # seek to start of directory and overwrite
1291
- self.fp.seek(self.start_dir)
1292
- except BadZipFile:
1293
- # file is not a zip file, just append
1294
- self.fp.seek(0, 2)
1295
-
1296
- # set the modified flag so central directory gets written
1297
- # even if no files are added to the archive
1298
- self._didModify = True
1299
- self.start_dir = self.fp.tell()
1300
- else:
1301
- raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
1302
- except:
1303
- fp = self.fp
1304
- self.fp = None
1305
- self._fpclose(fp)
1306
- raise
1307
-
1308
- def __enter__(self):
1309
- return self
1310
-
1311
- def __exit__(self, type, value, traceback):
1312
- self.close()
1313
-
1314
- def __repr__(self):
1315
- result = ['<%s.%s' % (self.__class__.__module__,
1316
- self.__class__.__qualname__)]
1317
- if self.fp is not None:
1318
- if self._filePassed:
1319
- result.append(' file=%r' % self.fp)
1320
- elif self.filename is not None:
1321
- result.append(' filename=%r' % self.filename)
1322
- result.append(' mode=%r' % self.mode)
1323
- else:
1324
- result.append(' [closed]')
1325
- result.append('>')
1326
- return ''.join(result)
1327
-
1328
- def _RealGetContents(self):
1329
- """Read in the table of contents for the ZIP file."""
1330
- fp = self.fp
1331
- try:
1332
- endrec = _EndRecData(fp)
1333
- except OSError:
1334
- raise BadZipFile("File is not a zip file")
1335
- if not endrec:
1336
- raise BadZipFile("File is not a zip file")
1337
- if self.debug > 1:
1338
- print(endrec)
1339
- size_cd = endrec[_ECD_SIZE] # bytes in central directory
1340
- offset_cd = endrec[_ECD_OFFSET] # offset of central directory
1341
- self._comment = endrec[_ECD_COMMENT] # archive comment
1342
-
1343
- # "concat" is zero, unless zip was concatenated to another file
1344
- concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1345
- if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1346
- # If Zip64 extension structures are present, account for them
1347
- concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1348
-
1349
- if self.debug > 2:
1350
- inferred = concat + offset_cd
1351
- print("given, inferred, offset", offset_cd, inferred, concat)
1352
- # self.start_dir: Position of start of central directory
1353
- self.start_dir = offset_cd + concat
1354
- fp.seek(self.start_dir, 0)
1355
- data = fp.read(size_cd)
1356
- fp = io.BytesIO(data)
1357
- total = 0
1358
- while total < size_cd:
1359
- centdir = fp.read(sizeCentralDir)
1360
- if len(centdir) != sizeCentralDir:
1361
- raise BadZipFile("Truncated central directory")
1362
- centdir = struct.unpack(structCentralDir, centdir)
1363
- if centdir[_CD_SIGNATURE] != stringCentralDir:
1364
- raise BadZipFile("Bad magic number for central directory")
1365
- if self.debug > 2:
1366
- print(centdir)
1367
- filename = fp.read(centdir[_CD_FILENAME_LENGTH])
1368
- flags = centdir[5]
1369
- if flags & 0x800:
1370
- # UTF-8 file names extension
1371
- filename = filename.decode('utf-8')
1372
- else:
1373
- # Historical ZIP filename encoding
1374
- filename = filename.decode('gbk')
1375
- # Create ZipInfo instance to store file information
1376
- x = ZipInfo(filename)
1377
- x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1378
- x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
1379
- x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
1380
- (x.create_version, x.create_system, x.extract_version, x.reserved,
1381
- x.flag_bits, x.compress_type, t, d,
1382
- x.CRC, x.compress_size, x.file_size) = centdir[1:12]
1383
- if x.extract_version > MAX_EXTRACT_VERSION:
1384
- raise NotImplementedError("zip file version %.1f" %
1385
- (x.extract_version / 10))
1386
- x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1387
- # Convert date/time code to (year, month, day, hour, min, sec)
1388
- x._raw_time = t
1389
- x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
1390
- t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
1391
-
1392
- x._decodeExtra()
1393
- x.header_offset = x.header_offset + concat
1394
- self.filelist.append(x)
1395
- self.NameToInfo[x.filename] = x
1396
-
1397
- # update total bytes read from central directory
1398
- total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1399
- + centdir[_CD_EXTRA_FIELD_LENGTH]
1400
- + centdir[_CD_COMMENT_LENGTH])
1401
-
1402
- if self.debug > 2:
1403
- print("total", total)
1404
-
1405
-
1406
- def namelist(self):
1407
- """Return a list of file names in the archive."""
1408
- return [data.filename for data in self.filelist]
1409
-
1410
- def infolist(self):
1411
- """Return a list of class ZipInfo instances for files in the
1412
- archive."""
1413
- return self.filelist
1414
-
1415
- def printdir(self, file=None):
1416
- """Print a table of contents for the zip file."""
1417
- print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1418
- file=file)
1419
- for zinfo in self.filelist:
1420
- date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1421
- print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1422
- file=file)
1423
-
1424
- def testzip(self):
1425
- """Read all the files and check the CRC."""
1426
- chunk_size = 2 ** 20
1427
- for zinfo in self.filelist:
1428
- try:
1429
- # Read by chunks, to avoid an OverflowError or a
1430
- # MemoryError with very large embedded files.
1431
- with self.open(zinfo.filename, "r") as f:
1432
- while f.read(chunk_size): # Check CRC-32
1433
- pass
1434
- except BadZipFile:
1435
- return zinfo.filename
1436
-
1437
- def getinfo(self, name):
1438
- """Return the instance of ZipInfo given 'name'."""
1439
- info = self.NameToInfo.get(name)
1440
- if info is None:
1441
- raise KeyError(
1442
- 'There is no item named %r in the archive' % name)
1443
-
1444
- return info
1445
-
1446
- def setpassword(self, pwd):
1447
- """Set default password for encrypted files."""
1448
- if pwd and not isinstance(pwd, bytes):
1449
- raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1450
- if pwd:
1451
- self.pwd = pwd
1452
- else:
1453
- self.pwd = None
1454
-
1455
- @property
1456
- def comment(self):
1457
- """The comment text associated with the ZIP file."""
1458
- return self._comment
1459
-
1460
- @comment.setter
1461
- def comment(self, comment):
1462
- if not isinstance(comment, bytes):
1463
- raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1464
- # check for valid comment length
1465
- if len(comment) > ZIP_MAX_COMMENT:
1466
- import warnings
1467
- warnings.warn('Archive comment is too long; truncating to %d bytes'
1468
- % ZIP_MAX_COMMENT, stacklevel=2)
1469
- comment = comment[:ZIP_MAX_COMMENT]
1470
- self._comment = comment
1471
- self._didModify = True
1472
-
1473
- def read(self, name, pwd=None):
1474
- """Return file bytes for name."""
1475
- with self.open(name, "r", pwd) as fp:
1476
- return fp.read()
1477
-
1478
- def open(self, name, mode="r", pwd=None, *, force_zip64=False):
1479
- """Return file-like object for 'name'.
1480
-
1481
- name is a string for the file name within the ZIP file, or a ZipInfo
1482
- object.
1483
-
1484
- mode should be 'r' to read a file already in the ZIP file, or 'w' to
1485
- write to a file newly added to the archive.
1486
-
1487
- pwd is the password to decrypt files (only used for reading).
1488
-
1489
- When writing, if the file size is not known in advance but may exceed
1490
- 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1491
- files. If the size is known in advance, it is best to pass a ZipInfo
1492
- instance for name, with zinfo.file_size set.
1493
- """
1494
- if mode not in {"r", "w"}:
1495
- raise ValueError('open() requires mode "r" or "w"')
1496
- if pwd and not isinstance(pwd, bytes):
1497
- raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1498
- if pwd and (mode == "w"):
1499
- raise ValueError("pwd is only supported for reading files")
1500
- if not self.fp:
1501
- raise ValueError(
1502
- "Attempt to use ZIP archive that was already closed")
1503
-
1504
- # Make sure we have an info object
1505
- if isinstance(name, ZipInfo):
1506
- # 'name' is already an info object
1507
- zinfo = name
1508
- elif mode == 'w':
1509
- zinfo = ZipInfo(name)
1510
- zinfo.compress_type = self.compression
1511
- zinfo._compresslevel = self.compresslevel
1512
- else:
1513
- # Get info object for name
1514
- zinfo = self.getinfo(name)
1515
-
1516
- if mode == 'w':
1517
- return self._open_to_write(zinfo, force_zip64=force_zip64)
1518
-
1519
- if self._writing:
1520
- raise ValueError("Can't read from the ZIP file while there "
1521
- "is an open writing handle on it. "
1522
- "Close the writing handle before trying to read.")
1523
-
1524
- # Open for reading:
1525
- self._fileRefCnt += 1
1526
- zef_file = _SharedFile(self.fp, zinfo.header_offset,
1527
- self._fpclose, self._lock, lambda: self._writing)
1528
- try:
1529
- # Skip the file header:
1530
- fheader = zef_file.read(sizeFileHeader)
1531
- if len(fheader) != sizeFileHeader:
1532
- raise BadZipFile("Truncated file header")
1533
- fheader = struct.unpack(structFileHeader, fheader)
1534
- if fheader[_FH_SIGNATURE] != stringFileHeader:
1535
- raise BadZipFile("Bad magic number for file header")
1536
-
1537
- fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1538
- if fheader[_FH_EXTRA_FIELD_LENGTH]:
1539
- zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
1540
-
1541
- if zinfo.flag_bits & 0x20:
1542
- # Zip 2.7: compressed patched data
1543
- raise NotImplementedError("compressed patched data (flag bit 5)")
1544
-
1545
- if zinfo.flag_bits & 0x40:
1546
- # strong encryption
1547
- raise NotImplementedError("strong encryption (flag bit 6)")
1548
-
1549
- if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
1550
- # UTF-8 filename
1551
- fname_str = fname.decode("utf-8")
1552
- else:
1553
- fname_str = fname.decode("gbk")
1554
-
1555
- if fname_str != zinfo.orig_filename:
1556
- raise BadZipFile(
1557
- 'File name in directory %r and header %r differ.'
1558
- % (zinfo.orig_filename, fname))
1559
-
1560
- # check for encrypted flag & handle password
1561
- is_encrypted = zinfo.flag_bits & 0x1
1562
- if is_encrypted:
1563
- if not pwd:
1564
- pwd = self.pwd
1565
- if not pwd:
1566
- raise RuntimeError("File %r is encrypted, password "
1567
- "required for extraction" % name)
1568
- else:
1569
- pwd = None
1570
-
1571
- return ZipExtFile(zef_file, mode, zinfo, pwd, True)
1572
- except:
1573
- zef_file.close()
1574
- raise
1575
-
1576
- def _open_to_write(self, zinfo, force_zip64=False):
1577
- if force_zip64 and not self._allowZip64:
1578
- raise ValueError(
1579
- "force_zip64 is True, but allowZip64 was False when opening "
1580
- "the ZIP file."
1581
- )
1582
- if self._writing:
1583
- raise ValueError("Can't write to the ZIP file while there is "
1584
- "another write handle open on it. "
1585
- "Close the first handle before opening another.")
1586
-
1587
- # Sizes and CRC are overwritten with correct data after processing the file
1588
- if not hasattr(zinfo, 'file_size'):
1589
- zinfo.file_size = 0
1590
- zinfo.compress_size = 0
1591
- zinfo.CRC = 0
1592
-
1593
- zinfo.flag_bits = 0x00
1594
- if zinfo.compress_type == ZIP_LZMA:
1595
- # Compressed data includes an end-of-stream (EOS) marker
1596
- zinfo.flag_bits |= 0x02
1597
- if not self._seekable:
1598
- zinfo.flag_bits |= 0x08
1599
-
1600
- if not zinfo.external_attr:
1601
- zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1602
-
1603
- # Compressed size can be larger than uncompressed size
1604
- zip64 = self._allowZip64 and \
1605
- (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1606
-
1607
- if self._seekable:
1608
- self.fp.seek(self.start_dir)
1609
- zinfo.header_offset = self.fp.tell()
1610
-
1611
- self._writecheck(zinfo)
1612
- self._didModify = True
1613
-
1614
- self.fp.write(zinfo.FileHeader(zip64))
1615
-
1616
- self._writing = True
1617
- return _ZipWriteFile(self, zinfo, zip64)
1618
-
1619
- def extract(self, member, path=None, pwd=None):
1620
- """Extract a member from the archive to the current working directory,
1621
- using its full name. Its file information is extracted as accurately
1622
- as possible. `member' may be a filename or a ZipInfo object. You can
1623
- specify a different directory using `path'.
1624
- """
1625
- if path is None:
1626
- path = os.getcwd()
1627
- else:
1628
- path = os.fspath(path)
1629
-
1630
- return self._extract_member(member, path, pwd)
1631
-
1632
- def extractall(self, path=None, members=None, pwd=None):
1633
- """Extract all members from the archive to the current working
1634
- directory. `path' specifies a different directory to extract to.
1635
- `members' is optional and must be a subset of the list returned
1636
- by namelist().
1637
- """
1638
- if members is None:
1639
- members = self.namelist()
1640
-
1641
- if path is None:
1642
- path = os.getcwd()
1643
- else:
1644
- path = os.fspath(path)
1645
-
1646
- for zipinfo in members:
1647
- self._extract_member(zipinfo, path, pwd)
1648
-
1649
- @classmethod
1650
- def _sanitize_windows_name(cls, arcname, pathsep):
1651
- """Replace bad characters and remove trailing dots from parts."""
1652
- table = cls._windows_illegal_name_trans_table
1653
- if not table:
1654
- illegal = ':<>|"?*'
1655
- table = str.maketrans(illegal, '_' * len(illegal))
1656
- cls._windows_illegal_name_trans_table = table
1657
- arcname = arcname.translate(table)
1658
- # remove trailing dots
1659
- arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1660
- # rejoin, removing empty parts.
1661
- arcname = pathsep.join(x for x in arcname if x)
1662
- return arcname
1663
-
1664
- def _extract_member(self, member, targetpath, pwd):
1665
- """Extract the ZipInfo object 'member' to a physical
1666
- file on the path targetpath.
1667
- """
1668
- if not isinstance(member, ZipInfo):
1669
- member = self.getinfo(member)
1670
-
1671
- # build the destination pathname, replacing
1672
- # forward slashes to platform specific separators.
1673
- arcname = member.filename.replace('/', os.path.sep)
1674
-
1675
- if os.path.altsep:
1676
- arcname = arcname.replace(os.path.altsep, os.path.sep)
1677
- # interpret absolute pathname as relative, remove drive letter or
1678
- # UNC path, redundant separators, "." and ".." components.
1679
- arcname = os.path.splitdrive(arcname)[1]
1680
- invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1681
- arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1682
- if x not in invalid_path_parts)
1683
- if os.path.sep == '\\':
1684
- # filter illegal characters on Windows
1685
- arcname = self._sanitize_windows_name(arcname, os.path.sep)
1686
-
1687
- targetpath = os.path.join(targetpath, arcname)
1688
- targetpath = os.path.normpath(targetpath)
1689
-
1690
- # Create all upper directories if necessary.
1691
- upperdirs = os.path.dirname(targetpath)
1692
- if upperdirs and not os.path.exists(upperdirs):
1693
- os.makedirs(upperdirs)
1694
-
1695
- if member.is_dir():
1696
- if not os.path.isdir(targetpath):
1697
- os.mkdir(targetpath)
1698
- return targetpath
1699
-
1700
- with self.open(member, pwd=pwd) as source, \
1701
- open(targetpath, "wb") as target:
1702
- shutil.copyfileobj(source, target)
1703
-
1704
- return targetpath
1705
-
1706
- def _writecheck(self, zinfo):
1707
- """Check for errors before writing a file to the archive."""
1708
- if zinfo.filename in self.NameToInfo:
1709
- import warnings
1710
- warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1711
- if self.mode not in ('w', 'x', 'a'):
1712
- raise ValueError("write() requires mode 'w', 'x', or 'a'")
1713
- if not self.fp:
1714
- raise ValueError(
1715
- "Attempt to write ZIP archive that was already closed")
1716
- _check_compression(zinfo.compress_type)
1717
- if not self._allowZip64:
1718
- requires_zip64 = None
1719
- if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1720
- requires_zip64 = "Files count"
1721
- elif zinfo.file_size > ZIP64_LIMIT:
1722
- requires_zip64 = "Filesize"
1723
- elif zinfo.header_offset > ZIP64_LIMIT:
1724
- requires_zip64 = "Zipfile size"
1725
- if requires_zip64:
1726
- raise LargeZipFile(requires_zip64 +
1727
- " would require ZIP64 extensions")
1728
-
1729
- def write(self, filename, arcname=None,
1730
- compress_type=None, compresslevel=None):
1731
- """Put the bytes from filename into the archive under the name
1732
- arcname."""
1733
- if not self.fp:
1734
- raise ValueError(
1735
- "Attempt to write to ZIP archive that was already closed")
1736
- if self._writing:
1737
- raise ValueError(
1738
- "Can't write to ZIP archive while an open writing handle exists"
1739
- )
1740
-
1741
- zinfo = ZipInfo.from_file(filename, arcname,
1742
- strict_timestamps=self._strict_timestamps)
1743
-
1744
- if zinfo.is_dir():
1745
- zinfo.compress_size = 0
1746
- zinfo.CRC = 0
1747
- else:
1748
- if compress_type is not None:
1749
- zinfo.compress_type = compress_type
1750
- else:
1751
- zinfo.compress_type = self.compression
1752
-
1753
- if compresslevel is not None:
1754
- zinfo._compresslevel = compresslevel
1755
- else:
1756
- zinfo._compresslevel = self.compresslevel
1757
-
1758
- if zinfo.is_dir():
1759
- with self._lock:
1760
- if self._seekable:
1761
- self.fp.seek(self.start_dir)
1762
- zinfo.header_offset = self.fp.tell() # Start of header bytes
1763
- if zinfo.compress_type == ZIP_LZMA:
1764
- # Compressed data includes an end-of-stream (EOS) marker
1765
- zinfo.flag_bits |= 0x02
1766
-
1767
- self._writecheck(zinfo)
1768
- self._didModify = True
1769
-
1770
- self.filelist.append(zinfo)
1771
- self.NameToInfo[zinfo.filename] = zinfo
1772
- self.fp.write(zinfo.FileHeader(False))
1773
- self.start_dir = self.fp.tell()
1774
- else:
1775
- with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1776
- shutil.copyfileobj(src, dest, 1024*8)
1777
-
1778
- def writestr(self, zinfo_or_arcname, data,
1779
- compress_type=None, compresslevel=None):
1780
- """Write a file into the archive. The contents is 'data', which
1781
- may be either a 'str' or a 'bytes' instance; if it is a 'str',
1782
- it is encoded as UTF-8 first.
1783
- 'zinfo_or_arcname' is either a ZipInfo instance or
1784
- the name of the file in the archive."""
1785
- if isinstance(data, str):
1786
- data = data.encode("utf-8")
1787
- if not isinstance(zinfo_or_arcname, ZipInfo):
1788
- zinfo = ZipInfo(filename=zinfo_or_arcname,
1789
- date_time=time.localtime(time.time())[:6])
1790
- zinfo.compress_type = self.compression
1791
- zinfo._compresslevel = self.compresslevel
1792
- if zinfo.filename[-1] == '/':
1793
- zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1794
- zinfo.external_attr |= 0x10 # MS-DOS directory flag
1795
- else:
1796
- zinfo.external_attr = 0o600 << 16 # ?rw-------
1797
- else:
1798
- zinfo = zinfo_or_arcname
1799
-
1800
- if not self.fp:
1801
- raise ValueError(
1802
- "Attempt to write to ZIP archive that was already closed")
1803
- if self._writing:
1804
- raise ValueError(
1805
- "Can't write to ZIP archive while an open writing handle exists."
1806
- )
1807
-
1808
- if compress_type is not None:
1809
- zinfo.compress_type = compress_type
1810
-
1811
- if compresslevel is not None:
1812
- zinfo._compresslevel = compresslevel
1813
-
1814
- zinfo.file_size = len(data) # Uncompressed size
1815
- with self._lock:
1816
- with self.open(zinfo, mode='w') as dest:
1817
- dest.write(data)
1818
-
1819
- def __del__(self):
1820
- """Call the "close()" method in case the user forgot."""
1821
- self.close()
1822
-
1823
- def close(self):
1824
- """Close the file, and for mode 'w', 'x' and 'a' write the ending
1825
- records."""
1826
- if self.fp is None:
1827
- return
1828
-
1829
- if self._writing:
1830
- raise ValueError("Can't close the ZIP file while there is "
1831
- "an open writing handle on it. "
1832
- "Close the writing handle before closing the zip.")
1833
-
1834
- try:
1835
- if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1836
- with self._lock:
1837
- if self._seekable:
1838
- self.fp.seek(self.start_dir)
1839
- self._write_end_record()
1840
- finally:
1841
- fp = self.fp
1842
- self.fp = None
1843
- self._fpclose(fp)
1844
-
1845
- def _write_end_record(self):
1846
- for zinfo in self.filelist: # write central directory
1847
- dt = zinfo.date_time
1848
- dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1849
- dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1850
- extra = []
1851
- if zinfo.file_size > ZIP64_LIMIT \
1852
- or zinfo.compress_size > ZIP64_LIMIT:
1853
- extra.append(zinfo.file_size)
1854
- extra.append(zinfo.compress_size)
1855
- file_size = 0xffffffff
1856
- compress_size = 0xffffffff
1857
- else:
1858
- file_size = zinfo.file_size
1859
- compress_size = zinfo.compress_size
1860
-
1861
- if zinfo.header_offset > ZIP64_LIMIT:
1862
- extra.append(zinfo.header_offset)
1863
- header_offset = 0xffffffff
1864
- else:
1865
- header_offset = zinfo.header_offset
1866
-
1867
- extra_data = zinfo.extra
1868
- min_version = 0
1869
- if extra:
1870
- # Append a ZIP64 field to the extra's
1871
- extra_data = _strip_extra(extra_data, (1,))
1872
- extra_data = struct.pack(
1873
- '<HH' + 'Q'*len(extra),
1874
- 1, 8*len(extra), *extra) + extra_data
1875
-
1876
- min_version = ZIP64_VERSION
1877
-
1878
- if zinfo.compress_type == ZIP_BZIP2:
1879
- min_version = max(BZIP2_VERSION, min_version)
1880
- elif zinfo.compress_type == ZIP_LZMA:
1881
- min_version = max(LZMA_VERSION, min_version)
1882
-
1883
- extract_version = max(min_version, zinfo.extract_version)
1884
- create_version = max(min_version, zinfo.create_version)
1885
- try:
1886
- filename, flag_bits = zinfo._encodeFilenameFlags()
1887
- centdir = struct.pack(structCentralDir,
1888
- stringCentralDir, create_version,
1889
- zinfo.create_system, extract_version, zinfo.reserved,
1890
- flag_bits, zinfo.compress_type, dostime, dosdate,
1891
- zinfo.CRC, compress_size, file_size,
1892
- len(filename), len(extra_data), len(zinfo.comment),
1893
- 0, zinfo.internal_attr, zinfo.external_attr,
1894
- header_offset)
1895
- except DeprecationWarning:
1896
- print((structCentralDir, stringCentralDir, create_version,
1897
- zinfo.create_system, extract_version, zinfo.reserved,
1898
- zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1899
- zinfo.CRC, compress_size, file_size,
1900
- len(zinfo.filename), len(extra_data), len(zinfo.comment),
1901
- 0, zinfo.internal_attr, zinfo.external_attr,
1902
- header_offset), file=sys.stderr)
1903
- raise
1904
- self.fp.write(centdir)
1905
- self.fp.write(filename)
1906
- self.fp.write(extra_data)
1907
- self.fp.write(zinfo.comment)
1908
-
1909
- pos2 = self.fp.tell()
1910
- # Write end-of-zip-archive record
1911
- centDirCount = len(self.filelist)
1912
- centDirSize = pos2 - self.start_dir
1913
- centDirOffset = self.start_dir
1914
- requires_zip64 = None
1915
- if centDirCount > ZIP_FILECOUNT_LIMIT:
1916
- requires_zip64 = "Files count"
1917
- elif centDirOffset > ZIP64_LIMIT:
1918
- requires_zip64 = "Central directory offset"
1919
- elif centDirSize > ZIP64_LIMIT:
1920
- requires_zip64 = "Central directory size"
1921
- if requires_zip64:
1922
- # Need to write the ZIP64 end-of-archive records
1923
- if not self._allowZip64:
1924
- raise LargeZipFile(requires_zip64 +
1925
- " would require ZIP64 extensions")
1926
- zip64endrec = struct.pack(
1927
- structEndArchive64, stringEndArchive64,
1928
- 44, 45, 45, 0, 0, centDirCount, centDirCount,
1929
- centDirSize, centDirOffset)
1930
- self.fp.write(zip64endrec)
1931
-
1932
- zip64locrec = struct.pack(
1933
- structEndArchive64Locator,
1934
- stringEndArchive64Locator, 0, pos2, 1)
1935
- self.fp.write(zip64locrec)
1936
- centDirCount = min(centDirCount, 0xFFFF)
1937
- centDirSize = min(centDirSize, 0xFFFFFFFF)
1938
- centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1939
-
1940
- endrec = struct.pack(structEndArchive, stringEndArchive,
1941
- 0, 0, centDirCount, centDirCount,
1942
- centDirSize, centDirOffset, len(self._comment))
1943
- self.fp.write(endrec)
1944
- self.fp.write(self._comment)
1945
- if self.mode == "a":
1946
- self.fp.truncate()
1947
- self.fp.flush()
1948
-
1949
- def _fpclose(self, fp):
1950
- assert self._fileRefCnt > 0
1951
- self._fileRefCnt -= 1
1952
- if not self._fileRefCnt and not self._filePassed:
1953
- fp.close()
1954
-
1955
-
1956
- class PyZipFile(ZipFile):
1957
- """Class to create ZIP archives with Python library files and packages."""
1958
-
1959
- def __init__(self, file, mode="r", compression=ZIP_STORED,
1960
- allowZip64=True, optimize=-1):
1961
- ZipFile.__init__(self, file, mode=mode, compression=compression,
1962
- allowZip64=allowZip64)
1963
- self._optimize = optimize
1964
-
1965
- def writepy(self, pathname, basename="", filterfunc=None):
1966
- """Add all files from "pathname" to the ZIP archive.
1967
-
1968
- If pathname is a package directory, search the directory and
1969
- all package subdirectories recursively for all *.py and enter
1970
- the modules into the archive. If pathname is a plain
1971
- directory, listdir *.py and enter all modules. Else, pathname
1972
- must be a Python *.py file and the module will be put into the
1973
- archive. Added modules are always module.pyc.
1974
- This method will compile the module.py into module.pyc if
1975
- necessary.
1976
- If filterfunc(pathname) is given, it is called with every argument.
1977
- When it is False, the file or directory is skipped.
1978
- """
1979
- pathname = os.fspath(pathname)
1980
- if filterfunc and not filterfunc(pathname):
1981
- if self.debug:
1982
- label = 'path' if os.path.isdir(pathname) else 'file'
1983
- print('%s %r skipped by filterfunc' % (label, pathname))
1984
- return
1985
- dir, name = os.path.split(pathname)
1986
- if os.path.isdir(pathname):
1987
- initname = os.path.join(pathname, "__init__.py")
1988
- if os.path.isfile(initname):
1989
- # This is a package directory, add it
1990
- if basename:
1991
- basename = "%s/%s" % (basename, name)
1992
- else:
1993
- basename = name
1994
- if self.debug:
1995
- print("Adding package in", pathname, "as", basename)
1996
- fname, arcname = self._get_codename(initname[0:-3], basename)
1997
- if self.debug:
1998
- print("Adding", arcname)
1999
- self.write(fname, arcname)
2000
- dirlist = sorted(os.listdir(pathname))
2001
- dirlist.remove("__init__.py")
2002
- # Add all *.py files and package subdirectories
2003
- for filename in dirlist:
2004
- path = os.path.join(pathname, filename)
2005
- root, ext = os.path.splitext(filename)
2006
- if os.path.isdir(path):
2007
- if os.path.isfile(os.path.join(path, "__init__.py")):
2008
- # This is a package directory, add it
2009
- self.writepy(path, basename,
2010
- filterfunc=filterfunc) # Recursive call
2011
- elif ext == ".py":
2012
- if filterfunc and not filterfunc(path):
2013
- if self.debug:
2014
- print('file %r skipped by filterfunc' % path)
2015
- continue
2016
- fname, arcname = self._get_codename(path[0:-3],
2017
- basename)
2018
- if self.debug:
2019
- print("Adding", arcname)
2020
- self.write(fname, arcname)
2021
- else:
2022
- # This is NOT a package directory, add its files at top level
2023
- if self.debug:
2024
- print("Adding files from directory", pathname)
2025
- for filename in sorted(os.listdir(pathname)):
2026
- path = os.path.join(pathname, filename)
2027
- root, ext = os.path.splitext(filename)
2028
- if ext == ".py":
2029
- if filterfunc and not filterfunc(path):
2030
- if self.debug:
2031
- print('file %r skipped by filterfunc' % path)
2032
- continue
2033
- fname, arcname = self._get_codename(path[0:-3],
2034
- basename)
2035
- if self.debug:
2036
- print("Adding", arcname)
2037
- self.write(fname, arcname)
2038
- else:
2039
- if pathname[-3:] != ".py":
2040
- raise RuntimeError(
2041
- 'Files added with writepy() must end with ".py"')
2042
- fname, arcname = self._get_codename(pathname[0:-3], basename)
2043
- if self.debug:
2044
- print("Adding file", arcname)
2045
- self.write(fname, arcname)
2046
-
2047
- def _get_codename(self, pathname, basename):
2048
- """Return (filename, archivename) for the path.
2049
-
2050
- Given a module name path, return the correct file path and
2051
- archive name, compiling if necessary. For example, given
2052
- /python/lib/string, return (/python/lib/string.pyc, string).
2053
- """
2054
- def _compile(file, optimize=-1):
2055
- import py_compile
2056
- if self.debug:
2057
- print("Compiling", file)
2058
- try:
2059
- py_compile.compile(file, doraise=True, optimize=optimize)
2060
- except py_compile.PyCompileError as err:
2061
- print(err.msg)
2062
- return False
2063
- return True
2064
-
2065
- file_py = pathname + ".py"
2066
- file_pyc = pathname + ".pyc"
2067
- pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2068
- pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2069
- pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2070
- if self._optimize == -1:
2071
- # legacy mode: use whatever file is present
2072
- if (os.path.isfile(file_pyc) and
2073
- os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2074
- # Use .pyc file.
2075
- arcname = fname = file_pyc
2076
- elif (os.path.isfile(pycache_opt0) and
2077
- os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2078
- # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2079
- # file name in the archive.
2080
- fname = pycache_opt0
2081
- arcname = file_pyc
2082
- elif (os.path.isfile(pycache_opt1) and
2083
- os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2084
- # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2085
- # file name in the archive.
2086
- fname = pycache_opt1
2087
- arcname = file_pyc
2088
- elif (os.path.isfile(pycache_opt2) and
2089
- os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2090
- # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2091
- # file name in the archive.
2092
- fname = pycache_opt2
2093
- arcname = file_pyc
2094
- else:
2095
- # Compile py into PEP 3147 pyc file.
2096
- if _compile(file_py):
2097
- if sys.flags.optimize == 0:
2098
- fname = pycache_opt0
2099
- elif sys.flags.optimize == 1:
2100
- fname = pycache_opt1
2101
- else:
2102
- fname = pycache_opt2
2103
- arcname = file_pyc
2104
- else:
2105
- fname = arcname = file_py
2106
- else:
2107
- # new mode: use given optimization level
2108
- if self._optimize == 0:
2109
- fname = pycache_opt0
2110
- arcname = file_pyc
2111
- else:
2112
- arcname = file_pyc
2113
- if self._optimize == 1:
2114
- fname = pycache_opt1
2115
- elif self._optimize == 2:
2116
- fname = pycache_opt2
2117
- else:
2118
- msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2119
- raise ValueError(msg)
2120
- if not (os.path.isfile(fname) and
2121
- os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2122
- if not _compile(file_py, optimize=self._optimize):
2123
- fname = arcname = file_py
2124
- archivename = os.path.split(arcname)[1]
2125
- if basename:
2126
- archivename = "%s/%s" % (basename, archivename)
2127
- return (fname, archivename)
2128
-
2129
-
2130
- def _parents(path):
2131
- """
2132
- Given a path with elements separated by
2133
- posixpath.sep, generate all parents of that path.
2134
-
2135
- >>> list(_parents('b/d'))
2136
- ['b']
2137
- >>> list(_parents('/b/d/'))
2138
- ['/b']
2139
- >>> list(_parents('b/d/f/'))
2140
- ['b/d', 'b']
2141
- >>> list(_parents('b'))
2142
- []
2143
- >>> list(_parents(''))
2144
- []
2145
- """
2146
- return itertools.islice(_ancestry(path), 1, None)
2147
-
2148
-
2149
- def _ancestry(path):
2150
- """
2151
- Given a path with elements separated by
2152
- posixpath.sep, generate all elements of that path
2153
-
2154
- >>> list(_ancestry('b/d'))
2155
- ['b/d', 'b']
2156
- >>> list(_ancestry('/b/d/'))
2157
- ['/b/d', '/b']
2158
- >>> list(_ancestry('b/d/f/'))
2159
- ['b/d/f', 'b/d', 'b']
2160
- >>> list(_ancestry('b'))
2161
- ['b']
2162
- >>> list(_ancestry(''))
2163
- []
2164
- """
2165
- path = path.rstrip(posixpath.sep)
2166
- while path and path != posixpath.sep:
2167
- yield path
2168
- path, tail = posixpath.split(path)
2169
-
2170
-
2171
- _dedupe = dict.fromkeys
2172
- """Deduplicate an iterable in original order"""
2173
-
2174
-
2175
- def _difference(minuend, subtrahend):
2176
- """
2177
- Return items in minuend not in subtrahend, retaining order
2178
- with O(1) lookup.
2179
- """
2180
- return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2181
-
2182
-
2183
- class CompleteDirs(ZipFile):
2184
- """
2185
- A ZipFile subclass that ensures that implied directories
2186
- are always included in the namelist.
2187
- """
2188
-
2189
- @staticmethod
2190
- def _implied_dirs(names):
2191
- parents = itertools.chain.from_iterable(map(_parents, names))
2192
- as_dirs = (p + posixpath.sep for p in parents)
2193
- return _dedupe(_difference(as_dirs, names))
2194
-
2195
- def namelist(self):
2196
- names = super(CompleteDirs, self).namelist()
2197
- return names + list(self._implied_dirs(names))
2198
-
2199
- def _name_set(self):
2200
- return set(self.namelist())
2201
-
2202
- def resolve_dir(self, name):
2203
- """
2204
- If the name represents a directory, return that name
2205
- as a directory (with the trailing slash).
2206
- """
2207
- names = self._name_set()
2208
- dirname = name + '/'
2209
- dir_match = name not in names and dirname in names
2210
- return dirname if dir_match else name
2211
-
2212
- @classmethod
2213
- def make(cls, source):
2214
- """
2215
- Given a source (filename or zipfile), return an
2216
- appropriate CompleteDirs subclass.
2217
- """
2218
- if isinstance(source, CompleteDirs):
2219
- return source
2220
-
2221
- if not isinstance(source, ZipFile):
2222
- return cls(source)
2223
-
2224
- # Only allow for FastPath when supplied zipfile is read-only
2225
- if 'r' not in source.mode:
2226
- cls = CompleteDirs
2227
-
2228
- res = cls.__new__(cls)
2229
- vars(res).update(vars(source))
2230
- return res
2231
-
2232
-
2233
- class FastLookup(CompleteDirs):
2234
- """
2235
- ZipFile subclass to ensure implicit
2236
- dirs exist and are resolved rapidly.
2237
- """
2238
- def namelist(self):
2239
- with contextlib.suppress(AttributeError):
2240
- return self.__names
2241
- self.__names = super(FastLookup, self).namelist()
2242
- return self.__names
2243
-
2244
- def _name_set(self):
2245
- with contextlib.suppress(AttributeError):
2246
- return self.__lookup
2247
- self.__lookup = super(FastLookup, self)._name_set()
2248
- return self.__lookup
2249
-
2250
-
2251
- class Path:
2252
- """
2253
- A pathlib-compatible interface for zip files.
2254
-
2255
- Consider a zip file with this structure::
2256
-
2257
- .
2258
- ├── a.txt
2259
- └── b
2260
- ├── c.txt
2261
- └── d
2262
- └── e.txt
2263
-
2264
- >>> data = io.BytesIO()
2265
- >>> zf = ZipFile(data, 'w')
2266
- >>> zf.writestr('a.txt', 'content of a')
2267
- >>> zf.writestr('b/c.txt', 'content of c')
2268
- >>> zf.writestr('b/d/e.txt', 'content of e')
2269
- >>> zf.filename = 'abcde.zip'
2270
-
2271
- Path accepts the zipfile object itself or a filename
2272
-
2273
- >>> root = Path(zf)
2274
-
2275
- From there, several path operations are available.
2276
-
2277
- Directory iteration (including the zip file itself):
2278
-
2279
- >>> a, b = root.iterdir()
2280
- >>> a
2281
- Path('abcde.zip', 'a.txt')
2282
- >>> b
2283
- Path('abcde.zip', 'b/')
2284
-
2285
- name property:
2286
-
2287
- >>> b.name
2288
- 'b'
2289
-
2290
- join with divide operator:
2291
-
2292
- >>> c = b / 'c.txt'
2293
- >>> c
2294
- Path('abcde.zip', 'b/c.txt')
2295
- >>> c.name
2296
- 'c.txt'
2297
-
2298
- Read text:
2299
-
2300
- >>> c.read_text()
2301
- 'content of c'
2302
-
2303
- existence:
2304
-
2305
- >>> c.exists()
2306
- True
2307
- >>> (b / 'missing.txt').exists()
2308
- False
2309
-
2310
- Coercion to string:
2311
-
2312
- >>> str(c)
2313
- 'abcde.zip/b/c.txt'
2314
- """
2315
-
2316
- __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2317
-
2318
- def __init__(self, root, at=""):
2319
- self.root = FastLookup.make(root)
2320
- self.at = at
2321
-
2322
- @property
2323
- def open(self):
2324
- return functools.partial(self.root.open, self.at)
2325
-
2326
- @property
2327
- def name(self):
2328
- return posixpath.basename(self.at.rstrip("/"))
2329
-
2330
- def read_text(self, *args, **kwargs):
2331
- with self.open() as strm:
2332
- return io.TextIOWrapper(strm, *args, **kwargs).read()
2333
-
2334
- def read_bytes(self):
2335
- with self.open() as strm:
2336
- return strm.read()
2337
-
2338
- def _is_child(self, path):
2339
- return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2340
-
2341
- def _next(self, at):
2342
- return Path(self.root, at)
2343
-
2344
- def is_dir(self):
2345
- return not self.at or self.at.endswith("/")
2346
-
2347
- def is_file(self):
2348
- return not self.is_dir()
2349
-
2350
- def exists(self):
2351
- return self.at in self.root._name_set()
2352
-
2353
- def iterdir(self):
2354
- if not self.is_dir():
2355
- raise ValueError("Can't listdir a file")
2356
- subs = map(self._next, self.root.namelist())
2357
- return filter(self._is_child, subs)
2358
-
2359
- def __str__(self):
2360
- return posixpath.join(self.root.filename, self.at)
2361
-
2362
- def __repr__(self):
2363
- return self.__repr.format(self=self)
2364
-
2365
- def joinpath(self, add):
2366
- next = posixpath.join(self.at, add)
2367
- return self._next(self.root.resolve_dir(next))
2368
-
2369
- __truediv__ = joinpath
2370
-
2371
- @property
2372
- def parent(self):
2373
- parent_at = posixpath.dirname(self.at.rstrip('/'))
2374
- if parent_at:
2375
- parent_at += '/'
2376
- return self._next(parent_at)
2377
-
2378
-
2379
- def main(args=None):
2380
- import argparse
2381
-
2382
- description = 'A simple command-line interface for zipfile module.'
2383
- parser = argparse.ArgumentParser(description=description)
2384
- group = parser.add_mutually_exclusive_group(required=True)
2385
- group.add_argument('-l', '--list', metavar='<zipfile>',
2386
- help='Show listing of a zipfile')
2387
- group.add_argument('-e', '--extract', nargs=2,
2388
- metavar=('<zipfile>', '<output_dir>'),
2389
- help='Extract zipfile into target dir')
2390
- group.add_argument('-c', '--create', nargs='+',
2391
- metavar=('<name>', '<file>'),
2392
- help='Create zipfile from sources')
2393
- group.add_argument('-t', '--test', metavar='<zipfile>',
2394
- help='Test if a zipfile is valid')
2395
- args = parser.parse_args(args)
2396
-
2397
- if args.test is not None:
2398
- src = args.test
2399
- with ZipFile(src, 'r') as zf:
2400
- badfile = zf.testzip()
2401
- if badfile:
2402
- print("The following enclosed file is corrupted: {!r}".format(badfile))
2403
- print("Done testing")
2404
-
2405
- elif args.list is not None:
2406
- src = args.list
2407
- with ZipFile(src, 'r') as zf:
2408
- zf.printdir()
2409
-
2410
- elif args.extract is not None:
2411
- src, curdir = args.extract
2412
- with ZipFile(src, 'r') as zf:
2413
- zf.extractall(curdir)
2414
-
2415
- elif args.create is not None:
2416
- zip_name = args.create.pop(0)
2417
- files = args.create
2418
-
2419
- def addToZip(zf, path, zippath):
2420
- if os.path.isfile(path):
2421
- zf.write(path, zippath, ZIP_DEFLATED)
2422
- elif os.path.isdir(path):
2423
- if zippath:
2424
- zf.write(path, zippath)
2425
- for nm in sorted(os.listdir(path)):
2426
- addToZip(zf,
2427
- os.path.join(path, nm), os.path.join(zippath, nm))
2428
- # else: ignore
2429
-
2430
- with ZipFile(zip_name, 'w') as zf:
2431
- for path in files:
2432
- zippath = os.path.basename(path)
2433
- if not zippath:
2434
- zippath = os.path.basename(os.path.dirname(path))
2435
- if zippath in ('', os.curdir, os.pardir):
2436
- zippath = ''
2437
- addToZip(zf, path, zippath)
2438
-
2439
-
2440
- if __name__ == "__main__":
2441
- main()
1
+ """
2
+ Read and write ZIP files.
3
+
4
+ XXX references to utf-8 need further investigation.
5
+ """
6
+ import binascii
7
+ import functools
8
+ import importlib.util
9
+ import io
10
+ import itertools
11
+ import os
12
+ import posixpath
13
+ import shutil
14
+ import stat
15
+ import struct
16
+ import sys
17
+ import threading
18
+ import time
19
+ import contextlib
20
+
21
+ try:
22
+ import zlib # We may need its compression method
23
+ crc32 = zlib.crc32
24
+ except ImportError:
25
+ zlib = None
26
+ crc32 = binascii.crc32
27
+
28
+ try:
29
+ import bz2 # We may need its compression method
30
+ except ImportError:
31
+ bz2 = None
32
+
33
+ try:
34
+ import lzma # We may need its compression method
35
+ except ImportError:
36
+ lzma = None
37
+
38
+ __all__ = ["BadZipFile", "BadZipfile", "error",
39
+ "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
40
+ "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
41
+ "Path"]
42
+
43
+ class BadZipFile(Exception):
44
+ pass
45
+
46
+
47
+ class LargeZipFile(Exception):
48
+ """
49
+ Raised when writing a zipfile, the zipfile requires ZIP64 extensions
50
+ and those extensions are disabled.
51
+ """
52
+
53
+ error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
54
+
55
+
56
+ ZIP64_LIMIT = (1 << 31) - 1
57
+ ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
58
+ ZIP_MAX_COMMENT = (1 << 16) - 1
59
+
60
+ # constants for Zip file compression methods
61
+ ZIP_STORED = 0
62
+ ZIP_DEFLATED = 8
63
+ ZIP_BZIP2 = 12
64
+ ZIP_LZMA = 14
65
+ # Other ZIP compression methods not supported
66
+
67
+ DEFAULT_VERSION = 20
68
+ ZIP64_VERSION = 45
69
+ BZIP2_VERSION = 46
70
+ LZMA_VERSION = 63
71
+ # we recognize (but not necessarily support) all features up to that version
72
+ MAX_EXTRACT_VERSION = 63
73
+
74
+ # Below are some formats and associated data for reading/writing headers using
75
+ # the struct module. The names and structures of headers/records are those used
76
+ # in the PKWARE description of the ZIP file format:
77
+ # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
78
+ # (URL valid as of January 2008)
79
+
80
+ # The "end of central directory" structure, magic number, size, and indices
81
+ # (section V.I in the format document)
82
+ structEndArchive = b"<4s4H2LH"
83
+ stringEndArchive = b"PK\005\006"
84
+ sizeEndCentDir = struct.calcsize(structEndArchive)
85
+
86
+ _ECD_SIGNATURE = 0
87
+ _ECD_DISK_NUMBER = 1
88
+ _ECD_DISK_START = 2
89
+ _ECD_ENTRIES_THIS_DISK = 3
90
+ _ECD_ENTRIES_TOTAL = 4
91
+ _ECD_SIZE = 5
92
+ _ECD_OFFSET = 6
93
+ _ECD_COMMENT_SIZE = 7
94
+ # These last two indices are not part of the structure as defined in the
95
+ # spec, but they are used internally by this module as a convenience
96
+ _ECD_COMMENT = 8
97
+ _ECD_LOCATION = 9
98
+
99
+ # The "central directory" structure, magic number, size, and indices
100
+ # of entries in the structure (section V.F in the format document)
101
+ structCentralDir = "<4s4B4HL2L5H2L"
102
+ stringCentralDir = b"PK\001\002"
103
+ sizeCentralDir = struct.calcsize(structCentralDir)
104
+
105
+ # indexes of entries in the central directory structure
106
+ _CD_SIGNATURE = 0
107
+ _CD_CREATE_VERSION = 1
108
+ _CD_CREATE_SYSTEM = 2
109
+ _CD_EXTRACT_VERSION = 3
110
+ _CD_EXTRACT_SYSTEM = 4
111
+ _CD_FLAG_BITS = 5
112
+ _CD_COMPRESS_TYPE = 6
113
+ _CD_TIME = 7
114
+ _CD_DATE = 8
115
+ _CD_CRC = 9
116
+ _CD_COMPRESSED_SIZE = 10
117
+ _CD_UNCOMPRESSED_SIZE = 11
118
+ _CD_FILENAME_LENGTH = 12
119
+ _CD_EXTRA_FIELD_LENGTH = 13
120
+ _CD_COMMENT_LENGTH = 14
121
+ _CD_DISK_NUMBER_START = 15
122
+ _CD_INTERNAL_FILE_ATTRIBUTES = 16
123
+ _CD_EXTERNAL_FILE_ATTRIBUTES = 17
124
+ _CD_LOCAL_HEADER_OFFSET = 18
125
+
126
+ # The "local file header" structure, magic number, size, and indices
127
+ # (section V.A in the format document)
128
+ structFileHeader = "<4s2B4HL2L2H"
129
+ stringFileHeader = b"PK\003\004"
130
+ sizeFileHeader = struct.calcsize(structFileHeader)
131
+
132
+ _FH_SIGNATURE = 0
133
+ _FH_EXTRACT_VERSION = 1
134
+ _FH_EXTRACT_SYSTEM = 2
135
+ _FH_GENERAL_PURPOSE_FLAG_BITS = 3
136
+ _FH_COMPRESSION_METHOD = 4
137
+ _FH_LAST_MOD_TIME = 5
138
+ _FH_LAST_MOD_DATE = 6
139
+ _FH_CRC = 7
140
+ _FH_COMPRESSED_SIZE = 8
141
+ _FH_UNCOMPRESSED_SIZE = 9
142
+ _FH_FILENAME_LENGTH = 10
143
+ _FH_EXTRA_FIELD_LENGTH = 11
144
+
145
+ # The "Zip64 end of central directory locator" structure, magic number, and size
146
+ structEndArchive64Locator = "<4sLQL"
147
+ stringEndArchive64Locator = b"PK\x06\x07"
148
+ sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
149
+
150
+ # The "Zip64 end of central directory" record, magic number, size, and indices
151
+ # (section V.G in the format document)
152
+ structEndArchive64 = "<4sQ2H2L4Q"
153
+ stringEndArchive64 = b"PK\x06\x06"
154
+ sizeEndCentDir64 = struct.calcsize(structEndArchive64)
155
+
156
+ _CD64_SIGNATURE = 0
157
+ _CD64_DIRECTORY_RECSIZE = 1
158
+ _CD64_CREATE_VERSION = 2
159
+ _CD64_EXTRACT_VERSION = 3
160
+ _CD64_DISK_NUMBER = 4
161
+ _CD64_DISK_NUMBER_START = 5
162
+ _CD64_NUMBER_ENTRIES_THIS_DISK = 6
163
+ _CD64_NUMBER_ENTRIES_TOTAL = 7
164
+ _CD64_DIRECTORY_SIZE = 8
165
+ _CD64_OFFSET_START_CENTDIR = 9
166
+
167
+ _DD_SIGNATURE = 0x08074b50
168
+
169
+ _EXTRA_FIELD_STRUCT = struct.Struct('<HH')
170
+
171
+ def _strip_extra(extra, xids):
172
+ # Remove Extra Fields with specified IDs.
173
+ unpack = _EXTRA_FIELD_STRUCT.unpack
174
+ modified = False
175
+ buffer = []
176
+ start = i = 0
177
+ while i + 4 <= len(extra):
178
+ xid, xlen = unpack(extra[i : i + 4])
179
+ j = i + 4 + xlen
180
+ if xid in xids:
181
+ if i != start:
182
+ buffer.append(extra[start : i])
183
+ start = j
184
+ modified = True
185
+ i = j
186
+ if not modified:
187
+ return extra
188
+ return b''.join(buffer)
189
+
190
+ def _check_zipfile(fp):
191
+ try:
192
+ if _EndRecData(fp):
193
+ return True # file has correct magic number
194
+ except OSError:
195
+ pass
196
+ return False
197
+
198
+ def is_zipfile(filename):
199
+ """Quickly see if a file is a ZIP file by checking the magic number.
200
+
201
+ The filename argument may be a file or file-like object too.
202
+ """
203
+ result = False
204
+ try:
205
+ if hasattr(filename, "read"):
206
+ result = _check_zipfile(fp=filename)
207
+ else:
208
+ with open(filename, "rb") as fp:
209
+ result = _check_zipfile(fp)
210
+ except OSError:
211
+ pass
212
+ return result
213
+
214
+ def _EndRecData64(fpin, offset, endrec):
215
+ """
216
+ Read the ZIP64 end-of-archive records and use that to update endrec
217
+ """
218
+ try:
219
+ fpin.seek(offset - sizeEndCentDir64Locator, 2)
220
+ except OSError:
221
+ # If the seek fails, the file is not large enough to contain a ZIP64
222
+ # end-of-archive record, so just return the end record we were given.
223
+ return endrec
224
+
225
+ data = fpin.read(sizeEndCentDir64Locator)
226
+ if len(data) != sizeEndCentDir64Locator:
227
+ return endrec
228
+ sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
229
+ if sig != stringEndArchive64Locator:
230
+ return endrec
231
+
232
+ if diskno != 0 or disks > 1:
233
+ raise BadZipFile("zipfiles that span multiple disks are not supported")
234
+
235
+ # Assume no 'zip64 extensible data'
236
+ fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
237
+ data = fpin.read(sizeEndCentDir64)
238
+ if len(data) != sizeEndCentDir64:
239
+ return endrec
240
+ sig, sz, create_version, read_version, disk_num, disk_dir, \
241
+ dircount, dircount2, dirsize, diroffset = \
242
+ struct.unpack(structEndArchive64, data)
243
+ if sig != stringEndArchive64:
244
+ return endrec
245
+
246
+ # Update the original endrec using data from the ZIP64 record
247
+ endrec[_ECD_SIGNATURE] = sig
248
+ endrec[_ECD_DISK_NUMBER] = disk_num
249
+ endrec[_ECD_DISK_START] = disk_dir
250
+ endrec[_ECD_ENTRIES_THIS_DISK] = dircount
251
+ endrec[_ECD_ENTRIES_TOTAL] = dircount2
252
+ endrec[_ECD_SIZE] = dirsize
253
+ endrec[_ECD_OFFSET] = diroffset
254
+ return endrec
255
+
256
+
257
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    Two cases are handled: (1) the record is the very last thing in the file
    (no archive comment), and (2) a comment of up to 64 KiB follows it, in
    which case the signature is searched for from the end of the file.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        # File shorter than the record itself: cannot be a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec=list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
316
+
317
+
318
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive.

    Instances are created by ZipFile when reading the central directory,
    by ZipInfo.from_file() when adding filesystem entries, or directly by
    callers.  __slots__ keeps per-entry memory low, since large archives
    create one instance per member.
    """

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* with modification time *date_time*
        (a 6-tuple: year, month, day, hour, minute, second; year >= 1980)."""
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            # MS-DOS date format (used by ZIP) cannot encode earlier years.
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Debug representation showing only the fields that carry information."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16   # Unix mode bits (if any)
        lo = self.external_attr & 0xFFFF  # MS-DOS attribute bits
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as a bytes object.

        zip64 -- True to force ZIP64 extra fields, False to forbid them,
                 None (default) to decide from the recorded sizes.
        Raises LargeZipFile if the sizes need ZIP64 but zip64 is False.
        Side effect: may bump extract_version/create_version to the minimum
        version required by the chosen features.
        """
        dt = self.date_time
        # Pack the timestamp into MS-DOS date/time words (2-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded_filename, flag_bits).

        ASCII names keep the current flags; non-ASCII names are encoded as
        UTF-8 and the language-encoding flag (bit 11, 0x800) is set.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        # Try to decode the extra field: walk the (tag, length, data) chunks
        # and apply the ZIP64 chunk (tag 0x0001) to replace any sentinel
        # 0xFFFFFFFF sizes/offsets with their 64-bit values.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                # ZIP64 chunk: up to three 8-byte counts, in the fixed order
                # file_size, compress_size, header_offset.
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. File size not found."
                        )
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Compress size not found."
                        )
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Header offset not found."
                        )
                    # NOTE(review): 'old' is never used afterwards — candidate
                    # for removal (kept here to leave the code byte-identical).
                    old = self.header_offset
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        strict_timestamps=False clamps out-of-range mtimes to the ZIP-encodable
        range [1980, 2107] instead of letting __init__ raise.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # NOTE(review): raises IndexError if arcname normalizes to "" — verify
        # callers never pass an empty/root-only path.
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # Directory members are stored with a trailing slash.
        return self.filename[-1] == '/'
552
+
553
+
554
+ # ZIP encryption uses the CRC32 one-byte primitive for scrambling some
555
+ # internal keys. We noticed that a direct implementation is faster than
556
+ # relying on binascii.crc32().
557
+
558
+ _crctable = None
559
+ def _gen_crc(crc):
560
+ for j in range(8):
561
+ if crc & 1:
562
+ crc = (crc >> 1) ^ 0xEDB88320
563
+ else:
564
+ crc >>= 1
565
+ return crc
566
+
567
+ # ZIP supports a password-based form of encryption. Even though known
568
+ # plaintext attacks have been found against it, it is still useful
569
+ # to be able to get data out of such a file.
570
+ #
571
+ # Usage:
572
+ # zd = _ZipDecrypter(mypwd)
573
+ # plain_bytes = zd(cypher_bytes)
574
+
575
+ def _ZipDecrypter(pwd):
576
+ key0 = 305419896
577
+ key1 = 591751049
578
+ key2 = 878082192
579
+
580
+ global _crctable
581
+ if _crctable is None:
582
+ _crctable = list(map(_gen_crc, range(256)))
583
+ crctable = _crctable
584
+
585
+ def crc32(ch, crc):
586
+ """Compute the CRC32 primitive on one byte."""
587
+ return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
588
+
589
+ def update_keys(c):
590
+ nonlocal key0, key1, key2
591
+ key0 = crc32(c, key0)
592
+ key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
593
+ key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
594
+ key2 = crc32(key1 >> 24, key2)
595
+
596
+ for p in pwd:
597
+ update_keys(p)
598
+
599
+ def decrypter(data):
600
+ """Decrypt a bytes object."""
601
+ result = bytearray()
602
+ append = result.append
603
+ for c in data:
604
+ k = key2 | 2
605
+ c ^= ((k * (k^1)) >> 8) & 0xFF
606
+ update_keys(c)
607
+ append(c)
608
+ return bytes(result)
609
+
610
+ return decrypter
611
+
612
+
613
class LZMACompressor:
    """Compressor for the ZIP LZMA member layout.

    Output begins with a 4-byte header (version bytes 9/4 plus the
    little-endian length of the filter properties), then the raw LZMA1
    filter properties, then the raw LZMA1 stream itself.  The underlying
    lzma compressor is created lazily on the first compress()/flush().
    """

    def __init__(self):
        self._comp = None  # real lzma compressor, created on first use

    def _init(self):
        """Create the raw LZMA1 compressor and return the stream header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filters = [lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)]
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=filters)
        # Header: major version, minor version, properties length, properties.
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, emitting the stream header on the first call."""
        if self._comp is None:
            header = self._init()
            return header + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        """Finish the stream; emits header + terminator even with no data."""
        if self._comp is None:
            header = self._init()
            return header + self._comp.flush()
        return self._comp.flush()
634
+
635
+
636
class LZMADecompressor:
    """Decompressor for the ZIP LZMA member layout.

    Input is buffered until the 4-byte header (2 version bytes + a 2-byte
    little-endian properties length) and the raw LZMA1 filter properties
    have fully arrived; from then on a raw lzma decompressor takes over.
    """

    def __init__(self):
        self._decomp = None     # real lzma decompressor, created lazily
        self._unconsumed = b''  # bytes buffered while awaiting the header
        self.eof = False        # mirrors the underlying decompressor's eof

    def decompress(self, data):
        """Feed *data* into the stream; return whatever plaintext is ready."""
        if self._decomp is None:
            self._unconsumed += data
            # Need at least the fixed 4-byte header before we can go on.
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            # ...and the full properties blob after it.
            if len(self._unconsumed) <= 4 + psize:
                return b''

            filters = [lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, self._unconsumed[4:4 + psize])]
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=filters)
            data = self._unconsumed[4 + psize:]
            # The buffer is no longer needed; free it.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
662
+
663
+
664
# Map of ZIP compression method numbers (APPNOTE section 4.4.5) to
# human-readable names; used only for repr() output and error messages.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
683
+
684
def _check_compression(compression):
    """Validate that *compression* is a supported method whose codec is present.

    ZIP_STORED needs no codec.  ZIP_DEFLATED/ZIP_BZIP2/ZIP_LZMA require the
    zlib/bz2/lzma modules respectively (each may be falsy when the interpreter
    was built without it).  Raises RuntimeError when the codec module is
    missing, NotImplementedError for an unknown method.
    """
    if compression == ZIP_STORED:
        return
    # Each compressed method maps to (codec module, error text if absent).
    codec_by_method = {
        ZIP_DEFLATED: (zlib, "Compression requires the (missing) zlib module"),
        ZIP_BZIP2: (bz2, "Compression requires the (missing) bz2 module"),
        ZIP_LZMA: (lzma, "Compression requires the (missing) lzma module"),
    }
    try:
        module, error_text = codec_by_method[compression]
    except KeyError:
        raise NotImplementedError(
            "That compression method is not supported") from None
    if not module:
        raise RuntimeError(error_text)
701
+
702
+
703
def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*.

    *compresslevel* overrides the codec default where the codec supports it
    (deflate and bzip2); it is ignored for LZMA.  Returns None for any other
    method (including ZIP_STORED), meaning "write the data as-is".
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # -15: raw deflate stream, no zlib header/trailer.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    # compresslevel is ignored for ZIP_LZMA
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
717
+
718
+
719
def _get_decompressor(compress_type):
    """Return a fresh decompressor for *compress_type*.

    Returns None for ZIP_STORED (no decoding needed).  Validates first via
    _check_compression, which raises if the codec module is unavailable or
    the method unknown; the trailing NotImplementedError branches name the
    method for any type that slips past that check.
    """
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # -15: raw deflate stream, no zlib header/trailer.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()
    descr = compressor_names.get(compress_type)
    if descr:
        raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
    raise NotImplementedError("compression type %d" % (compress_type,))
735
+
736
+
737
+ class _SharedFile:
738
+ def __init__(self, file, pos, close, lock, writing):
739
+ self._file = file
740
+ self._pos = pos
741
+ self._close = close
742
+ self._lock = lock
743
+ self._writing = writing
744
+ self.seekable = file.seekable
745
+ self.tell = file.tell
746
+
747
+ def seek(self, offset, whence=0):
748
+ with self._lock:
749
+ if self._writing():
750
+ raise ValueError("Can't reposition in the ZIP file while "
751
+ "there is an open writing handle on it. "
752
+ "Close the writing handle before trying to read.")
753
+ self._file.seek(offset, whence)
754
+ self._pos = self._file.tell()
755
+ return self._pos
756
+
757
+ def read(self, n=-1):
758
+ with self._lock:
759
+ if self._writing():
760
+ raise ValueError("Can't read from the ZIP file while there "
761
+ "is an open writing handle on it. "
762
+ "Close the writing handle before trying to read.")
763
+ self._file.seek(self._pos)
764
+ data = self._file.read(n)
765
+ self._pos = self._file.tell()
766
+ return data
767
+
768
+ def close(self):
769
+ if self._file is not None:
770
+ fileobj = self._file
771
+ self._file = None
772
+ self._close(fileobj)
773
+
774
+ # Provide the tell method for unseekable stream
775
+ class _Tellable:
776
+ def __init__(self, fp):
777
+ self.fp = fp
778
+ self.offset = 0
779
+
780
+ def write(self, data):
781
+ n = self.fp.write(data)
782
+ self.offset += n
783
+ return n
784
+
785
+ def tell(self):
786
+ return self.offset
787
+
788
+ def flush(self):
789
+ self.fp.flush()
790
+
791
+ def close(self):
792
+ self.fp.close()
793
+
794
+
795
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
    Is returned by ZipFile.open().

    Wraps the shared archive file object, handling decryption (traditional
    ZipCrypto), decompression, CRC verification and — when the underlying
    stream is seekable — random access within the member.
    """

    # Max size supported by decompressor.
    # NOTE(review): precedence makes this 1 << (31 - 1) == 2**30, not 2**31-1;
    # kept as-is to match the upstream value.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        # fileobj is positioned at the start of the member's (possibly
        # encrypted) compressed data; close_fileobj=True transfers ownership.
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size  # compressed bytes remaining
        self._left = zipinfo.file_size               # plaintext bytes remaining

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''   # already-decompressed, not-yet-consumed bytes
        self._offset = 0         # consumer's offset into _readbuffer

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            # No CRC recorded: skip verification entirely.
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                # Remember enough state to rewind to the member's start.
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)


    def _init_decrypter(self):
        """Create the decrypter, consume the 12-byte encryption header and
        return its last byte (the password check byte)."""
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        # used to strengthen the algorithm. The first 11 bytes are
        # completely random, while the 12th contains the MSB of the CRC,
        # or the MSB of the file time depending on the header type
        # and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        # Fall back to the generic (slower) implementation.
        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # Push the freshly read bytes back onto the buffer.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            # Drain the buffer, then everything remaining in the member.
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Entirely satisfiable from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the excess buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more plaintext than the recorded file size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read up to n raw (possibly encrypted) bytes from the archive.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # Underlying file ended before the recorded compressed size.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Seek within the decompressed member.

        Backward seeks rewind to the member start and re-read forward, since
        compressed streams cannot be repositioned arbitrarily.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                # Re-consume the 12-byte encryption header after rewinding.
                self._init_decrypter()

        # Read (and discard) forward in chunks to reach the target position.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Position = decompressed bytes consumed, minus what is still buffered.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1112
+
1113
+
1114
class _ZipWriteFile(io.BufferedIOBase):
    """Write-only file object returned by ZipFile.open(name, 'w').

    Streams (optionally compressed) data for one member into the archive,
    tracking CRC and sizes; on close() it finalizes the member by writing
    either a data descriptor or a corrected local file header.
    """

    def __init__(self, zf, zinfo, zip64):
        # zf: owning ZipFile; zinfo: the member's ZipInfo being written;
        # zip64: whether the local header was/will be written with ZIP64 fields.
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0      # uncompressed bytes written so far
        self._compress_size = 0  # compressed bytes written so far
        self._crc = 0            # running CRC-32 of the uncompressed data

    @property
    def _fileobj(self):
        # Always go through the ZipFile so reopened/replaced fp is honored.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data*; returns the number of uncompressed bytes accepted."""
        if self.closed:
            raise ValueError('I/O operation on closed file.')
        nbytes = len(data)
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member: flush the compressor, record CRC/sizes and
        either append a data descriptor or rewrite the local header."""
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Always release the ZipFile's single-writer flag.
            self._zipfile._writing = False
1189
+
1190
+
1191
+
1192
+ class ZipFile:
1193
+ """ Class with methods to open, read, write, close, list zip files.
1194
+
1195
+ z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1196
+ compresslevel=None)
1197
+
1198
+ file: Either the path to the file, or a file-like object.
1199
+ If it is a path, the file will be opened and closed by ZipFile.
1200
+ mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1201
+ or append 'a'.
1202
+ compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1203
+ ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1204
+ allowZip64: if True ZipFile will create files with ZIP64 extensions when
1205
+ needed, otherwise it will raise an exception when this would
1206
+ be necessary.
1207
+ compresslevel: None (default for the given compression type) or an integer
1208
+ specifying the level to pass to the compressor.
1209
+ When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1210
+ When using ZIP_DEFLATED integers 0 through 9 are accepted.
1211
+ When using ZIP_BZIP2 integers 1 through 9 are accepted.
1212
+
1213
+ """
1214
+
1215
+ fp = None # Set here since __del__ checks it
1216
+ _windows_illegal_name_trans_table = None
1217
+
1218
+ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
1219
+ compresslevel=None, *, strict_timestamps=True):
1220
+ """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1221
+ or append 'a'."""
1222
+ if mode not in ('r', 'w', 'x', 'a'):
1223
+ raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
1224
+
1225
+ _check_compression(compression)
1226
+
1227
+ self._allowZip64 = allowZip64
1228
+ self._didModify = False
1229
+ self.debug = 0 # Level of printing: 0 through 3
1230
+ self.NameToInfo = {} # Find file info given name
1231
+ self.filelist = [] # List of ZipInfo instances for archive
1232
+ self.compression = compression # Method of compression
1233
+ self.compresslevel = compresslevel
1234
+ self.mode = mode
1235
+ self.pwd = None
1236
+ self._comment = b''
1237
+ self._strict_timestamps = strict_timestamps
1238
+
1239
+ # Check if we were passed a file-like object
1240
+ if isinstance(file, os.PathLike):
1241
+ file = os.fspath(file)
1242
+ if isinstance(file, str):
1243
+ # No, it's a filename
1244
+ self._filePassed = 0
1245
+ self.filename = file
1246
+ modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1247
+ 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
1248
+ filemode = modeDict[mode]
1249
+ while True:
1250
+ try:
1251
+ self.fp = io.open(file, filemode)
1252
+ except OSError:
1253
+ if filemode in modeDict:
1254
+ filemode = modeDict[filemode]
1255
+ continue
1256
+ raise
1257
+ break
1258
+ else:
1259
+ self._filePassed = 1
1260
+ self.fp = file
1261
+ self.filename = getattr(file, 'name', None)
1262
+ self._fileRefCnt = 1
1263
+ self._lock = threading.RLock()
1264
+ self._seekable = True
1265
+ self._writing = False
1266
+
1267
+ try:
1268
+ if mode == 'r':
1269
+ self._RealGetContents()
1270
+ elif mode in ('w', 'x'):
1271
+ # set the modified flag so central directory gets written
1272
+ # even if no files are added to the archive
1273
+ self._didModify = True
1274
+ try:
1275
+ self.start_dir = self.fp.tell()
1276
+ except (AttributeError, OSError):
1277
+ self.fp = _Tellable(self.fp)
1278
+ self.start_dir = 0
1279
+ self._seekable = False
1280
+ else:
1281
+ # Some file-like objects can provide tell() but not seek()
1282
+ try:
1283
+ self.fp.seek(self.start_dir)
1284
+ except (AttributeError, OSError):
1285
+ self._seekable = False
1286
+ elif mode == 'a':
1287
+ try:
1288
+ # See if file is a zip file
1289
+ self._RealGetContents()
1290
+ # seek to start of directory and overwrite
1291
+ self.fp.seek(self.start_dir)
1292
+ except BadZipFile:
1293
+ # file is not a zip file, just append
1294
+ self.fp.seek(0, 2)
1295
+
1296
+ # set the modified flag so central directory gets written
1297
+ # even if no files are added to the archive
1298
+ self._didModify = True
1299
+ self.start_dir = self.fp.tell()
1300
+ else:
1301
+ raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
1302
+ except:
1303
+ fp = self.fp
1304
+ self.fp = None
1305
+ self._fpclose(fp)
1306
+ raise
1307
+
1308
+ def __enter__(self):
1309
+ return self
1310
+
1311
+ def __exit__(self, type, value, traceback):
1312
+ self.close()
1313
+
1314
+ def __repr__(self):
1315
+ result = ['<%s.%s' % (self.__class__.__module__,
1316
+ self.__class__.__qualname__)]
1317
+ if self.fp is not None:
1318
+ if self._filePassed:
1319
+ result.append(' file=%r' % self.fp)
1320
+ elif self.filename is not None:
1321
+ result.append(' filename=%r' % self.filename)
1322
+ result.append(' mode=%r' % self.mode)
1323
+ else:
1324
+ result.append(' [closed]')
1325
+ result.append('>')
1326
+ return ''.join(result)
1327
+
1328
+ def _RealGetContents(self):
1329
+ """Read in the table of contents for the ZIP file."""
1330
+ fp = self.fp
1331
+ try:
1332
+ endrec = _EndRecData(fp)
1333
+ except OSError:
1334
+ raise BadZipFile("File is not a zip file")
1335
+ if not endrec:
1336
+ raise BadZipFile("File is not a zip file")
1337
+ if self.debug > 1:
1338
+ print(endrec)
1339
+ size_cd = endrec[_ECD_SIZE] # bytes in central directory
1340
+ offset_cd = endrec[_ECD_OFFSET] # offset of central directory
1341
+ self._comment = endrec[_ECD_COMMENT] # archive comment
1342
+
1343
+ # "concat" is zero, unless zip was concatenated to another file
1344
+ concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1345
+ if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1346
+ # If Zip64 extension structures are present, account for them
1347
+ concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1348
+
1349
+ if self.debug > 2:
1350
+ inferred = concat + offset_cd
1351
+ print("given, inferred, offset", offset_cd, inferred, concat)
1352
+ # self.start_dir: Position of start of central directory
1353
+ self.start_dir = offset_cd + concat
1354
+ fp.seek(self.start_dir, 0)
1355
+ data = fp.read(size_cd)
1356
+ fp = io.BytesIO(data)
1357
+ total = 0
1358
+ while total < size_cd:
1359
+ centdir = fp.read(sizeCentralDir)
1360
+ if len(centdir) != sizeCentralDir:
1361
+ raise BadZipFile("Truncated central directory")
1362
+ centdir = struct.unpack(structCentralDir, centdir)
1363
+ if centdir[_CD_SIGNATURE] != stringCentralDir:
1364
+ raise BadZipFile("Bad magic number for central directory")
1365
+ if self.debug > 2:
1366
+ print(centdir)
1367
+ filename = fp.read(centdir[_CD_FILENAME_LENGTH])
1368
+ flags = centdir[5]
1369
+ if flags & 0x800:
1370
+ # UTF-8 file names extension
1371
+ filename = filename.decode('utf-8')
1372
+ else:
1373
+ # Historical ZIP filename encoding
1374
+ filename = filename.decode('gbk')
1375
+ # Create ZipInfo instance to store file information
1376
+ x = ZipInfo(filename)
1377
+ x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1378
+ x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
1379
+ x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
1380
+ (x.create_version, x.create_system, x.extract_version, x.reserved,
1381
+ x.flag_bits, x.compress_type, t, d,
1382
+ x.CRC, x.compress_size, x.file_size) = centdir[1:12]
1383
+ if x.extract_version > MAX_EXTRACT_VERSION:
1384
+ raise NotImplementedError("zip file version %.1f" %
1385
+ (x.extract_version / 10))
1386
+ x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1387
+ # Convert date/time code to (year, month, day, hour, min, sec)
1388
+ x._raw_time = t
1389
+ x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
1390
+ t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
1391
+
1392
+ x._decodeExtra()
1393
+ x.header_offset = x.header_offset + concat
1394
+ self.filelist.append(x)
1395
+ self.NameToInfo[x.filename] = x
1396
+
1397
+ # update total bytes read from central directory
1398
+ total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1399
+ + centdir[_CD_EXTRA_FIELD_LENGTH]
1400
+ + centdir[_CD_COMMENT_LENGTH])
1401
+
1402
+ if self.debug > 2:
1403
+ print("total", total)
1404
+
1405
+
1406
+ def namelist(self):
1407
+ """Return a list of file names in the archive."""
1408
+ return [data.filename for data in self.filelist]
1409
+
1410
+ def infolist(self):
1411
+ """Return a list of class ZipInfo instances for files in the
1412
+ archive."""
1413
+ return self.filelist
1414
+
1415
+ def printdir(self, file=None):
1416
+ """Print a table of contents for the zip file."""
1417
+ print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1418
+ file=file)
1419
+ for zinfo in self.filelist:
1420
+ date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1421
+ print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1422
+ file=file)
1423
+
1424
+ def testzip(self):
1425
+ """Read all the files and check the CRC."""
1426
+ chunk_size = 2 ** 20
1427
+ for zinfo in self.filelist:
1428
+ try:
1429
+ # Read by chunks, to avoid an OverflowError or a
1430
+ # MemoryError with very large embedded files.
1431
+ with self.open(zinfo.filename, "r") as f:
1432
+ while f.read(chunk_size): # Check CRC-32
1433
+ pass
1434
+ except BadZipFile:
1435
+ return zinfo.filename
1436
+
1437
+ def getinfo(self, name):
1438
+ """Return the instance of ZipInfo given 'name'."""
1439
+ info = self.NameToInfo.get(name)
1440
+ if info is None:
1441
+ raise KeyError(
1442
+ 'There is no item named %r in the archive' % name)
1443
+
1444
+ return info
1445
+
1446
+ def setpassword(self, pwd):
1447
+ """Set default password for encrypted files."""
1448
+ if pwd and not isinstance(pwd, bytes):
1449
+ raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1450
+ if pwd:
1451
+ self.pwd = pwd
1452
+ else:
1453
+ self.pwd = None
1454
+
1455
+ @property
1456
+ def comment(self):
1457
+ """The comment text associated with the ZIP file."""
1458
+ return self._comment
1459
+
1460
+ @comment.setter
1461
+ def comment(self, comment):
1462
+ if not isinstance(comment, bytes):
1463
+ raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1464
+ # check for valid comment length
1465
+ if len(comment) > ZIP_MAX_COMMENT:
1466
+ import warnings
1467
+ warnings.warn('Archive comment is too long; truncating to %d bytes'
1468
+ % ZIP_MAX_COMMENT, stacklevel=2)
1469
+ comment = comment[:ZIP_MAX_COMMENT]
1470
+ self._comment = comment
1471
+ self._didModify = True
1472
+
1473
+ def read(self, name, pwd=None):
1474
+ """Return file bytes for name."""
1475
+ with self.open(name, "r", pwd) as fp:
1476
+ return fp.read()
1477
+
1478
+ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
1479
+ """Return file-like object for 'name'.
1480
+
1481
+ name is a string for the file name within the ZIP file, or a ZipInfo
1482
+ object.
1483
+
1484
+ mode should be 'r' to read a file already in the ZIP file, or 'w' to
1485
+ write to a file newly added to the archive.
1486
+
1487
+ pwd is the password to decrypt files (only used for reading).
1488
+
1489
+ When writing, if the file size is not known in advance but may exceed
1490
+ 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1491
+ files. If the size is known in advance, it is best to pass a ZipInfo
1492
+ instance for name, with zinfo.file_size set.
1493
+ """
1494
+ if mode not in {"r", "w"}:
1495
+ raise ValueError('open() requires mode "r" or "w"')
1496
+ if pwd and not isinstance(pwd, bytes):
1497
+ raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1498
+ if pwd and (mode == "w"):
1499
+ raise ValueError("pwd is only supported for reading files")
1500
+ if not self.fp:
1501
+ raise ValueError(
1502
+ "Attempt to use ZIP archive that was already closed")
1503
+
1504
+ # Make sure we have an info object
1505
+ if isinstance(name, ZipInfo):
1506
+ # 'name' is already an info object
1507
+ zinfo = name
1508
+ elif mode == 'w':
1509
+ zinfo = ZipInfo(name)
1510
+ zinfo.compress_type = self.compression
1511
+ zinfo._compresslevel = self.compresslevel
1512
+ else:
1513
+ # Get info object for name
1514
+ zinfo = self.getinfo(name)
1515
+
1516
+ if mode == 'w':
1517
+ return self._open_to_write(zinfo, force_zip64=force_zip64)
1518
+
1519
+ if self._writing:
1520
+ raise ValueError("Can't read from the ZIP file while there "
1521
+ "is an open writing handle on it. "
1522
+ "Close the writing handle before trying to read.")
1523
+
1524
+ # Open for reading:
1525
+ self._fileRefCnt += 1
1526
+ zef_file = _SharedFile(self.fp, zinfo.header_offset,
1527
+ self._fpclose, self._lock, lambda: self._writing)
1528
+ try:
1529
+ # Skip the file header:
1530
+ fheader = zef_file.read(sizeFileHeader)
1531
+ if len(fheader) != sizeFileHeader:
1532
+ raise BadZipFile("Truncated file header")
1533
+ fheader = struct.unpack(structFileHeader, fheader)
1534
+ if fheader[_FH_SIGNATURE] != stringFileHeader:
1535
+ raise BadZipFile("Bad magic number for file header")
1536
+
1537
+ fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1538
+ if fheader[_FH_EXTRA_FIELD_LENGTH]:
1539
+ zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
1540
+
1541
+ if zinfo.flag_bits & 0x20:
1542
+ # Zip 2.7: compressed patched data
1543
+ raise NotImplementedError("compressed patched data (flag bit 5)")
1544
+
1545
+ if zinfo.flag_bits & 0x40:
1546
+ # strong encryption
1547
+ raise NotImplementedError("strong encryption (flag bit 6)")
1548
+
1549
+ if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
1550
+ # UTF-8 filename
1551
+ fname_str = fname.decode("utf-8")
1552
+ else:
1553
+ fname_str = fname.decode("gbk")
1554
+
1555
+ if fname_str != zinfo.orig_filename:
1556
+ raise BadZipFile(
1557
+ 'File name in directory %r and header %r differ.'
1558
+ % (zinfo.orig_filename, fname))
1559
+
1560
+ # check for encrypted flag & handle password
1561
+ is_encrypted = zinfo.flag_bits & 0x1
1562
+ if is_encrypted:
1563
+ if not pwd:
1564
+ pwd = self.pwd
1565
+ if not pwd:
1566
+ raise RuntimeError("File %r is encrypted, password "
1567
+ "required for extraction" % name)
1568
+ else:
1569
+ pwd = None
1570
+
1571
+ return ZipExtFile(zef_file, mode, zinfo, pwd, True)
1572
+ except:
1573
+ zef_file.close()
1574
+ raise
1575
+
1576
+ def _open_to_write(self, zinfo, force_zip64=False):
1577
+ if force_zip64 and not self._allowZip64:
1578
+ raise ValueError(
1579
+ "force_zip64 is True, but allowZip64 was False when opening "
1580
+ "the ZIP file."
1581
+ )
1582
+ if self._writing:
1583
+ raise ValueError("Can't write to the ZIP file while there is "
1584
+ "another write handle open on it. "
1585
+ "Close the first handle before opening another.")
1586
+
1587
+ # Sizes and CRC are overwritten with correct data after processing the file
1588
+ if not hasattr(zinfo, 'file_size'):
1589
+ zinfo.file_size = 0
1590
+ zinfo.compress_size = 0
1591
+ zinfo.CRC = 0
1592
+
1593
+ zinfo.flag_bits = 0x00
1594
+ if zinfo.compress_type == ZIP_LZMA:
1595
+ # Compressed data includes an end-of-stream (EOS) marker
1596
+ zinfo.flag_bits |= 0x02
1597
+ if not self._seekable:
1598
+ zinfo.flag_bits |= 0x08
1599
+
1600
+ if not zinfo.external_attr:
1601
+ zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1602
+
1603
+ # Compressed size can be larger than uncompressed size
1604
+ zip64 = self._allowZip64 and \
1605
+ (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1606
+
1607
+ if self._seekable:
1608
+ self.fp.seek(self.start_dir)
1609
+ zinfo.header_offset = self.fp.tell()
1610
+
1611
+ self._writecheck(zinfo)
1612
+ self._didModify = True
1613
+
1614
+ self.fp.write(zinfo.FileHeader(zip64))
1615
+
1616
+ self._writing = True
1617
+ return _ZipWriteFile(self, zinfo, zip64)
1618
+
1619
+ def extract(self, member, path=None, pwd=None):
1620
+ """Extract a member from the archive to the current working directory,
1621
+ using its full name. Its file information is extracted as accurately
1622
+ as possible. `member' may be a filename or a ZipInfo object. You can
1623
+ specify a different directory using `path'.
1624
+ """
1625
+ if path is None:
1626
+ path = os.getcwd()
1627
+ else:
1628
+ path = os.fspath(path)
1629
+
1630
+ return self._extract_member(member, path, pwd)
1631
+
1632
+ def extractall(self, path=None, members=None, pwd=None):
1633
+ """Extract all members from the archive to the current working
1634
+ directory. `path' specifies a different directory to extract to.
1635
+ `members' is optional and must be a subset of the list returned
1636
+ by namelist().
1637
+ """
1638
+ if members is None:
1639
+ members = self.namelist()
1640
+
1641
+ if path is None:
1642
+ path = os.getcwd()
1643
+ else:
1644
+ path = os.fspath(path)
1645
+
1646
+ for zipinfo in members:
1647
+ self._extract_member(zipinfo, path, pwd)
1648
+
1649
+ @classmethod
1650
+ def _sanitize_windows_name(cls, arcname, pathsep):
1651
+ """Replace bad characters and remove trailing dots from parts."""
1652
+ table = cls._windows_illegal_name_trans_table
1653
+ if not table:
1654
+ illegal = ':<>|"?*'
1655
+ table = str.maketrans(illegal, '_' * len(illegal))
1656
+ cls._windows_illegal_name_trans_table = table
1657
+ arcname = arcname.translate(table)
1658
+ # remove trailing dots
1659
+ arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1660
+ # rejoin, removing empty parts.
1661
+ arcname = pathsep.join(x for x in arcname if x)
1662
+ return arcname
1663
+
1664
+ def _extract_member(self, member, targetpath, pwd):
1665
+ """Extract the ZipInfo object 'member' to a physical
1666
+ file on the path targetpath.
1667
+ """
1668
+ if not isinstance(member, ZipInfo):
1669
+ member = self.getinfo(member)
1670
+
1671
+ # build the destination pathname, replacing
1672
+ # forward slashes to platform specific separators.
1673
+ arcname = member.filename.replace('/', os.path.sep)
1674
+
1675
+ if os.path.altsep:
1676
+ arcname = arcname.replace(os.path.altsep, os.path.sep)
1677
+ # interpret absolute pathname as relative, remove drive letter or
1678
+ # UNC path, redundant separators, "." and ".." components.
1679
+ arcname = os.path.splitdrive(arcname)[1]
1680
+ invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1681
+ arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1682
+ if x not in invalid_path_parts)
1683
+ if os.path.sep == '\\':
1684
+ # filter illegal characters on Windows
1685
+ arcname = self._sanitize_windows_name(arcname, os.path.sep)
1686
+
1687
+ targetpath = os.path.join(targetpath, arcname)
1688
+ targetpath = os.path.normpath(targetpath)
1689
+
1690
+ # Create all upper directories if necessary.
1691
+ upperdirs = os.path.dirname(targetpath)
1692
+ if upperdirs and not os.path.exists(upperdirs):
1693
+ os.makedirs(upperdirs)
1694
+
1695
+ if member.is_dir():
1696
+ if not os.path.isdir(targetpath):
1697
+ os.mkdir(targetpath)
1698
+ return targetpath
1699
+
1700
+ with self.open(member, pwd=pwd) as source, \
1701
+ open(targetpath, "wb") as target:
1702
+ shutil.copyfileobj(source, target)
1703
+
1704
+ return targetpath
1705
+
1706
+ def _writecheck(self, zinfo):
1707
+ """Check for errors before writing a file to the archive."""
1708
+ if zinfo.filename in self.NameToInfo:
1709
+ import warnings
1710
+ warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1711
+ if self.mode not in ('w', 'x', 'a'):
1712
+ raise ValueError("write() requires mode 'w', 'x', or 'a'")
1713
+ if not self.fp:
1714
+ raise ValueError(
1715
+ "Attempt to write ZIP archive that was already closed")
1716
+ _check_compression(zinfo.compress_type)
1717
+ if not self._allowZip64:
1718
+ requires_zip64 = None
1719
+ if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1720
+ requires_zip64 = "Files count"
1721
+ elif zinfo.file_size > ZIP64_LIMIT:
1722
+ requires_zip64 = "Filesize"
1723
+ elif zinfo.header_offset > ZIP64_LIMIT:
1724
+ requires_zip64 = "Zipfile size"
1725
+ if requires_zip64:
1726
+ raise LargeZipFile(requires_zip64 +
1727
+ " would require ZIP64 extensions")
1728
+
1729
+ def write(self, filename, arcname=None,
1730
+ compress_type=None, compresslevel=None):
1731
+ """Put the bytes from filename into the archive under the name
1732
+ arcname."""
1733
+ if not self.fp:
1734
+ raise ValueError(
1735
+ "Attempt to write to ZIP archive that was already closed")
1736
+ if self._writing:
1737
+ raise ValueError(
1738
+ "Can't write to ZIP archive while an open writing handle exists"
1739
+ )
1740
+
1741
+ zinfo = ZipInfo.from_file(filename, arcname,
1742
+ strict_timestamps=self._strict_timestamps)
1743
+
1744
+ if zinfo.is_dir():
1745
+ zinfo.compress_size = 0
1746
+ zinfo.CRC = 0
1747
+ else:
1748
+ if compress_type is not None:
1749
+ zinfo.compress_type = compress_type
1750
+ else:
1751
+ zinfo.compress_type = self.compression
1752
+
1753
+ if compresslevel is not None:
1754
+ zinfo._compresslevel = compresslevel
1755
+ else:
1756
+ zinfo._compresslevel = self.compresslevel
1757
+
1758
+ if zinfo.is_dir():
1759
+ with self._lock:
1760
+ if self._seekable:
1761
+ self.fp.seek(self.start_dir)
1762
+ zinfo.header_offset = self.fp.tell() # Start of header bytes
1763
+ if zinfo.compress_type == ZIP_LZMA:
1764
+ # Compressed data includes an end-of-stream (EOS) marker
1765
+ zinfo.flag_bits |= 0x02
1766
+
1767
+ self._writecheck(zinfo)
1768
+ self._didModify = True
1769
+
1770
+ self.filelist.append(zinfo)
1771
+ self.NameToInfo[zinfo.filename] = zinfo
1772
+ self.fp.write(zinfo.FileHeader(False))
1773
+ self.start_dir = self.fp.tell()
1774
+ else:
1775
+ with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1776
+ shutil.copyfileobj(src, dest, 1024*8)
1777
+
1778
+ def writestr(self, zinfo_or_arcname, data,
1779
+ compress_type=None, compresslevel=None):
1780
+ """Write a file into the archive. The contents is 'data', which
1781
+ may be either a 'str' or a 'bytes' instance; if it is a 'str',
1782
+ it is encoded as UTF-8 first.
1783
+ 'zinfo_or_arcname' is either a ZipInfo instance or
1784
+ the name of the file in the archive."""
1785
+ if isinstance(data, str):
1786
+ data = data.encode("utf-8")
1787
+ if not isinstance(zinfo_or_arcname, ZipInfo):
1788
+ zinfo = ZipInfo(filename=zinfo_or_arcname,
1789
+ date_time=time.localtime(time.time())[:6])
1790
+ zinfo.compress_type = self.compression
1791
+ zinfo._compresslevel = self.compresslevel
1792
+ if zinfo.filename[-1] == '/':
1793
+ zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1794
+ zinfo.external_attr |= 0x10 # MS-DOS directory flag
1795
+ else:
1796
+ zinfo.external_attr = 0o600 << 16 # ?rw-------
1797
+ else:
1798
+ zinfo = zinfo_or_arcname
1799
+
1800
+ if not self.fp:
1801
+ raise ValueError(
1802
+ "Attempt to write to ZIP archive that was already closed")
1803
+ if self._writing:
1804
+ raise ValueError(
1805
+ "Can't write to ZIP archive while an open writing handle exists."
1806
+ )
1807
+
1808
+ if compress_type is not None:
1809
+ zinfo.compress_type = compress_type
1810
+
1811
+ if compresslevel is not None:
1812
+ zinfo._compresslevel = compresslevel
1813
+
1814
+ zinfo.file_size = len(data) # Uncompressed size
1815
+ with self._lock:
1816
+ with self.open(zinfo, mode='w') as dest:
1817
+ dest.write(data)
1818
+
1819
+ def __del__(self):
1820
+ """Call the "close()" method in case the user forgot."""
1821
+ self.close()
1822
+
1823
+ def close(self):
1824
+ """Close the file, and for mode 'w', 'x' and 'a' write the ending
1825
+ records."""
1826
+ if self.fp is None:
1827
+ return
1828
+
1829
+ if self._writing:
1830
+ raise ValueError("Can't close the ZIP file while there is "
1831
+ "an open writing handle on it. "
1832
+ "Close the writing handle before closing the zip.")
1833
+
1834
+ try:
1835
+ if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1836
+ with self._lock:
1837
+ if self._seekable:
1838
+ self.fp.seek(self.start_dir)
1839
+ self._write_end_record()
1840
+ finally:
1841
+ fp = self.fp
1842
+ self.fp = None
1843
+ self._fpclose(fp)
1844
+
1845
+ def _write_end_record(self):
1846
+ for zinfo in self.filelist: # write central directory
1847
+ dt = zinfo.date_time
1848
+ dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1849
+ dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1850
+ extra = []
1851
+ if zinfo.file_size > ZIP64_LIMIT \
1852
+ or zinfo.compress_size > ZIP64_LIMIT:
1853
+ extra.append(zinfo.file_size)
1854
+ extra.append(zinfo.compress_size)
1855
+ file_size = 0xffffffff
1856
+ compress_size = 0xffffffff
1857
+ else:
1858
+ file_size = zinfo.file_size
1859
+ compress_size = zinfo.compress_size
1860
+
1861
+ if zinfo.header_offset > ZIP64_LIMIT:
1862
+ extra.append(zinfo.header_offset)
1863
+ header_offset = 0xffffffff
1864
+ else:
1865
+ header_offset = zinfo.header_offset
1866
+
1867
+ extra_data = zinfo.extra
1868
+ min_version = 0
1869
+ if extra:
1870
+ # Append a ZIP64 field to the extra's
1871
+ extra_data = _strip_extra(extra_data, (1,))
1872
+ extra_data = struct.pack(
1873
+ '<HH' + 'Q'*len(extra),
1874
+ 1, 8*len(extra), *extra) + extra_data
1875
+
1876
+ min_version = ZIP64_VERSION
1877
+
1878
+ if zinfo.compress_type == ZIP_BZIP2:
1879
+ min_version = max(BZIP2_VERSION, min_version)
1880
+ elif zinfo.compress_type == ZIP_LZMA:
1881
+ min_version = max(LZMA_VERSION, min_version)
1882
+
1883
+ extract_version = max(min_version, zinfo.extract_version)
1884
+ create_version = max(min_version, zinfo.create_version)
1885
+ try:
1886
+ filename, flag_bits = zinfo._encodeFilenameFlags()
1887
+ centdir = struct.pack(structCentralDir,
1888
+ stringCentralDir, create_version,
1889
+ zinfo.create_system, extract_version, zinfo.reserved,
1890
+ flag_bits, zinfo.compress_type, dostime, dosdate,
1891
+ zinfo.CRC, compress_size, file_size,
1892
+ len(filename), len(extra_data), len(zinfo.comment),
1893
+ 0, zinfo.internal_attr, zinfo.external_attr,
1894
+ header_offset)
1895
+ except DeprecationWarning:
1896
+ print((structCentralDir, stringCentralDir, create_version,
1897
+ zinfo.create_system, extract_version, zinfo.reserved,
1898
+ zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1899
+ zinfo.CRC, compress_size, file_size,
1900
+ len(zinfo.filename), len(extra_data), len(zinfo.comment),
1901
+ 0, zinfo.internal_attr, zinfo.external_attr,
1902
+ header_offset), file=sys.stderr)
1903
+ raise
1904
+ self.fp.write(centdir)
1905
+ self.fp.write(filename)
1906
+ self.fp.write(extra_data)
1907
+ self.fp.write(zinfo.comment)
1908
+
1909
+ pos2 = self.fp.tell()
1910
+ # Write end-of-zip-archive record
1911
+ centDirCount = len(self.filelist)
1912
+ centDirSize = pos2 - self.start_dir
1913
+ centDirOffset = self.start_dir
1914
+ requires_zip64 = None
1915
+ if centDirCount > ZIP_FILECOUNT_LIMIT:
1916
+ requires_zip64 = "Files count"
1917
+ elif centDirOffset > ZIP64_LIMIT:
1918
+ requires_zip64 = "Central directory offset"
1919
+ elif centDirSize > ZIP64_LIMIT:
1920
+ requires_zip64 = "Central directory size"
1921
+ if requires_zip64:
1922
+ # Need to write the ZIP64 end-of-archive records
1923
+ if not self._allowZip64:
1924
+ raise LargeZipFile(requires_zip64 +
1925
+ " would require ZIP64 extensions")
1926
+ zip64endrec = struct.pack(
1927
+ structEndArchive64, stringEndArchive64,
1928
+ 44, 45, 45, 0, 0, centDirCount, centDirCount,
1929
+ centDirSize, centDirOffset)
1930
+ self.fp.write(zip64endrec)
1931
+
1932
+ zip64locrec = struct.pack(
1933
+ structEndArchive64Locator,
1934
+ stringEndArchive64Locator, 0, pos2, 1)
1935
+ self.fp.write(zip64locrec)
1936
+ centDirCount = min(centDirCount, 0xFFFF)
1937
+ centDirSize = min(centDirSize, 0xFFFFFFFF)
1938
+ centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1939
+
1940
+ endrec = struct.pack(structEndArchive, stringEndArchive,
1941
+ 0, 0, centDirCount, centDirCount,
1942
+ centDirSize, centDirOffset, len(self._comment))
1943
+ self.fp.write(endrec)
1944
+ self.fp.write(self._comment)
1945
+ if self.mode == "a":
1946
+ self.fp.truncate()
1947
+ self.fp.flush()
1948
+
1949
+ def _fpclose(self, fp):
1950
+ assert self._fileRefCnt > 0
1951
+ self._fileRefCnt -= 1
1952
+ if not self._fileRefCnt and not self._filePassed:
1953
+ fp.close()
1954
+
1955
+
1956
+ class PyZipFile(ZipFile):
1957
+ """Class to create ZIP archives with Python library files and packages."""
1958
+
1959
+ def __init__(self, file, mode="r", compression=ZIP_STORED,
1960
+ allowZip64=True, optimize=-1):
1961
+ ZipFile.__init__(self, file, mode=mode, compression=compression,
1962
+ allowZip64=allowZip64)
1963
+ self._optimize = optimize
1964
+
1965
+ def writepy(self, pathname, basename="", filterfunc=None):
1966
+ """Add all files from "pathname" to the ZIP archive.
1967
+
1968
+ If pathname is a package directory, search the directory and
1969
+ all package subdirectories recursively for all *.py and enter
1970
+ the modules into the archive. If pathname is a plain
1971
+ directory, listdir *.py and enter all modules. Else, pathname
1972
+ must be a Python *.py file and the module will be put into the
1973
+ archive. Added modules are always module.pyc.
1974
+ This method will compile the module.py into module.pyc if
1975
+ necessary.
1976
+ If filterfunc(pathname) is given, it is called with every argument.
1977
+ When it is False, the file or directory is skipped.
1978
+ """
1979
+ pathname = os.fspath(pathname)
1980
+ if filterfunc and not filterfunc(pathname):
1981
+ if self.debug:
1982
+ label = 'path' if os.path.isdir(pathname) else 'file'
1983
+ print('%s %r skipped by filterfunc' % (label, pathname))
1984
+ return
1985
+ dir, name = os.path.split(pathname)
1986
+ if os.path.isdir(pathname):
1987
+ initname = os.path.join(pathname, "__init__.py")
1988
+ if os.path.isfile(initname):
1989
+ # This is a package directory, add it
1990
+ if basename:
1991
+ basename = "%s/%s" % (basename, name)
1992
+ else:
1993
+ basename = name
1994
+ if self.debug:
1995
+ print("Adding package in", pathname, "as", basename)
1996
+ fname, arcname = self._get_codename(initname[0:-3], basename)
1997
+ if self.debug:
1998
+ print("Adding", arcname)
1999
+ self.write(fname, arcname)
2000
+ dirlist = sorted(os.listdir(pathname))
2001
+ dirlist.remove("__init__.py")
2002
+ # Add all *.py files and package subdirectories
2003
+ for filename in dirlist:
2004
+ path = os.path.join(pathname, filename)
2005
+ root, ext = os.path.splitext(filename)
2006
+ if os.path.isdir(path):
2007
+ if os.path.isfile(os.path.join(path, "__init__.py")):
2008
+ # This is a package directory, add it
2009
+ self.writepy(path, basename,
2010
+ filterfunc=filterfunc) # Recursive call
2011
+ elif ext == ".py":
2012
+ if filterfunc and not filterfunc(path):
2013
+ if self.debug:
2014
+ print('file %r skipped by filterfunc' % path)
2015
+ continue
2016
+ fname, arcname = self._get_codename(path[0:-3],
2017
+ basename)
2018
+ if self.debug:
2019
+ print("Adding", arcname)
2020
+ self.write(fname, arcname)
2021
+ else:
2022
+ # This is NOT a package directory, add its files at top level
2023
+ if self.debug:
2024
+ print("Adding files from directory", pathname)
2025
+ for filename in sorted(os.listdir(pathname)):
2026
+ path = os.path.join(pathname, filename)
2027
+ root, ext = os.path.splitext(filename)
2028
+ if ext == ".py":
2029
+ if filterfunc and not filterfunc(path):
2030
+ if self.debug:
2031
+ print('file %r skipped by filterfunc' % path)
2032
+ continue
2033
+ fname, arcname = self._get_codename(path[0:-3],
2034
+ basename)
2035
+ if self.debug:
2036
+ print("Adding", arcname)
2037
+ self.write(fname, arcname)
2038
+ else:
2039
+ if pathname[-3:] != ".py":
2040
+ raise RuntimeError(
2041
+ 'Files added with writepy() must end with ".py"')
2042
+ fname, arcname = self._get_codename(pathname[0:-3], basename)
2043
+ if self.debug:
2044
+ print("Adding file", arcname)
2045
+ self.write(fname, arcname)
2046
+
2047
+ def _get_codename(self, pathname, basename):
2048
+ """Return (filename, archivename) for the path.
2049
+
2050
+ Given a module name path, return the correct file path and
2051
+ archive name, compiling if necessary. For example, given
2052
+ /python/lib/string, return (/python/lib/string.pyc, string).
2053
+ """
2054
+ def _compile(file, optimize=-1):
2055
+ import py_compile
2056
+ if self.debug:
2057
+ print("Compiling", file)
2058
+ try:
2059
+ py_compile.compile(file, doraise=True, optimize=optimize)
2060
+ except py_compile.PyCompileError as err:
2061
+ print(err.msg)
2062
+ return False
2063
+ return True
2064
+
2065
+ file_py = pathname + ".py"
2066
+ file_pyc = pathname + ".pyc"
2067
+ pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2068
+ pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2069
+ pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2070
+ if self._optimize == -1:
2071
+ # legacy mode: use whatever file is present
2072
+ if (os.path.isfile(file_pyc) and
2073
+ os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2074
+ # Use .pyc file.
2075
+ arcname = fname = file_pyc
2076
+ elif (os.path.isfile(pycache_opt0) and
2077
+ os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2078
+ # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2079
+ # file name in the archive.
2080
+ fname = pycache_opt0
2081
+ arcname = file_pyc
2082
+ elif (os.path.isfile(pycache_opt1) and
2083
+ os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2084
+ # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2085
+ # file name in the archive.
2086
+ fname = pycache_opt1
2087
+ arcname = file_pyc
2088
+ elif (os.path.isfile(pycache_opt2) and
2089
+ os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2090
+ # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2091
+ # file name in the archive.
2092
+ fname = pycache_opt2
2093
+ arcname = file_pyc
2094
+ else:
2095
+ # Compile py into PEP 3147 pyc file.
2096
+ if _compile(file_py):
2097
+ if sys.flags.optimize == 0:
2098
+ fname = pycache_opt0
2099
+ elif sys.flags.optimize == 1:
2100
+ fname = pycache_opt1
2101
+ else:
2102
+ fname = pycache_opt2
2103
+ arcname = file_pyc
2104
+ else:
2105
+ fname = arcname = file_py
2106
+ else:
2107
+ # new mode: use given optimization level
2108
+ if self._optimize == 0:
2109
+ fname = pycache_opt0
2110
+ arcname = file_pyc
2111
+ else:
2112
+ arcname = file_pyc
2113
+ if self._optimize == 1:
2114
+ fname = pycache_opt1
2115
+ elif self._optimize == 2:
2116
+ fname = pycache_opt2
2117
+ else:
2118
+ msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2119
+ raise ValueError(msg)
2120
+ if not (os.path.isfile(fname) and
2121
+ os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2122
+ if not _compile(file_py, optimize=self._optimize):
2123
+ fname = arcname = file_py
2124
+ archivename = os.path.split(arcname)[1]
2125
+ if basename:
2126
+ archivename = "%s/%s" % (basename, archivename)
2127
+ return (fname, archivename)
2128
+
2129
+
2130
+ def _parents(path):
2131
+ """
2132
+ Given a path with elements separated by
2133
+ posixpath.sep, generate all parents of that path.
2134
+
2135
+ >>> list(_parents('b/d'))
2136
+ ['b']
2137
+ >>> list(_parents('/b/d/'))
2138
+ ['/b']
2139
+ >>> list(_parents('b/d/f/'))
2140
+ ['b/d', 'b']
2141
+ >>> list(_parents('b'))
2142
+ []
2143
+ >>> list(_parents(''))
2144
+ []
2145
+ """
2146
+ return itertools.islice(_ancestry(path), 1, None)
2147
+
2148
+
2149
+ def _ancestry(path):
2150
+ """
2151
+ Given a path with elements separated by
2152
+ posixpath.sep, generate all elements of that path
2153
+
2154
+ >>> list(_ancestry('b/d'))
2155
+ ['b/d', 'b']
2156
+ >>> list(_ancestry('/b/d/'))
2157
+ ['/b/d', '/b']
2158
+ >>> list(_ancestry('b/d/f/'))
2159
+ ['b/d/f', 'b/d', 'b']
2160
+ >>> list(_ancestry('b'))
2161
+ ['b']
2162
+ >>> list(_ancestry(''))
2163
+ []
2164
+ """
2165
+ path = path.rstrip(posixpath.sep)
2166
+ while path and path != posixpath.sep:
2167
+ yield path
2168
+ path, tail = posixpath.split(path)
2169
+
2170
+
2171
# dict.fromkeys preserves first-seen insertion order (guaranteed since
# Python 3.7) and drops repeats, so it doubles as an order-preserving
# dedupe; callers iterate the resulting dict's keys.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""
2173
+
2174
+
2175
+ def _difference(minuend, subtrahend):
2176
+ """
2177
+ Return items in minuend not in subtrahend, retaining order
2178
+ with O(1) lookup.
2179
+ """
2180
+ return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2181
+
2182
+
2183
class CompleteDirs(ZipFile):
    """
    A ZipFile subclass that ensures that implied directories
    are always included in the namelist.
    """

    @staticmethod
    def _implied_dirs(names):
        # Every parent of every member, rendered as "dir/" entries,
        # minus the entries the archive already records, deduplicated
        # in first-seen order.
        ancestors = itertools.chain.from_iterable(map(_parents, names))
        candidates = (ancestor + posixpath.sep for ancestor in ancestors)
        return _dedupe(_difference(candidates, names))

    def namelist(self):
        recorded = super(CompleteDirs, self).namelist()
        return recorded + list(self._implied_dirs(recorded))

    def _name_set(self):
        # Set view of namelist for O(1) membership tests.
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        If the name represents a directory, return that name
        as a directory (with the trailing slash).
        """
        known = self._name_set()
        as_dir = name + '/'
        if name not in known and as_dir in known:
            return as_dir
        return name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, ZipFile):
            return cls(source)

        # Only allow for FastPath when supplied zipfile is read-only
        if 'r' not in source.mode:
            cls = CompleteDirs

        # Re-skin the existing ZipFile in place: share its state without
        # re-reading the archive.
        wrapped = cls.__new__(cls)
        vars(wrapped).update(vars(source))
        return wrapped
2231
+
2232
+
2233
class FastLookup(CompleteDirs):
    """
    ZipFile subclass to ensure implicit
    dirs exist and are resolved rapidly.
    """

    def namelist(self):
        # Compute once, then serve from the (name-mangled) cache.
        try:
            return self.__names
        except AttributeError:
            self.__names = super(FastLookup, self).namelist()
            return self.__names

    def _name_set(self):
        # Same memoization pattern for the set-based lookup.
        try:
            return self.__lookup
        except AttributeError:
            self.__lookup = super(FastLookup, self)._name_set()
            return self.__lookup
2249
+
2250
+
2251
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # Repr template; filled in by __repr__ via str.format(self=self).
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        # Normalize whatever we were given (filename, ZipFile, or an
        # existing CompleteDirs) into a cached FastLookup wrapper.
        self.root = FastLookup.make(root)
        self.at = at

    @property
    def open(self):
        # A bound opener for this member; callers invoke the returned
        # callable exactly like ZipFile.open.
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        trimmed = self.at.rstrip("/")
        return posixpath.basename(trimmed)

    def read_text(self, *args, **kwargs):
        with self.open() as stream:
            reader = io.TextIOWrapper(stream, *args, **kwargs)
            return reader.read()

    def read_bytes(self):
        with self.open() as stream:
            return stream.read()

    def _is_child(self, path):
        # True when *path* sits directly inside this directory.
        own = self.at.rstrip("/")
        return posixpath.dirname(path.at.rstrip("/")) == own

    def _next(self, at):
        # Sibling constructor sharing the same (cached) root archive.
        return Path(self.root, at)

    def is_dir(self):
        if not self.at:
            return True  # the archive root is always a directory
        return self.at.endswith("/")

    def is_file(self):
        return not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        return (
            child
            for child in map(self._next, self.root.namelist())
            if self._is_child(child)
        )

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        # Resolve bare directory names to their trailing-slash form so
        # is_dir()/iterdir() behave on the joined path.
        combined = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(combined))

    __truediv__ = joinpath

    @property
    def parent(self):
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
2377
+
2378
+
2379
def main(args=None):
    """
    Command-line interface: list (-l), extract (-e), create (-c) or
    test (-t) a zip archive.  *args* defaults to sys.argv[1:] via
    argparse; exactly one action is required.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    opts = parser.parse_args(args)

    if opts.test is not None:
        with ZipFile(opts.test, 'r') as zf:
            corrupted = zf.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")

    elif opts.list is not None:
        with ZipFile(opts.list, 'r') as zf:
            zf.printdir()

    elif opts.extract is not None:
        archive, destination = opts.extract
        with ZipFile(archive, 'r') as zf:
            zf.extractall(destination)

    elif opts.create is not None:
        archive_name = opts.create.pop(0)
        sources = opts.create

        def add_entry(zf, path, zippath):
            # Files are stored deflated; directories recurse, writing an
            # explicit directory entry whenever zippath is non-empty.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for member in sorted(os.listdir(path)):
                    add_entry(zf, os.path.join(path, member),
                              os.path.join(zippath, member))
            # anything else (missing paths, special files) is skipped

        with ZipFile(archive_name, 'w') as zf:
            for path in sources:
                zippath = os.path.basename(path)
                if not zippath:
                    # Trailing-slash input: fall back to the directory name.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_entry(zf, path, zippath)
2438
+
2439
+
2440
# Script entry point: dispatch to the argparse-driven CLI above.
if __name__ == "__main__":
    main()