datamule 1.6.3__py3-none-any.whl → 1.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/submission.py +50 -18
- {datamule-1.6.3.dist-info → datamule-1.6.4.dist-info}/METADATA +1 -1
- {datamule-1.6.3.dist-info → datamule-1.6.4.dist-info}/RECORD +5 -5
- {datamule-1.6.3.dist-info → datamule-1.6.4.dist-info}/WHEEL +0 -0
- {datamule-1.6.3.dist-info → datamule-1.6.4.dist-info}/top_level.txt +0 -0
datamule/submission.py
CHANGED
@@ -144,24 +144,57 @@ class Submission:
|
|
144
144
|
|
145
145
|
# Handle regular path case
|
146
146
|
else:
|
147
|
-
#
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
147
|
+
# Check if path is a tar file (old format)
|
148
|
+
if self.path.suffix == '.tar':
|
149
|
+
with tarfile.open(self.path, 'r') as tar:
|
150
|
+
# Try to extract the file, handling compression
|
151
|
+
try:
|
152
|
+
content = tar.extractfile(filename).read()
|
153
|
+
actual_filename = filename
|
154
|
+
except:
|
155
|
+
try:
|
156
|
+
content = tar.extractfile(filename + '.gz').read()
|
157
|
+
actual_filename = filename + '.gz'
|
158
|
+
is_compressed = 'gzip'
|
159
|
+
except:
|
160
|
+
try:
|
161
|
+
content = tar.extractfile(filename + '.zst').read()
|
162
|
+
actual_filename = filename + '.zst'
|
163
|
+
is_compressed = 'zstd'
|
164
|
+
except:
|
165
|
+
raise FileNotFoundError(f"Document file not found in tar: {filename}")
|
166
|
+
|
167
|
+
# Decompress if compressed
|
168
|
+
if is_compressed == 'gzip':
|
169
|
+
content = gzip.decompress(content)
|
170
|
+
elif is_compressed == 'zstd':
|
171
|
+
content = zstd.ZstdDecompressor().decompress(content)
|
172
|
+
|
173
|
+
# Decode text files
|
174
|
+
if extension in ['.htm', '.html', '.txt', '.xml']:
|
175
|
+
content = content.decode('utf-8', errors='replace')
|
176
|
+
|
177
|
+
document_path = f"{self.path}::{actual_filename}"
|
161
178
|
|
162
|
-
|
163
|
-
|
164
|
-
|
179
|
+
else:
|
180
|
+
# Regular directory case
|
181
|
+
document_path = self.path / filename
|
182
|
+
|
183
|
+
if not document_path.exists():
|
184
|
+
raise FileNotFoundError(f"Document file not found: {document_path}")
|
185
|
+
|
186
|
+
with document_path.open('rb') as f:
|
187
|
+
content = f.read()
|
188
|
+
|
189
|
+
# Decompress if needed based on filename extension
|
190
|
+
if is_compressed == 'gzip':
|
191
|
+
content = gzip.decompress(content)
|
192
|
+
elif is_compressed == 'zstd':
|
193
|
+
content = zstd.ZstdDecompressor().decompress(content)
|
194
|
+
|
195
|
+
# Decode text files
|
196
|
+
if extension in ['.htm', '.html', '.txt', '.xml']:
|
197
|
+
content = content.decode('utf-8', errors='replace')
|
165
198
|
|
166
199
|
return Document(
|
167
200
|
type=doc['type'],
|
@@ -171,7 +204,6 @@ class Submission:
|
|
171
204
|
accession=self.accession,
|
172
205
|
path=document_path
|
173
206
|
)
|
174
|
-
|
175
207
|
def __iter__(self):
|
176
208
|
"""Make Submission iterable by yielding all documents."""
|
177
209
|
for idx in range(len(self.metadata.content['documents'])):
|
@@ -6,7 +6,7 @@ datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,9
|
|
6
6
|
datamule/portfolio.py,sha256=tADqQMkFaFyjanbJ0QcaOHGdJJB254rOg29FW7a13l0,11835
|
7
7
|
datamule/portfolio_compression_utils.py,sha256=8OPYEN5zAdV1FiTxgVN3S7cTKs99Elv74bwgoIJP4QY,12654
|
8
8
|
datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
|
9
|
-
datamule/submission.py,sha256=
|
9
|
+
datamule/submission.py,sha256=ooLsesZ5HkgSWyEFID4u08CobTxdo35eAUHSCB6fw2k,10332
|
10
10
|
datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
|
11
11
|
datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
datamule/datamule/sec_connector.py,sha256=T3edE7I-d4oHysqj7zYlIOxH3Fuauj9tfw39UdFWvB8,2393
|
@@ -66,7 +66,7 @@ datamule/seclibrary/downloader.py,sha256=3jEy67oiEg8BF20KcKCx2KC0UjHzhiepdu29TOa
|
|
66
66
|
datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
|
67
67
|
datamule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
68
68
|
datamule/utils/construct_submissions_data.py,sha256=aX7ZaAp3zXHLcv4TFk_rGwjb8r7yNDQDFVg4nPf60kM,5934
|
69
|
-
datamule-1.6.
|
70
|
-
datamule-1.6.
|
71
|
-
datamule-1.6.
|
72
|
-
datamule-1.6.
|
69
|
+
datamule-1.6.4.dist-info/METADATA,sha256=4v85X90MyUpvQC37rMyiEA_3UA-2scIHz4tTF1xH2e4,524
|
70
|
+
datamule-1.6.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
71
|
+
datamule-1.6.4.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
|
72
|
+
datamule-1.6.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|