datamule 1.4.6__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
@@ -11,19 +11,7 @@ from selectolax.parser import HTMLParser
  from .processing import process_tabular_data
  from pathlib import Path
  import webbrowser
-
- def convert_bytes_keys(obj):
-     if isinstance(obj, dict):
-         return {
-             (k.decode('utf-8').lower() if isinstance(k, bytes) else k): convert_bytes_keys(v)
-             for k, v in obj.items()
-         }
-     elif isinstance(obj, list):
-         return [convert_bytes_keys(item) for item in obj]
-     elif isinstance(obj, bytes):
-         return obj.decode('utf-8').lower()
-     else:
-         return obj
+ from secsgml.utils import bytes_to_str
 
  class Document:
      def __init__(self, type, content, extension,accession,filing_date,path=None):
@@ -34,7 +22,8 @@ class Document:
          self.filing_date = filing_date
 
          if self.type == 'submission_metadata':
-             self.content = convert_bytes_keys(content)
+             # this converts to lower
+             self.content = bytes_to_str(content)
          else:
              self.content = content
 
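Note: `bytes_to_str` from `secsgml.utils` replaces the removed `convert_bytes_keys` helper; per the added comment it also lowercases. For reference, a minimal standalone sketch of the behavior the removed helper provided (the function name and sample metadata below are illustrative, not part of the package):

```python
# Standalone sketch of the recursive bytes -> lowercase-str conversion that the
# removed convert_bytes_keys helper performed (and that secsgml.utils.bytes_to_str
# is assumed to cover). The sample metadata dict is hypothetical.
def bytes_keys_to_lower_str(obj):
    if isinstance(obj, dict):
        return {
            (k.decode('utf-8').lower() if isinstance(k, bytes) else k): bytes_keys_to_lower_str(v)
            for k, v in obj.items()
        }
    elif isinstance(obj, list):
        return [bytes_keys_to_lower_str(item) for item in obj]
    elif isinstance(obj, bytes):
        return obj.decode('utf-8').lower()
    return obj

raw = {b'ACCESSION-NUMBER': b'0000000000-25-000000', b'DOCUMENTS': [{b'TYPE': b'10-K'}]}
print(bytes_keys_to_lower_str(raw))
# {'accession-number': '0000000000-25-000000', 'documents': [{'type': '10-k'}]}
```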
datamule/portfolio.py CHANGED
@@ -125,7 +125,7 @@ class Portfolio:
              # First query, just set the accession numbers
              self.accession_numbers = new_accession_numbers
 
-     def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None,document_type=None,requests_per_second=5, **kwargs):
+     def download_submissions(self, cik=None, ticker=None, submission_type=None, filing_date=None, provider=None,document_type=[],requests_per_second=5, **kwargs):
          if provider is None:
              config = Config()
              provider = config.get_default_source()
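A hedged usage sketch of the updated `download_submissions` signature, where `document_type` now defaults to an empty list rather than `None`. The `Portfolio('filings')` constructor argument and the filter values are illustrative assumptions, not taken from this diff:

```python
from datamule import Portfolio

# Illustrative only: the constructor argument and the filter values are assumptions.
portfolio = Portfolio('filings')
portfolio.download_submissions(
    submission_type='10-K',
    filing_date=('2024-01-01', '2024-03-31'),
    document_type=['10-K'],        # new default is [] instead of None
    requests_per_second=5,
)
```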
@@ -1,55 +1,23 @@
  import os
- import json
  from .streamer import stream
- import aiofiles
- from ...submission import Submission
-
- async def download_callback(hit, content, cik, accno, url, output_dir="filings", keep_document_types=None):
-     """Save downloaded SEC submission to disk."""
-     try:
-         # Create a Submission object directly from the content
-         # Note: the content needs to be decoded from bytes to string for the parser
-         submission = Submission(sgml_content=content,
-                                 keep_document_types=keep_document_types)
-
-         # Use the async save method to write the submission to disk
-         file_dir = await submission.save_async(output_dir=output_dir)
-
-         return file_dir
-     except Exception as e:
-         print(f"Error processing {accno}: {e}")
-         return None
+ from secsgml import write_sgml_file_to_tar
+ from tqdm import tqdm
 
  def download(cik=None, submission_type=None, filing_date=None, location=None, name=None,
               requests_per_second=5, output_dir="filings", accession_numbers=None,
-              quiet=False, keep_document_types=None):
-     """
-     Download SEC EDGAR filings and extract their documents.
-
-     Parameters:
-     - cik: CIK number(s) to query for
-     - submission_type: Filing type(s) to query for (default: 10-K)
-     - filing_date: Date or date range to query for
-     - location: Location code to filter by (e.g., 'CA' for California)
-     - name: Company name to search for (alternative to providing CIK)
-     - requests_per_second: Rate limit for SEC requests
-     - output_dir: Directory to save documents
-     - accession_numbers: Optional list of accession numbers to filter by
-     - quiet: Whether to suppress progress output
-     - keep_document_types: Optional list of document types to keep (e.g. ['10-K', 'EX-10.1'])
-
-     Returns:
-     - List of all document paths processed
-     """
+              quiet=False, keep_document_types=[]):
      # Make sure output directory exists
      os.makedirs(output_dir, exist_ok=True)
-
+
+     pbar = tqdm(desc="Writing", unit=" submissions", disable=quiet,position=2)
+
      # Create a wrapper for the download_callback that includes the output_dir
      async def callback_wrapper(hit, content, cik, accno, url):
-         return await download_callback(hit, content, cik, accno, url,
-                                        output_dir=output_dir,
-                                        keep_document_types=keep_document_types)
-
+         output_path = os.path.join(output_dir, accno.replace('-','') + '.tar')
+         write_sgml_file_to_tar(output_path, bytes_content=content, filter_document_types=keep_document_types)
+         pbar.update(1)
+
+
      # Call the stream function with our callback
      return stream(
          cik=cik,
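With `download_callback` removed, each hit is now written straight to `<output_dir>/<accession without dashes>.tar` by `secsgml.write_sgml_file_to_tar`, with a tqdm progress bar replacing the old per-submission `Submission.save_async` path. A hedged usage sketch, assuming this hunk is `datamule/sec/submissions/downloader.py` (consistent with the RECORD changes further down); the CIK and dates are illustrative:

```python
# Assumption: this hunk is datamule/sec/submissions/downloader.py (see the RECORD diff below).
from datamule.sec.submissions.downloader import download

download(
    cik='320193',                              # illustrative CIK
    submission_type='10-K',
    filing_date=('2023-01-01', '2023-12-31'),  # illustrative date range
    output_dir='filings',
    keep_document_types=['10-K'],              # forwarded as filter_document_types
)
# Result: one <accession-number-without-dashes>.tar per submission in filings/
```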
@@ -15,6 +15,7 @@ from threading import Thread
  from .query import query
  from os import cpu_count
  from ..submission import Submission
+ from secsgml import write_sgml_file_to_tar
 
 
 
@@ -73,7 +74,7 @@ class Downloader:
              print(f"Failed to log error to {error_file}: {str(e)}")
 
      class FileProcessor:
-         def __init__(self, output_dir, max_workers, queue_size, pbar, downloader, keep_document_types=None):
+         def __init__(self, output_dir, max_workers, queue_size, pbar, downloader, keep_document_types=[]):
              self.processing_queue = Queue(maxsize=queue_size)
              self.should_stop = False
              self.processing_workers = []
@@ -93,17 +94,9 @@ class Downloader:
 
          def _process_file(self, item):
              filename, content = item
-             try:
-                 submission = Submission(sgml_content=content, keep_document_types=self.keep_document_types)
-                 # Use the shared event loop to run save_async
-                 self.downloader._run_coroutine(submission.save_async(output_dir=self.output_dir))
-                 self.pbar.update(1)
-             except Exception as e:
-                 print(f"Exception {e} in {filename}")
-                 accession_dir = os.path.join(self.output_dir, filename.split('.')[0])
-                 if os.path.exists(accession_dir):
-                     shutil.rmtree(accession_dir)
-                 self.downloader._log_error(self.output_dir, filename, str(e))
+             output_path = os.path.join(self.output_dir, filename.split('.')[0] + '.tar')
+             write_sgml_file_to_tar(output_path, bytes_content=content, filter_document_types=self.keep_document_types)
+             self.pbar.update(1)
 
          def _processing_worker(self):
              batch = []
@@ -211,7 +204,7 @@ class Downloader:
          except Exception as e:
              self._log_error(output_dir, filename, str(e))
 
-     async def process_batch(self, urls, output_dir, keep_document_types=None):
+     async def process_batch(self, urls, output_dir, keep_document_types=[]):
          os.makedirs(output_dir, exist_ok=True)
 
          with tqdm(total=len(urls), desc="Processing files") as pbar:
@@ -238,7 +231,7 @@ class Downloader:
              processor.stop_workers()
              decompression_pool.shutdown()
 
-     def download(self, submission_type=None, cik=None, filing_date=None, output_dir="downloads", accession_numbers=None, keep_document_types=None):
+     def download(self, submission_type=None, cik=None, filing_date=None, output_dir="downloads", accession_numbers=None, keep_document_types=[]):
          """
          Query SEC filings and download/process them.
 
@@ -299,7 +292,7 @@ class Downloader:
          self.loop.call_soon_threadsafe(self.loop.stop)
 
 
- def download(submission_type=None, cik=None, filing_date=None, api_key=None, output_dir="downloads", accession_numbers=None, keep_document_types=None):
+ def download(submission_type=None, cik=None, filing_date=None, api_key=None, output_dir="downloads", accession_numbers=None, keep_document_types=[]):
      """
      Query SEC filings and download/process them.
 
datamule/submission.py CHANGED
@@ -2,73 +2,11 @@ from pathlib import Path
  import json
  from .document.document import Document
  from secsgml import parse_sgml_content_into_memory
- import os
- import aiofiles
- import tempfile
-
-
- # # NEW CODE YAY. probably will remove
-
- # def save_metadata_atomically(metadata_file_path, metadata_content):
- #     """Save metadata to a JSONL file atomically, works on any filesystem"""
-
- #     # Create directory if it doesn't exist
- #     os.makedirs(os.path.dirname(metadata_file_path), exist_ok=True)
-
- #     # Format the JSON with newline
- #     json_str = json.dumps(metadata_content, indent=4) + "\n"
-
- #     # Write complete content to a temporary file first
- #     fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(metadata_file_path))
- #     try:
- #         with os.fdopen(fd, 'w') as temp_file:
- #             temp_file.write(json_str)
- #             temp_file.flush()
- #             os.fsync(temp_file.fileno()) # Force write to disk
-
- #         # Append the temporary file to the main file
- #         with open(metadata_file_path, 'a') as target_file:
- #             with open(temp_path, 'r') as temp_read:
- #                 content = temp_read.read()
- #                 target_file.write(content)
- #                 target_file.flush()
- #                 os.fsync(target_file.fileno()) # Force write to disk
- #     finally:
- #         # Clean up the temporary file
- #         if os.path.exists(temp_path):
- #             os.unlink(temp_path)
-
- # async def save_metadata_atomically_async(metadata_file_path, metadata_content):
- #     """Save metadata to a JSONL file atomically in async mode"""
-
- #     # Create directory if it doesn't exist
- #     os.makedirs(os.path.dirname(metadata_file_path), exist_ok=True)
-
- #     # Format the JSON with newline
- #     json_str = json.dumps(metadata_content, indent=4) + "\n"
-
- #     # Write to a temporary file first
- #     fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(metadata_file_path))
- #     os.close(fd) # Close the file descriptor
-
- #     try:
- #         async with aiofiles.open(temp_path, 'w') as temp_file:
- #             await temp_file.write(json_str)
- #             await temp_file.flush()
-
- #         # Append the temporary file to the main file
- #         async with aiofiles.open(metadata_file_path, 'a') as target_file:
- #             async with aiofiles.open(temp_path, 'r') as temp_read:
- #                 content = await temp_read.read()
- #                 await target_file.write(content)
- #                 await target_file.flush()
- #     finally:
- #         # Clean up the temporary file
- #         if os.path.exists(temp_path):
- #             os.unlink(temp_path)
-
- # # END OF NEW CODE
-
+ import tarfile
+ import shutil
+ import zstandard as zstd
+ from io import BytesIO
+ import gzip
 
  class Submission:
      def __init__(self, path=None,sgml_content=None,keep_document_types=None):
@@ -89,7 +27,7 @@ class Submission:
              filtered_metadata_documents = []
 
              for idx,doc in enumerate(self.metadata.content['documents']):
-                 type = doc.get('type')
+                 type = doc.get('type')()
 
                  # Keep only specified types
                  if keep_document_types is not None and type not in keep_document_types:
@@ -106,156 +44,172 @@ class Submission:
 
          if path is not None:
              self.path = Path(path)
-             metadata_path = self.path / 'metadata.json'
-             with metadata_path.open('r') as f:
-                 metadata = json.load(f)
-             self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=metadata_path)
+             if self.path.suffix == '.tar':
+                 with tarfile.open(self.path,'r') as tar:
+                     metadata_obj = tar.extractfile('metadata.json')
+                     metadata = json.loads(metadata_obj.read().decode('utf-8'))
 
-             # Code dupe
+                 # tarpath
+                 metadata_path = f"{self.path}::metadata.json"
+             else:
+                 metadata_path = self.path / 'metadata.json'
+                 with metadata_path.open('r') as f:
+                     metadata = json.load(f)
+             self.metadata = Document(type='submission_metadata', content=metadata, extension='.json',filing_date=None,accession=None,path=metadata_path)
              self.accession = self.metadata.content['accession-number']
              self.filing_date= f"{self.metadata.content['filing-date'][:4]}-{self.metadata.content['filing-date'][4:6]}-{self.metadata.content['filing-date'][6:8]}"
 
 
 
-
-     def document_type(self, document_type):
-         # Convert single document type to list for consistent handling
-         if isinstance(document_type, str):
-             document_types = [document_type]
-         else:
-             document_types = document_type
-
-         for idx,doc in enumerate(self.metadata.content['documents']):
-             if doc['type'] in document_types:
-
-                 # if loaded from path
-                 if self.path is not None:
-                     filename = doc.get('filename')
-                     # oh we need handling here for sequences case
-                     if filename is None:
-                         filename = doc['sequence'] + '.txt'
-
-                     document_path = self.path / filename
-                     extension = document_path.suffix
-
-                     with document_path.open('rb') as f:
-                         content = f.read()
-
-                     if extension in ['.htm','.html','.txt','.xml']:
-                         content = content.decode('utf-8', errors='replace')
-
-                     yield Document(type=doc['type'], content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
-                 # if loaded from sgml_content
-                 else:
-                     yield self.documents[idx]
-
-
-     def __iter__(self):
-         for idx,doc in enumerate(self.metadata.content['documents']):
-             # if loaded from path
-             if self.path is not None:
+     def compress(self, compression=None, level=None, threshold=1048576):
+         if self.path is None:
+             raise ValueError("Compress requires path")
+
+         if compression is not None and compression not in ['gzip', 'zstd']:
+             raise ValueError("compression must be 'gzip' or 'zstd'")
+
+         # Create tar file (replace directory with .tar file)
+         tar_path = self.path.with_suffix('.tar')
+
+         with tarfile.open(tar_path, 'w') as tar:
+             # Add metadata.json first
+             metadata_path = self.path / 'metadata.json'
+             if metadata_path.exists():
+                 tar.add(metadata_path, arcname='metadata.json')
+
+             # Add documents in order
+             for doc in self.metadata.content['documents']:
                  filename = doc.get('filename')
-
-                 # oh we need handling here for sequences case
                  if filename is None:
                      filename = doc['sequence'] + '.txt'
-
-                 document_path = self.path / filename
-                 extension = document_path.suffix
-
-                 # check if the file exists
-                 if document_path.exists():
-                     with document_path.open('rb') as f:
-                         content = f.read()
-
-                     if extension in ['.htm','.html','.txt','.xml']:
-                         content = content.decode('utf-8', errors='replace')
-
-                     yield Document(type=doc['type'], content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
-                 else:
-                     print(f"Warning: File {document_path} does not exist likely due to keep types in downloading.")
-
-             # if loaded from sgml_content
-             else:
-                 yield self.documents[idx]
-
-
-
+
+                 file_path = self.path / filename
+                 if file_path.exists():
+                     file_size = file_path.stat().st_size
 
-     def save(self, output_dir="filings"):
-         file_dir = Path(output_dir) / str(self.accession)
-         file_dir.mkdir(parents=True, exist_ok=True)
-
-         metadata_path = file_dir / "metadata.json"
-         with open(metadata_path, 'w') as f:
-             json.dump(self.metadata.content, f, indent=4)
-
-         for idx, doc in enumerate(self.metadata.content['documents']):
-             filename = doc.get('filename')
-             if filename is None:
-                 filename = f"{doc.get('sequence')}.txt"
-
-             doc_path = file_dir / filename
-
-             if self.path is not None:
-                 if hasattr(self, 'documents') and self.documents:
-                     content = self.documents[idx].content
-                 else:
-                     orig_doc_path = self.path / filename
-                     if orig_doc_path.exists():
-                         with open(orig_doc_path, 'r', encoding='utf-8', errors='replace') as f:
-                             content = f.read()
+
+                     # Compress if compression specified and over threshold
+                     if compression is not None and file_size >= threshold:
+                         content = file_path.read_bytes()
+
+                         if compression == 'gzip':
+                             compressed_content = gzip.compress(content, compresslevel=level or 6)
+                             compressed_filename = filename + '.gz'
+                         else: # zstd
+                             cctx = zstd.ZstdCompressor(level=level or 3)
+                             compressed_content = cctx.compress(content)
+                             compressed_filename = filename + '.zst'
+
+                         # Add compressed file to tar
+                         tarinfo = tarfile.TarInfo(name=compressed_filename)
+                         tarinfo.size = len(compressed_content)
+                         tar.addfile(tarinfo, BytesIO(compressed_content))
                      else:
-                         print(f"Warning: File {orig_doc_path} does not exist, skipping.")
-                         continue
-             else:
-                 content = self.documents[idx].content
-
-             if isinstance(content, bytes):
-                 with open(doc_path, 'wb') as f:
-                     f.write(content)
-             else:
-                 with open(doc_path, 'w', encoding='utf-8', errors='replace') as f:
-                     f.write(content)
+                         # Add uncompressed file
+                         tar.add(file_path, arcname=filename)
 
-         return file_dir
-
-     async def save_async(self, output_dir="filings"):
-         file_dir = Path(output_dir) / str(self.accession)
-         os.makedirs(file_dir, exist_ok=True)
+         # Delete original folder
+         shutil.rmtree(self.path)
 
-         metadata_path = file_dir / "metadata.json"
-         async with aiofiles.open(metadata_path, 'w') as f:
-             await f.write(json.dumps(self.metadata.content, indent=4))
+         # Update path to point to new tar file
+         self.path = tar_path
+
+     def decompress(self):
+         if self.path is None:
+             raise ValueError("Decompress requires path")
+         elif self.path.suffix != '.tar':
+             raise ValueError("Can only decompress tar")
 
-         for idx, doc in enumerate(self.metadata.content['documents']):
-             filename = doc.get('filename')
-             # oh we need handling here for sequences case
-             if filename is None:
-                 filename = doc['sequence'] + '.txt'
-
-
-             doc_path = file_dir / filename
-
-             if self.path is not None:
-                 if hasattr(self, 'documents') and self.documents:
-                     content = self.documents[idx].content
-                 else:
-                     orig_doc_path = self.path / filename
-                     if orig_doc_path.exists():
-                         async with aiofiles.open(orig_doc_path, 'r', encoding='utf-8', errors='replace') as f:
-                             content = await f.read()
+         # Create output directory (path without .tar extension)
+         output_dir = self.path.with_suffix('')
+         output_dir.mkdir(exist_ok=True)
+
+         with tarfile.open(self.path, 'r') as tar:
+             for member in tar.getmembers():
+                 if member.isfile():
+                     content = tar.extractfile(member).read()
+
+                     # Decompress if gzipped
+                     if member.name.endswith('.gz'):
+                         content = gzip.decompress(content)
+                         output_path = output_dir / member.name[:-3] # Remove .gz extension
                      else:
-                         print(f"Warning: File {orig_doc_path} does not exist, skipping.")
-                         continue
-             else:
-                 content = self.documents[idx].content
-
-             if isinstance(content, bytes):
-                 async with aiofiles.open(doc_path, 'wb') as f:
-                     await f.write(content)
-             else:
-                 async with aiofiles.open(doc_path, 'w', encoding='utf-8', errors='replace') as f:
-                     await f.write(content)
+                         output_path = output_dir / member.name
+
+                     # Write to output directory
+                     output_path.parent.mkdir(parents=True, exist_ok=True)
+                     with output_path.open('wb') as f:
+                         f.write(content)
+
+         # delete original file
+         self.path.unlink()
+         self.path = output_dir
+
+     def _load_document_by_index(self, idx):
+         """Load a document by its index in the metadata documents list."""
+         doc = self.metadata.content['documents'][idx]
+
+         # If loaded from sgml_content, return pre-loaded document
+         if self.path is None:
+             return self.documents[idx]
 
-         return file_dir
+         # If loaded from path, load document on-demand
+         filename = doc.get('filename')
+         if filename is None:
+             filename = doc['sequence'] + '.txt'
+
+         document_path = self.path / filename
+         extension = document_path.suffix
+
+         if self.path.suffix == '.tar':
+             with tarfile.open(self.path, 'r') as tar:
+                 # bandaid fix TODO
+                 try:
+                     content = tar.extractfile(filename).read()
+                 except:
+                     try:
+                         content = tar.extractfile(filename+'.gz').read()
+                     except:
+                         try:
+                             content = tar.extractfile(filename+'.zst').read()
+                         except:
+                             raise ValueError("Something went wrong with tar")
+             # Decompress if compressed
+             if filename.endswith('.gz'):
+                 content = gzip.decompress(content)
+             elif filename.endswith('.zst'):
+                 dctx = zstd.ZstdDecompressor()
+                 content = dctx.decompress(content)
+         else:
+             with document_path.open('rb') as f:
+                 content = f.read()
+
+         # Decode text files
+         if extension in ['.htm', '.html', '.txt', '.xml']:
+             content = content.decode('utf-8', errors='replace')
+
+         return Document(
+             type=doc['type'],
+             content=content,
+             extension=extension,
+             filing_date=self.filing_date,
+             accession=self.accession,
+             path=document_path
+         )
+
+     def __iter__(self):
+         """Make Submission iterable by yielding all documents."""
+         for idx in range(len(self.metadata.content['documents'])):
+             yield self._load_document_by_index(idx)
+
+     def document_type(self, document_type):
+         """Yield documents matching the specified type(s)."""
+         # Convert single document type to list for consistent handling
+         if isinstance(document_type, str):
+             document_types = [document_type]
+         else:
+             document_types = [item for item in document_type]
+
+         for idx, doc in enumerate(self.metadata.content['documents']):
+             if doc['type'] in document_types:
+                 yield self._load_document_by_index(idx)
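Taken together, `save()`/`save_async()` are gone: a `Submission` can now be loaded from either a directory or a `.tar`, packed with `compress()` (optionally gzip- or zstd-compressing members above `threshold` bytes), unpacked with `decompress()`, and iterated lazily via `_load_document_by_index`. A hedged usage sketch; the paths are placeholders:

```python
# Hedged sketch of the reworked Submission API; paths are placeholders.
from datamule.submission import Submission

sub = Submission(path='filings/0000000000-25-000000')         # directory layout
sub.compress(compression='zstd', level=3, threshold=1048576)  # pack into .tar, delete folder
print(sub.path)                                               # .../0000000000-25-000000.tar

for doc in sub.document_type('10-K'):                         # documents load on demand
    print(doc.extension, len(doc.content))

sub.decompress()                                              # unpack back to a directory
```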
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datamule
- Version: 1.4.6
+ Version: 1.5.0
  Summary: Work with SEC submissions at scale.
  Home-page: https://github.com/john-friedman/datamule-python
  Author: John Friedman
@@ -3,12 +3,12 @@ datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
  datamule/helper.py,sha256=KqhAmTMdvATEh3I-O4xLcAcrHB9zXQERBuwzue7zyQw,3674
  datamule/index.py,sha256=Rrcna9FJV-Oh_K6O2IuUEIDmtay_7UZ4l4jgKCi7A7I,2079
  datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
- datamule/portfolio.py,sha256=8fiK-vfZM5-NJSvOEsDR2YDb-2njjzFk6l7BiRyrzOM,7168
+ datamule/portfolio.py,sha256=5EhK3FDq1q-91O0-BCZpW4pt1uBaS5VlziysJ3roroY,7166
  datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
- datamule/submission.py,sha256=EtWdEnAyWLZdu69Dyzbs4qb5YL41HlExFGMjwEoMhsg,10904
+ datamule/submission.py,sha256=Pq6dGf1zjlFdOIKaaNvPgYGkqcqa1PEla-3QcSGqMSk,9300
  datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
  datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datamule/document/document.py,sha256=3vX850H7rZH4H8BysitZDaLhT6WPJuIreoV1PSjACno,14301
+ datamule/document/document.py,sha256=VaJWo9HrcODlbifYcXzifW3xBD7nUOWAN8zcVCDWMcs,13958
  datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
  datamule/document/table.py,sha256=73yUJKY82ap32jhLmZeTti-jQ_lyhcJGlGwyxLtgYOg,12944
  datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -44,7 +44,7 @@ datamule/sec/utils.py,sha256=JUxwijJiqRMnRJNQzVUamyF5h9ZGc7RnO_zsLOIM73g,2079
  datamule/sec/infrastructure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNsBw5Jv0Tx5aljiGUJkk7DRk,18745
  datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datamule/sec/submissions/downloader.py,sha256=izaz559PtBCAWPWGzqUReloawJtXwnraclgXdzEOteI,2631
+ datamule/sec/submissions/downloader.py,sha256=QQ7DkSaXBe-TQ2oZb2coGn3TBXIHywXn0uCGeIaVWCI,1216
  datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
  datamule/sec/submissions/monitor.py,sha256=dZYuVCi_X82eYA8l_9cbnkRjiawz3K4U-FnCAyJcgk4,7892
  datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
@@ -56,9 +56,9 @@ datamule/sec/xbrl/streamcompanyfacts.py,sha256=Qq88PqW5_j1k3Aqrl0KRmKeF54D6Wbb6H
  datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
  datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
- datamule/seclibrary/downloader.py,sha256=VIdaQq5wDcYWnqrv9t8J7z0KtdNRGK8ahfBsgvTfdQQ,13675
+ datamule/seclibrary/downloader.py,sha256=m8yrRjjRVJ2WtKIKIO_66lQHtAsWssyTPip6B2mW9tE,13275
  datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
- datamule-1.4.6.dist-info/METADATA,sha256=IxggkAHbjanZjnTtWGNOyRM68sztal4gQlUfa0shlXg,469
- datamule-1.4.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- datamule-1.4.6.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
- datamule-1.4.6.dist-info/RECORD,,
+ datamule-1.5.0.dist-info/METADATA,sha256=USW4JuULE-3tHuIlvI5zjSf6-cKUTMdso4Sy1Hfr0co,469
+ datamule-1.5.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ datamule-1.5.0.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+ datamule-1.5.0.dist-info/RECORD,,