datamule 2.2.9__py3-none-any.whl → 2.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datamule might be problematic.

datamule/__init__.py CHANGED
@@ -8,6 +8,7 @@ from .index import Index
 from .package_updater import PackageUpdater
 from .utils.format_accession import format_accession
 from .utils.construct_submissions_data import construct_submissions_data
+from .book.book import Book


 # Keep the notebook environment setup
datamule/book/__init__.py ADDED
File without changes
datamule/book/book.py ADDED
@@ -0,0 +1,13 @@
+from .s3transfer import s3_transfer
+
+class Book:
+    def __init__(self):
+        pass
+
+    def s3_transfer(self, datamule_bucket, s3_credentials, max_workers=4, errors_json_filename='s3_transfer_errors.json', retry_errors=3,
+                    force_daily=True, cik=None, submission_type=None, filing_date=None, datamule_api_key=None,accession=None):
+
+        s3_transfer(datamule_bucket=datamule_bucket, s3_credentials=s3_credentials, max_workers=max_workers,
+                    errors_json_filename=errors_json_filename, retry_errors=retry_errors,
+                    force_daily=force_daily, cik=cik, submission_type=submission_type,
+                    filing_date=filing_date, datamule_api_key=datamule_api_key,accession_number=accession)
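
For orientation, a minimal usage sketch of the Book.s3_transfer entry point added above, assuming the "from datamule import Book" re-export from the __init__.py hunk. Every credential value and the accession number are placeholders; the s3_credentials keys mirror what the s3transfer module (next file) reads.

# Hypothetical sketch: all values below are placeholders, not values from this release.
from datamule import Book

s3_credentials = {
    's3_provider': 'aws',                    # only 'aws' is handled by this release
    'aws_access_key_id': 'AKIA...',          # placeholder
    'aws_secret_access_key': '...',          # placeholder
    'region_name': 'us-east-1',              # placeholder
    'bucket_name': 'my-destination-bucket',  # placeholder destination bucket
}

Book().s3_transfer(
    datamule_bucket='filings_sgml_r2',         # the only bucket name s3_transfer accepts
    s3_credentials=s3_credentials,
    accession='0000000000-00-000000',          # placeholder; cik/submission_type/filing_date must stay None
    datamule_api_key='YOUR_DATAMULE_API_KEY',  # placeholder
)
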
datamule/book/s3transfer.py ADDED
@@ -0,0 +1,264 @@
+import asyncio
+import aiohttp
+import aioboto3
+import ssl
+import time
+import json
+from datetime import datetime, timedelta
+from urllib.parse import urlparse
+from tqdm import tqdm
+import logging
+from ..sheet import Sheet
+from ..utils.format_accession import format_accession
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+def generate_date_range(start_date_str, end_date_str):
+    start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
+    end_date = datetime.strptime(end_date_str, '%Y-%m-%d')
+
+    dates = []
+    current_date = start_date
+
+    while current_date <= end_date:
+        dates.append(current_date.strftime('%Y-%m-%d'))
+        current_date += timedelta(days=1)
+
+    return dates
+
+
+def get_filings_sgml_r2_urls(submission_type=None, cik=None, datamule_api_key=None, filing_date=None,accession_number=None):
+    datamule_bucket_endpoint = 'https://sec-library.datamule.xyz/'
+    sheet = Sheet('s3transfer')
+    submissions = sheet.get_submissions(distinct=True, quiet=False, api_key=datamule_api_key,
+        submission_type=submission_type, cik=cik, columns=['accessionNumber'], filing_date=filing_date,
+        accession_number=accession_number)
+
+    accessions = [format_accession(sub['accessionNumber'], 'no-dash') for sub in submissions]
+
+    urls = [f"{datamule_bucket_endpoint}{accession}.sgml" for accession in accessions]
+
+    return urls
+
+
+class AsyncS3Transfer:
+    def __init__(self, s3_credentials, max_workers=100, chunk_size=2*1024*1024):
+        self.s3_credentials = s3_credentials
+        self.max_workers = max_workers
+        self.chunk_size = chunk_size
+
+    async def __aenter__(self):
+        # Create aiohttp session with optimized connector
+        connector = aiohttp.TCPConnector(
+            limit=self.max_workers,
+            force_close=False,
+            ssl=ssl.create_default_context(),
+            ttl_dns_cache=300,
+            keepalive_timeout=60
+        )
+
+        self.session = aiohttp.ClientSession(
+            connector=connector,
+            timeout=aiohttp.ClientTimeout(total=600),
+            headers={
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+                'Connection': 'keep-alive',
+                'Accept-Encoding': 'gzip, deflate, br'
+            }
+        )
+
+        # Create async boto3 client
+        if self.s3_credentials['s3_provider'] == 'aws':
+            session = aioboto3.Session()
+            self.s3_client = await session.client(
+                's3',
+                aws_access_key_id=self.s3_credentials['aws_access_key_id'],
+                aws_secret_access_key=self.s3_credentials['aws_secret_access_key'],
+                region_name=self.s3_credentials['region_name']
+            ).__aenter__()
+        else:
+            raise ValueError("S3 Provider not supported yet. Please use another provider or email johnfriedman@datamule.xyz to add support.")
+
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if hasattr(self, 'session') and self.session:
+            await self.session.close()
+        if hasattr(self, 's3_client') and self.s3_client:
+            await self.s3_client.__aexit__(exc_type, exc_val, exc_tb)
+
+    async def transfer_single_file(self, semaphore, url, retry_errors=3):
+        """Transfer a single file with retry logic and preserve metadata"""
+        async with semaphore:
+            filename = urlparse(url).path.split('/')[-1]
+            s3_key = filename
+            bucket_name = self.s3_credentials['bucket_name']
+
+            last_error = None
+
+            for attempt in range(retry_errors + 1):
+                try:
+                    async with self.session.get(url) as response:
+                        if response.status == 200:
+                            # Capture source metadata from response headers
+                            content_length = response.headers.get('Content-Length')
+                            size_bytes = int(content_length) if content_length else 0
+                            content_type = response.headers.get('Content-Type', 'application/octet-stream')
+                            last_modified = response.headers.get('Last-Modified')
+
+                            # Read response content
+                            content = await response.read()
+
+                            # Prepare S3 upload parameters with preserved metadata
+                            upload_params = {
+                                'Bucket': bucket_name,
+                                'Key': s3_key,
+                                'Body': content,
+                                'ContentType': content_type,
+                                'StorageClass': 'STANDARD',
+                                'Metadata': {
+                                    'source-url': url,
+                                    'original-size': str(size_bytes),
+                                    'transfer-date': datetime.utcnow().isoformat()
+                                }
+                            }
+
+                            # Add last modified if available
+                            if last_modified:
+                                upload_params['Metadata']['original-last-modified'] = last_modified
+
+                            # Upload to S3 with metadata
+                            await self.s3_client.put_object(**upload_params)
+
+                            return {
+                                'success': True,
+                                'url': url,
+                                'message': f"Copied: {url} -> s3://{bucket_name}/{s3_key}",
+                                'size_bytes': size_bytes,
+                                's3_key': s3_key,
+                                'content_type': content_type,
+                                'last_modified': last_modified
+                            }
+                        else:
+                            raise aiohttp.ClientResponseError(
+                                request_info=response.request_info,
+                                history=response.history,
+                                status=response.status
+                            )
+
+                except Exception as e:
+                    print(e)
+                    last_error = e
+                    if attempt < retry_errors:
+                        await asyncio.sleep(2 ** attempt)  # Exponential backoff
+
+            # All attempts failed
+            return {
+                'success': False,
+                'url': url,
+                'error': str(last_error),
+                'message': f"Failed to copy {url} after {retry_errors + 1} attempts: {last_error}",
+                'size_bytes': 0
+            }
+
+    async def transfer_batch(self, urls, retry_errors=3):
+        """Transfer multiple files concurrently"""
+        semaphore = asyncio.Semaphore(self.max_workers)
+        failed_files = []
+        total_bytes = 0
+        start_time = time.time()
+
+        # Create tasks for all transfers
+        tasks = [
+            self.transfer_single_file(semaphore, url, retry_errors)
+            for url in urls
+        ]
+
+        # Process with progress bar
+        with tqdm(total=len(urls), desc="Transferring files", unit="file") as pbar:
+            for coro in asyncio.as_completed(tasks):
+                result = await coro
+
+                if result['success']:
+                    total_bytes += result.get('size_bytes', 0)
+                else:
+                    failed_files.append(result)
+
+                # Update progress bar with total GB transferred
+                total_gb = total_bytes / (1024 ** 3)
+                pbar.set_postfix({'Total': f'{total_gb:.2f} GB'})
+
+                pbar.update(1)
+
+        return failed_files, total_bytes
+
+
+async def async_transfer_cached_urls_to_s3(urls, s3_credentials, max_workers=4,
+                                           errors_json_filename='s3_transfer_errors.json',
+                                           retry_errors=3):
+    """Async version of transfer_cached_urls_to_s3"""
+    failed_files = []
+    total_bytes = 0
+
+    async with AsyncS3Transfer(s3_credentials, max_workers) as transfer:
+        failed_files, total_bytes = await transfer.transfer_batch(urls, retry_errors)
+
+    # Save errors to JSON if filename provided and there are errors
+    if errors_json_filename and failed_files:
+        with open(errors_json_filename, 'w') as f:
+            json.dump(failed_files, f, indent=2)
+        print(f"Saved {len(failed_files)} errors to {errors_json_filename}")
+
+    print(f"Transfer complete: {len(urls) - len(failed_files)}/{len(urls)} files successful")


+def transfer_cached_urls_to_s3(urls, s3_credentials, max_workers=4, errors_json_filename='s3_transfer_errors.json', retry_errors=3):
+    """Wrapper to run async transfer in sync context"""
+    asyncio.run(async_transfer_cached_urls_to_s3(urls, s3_credentials, max_workers, errors_json_filename, retry_errors))
+
+
+def s3_transfer(datamule_bucket, s3_credentials, max_workers=4, errors_json_filename='s3_transfer_errors.json', retry_errors=3,
+                force_daily=True, cik=None, submission_type=None, filing_date=None, datamule_api_key=None,accession_number=None):
+
+    if datamule_bucket == 'filings_sgml_r2':
+
+
+        if accession_number is not None:
+            if any(param is not None for param in [cik, submission_type, filing_date]):
+                raise ValueError('If accession is provided, then cik, type, and date must be None')
+            urls = get_filings_sgml_r2_urls(datamule_api_key=datamule_api_key,accession_number=accession_number)
+            transfer_cached_urls_to_s3(urls=urls, s3_credentials=s3_credentials, max_workers=max_workers, errors_json_filename=errors_json_filename, retry_errors=retry_errors)
+        else:
+            if not force_daily:
+                urls = get_filings_sgml_r2_urls(submission_type=submission_type, cik=cik, datamule_api_key=datamule_api_key,
+                    filing_date=filing_date)
+                transfer_cached_urls_to_s3(urls=urls, s3_credentials=s3_credentials, max_workers=max_workers, errors_json_filename=errors_json_filename, retry_errors=retry_errors)
+            else:
+                if isinstance(filing_date, str):
+                    urls = get_filings_sgml_r2_urls(submission_type=submission_type, cik=cik, datamule_api_key=datamule_api_key,
+                        filing_date=filing_date)
+                    transfer_cached_urls_to_s3(urls=urls, s3_credentials=s3_credentials, max_workers=max_workers, errors_json_filename=errors_json_filename, retry_errors=retry_errors)
+                elif isinstance(filing_date, list):
+                    for date in filing_date:
+                        print(f"Transferring {date}")
+                        urls = get_filings_sgml_r2_urls(submission_type=submission_type, cik=cik, datamule_api_key=datamule_api_key,
+                            filing_date=date)
+                        transfer_cached_urls_to_s3(urls=urls, s3_credentials=s3_credentials, max_workers=max_workers, errors_json_filename=errors_json_filename, retry_errors=retry_errors)
+                elif isinstance(filing_date, tuple):
+                    dates = generate_date_range(filing_date[0], filing_date[1])
+                    for date in dates:
+                        print(f"Transferring {date}")
+                        urls = get_filings_sgml_r2_urls(submission_type=submission_type, cik=cik, datamule_api_key=datamule_api_key,
+                            filing_date=date)
+                        transfer_cached_urls_to_s3(urls=urls, s3_credentials=s3_credentials, max_workers=max_workers, errors_json_filename=errors_json_filename, retry_errors=retry_errors)
+                else:
+                    raise ValueError('filing_date can only be string, list, or (startdt,enddt)')
+
+    else:
+        raise ValueError('Datamule S3 bucket not found.')
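
A hedged sketch of the filing_date shapes the s3_transfer branches above accept under the default force_daily=True. Dates, keys, and bucket names are placeholders; the import path follows the RECORD entry for datamule/book/s3transfer.py.

# Hypothetical sketch: dates, keys, and bucket names are placeholders.
from datamule.book.s3transfer import s3_transfer

creds = {'s3_provider': 'aws', 'aws_access_key_id': 'AKIA...', 'aws_secret_access_key': '...',
         'region_name': 'us-east-1', 'bucket_name': 'my-destination-bucket'}

s3_transfer('filings_sgml_r2', creds, filing_date='2024-01-02')                  # one day
s3_transfer('filings_sgml_r2', creds, filing_date=['2024-01-02', '2024-01-03'])  # explicit list, transferred day by day
s3_transfer('filings_sgml_r2', creds, filing_date=('2024-01-02', '2024-01-05'))  # (start, end) range expanded daily
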
datamule/datamule/downloader.py CHANGED
@@ -228,7 +228,7 @@ class Downloader:
         headers = {
             'Connection': 'keep-alive',
             'Accept-Encoding': 'gzip, deflate, br',
-            'Authorization': f'Bearer {api_key}'
+            #'Authorization': f'Bearer {api_key}'
         }

         async with session.get(url, headers=headers) as response:
datamule/document/document.py CHANGED
@@ -7,8 +7,6 @@ from doc2dict import html2dict, visualize_dict, get_title, unnest_dict, pdf2dict
 from ..mapping_dicts.txt_mapping_dicts import dict_10k, dict_10q, dict_8k, dict_13d, dict_13g
 from ..mapping_dicts.xml_mapping_dicts import dict_345
 from ..mapping_dicts.html_mapping_dicts import *
-from selectolax.parser import HTMLParser
-
 from pathlib import Path
 import webbrowser
 from secsgml.utils import bytes_to_str
@@ -294,7 +292,6 @@ class Document:
             return bool(re.search(pattern, self.content))
         return False

-    # Note: this method will be heavily modified in the future
     def parse(self):
        # check if we have already parsed the content
        if self._data:
@@ -384,6 +381,8 @@ class Document:
            dct = html2dict(content=self.content, mapping_dict=mapping_dict)
        elif self.extension in ['.txt']:
            dct = txt2dict(content=self.content, mapping_dict=mapping_dict)
+       elif self.extension == '.pdf':
+           dct = pdf2dict(content=self.content, mapping_dict=mapping_dict)
        else:
            dct = {}

@@ -391,10 +390,8 @@
        elif self.extension == '.xml':
            if self.type in ['3', '4', '5', '3/A', '4/A', '5/A']:
                mapping_dict = dict_345
-
            self._data = xml2dict(content=self.content, mapping_dict=mapping_dict)
-       elif self.extension == '.pdf':
-           self._data = pdf2dict(content=self.content, mapping_dict=mapping_dict)
+
        else:
            pass

@@ -409,6 +406,12 @@

            if not isinstance(self._data, DataWithTags):
                self._data = DataWithTags(self._data, self)
+       elif self.extension == '.xml':
+           if self._data is None:
+               self.parse()
+
+           if self._data is None:
+               self._data = {}

        return self._data

@@ -444,19 +447,46 @@
            json.dump(self.data, f, indent=2)

    def parse_tables(self,must_exist_in_mapping=True):
-       if self.extension != '.xml':
-           self._tables = []
+       """Must exist in mapping means columns must occur in mapping schema."""
+       if self.extension == '.xml':
+           tables = Tables(document_type = self.type, accession=self.accession)
+           tables.parse_tables(data=self.data,must_exist_in_mapping=must_exist_in_mapping)
+           self._tables = tables
+
+       elif self._data_bool:
+           tables = Tables(document_type = self.type, accession=self.accession)
+           data_tuples = self.data_tuples
+
+           for i, (id, type, content, level) in enumerate(data_tuples):
+               if type == "table" and i > 0:
+                   description = None
+
+                   # Look at previous element
+                   prev_id, prev_type, prev_content, prev_level = data_tuples[i-1]
+
+                   # Case 1: Same level + text content
+                   if prev_level == level and prev_type in ["text", "textsmall"]:
+                       description = prev_content
+
+                   # Case 2: Higher level (lower number) + title
+                   elif prev_level < level and prev_type == "title":
+                       description = prev_content
+
+                   # Case 3: No matching description - add table without description
+                   # (description remains None)
+
+                   tables.add_table(data=content, description=description, name="extracted_table")
+
+           self._tables = tables
+
        else:
-           # Use the property to trigger parsing if needed
-           data = self.data
-           tables = Tables(document_type = self.type, accession=self.accession, data=data,must_exist_in_mapping=must_exist_in_mapping)
-           self._tables = tables.tables
+           self._tables = []

    @property
    def tables(self):
        if self._tables is None:
            self.parse_tables()
-       return self._tables
+       return self._tables.tables


    def write_csv(self, output_folder):
@@ -547,6 +577,7 @@
            webbrowser.open('file://' + temp_path)
        else:
            print(f"Cannot open files with extension {self.extension}")
+
    def get_section(self, title=None, title_regex=None,title_class=None, format='dict'):
        if self._data_bool:
            if not self.data:
@@ -557,3 +588,9 @@
            return [item[1] for item in result]
        else:
            return [flatten_dict(item[1],format) for item in result]
+
+   # TODO
+   def get_tables(self,description_regex=None,name=None):
+       # make sure tables is initialized
+       self.tables
+       return self._tables.get_tables(description_regex=description_regex, name=name)
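
Taken together, the Document changes above route table extraction through parse_tables, the tables property, and the new get_tables filter. A hedged sketch of how the calls compose, assuming doc is an existing datamule Document instance whose content contains tables; the regex is a placeholder.

# Hypothetical sketch: 'doc' stands for an existing datamule Document instance.
doc.parse_tables()      # builds the Tables container (XML mapping or extracted HTML/PDF/TXT tables)
print(len(doc.tables))  # the tables property now returns the list of Table objects
for table in doc.get_tables(description_regex=r'(?i)revenue'):  # placeholder pattern
    print(table.name, table.description)
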
datamule/document/tables/tables.py CHANGED
@@ -6,8 +6,10 @@ from .tables_npx import config_npx
 from .tables_sbsef import config_sbsef
 from .tables_sdr import config_sdr
 from .tables_proxyvotingrecord import config_proxyvotingrecord
+from doc2dict.utils.format_dict import _format_table

 from .utils import safe_get, flatten_dict
+import re
 # will add filing date param later? or extension
 all_tables_dict = {
     '3' : config_ownership,
@@ -93,25 +95,30 @@ def apply_mapping(flattened_data, mapping_dict, accession, must_exist_in_mapping

 # should have table type, accession, data
 class Table:
-    def __init__(self,data,name,accession):
+    def __init__(self,data,name,accession,description = None):
         self.data = data
         self.name = name
         self.accession = accession
+        self.description = description
+
+    # TODO MADE IN A HURRY #
+    def __str__(self):
+        formatted_table = _format_table(self.data)
+        if isinstance(formatted_table, list):
+            table_str = '\n'.join(formatted_table)
+        else:
+            table_str = str(formatted_table)
+        return f"Table '{self.name}' ({self.accession}) - {len(self.data) if isinstance(self.data, list) else 'N/A'} rows\ndescription: {self.description if self.description else ''}\n{table_str}"


 class Tables():
-    def __init__(self,document_type,accession,data,must_exist_in_mapping=True):
+    def __init__(self,document_type,accession):
         self.document_type = document_type
         self.accession = accession
-        self.data = data
-
-        # to fill in
         self.tables = []

-        self.parse_tables(must_exist_in_mapping=must_exist_in_mapping)
-
-    def parse_tables(self,must_exist_in_mapping=True):
-        # first select dict
+    def parse_tables(self,data,must_exist_in_mapping=True):
+        self.data = data

         try:
             tables_dict = all_tables_dict[self.document_type]
@@ -120,11 +127,32 @@ class Tables():

         # now get the dicts from the data
         data_dicts = seperate_data(tables_dict,self.data)
-
+
         # now flatten
         data_dicts = [(x,flatten_dict(y)) for x,y in data_dicts]

         for table_name, flattened_data in data_dicts:
             mapping_dict = tables_dict[table_name]['mapping']
             mapped_data = apply_mapping(flattened_data, mapping_dict, self.accession,must_exist_in_mapping)
-            self.tables.append(Table(mapped_data, table_name, self.accession))
+            self.tables.append(Table(mapped_data, table_name, self.accession))
+
+    def add_table(self,data,name,description=None):
+        self.tables.append(Table(data=data,name=name,accession=self.accession,description=description))
+
+    def get_tables(self, description_regex=None, name=None):
+        matching_tables = []
+
+        for table in self.tables:
+            # Check name match (exact match)
+            if name is not None:
+                if table.name == name:
+                    matching_tables.append(table)
+                    continue
+
+            # Check description regex match
+            if description_regex is not None and table.description is not None:
+                if re.search(description_regex, table.description):
+                    matching_tables.append(table)
+                    continue
+
+        return matching_tables
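
A hedged sketch of the lower-level Tables API added in this file, with a placeholder accession and toy row data; the import path follows the RECORD entry for datamule/document/tables/tables.py.

# Hypothetical sketch: the accession and row data are placeholders.
from datamule.document.tables.tables import Tables

tables = Tables(document_type='8-K', accession='0000000000-00-000000')
tables.add_table(data=[{'segment': 'Products', 'revenue': '100'}],
                 name='extracted_table', description='Revenue by segment')

by_name = tables.get_tables(name='extracted_table')       # exact name match
by_desc = tables.get_tables(description_regex='Revenue')  # regex against the stored description
print(len(by_name), len(by_desc))                         # both match the table added above
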
datamule-2.2.9.dist-info/METADATA → datamule-2.3.2.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamule
-Version: 2.2.9
+Version: 2.3.2
 Summary: Work with SEC submissions at scale.
 Home-page: https://github.com/john-friedman/datamule-python
 Author: John Friedman
@@ -20,4 +20,5 @@ Requires-Dist: secsgml
 Requires-Dist: websocket-client
 Requires-Dist: company-fundamentals
 Requires-Dist: flashtext
+Requires-Dist: aioboto3

datamule-2.2.9.dist-info/RECORD → datamule-2.3.2.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-datamule/__init__.py,sha256=sY9rYx9z4LADjOLmwjL3BXssIzHs8MQM6gt9IWMS85U,1192
+datamule/__init__.py,sha256=gsWTW0emwGtM-KVtwe2OICVmW7ImvLvP0SORULTPe-Y,1220
 datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
 datamule/datasets.py,sha256=-2_5kTRS3mxlkKbXwBg8aiistYljLYRnZjDLZNhV8bk,1867
 datamule/helper.py,sha256=KqhAmTMdvATEh3I-O4xLcAcrHB9zXQERBuwzue7zyQw,3674
@@ -8,16 +8,19 @@ datamule/portfolio.py,sha256=0-E1ZSEjJ8hba7HxF8oCrRneNuF_KKISOY6K4dRg0Cg,12282
 datamule/portfolio_compression_utils.py,sha256=8OPYEN5zAdV1FiTxgVN3S7cTKs99Elv74bwgoIJP4QY,12654
 datamule/sheet.py,sha256=KD7yAgSB8BE-Z4GDuH58IV-2DJ673nMcEsrCyJbeYp8,10707
 datamule/submission.py,sha256=phHmi9ScjWHtVLjEoEdAO7RieUSKN5gPr0onfg5R8wE,16139
+datamule/book/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datamule/book/book.py,sha256=Vw33JHhmulNDWRN2AQpUQrf8wgVqqUYg5QJgbKhBNak,773
+datamule/book/s3transfer.py,sha256=4Zpw5daAH05u1dppv2ARXG_VSBIdsHnlEWC9xZgBfZM,12590
 datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
 datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/datamule/datamule_lookup.py,sha256=e8djAg-ctSyHiKk7BjbtgugZ3p8roUjzsym5z3AihUg,9468
 datamule/datamule/datamule_mysql_rds.py,sha256=Q6_h24-SNECWK60RnM6UQjUIp5dhJmfn3SSKzTITB3o,12317
-datamule/datamule/downloader.py,sha256=B22ULAuYzclxxVCH4DsLWUIyFUC5Iep-Hl1W3RgCfeg,18580
+datamule/datamule/downloader.py,sha256=Ss9mz0Jf5UAd-CZJ6oO96o9hN04xMQIF3-e1wahokdM,18581
 datamule/datamule/sec_connector.py,sha256=VwOaODpHoAWy8JIky6kLR1-orW_PB61RHw7pIGRpkow,3288
 datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/document.py,sha256=GceuC8estrVkUzMMbcxjtSa3xga_gj0wzledpH2-VMA,21589
+datamule/document/document.py,sha256=NrMqhY_u_X7gyvraxY0hzZEDJddqSJDgiHFzkaRTBVA,23102
 datamule/document/tables/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/tables/tables.py,sha256=8riSAof6o-Gxoo0SkiQAE61fw8NmzDnEhJe6dATzmvA,4487
+datamule/document/tables/tables.py,sha256=uEMDYg7c4iHjVtIjNQgCgZOGp6j9aFWVB05agpVsNOI,5727
 datamule/document/tables/tables_13fhr.py,sha256=-6tWcaTyNsb0XuW0WMBrYir9Zn1wLZL0laKxRYfPNyg,4265
 datamule/document/tables/tables_25nse.py,sha256=kpoOcIpra6i3Wx_6pUCj1fkx0wUbMhx7pc8yUkrBJb4,980
 datamule/document/tables/tables_informationtable.py,sha256=3yjuxYuLoBjRd6O0BNd0jQDmS1XUDjA6xp51Csq2cH8,649
@@ -58,7 +61,7 @@ datamule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/utils/construct_submissions_data.py,sha256=NB_hvfxlRXPyt4Fgc-5qA8vJRItkLhBedCSTaxwW7Jg,5887
 datamule/utils/format_accession.py,sha256=60RtqoNqoT9zSKVb1DeOv1gncJxzPTFMNW4SNOVmC_g,476
 datamule/utils/pdf.py,sha256=Z9xrdVhKex2YdvjYsaPaygRE_J6P_JNiUGkwflz2Hw0,735
-datamule-2.2.9.dist-info/METADATA,sha256=6X1QFR5xzzjUf_cDGlXOqnbTblURDLfkwCAZyXr3Z9E,585
-datamule-2.2.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-datamule-2.2.9.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
-datamule-2.2.9.dist-info/RECORD,,
+datamule-2.3.2.dist-info/METADATA,sha256=Mn-oEWDcCGepxr663ugvpQibjGbDsYzlqg5CsP1Rgvs,609
+datamule-2.3.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+datamule-2.3.2.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-2.3.2.dist-info/RECORD,,