datamule 1.7.0__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- datamule/__init__.py +1 -1
- datamule/seclibrary/datamule_lookup.py +236 -0
- datamule/seclibrary/downloader.py +35 -31
- datamule/sheet.py +9 -0
- {datamule-1.7.0.dist-info → datamule-1.8.0.dist-info}/METADATA +1 -1
- {datamule-1.7.0.dist-info → datamule-1.8.0.dist-info}/RECORD +8 -10
- datamule/document/mappings_new/__init__.py +0 -0
- datamule/document/mappings_new/mappings.py +0 -13
- datamule/document/mappings_new/ownership.py +0 -174
- {datamule-1.7.0.dist-info → datamule-1.8.0.dist-info}/WHEEL +0 -0
- {datamule-1.7.0.dist-info → datamule-1.8.0.dist-info}/top_level.txt +0 -0
datamule/__init__.py
CHANGED
@@ -7,7 +7,7 @@ from .sheet import Sheet
 from .index import Index
 from .package_updater import PackageUpdater
 from .utils.format_accession import format_accession
-
+from .utils.construct_submissions_data import construct_submissions_data
 
 
 # Keep the notebook environment setup
datamule/seclibrary/datamule_lookup.py
ADDED
@@ -0,0 +1,236 @@
+import os
+import asyncio
+import aiohttp
+import urllib.parse
+import ssl
+import json
+import time
+from tqdm import tqdm
+
+class DatamuleLookup:
+    def __init__(self, api_key=None):
+        self.API_BASE_URL = "https://datamule-lookup.jgfriedman99.workers.dev/"
+        self._api_key = api_key
+        self.total_cost = 0
+        self.remaining_balance = None
+        self.start_time = None
+
+    @property
+    def api_key(self):
+        return getattr(self, '_api_key', None) or os.getenv('DATAMULE_API_KEY')
+
+    @api_key.setter
+    def api_key(self, value):
+        if not value:
+            raise ValueError("API key cannot be empty")
+        self._api_key = value
+
+    async def _fetch_page(self, session, cik=None, accession_number=None, submission_type=None,
+                          filing_date=None, columns=None, distinct=False, page=1, page_size=25000):
+        params = {
+            'api_key': self.api_key,
+            'page': page,
+            'pageSize': page_size
+        }
+
+        # Handle CIK parameter
+        if cik:
+            if isinstance(cik, list):
+                params['cik'] = ','.join(str(x) for x in cik)
+            else:
+                params['cik'] = str(cik)
+
+        # Handle accession number parameter
+        if accession_number:
+            if isinstance(accession_number, list):
+                params['accessionNumber'] = ','.join(str(x) for x in accession_number)
+            else:
+                params['accessionNumber'] = str(accession_number)
+
+        # Handle submission_type parameter
+        if submission_type:
+            if isinstance(submission_type, list):
+                params['submissionType'] = ','.join(str(x) for x in submission_type)
+            else:
+                params['submissionType'] = str(submission_type)
+
+        # Handle filing_date parameter
+        if filing_date:
+            if isinstance(filing_date, tuple):
+                params['startDate'] = str(filing_date[0])
+                params['endDate'] = str(filing_date[1])
+            else:
+                if isinstance(filing_date, list):
+                    params['filingDate'] = ','.join(str(x) for x in filing_date)
+                else:
+                    params['filingDate'] = str(filing_date)
+
+        # Handle columns parameter
+        if columns:
+            if isinstance(columns, list):
+                params['columns'] = ','.join(columns)
+            else:
+                params['columns'] = str(columns)
+
+        # Handle distinct parameter
+        if distinct:
+            params['distinct'] = 'true'
+
+        url = f"{self.API_BASE_URL}?{urllib.parse.urlencode(params)}"
+
+        async with session.get(url) as response:
+            data = await response.json()
+            if not data.get('success'):
+                raise ValueError(f"API request failed: {data.get('error')}")
+
+            # Track costs and balance
+            billing = data['metadata']['billing']
+            page_cost = billing['total_charge']
+            self.total_cost += page_cost
+            self.remaining_balance = billing['remaining_balance']
+
+            return data['data'], data['metadata']['pagination'], page_cost
+
+    async def execute_query(self, cik=None, accession_number=None, submission_type=None,
+                            filing_date=None, columns=None, distinct=False, page_size=25000, quiet=False):
+        if self.api_key is None:
+            raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key in constructor")
+
+        self.start_time = time.time()
+        total_items = 0
+        pages_processed = 0
+
+        # Display query parameters
+        query_desc = []
+        if cik:
+            query_desc.append(f"CIK={cik}")
+        if accession_number:
+            query_desc.append(f"Accession={accession_number}")
+        if submission_type:
+            query_desc.append(f"Type={submission_type}")
+        if filing_date:
+            if isinstance(filing_date, tuple):
+                query_desc.append(f"Date={filing_date[0]} to {filing_date[1]}")
+            else:
+                query_desc.append(f"Date={filing_date}")
+        if columns:
+            query_desc.append(f"Columns={columns}")
+        if distinct:
+            query_desc.append("DISTINCT=True")
+
+        if query_desc and not quiet:
+            print(f"QUERY: {', '.join(query_desc)}")
+
+        connector = aiohttp.TCPConnector(ssl=ssl.create_default_context())
+        async with aiohttp.ClientSession(connector=connector) as session:
+            # Initialize progress bar only if not quiet
+            if not quiet:
+                pbar = tqdm(unit="page", bar_format="{desc}: {n_fmt} {unit} [{elapsed}<{remaining}, {rate_fmt}{postfix}]")
+                pbar.set_description("Fetching data")
+
+            current_page = 1
+            has_more = True
+            results = []
+
+            while has_more:
+                # Fetch page
+                page_results, pagination, page_cost = await self._fetch_page(
+                    session,
+                    cik=cik,
+                    accession_number=accession_number,
+                    submission_type=submission_type,
+                    filing_date=filing_date,
+                    columns=columns,
+                    distinct=distinct,
+                    page=current_page,
+                    page_size=page_size
+                )
+
+                # Accumulate results
+                results.extend(page_results)
+
+                pages_processed += 1
+                total_items += len(page_results)
+
+                # Update progress bar only if not quiet
+                if not quiet:
+                    pbar.set_description(f"Fetching data (page {current_page})")
+                    pbar.set_postfix_str(f"cost=${self.total_cost:.4f} | balance=${self.remaining_balance:.2f}")
+                    pbar.update(1)
+
+                # Check if we need to fetch more pages
+                has_more = pagination.get('hasMore', False)
+                current_page += 1
+
+                # For the first page, display record info only if not quiet
+                if pages_processed == 1 and not quiet:
+                    records_per_page = pagination.get('currentPageRecords', len(page_results))
+                    if records_per_page > 0:
+                        pbar.write(f"Retrieved {records_per_page} records (page 1) - Fetching additional pages...")
+                    else:
+                        pbar.write("No records found matching criteria")
+                        break
+
+            if not quiet:
+                pbar.close()
+
+        # Final summary only if not quiet
+        if not quiet:
+            elapsed_time = time.time() - self.start_time
+            print("\nQuery complete:")
+            print(f"- Retrieved {total_items} records across {pages_processed} pages")
+            print(f"- Total cost: ${self.total_cost:.4f}")
+            print(f"- Remaining balance: ${self.remaining_balance:.2f}")
+            print(f"- Time: {elapsed_time:.1f} seconds")
+
+        return results
+
+
+def datamule_lookup(cik=None, accession_number=None, submission_type=None, filing_date=None,
+                    columns=None, distinct=False, page_size=25000, quiet=False, api_key=None):
+    """
+    Query SEC filing data from Datamule with optional filtering
+
+    Parameters:
+    - cik: Company CIK number(s), can be string, int, or list
+    - accession_number: Accession number(s), can be string or list
+    - submission_type: Filing type(s), can be string or list (e.g., '10-K', ['10-K', '10-Q'])
+    - filing_date: Filing date(s), can be string, list, or tuple of (start_date, end_date)
+    - columns: Column(s) to return, can be string or list. Options: 'accessionNumber', 'cik', 'filingDate', 'submissionType'
+    - distinct: Boolean, whether to return distinct results only
+    - page_size: Number of records per page (max 25000)
+    - quiet: Boolean, whether to suppress progress output and summary
+    - api_key: Optional API key (can also use DATAMULE_API_KEY environment variable)
+
+    Returns:
+    - List of dictionaries containing the requested data (ready for pandas DataFrame)
+    """
+    # Create a DatamuleLookup instance for this request
+    dl = DatamuleLookup(api_key=api_key)
+
+    # Format dates by removing dashes if present
+    if isinstance(filing_date, tuple):
+        filing_date = (filing_date[0].replace('-', ''), filing_date[1].replace('-', ''))
+    elif isinstance(filing_date, str):
+        filing_date = filing_date.replace('-', '')
+    elif isinstance(filing_date, list):
+        filing_date = [x.replace('-', '') for x in filing_date]
+
+    # Set default columns if none specified
+    if columns is None:
+        columns = ['accessionNumber', 'cik', 'filingDate', 'submissionType']
+
+    # Validate page_size
+    page_size = min(max(1, page_size), 25000)
+
+    # Run the query and return results
+    return asyncio.run(dl.execute_query(
+        cik=cik,
+        accession_number=accession_number,
+        submission_type=submission_type,
+        filing_date=filing_date,
+        columns=columns,
+        distinct=distinct,
+        page_size=page_size,
+        quiet=quiet
+    ))
datamule/seclibrary/downloader.py
CHANGED
@@ -9,20 +9,24 @@ import zstandard as zstd
 import io
 import json
 import tarfile
+import logging
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
-from queue import Queue
+from queue import Queue
 from threading import Thread, Lock
-from .query import query
 from os import cpu_count
 from secsgml import parse_sgml_content_into_memory
 from secsgml.utils import bytes_to_str
+from .datamule_lookup import datamule_lookup
 
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
 
 
 class Downloader:
     def __init__(self, api_key=None):
-        self.BASE_URL = "https://library.datamule.xyz/"
+        self.BASE_URL = "https://sec-library.datamule.xyz/"
         self.CHUNK_SIZE = 2 * 1024 * 1024
         self.MAX_CONCURRENT_DOWNLOADS = 100
         self.MAX_DECOMPRESSION_WORKERS = cpu_count()
@@ -66,7 +70,7 @@ class Downloader:
                 with open(error_file, 'w') as f:
                     json.dump(errors, f, indent=2)
             except Exception as e:
-
+                logger.error(f"Failed to log error to {error_file}: {str(e)}")
 
     class TarManager:
         def __init__(self, output_dir, num_tar_files, max_batch_size=1024*1024*1024):
@@ -81,7 +85,7 @@ class Downloader:
 
             for i in range(num_tar_files):
                 tar_path = os.path.join(output_dir, f'batch_{i:03d}_001.tar')
-                self.tar_files[i] = tarfile.open(tar_path, '
+                self.tar_files[i] = tarfile.open(tar_path, 'a')
                 self.tar_locks[i] = Lock()
                 self.file_counters[i] = 0
                 self.tar_sizes[i] = 0
@@ -105,7 +109,7 @@ class Downloader:
 
             self.tar_sequences[tar_index] += 1
             new_tar_path = os.path.join(self.output_dir, f'batch_{tar_index:03d}_{self.tar_sequences[tar_index]:03d}.tar')
-            self.tar_files[tar_index] = tarfile.open(new_tar_path, '
+            self.tar_files[tar_index] = tarfile.open(new_tar_path, 'a')
            self.file_counters[tar_index] = 0
             self.tar_sizes[tar_index] = 0
 
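Both call sites above now open batch tars in append mode ('a'). A minimal standalone sketch of the rotation naming they implement; rotate_tar is a hypothetical helper for illustration, not part of the package:

import os
import tarfile

# Hypothetical helper mirroring the rotation above: when a batch tar grows past
# max_batch_size, the manager bumps the sequence and opens batch_{index}_{seq}.tar
# in append mode.
def rotate_tar(output_dir, tar_index, sequence):
    sequence += 1
    path = os.path.join(output_dir, f'batch_{tar_index:03d}_{sequence:03d}.tar')
    return tarfile.open(path, 'a'), sequence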
@@ -127,7 +131,7 @@ class Downloader:
                 return True
 
             except Exception as e:
-
+                logger.error(f"Error writing {filename} to tar {tar_index}: {str(e)}")
                 return False
 
         def _get_document_name(self, metadata, file_num, standardize_metadata):
@@ -153,7 +157,7 @@ class Downloader:
             try:
                 tar.close()
             except Exception as e:
-
+                logger.error(f"Error closing tar {i}: {str(e)}")
 
     def decompress_and_parse_and_write(self, compressed_chunks, filename, keep_document_types, keep_filtered_metadata, standardize_metadata, tar_manager, output_dir):
         dctx = zstd.ZstdDecompressor()
@@ -221,17 +225,21 @@ class Downloader:
         }
 
         async with session.get(url, headers=headers) as response:
+            content_type = response.headers.get('Content-Type', '')
+
             if response.status == 200:
                 async for chunk in response.content.iter_chunked(self.CHUNK_SIZE):
                     chunks.append(chunk)
 
                 loop = asyncio.get_running_loop()
-                if
+                if content_type == 'application/zstd':
+                    logger.debug(f"Processing {filename} as compressed (zstd)")
                     success = await loop.run_in_executor(
                         decompression_pool,
                         partial(self.decompress_and_parse_and_write, chunks, filename, keep_document_types, keep_filtered_metadata, standardize_metadata, tar_manager, output_dir)
                     )
                 else:
+                    logger.debug(f"Processing {filename} as uncompressed")
                    success = await loop.run_in_executor(
                         decompression_pool,
                         partial(self.parse_and_write_regular_file, chunks, filename, keep_document_types, keep_filtered_metadata, standardize_metadata, tar_manager, output_dir)
@@ -293,32 +301,27 @@ class Downloader:
         if self.api_key is None:
             raise ValueError("No API key found. Please set DATAMULE_API_KEY environment variable or provide api_key in constructor")
 
-
-
-
-
-            filing_date=filing_date,
-            api_key=self.api_key
-        )
+        logger.debug("Querying SEC filings...")
+
+        filings = datamule_lookup(cik=cik, submission_type=submission_type, filing_date=filing_date,
+                                  columns=['accessionNumber'], distinct=True, page_size=25000, quiet=False)
 
         if accession_numbers:
             accession_numbers = [str(int(item.replace('-',''))) for item in accession_numbers]
-            filings = [filing for filing in filings if filing['
+            filings = [filing for filing in filings if filing['accessionNumber'] in accession_numbers]
 
         if skip_accession_numbers:
             skip_accession_numbers = [int(item.replace('-','')) for item in skip_accession_numbers]
-            filings = [filing for filing in filings if filing['
+            filings = [filing for filing in filings if filing['accessionNumber'] not in skip_accession_numbers]
 
-
+        logger.debug(f"Generating URLs for {len(filings)} filings...")
         urls = []
         for item in filings:
-            url = f"{self.BASE_URL}{str(item['
-            if item['compressed'] == True or item['compressed'] == 'true' or item['compressed'] == 'True':
-                url += '.zst'
+            url = f"{self.BASE_URL}{str(item['accessionNumber']).zfill(18)}.sgml"
             urls.append(url)
 
         if not urls:
-
+            logger.warning("No submissions found matching the criteria")
             return
 
         urls = list(set(urls))
@@ -328,8 +331,8 @@ class Downloader:
         asyncio.run(self.process_batch(urls, output_dir, keep_document_types=keep_document_types, keep_filtered_metadata=keep_filtered_metadata, standardize_metadata=standardize_metadata, max_batch_size=max_batch_size))
 
         elapsed_time = time.time() - start_time
-
-
+        logger.debug(f"Processing completed in {elapsed_time:.2f} seconds")
+        logger.debug(f"Processing speed: {len(urls)/elapsed_time:.2f} files/second")
 
     def __del__(self):
         if hasattr(self, 'loop') and self.loop.is_running():
@@ -348,10 +351,10 @@ class Downloader:
         for filename in filenames:
             if not isinstance(filename, str):
                 raise ValueError(f"Invalid filename type: {type(filename)}. Expected string.")
-            if not
-                raise ValueError(f"Invalid filename format: {filename}. Expected .sgml
+            if not filename.endswith('.sgml'):
+                raise ValueError(f"Invalid filename format: {filename}. Expected .sgml extension.")
 
-
+        logger.debug(f"Generating URLs for {len(filenames)} files...")
         urls = []
         for filename in filenames:
             url = f"{self.BASE_URL}{filename}"
@@ -360,7 +363,7 @@ class Downloader:
         seen = set()
         urls = [url for url in urls if not (url in seen or seen.add(url))]
 
-
+        logger.debug(f"Downloading {len(urls)} files...")
 
         start_time = time.time()
 
@@ -374,12 +377,13 @@ class Downloader:
         ))
 
         elapsed_time = time.time() - start_time
-
-
+        logger.debug(f"Processing completed in {elapsed_time:.2f} seconds")
+        logger.debug(f"Processing speed: {len(urls)/elapsed_time:.2f} files/second")
 
 
 def download(submission_type=None, cik=None, filing_date=None, api_key=None, output_dir="downloads", accession_numbers=None, keep_document_types=[],keep_filtered_metadata=False,standardize_metadata=True,
             skip_accession_numbers=[], max_batch_size=1024*1024*1024):
+
     if accession_numbers:
         accession_numbers = [int(str(x).replace('-', '')) for x in accession_numbers]
     elif accession_numbers == []:
datamule/sheet.py
CHANGED
@@ -3,12 +3,21 @@ import csv
 import os
 from .helper import _process_cik_and_metadata_filters, load_package_dataset
 from .sec.xbrl.downloadcompanyfacts import download_company_facts
+from .seclibrary.datamule_lookup import datamule_lookup
+
+# slated for deprecation?
 from .seclibrary.bq import get_information_table, get_345, get_proxy_voting_record
 
 class Sheet:
     def __init__(self, path):
         self.path = Path(path)
 
+    def get_submissions(self,cik=None, accession_number=None, submission_type=None, filing_date=None,
+                        columns=None, distinct=False, page_size=25000, quiet=False, api_key=None):
+
+        return datamule_lookup(cik, accession_number, submission_type, filing_date,
+                               columns, distinct, page_size, quiet, api_key)
+
     def download_xbrl(
         self,
         cik=None,
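Sheet.get_submissions is a thin pass-through to datamule_lookup and accepts the same parameters. A minimal sketch with illustrative values:

from datamule import Sheet

sheet = Sheet('output')  # path argument per Sheet.__init__
submissions = sheet.get_submissions(
    cik=1318605,
    submission_type='10-Q',
    filing_date=('2024-01-01', '2024-06-30'),
    quiet=True,
)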
{datamule-1.7.0.dist-info → datamule-1.8.0.dist-info}/RECORD
CHANGED
@@ -1,11 +1,11 @@
-datamule/__init__.py,sha256=
+datamule/__init__.py,sha256=sY9rYx9z4LADjOLmwjL3BXssIzHs8MQM6gt9IWMS85U,1192
 datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
 datamule/helper.py,sha256=KqhAmTMdvATEh3I-O4xLcAcrHB9zXQERBuwzue7zyQw,3674
 datamule/index.py,sha256=Rrcna9FJV-Oh_K6O2IuUEIDmtay_7UZ4l4jgKCi7A7I,2079
 datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
 datamule/portfolio.py,sha256=tADqQMkFaFyjanbJ0QcaOHGdJJB254rOg29FW7a13l0,11835
 datamule/portfolio_compression_utils.py,sha256=8OPYEN5zAdV1FiTxgVN3S7cTKs99Elv74bwgoIJP4QY,12654
-datamule/sheet.py,sha256=
+datamule/sheet.py,sha256=V5iR9_LkuwTFxfHCfzgadO6qgB6qOhzWiCAED-y8ZJQ,22744
 datamule/submission.py,sha256=ooLsesZ5HkgSWyEFID4u08CobTxdo35eAUHSCB6fw2k,10332
 datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
 datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -38,9 +38,6 @@ datamule/document/mappings/ta.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/document/mappings/thirteenfhr.py,sha256=XpYRIMPZnGLfEE4TqBI0BPXbyuq0xf3hut1fePOF6kU,4250
 datamule/document/mappings/twentyfivense.py,sha256=lKyj0ZBhkHX9gQJMTUPrQlxYFg3k-aBnWqtoS5bujZM,905
 datamule/document/mappings/twentyfourf2nt.py,sha256=Q7RPT3JgJHjYdjMuaSyAxclt6QPT_LgCQloxp-ByDuI,4118
-datamule/document/mappings_new/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamule/document/mappings_new/mappings.py,sha256=sP94GK3-klMCTD6XFajAP9KxJ7Wq5YMMaXcHx1rQEKA,281
-datamule/document/mappings_new/ownership.py,sha256=GVtyROefvEC_X5l6kayvZv57-kHxj8bHckAru8JtFOQ,10656
 datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/mapping_dicts/html_mapping_dicts.py,sha256=G2PWB__FNg4VH9iFJFkflM0u-qOEtk67IWtGoqesb0k,5388
 datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
@@ -62,12 +59,13 @@ datamule/sec/xbrl/streamcompanyfacts.py,sha256=Qq88PqW5_j1k3Aqrl0KRmKeF54D6Wbb6H
 datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
 datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
-datamule/seclibrary/
+datamule/seclibrary/datamule_lookup.py,sha256=_opEh-DRY3ZBXFbuE2Ua_aRwoc1IsV-cPSWK0c61ofY,9465
+datamule/seclibrary/downloader.py,sha256=6cPPddjXekOwlzsyratUqzpCSbvdaNyRCGjQXUtVoJU,17930
 datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
 datamule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamule/utils/construct_submissions_data.py,sha256=aX7ZaAp3zXHLcv4TFk_rGwjb8r7yNDQDFVg4nPf60kM,5934
 datamule/utils/format_accession.py,sha256=60RtqoNqoT9zSKVb1DeOv1gncJxzPTFMNW4SNOVmC_g,476
-datamule-1.
-datamule-1.
-datamule-1.
-datamule-1.
+datamule-1.8.0.dist-info/METADATA,sha256=QGhncvN1wEhQ3m9mrqZ2-jrb7LZ9BwJAzhCp72CEUWc,524
+datamule-1.8.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+datamule-1.8.0.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+datamule-1.8.0.dist-info/RECORD,,
datamule/document/mappings_new/__init__.py
File without changes
datamule/document/mappings_new/mappings.py
DELETED
@@ -1,13 +0,0 @@
-import ownership
-
-
-# key is document type
-# note: this assumes XML format.
-table_mappings = {
-    '3' : ownership.mappings,
-    '3/A' : ownership.mappings,
-    '4' : ownership.mappings,
-    '4/A' : ownership.mappings,
-    '5' : ownership.mappings,
-    '5/A' : ownership.mappings,
-}
datamule/document/mappings_new/ownership.py
DELETED
@@ -1,174 +0,0 @@
-
-
-
-# Non-derivative transaction ownership mapping
-ownership_non_derivative_transactions_dict = {
-    'securityTitle_value': 'securityTitle',
-    'securityTitle_footnote': 'securityTitleFootnote',
-    'transactionDate_value': 'transactionDate',
-    'transactionDate_footnote': 'transactionDateFootnote',
-    'deemedExecutionDate_value': 'deemedExecutionDate',
-    'deemedExecutionDate_footnote': 'deemedExecutionDateFootnote',
-    'transactionCoding_transactionFormType': 'transactionFormType',
-    'transactionCoding_transactionCode': 'transactionCode',
-    'transactionCoding_equitySwapInvolved': 'equitySwapInvolved',
-    'transactionCoding_footnote': 'transactionCodingFootnote',
-    'transactionAmounts_transactionShares_value': 'transactionShares',
-    'transactionAmounts_transactionShares_footnote': 'transactionSharesFootnote',
-    'transactionAmounts_transactionPricePerShare_value': 'transactionPricePerShare',
-    'transactionAmounts_transactionPricePerShare_footnote': 'transactionPricePerShareFootnote',
-    'transactionAmounts_transactionAcquiredDisposedCode_value': 'transactionAcquiredDisposedCode',
-    'transactionAmounts_transactionAcquiredDisposedCode_footnote': 'transactionAcquiredDisposedCodeFootnote',
-    'postTransactionAmounts_sharesOwnedFollowingTransaction_value': 'sharesOwnedFollowingTransaction',
-    'postTransactionAmounts_sharesOwnedFollowingTransaction_footnote': 'sharesOwnedFollowingTransactionFootnote',
-    'ownershipNature_directOrIndirectOwnership_value': 'directOrIndirectOwnership',
-    'ownershipNature_directOrIndirectOwnership_footnote': 'directOrIndirectOwnershipFootnote',
-    'ownershipNature_natureOfOwnership_value': 'natureOfOwnership',
-    'ownershipNature_natureOfOwnership_footnote': 'natureOfOwnershipFootnote',
-    'transactionTimeliness_value': 'transactionTimeliness',
-    'transactionTimeliness_footnote': 'transactionTimelinessFootnote',
-    'postTransactionAmounts_valueOwnedFollowingTransaction_value': 'valueOwnedFollowingTransaction',
-    'postTransactionAmounts_valueOwnedFollowingTransaction_footnote': 'valueOwnedFollowingTransactionFootnote'
-}
-
-# Derivative transaction ownership mapping
-derivative_transaction_ownership_dict = {
-    'securityTitle_value': 'securityTitle',
-    'securityTitle_footnote': 'securityTitleFootnote',
-    'conversionOrExercisePrice_value': 'conversionOrExercisePrice',
-    'conversionOrExercisePrice_footnote': 'conversionOrExercisePriceFootnote',
-    'transactionDate_value': 'transactionDate',
-    'transactionDate_footnote': 'transactionDateFootnote',
-    'deemedExecutionDate_value': 'deemedExecutionDate',
-    'deemedExecutionDate_footnote': 'deemedExecutionDateFootnote',
-    'transactionCoding_transactionFormType': 'transactionFormType',
-    'transactionCoding_transactionCode': 'transactionCode',
-    'transactionCoding_equitySwapInvolved': 'equitySwapInvolved',
-    'transactionCoding_footnote': 'transactionCodingFootnote',
-    'transactionAmounts_transactionShares_value': 'transactionShares',
-    'transactionAmounts_transactionShares_footnote': 'transactionSharesFootnote',
-    'transactionAmounts_transactionPricePerShare_value': 'transactionPricePerShare',
-    'transactionAmounts_transactionPricePerShare_footnote': 'transactionPricePerShareFootnote',
-    'transactionAmounts_transactionAcquiredDisposedCode_value': 'transactionAcquiredDisposedCode',
-    'transactionAmounts_transactionTotalValue_value': 'transactionTotalValue',
-    'transactionAmounts_transactionTotalValue_footnote': 'transactionTotalValueFootnote',
-    'exerciseDate_value': 'exerciseDate',
-    'exerciseDate_footnote': 'exerciseDateFootnote',
-    'expirationDate_value': 'expirationDate',
-    'expirationDate_footnote': 'expirationDateFootnote',
-    'underlyingSecurity_underlyingSecurityTitle_value': 'underlyingSecurityTitle',
-    'underlyingSecurity_underlyingSecurityTitle_footnote': 'underlyingSecurityTitleFootnote',
-    'underlyingSecurity_underlyingSecurityShares_value': 'underlyingSecurityShares',
-    'underlyingSecurity_underlyingSecurityShares_footnote': 'underlyingSecuritySharesFootnote',
-    'underlyingSecurity_underlyingSecurityValue_value': 'underlyingSecurityValue',
-    'postTransactionAmounts_sharesOwnedFollowingTransaction_value': 'sharesOwnedFollowingTransaction',
-    'postTransactionAmounts_sharesOwnedFollowingTransaction_footnote': 'sharesOwnedFollowingTransactionFootnote',
-    'ownershipNature_directOrIndirectOwnership_value': 'directOrIndirectOwnership',
-    'ownershipNature_directOrIndirectOwnership_footnote': 'directOrIndirectOwnershipFootnote',
-    'ownershipNature_natureOfOwnership_value': 'natureOfOwnership',
-    'ownershipNature_natureOfOwnership_footnote': 'natureOfOwnershipFootnote',
-    'transactionTimeliness_value': 'transactionTimeliness',
-    'transactionTimeliness_footnote': 'transactionTimelinessFootnote',
-    'postTransactionAmounts_valueOwnedFollowingTransaction_value': 'valueOwnedFollowingTransaction',
-    'postTransactionAmounts_valueOwnedFollowingTransaction_footnote': 'valueOwnedFollowingTransactionFootnote',
-    'transactionAmounts_transactionAcquiredDisposedCode_footnote': 'transactionAcquiredDisposedCodeFootnote',
-    'underlyingSecurity_underlyingSecurityValue_footnote': 'underlyingSecurityValueFootnote'
-}
-
-# Non-derivative holding ownership mapping
-non_derivative_holding_ownership_dict = {
-    'securityTitle_value': 'securityTitle',
-    'securityTitle_footnote': 'securityTitleFootnote',
-    'postTransactionAmounts_sharesOwnedFollowingTransaction_value': 'sharesOwnedFollowingTransaction',
-    'postTransactionAmounts_sharesOwnedFollowingTransaction_footnote': 'sharesOwnedFollowingTransactionFootnote',
-    'ownershipNature_directOrIndirectOwnership_value': 'directOrIndirectOwnership',
-    'ownershipNature_directOrIndirectOwnership_footnote': 'directOrIndirectOwnershipFootnote',
-    'ownershipNature_natureOfOwnership_value': 'natureOfOwnership',
-    'ownershipNature_natureOfOwnership_footnote': 'natureOfOwnershipFootnote',
-    'postTransactionAmounts_valueOwnedFollowingTransaction_value': 'valueOwnedFollowingTransaction',
-    'transactionCoding_footnote': 'transactionCodingFootnote',
-    'transactionCoding_transactionFormType': 'transactionFormType',
-    'postTransactionAmounts_valueOwnedFollowingTransaction_footnote': 'valueOwnedFollowingTransactionFootnote'
-}
-
-# Derivative holding ownership mapping
-derivative_holding_ownership_dict = {
-    'securityTitle_value': 'securityTitle',
-    'securityTitle_footnote': 'securityTitleFootnote',
-    'conversionOrExercisePrice_value': 'conversionOrExercisePrice',
-    'conversionOrExercisePrice_footnote': 'conversionOrExercisePriceFootnote',
-    'exerciseDate_value': 'exerciseDate',
-    'exerciseDate_footnote': 'exerciseDateFootnote',
-    'expirationDate_value': 'expirationDate',
-    'expirationDate_footnote': 'expirationDateFootnote',
-    'underlyingSecurity_underlyingSecurityTitle_value': 'underlyingSecurityTitle',
-    'underlyingSecurity_underlyingSecurityTitle_footnote': 'underlyingSecurityTitleFootnote',
-    'underlyingSecurity_underlyingSecurityShares_value': 'underlyingSecurityShares',
-    'underlyingSecurity_underlyingSecurityShares_footnote': 'underlyingSecuritySharesFootnote',
-    'underlyingSecurity_underlyingSecurityValue_value': 'underlyingSecurityValue',
-    'underlyingSecurity_underlyingSecurityValue_footnote': 'underlyingSecurityValueFootnote',
-    'ownershipNature_directOrIndirectOwnership_value': 'directOrIndirectOwnership',
-    'ownershipNature_directOrIndirectOwnership_footnote': 'directOrIndirectOwnershipFootnote',
-    'ownershipNature_natureOfOwnership_value': 'natureOfOwnership',
-    'ownershipNature_natureOfOwnership_footnote': 'natureOfOwnershipFootnote',
-    'postTransactionAmounts_sharesOwnedFollowingTransaction_value': 'sharesOwnedFollowingTransaction',
-    'postTransactionAmounts_sharesOwnedFollowingTransaction_footnote': 'sharesOwnedFollowingTransactionFootnote',
-    'postTransactionAmounts_valueOwnedFollowingTransaction_value': 'valueOwnedFollowingTransaction',
-    'postTransactionAmounts_valueOwnedFollowingTransaction_footnote': 'valueOwnedFollowingTransactionFootnote',
-    'transactionCoding_transactionFormType': 'transactionFormType',
-    'transactionCoding_footnote': 'transactionCodingFootnote'
-}
-
-# Reporting owner ownership mapping
-reporting_owner_ownership_dict = {
-    'reportingOwnerAddress_rptOwnerCity': 'rptOwnerCity',
-    'reportingOwnerAddress_rptOwnerState': 'rptOwnerState',
-    'reportingOwnerAddress_rptOwnerStateDescription': 'rptOwnerStateDescription',
-    'reportingOwnerAddress_rptOwnerStreet1': 'rptOwnerStreet1',
-    'reportingOwnerAddress_rptOwnerStreet2': 'rptOwnerStreet2',
-    'reportingOwnerAddress_rptOwnerZipCode': 'rptOwnerZipCode',
-    'reportingOwnerId_rptOwnerCik': 'rptOwnerCik',
-    'reportingOwnerId_rptOwnerName': 'rptOwnerName',
-    'reportingOwnerRelationship_isDirector': 'rptOwnerIsDirector',
-    'reportingOwnerRelationship_isOfficer': 'rptOwnerIsOfficer',
-    'reportingOwnerRelationship_isTenPercentOwner': 'rptOwnerIsTenPercentOwner',
-    'reportingOwnerRelationship_isOther': 'rptOwnerIsOther',
-    'reportingOwnerRelationship_officerTitle': 'rptOwnerOfficerTitle',
-    'reportingOwnerRelationship_otherText': 'rptOwnerOtherText'
-}
-
-# Metadata ownership mapping
-metadata_ownership_dict = {
-    'periodOfReport': 'periodOfReport',
-    'issuer_issuerCik': 'issuerCik',
-    'issuer_issuerName': 'issuerName',
-    'issuer_issuerTradingSymbol': 'issuerTradingSymbol',
-    'documentType': 'documentType',
-    'remarks': 'remarks',
-    'documentDescription': 'documentDescription',
-    'footnotes': 'footnotes',
-    'notSubjectToSection16': 'notSubjectToSection16',
-    'form3HoldingsReported': 'form3HoldingsReported',
-    'form4TransactionsReported': 'form4TransactionsReported',
-    'noSecuritiesOwned': 'noSecuritiesOwned',
-    'aff10b5One': 'aff10b5One',
-    'dateOfOriginalSubmission': 'dateOfOriginalSubmission',
-    'schemaVersion': 'schemaVersion'
-}
-
-# Owner signature ownership mapping
-owner_signature_ownership_dict = {
-    'signatureName': 'signatureName',
-    'signatureDate': 'signatureDate'
-}
-
-
-mappings = {
-    'ownership_non_derivative_transactions' : ownership_non_derivative_transactions_dict,
-    'ownership_derivative_transactions' : derivative_transaction_ownership_dict,
-    'ownership_non_derivative_holdings' : non_derivative_holding_ownership_dict,
-    'ownership_derivative_holdings' : derivative_holding_ownership_dict,
-    'ownership_reporting_owner' : reporting_owner_ownership_dict,
-    'ownership_metadata' : metadata_ownership_dict,
-    'ownership_owner_signature' : owner_signature_ownership_dict
-}
{datamule-1.7.0.dist-info → datamule-1.8.0.dist-info}/WHEEL
File without changes

{datamule-1.7.0.dist-info → datamule-1.8.0.dist-info}/top_level.txt
File without changes