datamule 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,502 @@
+ import asyncio
+ import aiohttp
+ from datetime import datetime
+ from urllib.parse import urlencode
+ from tqdm import tqdm
+ from ..utils import RetryException, PreciseRateLimiter, RateMonitor, headers
+
+ class EFTSQuery:
+     def __init__(self, requests_per_second=5.0):
+         self.base_url = "https://efts.sec.gov/LATEST/search-index"
+         self.headers = headers
+         self.limiter = PreciseRateLimiter(requests_per_second)
+         self.rate_monitor = RateMonitor()
+         self.session = None
+         self.pbar = None
+         self.max_page_size = 100  # EFTS API limit
+         self.fetch_queue = asyncio.Queue()
+         self.connection_semaphore = asyncio.Semaphore(5)  # Max 5 concurrent connections
+         self.max_efts_hits = 10000  # EFTS API hard limit
+         self.total_results_to_fetch = 0
+         self.pending_page_requests = []  # Store pages to fetch during planning phase
+         self.initial_query_hit_count = 0  # Track initial query hits to avoid double counting
+         self.was_primary_docs_query = False  # Track if original query was for primary docs
+         self.true_total_docs = 0  # Track the true total number of documents
+         self.processed_doc_count = 0  # Track how many documents we've processed
+         self.original_forms = []  # Track original form request before adding exclusions
+
+     def update_progress_description(self):
+         if self.pbar:
+             reqs_per_sec, mb_per_sec = self.rate_monitor.get_current_rates()
+             self.pbar.set_description(
+                 f"Querying documents [Rate: {reqs_per_sec}/s | {mb_per_sec} MB/s]"
+             )
+
+     async def __aenter__(self):
+         self.session = aiohttp.ClientSession(headers=self.headers)
+         return self
+
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         if self.session:
+             await self.session.close()
+             self.session = None
+
+     def _get_form_exclusions(self, form):
+         """Dynamically generate form exclusions based on form patterns"""
+         # Skip already negated forms
+         if form.startswith('-'):
+             return []
+
+         # For forms without "/A", exclude the amendment version
+         if not form.endswith('/A'):
+             return [f"-{form}/A"]
+
+         # No exclusions for amendment forms
+         return []
+
+     def _prepare_params(self, cik=None, submission_type=None, filing_date=None):
+         params = {}
+
+         # Handle CIK
+         if cik:
+             if isinstance(cik, list):
+                 params['ciks'] = ','.join(str(int(c)).zfill(10) for c in cik)
+             else:
+                 params['ciks'] = str(int(cik)).zfill(10)
+
+         # Handle submission type with exact form matching
+         if submission_type:
+             # Store the original form request for reference
+             if isinstance(submission_type, list):
+                 self.original_forms = submission_type.copy()
+                 form_list = submission_type.copy()  # Create a copy to modify
+             else:
+                 self.original_forms = [submission_type]
+                 form_list = [submission_type]  # Create a list to modify
+
+             # Apply form exclusions for exact matching
+             expanded_forms = []
+             for form in form_list:
+                 # Add the original form
+                 expanded_forms.append(form)
+
+                 # Get and add any exclusions for this form
+                 exclusions = self._get_form_exclusions(form)
+                 expanded_forms.extend(exclusions)
+
+             params['forms'] = ','.join(expanded_forms)
+         else:
+             # Default to primary documents only
+             self.original_forms = ["-0"]
+             params['forms'] = "-0"
+
+         # Handle filing date
+         if filing_date:
+             if isinstance(filing_date, tuple):
+                 start_date, end_date = filing_date
+                 params['startdt'] = start_date
+                 params['enddt'] = end_date
+             elif isinstance(filing_date, list):
+                 # Use the earliest and latest dates in the list
+                 dates = [d for d in filing_date if d]
+                 if dates:
+                     params['startdt'] = min(dates)
+                     params['enddt'] = max(dates)
+             else:
+                 params['startdt'] = filing_date
+                 params['enddt'] = filing_date
+         else:
+             # Default to all available data
+             params['startdt'] = "2001-01-01"
+             params['enddt'] = datetime.now().strftime('%Y-%m-%d')
+
+         return params
+
+     def _get_query_description(self, params):
+         parts = []
+
+         if 'ciks' in params:
+             parts.append(f"cik={params['ciks']}")
+
+         if 'forms' in params:
+             parts.append(f"forms={params['forms']}")
+
+         if 'startdt' in params and 'enddt' in params:
+             parts.append(f"dates={params['startdt']} to {params['enddt']}")
+
+         return ", ".join(parts)
+
+     async def _fetch_json(self, url):
+         async with self.connection_semaphore:
+             async with self.limiter:
+                 try:
+                     async with self.session.get(url) as response:
+                         if response.status == 429:
+                             raise RetryException(url)
+                         response.raise_for_status()
+                         content = await response.read()
+                         await self.rate_monitor.add_request(len(content))
+                         self.update_progress_description()
+                         return await response.json()
+                 except aiohttp.ClientResponseError as e:
+                     if e.status == 429:
+                         raise RetryException(url)
+                     raise
+
+     async def _fetch_worker(self):
+         while True:
+             try:
+                 params, from_val, size_val, callback = await self.fetch_queue.get()
+
+                 url = f"{self.base_url}?{urlencode(params, doseq=True)}&from={from_val}&size={size_val}"
+
+                 try:
+                     data = await self._fetch_json(url)
+                     if 'hits' in data:
+                         hits = data['hits']['hits']
+                         if self.pbar:
+                             self.pbar.update(len(hits))
+                         if callback:
+                             await callback(hits)
+                     self.fetch_queue.task_done()
+                 except RetryException as e:
+                     print(f"\nRate limited. Sleeping for {e.retry_after} seconds...")
+                     await asyncio.sleep(e.retry_after)
+                     # Put the request back in the queue so it is retried
+                     await self.fetch_queue.put((params, from_val, size_val, callback))
+                     self.fetch_queue.task_done()
+                 except Exception as e:
+                     print(f"\nError fetching {url}: {str(e)}")
+                     self.fetch_queue.task_done()
+             except asyncio.CancelledError:
+                 break
+             except Exception as e:
+                 print(f"\nWorker error: {str(e)}")
+                 self.fetch_queue.task_done()
+
+     def _split_date_range(self, start_date, end_date, num_splits=4):
+         start = datetime.strptime(start_date, '%Y-%m-%d')
+         end = datetime.strptime(end_date, '%Y-%m-%d')
+
+         # For a single day, just return it
+         if start.date() == end.date():
+             return [(start_date, end_date)]
+
+         delta = (end - start) / num_splits
+
+         date_ranges = []
+         for i in range(num_splits):
+             range_start = start + delta * i
+             range_end = start + delta * (i + 1) if i < num_splits - 1 else end
+             date_ranges.append((
+                 range_start.strftime('%Y-%m-%d'),
+                 range_end.strftime('%Y-%m-%d')
+             ))
+
+         return date_ranges
+
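For illustration, an even four-way split of a one-year window produces sub-ranges like the following (a sketch; note that adjacent sub-ranges share a boundary date, so if the EFTS startdt/enddt filters are inclusive, a filing dated exactly on a boundary may be fetched in two chunks):

q = EFTSQuery()
print(q._split_date_range("2020-01-01", "2020-12-31", num_splits=4))
# [('2020-01-01', '2020-04-01'), ('2020-04-01', '2020-07-01'),
#  ('2020-07-01', '2020-09-30'), ('2020-09-30', '2020-12-31')]
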
+     def _get_form_groups(self, buckets, max_count):
+         # Greedily pack form buckets (largest first) into groups that stay under max_count
+         buckets_sorted = sorted(buckets, key=lambda x: x['doc_count'], reverse=True)
+         groups = []
+         current_group = []
+         current_count = 0
+
+         for bucket in buckets_sorted:
+             if current_count + bucket['doc_count'] > max_count and current_group:
+                 groups.append(current_group)
+                 current_group = [bucket['key']]
+                 current_count = bucket['doc_count']
+             else:
+                 current_group.append(bucket['key'])
+                 current_count += bucket['doc_count']
+
+         if current_group:
+             groups.append(current_group)
+
+         return groups
+
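As a sketch of the greedy packing above, with hypothetical doc counts in the {'key', 'doc_count'} bucket shape EFTS aggregations use: the largest bucket that would overflow a group starts a new one, and smaller buckets are packed together while they fit under max_count.

buckets = [
    {'key': '4', 'doc_count': 7000},
    {'key': '8-K', 'doc_count': 5000},
    {'key': '10-Q', 'doc_count': 3000},
    {'key': 'SC 13G/A', 'doc_count': 800},
]
q = EFTSQuery()
print(q._get_form_groups(buckets, 9000))
# [['4'], ['8-K', '10-Q', 'SC 13G/A']]
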
+     def _preserve_form_exclusions(self, form_group):
+         """Add necessary exclusions to a form group based on form patterns"""
+         result = form_group.copy()
+
+         # Check each form in the group to see if it needs exclusions
+         for form in form_group:
+             exclusions = self._get_form_exclusions(form)
+
+             # Add exclusions if they're not already in the form group
+             for excluded_form in exclusions:
+                 if excluded_form not in result:
+                     result.append(excluded_form)
+
+         return result
+
+     def _store_page_request(self, params, total_hits, callback=None, is_initial_query=False):
+         """Store pages to be requested later, after planning is complete"""
+         page_size = self.max_page_size
+         # Cap total_hits to what we can actually fetch (max 100 pages of 100 results)
+         actual_hits = min(total_hits, self.max_efts_hits)
+
+         # If this is the initial query, track the hit count to avoid double counting
+         if is_initial_query:
+             self.initial_query_hit_count = actual_hits
+         else:
+             # Keep track of total processed documents
+             self.processed_doc_count += actual_hits
+
+         self.total_results_to_fetch += actual_hits
+
+         num_pages = min((actual_hits + page_size - 1) // page_size, 100)  # Max 100 pages
+
+         for page in range(num_pages):
+             from_val = page * page_size
+             self.pending_page_requests.append((params.copy(), from_val, page_size, callback))
+
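The (actual_hits + page_size - 1) // page_size expression is ceiling division; together with the two caps it bounds any single query at 100 pages of 100 results. A standalone arithmetic check, mirroring the constants above:

for hits in (1, 100, 101, 9999, 25000):
    actual = min(hits, 10000)                     # max_efts_hits cap
    pages = min((actual + 100 - 1) // 100, 100)   # ceiling division, at most 100 pages
    print(hits, '->', pages)
# 1 -> 1, 100 -> 1, 101 -> 2, 9999 -> 100, 25000 -> 100
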
+     async def _test_query_size(self, params):
+         """Get the total number of hits for a query"""
+         url = f"{self.base_url}?{urlencode(params, doseq=True)}&from=0&size=1"
+         data = await self._fetch_json(url)
+         if not data or 'hits' not in data:
+             return 0, None
+         return data['hits']['total']['value'], data
+
+     def _get_total_from_buckets(self, data):
+         """Get the true total count from aggregation buckets"""
+         if 'aggregations' in data and 'form_filter' in data['aggregations']:
+             form_filter = data['aggregations']['form_filter']
+             buckets = form_filter.get('buckets', [])
+             other_count = form_filter.get('sum_other_doc_count', 0)
+
+             # Calculate the total from all buckets, plus anything not bucketed
+             total = sum(bucket['doc_count'] for bucket in buckets) + other_count
+
+             return total
+
+         # Fall back to the reported hits total
+         if 'hits' in data and 'total' in data['hits']:
+             return data['hits']['total']['value']
+
+         return 0
+
+     async def _get_split_strategy(self, data):
+         """Determine how to split a query that would return more than 10000 results"""
+         if 'aggregations' in data and 'form_filter' in data['aggregations']:
+             form_filter = data['aggregations']['form_filter']
+             buckets = form_filter.get('buckets', [])
+             other_count = form_filter.get('sum_other_doc_count', 0)
+
+             # If there are form buckets worth splitting on, try splitting by form first
+             if len(buckets) > 0:
+                 form_groups = self._get_form_groups(buckets, 9000)
+                 if form_groups and len(form_groups) > 1:
+                     return {
+                         'type': 'form',
+                         'form_groups': form_groups,
+                         'other_count': other_count,
+                         'buckets': buckets
+                     }
+
+         # Default to date splitting
+         return {'type': 'date', 'splits': 4}
+
+     def _get_negated_forms(self, buckets):
+         """Generate a negated form list to capture all forms not in the buckets"""
+         negated_forms = [f"-{bucket['key']}" for bucket in buckets]
+         return negated_forms
+
+     async def _process_negated_forms_recursive(self, base_params, negated_forms, start_date, end_date, depth=0, callback=None):
+         """Process queries for negated forms with recursive date splitting"""
+         # Create params with the negated forms and this date range
+         params = base_params.copy()
+         params['forms'] = ','.join(negated_forms)
+         params['startdt'] = start_date
+         params['enddt'] = end_date
+
+         # Test query size
+         total_hits, _ = await self._test_query_size(params)
+
+         # Skip if no results
+         if total_hits == 0:
+             print("Skipping negated forms query - no results returned")
+             return
+
+         date_range = f"{start_date} to {end_date}"
+         print(f"Planning: Analyzing negated forms query (depth {depth}): {date_range} [{total_hits:,} hits]")
+
+         # If small enough, or the range is a single day, process directly
+         if total_hits < self.max_efts_hits or start_date == end_date:
+             self._store_page_request(params, total_hits, callback)
+             return
+
+         # Split the date range more aggressively (10 parts)
+         date_ranges = self._split_date_range(start_date, end_date, 10)
+
+         # Process each date range recursively
+         for sub_start, sub_end in date_ranges:
+             await self._process_negated_forms_recursive(
+                 base_params, negated_forms, sub_start, sub_end, depth + 1, callback
+             )
+
+     async def _process_query_recursive(self, params, processed_forms=None, depth=0, max_depth=3, callback=None, is_initial_query=True):
+         """Process a query with recursive splitting until all chunks are under 10K"""
+         if processed_forms is None:
+             processed_forms = []
+
+         total_hits, data = await self._test_query_size(params)
+
+         query_desc = self._get_query_description(params)
+         print(f"Planning: Analyzing {' '*depth}query: {query_desc} [{total_hits:,} hits]")
+
+         # At the maximum recursion depth, or if hits are under the limit, process directly
+         if depth >= max_depth or total_hits < self.max_efts_hits:
+             self._store_page_request(params, total_hits, callback, is_initial_query)
+             return processed_forms
+
+         # Need to split further
+         split_strategy = await self._get_split_strategy(data)
+
+         if split_strategy['type'] == 'form':
+             # Split by form groups
+             form_groups = split_strategy['form_groups']
+
+             for group in form_groups:
+                 # Preserve necessary form exclusions when splitting form groups
+                 form_group = self._preserve_form_exclusions(group)
+                 form_params = params.copy()
+                 form_params['forms'] = ','.join(form_group)
+                 # Track which forms we've processed
+                 processed_forms.extend(group)
+                 await self._process_query_recursive(form_params, processed_forms, depth + 1, max_depth, callback, False)
+
+             # Return processed forms to the parent
+             return processed_forms
+         else:
+             # Split by date ranges
+             num_splits = split_strategy['splits']
+             start_date = params['startdt']
+             end_date = params['enddt']
+             date_ranges = self._split_date_range(start_date, end_date, num_splits)
+
+             for start, end in date_ranges:
+                 date_params = params.copy()
+                 date_params['startdt'] = start
+                 date_params['enddt'] = end
+                 await self._process_query_recursive(date_params, processed_forms, depth + 1, max_depth, callback, False)
+
+             # Return processed forms to the parent
+             return processed_forms
+
+     async def _start_query_phase(self, callback):
+         """Start the query phase after planning is complete"""
+         print("\n--- Starting query phase ---")
+         self.pbar = tqdm(total=self.total_results_to_fetch, desc="Querying documents [Rate: 0/s | 0 MB/s]")
+
+         # Queue all pending page requests (each carries its own callback)
+         for params, from_val, size_val, cb in self.pending_page_requests:
+             await self.fetch_queue.put((params, from_val, size_val, cb))
+
+     async def query(self, cik=None, submission_type=None, filing_date=None, callback=None):
+         params = self._prepare_params(cik, submission_type, filing_date)
+         all_hits = []
+
+         # Check if this is a primary documents query
+         self.was_primary_docs_query = '-0' in params.get('forms', '').split(',')
+
+         # Collector callback to gather all hits
+         async def collect_hits(hits):
+             all_hits.extend(hits)
+             if callback:
+                 await callback(hits)
+
+         async with self:
+             # Reset state for the new query
+             self.total_results_to_fetch = 0
+             self.pending_page_requests = []
+             self.initial_query_hit_count = 0
+             self.processed_doc_count = 0
+             self.pbar = None
+
+             # First, check the query size
+             print("\n--- Starting query planning phase ---")
+             print("Analyzing request and splitting into manageable chunks...")
+
+             total_hits, data = await self._test_query_size(params)
+
+             if total_hits == 0:
+                 print("No results found for this query.")
+                 return []
+
+             # Get an accurate total from the aggregation buckets
+             self.true_total_docs = self._get_total_from_buckets(data)
+             print(f"Found {self.true_total_docs:,} total documents to retrieve.")
+
+             # Start worker tasks
+             workers = [asyncio.create_task(self._fetch_worker()) for _ in range(5)]
+
+             # Process the query recursively, splitting as needed, and get the processed forms
+             processed_forms = await self._process_query_recursive(params, None, 0, 4, collect_hits, True)
+
+             # Check whether forms left out of our form splitting still need processing.
+             # This applies only if:
+             #   1. We split by form (processed_forms is not empty)
+             #   2. We haven't processed all documents yet (processed_doc_count < true_total_docs)
+             #   3. This was a forms=-0 query originally (for primary docs)
+             if processed_forms and self.processed_doc_count < self.true_total_docs:
+                 if self.was_primary_docs_query:
+                     # We split a primary documents query, so the remaining document types
+                     # are captured with a negated form query that keeps the primary docs constraint
+                     negated_forms = [f"-{form}" for form in processed_forms]
+                     negated_forms.append('-0')  # Keep primary documents constraint
+
+                     remaining_docs = self.true_total_docs - self.processed_doc_count
+                     print(f"Planning: Analyzing remaining primary document forms using negation (~{remaining_docs:,} hits)")
+
+                     # Process the negated forms query with recursive date splitting
+                     start_date = params['startdt']
+                     end_date = params['enddt']
+                     await self._process_negated_forms_recursive(
+                         params, negated_forms, start_date, end_date, 0, collect_hits
+                     )
+                 else:
+                     print("No additional forms to process with negation - not a primary documents query")
+             else:
+                 print("No additional forms to process with negation")
+
+             # Start the download phase
+             await self._start_query_phase(collect_hits)
+
+             # Wait for all queued fetches to complete
+             await self.fetch_queue.join()
+
+             # Cancel worker tasks
+             for worker in workers:
+                 worker.cancel()
+
+             await asyncio.gather(*workers, return_exceptions=True)
+
+             # Clean up
+             if self.pbar:
+                 self.pbar.close()
+                 self.pbar = None
+
+             print(f"\n--- Query complete: {len(all_hits):,} submissions retrieved ---")
+             return all_hits
+
+ def query_efts(cik=None, submission_type=None, filing_date=None, requests_per_second=5.0, callback=None):
+     """
+     Convenience function to run a query without managing the async context.
+     """
+     async def run_query():
+         query = EFTSQuery(requests_per_second=requests_per_second)
+         return await query.query(cik, submission_type, filing_date, callback)
+
+     return asyncio.run(run_query())
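A minimal usage sketch from synchronous code (the CIK and date range are illustrative; since query_efts calls asyncio.run, it must not be invoked from inside an already-running event loop):

hits = query_efts(
    cik="320193",                              # Apple Inc.
    submission_type="8-K",
    filing_date=("2024-01-01", "2024-03-31"),
)
print(f"{len(hits)} hits retrieved")
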
@@ -0,0 +1,126 @@
+ import asyncio
+ from datetime import datetime
+ from .eftsquery import EFTSQuery  # Import the class directly instead of the function
+ from ..rss.monitor import start_monitor
+ import pytz
+
+
+ async def _process_efts_hits(hits, collected_accession_numbers, data_callback=None):
+     """Process EFTS hits, collect accession numbers, and call the data callback."""
+     processed_hits = []
+
+     for hit in hits:
+         try:
+             source = hit.get('_source', {})
+
+             # Extract key fields
+             accession_number = source.get('adsh')
+
+             # Extract submission_type (form) and ciks
+             submission_type = source.get('form')
+             ciks = source.get('ciks', [])
+             ciks = [str(int(c)) for c in ciks]
+
+             # Create a standardized filing record
+             filing = {
+                 'accession_number': accession_number,
+                 'submission_type': submission_type,
+                 'ciks': ciks
+             }
+
+             processed_hits.append(filing)
+             collected_accession_numbers.add(accession_number)  # Set membership keeps duplicate checks O(1)
+
+         except Exception as e:
+             print(f"Error processing EFTS hit: {e}")
+
+     # Call the data callback if provided
+     if data_callback and processed_hits:
+         await data_callback(processed_hits)
+
+     return processed_hits
+
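A sketch of the hit shape this helper expects, showing only the _source fields it actually reads (the values are hypothetical):

hit = {
    '_source': {
        'adsh': '0000320193-24-000069',   # accession number
        'form': '8-K',
        'ciks': ['0000320193'],
    }
}
seen = set()
records = asyncio.run(_process_efts_hits([hit], seen))
# records == [{'accession_number': '0000320193-24-000069',
#              'submission_type': '8-K', 'ciks': ['320193']}]
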
+ async def _master_monitor_impl(data_callback=None, poll_callback=None, submission_type=None, cik=None,
+                                polling_interval=200, requests_per_second=2.0, quiet=True, start_date=None):
+     """Implementation of the master monitor."""
+     # Default the start date to today (US/Eastern) if not provided
+     eastern_tz = pytz.timezone('US/Eastern')
+     current_date = datetime.now(eastern_tz).strftime('%Y-%m-%d')
+     if not start_date:
+         start_date = current_date
+
+     # A set gives efficient lookups for accession numbers
+     collected_accession_numbers = set()
+
+     if not quiet:
+         print(f"Starting SEC monitoring from {start_date}")
+
+     # Step 1: Query EFTS for all filings from start_date up to the current date
+     if not quiet:
+         print(f"Fetching filings from {start_date} to {current_date}...")
+
+     # Wrapper callback that collects accession numbers as hits arrive
+     async def process_callback(hits):
+         await _process_efts_hits(hits, collected_accession_numbers, data_callback)
+
+     # Create an EFTSQuery instance; query() enters its own client session,
+     # so no surrounding "async with" is needed here
+     efts_query = EFTSQuery(requests_per_second=requests_per_second)
+
+     # Run the EFTS query for the date range
+     await efts_query.query(
+         cik=cik,
+         submission_type=submission_type,
+         filing_date=(start_date, current_date),
+         callback=process_callback
+     )
+
+     if not quiet:
+         print(f"Historical query complete. Collected {len(collected_accession_numbers)} accession numbers.")
+
+     # Step 2: Hand off to the RSS monitor with the collected accession numbers
+     if not quiet:
+         print("Starting real-time RSS monitoring...")
+
+     # Start the RSS monitor with the EFTS accession numbers as the skip list
+     # and an empty list for ongoing tracking
+     await start_monitor(
+         data_callback=data_callback,
+         poll_callback=poll_callback,
+         submission_type=submission_type,
+         cik=cik,
+         polling_interval=polling_interval,
+         requests_per_second=requests_per_second,
+         quiet=quiet,
+         known_accession_numbers=[],  # Start with an empty list for ongoing tracking
+         skip_initial_accession_numbers=collected_accession_numbers  # EFTS results to skip
+     )
+
+ def monitor(data_callback=None, poll_callback=None, submission_type=None, cik=None,
+             polling_interval=200, requests_per_second=2.0, quiet=True, start_date=None):
+     """
+     Monitor SEC filings by combining EFTS historical queries with real-time RSS monitoring.
+
+     Parameters:
+         data_callback (callable): Async function to call when new filings are found.
+             Called with a list of dicts containing 'accession_number',
+             'submission_type', and 'ciks'.
+         poll_callback (callable): Async function to call during RSS polling wait periods.
+         submission_type (str or list): Form type(s) to monitor (e.g., "8-K", "10-Q").
+         cik (str or list): CIK(s) to monitor.
+         polling_interval (int): Polling interval in milliseconds for the RSS monitor.
+         requests_per_second (float): Maximum requests per second.
+         quiet (bool): Suppress verbose output.
+         start_date (str): ISO format date (YYYY-MM-DD), interpreted in US/Eastern time,
+             from which to start monitoring. If None, starts from the current date.
+     """
+     return asyncio.run(_master_monitor_impl(
+         data_callback=data_callback,
+         poll_callback=poll_callback,
+         submission_type=submission_type,
+         cik=cik,
+         polling_interval=polling_interval,
+         requests_per_second=requests_per_second,
+         quiet=quiet,
+         start_date=start_date
+     ))
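A minimal usage sketch (the form type and start date are illustrative):

async def on_filings(filings):
    for f in filings:
        print(f['accession_number'], f['submission_type'], f['ciks'])

monitor(
    data_callback=on_filings,
    submission_type="8-K",
    start_date="2024-01-02",   # backfill from this date, then follow the RSS feed
    quiet=False,
)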