datamule-1.0.2-py3-none-any.whl → datamule-1.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/mapping_dicts/txt_mapping_dicts.py +6 -4
- datamule/monitor.py +57 -12
- {datamule-1.0.2.dist-info → datamule-1.0.3.dist-info}/METADATA +1 -1
- {datamule-1.0.2.dist-info → datamule-1.0.3.dist-info}/RECORD +6 -6
- {datamule-1.0.2.dist-info → datamule-1.0.3.dist-info}/WHEEL +0 -0
- {datamule-1.0.2.dist-info → datamule-1.0.3.dist-info}/top_level.txt +0 -0
@@ -29,10 +29,12 @@ dict_sgml = {
|
|
29
29
|
}
|
30
30
|
}
|
31
31
|
|
32
|
-
item_pattern_mapping = r"^\n\n\s*(ITEM|Item)\
|
33
|
-
|
32
|
+
item_pattern_mapping = r"^\n\n\s*(ITEM|Item)\s+(\d+[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
|
33
|
+
item_pattern_mapping_8k = r"^\n\n\s*(ITEM|Item)\s+(\d+(?:\.\d+)?[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
|
34
|
+
part_pattern_mapping = r"^\n\n\s*(PART|Part)\s+(?:I{1,3}|IV)\.?"
|
34
35
|
|
35
36
|
item_pattern_standardization = r"^\s*(?:ITEM|Item)\s+(\d+[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
|
37
|
+
item_pattern_standardization_8k = r"^\s*(?:ITEM|Item)\s+(\d+(?:\.\d+)?[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN)\.?"
|
36
38
|
part_pattern_standardization = r"^\s*(?:PART|Part)\s+([IVX]+)"
|
37
39
|
|
38
40
|
|
@@ -194,7 +196,7 @@ dict_8k["rules"]["mappings"].extend([
|
|
194
196
|
{
|
195
197
|
"type": "hierarchy",
|
196
198
|
"name": "item",
|
197
|
-
"pattern":
|
199
|
+
"pattern": item_pattern_mapping_8k,
|
198
200
|
"hierarchy": 0
|
199
201
|
},
|
200
202
|
])
|
@@ -204,7 +206,7 @@ dict_8k['transformations'] = [
|
|
204
206
|
"type": "standardize",
|
205
207
|
"match": {
|
206
208
|
"type": "item",
|
207
|
-
"text_pattern":
|
209
|
+
"text_pattern": item_pattern_standardization_8k
|
208
210
|
},
|
209
211
|
"output": {
|
210
212
|
"format": "item{}",
|
datamule/monitor.py
CHANGED
@@ -11,6 +11,15 @@ def _get_current_eastern_date():
|
|
11
11
|
eastern = pytz.timezone('America/New_York')
|
12
12
|
return datetime.now(eastern)
|
13
13
|
|
14
|
+
def _parse_date(date_str):
|
15
|
+
"""Parse YYYY-MM-DD date string to datetime object in Eastern timezone"""
|
16
|
+
try:
|
17
|
+
date = datetime.strptime(date_str, '%Y-%m-%d')
|
18
|
+
eastern = pytz.timezone('America/New_York')
|
19
|
+
return eastern.localize(date)
|
20
|
+
except ValueError as e:
|
21
|
+
raise ValueError(f"Invalid date format. Please use YYYY-MM-DD. Error: {str(e)}")
|
22
|
+
|
14
23
|
class PreciseRateLimiter:
|
15
24
|
def __init__(self, rate, interval=1.0):
|
16
25
|
self.rate = rate # requests per interval
|
@@ -67,7 +76,8 @@ class RateMonitor:
|
|
67
76
|
class Monitor:
|
68
77
|
def __init__(self):
|
69
78
|
self.last_total = 0
|
70
|
-
self.last_date =
|
79
|
+
self.last_date = None
|
80
|
+
self.current_monitor_date = None
|
71
81
|
self.submissions = []
|
72
82
|
self.max_hits = 10000
|
73
83
|
self.limiter = PreciseRateLimiter(5) # 5 requests per second
|
@@ -91,16 +101,29 @@ class Monitor:
|
|
91
101
|
"""Poll API until new submissions are found."""
|
92
102
|
while True:
|
93
103
|
current_date = _get_current_eastern_date()
|
94
|
-
date_str = current_date.strftime('%Y-%m-%d')
|
95
|
-
timestamp = int(time.time()) # Add this line
|
96
104
|
|
97
|
-
|
98
|
-
|
105
|
+
# If we're caught up to current date, use it, otherwise use our tracking date
|
106
|
+
if self.current_monitor_date.date() >= current_date.date():
|
107
|
+
self.current_monitor_date = current_date
|
108
|
+
else:
|
109
|
+
# If we're behind current date and haven't finished current date's processing,
|
110
|
+
# continue with current date
|
111
|
+
if self.last_date == self.current_monitor_date.strftime('%Y-%m-%d'):
|
112
|
+
pass
|
113
|
+
else:
|
114
|
+
# Move to next day
|
115
|
+
self.current_monitor_date += timedelta(days=1)
|
116
|
+
|
117
|
+
date_str = self.current_monitor_date.strftime('%Y-%m-%d')
|
118
|
+
timestamp = int(time.time())
|
119
|
+
|
120
|
+
if self.last_date != date_str:
|
121
|
+
print(f"Processing date: {date_str}")
|
99
122
|
self.last_total = 0
|
100
123
|
self.submissions = []
|
101
124
|
self.last_date = date_str
|
102
125
|
|
103
|
-
poll_url = f"{base_url}&startdt={date_str}&enddt={date_str}&v={timestamp}"
|
126
|
+
poll_url = f"{base_url}&startdt={date_str}&enddt={date_str}&v={timestamp}"
|
104
127
|
if not quiet:
|
105
128
|
print(f"Polling {poll_url}")
|
106
129
|
|
@@ -109,10 +132,16 @@ class Monitor:
|
|
109
132
|
if data:
|
110
133
|
current_total = data['hits']['total']['value']
|
111
134
|
if current_total > self.last_total:
|
112
|
-
print(f"Found {current_total - self.last_total} new submissions")
|
135
|
+
print(f"Found {current_total - self.last_total} new submissions for {date_str}")
|
113
136
|
self.last_total = current_total
|
114
137
|
return current_total, data, poll_url
|
115
138
|
self.last_total = current_total
|
139
|
+
|
140
|
+
# If we have no hits and we're processing a past date,
|
141
|
+
# we can move to the next day immediately
|
142
|
+
if current_total == 0 and self.current_monitor_date.date() < current_date.date():
|
143
|
+
continue
|
144
|
+
|
116
145
|
except Exception as e:
|
117
146
|
print(f"Error in poll: {str(e)}")
|
118
147
|
|
@@ -120,7 +149,6 @@ class Monitor:
|
|
120
149
|
|
121
150
|
async def _retrieve_batch(self, session, poll_url, from_positions, quiet):
|
122
151
|
"""Retrieve a batch of submissions concurrently."""
|
123
|
-
# The poll_url already contains the timestamp from _poll
|
124
152
|
tasks = [
|
125
153
|
self._fetch_json(
|
126
154
|
session,
|
@@ -176,11 +204,17 @@ class Monitor:
|
|
176
204
|
|
177
205
|
return submissions
|
178
206
|
|
179
|
-
async def _monitor(self, callback, form=None, cik=None, ticker=None, poll_interval=1000, quiet=True):
|
207
|
+
async def _monitor(self, callback, form=None, cik=None, ticker=None, start_date=None, poll_interval=1000, quiet=True):
|
180
208
|
"""Main monitoring loop with parallel processing."""
|
181
209
|
if poll_interval < 100:
|
182
210
|
raise ValueError("SEC rate limit is 10 requests per second, set poll_interval to 100ms or higher")
|
183
211
|
|
212
|
+
# Set up initial monitoring date
|
213
|
+
if start_date:
|
214
|
+
self.current_monitor_date = _parse_date(start_date)
|
215
|
+
else:
|
216
|
+
self.current_monitor_date = _get_current_eastern_date()
|
217
|
+
|
184
218
|
# Handle form parameter
|
185
219
|
if form is None:
|
186
220
|
form = ['-0']
|
@@ -233,6 +267,17 @@ class Monitor:
|
|
233
267
|
|
234
268
|
await asyncio.sleep(poll_interval / 1000)
|
235
269
|
|
236
|
-
def monitor_submissions(self, callback=None, form=None, cik=None, ticker=None, poll_interval=1000, quiet=True):
|
237
|
-
"""
|
238
|
-
|
270
|
+
def monitor_submissions(self, callback=None, form=None, cik=None, ticker=None, start_date=None, poll_interval=1000, quiet=True):
|
271
|
+
"""
|
272
|
+
Start the monitoring process.
|
273
|
+
|
274
|
+
Parameters:
|
275
|
+
callback (callable, optional): Function to call when new submissions are found
|
276
|
+
form (str or list, optional): Form type(s) to monitor
|
277
|
+
cik (str or list, optional): CIK(s) to monitor
|
278
|
+
ticker (str, optional): Ticker symbol to monitor
|
279
|
+
start_date (str, optional): Start date in YYYY-MM-DD format
|
280
|
+
poll_interval (int, optional): Polling interval in milliseconds
|
281
|
+
quiet (bool, optional): Suppress verbose output
|
282
|
+
"""
|
283
|
+
asyncio.run(self._monitor(callback, form, cik, ticker, start_date, poll_interval, quiet))
|
@@ -2,7 +2,7 @@ datamule/__init__.py,sha256=IDVK3i5i5DxLlQJ_71aYkloGNi528JOUx8hU6bDzLXM,1255
|
|
2
2
|
datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
|
3
3
|
datamule/document.py,sha256=-XT3nkRb1oCOken7n3AUCI8itQNfuKVN86cuD3rys3E,10912
|
4
4
|
datamule/helper.py,sha256=tr3AQWus9dHNZFKpLSglWjcb8zmm5qDXjOWACMhvMxQ,4594
|
5
|
-
datamule/monitor.py,sha256=
|
5
|
+
datamule/monitor.py,sha256=NgzOB_RCLAZDrowYagPWIM4PGNngd1lA3K9Qplkk3Ys,11325
|
6
6
|
datamule/packageupdater.py,sha256=vEGqlTj6FudIeVHBVJltPh2eBDEqMG9HYmnyrRVKeSU,9595
|
7
7
|
datamule/portfolio.py,sha256=U_QRNk_CbMmi3nJ0VBIwc9SVEGq6kA8LCZHBj9nOGXs,4032
|
8
8
|
datamule/submission.py,sha256=uioIYJbsoe-87nRPyzlo-LZ8Hp7HG7A4KPGSnw86PKY,2790
|
@@ -17,7 +17,7 @@ datamule/data/sec-glossary.csv,sha256=-cN7GjiadLw5C1sv4zSeCnfeZZDYeSgJl-0ydarMAo
|
|
17
17
|
datamule/data/xbrl_descriptions.csv,sha256=SQ9wUURNqG424rnTiZtopsxV2q-PvU4NMj52LqgDsvg,2621524
|
18
18
|
datamule/downloader/downloader.py,sha256=vnMsw0oWqRa84scu6ZcywxbJxsIn38vLV0tybakx3jQ,15217
|
19
19
|
datamule/downloader/premiumdownloader.py,sha256=YhGFwkYqjLkdc5ex2YKM-L7nBAPm5MMCdTwVVP0JO78,14314
|
20
|
-
datamule/mapping_dicts/txt_mapping_dicts.py,sha256=
|
20
|
+
datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
|
21
21
|
datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHhhMKp0CLqVKV5OI,438
|
22
22
|
datamule/mulebot/__init__.py,sha256=YvZXV6xQ0iP-oGD8rloufjdwJL6D46P3NNr0CY9PQCA,29
|
23
23
|
datamule/mulebot/helper.py,sha256=olztOwltfELZ-IERM2bRNLBavD04kfB6ueWTisJAleA,1080
|
@@ -37,7 +37,7 @@ datamule/mulebot/mulebot_server/static/scripts/suggestions.js,sha256=TCyz8OYuXeI
|
|
37
37
|
datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js,sha256=UtkUpLvELNI4Ibpb7VstgVA9Tk-8jbkxXhmXsgufFa4,4437
|
38
38
|
datamule/mulebot/mulebot_server/static/scripts/utils.js,sha256=oGPMtyT9dvuqHqrfZj33t4vLZiF8UJrMXB1hpPXRNu4,1255
|
39
39
|
datamule/mulebot/mulebot_server/templates/chat-minimalist.html,sha256=MsTbgpnLD0JCQiKKP3XeeNJRNsRqKsRa1j_XXW7nBKw,6975
|
40
|
-
datamule-1.0.
|
41
|
-
datamule-1.0.
|
42
|
-
datamule-1.0.
|
43
|
-
datamule-1.0.
|
40
|
+
datamule-1.0.3.dist-info/METADATA,sha256=8PZAcyMcoQTNaV21b9N09t8cd4Uw0Kxm6aImKXlSsCo,732
|
41
|
+
datamule-1.0.3.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
42
|
+
datamule-1.0.3.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
|
43
|
+
datamule-1.0.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|