datamule 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,10 +29,12 @@ dict_sgml = {
29
29
  }
30
30
  }
31
31
 
32
- item_pattern_mapping = r"^\n\n\s*(ITEM|Item)\b"
33
- part_pattern_mapping = r"^\n\n\s*(PART|Part)\b"
32
+ item_pattern_mapping = r"^\n\n\s*(ITEM|Item)\s+(\d+[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
33
+ item_pattern_mapping_8k = r"^\n\n\s*(ITEM|Item)\s+(\d+(?:\.\d+)?[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
34
+ part_pattern_mapping = r"^\n\n\s*(PART|Part)\s+(?:I{1,3}|IV)\.?"
34
35
 
35
36
  item_pattern_standardization = r"^\s*(?:ITEM|Item)\s+(\d+[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
37
+ item_pattern_standardization_8k = r"^\s*(?:ITEM|Item)\s+(\d+(?:\.\d+)?[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN)\.?"
36
38
  part_pattern_standardization = r"^\s*(?:PART|Part)\s+([IVX]+)"
37
39
 
38
40
 
@@ -194,7 +196,7 @@ dict_8k["rules"]["mappings"].extend([
194
196
  {
195
197
  "type": "hierarchy",
196
198
  "name": "item",
197
- "pattern": item_pattern_mapping,
199
+ "pattern": item_pattern_mapping_8k,
198
200
  "hierarchy": 0
199
201
  },
200
202
  ])
@@ -204,7 +206,7 @@ dict_8k['transformations'] = [
204
206
  "type": "standardize",
205
207
  "match": {
206
208
  "type": "item",
207
- "text_pattern": item_pattern_standardization
209
+ "text_pattern": item_pattern_standardization_8k
208
210
  },
209
211
  "output": {
210
212
  "format": "item{}",
datamule/monitor.py CHANGED
@@ -11,6 +11,15 @@ def _get_current_eastern_date():
11
11
  eastern = pytz.timezone('America/New_York')
12
12
  return datetime.now(eastern)
13
13
 
14
+ def _parse_date(date_str):
15
+ """Parse YYYY-MM-DD date string to datetime object in Eastern timezone"""
16
+ try:
17
+ date = datetime.strptime(date_str, '%Y-%m-%d')
18
+ eastern = pytz.timezone('America/New_York')
19
+ return eastern.localize(date)
20
+ except ValueError as e:
21
+ raise ValueError(f"Invalid date format. Please use YYYY-MM-DD. Error: {str(e)}")
22
+
14
23
  class PreciseRateLimiter:
15
24
  def __init__(self, rate, interval=1.0):
16
25
  self.rate = rate # requests per interval
@@ -67,7 +76,8 @@ class RateMonitor:
67
76
  class Monitor:
68
77
  def __init__(self):
69
78
  self.last_total = 0
70
- self.last_date = _get_current_eastern_date()
79
+ self.last_date = None
80
+ self.current_monitor_date = None
71
81
  self.submissions = []
72
82
  self.max_hits = 10000
73
83
  self.limiter = PreciseRateLimiter(5) # 5 requests per second
@@ -91,16 +101,29 @@ class Monitor:
91
101
  """Poll API until new submissions are found."""
92
102
  while True:
93
103
  current_date = _get_current_eastern_date()
94
- date_str = current_date.strftime('%Y-%m-%d')
95
- timestamp = int(time.time()) # Add this line
96
104
 
97
- if self.last_date != current_date.strftime('%Y-%m-%d'):
98
- print(f"New date: {date_str}")
105
+ # If we're caught up to current date, use it, otherwise use our tracking date
106
+ if self.current_monitor_date.date() >= current_date.date():
107
+ self.current_monitor_date = current_date
108
+ else:
109
+ # If we're behind current date and haven't finished current date's processing,
110
+ # continue with current date
111
+ if self.last_date == self.current_monitor_date.strftime('%Y-%m-%d'):
112
+ pass
113
+ else:
114
+ # Move to next day
115
+ self.current_monitor_date += timedelta(days=1)
116
+
117
+ date_str = self.current_monitor_date.strftime('%Y-%m-%d')
118
+ timestamp = int(time.time())
119
+
120
+ if self.last_date != date_str:
121
+ print(f"Processing date: {date_str}")
99
122
  self.last_total = 0
100
123
  self.submissions = []
101
124
  self.last_date = date_str
102
125
 
103
- poll_url = f"{base_url}&startdt={date_str}&enddt={date_str}&v={timestamp}" # Modified this line
126
+ poll_url = f"{base_url}&startdt={date_str}&enddt={date_str}&v={timestamp}"
104
127
  if not quiet:
105
128
  print(f"Polling {poll_url}")
106
129
 
@@ -109,10 +132,16 @@ class Monitor:
109
132
  if data:
110
133
  current_total = data['hits']['total']['value']
111
134
  if current_total > self.last_total:
112
- print(f"Found {current_total - self.last_total} new submissions")
135
+ print(f"Found {current_total - self.last_total} new submissions for {date_str}")
113
136
  self.last_total = current_total
114
137
  return current_total, data, poll_url
115
138
  self.last_total = current_total
139
+
140
+ # If we have no hits and we're processing a past date,
141
+ # we can move to the next day immediately
142
+ if current_total == 0 and self.current_monitor_date.date() < current_date.date():
143
+ continue
144
+
116
145
  except Exception as e:
117
146
  print(f"Error in poll: {str(e)}")
118
147
 
@@ -120,7 +149,6 @@ class Monitor:
120
149
 
121
150
  async def _retrieve_batch(self, session, poll_url, from_positions, quiet):
122
151
  """Retrieve a batch of submissions concurrently."""
123
- # The poll_url already contains the timestamp from _poll
124
152
  tasks = [
125
153
  self._fetch_json(
126
154
  session,
@@ -176,11 +204,17 @@ class Monitor:
176
204
 
177
205
  return submissions
178
206
 
179
- async def _monitor(self, callback, form=None, cik=None, ticker=None, poll_interval=1000, quiet=True):
207
+ async def _monitor(self, callback, form=None, cik=None, ticker=None, start_date=None, poll_interval=1000, quiet=True):
180
208
  """Main monitoring loop with parallel processing."""
181
209
  if poll_interval < 100:
182
210
  raise ValueError("SEC rate limit is 10 requests per second, set poll_interval to 100ms or higher")
183
211
 
212
+ # Set up initial monitoring date
213
+ if start_date:
214
+ self.current_monitor_date = _parse_date(start_date)
215
+ else:
216
+ self.current_monitor_date = _get_current_eastern_date()
217
+
184
218
  # Handle form parameter
185
219
  if form is None:
186
220
  form = ['-0']
@@ -233,6 +267,17 @@ class Monitor:
233
267
 
234
268
  await asyncio.sleep(poll_interval / 1000)
235
269
 
236
- def monitor_submissions(self, callback=None, form=None, cik=None, ticker=None, poll_interval=1000, quiet=True):
237
- """Start the monitoring process."""
238
- asyncio.run(self._monitor(callback, form, cik, ticker, poll_interval, quiet))
270
+ def monitor_submissions(self, callback=None, form=None, cik=None, ticker=None, start_date=None, poll_interval=1000, quiet=True):
271
+ """
272
+ Start the monitoring process.
273
+
274
+ Parameters:
275
+ callback (callable, optional): Function to call when new submissions are found
276
+ form (str or list, optional): Form type(s) to monitor
277
+ cik (str or list, optional): CIK(s) to monitor
278
+ ticker (str, optional): Ticker symbol to monitor
279
+ start_date (str, optional): Start date in YYYY-MM-DD format
280
+ poll_interval (int, optional): Polling interval in milliseconds
281
+ quiet (bool, optional): Suppress verbose output
282
+ """
283
+ asyncio.run(self._monitor(callback, form, cik, ticker, start_date, poll_interval, quiet))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: Making it easier to use SEC filings.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -2,7 +2,7 @@ datamule/__init__.py,sha256=IDVK3i5i5DxLlQJ_71aYkloGNi528JOUx8hU6bDzLXM,1255
2
2
  datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
3
3
  datamule/document.py,sha256=-XT3nkRb1oCOken7n3AUCI8itQNfuKVN86cuD3rys3E,10912
4
4
  datamule/helper.py,sha256=tr3AQWus9dHNZFKpLSglWjcb8zmm5qDXjOWACMhvMxQ,4594
5
- datamule/monitor.py,sha256=AfhGqC_GFTYWemRKgYE85V7rIGMN_pbcpxW6kORQtpw,9273
5
+ datamule/monitor.py,sha256=NgzOB_RCLAZDrowYagPWIM4PGNngd1lA3K9Qplkk3Ys,11325
6
6
  datamule/packageupdater.py,sha256=vEGqlTj6FudIeVHBVJltPh2eBDEqMG9HYmnyrRVKeSU,9595
7
7
  datamule/portfolio.py,sha256=U_QRNk_CbMmi3nJ0VBIwc9SVEGq6kA8LCZHBj9nOGXs,4032
8
8
  datamule/submission.py,sha256=uioIYJbsoe-87nRPyzlo-LZ8Hp7HG7A4KPGSnw86PKY,2790
@@ -17,7 +17,7 @@ datamule/data/sec-glossary.csv,sha256=-cN7GjiadLw5C1sv4zSeCnfeZZDYeSgJl-0ydarMAo
17
17
  datamule/data/xbrl_descriptions.csv,sha256=SQ9wUURNqG424rnTiZtopsxV2q-PvU4NMj52LqgDsvg,2621524
18
18
  datamule/downloader/downloader.py,sha256=vnMsw0oWqRa84scu6ZcywxbJxsIn38vLV0tybakx3jQ,15217
19
19
  datamule/downloader/premiumdownloader.py,sha256=YhGFwkYqjLkdc5ex2YKM-L7nBAPm5MMCdTwVVP0JO78,14314
20
- datamule/mapping_dicts/txt_mapping_dicts.py,sha256=Eh6qYhseuKjjnxGh0A5blHr7mbq9CigFn6Zv9xcG2zU,5783
20
+ datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
21
21
  datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHhhMKp0CLqVKV5OI,438
22
22
  datamule/mulebot/__init__.py,sha256=YvZXV6xQ0iP-oGD8rloufjdwJL6D46P3NNr0CY9PQCA,29
23
23
  datamule/mulebot/helper.py,sha256=olztOwltfELZ-IERM2bRNLBavD04kfB6ueWTisJAleA,1080
@@ -37,7 +37,7 @@ datamule/mulebot/mulebot_server/static/scripts/suggestions.js,sha256=TCyz8OYuXeI
37
37
  datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js,sha256=UtkUpLvELNI4Ibpb7VstgVA9Tk-8jbkxXhmXsgufFa4,4437
38
38
  datamule/mulebot/mulebot_server/static/scripts/utils.js,sha256=oGPMtyT9dvuqHqrfZj33t4vLZiF8UJrMXB1hpPXRNu4,1255
39
39
  datamule/mulebot/mulebot_server/templates/chat-minimalist.html,sha256=MsTbgpnLD0JCQiKKP3XeeNJRNsRqKsRa1j_XXW7nBKw,6975
40
- datamule-1.0.2.dist-info/METADATA,sha256=GF3kWzUe2GxJjJwRyewMfLh8intVCMZuSIngk6o1J4g,732
41
- datamule-1.0.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
42
- datamule-1.0.2.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
43
- datamule-1.0.2.dist-info/RECORD,,
40
+ datamule-1.0.3.dist-info/METADATA,sha256=8PZAcyMcoQTNaV21b9N09t8cd4Uw0Kxm6aImKXlSsCo,732
41
+ datamule-1.0.3.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
42
+ datamule-1.0.3.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
43
+ datamule-1.0.3.dist-info/RECORD,,