datamule 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -12,7 +12,6 @@ from ..utils import headers
12
12
 
13
13
  async def download_sec_file(url, target_path):
14
14
  """Download submissions.zip from SEC website with progress bar."""
15
-
16
15
 
17
16
  async with aiohttp.ClientSession() as session:
18
17
  async with session.get(url, headers=headers) as response:
@@ -53,6 +52,9 @@ def extract_metadata(data):
53
52
  for field in ['street1', 'street2', 'city', 'stateOrCountry', 'zipCode', 'stateOrCountryDescription']:
54
53
  result[f"{addr_type}_{field}"] = addr.get(field)
55
54
 
55
+ # Add start_date field (will be populated later)
56
+ result['start_date'] = ''
57
+
56
58
  return result
57
59
 
58
60
  def extract_earliest_filing_date(data):
@@ -78,8 +80,12 @@ def extract_earliest_filing_date(data):
78
80
  return earliest_date
79
81
 
80
82
  def process_former_names(data, cik, current_name):
81
- """Process former names into a list of records."""
83
+ """
84
+ Process former names into a list of records.
85
+ Returns former names records and the earliest company date.
86
+ """
82
87
  former_names_records = []
88
+ earliest_company_date = None
83
89
 
84
90
  # Process former names if present
85
91
  former_names = data.get('formerNames', [])
@@ -98,6 +104,10 @@ def process_former_names(data, cik, current_name):
98
104
  # Clean up date formats (remove time component)
99
105
  if start_date:
100
106
  start_date = start_date.split('T')[0]
107
+ # Track earliest company date across all former names
108
+ if earliest_company_date is None or start_date < earliest_company_date:
109
+ earliest_company_date = start_date
110
+
101
111
  if end_date:
102
112
  end_date = end_date.split('T')[0]
103
113
  # Track latest end date
@@ -114,10 +124,16 @@ def process_former_names(data, cik, current_name):
114
124
 
115
125
  former_names_records.append(record)
116
126
 
127
+ # Find the earliest filing date for the company if no date found in former names
128
+ if earliest_company_date is None:
129
+ earliest_company_date = extract_earliest_filing_date(data)
130
+ if earliest_company_date and 'T' in earliest_company_date:
131
+ earliest_company_date = earliest_company_date.split('T')[0]
132
+
117
133
  # For the current name, if we don't have a start date from former names,
118
- # we'll try to find the earliest filing date
134
+ # we'll use the earliest filing date
119
135
  if not latest_end_date:
120
- latest_end_date = extract_earliest_filing_date(data)
136
+ latest_end_date = earliest_company_date
121
137
 
122
138
  # Add current name record with start date as latest end date
123
139
  current_record = {
@@ -129,7 +145,8 @@ def process_former_names(data, cik, current_name):
129
145
 
130
146
  former_names_records.append(current_record)
131
147
 
132
- return former_names_records
148
+ # Return both the records and the earliest company date (for metadata)
149
+ return former_names_records, earliest_company_date
133
150
 
134
151
  def write_metadata_to_csv(metadata_list, output_path):
135
152
  """Write metadata records to CSV and compress with gzip."""
@@ -145,8 +162,8 @@ def write_metadata_to_csv(metadata_list, output_path):
145
162
  for metadata in metadata_list:
146
163
  fieldnames.update(metadata.keys())
147
164
 
148
- # Make sure 'name' and 'cik' come first
149
- fieldnames = ['name', 'cik'] + [f for f in sorted(fieldnames) if f not in ['name', 'cik']]
165
+ # Make sure 'name', 'cik', and 'start_date' come first
166
+ fieldnames = ['name', 'cik', 'start_date'] + [f for f in sorted(fieldnames) if f not in ['name', 'cik', 'start_date']]
150
167
 
151
168
  # Write directly to gzipped CSV without using StringIO buffer
152
169
  with gzip.open(output_path, 'wt', encoding='utf-8', newline='') as gzfile:
@@ -299,7 +316,11 @@ async def extract_and_process_metadata(output_dir, local_zip_path=None, sec_url=
299
316
  name = metadata.get('name', '')
300
317
 
301
318
  # Process former names with the full json_data
302
- former_names_records = process_former_names(json_data, cik, name)
319
+ # Now also returning the earliest company date
320
+ former_names_records, earliest_company_date = process_former_names(json_data, cik, name)
321
+
322
+ # Add the earliest company date to the metadata
323
+ metadata['start_date'] = earliest_company_date if earliest_company_date else ''
303
324
 
304
325
  # Check if company is listed (has tickers)
305
326
  tickers = metadata.get('tickers', [])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.0.9
3
+ Version: 1.1.0
4
4
  Summary: Making it easier to use SEC filings.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -12,7 +12,7 @@ datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHh
12
12
  datamule/sec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  datamule/sec/utils.py,sha256=JUxwijJiqRMnRJNQzVUamyF5h9ZGc7RnO_zsLOIM73g,2079
14
14
  datamule/sec/infrastructure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- datamule/sec/infrastructure/submissions_metadata.py,sha256=zsSYmvYLZ7KS_MVDsg-j9Y4qeOyDOaHOQ6ZR6MpiET8,17520
15
+ datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNsBw5Jv0Tx5aljiGUJkk7DRk,18745
16
16
  datamule/sec/rss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  datamule/sec/rss/monitor.py,sha256=6r4EYaSlGu6VYErlj9zXJsIMLVie1cfacSZU-ESfuBI,18231
18
18
  datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -29,7 +29,7 @@ datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTq
29
29
  datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
30
  datamule/seclibrary/downloader.py,sha256=Zb1TxsIz887tO3MJVP66siYVtNus89ti-g9oZ6VywrM,11500
31
31
  datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
32
- datamule-1.0.9.dist-info/METADATA,sha256=QhoFw_l9Rc-VaXuBG_JpgWkB-02wxg0C2MTCgWNU3uA,512
33
- datamule-1.0.9.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
34
- datamule-1.0.9.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
35
- datamule-1.0.9.dist-info/RECORD,,
32
+ datamule-1.1.0.dist-info/METADATA,sha256=SsccfLG4NULPHgcZHL-06layatv9j4ZvhmmVaYv8PAg,512
33
+ datamule-1.1.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
34
+ datamule-1.1.0.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
35
+ datamule-1.1.0.dist-info/RECORD,,