datamule 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/sec/infrastructure/submissions_metadata.py +29 -8
- {datamule-1.0.9.dist-info → datamule-1.1.0.dist-info}/METADATA +1 -1
- {datamule-1.0.9.dist-info → datamule-1.1.0.dist-info}/RECORD +5 -5
- {datamule-1.0.9.dist-info → datamule-1.1.0.dist-info}/WHEEL +0 -0
- {datamule-1.0.9.dist-info → datamule-1.1.0.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,6 @@ from ..utils import headers
|
|
12
12
|
|
13
13
|
async def download_sec_file(url, target_path):
|
14
14
|
"""Download submissions.zip from SEC website with progress bar."""
|
15
|
-
|
16
15
|
|
17
16
|
async with aiohttp.ClientSession() as session:
|
18
17
|
async with session.get(url, headers=headers) as response:
|
@@ -53,6 +52,9 @@ def extract_metadata(data):
|
|
53
52
|
for field in ['street1', 'street2', 'city', 'stateOrCountry', 'zipCode', 'stateOrCountryDescription']:
|
54
53
|
result[f"{addr_type}_{field}"] = addr.get(field)
|
55
54
|
|
55
|
+
# Add start_date field (will be populated later)
|
56
|
+
result['start_date'] = ''
|
57
|
+
|
56
58
|
return result
|
57
59
|
|
58
60
|
def extract_earliest_filing_date(data):
|
@@ -78,8 +80,12 @@ def extract_earliest_filing_date(data):
|
|
78
80
|
return earliest_date
|
79
81
|
|
80
82
|
def process_former_names(data, cik, current_name):
|
81
|
-
"""
|
83
|
+
"""
|
84
|
+
Process former names into a list of records.
|
85
|
+
Returns former names records and the earliest company date.
|
86
|
+
"""
|
82
87
|
former_names_records = []
|
88
|
+
earliest_company_date = None
|
83
89
|
|
84
90
|
# Process former names if present
|
85
91
|
former_names = data.get('formerNames', [])
|
@@ -98,6 +104,10 @@ def process_former_names(data, cik, current_name):
|
|
98
104
|
# Clean up date formats (remove time component)
|
99
105
|
if start_date:
|
100
106
|
start_date = start_date.split('T')[0]
|
107
|
+
# Track earliest company date across all former names
|
108
|
+
if earliest_company_date is None or start_date < earliest_company_date:
|
109
|
+
earliest_company_date = start_date
|
110
|
+
|
101
111
|
if end_date:
|
102
112
|
end_date = end_date.split('T')[0]
|
103
113
|
# Track latest end date
|
@@ -114,10 +124,16 @@ def process_former_names(data, cik, current_name):
|
|
114
124
|
|
115
125
|
former_names_records.append(record)
|
116
126
|
|
127
|
+
# Find the earliest filing date for the company if no date found in former names
|
128
|
+
if earliest_company_date is None:
|
129
|
+
earliest_company_date = extract_earliest_filing_date(data)
|
130
|
+
if earliest_company_date and 'T' in earliest_company_date:
|
131
|
+
earliest_company_date = earliest_company_date.split('T')[0]
|
132
|
+
|
117
133
|
# For the current name, if we don't have a start date from former names,
|
118
|
-
# we'll
|
134
|
+
# we'll use the earliest filing date
|
119
135
|
if not latest_end_date:
|
120
|
-
latest_end_date =
|
136
|
+
latest_end_date = earliest_company_date
|
121
137
|
|
122
138
|
# Add current name record with start date as latest end date
|
123
139
|
current_record = {
|
@@ -129,7 +145,8 @@ def process_former_names(data, cik, current_name):
|
|
129
145
|
|
130
146
|
former_names_records.append(current_record)
|
131
147
|
|
132
|
-
|
148
|
+
# Return both the records and the earliest company date (for metadata)
|
149
|
+
return former_names_records, earliest_company_date
|
133
150
|
|
134
151
|
def write_metadata_to_csv(metadata_list, output_path):
|
135
152
|
"""Write metadata records to CSV and compress with gzip."""
|
@@ -145,8 +162,8 @@ def write_metadata_to_csv(metadata_list, output_path):
|
|
145
162
|
for metadata in metadata_list:
|
146
163
|
fieldnames.update(metadata.keys())
|
147
164
|
|
148
|
-
# Make sure 'name' and 'cik' come first
|
149
|
-
fieldnames = ['name', 'cik'] + [f for f in sorted(fieldnames) if f not in ['name', 'cik']]
|
165
|
+
# Make sure 'name', 'cik', and 'start_date' come first
|
166
|
+
fieldnames = ['name', 'cik', 'start_date'] + [f for f in sorted(fieldnames) if f not in ['name', 'cik', 'start_date']]
|
150
167
|
|
151
168
|
# Write directly to gzipped CSV without using StringIO buffer
|
152
169
|
with gzip.open(output_path, 'wt', encoding='utf-8', newline='') as gzfile:
|
@@ -299,7 +316,11 @@ async def extract_and_process_metadata(output_dir, local_zip_path=None, sec_url=
|
|
299
316
|
name = metadata.get('name', '')
|
300
317
|
|
301
318
|
# Process former names with the full json_data
|
302
|
-
|
319
|
+
# Now also returning the earliest company date
|
320
|
+
former_names_records, earliest_company_date = process_former_names(json_data, cik, name)
|
321
|
+
|
322
|
+
# Add the earliest company date to the metadata
|
323
|
+
metadata['start_date'] = earliest_company_date if earliest_company_date else ''
|
303
324
|
|
304
325
|
# Check if company is listed (has tickers)
|
305
326
|
tickers = metadata.get('tickers', [])
|
@@ -12,7 +12,7 @@ datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHh
|
|
12
12
|
datamule/sec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
datamule/sec/utils.py,sha256=JUxwijJiqRMnRJNQzVUamyF5h9ZGc7RnO_zsLOIM73g,2079
|
14
14
|
datamule/sec/infrastructure/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
-
datamule/sec/infrastructure/submissions_metadata.py,sha256=
|
15
|
+
datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNsBw5Jv0Tx5aljiGUJkk7DRk,18745
|
16
16
|
datamule/sec/rss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
datamule/sec/rss/monitor.py,sha256=6r4EYaSlGu6VYErlj9zXJsIMLVie1cfacSZU-ESfuBI,18231
|
18
18
|
datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -29,7 +29,7 @@ datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTq
|
|
29
29
|
datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
30
30
|
datamule/seclibrary/downloader.py,sha256=Zb1TxsIz887tO3MJVP66siYVtNus89ti-g9oZ6VywrM,11500
|
31
31
|
datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
|
32
|
-
datamule-1.0.
|
33
|
-
datamule-1.0.
|
34
|
-
datamule-1.0.
|
35
|
-
datamule-1.0.
|
32
|
+
datamule-1.1.0.dist-info/METADATA,sha256=SsccfLG4NULPHgcZHL-06layatv9j4ZvhmmVaYv8PAg,512
|
33
|
+
datamule-1.1.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
34
|
+
datamule-1.1.0.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
|
35
|
+
datamule-1.1.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|