datamule 1.1.6__py3-none-any.whl → 1.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/document.py +262 -68
- datamule/portfolio.py +7 -5
- datamule/sec/submissions/downloader.py +19 -2
- datamule/sec/submissions/eftsquery.py +129 -8
- datamule/sec/submissions/monitor.py +5 -1
- datamule/sec/submissions/streamer.py +59 -23
- datamule/sec/submissions/textsearch.py +33 -6
- datamule/seclibrary/bq.py +191 -0
- datamule/sheet.py +220 -6
- datamule/submission.py +94 -19
- {datamule-1.1.6.dist-info → datamule-1.1.8.dist-info}/METADATA +1 -1
- {datamule-1.1.6.dist-info → datamule-1.1.8.dist-info}/RECORD +14 -13
- {datamule-1.1.6.dist-info → datamule-1.1.8.dist-info}/WHEEL +0 -0
- {datamule-1.1.6.dist-info → datamule-1.1.8.dist-info}/top_level.txt +0 -0
datamule/sheet.py
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
from pathlib import Path
|
2
|
+
import csv
|
3
|
+
import os
|
2
4
|
from .helper import _process_cik_and_metadata_filters, load_package_dataset
|
3
5
|
from .sec.xbrl.downloadcompanyfacts import download_company_facts
|
6
|
+
from .seclibrary.bq import get_information_table
|
4
7
|
|
5
8
|
class Sheet:
|
6
9
|
def __init__(self, path):
|
@@ -26,9 +29,220 @@ class Sheet:
|
|
26
29
|
# Download facts for all CIKs in parallel
|
27
30
|
download_company_facts(cik=cik_list, output_dir=self.path)
|
28
31
|
|
29
|
-
def
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
32
|
+
def get_information_table(
    self,
    # Required parameters
    table_type="INFORMATION_TABLE",

    # Optional filtering parameters
    columns=None,
    name_of_issuer=None,
    title_of_class=None,
    cusip=None,
    value=None,
    ssh_prnamt=None,
    ssh_prnamt_type=None,
    investment_discretion=None,
    voting_authority_sole=None,
    voting_authority_shared=None,
    voting_authority_none=None,
    reporting_owner_cik=None,
    put_call=None,
    other_manager=None,
    figi=None,
    accession=None,
    filing_date=None,

    # API key handling
    api_key=None,

    # Additional options
    print_cost=True,
    verbose=False
):
    """
    Query the SEC BigQuery API for 13F-HR information table data.

    This method is a pass-through: every argument is forwarded unchanged,
    by keyword, to the module-level ``get_information_table`` imported from
    ``.seclibrary.bq``, and that function's result is returned as-is.

    Parameters:
    -----------
    table_type : str
        The table to query (default is "INFORMATION_TABLE")
    columns : List[str], optional
        Specific columns to return. If None, all columns are returned.

    # Filter parameters
    name_of_issuer, title_of_class, etc. : Various filters that can be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Column filters, gathered in the same order as the signature.
    filters = dict(
        name_of_issuer=name_of_issuer,
        title_of_class=title_of_class,
        cusip=cusip,
        value=value,
        ssh_prnamt=ssh_prnamt,
        ssh_prnamt_type=ssh_prnamt_type,
        investment_discretion=investment_discretion,
        voting_authority_sole=voting_authority_sole,
        voting_authority_shared=voting_authority_shared,
        voting_authority_none=voting_authority_none,
        reporting_owner_cik=reporting_owner_cik,
        put_call=put_call,
        other_manager=other_manager,
        figi=figi,
        accession=accession,
        filing_date=filing_date,
    )

    # Inside a method body this name resolves to the imported module-level
    # function, not to this method.
    return get_information_table(
        table_type=table_type,
        columns=columns,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose,
        **filters
    )
|
126
|
+
|
127
|
+
def download_information_table(
    self,
    filepath,
    # Required parameters
    table_type="INFORMATION_TABLE",

    # Optional filtering parameters
    columns=None,
    name_of_issuer=None,
    title_of_class=None,
    cusip=None,
    value=None,
    ssh_prnamt=None,
    ssh_prnamt_type=None,
    investment_discretion=None,
    voting_authority_sole=None,
    voting_authority_shared=None,
    voting_authority_none=None,
    reporting_owner_cik=None,
    put_call=None,
    other_manager=None,
    figi=None,
    accession=None,
    filing_date=None,

    # API key handling
    api_key=None,

    # Additional options
    print_cost=True,
    verbose=False
):
    """
    Query the SEC BigQuery API for 13F-HR information table data and save to CSV.

    The query itself is delegated to ``self.get_information_table``; this
    method only adds the CSV export. The file is written as UTF-8, and the
    header is the union of the keys of all returned records (in first-seen
    order), so records with differing keys do not abort the export.

    Parameters:
    -----------
    filepath : str
        Path where to save the CSV file. If relative, it will be relative to the Sheet's path.

    table_type : str
        The table to query (default is "INFORMATION_TABLE")
    columns : List[str], optional
        Specific columns to return. If None, all columns are returned.

    # Filter parameters
    name_of_issuer, title_of_class, etc. : Various filters that can be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    api_key : str, optional
        SEC BigQuery API key. If None, looks for DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results. When the query
        returns nothing, that empty result is returned and no file is created.

    Raises:
    -------
    ValueError
        If API key is missing or invalid
    Exception
        For API errors or other issues
    """
    # Get the data from the API
    data = self.get_information_table(
        table_type=table_type,
        columns=columns,
        name_of_issuer=name_of_issuer,
        title_of_class=title_of_class,
        cusip=cusip,
        value=value,
        ssh_prnamt=ssh_prnamt,
        ssh_prnamt_type=ssh_prnamt_type,
        investment_discretion=investment_discretion,
        voting_authority_sole=voting_authority_sole,
        voting_authority_shared=voting_authority_shared,
        voting_authority_none=voting_authority_none,
        reporting_owner_cik=reporting_owner_cik,
        put_call=put_call,
        other_manager=other_manager,
        figi=figi,
        accession=accession,
        filing_date=filing_date,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose
    )

    # If no data returned, nothing to save
    if not data:
        if verbose:
            print("No data returned from API. No file was created.")
        return data

    # Resolve filepath - if it's not absolute, make it relative to self.path
    filepath_obj = Path(filepath)
    if not filepath_obj.is_absolute():
        filepath_obj = self.path / filepath_obj

    # Create directory if it doesn't exist
    os.makedirs(filepath_obj.parent, exist_ok=True)

    # Header is the ordered union of every record's keys. Using only the
    # first record would make DictWriter raise ValueError as soon as a later
    # record carries a key the first one lacks.
    fieldnames = list(dict.fromkeys(key for record in data for key in record))

    # Explicit UTF-8 so the export does not depend on the platform's default
    # encoding (which can fail on non-ASCII text, e.g. issuer names).
    with open(filepath_obj, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

    if verbose:
        print(f"Saved {len(data)} records to {filepath_obj}")

    return data
|
datamule/submission.py
CHANGED
@@ -1,16 +1,38 @@
|
|
1
1
|
from pathlib import Path
|
2
2
|
import json
|
3
3
|
from .document import Document
|
4
|
+
from secsgml import parse_sgml_submission_into_memory
|
5
|
+
from pathlib import Path
|
4
6
|
|
5
7
|
class Submission:
|
6
|
-
def __init__(self, path):
|
7
|
-
|
8
|
-
|
8
|
+
def __init__(self, path=None,sgml_content=None,keep_document_types=None):
    """
    Create a Submission either from a directory on disk or from raw SGML.

    Exactly one of ``path`` and ``sgml_content`` must be given.

    Parameters:
    -----------
    path : str or Path, optional
        Directory containing a ``metadata.json`` file. The metadata is
        loaded eagerly; document files are not read here.
    sgml_content : optional
        Raw SGML submission, passed to ``parse_sgml_submission_into_memory``.
        The parsed documents are wrapped in ``Document`` objects and stored
        in ``self.documents``; ``self.path`` is set to None.
    keep_document_types : container of str, optional
        Only used with ``sgml_content``. When given, documents whose type is
        not in this container are not added to ``self.documents``.

    Raises:
    -------
    ValueError
        If neither or both of ``path`` and ``sgml_content`` are provided.
    """
    if path is None and sgml_content is None:
        raise ValueError("Either path or sgml_content must be provided")
    if path is not None and sgml_content is not None:
        raise ValueError("Only one of path or sgml_content must be provided")

    if sgml_content is not None:
        self.path = None
        self.metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
        self.documents = []

        # NOTE(review): when keep_document_types filters anything out,
        # self.documents becomes shorter than self.metadata['documents'], so
        # positions in the two lists no longer correspond. Any code that
        # indexes self.documents with a metadata index must account for this.
        for idx, doc in enumerate(self.metadata['documents']):
            doc_type = doc.get('type')

            # Keep only specified types
            if keep_document_types is not None and doc_type not in keep_document_types:
                continue

            filename = doc.get('filename')
            # Documents without a filename are addressed by sequence number,
            # the same fallback the path-based methods of this class use.
            # Without it Path(None) would raise TypeError.
            if filename is None:
                filename = doc['sequence'] + '.txt'
            extension = Path(filename).suffix
            self.documents.append(Document(type=doc_type, content=raw_documents[idx], extension=extension))
    else:
        # Validation above guarantees path is not None here.
        self.path = Path(path)
        metadata_path = self.path / 'metadata.json'
        with metadata_path.open('r') as f:
            self.metadata = json.load(f)
|
9
35
|
|
10
|
-
def _load_metadata(self):
|
11
|
-
metadata_path = self.path / 'metadata.json'
|
12
|
-
with metadata_path.open('r') as f:
|
13
|
-
self.metadata = json.load(f)
|
14
36
|
|
15
37
|
def document_type(self, document_type):
|
16
38
|
# Convert single document type to list for consistent handling
|
@@ -19,20 +41,73 @@ class Submission:
|
|
19
41
|
else:
|
20
42
|
document_types = document_type
|
21
43
|
|
22
|
-
for doc in self.metadata['documents']:
|
44
|
+
for idx,doc in enumerate(self.metadata['documents']):
|
23
45
|
if doc['type'] in document_types:
|
46
|
+
|
47
|
+
# if loaded from path
|
48
|
+
if self.path is not None:
|
49
|
+
filename = doc.get('filename')
|
50
|
+
# oh we need handling here for sequences case
|
51
|
+
if filename is None:
|
52
|
+
filename = doc['sequence'] + '.txt'
|
53
|
+
|
54
|
+
document_path = self.path / filename
|
55
|
+
extension = document_path.suffix
|
56
|
+
|
57
|
+
with document_path.open('r') as f:
|
58
|
+
content = f.read()
|
59
|
+
|
60
|
+
yield Document(type=doc['type'], content=content, extension=extension)
|
61
|
+
# if loaded from sgml_content
|
62
|
+
else:
|
63
|
+
yield self.documents[idx]
|
64
|
+
|
65
|
+
|
66
|
+
def __iter__(self):
    """
    Iterate over the documents of this submission as ``Document`` objects.

    * Loaded from ``sgml_content`` (``self.path`` is None): yields the
      in-memory documents stored in ``self.documents``, in order.
    * Loaded from ``path``: for each entry in ``self.metadata['documents']``
      the corresponding file is read from ``self.path`` and wrapped in a
      ``Document``. Entries whose file is missing are skipped with a
      printed warning.
    """
    # if loaded from sgml_content
    if self.path is None:
        # Yield the stored documents directly rather than indexing them by
        # metadata position: self.documents may have been filtered at
        # construction time, in which case metadata indices would point at
        # the wrong document or past the end of the list.
        yield from self.documents
        return

    # if loaded from path
    for doc in self.metadata['documents']:
        filename = doc.get('filename')

        # Documents without a filename are stored under their sequence number.
        if filename is None:
            filename = doc['sequence'] + '.txt'

        document_path = self.path / filename

        # check if the file exists
        if not document_path.exists():
            print(f"Warning: File {document_path} does not exist likely due to keep types in downloading.")
            continue

        with document_path.open('r') as f:
            content = f.read()

        yield Document(type=doc['type'], content=content, extension=document_path.suffix)
|
91
|
+
|
92
|
+
# keep documents by document type
|
93
|
+
def keep(self, document_type):
    """
    Delete from disk every document whose type is not in ``document_type``.

    Only works for submissions loaded from ``path``; for submissions loaded
    from ``sgml_content`` a warning is printed and nothing is changed.
    ``self.metadata`` is not modified.

    Parameters:
    -----------
    document_type : str or container of str
        The document type(s) to keep. A single string is treated as a
        one-element list.
    """
    # Convert single document type to list for consistent handling
    if isinstance(document_type, str):
        document_types = [document_type]
    else:
        document_types = document_type

    if self.path is None:
        print("Warning: keep() method is only available when loading from path.")
        return

    for doc in self.metadata['documents']:
        if doc.get('type') in document_types:
            continue

        filename = doc.get('filename')
        # Documents without a filename are stored under their sequence
        # number. `doc` is a dict, so the sequence must be read by key.
        if filename is None:
            filename = doc['sequence'] + '.txt'

        document_path = self.path / filename
        # The file may already be gone (earlier keep() call, or it was never
        # downloaded); a missing file already satisfies the request.
        try:
            document_path.unlink()
        except FileNotFoundError:
            pass
|
@@ -1,11 +1,11 @@
|
|
1
1
|
datamule/__init__.py,sha256=l6YlwT5EeRxPlCtO5Jd4I8l266rSRUJyfFe97cRtSCM,991
|
2
2
|
datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
|
3
|
-
datamule/document.py,sha256=
|
3
|
+
datamule/document.py,sha256=qShyVKHQ1nSCNvSfrhAOMVXprOd1br1rFKLy52S9WnE,22007
|
4
4
|
datamule/helper.py,sha256=xgOVnea-lUlQ5I-U0vYUp0VeKPNZehNhqjJvegA3lYE,3342
|
5
5
|
datamule/index.py,sha256=0txvbzPcvY1GsdxA-wGdLzAByxSeE_1VyyBp9mZEQRM,2292
|
6
|
-
datamule/portfolio.py,sha256=
|
7
|
-
datamule/sheet.py,sha256=
|
8
|
-
datamule/submission.py,sha256=
|
6
|
+
datamule/portfolio.py,sha256=yWt5gYTjV7rJsLiPUmhc6Vmr3lfvfCR5MSpLQ_6Gdp4,7104
|
7
|
+
datamule/sheet.py,sha256=QaArtx7LpT7bwyteelJV67C-lK0RjQbGS3ka7ftdi8w,7978
|
8
|
+
datamule/submission.py,sha256=LI7Zr60YbE_tU-v2N09k2dGjfztSgplKZACT3eRUkFE,4463
|
9
9
|
datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
|
11
11
|
datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHhhMKp0CLqVKV5OI,438
|
@@ -16,20 +16,21 @@ datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNs
|
|
16
16
|
datamule/sec/rss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
datamule/sec/rss/monitor.py,sha256=6r4EYaSlGu6VYErlj9zXJsIMLVie1cfacSZU-ESfuBI,18231
|
18
18
|
datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
-
datamule/sec/submissions/downloader.py,sha256=
|
20
|
-
datamule/sec/submissions/eftsquery.py,sha256=
|
21
|
-
datamule/sec/submissions/monitor.py,sha256=
|
22
|
-
datamule/sec/submissions/streamer.py,sha256=
|
23
|
-
datamule/sec/submissions/textsearch.py,sha256
|
19
|
+
datamule/sec/submissions/downloader.py,sha256=IB08W8-lQD5Bb0LgzrTN4Xi4HsCw24DybRLHqE1AUrU,3290
|
20
|
+
datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
|
21
|
+
datamule/sec/submissions/monitor.py,sha256=Im2kgnUehhTgyY2Vq3uk07n4Vkj4PjII_SsRDi8ehAE,5384
|
22
|
+
datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
|
23
|
+
datamule/sec/submissions/textsearch.py,sha256=-a5yIrrxxtaK10IJeywFmXuJmSndYL9VKm4SC4I9JAs,5808
|
24
24
|
datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
25
|
datamule/sec/xbrl/downloadcompanyfacts.py,sha256=rMWRiCF9ci_gNZMJ9MC2c_PGEd-yEthawQ0CtVwWTjM,3323
|
26
26
|
datamule/sec/xbrl/filter_xbrl.py,sha256=g9OT4zrNS0tiUJeBIwbCs_zMisOBkpFnMR3tV4Tr39Q,1316
|
27
27
|
datamule/sec/xbrl/streamcompanyfacts.py,sha256=WyJIwuy5mNMXWpx_IkhFzDMe9MOfQ-vNkWl_JzBzFmc,3323
|
28
28
|
datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
|
29
29
|
datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
30
|
+
datamule/seclibrary/bq.py,sha256=C6kafFXWtm-MUjf70H1wTtpwv1Rxpcbk-Kfy8fkBPfo,6469
|
30
31
|
datamule/seclibrary/downloader.py,sha256=Zb1TxsIz887tO3MJVP66siYVtNus89ti-g9oZ6VywrM,11500
|
31
32
|
datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
|
32
|
-
datamule-1.1.
|
33
|
-
datamule-1.1.
|
34
|
-
datamule-1.1.
|
35
|
-
datamule-1.1.
|
33
|
+
datamule-1.1.8.dist-info/METADATA,sha256=8HRRMz6l928E5tuHXkPi1_Kf-8nfPSjWQnnfReSxdPM,512
|
34
|
+
datamule-1.1.8.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
35
|
+
datamule-1.1.8.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
|
36
|
+
datamule-1.1.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|