datamule 1.1.6__py3-none-any.whl → 1.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamule/sheet.py CHANGED
@@ -1,6 +1,9 @@
1
1
  from pathlib import Path
2
+ import csv
3
+ import os
2
4
  from .helper import _process_cik_and_metadata_filters, load_package_dataset
3
5
  from .sec.xbrl.downloadcompanyfacts import download_company_facts
6
+ from .seclibrary.bq import get_information_table
4
7
 
5
8
  class Sheet:
6
9
  def __init__(self, path):
@@ -26,9 +29,220 @@ class Sheet:
26
29
  # Download facts for all CIKs in parallel
27
30
  download_company_facts(cik=cik_list, output_dir=self.path)
28
31
 
29
- def query_345():
30
- pass
31
- def query_xbrl():
32
- pass
33
- def query_13fhr():
34
- pass
32
def get_information_table(
    self,
    table_type="INFORMATION_TABLE",
    columns=None,
    name_of_issuer=None,
    title_of_class=None,
    cusip=None,
    value=None,
    ssh_prnamt=None,
    ssh_prnamt_type=None,
    investment_discretion=None,
    voting_authority_sole=None,
    voting_authority_shared=None,
    voting_authority_none=None,
    reporting_owner_cik=None,
    put_call=None,
    other_manager=None,
    figi=None,
    accession=None,
    filing_date=None,
    api_key=None,
    print_cost=True,
    verbose=False
):
    """
    Fetch 13F-HR information table rows by delegating to the BigQuery helper.

    This method adds no logic of its own: every argument is forwarded
    unchanged, by keyword, to the ``get_information_table`` function imported
    from ``.seclibrary.bq``, and that function's result is returned as-is.

    Parameters:
    -----------
    table_type : str
        The table to query (default is "INFORMATION_TABLE")
    columns : List[str], optional
        Specific columns to return. If None, all columns are returned.

    # Filter parameters
    name_of_issuer, title_of_class, cusip, value, ssh_prnamt,
    ssh_prnamt_type, investment_discretion, voting_authority_sole,
    voting_authority_shared, voting_authority_none, reporting_owner_cik,
    put_call, other_manager, figi, accession, filing_date :
        Optional filters. As described for the delegated function, each may be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    api_key : str, optional
        SEC BigQuery API key. If None, the delegated function is expected to
        look for the DATAMULE_API_KEY env variable.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query

    Returns:
    --------
    List[Dict]
        A list of dictionaries containing the query results

    Raises:
    -------
    ValueError
        If API key is missing or invalid (raised by the delegated function)
    Exception
        For API errors or other issues
    """
    # Group the per-column filters so the forwarding call stays short.
    filters = {
        "name_of_issuer": name_of_issuer,
        "title_of_class": title_of_class,
        "cusip": cusip,
        "value": value,
        "ssh_prnamt": ssh_prnamt,
        "ssh_prnamt_type": ssh_prnamt_type,
        "investment_discretion": investment_discretion,
        "voting_authority_sole": voting_authority_sole,
        "voting_authority_shared": voting_authority_shared,
        "voting_authority_none": voting_authority_none,
        "reporting_owner_cik": reporting_owner_cik,
        "put_call": put_call,
        "other_manager": other_manager,
        "figi": figi,
        "accession": accession,
        "filing_date": filing_date,
    }

    # NOTE: as a method of Sheet, the bare name below resolves to the
    # module-level function imported from .seclibrary.bq, not to this method
    # (method names are not in scope inside a method body).
    return get_information_table(
        table_type=table_type,
        columns=columns,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose,
        **filters
    )
126
+
127
def download_information_table(
    self,
    filepath,
    table_type="INFORMATION_TABLE",
    columns=None,
    name_of_issuer=None,
    title_of_class=None,
    cusip=None,
    value=None,
    ssh_prnamt=None,
    ssh_prnamt_type=None,
    investment_discretion=None,
    voting_authority_sole=None,
    voting_authority_shared=None,
    voting_authority_none=None,
    reporting_owner_cik=None,
    put_call=None,
    other_manager=None,
    figi=None,
    accession=None,
    filing_date=None,
    api_key=None,
    print_cost=True,
    verbose=False
):
    """
    Query 13F-HR information table data and save the result to a CSV file.

    The rows are obtained from ``self.get_information_table`` using the same
    filter arguments, then written with ``csv.DictWriter``. Nothing is written
    when the query returns no rows.

    Parameters:
    -----------
    filepath : str
        Path where to save the CSV file. If relative, it is resolved against
        the Sheet's path. Missing parent directories are created.

    table_type : str
        The table to query (default is "INFORMATION_TABLE")
    columns : List[str], optional
        Specific columns to return. If None, all columns are returned.

    # Filter parameters
    name_of_issuer, title_of_class, etc. : Various filters that can be:
        - str: Exact match
        - List[str]: Match any in list
        - tuple: (min, max) range for numeric/date fields

    api_key : str, optional
        SEC BigQuery API key, forwarded to ``get_information_table``.
    print_cost : bool
        Whether to print the query cost information
    verbose : bool
        Whether to print additional information about the query and the
        outcome of the save

    Returns:
    --------
    List[Dict]
        The query results (the same rows that were written to the CSV)

    Raises:
    -------
    ValueError
        If API key is missing or invalid (raised by the query)
    Exception
        For API errors or other issues
    """
    # Get the data from the API
    data = self.get_information_table(
        table_type=table_type,
        columns=columns,
        name_of_issuer=name_of_issuer,
        title_of_class=title_of_class,
        cusip=cusip,
        value=value,
        ssh_prnamt=ssh_prnamt,
        ssh_prnamt_type=ssh_prnamt_type,
        investment_discretion=investment_discretion,
        voting_authority_sole=voting_authority_sole,
        voting_authority_shared=voting_authority_shared,
        voting_authority_none=voting_authority_none,
        reporting_owner_cik=reporting_owner_cik,
        put_call=put_call,
        other_manager=other_manager,
        figi=figi,
        accession=accession,
        filing_date=filing_date,
        api_key=api_key,
        print_cost=print_cost,
        verbose=verbose
    )

    # If no data returned, nothing to save
    if not data:
        if verbose:
            print("No data returned from API. No file was created.")
        return data

    # Resolve filepath - if it's not absolute, make it relative to self.path
    filepath_obj = Path(filepath)
    if not filepath_obj.is_absolute():
        filepath_obj = Path(self.path) / filepath_obj

    # Create directory if it doesn't exist
    filepath_obj.parent.mkdir(parents=True, exist_ok=True)

    # Build the header from every record, in first-seen order. Taking it from
    # the first record only makes DictWriter raise ValueError as soon as a
    # later row carries a key the first row lacks.
    fieldnames = list(dict.fromkeys(key for record in data for key in record))

    # Explicit UTF-8 so non-ASCII text (e.g. issuer names) is written the same
    # way on every platform instead of depending on the locale's default.
    with open(filepath_obj, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

    if verbose:
        print(f"Saved {len(data)} records to {filepath_obj}")

    return data
datamule/submission.py CHANGED
@@ -1,16 +1,38 @@
1
1
  from pathlib import Path
2
2
  import json
3
3
  from .document import Document
4
+ from secsgml import parse_sgml_submission_into_memory
5
+ from pathlib import Path
4
6
 
5
7
  class Submission:
6
- def __init__(self, path):
7
- self.path = Path(path)
8
- self._load_metadata()
8
def __init__(self, path=None, sgml_content=None, keep_document_types=None):
    """
    Build a Submission either from a directory on disk or from raw SGML.

    Exactly one of ``path`` and ``sgml_content`` must be given.

    Parameters:
    -----------
    path : str or Path, optional
        Directory containing a ``metadata.json`` file. The metadata is loaded
        eagerly; ``self.path`` is set and documents are read from disk later.
    sgml_content : optional
        Raw SGML submission, parsed in memory with
        ``parse_sgml_submission_into_memory``. ``self.path`` is set to None
        and ``self.documents`` holds one Document per kept document.
    keep_document_types : container of str, optional
        Only used with ``sgml_content``. When given, documents whose type is
        not in it are dropped from both ``self.documents`` and
        ``self.metadata['documents']``.

    Raises:
    -------
    ValueError
        If neither or both of ``path`` and ``sgml_content`` are provided.
    """
    if path is None and sgml_content is None:
        raise ValueError("Either path or sgml_content must be provided")
    if path is not None and sgml_content is not None:
        raise ValueError("Only one of path or sgml_content must be provided")

    if sgml_content is not None:
        self.path = None
        self.metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
        self.documents = []
        kept_metadata = []

        for idx, doc_meta in enumerate(self.metadata['documents']):
            doc_type = doc_meta.get('type')

            # Keep only specified types
            if keep_document_types is not None and doc_type not in keep_document_types:
                continue

            # Documents without a filename are treated as '<sequence>.txt'
            # elsewhere in this class, so fall back to '.txt' here instead of
            # letting Path(None) raise TypeError.
            filename = doc_meta.get('filename')
            extension = Path(filename).suffix if filename is not None else '.txt'

            self.documents.append(
                Document(type=doc_type, content=raw_documents[idx], extension=extension)
            )
            kept_metadata.append(doc_meta)

        # Other methods look documents up with self.documents[idx], where idx
        # is a position in self.metadata['documents']. Keep the two lists the
        # same length and order so filtering cannot shift or overrun the index.
        self.metadata['documents'] = kept_metadata
    else:
        self.path = Path(path)
        metadata_path = self.path / 'metadata.json'
        with metadata_path.open('r') as f:
            self.metadata = json.load(f)
9
35
 
10
- def _load_metadata(self):
11
- metadata_path = self.path / 'metadata.json'
12
- with metadata_path.open('r') as f:
13
- self.metadata = json.load(f)
14
36
 
15
37
  def document_type(self, document_type):
16
38
  # Convert single document type to list for consistent handling
@@ -19,20 +41,73 @@ class Submission:
19
41
  else:
20
42
  document_types = document_type
21
43
 
22
- for doc in self.metadata['documents']:
44
+ for idx,doc in enumerate(self.metadata['documents']):
23
45
  if doc['type'] in document_types:
46
+
47
+ # if loaded from path
48
+ if self.path is not None:
49
+ filename = doc.get('filename')
50
+ # oh we need handling here for sequences case
51
+ if filename is None:
52
+ filename = doc['sequence'] + '.txt'
53
+
54
+ document_path = self.path / filename
55
+ extension = document_path.suffix
56
+
57
+ with document_path.open('r') as f:
58
+ content = f.read()
59
+
60
+ yield Document(type=doc['type'], content=content, extension=extension)
61
+ # if loaded from sgml_content
62
+ else:
63
+ yield self.documents[idx]
64
+
65
+
66
def __iter__(self):
    """
    Yield a Document for each document of this submission.

    When the submission was built from ``sgml_content`` (``self.path`` is
    None), the Documents already held in ``self.documents`` are yielded in
    order. When it was built from a path, each document listed in
    ``self.metadata['documents']`` is read from disk; a document whose file is
    missing is reported with a printed warning and skipped.
    """
    if self.path is None:
        # self.documents may be shorter than self.metadata['documents'] when
        # keep_document_types filtered some out, so iterate it directly rather
        # than indexing it by metadata position (which can raise IndexError).
        yield from self.documents
        return

    for doc in self.metadata['documents']:
        filename = doc.get('filename')

        # Documents without a filename are stored as '<sequence>.txt'
        if filename is None:
            filename = f"{doc['sequence']}.txt"

        document_path = self.path / filename

        if not document_path.exists():
            print(f"Warning: File {document_path} does not exist likely due to keep types in downloading.")
            continue

        with document_path.open('r') as f:
            content = f.read()

        yield Document(type=doc['type'], content=content, extension=document_path.suffix)
91
+
92
+ # keep documents by document type
93
def keep(self, document_type):
    """
    Delete from disk every document whose type is not in ``document_type``.

    Only applies to submissions loaded from a path. For submissions built
    from ``sgml_content`` a warning is printed and nothing is changed.

    Parameters:
    -----------
    document_type : str or collection of str
        The document type, or types, to keep. Files of all other types listed
        in ``self.metadata['documents']`` are removed from ``self.path``.
    """
    # Convert single document type to list for consistent handling
    if isinstance(document_type, str):
        document_types = [document_type]
    else:
        document_types = document_type

    if self.path is None:
        print("Warning: keep() method is only available when loading from path.")
        return

    for doc in self.metadata['documents']:
        if doc.get('type') in document_types:
            continue

        # Documents without a filename are stored as '<sequence>.txt'.
        # Metadata entries are dicts, so the sequence is read by key.
        filename = doc.get('filename')
        if filename is None:
            filename = f"{doc['sequence']}.txt"

        # A file may already be absent (never downloaded, or removed by an
        # earlier keep() call); that is the desired end state, not an error.
        try:
            (self.path / filename).unlink()
        except FileNotFoundError:
            pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.1.6
3
+ Version: 1.1.8
4
4
  Summary: Making it easier to use SEC filings.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -1,11 +1,11 @@
1
1
  datamule/__init__.py,sha256=l6YlwT5EeRxPlCtO5Jd4I8l266rSRUJyfFe97cRtSCM,991
2
2
  datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
3
- datamule/document.py,sha256=BC8jdVy9pMOA9ghIqV5N2XJidmVNThqbBohsuSAnVoY,10813
3
+ datamule/document.py,sha256=qShyVKHQ1nSCNvSfrhAOMVXprOd1br1rFKLy52S9WnE,22007
4
4
  datamule/helper.py,sha256=xgOVnea-lUlQ5I-U0vYUp0VeKPNZehNhqjJvegA3lYE,3342
5
5
  datamule/index.py,sha256=0txvbzPcvY1GsdxA-wGdLzAByxSeE_1VyyBp9mZEQRM,2292
6
- datamule/portfolio.py,sha256=JmZlTrom_g7FXKXxWp_CiQTyC7p6_cDP08G0kFUja48,6982
7
- datamule/sheet.py,sha256=WwumRdniClGU7W3AXVLOpCdMnepLC7KMrRpQlA6_NUY,1022
8
- datamule/submission.py,sha256=JsxYlEz1Ywu6eC32OS15p4p-p8qB6SWd_rXuf2p5UfY,1247
6
+ datamule/portfolio.py,sha256=yWt5gYTjV7rJsLiPUmhc6Vmr3lfvfCR5MSpLQ_6Gdp4,7104
7
+ datamule/sheet.py,sha256=QaArtx7LpT7bwyteelJV67C-lK0RjQbGS3ka7ftdi8w,7978
8
+ datamule/submission.py,sha256=LI7Zr60YbE_tU-v2N09k2dGjfztSgplKZACT3eRUkFE,4463
9
9
  datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
11
11
  datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHhhMKp0CLqVKV5OI,438
@@ -16,20 +16,21 @@ datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNs
16
16
  datamule/sec/rss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  datamule/sec/rss/monitor.py,sha256=6r4EYaSlGu6VYErlj9zXJsIMLVie1cfacSZU-ESfuBI,18231
18
18
  datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- datamule/sec/submissions/downloader.py,sha256=HxbSkNotLLW6ROmU30rnXPlCo9gY3SoB1Z4ZWvj9FIY,2669
20
- datamule/sec/submissions/eftsquery.py,sha256=v6YMBZzksqweqHnNIllMFN-frWypAgvZPKx2FH1UrL4,22515
21
- datamule/sec/submissions/monitor.py,sha256=F24I9yn1k8ggbCJQ-Vk7go_qJHlpkBzVKFYKDs_CWLs,5287
22
- datamule/sec/submissions/streamer.py,sha256=hc61le7gGIIWp1KEaOv_PhriUxf7YYFkQrSKELlZ3pg,9748
23
- datamule/sec/submissions/textsearch.py,sha256=oEIUrcO3HW-4dcyPCiOTvM7UUimNEM4HNIb-Juvc1BQ,4642
19
+ datamule/sec/submissions/downloader.py,sha256=IB08W8-lQD5Bb0LgzrTN4Xi4HsCw24DybRLHqE1AUrU,3290
20
+ datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
21
+ datamule/sec/submissions/monitor.py,sha256=Im2kgnUehhTgyY2Vq3uk07n4Vkj4PjII_SsRDi8ehAE,5384
22
+ datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
23
+ datamule/sec/submissions/textsearch.py,sha256=-a5yIrrxxtaK10IJeywFmXuJmSndYL9VKm4SC4I9JAs,5808
24
24
  datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  datamule/sec/xbrl/downloadcompanyfacts.py,sha256=rMWRiCF9ci_gNZMJ9MC2c_PGEd-yEthawQ0CtVwWTjM,3323
26
26
  datamule/sec/xbrl/filter_xbrl.py,sha256=g9OT4zrNS0tiUJeBIwbCs_zMisOBkpFnMR3tV4Tr39Q,1316
27
27
  datamule/sec/xbrl/streamcompanyfacts.py,sha256=WyJIwuy5mNMXWpx_IkhFzDMe9MOfQ-vNkWl_JzBzFmc,3323
28
28
  datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
29
29
  datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
+ datamule/seclibrary/bq.py,sha256=C6kafFXWtm-MUjf70H1wTtpwv1Rxpcbk-Kfy8fkBPfo,6469
30
31
  datamule/seclibrary/downloader.py,sha256=Zb1TxsIz887tO3MJVP66siYVtNus89ti-g9oZ6VywrM,11500
31
32
  datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
32
- datamule-1.1.6.dist-info/METADATA,sha256=2kV79oqPrN2_HTcd4bxbJ0Q2_nP7Ta5Z79yskrST5S8,512
33
- datamule-1.1.6.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
34
- datamule-1.1.6.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
35
- datamule-1.1.6.dist-info/RECORD,,
33
+ datamule-1.1.8.dist-info/METADATA,sha256=8HRRMz6l928E5tuHXkPi1_Kf-8nfPSjWQnnfReSxdPM,512
34
+ datamule-1.1.8.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
35
+ datamule-1.1.8.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
36
+ datamule-1.1.8.dist-info/RECORD,,