datamule 0.417__cp311-cp311-macosx_10_9_universal2.whl → 0.420__cp311-cp311-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamule might be problematic. Click here for more details.
- datamule/__init__.py +9 -0
- datamule/parser/sgml_parsing/sgml_parser_cy.cpython-311-darwin.so +0 -0
- datamule/portfolio.py +12 -7
- datamule/submission.py +7 -1
- {datamule-0.417.dist-info → datamule-0.420.dist-info}/METADATA +4 -4
- {datamule-0.417.dist-info → datamule-0.420.dist-info}/RECORD +8 -9
- datamule/parser/document_parsing/company_concepts_parser.py +0 -0
- {datamule-0.417.dist-info → datamule-0.420.dist-info}/WHEEL +0 -0
- {datamule-0.417.dist-info → datamule-0.420.dist-info}/top_level.txt +0 -0
datamule/__init__.py
CHANGED
|
@@ -12,12 +12,21 @@ def __getattr__(name):
|
|
|
12
12
|
elif name == 'Parser':
|
|
13
13
|
from .parser.document_parsing.sec_parser import Parser
|
|
14
14
|
return Parser
|
|
15
|
+
elif name == 'Monitor':
|
|
16
|
+
from .monitor import Monitor
|
|
17
|
+
return Monitor
|
|
18
|
+
elif name == 'PackageUpdater':
|
|
19
|
+
from .packageupdater import PackageUpdater
|
|
20
|
+
return PackageUpdater
|
|
15
21
|
elif name == 'Submission':
|
|
16
22
|
from .submission import Submission
|
|
17
23
|
return Submission
|
|
18
24
|
elif name == 'Portfolio':
|
|
19
25
|
from .portfolio import Portfolio
|
|
20
26
|
return Portfolio
|
|
27
|
+
elif name == 'Document':
|
|
28
|
+
from .document import Document
|
|
29
|
+
return Document
|
|
21
30
|
elif name == "parse_sgml_submission":
|
|
22
31
|
from .parser.sgml_parsing.sgml_parser_cy import parse_sgml_submission
|
|
23
32
|
return parse_sgml_submission
|
|
Binary file
|
datamule/portfolio.py
CHANGED
|
@@ -1,16 +1,21 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
+
from tqdm import tqdm
|
|
3
|
+
from concurrent.futures import ProcessPoolExecutor
|
|
2
4
|
from .submission import Submission
|
|
3
5
|
|
|
4
6
|
class Portfolio:
|
|
5
7
|
def __init__(self, path):
|
|
6
8
|
self.path = Path(path)
|
|
7
|
-
self.
|
|
8
|
-
|
|
9
|
-
# Load
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
9
|
+
folders = [f for f in self.path.iterdir() if f.is_dir()]
|
|
10
|
+
print(f"Loading {len(folders)} submissions")
|
|
11
|
+
# Load submissions in parallel
|
|
12
|
+
with ProcessPoolExecutor() as executor:
|
|
13
|
+
# Show progress while loading
|
|
14
|
+
self.submissions = list(tqdm(
|
|
15
|
+
executor.map(Submission, folders),
|
|
16
|
+
total=len(folders),
|
|
17
|
+
desc="Loading submissions"
|
|
18
|
+
))
|
|
14
19
|
|
|
15
20
|
def __iter__(self):
|
|
16
21
|
return iter(self.submissions)
|
datamule/submission.py
CHANGED
|
@@ -51,8 +51,14 @@ class Submission:
|
|
|
51
51
|
filepath.unlink()
|
|
52
52
|
|
|
53
53
|
def document_type(self, document_type):
|
|
54
|
+
# Convert single document type to list for consistent handling
|
|
55
|
+
if isinstance(document_type, str):
|
|
56
|
+
document_types = [document_type]
|
|
57
|
+
else:
|
|
58
|
+
document_types = document_type
|
|
59
|
+
|
|
54
60
|
for doc in self.metadata['documents']:
|
|
55
|
-
if doc['TYPE']
|
|
61
|
+
if doc['TYPE'] in document_types:
|
|
56
62
|
filename = doc.get('FILENAME')
|
|
57
63
|
if filename is None:
|
|
58
64
|
continue
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datamule
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.420
|
|
4
4
|
Summary: Making it easier to use SEC filings.
|
|
5
5
|
Home-page: https://github.com/john-friedman/datamule-python
|
|
6
6
|
Author: John Friedman
|
|
@@ -16,11 +16,11 @@ Requires-Dist: selectolax
|
|
|
16
16
|
Requires-Dist: pytz
|
|
17
17
|
Requires-Dist: zstandard
|
|
18
18
|
Provides-Extra: all
|
|
19
|
-
Requires-Dist: google-generativeai; extra == "all"
|
|
20
|
-
Requires-Dist: pandas; extra == "all"
|
|
21
19
|
Requires-Dist: flask; extra == "all"
|
|
22
|
-
Requires-Dist: openai; extra == "all"
|
|
23
20
|
Requires-Dist: psutil; extra == "all"
|
|
21
|
+
Requires-Dist: openai; extra == "all"
|
|
22
|
+
Requires-Dist: google-generativeai; extra == "all"
|
|
23
|
+
Requires-Dist: pandas; extra == "all"
|
|
24
24
|
Provides-Extra: dataset_builder
|
|
25
25
|
Requires-Dist: pandas; extra == "dataset-builder"
|
|
26
26
|
Requires-Dist: google-generativeai; extra == "dataset-builder"
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
datamule/__init__.py,sha256=
|
|
1
|
+
datamule/__init__.py,sha256=i3HnWFrqEdsK2OmvE7NUjcO05w1BuIYBV6J4cSFaN3s,2268
|
|
2
2
|
datamule/document.py,sha256=Yn8UqUjKwYPE29MrMjreHK_HY9eTqOSjPyM5B1VBrHQ,5144
|
|
3
3
|
datamule/helper.py,sha256=tr3AQWus9dHNZFKpLSglWjcb8zmm5qDXjOWACMhvMxQ,4594
|
|
4
4
|
datamule/monitor.py,sha256=mRaM8v5NgcMF9DJ1s_YBzucjrbr-3yFwW422MVml-_Q,9114
|
|
5
5
|
datamule/packageupdater.py,sha256=rw4hTnQGjZttee8QmTadg9vlhEjVp_dwdSFv8uQNvss,9584
|
|
6
|
-
datamule/portfolio.py,sha256=
|
|
7
|
-
datamule/submission.py,sha256=
|
|
6
|
+
datamule/portfolio.py,sha256=Qd_j3gGUi95Ad0g-PpjfJ3ozg-92VohMvCOvcEb0Kbw,710
|
|
7
|
+
datamule/submission.py,sha256=LlwHJL3CIMHOlaHqCfDH7UBiKCFu0tPYV-g92uPvfcY,2492
|
|
8
8
|
datamule/data/company_former_names.csv,sha256=HE9cAv-_QKFX6jT-_-D0rHmaDyQuAzL4MJwank5O1U8,706380
|
|
9
9
|
datamule/data/company_metadata.csv,sha256=yPovrCVjYwLWTU_hBUFJymp8iNO0NBYuq_QwOkRLoN8,3068599
|
|
10
10
|
datamule/data/company_tickers.csv,sha256=GW6lOP54RiGJCx-d9N5jEBy7tGVgU3zI-5xHJXrZfSI,400363
|
|
@@ -36,7 +36,6 @@ datamule/parser/document_parsing/basic_10q_parser.py,sha256=ccQc3pwBqevDb6-vBwEE
|
|
|
36
36
|
datamule/parser/document_parsing/basic_13d_parser.py,sha256=loJC97H_ccu_hWMhgNt5tvGZnN3--7tsqZxzBnWB_FY,1528
|
|
37
37
|
datamule/parser/document_parsing/basic_13g_parser.py,sha256=sWg83-QTAzUDNs45iWtpxnMxQgtC3zJlFj0R9ybZpNI,1631
|
|
38
38
|
datamule/parser/document_parsing/basic_8k_parser.py,sha256=inCSmlH_BkLK0Lkvt0kZ6EUJ0nijul_RkdXzccyOmRI,2466
|
|
39
|
-
datamule/parser/document_parsing/company_concepts_parser.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
39
|
datamule/parser/document_parsing/form_d_parser.py,sha256=dWlGeVZRzh0kfT3gVMC8eyqeQORdVV3r8KXUwEqAW3s,2036
|
|
41
40
|
datamule/parser/document_parsing/generalized_item_parser.py,sha256=67_DFb1BQbMmdHefEgoCPlEoiUT0zyxh3eBNJpjGXUk,2616
|
|
42
41
|
datamule/parser/document_parsing/generalized_xml_parser.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -48,8 +47,8 @@ datamule/parser/document_parsing/n_port_p_parser.py,sha256=GmmQFkCZt57WikUZ5Daht
|
|
|
48
47
|
datamule/parser/document_parsing/sec_parser.py,sha256=AS8H4h1sfUAdWP2gotULcjbylsYN_nHgTfkeVRyENPo,2716
|
|
49
48
|
datamule/parser/document_parsing/sgml_parser.py,sha256=tC1cL3cdVQPWbc9QtoRUYSo2wRuYNaglFaCmP57oEfA,3317
|
|
50
49
|
datamule/parser/sgml_parsing/sgml_parser_cy.c,sha256=UwXSuLGSBLKfO5bM7xTzjFLnGDV3-NNnCjgUCPAY1gk,796689
|
|
51
|
-
datamule/parser/sgml_parsing/sgml_parser_cy.cpython-311-darwin.so,sha256=
|
|
52
|
-
datamule-0.
|
|
53
|
-
datamule-0.
|
|
54
|
-
datamule-0.
|
|
55
|
-
datamule-0.
|
|
50
|
+
datamule/parser/sgml_parsing/sgml_parser_cy.cpython-311-darwin.so,sha256=hds9ZxW6dUjdyxCvqlPmuDbgRpR5Q8xGJ53aeb7seiE,362376
|
|
51
|
+
datamule-0.420.dist-info/METADATA,sha256=yl7VYR4JZIqpbkLIUYZXM4DrFs1NpKdvaffNIVYg2xo,1007
|
|
52
|
+
datamule-0.420.dist-info/WHEEL,sha256=vrdZjlh4exCGuOeHFVGYgW_9eYEDoByDCp4vVTWiG0w,115
|
|
53
|
+
datamule-0.420.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
|
|
54
|
+
datamule-0.420.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|