datamule 0.417__cp310-cp310-win_amd64.whl → 0.420__cp310-cp310-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamule might be problematic. Click here for more details.

datamule/__init__.py CHANGED
@@ -12,12 +12,21 @@ def __getattr__(name):
12
12
  elif name == 'Parser':
13
13
  from .parser.document_parsing.sec_parser import Parser
14
14
  return Parser
15
+ elif name == 'Monitor':
16
+ from .monitor import Monitor
17
+ return Monitor
18
+ elif name == 'PackageUpdater':
19
+ from .packageupdater import PackageUpdater
20
+ return PackageUpdater
15
21
  elif name == 'Submission':
16
22
  from .submission import Submission
17
23
  return Submission
18
24
  elif name == 'Portfolio':
19
25
  from .portfolio import Portfolio
20
26
  return Portfolio
27
+ elif name == 'Document':
28
+ from .document import Document
29
+ return Document
21
30
  elif name == "parse_sgml_submission":
22
31
  from .parser.sgml_parsing.sgml_parser_cy import parse_sgml_submission
23
32
  return parse_sgml_submission
datamule/portfolio.py CHANGED
@@ -1,16 +1,21 @@
1
1
  from pathlib import Path
2
+ from tqdm import tqdm
3
+ from concurrent.futures import ProcessPoolExecutor
2
4
  from .submission import Submission
3
5
 
4
6
  class Portfolio:
5
7
  def __init__(self, path):
6
8
  self.path = Path(path)
7
- self.submissions = []
8
-
9
- # Load all subdirectories as submissions
10
- for folder in self.path.iterdir():
11
- if folder.is_dir():
12
- self.submissions.append(Submission(folder))
13
-
9
+ folders = [f for f in self.path.iterdir() if f.is_dir()]
10
+ print(f"Loading {len(folders)} submissions")
11
+ # Load submissions in parallel
12
+ with ProcessPoolExecutor() as executor:
13
+ # Show progress while loading
14
+ self.submissions = list(tqdm(
15
+ executor.map(Submission, folders),
16
+ total=len(folders),
17
+ desc="Loading submissions"
18
+ ))
14
19
 
15
20
  def __iter__(self):
16
21
  return iter(self.submissions)
datamule/submission.py CHANGED
@@ -51,8 +51,14 @@ class Submission:
51
51
  filepath.unlink()
52
52
 
53
53
  def document_type(self, document_type):
54
+ # Convert single document type to list for consistent handling
55
+ if isinstance(document_type, str):
56
+ document_types = [document_type]
57
+ else:
58
+ document_types = document_type
59
+
54
60
  for doc in self.metadata['documents']:
55
- if doc['TYPE'] == document_type:
61
+ if doc['TYPE'] in document_types:
56
62
  filename = doc.get('FILENAME')
57
63
  if filename is None:
58
64
  continue
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 0.417
3
+ Version: 0.420
4
4
  Summary: Making it easier to use SEC filings.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -16,11 +16,11 @@ Requires-Dist: selectolax
16
16
  Requires-Dist: pytz
17
17
  Requires-Dist: zstandard
18
18
  Provides-Extra: all
19
- Requires-Dist: openai; extra == "all"
20
- Requires-Dist: flask; extra == "all"
21
- Requires-Dist: psutil; extra == "all"
22
19
  Requires-Dist: pandas; extra == "all"
20
+ Requires-Dist: flask; extra == "all"
21
+ Requires-Dist: openai; extra == "all"
23
22
  Requires-Dist: google-generativeai; extra == "all"
23
+ Requires-Dist: psutil; extra == "all"
24
24
  Provides-Extra: dataset_builder
25
25
  Requires-Dist: pandas; extra == "dataset-builder"
26
26
  Requires-Dist: google-generativeai; extra == "dataset-builder"
@@ -1,10 +1,10 @@
1
- datamule/__init__.py,sha256=c5Ozl19hzvyMqG75Zl3XFHIBQihxfB47uoTm7EfOOTQ,2033
1
+ datamule/__init__.py,sha256=Li3iau_u87wQQhoPliSTTpGaf3OMf5jIvqtHFJmCvnw,2338
2
2
  datamule/document.py,sha256=6xEaI-32AQiBxX3gZcX4Qr49bgvcvLviFAwUGpTwtr0,5273
3
3
  datamule/helper.py,sha256=8HOjB3Y7svw_zjEY-AY5JKOJ-LrBiuQMPyok3MH6CCg,4716
4
4
  datamule/monitor.py,sha256=WVds1HGV_ojYgWmo0b4Dsiv9mzZ85HHnCucH-7XoUw8,9350
5
5
  datamule/packageupdater.py,sha256=qVabK4YwSd62OjIvNY59hcQNgZCi4R2PRgCenq7THgE,9790
6
- datamule/portfolio.py,sha256=is7QY2QkK9S2U2yZ6INC8TmLqIy5E1i0TUAlVT-Qzhg,440
7
- datamule/submission.py,sha256=TNaj-epcpuV8JLRwawForQRFUAlqbyvRwAykj6xLANE,2334
6
+ datamule/portfolio.py,sha256=T6V04T0WmN4Wy7cudiv3c2BbSxpUTt3mI3jZOsM6kUM,730
7
+ datamule/submission.py,sha256=sB6tidsAdaqP5VIQEFPq6PjLTgmD-crgdNviaOpiqlU,2558
8
8
  datamule/data/company_former_names.csv,sha256=zTBWdV12_JE3aROFOMrFNTHLPW_M4TDruxtl15-XfA0,714528
9
9
  datamule/data/company_metadata.csv,sha256=X7uSIwConqC0sz-moIhXIISg6FI7GLGSlvAfDDf8Sd0,3078648
10
10
  datamule/data/company_tickers.csv,sha256=ihU6aNFriN0lADloCO85Op04deFk3qVcLZ0EJhi5QVo,410362
@@ -36,7 +36,6 @@ datamule/parser/document_parsing/basic_10q_parser.py,sha256=kZWGljvC0TYLh4eWl4JQ
36
36
  datamule/parser/document_parsing/basic_13d_parser.py,sha256=1nxBi9KFJuzw7CeEXpJFDzYVUd6rj4eCYiKHsGjK2b4,1585
37
37
  datamule/parser/document_parsing/basic_13g_parser.py,sha256=H9_MuOgkYVTIGwhj9w_WIpfMDdgakE_qs0Y3fvaUj_E,1691
38
38
  datamule/parser/document_parsing/basic_8k_parser.py,sha256=fzf8q9LOpBMHGWw-sfqUq3pyFZBlw47nLJBQWPhtGGg,2549
39
- datamule/parser/document_parsing/company_concepts_parser.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
39
  datamule/parser/document_parsing/form_d_parser.py,sha256=NTAfC8W3i2y7aIofXoLlAbY-4F6QVELYfIPIrVErjVY,2105
41
40
  datamule/parser/document_parsing/generalized_item_parser.py,sha256=M2bmYivSXe0POyBtDlPMykyyCgG8n1egRpJuZtZTR_g,2694
42
41
  datamule/parser/document_parsing/generalized_xml_parser.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -48,8 +47,8 @@ datamule/parser/document_parsing/n_port_p_parser.py,sha256=T6GliMm-TETPsFM-hDKt1
48
47
  datamule/parser/document_parsing/sec_parser.py,sha256=YewOdOsi0P25teQuxS5DNEND9ZCyxE2ewK1DoP9mPto,2788
49
48
  datamule/parser/document_parsing/sgml_parser.py,sha256=ASpe1SzgPj4qk0VOmmuMiEQeatjcwZzsuO3MvsYCHhc,3410
50
49
  datamule/parser/sgml_parsing/sgml_parser_cy.c,sha256=vxLnjpUgZ5LLvBvzYI_CZVxjpgRpulnzj3EFQG5eB8g,797203
51
- datamule/parser/sgml_parsing/sgml_parser_cy.cp310-win_amd64.pyd,sha256=rgIru67F1CqieSN0D01VU9fPt0hhSMRp3RX_sNWXR0E,121856
52
- datamule-0.417.dist-info/METADATA,sha256=P7gIs6NrPGm3pRcDeR77InFJFgrjNKO6XkNUCN3pj7U,1039
53
- datamule-0.417.dist-info/WHEEL,sha256=NVXpD7b4Gxps0cd2ds5rr5TG8W4ApEwx_i5J99qMZ5E,102
54
- datamule-0.417.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
55
- datamule-0.417.dist-info/RECORD,,
50
+ datamule/parser/sgml_parsing/sgml_parser_cy.cp310-win_amd64.pyd,sha256=HiCnYkg0dL4EKTluT4AfuvAww2kumhxYXqIfb_ZfRmA,121856
51
+ datamule-0.420.dist-info/METADATA,sha256=rDPjA36yLxZDb_Zk6heYW9a1XP88ts9zbOQCqCPbi0s,1039
52
+ datamule-0.420.dist-info/WHEEL,sha256=NVXpD7b4Gxps0cd2ds5rr5TG8W4ApEwx_i5J99qMZ5E,102
53
+ datamule-0.420.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
54
+ datamule-0.420.dist-info/RECORD,,