datamule 0.380__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. datamule/__init__.py +46 -86
  2. datamule/book.py +16 -0
  3. datamule/config.py +29 -0
  4. datamule/data/company_former_names.csv +8148 -8148
  5. datamule/data/company_metadata.csv +10049 -10049
  6. datamule/data/company_tickers.csv +9999 -10168
  7. datamule/data/sec-glossary.csv +728 -728
  8. datamule/data/xbrl_descriptions.csv +10024 -10024
  9. datamule/document.py +278 -0
  10. datamule/downloader/downloader.py +374 -0
  11. datamule/downloader/premiumdownloader.py +335 -0
  12. datamule/helper.py +123 -136
  13. datamule/mapping_dicts/txt_mapping_dicts.py +232 -0
  14. datamule/mapping_dicts/xml_mapping_dicts.py +19 -0
  15. datamule/monitor.py +238 -0
  16. datamule/mulebot/__init__.py +1 -1
  17. datamule/mulebot/helper.py +34 -34
  18. datamule/mulebot/mulebot.py +129 -129
  19. datamule/mulebot/mulebot_server/server.py +86 -86
  20. datamule/mulebot/mulebot_server/static/css/minimalist.css +173 -173
  21. datamule/mulebot/mulebot_server/static/scripts/artifacts.js +67 -67
  22. datamule/mulebot/mulebot_server/static/scripts/chat.js +91 -91
  23. datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js +55 -55
  24. datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js +14 -14
  25. datamule/mulebot/mulebot_server/static/scripts/main.js +56 -56
  26. datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js +26 -26
  27. datamule/mulebot/mulebot_server/static/scripts/suggestions.js +46 -46
  28. datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js +128 -128
  29. datamule/mulebot/mulebot_server/static/scripts/utils.js +27 -27
  30. datamule/mulebot/mulebot_server/templates/chat-minimalist.html +90 -90
  31. datamule/mulebot/search.py +51 -51
  32. datamule/mulebot/tools.py +82 -82
  33. datamule/packageupdater.py +207 -0
  34. datamule/portfolio.py +106 -0
  35. datamule/submission.py +76 -0
  36. datamule-1.0.0.dist-info/METADATA +27 -0
  37. datamule-1.0.0.dist-info/RECORD +40 -0
  38. {datamule-0.380.dist-info → datamule-1.0.0.dist-info}/WHEEL +1 -1
  39. datamule/data/filing_types.csv +0 -485
  40. datamule/data/ftd_locations.csv +0 -388
  41. datamule/datamule_api.py +0 -21
  42. datamule/dataset_builder/_init.py +0 -1
  43. datamule/dataset_builder/dataset_builder.py +0 -260
  44. datamule/downloader/__init__.py +0 -0
  45. datamule/downloader/dropbox_downloader.py +0 -225
  46. datamule/downloader/ftd.py +0 -216
  47. datamule/downloader/information_table_13f.py +0 -231
  48. datamule/downloader/sec_downloader.py +0 -635
  49. datamule/filing_viewer/__init__.py +0 -1
  50. datamule/filing_viewer/filing_viewer.py +0 -256
  51. datamule/global_vars.py +0 -202
  52. datamule/parser/__init__.py +0 -1
  53. datamule/parser/basic_10k_parser.py +0 -82
  54. datamule/parser/basic_10q_parser.py +0 -73
  55. datamule/parser/basic_13d_parser.py +0 -58
  56. datamule/parser/basic_13g_parser.py +0 -61
  57. datamule/parser/basic_8k_parser.py +0 -84
  58. datamule/parser/company_concepts_parser.py +0 -0
  59. datamule/parser/form_d_parser.py +0 -70
  60. datamule/parser/generalized_item_parser.py +0 -78
  61. datamule/parser/generalized_xml_parser.py +0 -0
  62. datamule/parser/helper.py +0 -75
  63. datamule/parser/information_table_parser_13fhr.py +0 -41
  64. datamule/parser/insider_trading_parser.py +0 -158
  65. datamule/parser/mappings.py +0 -95
  66. datamule/parser/n_port_p_parser.py +0 -70
  67. datamule/parser/sec_parser.py +0 -79
  68. datamule/parser/sgml_parser.py +0 -180
  69. datamule/sec_filing.py +0 -126
  70. datamule/sec_search.py +0 -20
  71. datamule-0.380.dist-info/METADATA +0 -110
  72. datamule-0.380.dist-info/RECORD +0 -61
  73. {datamule-0.380.dist-info → datamule-1.0.0.dist-info}/top_level.txt +0 -0
datamule/__init__.py CHANGED
@@ -1,87 +1,47 @@
1
- # datamule/__init__.py
2
- import sys
3
- from importlib.util import find_spec
4
- from functools import lru_cache
5
-
6
- # Lazy load nest_asyncio only when needed
7
- def _setup_jupyter():
8
- """Setup Jupyter-specific configurations if needed."""
9
- if _is_jupyter():
10
- import nest_asyncio
11
- nest_asyncio.apply()
12
-
13
- def _is_jupyter():
14
- """Check if the code is running in a Jupyter environment."""
15
- try:
16
- shell = get_ipython().__class__.__name__
17
- return shell == 'ZMQInteractiveShell'
18
- except NameError:
19
- return False
20
-
21
- # Lazy loading for main components
22
- @lru_cache(None)
23
- def get_downloader():
24
- from .downloader.sec_downloader import Downloader
25
- return Downloader
26
-
27
- @lru_cache(None)
28
- def get_parser():
29
- from .parser.sec_parser import Parser
30
- return Parser
31
-
32
- @lru_cache(None)
33
- def get_filing():
34
- from .sec_filing import Filing
35
- return Filing
36
-
37
- @lru_cache(None)
38
- def get_dataset_builder():
39
- if find_spec('pandas') is not None:
40
- try:
41
- from .dataset_builder.dataset_builder import DatasetBuilder
42
- return DatasetBuilder
43
- except ImportError:
44
- return None
45
- return None
46
-
47
- # Helper functions that can be imported directly
48
- from .datamule_api import parse_textual_filing
49
- from .helper import load_package_csv, load_package_dataset
50
- from .global_vars import *
51
- from .parser.sgml_parser import parse_submission
52
-
53
- # Define classes with delayed initialization
54
- class Downloader:
55
- def __new__(cls, *args, **kwargs):
56
- return get_downloader()(*args, **kwargs)
57
-
58
- class Parser:
59
- def __new__(cls, *args, **kwargs):
60
- return get_parser()(*args, **kwargs)
61
-
62
- class Filing:
63
- def __new__(cls, *args, **kwargs):
64
- return get_filing()(*args, **kwargs)
65
-
66
- class DatasetBuilder:
67
- def __new__(cls, *args, **kwargs):
68
- builder_cls = get_dataset_builder()
69
- if builder_cls is None:
70
- raise ImportError(
71
- "DatasetBuilder requires pandas. "
72
- "Install with: pip install datamule[dataset_builder]"
73
- )
74
- return builder_cls(*args, **kwargs)
75
-
76
- # Set up Jupyter support only when imported
77
- _setup_jupyter()
78
-
79
- __all__ = [
80
- 'Downloader',
81
- 'parse_textual_filing',
82
- 'load_package_csv',
83
- 'load_package_dataset',
84
- 'Parser',
85
- 'Filing',
86
- 'DatasetBuilder'
1
+ from .downloader.downloader import Downloader
2
+ from .downloader.premiumdownloader import PremiumDownloader
3
+ from .monitor import Monitor
4
+ from .packageupdater import PackageUpdater
5
+ from .submission import Submission
6
+ from .portfolio import Portfolio
7
+ from .document import Document
8
+ from secsgml import parse_sgml_submission
9
+ from .helper import load_package_csv, load_package_dataset
10
+ from .config import Config
11
+
12
+
13
+ # Keep the notebook environment setup
14
+ def _is_notebook_env():
15
+ """Check if the code is running in a Jupyter or Colab environment."""
16
+ try:
17
+ shell = get_ipython().__class__.__name__
18
+ return shell in ('ZMQInteractiveShell', 'Shell', 'Google.Colab')
19
+ except NameError:
20
+ return False
21
+
22
+ from functools import lru_cache
23
+
24
@lru_cache(maxsize=1)
def _setup_notebook_env():
    """Apply ``nest_asyncio`` when running inside a Jupyter/Colab shell.

    Outside a notebook environment this does nothing. The ``lru_cache``
    wrapper memoises the (argument-less) call, so repeated invocations do not
    re-run the body.
    """
    if not _is_notebook_env():
        return
    # Imported lazily so the dependency is only touched inside notebooks.
    import nest_asyncio
    nest_asyncio.apply()


# Set up notebook environment
_setup_notebook_env()
33
+
34
# Names exported by ``from datamule import *``. Every entry must be bound in
# this module: a star-import raises AttributeError for any listed name that
# is missing. 'Filing' was listed here but nothing named Filing is imported
# by this module, so it has been dropped.
__all__ = [
    'Downloader',
    'PremiumDownloader',
    'load_package_csv',
    'load_package_dataset',
    'Portfolio',
    'Monitor',
    'PackageUpdater',
    'Submission',
    'Document',
    'parse_sgml_submission',
    'Config',
]
datamule/book.py ADDED
@@ -0,0 +1,16 @@
1
+ # Streams data rather than downloading it.
2
+ # additional functionality such as query by xbrl, and other db
3
+ # also this is basically our experimental rework of portfolio w/o disturbing existing users
4
+ # this is highly experimental and may not work as expected
5
+ # only for datamule source
6
+ # likely new bottleneck will be local parsing() - will be bypassed in future when we have parsed archive
7
+ # wow parsed archive is going to be crazy fast - like every 10k in 1 minute.
8
+
9
class Book():
    """Experimental placeholder for a streaming rework of portfolio processing.

    Nothing is implemented yet: ``process_submissions`` accepts its arguments
    and does nothing.
    """

    def process_submissions(self, cik, ticker, sic, submission_type, document_type, date,
                            xbrl_query=None,
                            metadata_callback=None,
                            document_callback=None,):
        """Placeholder for grabbing submission data and processing it.

        ``xbrl_query`` defaults to ``None`` rather than a ``{}`` literal so
        that no single dict object is shared between calls. The method
        currently ignores every argument and returns ``None``.
        """
        # grabs data and processes it
        pass
datamule/config.py ADDED
@@ -0,0 +1,29 @@
1
+ import json
2
+ import os
3
+
4
class Config:
    """Persist datamule settings as a small JSON file.

    By default the file is ``~/.datamule/config.json``. It is created with
    ``{"default_source": None}`` when it does not exist yet.
    """

    def __init__(self, config_path=None):
        """Resolve the config file location and make sure the file exists.

        Args:
            config_path: Optional path of the JSON config file. ``None``
                (the default) selects ``~/.datamule/config.json``. A leading
                ``~`` is expanded either way.
        """
        if config_path is None:
            config_path = "~/.datamule/config.json"
        self.config_path = os.path.expanduser(config_path)
        self._ensure_config_exists()

    def _ensure_config_exists(self):
        """Create the parent directory and a default config file if missing."""
        directory = os.path.dirname(self.config_path)
        # A bare filename has an empty dirname, which os.makedirs rejects.
        if directory:
            os.makedirs(directory, exist_ok=True)
        if not os.path.exists(self.config_path):
            self._save_config({"default_source": None})

    def _save_config(self, config):
        """Write ``config`` as JSON, replacing the previous file in one step.

        The data is first written to a sibling ``.tmp`` file and then moved
        over the real path, so a failed or interrupted dump does not leave a
        truncated config file behind.
        """
        tmp_path = self.config_path + ".tmp"
        try:
            with open(tmp_path, 'w', encoding="utf-8") as f:
                json.dump(config, f)
            os.replace(tmp_path, self.config_path)
        finally:
            # Only present here if the dump or the replace failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def set_default_source(self, source):
        """Store ``source`` under the ``"default_source"`` key and save."""
        config = self._load_config()
        config["default_source"] = source
        self._save_config(config)

    def get_default_source(self):
        """Return the stored ``"default_source"`` value, or ``None`` if unset."""
        config = self._load_config()
        return config.get("default_source")

    def _load_config(self):
        """Read and return the parsed JSON config.

        If the file has been removed since construction, it is recreated with
        default contents and then read. Invalid JSON still raises.
        """
        try:
            with open(self.config_path, encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            self._ensure_config_exists()
            with open(self.config_path, encoding="utf-8") as f:
                return json.load(f)