datamule 2.2.0__tar.gz → 2.2.1__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (67)
  1. {datamule-2.2.0 → datamule-2.2.1}/PKG-INFO +1 -1
  2. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/document.py +9 -5
  3. datamule-2.2.1/datamule/tags/config.py +33 -0
  4. {datamule-2.2.0 → datamule-2.2.1}/datamule/tags/utils.py +3 -7
  5. {datamule-2.2.0 → datamule-2.2.1}/datamule.egg-info/PKG-INFO +1 -1
  6. {datamule-2.2.0 → datamule-2.2.1}/setup.py +1 -1
  7. datamule-2.2.0/datamule/tags/config.py +0 -16
  8. {datamule-2.2.0 → datamule-2.2.1}/datamule/__init__.py +0 -0
  9. {datamule-2.2.0 → datamule-2.2.1}/datamule/config.py +0 -0
  10. {datamule-2.2.0 → datamule-2.2.1}/datamule/data/listed_filer_metadata.csv +0 -0
  11. {datamule-2.2.0 → datamule-2.2.1}/datamule/datamule/__init__.py +0 -0
  12. {datamule-2.2.0 → datamule-2.2.1}/datamule/datamule/datamule_lookup.py +0 -0
  13. {datamule-2.2.0 → datamule-2.2.1}/datamule/datamule/datamule_mysql_rds.py +0 -0
  14. {datamule-2.2.0 → datamule-2.2.1}/datamule/datamule/downloader.py +0 -0
  15. {datamule-2.2.0 → datamule-2.2.1}/datamule/datamule/sec_connector.py +0 -0
  16. {datamule-2.2.0 → datamule-2.2.1}/datamule/datasets.py +0 -0
  17. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/__init__.py +0 -0
  18. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/__init__.py +0 -0
  19. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/tables.py +0 -0
  20. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/tables_13fhr.py +0 -0
  21. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/tables_25nse.py +0 -0
  22. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/tables_informationtable.py +0 -0
  23. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/tables_npx.py +0 -0
  24. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/tables_ownership.py +0 -0
  25. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/tables_proxyvotingrecord.py +0 -0
  26. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/tables_sbsef.py +0 -0
  27. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/tables_sdr.py +0 -0
  28. {datamule-2.2.0 → datamule-2.2.1}/datamule/document/tables/utils.py +0 -0
  29. {datamule-2.2.0 → datamule-2.2.1}/datamule/helper.py +0 -0
  30. {datamule-2.2.0 → datamule-2.2.1}/datamule/index.py +0 -0
  31. {datamule-2.2.0 → datamule-2.2.1}/datamule/mapping_dicts/__init__.py +0 -0
  32. {datamule-2.2.0 → datamule-2.2.1}/datamule/mapping_dicts/html_mapping_dicts.py +0 -0
  33. {datamule-2.2.0 → datamule-2.2.1}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  34. {datamule-2.2.0 → datamule-2.2.1}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  35. {datamule-2.2.0 → datamule-2.2.1}/datamule/package_updater.py +0 -0
  36. {datamule-2.2.0 → datamule-2.2.1}/datamule/portfolio.py +0 -0
  37. {datamule-2.2.0 → datamule-2.2.1}/datamule/portfolio_compression_utils.py +0 -0
  38. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/__init__.py +0 -0
  39. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/infrastructure/__init__.py +0 -0
  40. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  41. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/submissions/__init__.py +0 -0
  42. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/submissions/downloader.py +0 -0
  43. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/submissions/eftsquery.py +0 -0
  44. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/submissions/monitor.py +0 -0
  45. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/submissions/streamer.py +0 -0
  46. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/submissions/textsearch.py +0 -0
  47. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/utils.py +0 -0
  48. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/xbrl/__init__.py +0 -0
  49. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  50. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  51. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  52. {datamule-2.2.0 → datamule-2.2.1}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  53. {datamule-2.2.0 → datamule-2.2.1}/datamule/seclibrary/__init__.py +0 -0
  54. {datamule-2.2.0 → datamule-2.2.1}/datamule/seclibrary/bq.py +0 -0
  55. {datamule-2.2.0 → datamule-2.2.1}/datamule/sheet.py +0 -0
  56. {datamule-2.2.0 → datamule-2.2.1}/datamule/submission.py +0 -0
  57. {datamule-2.2.0 → datamule-2.2.1}/datamule/tags/__init__.py +0 -0
  58. {datamule-2.2.0 → datamule-2.2.1}/datamule/tags/regex.py +0 -0
  59. {datamule-2.2.0 → datamule-2.2.1}/datamule/utils/__init__.py +0 -0
  60. {datamule-2.2.0 → datamule-2.2.1}/datamule/utils/construct_submissions_data.py +0 -0
  61. {datamule-2.2.0 → datamule-2.2.1}/datamule/utils/dictionaries.py +0 -0
  62. {datamule-2.2.0 → datamule-2.2.1}/datamule/utils/format_accession.py +0 -0
  63. {datamule-2.2.0 → datamule-2.2.1}/datamule.egg-info/SOURCES.txt +0 -0
  64. {datamule-2.2.0 → datamule-2.2.1}/datamule.egg-info/dependency_links.txt +0 -0
  65. {datamule-2.2.0 → datamule-2.2.1}/datamule.egg-info/requires.txt +0 -0
  66. {datamule-2.2.0 → datamule-2.2.1}/datamule.egg-info/top_level.txt +0 -0
  67. {datamule-2.2.0 → datamule-2.2.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 2.2.0
3
+ Version: 2.2.1
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -64,11 +64,15 @@ class Tags:
64
64
  self.document = document
65
65
  self._tickers = None
66
66
  self.dictionaries = {}
67
+ self.processors = {}
67
68
 
68
- # Load global dictionaries with their data
69
+ # Load global dictionaries with their data and processors
69
70
  active_dicts = _active_dictionaries
70
71
  for dict_name in active_dicts:
71
- self.dictionaries[dict_name] = _loaded_dictionaries[dict_name]
72
+ dict_info = _loaded_dictionaries[dict_name]
73
+ self.dictionaries[dict_name] = dict_info['data']
74
+ if dict_info['processor'] is not None:
75
+ self.processors[dict_name] = dict_info['processor']
72
76
 
73
77
 
74
78
  def _check_support(self):
@@ -128,9 +132,9 @@ class Tags:
128
132
  return None
129
133
 
130
134
  if not hasattr(self, '_persons'):
131
- if '8k_2024_persons' in self.dictionaries:
132
- # Use FlashText dictionary lookup for 8K persons
133
- self._persons = get_full_names_dictionary_lookup(self.document.text, self.dictionaries['8k_2024_persons'])
135
+ if '8k_2024_persons' in self.processors:
136
+ # Use pre-built processor
137
+ self._persons = get_full_names_dictionary_lookup(self.document.text, self.processors['8k_2024_persons'])
134
138
  elif 'ssa_baby_first_names' in self.dictionaries:
135
139
  # Use regex with SSA names for validation
136
140
  self._persons = get_full_names(self.document.text, self.dictionaries['ssa_baby_first_names'])
@@ -0,0 +1,33 @@
1
+ from ..utils.dictionaries import download_dictionary, load_dictionary
2
+
3
+ _active_dictionaries = []
4
+ _loaded_dictionaries = {}
5
+
6
+ def set_dictionaries(dictionaries, overwrite=False):
7
+ """Set active dictionaries and load them into memory"""
8
+ global _active_dictionaries, _loaded_dictionaries
9
+ _active_dictionaries = dictionaries
10
+ _loaded_dictionaries = {}
11
+
12
+ for dict_name in dictionaries:
13
+ # Download if needed
14
+ download_dictionary(dict_name, overwrite=overwrite)
15
+ # Load raw data
16
+ raw_data = load_dictionary(dict_name)
17
+
18
+ # Create processor for dictionary lookup methods
19
+ if dict_name in ['8k_2024_persons']: # Add other dict names as needed
20
+ from flashtext import KeywordProcessor
21
+ processor = KeywordProcessor(case_sensitive=True)
22
+ for key in raw_data.keys():
23
+ processor.add_keyword(key, key)
24
+
25
+ _loaded_dictionaries[dict_name] = {
26
+ 'data': raw_data,
27
+ 'processor': processor
28
+ }
29
+ else:
30
+ _loaded_dictionaries[dict_name] = {
31
+ 'data': raw_data,
32
+ 'processor': None
33
+ }
@@ -134,14 +134,10 @@ def get_full_names(text,keywords=None):
134
134
  return full_names
135
135
 
136
136
  # add dictionary lookup based on precomputed lists
137
- def get_full_names_dictionary_lookup(text, dictionary):
138
- keyword_processor = KeywordProcessor(case_sensitive=True)
139
-
140
- for key in dictionary.keys():
141
- keyword_processor.add_keyword(key, key)
142
-
137
+ def get_full_names_dictionary_lookup(text, processor):
138
+ """Use pre-built KeywordProcessor instead of creating new one"""
143
139
  matches = []
144
- keywords_found = keyword_processor.extract_keywords(text, span_info=True)
140
+ keywords_found = processor.extract_keywords(text, span_info=True)
145
141
 
146
142
  for keyword, start_pos, end_pos in keywords_found:
147
143
  matches.append((keyword, start_pos, end_pos))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 2.2.0
3
+ Version: 2.2.1
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -32,7 +32,7 @@ if not os.path.exists(file_path):
32
32
  setup(
33
33
  name="datamule",
34
34
  author="John Friedman",
35
- version="2.2.0",
35
+ version="2.2.1",
36
36
  description="Work with SEC submissions at scale.",
37
37
  packages=find_packages(include=['datamule', 'datamule.*']),
38
38
  url="https://github.com/john-friedman/datamule-python",
@@ -1,16 +0,0 @@
1
- from ..utils.dictionaries import download_dictionary, load_dictionary
2
-
3
- _active_dictionaries = []
4
- _loaded_dictionaries = {}
5
-
6
- def set_dictionaries(dictionaries, overwrite=False):
7
- """Set active dictionaries and load them into memory"""
8
- global _active_dictionaries, _loaded_dictionaries
9
- _active_dictionaries = dictionaries
10
- _loaded_dictionaries = {}
11
-
12
- for dict_name in dictionaries:
13
- # Download if needed
14
- download_dictionary(dict_name, overwrite=overwrite)
15
- # Load into memory
16
- _loaded_dictionaries[dict_name] = load_dictionary(dict_name)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes