datamule 2.1.6__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,9 +13,137 @@ from pathlib import Path
  import webbrowser
  from secsgml.utils import bytes_to_str
  import tempfile
-
+ import warnings
  from .tables.tables import Tables

+ from ..tags.utils import get_cusip_using_regex, get_isin_using_regex, get_figi_using_regex, get_all_tickers, get_full_names, get_full_names_dictionary_lookup
+
+
+ class Tickers:
+     def __init__(self, document):
+         self.document = document
+         self._tickers_data = None
+
+     def _get_tickers_data(self):
+         """Get all tickers data once and cache it"""
+         if self._tickers_data is None:
+             # Check if document extension is supported
+             if self.document.extension not in ['.htm', '.html', '.txt']:
+                 self._tickers_data = {}
+             else:
+                 self._tickers_data = get_all_tickers(self.document.text)
+         return self._tickers_data
+
+     def __getattr__(self, exchange_name):
+         data = self._get_tickers_data()
+
+         if exchange_name in data:
+             return data[exchange_name]
+
+         return []
+
+     def __bool__(self):
+         """Return True if any tickers were found"""
+         data = self._get_tickers_data()
+         return bool(data.get('all', []))
+
+     def __repr__(self):
+         """Show the full ticker data when printed or accessed directly"""
+         data = self._get_tickers_data()
+         return str(data)
+
+     def __str__(self):
+         """Show the full ticker data when printed"""
+         data = self._get_tickers_data()
+         return str(data)
+
+ class Tags:
+     def __init__(self, document):
+         from ..tags.config import _active_dictionaries, _loaded_dictionaries
+         self.not_supported = document.extension not in ['.htm', '.html', '.txt']
+         self.document = document
+         self._tickers = None
+         self.dictionaries = {}
+         self.processors = {}
+
+         # Load global dictionaries with their data and processors
+         active_dicts = _active_dictionaries
+         for dict_name in active_dicts:
+             dict_info = _loaded_dictionaries[dict_name]
+             self.dictionaries[dict_name] = dict_info['data']
+             if dict_info['processor'] is not None:
+                 self.processors[dict_name] = dict_info['processor']
+
+
+     def _check_support(self):
+         if self.not_supported:
+             warnings.warn(f"Document extension '{self.document.extension}' is not supported. Supported formats: .htm, .html, .txt")
+             return False
+         return True
+
+     @property
+     def cusips(self):
+         if not self._check_support():
+             return None
+
+         if not hasattr(self, '_cusip'):
+             if 'sc13dg_cusips' in self.dictionaries:
+                 keywords = self.dictionaries['sc13dg_cusips']
+                 self._cusip = get_cusip_using_regex(self.document.text, keywords)
+             else:
+                 self._cusip = get_cusip_using_regex(self.document.text)
+         return self._cusip
+
+     @property
+     def isins(self):
+         if not self._check_support():
+             return None
+
+         if not hasattr(self, '_isin'):
+             if 'npx_isins' in self.dictionaries:
+                 keywords = self.dictionaries['npx_isins']
+                 self._isin = get_isin_using_regex(self.document.text, keywords)
+             else:
+                 self._isin = get_isin_using_regex(self.document.text)
+         return self._isin
+
+     @property
+     def figis(self):
+         if not self._check_support():
+             return None
+
+         if not hasattr(self, '_figi'):
+             if 'npx_figis' in self.dictionaries:
+                 keywords = self.dictionaries['npx_figis']
+                 self._figi = get_figi_using_regex(self.document.text, keywords)
+             else:
+                 self._figi = get_figi_using_regex(self.document.text)
+         return self._figi
+
+     @property
+     def tickers(self):
+         if self._tickers is None:
+             self._tickers = Tickers(self.document)
+         return self._tickers
+
+     @property
+     def persons(self):
+         if not self._check_support():
+             return None
+
+         if not hasattr(self, '_persons'):
+             if '8k_2024_persons' in self.processors:
+                 # Use pre-built processor
+                 self._persons = get_full_names_dictionary_lookup(self.document.text, self.processors['8k_2024_persons'])
+             elif 'ssa_baby_first_names' in self.dictionaries:
+                 # Use regex with SSA names for validation
+                 self._persons = get_full_names(self.document.text, self.dictionaries['ssa_baby_first_names'])
+             else:
+                 # Fallback to regex without validation
+                 self._persons = get_full_names(self.document.text)
+         return self._persons
+
+
  class Document:
      def __init__(self, type, content, extension,accession,filing_date,path=None):

@@ -34,10 +162,13 @@ class Document:
          self.path = path

          self.extension = extension
+
          # this will be filled by parsed
          self._data = None
          self._tables = None
          self._text = None
+
+         self.tags = Tags(self)

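Review note: a short, hypothetical sketch of the tagging surface this hunk attaches to Document. The constructor values below are placeholders, and extraction of .text from raw .htm content is assumed to behave as in the 2.1.x Document API:

    from datamule.document.document import Document

    # Placeholder construction purely for illustration; real Documents are
    # normally produced by parsing a downloaded submission.
    doc = Document(type='SC 13D', content=b'CUSIP No. 037833100 ... AAPL',
                   extension='.htm', accession='0000000000-00-000000',
                   filing_date='2024-01-02')

    print(doc.tags.cusips)          # [(cusip, start, end), ...], or None plus a warning
    print(doc.tags.tickers.nasdaq)  # per-exchange hits via Tickers.__getattr__
    print(bool(doc.tags.tickers))   # True only if the deduplicated 'all' bucket is non-empty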
datamule/tags/__init__.py ADDED
File without changes
datamule/tags/config.py ADDED
@@ -0,0 +1,33 @@
+ from ..utils.dictionaries import download_dictionary, load_dictionary
+
+ _active_dictionaries = []
+ _loaded_dictionaries = {}
+
+ def set_dictionaries(dictionaries, overwrite=False):
+     """Set active dictionaries and load them into memory"""
+     global _active_dictionaries, _loaded_dictionaries
+     _active_dictionaries = dictionaries
+     _loaded_dictionaries = {}
+
+     for dict_name in dictionaries:
+         # Download if needed
+         download_dictionary(dict_name, overwrite=overwrite)
+         # Load raw data
+         raw_data = load_dictionary(dict_name)
+
+         # Create processor for dictionary lookup methods
+         if dict_name in ['8k_2024_persons']:  # Add other dict names as needed
+             from flashtext import KeywordProcessor
+             processor = KeywordProcessor(case_sensitive=True)
+             for key in raw_data.keys():
+                 processor.add_keyword(key, key)
+
+             _loaded_dictionaries[dict_name] = {
+                 'data': raw_data,
+                 'processor': processor
+             }
+         else:
+             _loaded_dictionaries[dict_name] = {
+                 'data': raw_data,
+                 'processor': None
+             }
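Review note: a minimal usage sketch of this new config hook. Because Tags.__init__ snapshots the module-level globals at construction time, set_dictionaries must run before the Documents whose tags should use the dictionaries are created:

    from datamule.tags.config import set_dictionaries

    # Downloads each dictionary into ~/.datamule/dictionaries/ if missing,
    # loads it, and publishes it through _active_dictionaries /
    # _loaded_dictionaries for Tags.__init__ to pick up.
    set_dictionaries(['sc13dg_cusips', 'ssa_baby_first_names'])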
datamule/tags/regex.py ADDED
@@ -0,0 +1,105 @@
+ # Exchange ticker regexes with word boundaries
+ nyse_regex = r"\b([A-Z]{1,4})(\.[A-Z]+)?\b"
+ nasdaq_regex = r"\b([A-Z]{1,5})(\.[A-Z]+)?\b"
+ nyse_american_regex = r"\b([A-Z]{1,5})(\.[A-Z]+)?\b"
+ london_stock_exchange_regex = r"\b([A-Z]{3,4})(\.[A-Z]+)?\b"
+ toronto_stock_exchange_regex = r"\b([A-Z]{1,5})(\.[A-Z]+)?\b"
+ euronext_paris_regex = r"\b([A-Z]{2,12})(\.[A-Z]+)?\b"
+ euronext_amsterdam_regex = r"\b([A-Z]{1,5})(\.[A-Z]+)?\b"
+ euronext_brussels_regex = r"\b([A-Z]{1,5})(\.[A-Z]+)?\b"
+ euronext_lisbon_regex = r"\b([A-Z]{3,5})(\.[A-Z]+)?\b"
+ euronext_milan_regex = r"\b([A-Z]{2,5})(\.[A-Z]+)?\b"
+ deutsche_borse_xetra_regex = r"\b([A-Z0-9]{3,6})(\.[A-Z]+)?\b"
+ six_swiss_exchange_regex = r"\b([A-Z]{2,6})(\.[A-Z]+)?\b"
+ tokyo_stock_exchange_regex = r"\b(\d{4})\b"
+ hong_kong_stock_exchange_regex = r"\b(\d{4,5})\b"
+ shanghai_stock_exchange_regex = r"\b(6\d{5})\b"
+ shenzhen_stock_exchange_regex = r"\b([03]\d{5})\b"
+ australian_securities_exchange_regex = r"\b([A-Z]{3})(\.[A-Z]+)?\b"
+ singapore_exchange_regex = r"\b([A-Z]\d{2}[A-Z]?)(\.[A-Z]+)?\b"
+ nse_bse_regex = r"\b([A-Z&]{1,10})(\.[A-Z]+)?\b"
+ sao_paulo_b3_regex = r"\b([A-Z]{4}\d{1,2})(\.[A-Z]+)?\b"
+ mexico_bmv_regex = r"\b([A-Z*]{1,7})(\.[A-Z]+)?\b"
+ korea_exchange_regex = r"\b(\d{6})\b"
+ taiwan_stock_exchange_regex = r"\b(\d{4})\b"
+ johannesburg_stock_exchange_regex = r"\b([A-Z]{3})(\.[A-Z]+)?\b"
+ tel_aviv_stock_exchange_regex = r"\b([A-Z]{4})(\.[A-Z]+)?\b"
+ moscow_exchange_regex = r"\b([A-Z]{4})(\.[A-Z]+)?\b"
+ istanbul_stock_exchange_regex = r"\b([A-Z]{5})(\.[A-Z]+)?\b"
+ nasdaq_stockholm_regex = r"\b([A-Z]{3,4})( [A-Z])?(\.[A-Z]+)?\b"
+ oslo_bors_regex = r"\b([A-Z]{3,5})(\.[A-Z]+)?\b"
+ otc_markets_us_regex = r"\b([A-Z]{4,5})[FY]?(\.[A-Z]+)?\b"
+ pink_sheets_regex = r"\b([A-Z]{4,5})(\.[A-Z]+)?\b"
+
+ ticker_regex_list = [
+     ("nyse", r"\b([A-Z]{1,4})(\.[A-Z]+)?\b"),
+     ("nasdaq", r"\b([A-Z]{1,5})(\.[A-Z]+)?\b"),
+     ("nyse_american", r"\b([A-Z]{1,5})(\.[A-Z]+)?\b"),
+     ("london_stock_exchange", r"\b([A-Z]{3,4})(\.[A-Z]+)?\b"),
+     ("toronto_stock_exchange", r"\b([A-Z]{1,5})(\.[A-Z]+)?\b"),
+     ("euronext_paris", r"\b([A-Z]{2,12})(\.[A-Z]+)?\b"),
+     ("euronext_amsterdam", r"\b([A-Z]{1,5})(\.[A-Z]+)?\b"),
+     ("euronext_brussels", r"\b([A-Z]{1,5})(\.[A-Z]+)?\b"),
+     ("euronext_lisbon", r"\b([A-Z]{3,5})(\.[A-Z]+)?\b"),
+     ("euronext_milan", r"\b([A-Z]{2,5})(\.[A-Z]+)?\b"),
+     ("deutsche_borse_xetra", r"\b([A-Z0-9]{3,6})(\.[A-Z]+)?\b"),
+     ("six_swiss_exchange", r"\b([A-Z]{2,6})(\.[A-Z]+)?\b"),
+     ("tokyo_stock_exchange", r"\b(\d{4})\b"),
+     ("hong_kong_stock_exchange", r"\b(\d{4,5})\b"),
+     ("shanghai_stock_exchange", r"\b(6\d{5})\b"),
+     ("shenzhen_stock_exchange", r"\b([03]\d{5})\b"),
+     ("australian_securities_exchange", r"\b([A-Z]{3})(\.[A-Z]+)?\b"),
+     ("singapore_exchange", r"\b([A-Z]\d{2}[A-Z]?)(\.[A-Z]+)?\b"),
+     ("nse_bse", r"\b([A-Z&]{1,10})(\.[A-Z]+)?\b"),
+     ("sao_paulo_b3", r"\b([A-Z]{4}\d{1,2})(\.[A-Z]+)?\b"),
+     ("mexico_bmv", r"\b([A-Z*]{1,7})(\.[A-Z]+)?\b"),
+     ("korea_exchange", r"\b(\d{6})\b"),
+     ("taiwan_stock_exchange", r"\b(\d{4})\b"),
+     ("johannesburg_stock_exchange", r"\b([A-Z]{3})(\.[A-Z]+)?\b"),
+     ("tel_aviv_stock_exchange", r"\b([A-Z]{4})(\.[A-Z]+)?\b"),
+     ("moscow_exchange", r"\b([A-Z]{4})(\.[A-Z]+)?\b"),
+     ("istanbul_stock_exchange", r"\b([A-Z]{5})(\.[A-Z]+)?\b"),
+     ("nasdaq_stockholm", r"\b([A-Z]{3,4})( [A-Z])?(\.[A-Z]+)?\b"),
+     ("oslo_bors", r"\b([A-Z]{3,5})(\.[A-Z]+)?\b"),
+     ("otc_markets_us", r"\b([A-Z]{4,5})[FY]?(\.[A-Z]+)?\b"),
+     ("pink_sheets", r"\b([A-Z]{4,5})(\.[A-Z]+)?\b"),
+ ]
+ # Security identifier regexes with word boundaries
+ cusip_regex = r"\b[0-9A-Z]{8}[0-9]\b"
+ isin_regex = r"\b[A-Z]{2}[0-9A-Z]{9}[0-9]\b"
+ figi_regex = r"\b[A-Z]{2}G[A-Z0-9]{8}[0-9]\b"
+
+ particles = {
+     # Dutch - single words only
+     'van', 'der', 'den', 'de',
+
+     # German - single words only
+     'von', 'zu', 'vom', 'zur', 'zum',
+
+     # Spanish - single words only
+     'de', 'del', 'y',
+
+     # Portuguese - single words only
+     'da', 'das', 'do', 'dos', 'e',
+
+     # French - single words only
+     'de', 'du', 'des', 'le', 'la', 'les', "d'",
+
+     # Italian - single words only
+     'da', 'di', 'del', 'della', 'delle', 'dei', 'degli', 'dello',
+
+     # Irish/Scottish
+     'mac', 'mc', 'o',
+
+     # Arabic
+     'al', 'el', 'ibn', 'bin', 'bint', 'abu',
+
+     # Other European
+     'af', 'av',  # Scandinavian
+     'ter',  # Dutch/Flemish
+     'op',  # Dutch
+     'aan',  # Dutch
+     'ten',  # Dutch
+     'het',  # Dutch
+     'in',  # Dutch
+ }
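Review note: these patterns are deliberately broad, so raw matches include ordinary uppercase words rather than only listed tickers; the dictionary keyword sets do the filtering downstream. A quick, self-contained illustration with Python's re module:

    import re

    nyse_regex = r"\b([A-Z]{1,4})(\.[A-Z]+)?\b"

    text = "SEC filing for IBM and BRK.A, filed on FORM 8-K."
    # Every 1-4 letter uppercase run qualifies, not only real NYSE tickers:
    print([m.group(0) for m in re.finditer(nyse_regex, text)])
    # ['SEC', 'IBM', 'BRK.A', 'FORM', 'K']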
datamule/tags/utils.py ADDED
@@ -0,0 +1,145 @@
+ import re
+ from .regex import cusip_regex, isin_regex, figi_regex, ticker_regex_list
+ from .regex import particles
+ from flashtext import KeywordProcessor
+
+ def get_cusip_using_regex(text, keywords=None):
+     matches = []
+     for match in re.finditer(cusip_regex, text):
+         if keywords is not None:
+             if match.group() in keywords:
+                 matches.append((match.group(), match.start(), match.end()))
+         else:
+             matches.append((match.group(), match.start(), match.end()))
+     return matches
+
+ def get_isin_using_regex(text, keywords=None):
+     matches = []
+     for match in re.finditer(isin_regex, text):
+         if keywords is not None:
+             if match.group() in keywords:
+                 matches.append((match.group(), match.start(), match.end()))
+         else:
+             matches.append((match.group(), match.start(), match.end()))
+     return matches
+
+ def get_figi_using_regex(text, keywords=None):
+     matches = []
+     for match in re.finditer(figi_regex, text):
+         if keywords is not None:
+             if match.group() in keywords:
+                 matches.append((match.group(), match.start(), match.end()))
+         else:
+             matches.append((match.group(), match.start(), match.end()))
+     return matches
+
+ def get_tickers_using_regex(text, regex_pattern):
+     """Extract tickers using the given regex pattern with position information"""
+     matches = []
+     for match in re.finditer(regex_pattern, text):
+         # Handle tuples from regex groups - take the first capture group
+         if match.groups():
+             ticker = match.group(1) if match.group(1) else match.group(0)
+         else:
+             ticker = match.group(0)
+         matches.append((ticker, match.start(), match.end()))
+     return matches
+
+ def get_all_tickers(text):
+     """Get all tickers from all exchanges organized by exchange with position info"""
+     result = {}
+     all_tickers = []
+
+     for exchange_name, regex_pattern in ticker_regex_list:
+         tickers = get_tickers_using_regex(text, regex_pattern)
+         result[exchange_name] = tickers
+         all_tickers.extend(tickers)
+
+     # Remove duplicates while preserving order for 'all'
+     # Keep track of seen ticker values (first element of tuple)
+     seen = set()
+     result['all'] = [x for x in all_tickers if not (x[0] in seen or seen.add(x[0]))]
+
+     return result
+
+ def get_ticker_regex_dict():
+     """Return ticker regex list as a dictionary for easy lookup"""
+     return dict(ticker_regex_list)
+
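Review note: a small sketch of the return shape of get_all_tickers (sample string mine; each hit is a (value, start, end) tuple and 'all' is deduplicated by value):

    from datamule.tags.utils import get_all_tickers

    result = get_all_tickers("Shares of AAPL trade on NASDAQ.")
    # 'NASDAQ' is six letters, too long for the 1-5 letter nasdaq pattern:
    print(result['nasdaq'])               # [('AAPL', 10, 14)]
    # ...but broader patterns such as euronext_paris still catch it:
    print([t[0] for t in result['all']])  # ['AAPL', 'NASDAQ']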
+ # will change in future to accommodate other datasets
+ def validate_full_name(full_name, keywords):
+     if len(full_name) == 1:
+         return False
+     # check all is upper
+     if all(word.isupper() for word in full_name):
+         return False
+     # check if any number in word
+     if any(any(char.isdigit() for char in word) for word in full_name):
+         return False
+     if any(any(char in ".,;:!?()[]" for char in word) for word in full_name):
+         return False
+
+     # add optional set lookups
+     if keywords is not None:
+         # return false if first word is not in keywords set
+         if full_name[0] not in keywords:
+             return False
+
+
+     return True
+
+ def get_full_names(text, keywords=None):
+     words = text.split()
+     full_names = []
+     current_pos = None
+     word_start_positions = []
+
+     # Calculate word positions in the original text
+     pos = 0
+     for word in words:
+         start = text.find(word, pos)
+         word_start_positions.append(start)
+         pos = start + len(word)
+
+     for idx, word in enumerate(words):
+         if current_pos is None:
+             if word[0].isupper():
+                 current_pos = idx
+         else:
+             if word[0].isupper() or word.lower() in particles:
+                 continue
+             else:
+                 full_name = words[current_pos:idx]
+                 if validate_full_name(full_name, keywords):
+                     name_text = ' '.join(full_name)
+                     start_pos = word_start_positions[current_pos]
+                     # Calculate end position of the last word in the name
+                     last_word_idx = idx - 1
+                     end_pos = word_start_positions[last_word_idx] + len(words[last_word_idx])
+                     full_names.append((name_text, start_pos, end_pos))
+
+                 current_pos = None
+
+     # handle last case - if we're still tracking a name when we reach the end
+     if current_pos is not None:
+         full_name = words[current_pos:]
+         if validate_full_name(full_name, keywords):
+             name_text = ' '.join(full_name)
+             start_pos = word_start_positions[current_pos]
+             # Calculate end position of the last word
+             last_word_idx = len(words) - 1
+             end_pos = word_start_positions[last_word_idx] + len(words[last_word_idx])
+             full_names.append((name_text, start_pos, end_pos))
+
+     return full_names
+
+ # add dictionary lookup based on precomputed lists
+ def get_full_names_dictionary_lookup(text, processor):
+     """Use pre-built KeywordProcessor instead of creating new one"""
+     matches = []
+     keywords_found = processor.extract_keywords(text, span_info=True)
+
+     for keyword, start_pos, end_pos in keywords_found:
+         matches.append((keyword, start_pos, end_pos))
+
+     return matches
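Review note: a worked example of the capitalization-run heuristic in get_full_names (sample strings mine). Without a keyword set, every multi-word capitalized run is returned; with one, a candidate survives only if its first word is in the set:

    from datamule.tags.utils import get_full_names

    text = "Earlier today, Jane Smith met Robert Brown"
    print(get_full_names(text))
    # [('Jane Smith', 15, 25), ('Robert Brown', 30, 42)]
    # ('Earlier' alone fails the single-word check)

    print(get_full_names(text, keywords={'Jane'}))
    # [('Jane Smith', 15, 25)]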
datamule/utils/dictionaries.py ADDED
@@ -0,0 +1,76 @@
+ from pathlib import Path
+ import urllib.request
+ import json
+ urls = {
+     "ssa_baby_first_names": "https://raw.githubusercontent.com/john-friedman/datamule-data/master/data/dictionaries/ssa_baby_first_names.txt",
+     "npx_figis": "https://raw.githubusercontent.com/john-friedman/datamule-data/master/data/dictionaries/npx_figis.txt",
+     "npx_isins": "https://raw.githubusercontent.com/john-friedman/datamule-data/master/data/dictionaries/npx_isins.txt",
+     "sc13dg_cusips": "https://raw.githubusercontent.com/john-friedman/datamule-data/master/data/dictionaries/sc13dg_cusips.txt",
+     "8k_2024_persons": "https://raw.githubusercontent.com/john-friedman/datamule-data/master/data/dictionaries/8k_2024_persons.json"
+ }
+
+
+ def download_dictionary(name, overwrite=False):
+     url = urls[name]
+
+     # Create dictionaries directory in datamule folder
+     dict_dir = Path.home() / ".datamule" / "dictionaries"
+     dict_dir.mkdir(parents=True, exist_ok=True)
+
+     # check if file exists first
+     if not overwrite:
+         filename = url.split('/')[-1]
+         file_path = dict_dir / filename
+         if file_path.exists():
+             return
+
+     # Extract filename from URL
+     filename = url.split('/')[-1]
+     file_path = dict_dir / filename
+
+     print(f"Downloading {name} dictionary to {file_path}")
+     urllib.request.urlretrieve(url, file_path)
+     return
+
+ def load_dictionary(name):
+     # Get or download the dictionary file
+     dict_dir = Path.home() / ".datamule" / "dictionaries"
+     filename = urls[name].split('/')[-1]
+     file_path = dict_dir / filename
+
+     # Download if doesn't exist
+     if not file_path.exists():
+         download_dictionary(name)
+
+     # Load the dictionary based on name
+     if name == "ssa_baby_first_names":
+         names_set = set()
+         with open(file_path, 'r', encoding='utf-8') as f:
+             for line in f:
+                 names_set.add(line.strip())
+         return names_set
+     elif name == "npx_figis":
+         figi_set = set()
+         with open(file_path, 'r', encoding='utf-8') as f:
+             for line in f:
+                 figi_set.add(line.strip())
+         return figi_set
+     elif name == "npx_isins":
+         isin_set = set()
+         with open(file_path, 'r', encoding='utf-8') as f:
+             for line in f:
+                 isin_set.add(line.strip())
+         return isin_set
+     elif name == "sc13dg_cusips":
+         cusip_set = set()
+         with open(file_path, 'r', encoding='utf-8') as f:
+             for line in f:
+                 cusip_set.add(line.strip())
+         return cusip_set
+     elif name == "8k_2024_persons":
+
+         with open(file_path, 'r', encoding='utf-8') as f:
+             persons_list = json.load(f)
+         return persons_list
+     else:
+         raise ValueError("dictionary not found")
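Review note: a minimal sketch of the caching behaviour (paths per the code above; the import assumes the installed wheel layout):

    from datamule.utils.dictionaries import download_dictionary, load_dictionary

    # First call fetches into ~/.datamule/dictionaries/; subsequent calls
    # return early unless overwrite=True.
    download_dictionary('sc13dg_cusips')

    cusips = load_dictionary('sc13dg_cusips')  # set of CUSIP strings
    print(len(cusips))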
datamule-2.1.6.dist-info/METADATA → datamule-2.2.1.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datamule
- Version: 2.1.6
+ Version: 2.2.1
  Summary: Work with SEC submissions at scale.
  Home-page: https://github.com/john-friedman/datamule-python
  Author: John Friedman
@@ -19,4 +19,5 @@ Requires-Dist: secxbrl
  Requires-Dist: secsgml
  Requires-Dist: websocket-client
  Requires-Dist: company-fundamentals
+ Requires-Dist: flashtext

datamule-2.1.6.dist-info/RECORD → datamule-2.2.1.dist-info/RECORD
@@ -15,7 +15,7 @@ datamule/datamule/datamule_mysql_rds.py,sha256=Q6_h24-SNECWK60RnM6UQjUIp5dhJmfn3
  datamule/datamule/downloader.py,sha256=mVg1SApfij_9-dTpcm_YB26Bxc_Yq1FR8xv2k50MHqU,18579
  datamule/datamule/sec_connector.py,sha256=VwOaODpHoAWy8JIky6kLR1-orW_PB61RHw7pIGRpkow,3288
  datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datamule/document/document.py,sha256=VDe4MBDd2zgSI7d9vepNOaeE297ndLhYH7cDVGRO7iM,15915
+ datamule/document/document.py,sha256=yiev4AYewjp8bPjWn9cuL43N2O11s9WUo4X2e7WUgiY,20628
  datamule/document/tables/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datamule/document/tables/tables.py,sha256=8riSAof6o-Gxoo0SkiQAE61fw8NmzDnEhJe6dATzmvA,4487
  datamule/document/tables/tables_13fhr.py,sha256=-6tWcaTyNsb0XuW0WMBrYir9Zn1wLZL0laKxRYfPNyg,4265
@@ -48,10 +48,15 @@ datamule/sec/xbrl/streamcompanyfacts.py,sha256=Qq88PqW5_j1k3Aqrl0KRmKeF54D6Wbb6H
  datamule/sec/xbrl/xbrlmonitor.py,sha256=TKFVfSyyUUfUgFQw4WxEVs4g8Nh-2C0tygNIRmTqW3Y,5848
  datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
+ datamule/tags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ datamule/tags/config.py,sha256=RCYRw_voP2MrEx_iN7zjJiZ8YDa4QlzKPGpW5ZTij6U,1197
+ datamule/tags/regex.py,sha256=Zr1dlnb8OfecDkI2DFCI8DUBr9LI50fapQyBAYNEZrg,4487
+ datamule/tags/utils.py,sha256=hQpQBVAJPmys1UKVS2mqc8Z5-qO_zma5ecFXvW9DXoo,5329
  datamule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datamule/utils/construct_submissions_data.py,sha256=NB_hvfxlRXPyt4Fgc-5qA8vJRItkLhBedCSTaxwW7Jg,5887
+ datamule/utils/dictionaries.py,sha256=VImvQWlP8IohB76rDd83bZcT184LBOpOaXPOH46fA6Y,2795
  datamule/utils/format_accession.py,sha256=60RtqoNqoT9zSKVb1DeOv1gncJxzPTFMNW4SNOVmC_g,476
- datamule-2.1.6.dist-info/METADATA,sha256=eCN-rg72CAvKqZON551zZSySScovaDNfjBFGrFTOVmc,560
- datamule-2.1.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- datamule-2.1.6.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
- datamule-2.1.6.dist-info/RECORD,,
+ datamule-2.2.1.dist-info/METADATA,sha256=aINGZMWV34SclEt-2Ij2d2848PJA7cLF6ZoBL2LwpfY,585
+ datamule-2.2.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ datamule-2.2.1.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
+ datamule-2.2.1.dist-info/RECORD,,