datamule 1.2.9__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/data/listed_filer_metadata.csv +7639 -0
- datamule/helper.py +59 -50
- datamule/sec/submissions/monitor.py +1 -1
- {datamule-1.2.9.dist-info → datamule-1.3.0.dist-info}/METADATA +2 -3
- {datamule-1.2.9.dist-info → datamule-1.3.0.dist-info}/RECORD +7 -6
- {datamule-1.2.9.dist-info → datamule-1.3.0.dist-info}/WHEEL +0 -0
- {datamule-1.2.9.dist-info → datamule-1.3.0.dist-info}/top_level.txt +0 -0
datamule/helper.py
CHANGED
@@ -1,20 +1,32 @@
|
|
1
1
|
from functools import lru_cache
|
2
2
|
import csv
|
3
3
|
from pathlib import Path
|
4
|
+
import os
|
4
5
|
|
5
6
|
def _load_package_csv(name):
|
6
|
-
"""Load CSV files from
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
7
|
+
"""Load CSV files from package data directory"""
|
8
|
+
# First try to load from the package data directory
|
9
|
+
try:
|
10
|
+
package_dir = os.path.dirname(os.path.dirname(__file__))
|
11
|
+
csv_path = os.path.join(package_dir, "data", f"{name}.csv")
|
12
|
+
|
13
|
+
# Fallback to the legacy location
|
14
|
+
if not os.path.exists(csv_path):
|
15
|
+
csv_path = Path.home() / ".datamule" / f"{name}.csv"
|
16
|
+
|
17
|
+
data = []
|
18
|
+
with open(csv_path, 'r') as csvfile:
|
19
|
+
csv_reader = csv.DictReader(csvfile)
|
20
|
+
for row in csv_reader:
|
21
|
+
data.append(row)
|
22
|
+
|
23
|
+
return data
|
16
24
|
|
17
|
-
|
25
|
+
except FileNotFoundError:
|
26
|
+
raise FileNotFoundError(
|
27
|
+
f"Required data file '{name}.csv' not found. "
|
28
|
+
f"This file should be in the datamule package directory or in ~/.datamule/"
|
29
|
+
)
|
18
30
|
|
19
31
|
def load_package_dataset(dataset):
|
20
32
|
if dataset =='listed_filer_metadata':
|
@@ -39,8 +51,6 @@ def get_cik_from_dataset(dataset_name, key, value):
|
|
39
51
|
|
40
52
|
return result
|
41
53
|
|
42
|
-
|
43
|
-
|
44
54
|
@lru_cache(maxsize=128)
|
45
55
|
def get_ciks_from_metadata_filters(**kwargs):
|
46
56
|
"""Get CIKs from listed_filer_metadata.csv that match all provided filters."""
|
@@ -67,46 +77,45 @@ def get_ciks_from_metadata_filters(**kwargs):
|
|
67
77
|
|
68
78
|
return list(result_ciks)
|
69
79
|
|
70
|
-
|
71
80
|
def _process_cik_and_metadata_filters(cik=None, ticker=None, **kwargs):
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
81
|
+
"""
|
82
|
+
Helper method to process CIK, ticker, and metadata filters.
|
83
|
+
Returns a list of CIKs after processing.
|
84
|
+
"""
|
85
|
+
# Input validation
|
86
|
+
if cik is not None and ticker is not None:
|
87
|
+
raise ValueError("Only one of cik or ticker should be provided, not both.")
|
88
|
+
|
89
|
+
if 'tickers' in kwargs:
|
90
|
+
raise ValueError("Use 'ticker' instead of 'tickers'.")
|
79
91
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
92
|
+
# Convert ticker to CIK if provided
|
93
|
+
if ticker is not None:
|
94
|
+
if isinstance(ticker, str):
|
95
|
+
ticker = [ticker]
|
96
|
+
|
97
|
+
cik = []
|
98
|
+
for t in ticker:
|
99
|
+
ticker_ciks = get_cik_from_dataset('listed_filer_metadata', 'ticker', t)
|
100
|
+
if ticker_ciks:
|
101
|
+
cik.extend(ticker_ciks)
|
84
102
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
103
|
+
# Normalize CIK format
|
104
|
+
if cik is not None:
|
105
|
+
if isinstance(cik, str):
|
106
|
+
cik = [int(cik)]
|
107
|
+
elif isinstance(cik, int):
|
108
|
+
cik = [cik]
|
109
|
+
elif isinstance(cik, list):
|
110
|
+
cik = [int(x) for x in cik]
|
90
111
|
|
91
|
-
|
112
|
+
# Process metadata filters if provided
|
113
|
+
if kwargs:
|
114
|
+
metadata_ciks = get_ciks_from_metadata_filters(**kwargs)
|
92
115
|
|
93
|
-
# Normalize CIK format
|
94
116
|
if cik is not None:
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
cik = [int(x) for x in cik]
|
101
|
-
|
102
|
-
# Process metadata filters if provided
|
103
|
-
if kwargs:
|
104
|
-
metadata_ciks = get_ciks_from_metadata_filters(**kwargs)
|
105
|
-
|
106
|
-
if cik is not None:
|
107
|
-
cik = list(set(cik).intersection(metadata_ciks))
|
108
|
-
else:
|
109
|
-
cik = metadata_ciks
|
110
|
-
|
111
|
-
return cik
|
112
|
-
|
117
|
+
cik = list(set(cik).intersection(metadata_ciks))
|
118
|
+
else:
|
119
|
+
cik = metadata_ciks
|
120
|
+
|
121
|
+
return cik
|
@@ -48,7 +48,7 @@ async def poll_rss(limiter):
|
|
48
48
|
|
49
49
|
def clean_efts_hits(hits):
|
50
50
|
# clean hits
|
51
|
-
hits = [{'accession': int(hit['_source']['adsh'].replace('-','')), 'filing_date': hit['_source']['file_date'], 'ciks': hit['_source']['ciks']} for hit in hits]
|
51
|
+
hits = [{'accession': int(hit['_source']['adsh'].replace('-','')), 'filing_date': hit['_source']['file_date'], 'ciks': hit['_source']['ciks'], 'submission_type': hit['_source']['file_type']} for hit in hits]
|
52
52
|
return hits
|
53
53
|
|
54
54
|
class Monitor():
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: datamule
|
3
|
-
Version: 1.2.9
|
4
|
-
Summary:
|
3
|
+
Version: 1.3.0
|
4
|
+
Summary: Work with SEC submissions at scale.
|
5
5
|
Home-page: https://github.com/john-friedman/datamule-python
|
6
6
|
Author: John Friedman
|
7
7
|
Requires-Dist: aiohttp
|
@@ -16,5 +16,4 @@ Requires-Dist: pytz
|
|
16
16
|
Requires-Dist: zstandard
|
17
17
|
Requires-Dist: doc2dict
|
18
18
|
Requires-Dist: secsgml
|
19
|
-
Requires-Dist: lxml
|
20
19
|
|
@@ -1,11 +1,12 @@
|
|
1
1
|
datamule/__init__.py,sha256=glzwBeGJEE6-TG7mRule9GH6L59XaIRR9T7ALcdpMus,1067
|
2
2
|
datamule/config.py,sha256=Y--CVv7JcgrjJkMOSLrvm2S8B9ost6RMSkGviP-MKtg,883
|
3
|
-
datamule/helper.py,sha256=
|
3
|
+
datamule/helper.py,sha256=BB4v73HDd5UWqG212UvSKGTTEW_ugohFPGKAiMboQ3s,4050
|
4
4
|
datamule/index.py,sha256=_7Ox5hyF_7RWdblVFr5rNyv_ARwBP7VY4f703pk9qQ8,2074
|
5
5
|
datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
|
6
6
|
datamule/portfolio.py,sha256=8fiK-vfZM5-NJSvOEsDR2YDb-2njjzFk6l7BiRyrzOM,7168
|
7
7
|
datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
|
8
8
|
datamule/submission.py,sha256=Yh5nG3ioumhl6z30wJdIEmKjDDNSuo0r2xycZSIaeIg,11035
|
9
|
+
datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
|
9
10
|
datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
11
|
datamule/document/document.py,sha256=menUFoeWwiY0rJnBkQiqY4NWnO0J17-qs8jFvO_1jiY,9969
|
11
12
|
datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
|
@@ -44,7 +45,7 @@ datamule/sec/infrastructure/submissions_metadata.py,sha256=f1KarzFSryKm0EV8DCDNs
|
|
44
45
|
datamule/sec/submissions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
46
|
datamule/sec/submissions/downloader.py,sha256=60wX2Yml1UCuxOtU0xMxqqeyHhrypCmlDQ0jZF-StJo,2665
|
46
47
|
datamule/sec/submissions/eftsquery.py,sha256=mSZon8rlW8dxma7M49ZW5V02Fn-ENOdt9TNO6elBrhE,27983
|
47
|
-
datamule/sec/submissions/monitor.py,sha256=
|
48
|
+
datamule/sec/submissions/monitor.py,sha256=dZYuVCi_X82eYA8l_9cbnkRjiawz3K4U-FnCAyJcgk4,7892
|
48
49
|
datamule/sec/submissions/streamer.py,sha256=EXyWNCD9N6mZmvm9lFSCFodF19zSQ8jfIbWPZNp0K5Y,11253
|
49
50
|
datamule/sec/submissions/textsearch.py,sha256=zEr3NXdhVFL8eMh2jruVXIt7taUZTMdNy2hOAyRM2pA,5706
|
50
51
|
datamule/sec/xbrl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -56,7 +57,7 @@ datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
56
57
|
datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
|
57
58
|
datamule/seclibrary/downloader.py,sha256=PIgz_7ASUTZOHcUZGcD1SmLaGSbq7xe7EiJT0Z7HU4M,13653
|
58
59
|
datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
|
59
|
-
datamule-1.
|
60
|
-
datamule-1.
|
61
|
-
datamule-1.
|
62
|
-
datamule-1.
|
60
|
+
datamule-1.3.0.dist-info/METADATA,sha256=ANPyXCixCioGwfTqpxX0BsbnxDe7-LS3NtLWg0WledM,469
|
61
|
+
datamule-1.3.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
62
|
+
datamule-1.3.0.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
|
63
|
+
datamule-1.3.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|