datamule 1.2.1__tar.gz → 1.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {datamule-1.2.1 → datamule-1.2.3}/PKG-INFO +1 -1
  2. {datamule-1.2.1 → datamule-1.2.3}/datamule/package_updater.py +10 -11
  3. {datamule-1.2.1 → datamule-1.2.3}/datamule/submission.py +14 -24
  4. {datamule-1.2.1 → datamule-1.2.3}/datamule.egg-info/PKG-INFO +1 -1
  5. {datamule-1.2.1 → datamule-1.2.3}/setup.py +1 -1
  6. {datamule-1.2.1 → datamule-1.2.3}/datamule/__init__.py +0 -0
  7. {datamule-1.2.1 → datamule-1.2.3}/datamule/config.py +0 -0
  8. {datamule-1.2.1 → datamule-1.2.3}/datamule/document/__init__.py +0 -0
  9. {datamule-1.2.1 → datamule-1.2.3}/datamule/document/document.py +0 -0
  10. {datamule-1.2.1 → datamule-1.2.3}/datamule/document/processing.py +0 -0
  11. {datamule-1.2.1 → datamule-1.2.3}/datamule/document/table.py +0 -0
  12. {datamule-1.2.1 → datamule-1.2.3}/datamule/helper.py +0 -0
  13. {datamule-1.2.1 → datamule-1.2.3}/datamule/index.py +0 -0
  14. {datamule-1.2.1 → datamule-1.2.3}/datamule/mapping_dicts/__init__.py +0 -0
  15. {datamule-1.2.1 → datamule-1.2.3}/datamule/mapping_dicts/txt_mapping_dicts.py +0 -0
  16. {datamule-1.2.1 → datamule-1.2.3}/datamule/mapping_dicts/xml_mapping_dicts.py +0 -0
  17. {datamule-1.2.1 → datamule-1.2.3}/datamule/portfolio.py +0 -0
  18. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/__init__.py +0 -0
  19. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/infrastructure/__init__.py +0 -0
  20. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/infrastructure/submissions_metadata.py +0 -0
  21. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/rss/__init__.py +0 -0
  22. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/rss/monitor.py +0 -0
  23. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/submissions/__init__.py +0 -0
  24. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/submissions/downloader.py +0 -0
  25. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/submissions/eftsquery.py +0 -0
  26. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/submissions/monitor.py +0 -0
  27. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/submissions/streamer.py +0 -0
  28. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/submissions/textsearch.py +0 -0
  29. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/utils.py +0 -0
  30. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/xbrl/__init__.py +0 -0
  31. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/xbrl/downloadcompanyfacts.py +0 -0
  32. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/xbrl/filter_xbrl.py +0 -0
  33. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/xbrl/streamcompanyfacts.py +0 -0
  34. {datamule-1.2.1 → datamule-1.2.3}/datamule/sec/xbrl/xbrlmonitor.py +0 -0
  35. {datamule-1.2.1 → datamule-1.2.3}/datamule/seclibrary/__init__.py +0 -0
  36. {datamule-1.2.1 → datamule-1.2.3}/datamule/seclibrary/bq.py +0 -0
  37. {datamule-1.2.1 → datamule-1.2.3}/datamule/seclibrary/downloader.py +0 -0
  38. {datamule-1.2.1 → datamule-1.2.3}/datamule/seclibrary/query.py +0 -0
  39. {datamule-1.2.1 → datamule-1.2.3}/datamule/sheet.py +0 -0
  40. {datamule-1.2.1 → datamule-1.2.3}/datamule.egg-info/SOURCES.txt +0 -0
  41. {datamule-1.2.1 → datamule-1.2.3}/datamule.egg-info/dependency_links.txt +0 -0
  42. {datamule-1.2.1 → datamule-1.2.3}/datamule.egg-info/requires.txt +0 -0
  43. {datamule-1.2.1 → datamule-1.2.3}/datamule.egg-info/top_level.txt +0 -0
  44. {datamule-1.2.1 → datamule-1.2.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.2.1
3
+ Version: 1.2.3
4
4
  Summary: Making it easier to use SEC filings.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -9,7 +9,7 @@ class PackageUpdater():
9
9
  def __init__(self):
10
10
  pass
11
11
 
12
- def update_package_data():
12
+ def update_package_data(self):
13
13
  # Create data directory in user's home
14
14
  data_dir = Path.home() / ".datamule"
15
15
  data_dir.mkdir(exist_ok=True)
@@ -19,13 +19,12 @@ class PackageUpdater():
19
19
  file_path = data_dir / "listed_filer_metadata.csv"
20
20
  temp_gz_path = data_dir / "listed_filer_metadata.csv.gz"
21
21
 
22
- if not file_path.exists():
23
- print(f"Downloading data to {data_dir}")
24
- urllib.request.urlretrieve(file_url, temp_gz_path)
25
-
26
- with gzip.open(temp_gz_path, 'rb') as f_in:
27
- with open(file_path, 'wb') as f_out:
28
- shutil.copyfileobj(f_in, f_out)
29
-
30
- os.remove(temp_gz_path)
31
- print(f"Data downloaded to {file_path}")
22
+ print(f"Downloading data to {data_dir}")
23
+ urllib.request.urlretrieve(file_url, temp_gz_path)
24
+
25
+ with gzip.open(temp_gz_path, 'rb') as f_in:
26
+ with open(file_path, 'wb') as f_out:
27
+ shutil.copyfileobj(f_in, f_out)
28
+
29
+ os.remove(temp_gz_path)
30
+ print(f"Data downloaded to {file_path}")
@@ -17,6 +17,7 @@ class Submission:
17
17
  self.path = None
18
18
  self.metadata, raw_documents = parse_sgml_submission_into_memory(sgml_content)
19
19
 
20
+ # code dupe
20
21
  self.accession = self.metadata['accession-number']
21
22
  self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
22
23
 
@@ -43,6 +44,11 @@ class Submission:
43
44
  with metadata_path.open('r') as f:
44
45
  self.metadata = json.load(f)
45
46
 
47
+ # Code dupe
48
+ self.accession = self.metadata['accession-number']
49
+ self.filing_date= f"{self.metadata['filing-date'][:4]}-{self.metadata['filing-date'][4:6]}-{self.metadata['filing-date'][6:8]}"
50
+
51
+
46
52
 
47
53
 
48
54
  def document_type(self, document_type):
@@ -65,9 +71,12 @@ class Submission:
65
71
  document_path = self.path / filename
66
72
  extension = document_path.suffix
67
73
 
68
- with document_path.open('r') as f:
74
+ with document_path.open('rb') as f:
69
75
  content = f.read()
70
76
 
77
+ if extension in ['.htm','.html','.txt','.xml']:
78
+ content = content.decode('utf-8', errors='replace')
79
+
71
80
  yield Document(type=doc['type'], content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
72
81
  # if loaded from sgml_content
73
82
  else:
@@ -89,9 +98,12 @@ class Submission:
89
98
 
90
99
  # check if the file exists
91
100
  if document_path.exists():
92
- with document_path.open('r') as f:
101
+ with document_path.open('rb') as f:
93
102
  content = f.read()
94
103
 
104
+ if extension in ['.htm','.html','.txt','.xml']:
105
+ content = content.decode('utf-8', errors='replace')
106
+
95
107
  yield Document(type=doc['type'], content=content, extension=extension,filing_date=self.filing_date,accession=self.accession,path=document_path)
96
108
  else:
97
109
  print(f"Warning: File {document_path} does not exist likely due to keep types in downloading.")
@@ -100,28 +112,6 @@ class Submission:
100
112
  else:
101
113
  yield self.documents[idx]
102
114
 
103
- # keep documents by document type
104
- def keep(self, document_type):
105
- # Convert single document type to list for consistent handling
106
- if isinstance(document_type, str):
107
- document_types = [document_type]
108
- else:
109
- document_types = document_type
110
-
111
- if self.path is not None:
112
- for doc in self.metadata['documents']:
113
- filename = doc.get('filename')
114
- type = doc.get('type')
115
- if type not in document_types:
116
- # oh we need handling here for sequences case
117
- if filename is None:
118
- filename = doc.sequence + '.txt'
119
-
120
- document_path = self.path / filename
121
- # delete the file
122
- document_path.unlink()
123
- else:
124
- print("Warning: keep() method is only available when loading from path.")
125
115
 
126
116
 
127
117
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.2.1
3
+ Version: 1.2.3
4
4
  Summary: Making it easier to use SEC filings.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -30,7 +30,7 @@ if not file_path.exists():
30
30
  setup(
31
31
  name="datamule",
32
32
  author="John Friedman",
33
- version="1.2.1",
33
+ version="1.2.3",
34
34
  description="Making it easier to use SEC filings.",
35
35
  packages=find_packages(include=['datamule', 'datamule.*']),
36
36
  url="https://github.com/john-friedman/datamule-python",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes