datamule 1.5.9__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ from .processing import process_tabular_data
12
12
  from pathlib import Path
13
13
  import webbrowser
14
14
  from secsgml.utils import bytes_to_str
15
+ from secxbrl import parse_inline_xbrl
15
16
 
16
17
  class Document:
17
18
  def __init__(self, type, content, extension,accession,filing_date,path=None):
@@ -33,6 +34,7 @@ class Document:
33
34
  self.extension = extension
34
35
  # this will be filled by parsed
35
36
  self.data = None
37
+ self.xbrl = None
36
38
 
37
39
  #_load_text_content
38
40
  def _preprocess_txt_content(self):
@@ -101,12 +103,23 @@ class Document:
101
103
  if self.extension in ['.htm', '.html', '.txt','.xml']:
102
104
  return bool(re.search(pattern, self.content))
103
105
  return False
106
+
107
def parse_xbrl(self, type='inline'):
    """Parse XBRL data out of the document and cache it on ``self.xbrl``.

    Args:
        type: Which XBRL flavour to parse. Only ``'inline'`` is implemented.
            The name shadows the builtin; it is kept so existing keyword
            callers (``parse_xbrl(type='inline')``) keep working.

    Returns:
        None. The result of ``parse_inline_xbrl(self.content)`` is stored on
        ``self.xbrl``. ``self.xbrl`` is left untouched when the document
        extension is not ``.htm``/``.html``.

    Raises:
        ValueError: If ``type`` is not ``'inline'`` and nothing is cached yet.
    """
    # Compare against None instead of relying on truthiness, so a valid but
    # falsy parse result (e.g. an empty collection) is cached as well and the
    # content is not re-parsed on every call.
    if self.xbrl is not None:
        return

    if type != 'inline':
        raise ValueError("Only inline has been implemented so far.")

    # Inline parsing is only attempted for HTML documents.
    if self.extension not in ('.htm', '.html'):
        return

    self.xbrl = parse_inline_xbrl(self.content)
104
117
 
105
118
  # Note: this method will be heavily modified in the future
106
119
  def parse(self):
107
120
  # check if we have already parsed the content
108
121
  if self.data:
109
- return self.data
122
+ return
110
123
 
111
124
  mapping_dict = None
112
125
 
File without changes
@@ -0,0 +1,13 @@
1
"""Lookup of table-mapping definitions keyed by SEC document type."""
# Explicit relative import: this module lives inside a package, so a bare
# ``import ownership`` is resolved as a top-level module under Python 3 and
# fails unless the package directory itself happens to be on sys.path.
from . import ownership


# Key is the document type.
# Note: this assumes XML format.
# Every ownership form listed here shares the same mapping set.
table_mappings = {
    '3': ownership.mappings,
    '3/A': ownership.mappings,
    '4': ownership.mappings,
    '4/A': ownership.mappings,
    '5': ownership.mappings,
    '5/A': ownership.mappings,
}
@@ -0,0 +1,174 @@
1
+
2
+
3
+
4
def _value_and_footnote(xml_path):
    """Return the ``_value`` / ``_footnote`` entries for one flattened XML path.

    The output column is the last ``_``-separated segment of ``xml_path``;
    the footnote column is that same name with ``Footnote`` appended.
    """
    column = xml_path.rpartition('_')[2]
    return {
        f'{xml_path}_value': column,
        f'{xml_path}_footnote': f'{column}Footnote',
    }


# Non-derivative transactions: flattened XML key -> output column.
ownership_non_derivative_transactions_dict = {
    **_value_and_footnote('securityTitle'),
    **_value_and_footnote('transactionDate'),
    **_value_and_footnote('deemedExecutionDate'),
    'transactionCoding_transactionFormType': 'transactionFormType',
    'transactionCoding_transactionCode': 'transactionCode',
    'transactionCoding_equitySwapInvolved': 'equitySwapInvolved',
    'transactionCoding_footnote': 'transactionCodingFootnote',
    **_value_and_footnote('transactionAmounts_transactionShares'),
    **_value_and_footnote('transactionAmounts_transactionPricePerShare'),
    **_value_and_footnote('transactionAmounts_transactionAcquiredDisposedCode'),
    **_value_and_footnote('postTransactionAmounts_sharesOwnedFollowingTransaction'),
    **_value_and_footnote('ownershipNature_directOrIndirectOwnership'),
    **_value_and_footnote('ownershipNature_natureOfOwnership'),
    **_value_and_footnote('transactionTimeliness'),
    **_value_and_footnote('postTransactionAmounts_valueOwnedFollowingTransaction'),
}

# Derivative transactions. Two footnote keys sit at the very end rather than
# next to their value keys; that ordering is kept on purpose so the key order
# of the resulting dict is unchanged.
derivative_transaction_ownership_dict = {
    **_value_and_footnote('securityTitle'),
    **_value_and_footnote('conversionOrExercisePrice'),
    **_value_and_footnote('transactionDate'),
    **_value_and_footnote('deemedExecutionDate'),
    'transactionCoding_transactionFormType': 'transactionFormType',
    'transactionCoding_transactionCode': 'transactionCode',
    'transactionCoding_equitySwapInvolved': 'equitySwapInvolved',
    'transactionCoding_footnote': 'transactionCodingFootnote',
    **_value_and_footnote('transactionAmounts_transactionShares'),
    **_value_and_footnote('transactionAmounts_transactionPricePerShare'),
    'transactionAmounts_transactionAcquiredDisposedCode_value': 'transactionAcquiredDisposedCode',
    **_value_and_footnote('transactionAmounts_transactionTotalValue'),
    **_value_and_footnote('exerciseDate'),
    **_value_and_footnote('expirationDate'),
    **_value_and_footnote('underlyingSecurity_underlyingSecurityTitle'),
    **_value_and_footnote('underlyingSecurity_underlyingSecurityShares'),
    'underlyingSecurity_underlyingSecurityValue_value': 'underlyingSecurityValue',
    **_value_and_footnote('postTransactionAmounts_sharesOwnedFollowingTransaction'),
    **_value_and_footnote('ownershipNature_directOrIndirectOwnership'),
    **_value_and_footnote('ownershipNature_natureOfOwnership'),
    **_value_and_footnote('transactionTimeliness'),
    **_value_and_footnote('postTransactionAmounts_valueOwnedFollowingTransaction'),
    'transactionAmounts_transactionAcquiredDisposedCode_footnote': 'transactionAcquiredDisposedCodeFootnote',
    'underlyingSecurity_underlyingSecurityValue_footnote': 'underlyingSecurityValueFootnote',
}

# Non-derivative holdings. The trailing four keys are interleaved in the
# original order, so they are spelled out individually.
non_derivative_holding_ownership_dict = {
    **_value_and_footnote('securityTitle'),
    **_value_and_footnote('postTransactionAmounts_sharesOwnedFollowingTransaction'),
    **_value_and_footnote('ownershipNature_directOrIndirectOwnership'),
    **_value_and_footnote('ownershipNature_natureOfOwnership'),
    'postTransactionAmounts_valueOwnedFollowingTransaction_value': 'valueOwnedFollowingTransaction',
    'transactionCoding_footnote': 'transactionCodingFootnote',
    'transactionCoding_transactionFormType': 'transactionFormType',
    'postTransactionAmounts_valueOwnedFollowingTransaction_footnote': 'valueOwnedFollowingTransactionFootnote',
}

# Derivative holdings.
derivative_holding_ownership_dict = {
    **_value_and_footnote('securityTitle'),
    **_value_and_footnote('conversionOrExercisePrice'),
    **_value_and_footnote('exerciseDate'),
    **_value_and_footnote('expirationDate'),
    **_value_and_footnote('underlyingSecurity_underlyingSecurityTitle'),
    **_value_and_footnote('underlyingSecurity_underlyingSecurityShares'),
    **_value_and_footnote('underlyingSecurity_underlyingSecurityValue'),
    **_value_and_footnote('ownershipNature_directOrIndirectOwnership'),
    **_value_and_footnote('ownershipNature_natureOfOwnership'),
    **_value_and_footnote('postTransactionAmounts_sharesOwnedFollowingTransaction'),
    **_value_and_footnote('postTransactionAmounts_valueOwnedFollowingTransaction'),
    'transactionCoding_transactionFormType': 'transactionFormType',
    'transactionCoding_footnote': 'transactionCodingFootnote',
}

# Reporting owner. Address and id fields keep their own name as the column;
# relationship fields become ``rptOwner`` + the field name with its first
# letter upper-cased.
reporting_owner_ownership_dict = {
    **{
        f'reportingOwnerAddress_{field}': field
        for field in (
            'rptOwnerCity',
            'rptOwnerState',
            'rptOwnerStateDescription',
            'rptOwnerStreet1',
            'rptOwnerStreet2',
            'rptOwnerZipCode',
        )
    },
    **{
        f'reportingOwnerId_{field}': field
        for field in ('rptOwnerCik', 'rptOwnerName')
    },
    **{
        f'reportingOwnerRelationship_{field}': f'rptOwner{field[0].upper()}{field[1:]}'
        for field in (
            'isDirector',
            'isOfficer',
            'isTenPercentOwner',
            'isOther',
            'officerTitle',
            'otherText',
        )
    },
}

# Filing-level metadata. Apart from the ``issuer_`` prefixed keys, every key
# maps to a column of the same name.
metadata_ownership_dict = {
    'periodOfReport': 'periodOfReport',
    **{
        f'issuer_{field}': field
        for field in ('issuerCik', 'issuerName', 'issuerTradingSymbol')
    },
    **{
        field: field
        for field in (
            'documentType',
            'remarks',
            'documentDescription',
            'footnotes',
            'notSubjectToSection16',
            'form3HoldingsReported',
            'form4TransactionsReported',
            'noSecuritiesOwned',
            'aff10b5One',
            'dateOfOriginalSubmission',
            'schemaVersion',
        )
    },
}

# Owner signature: both keys map to themselves.
owner_signature_ownership_dict = {
    field: field for field in ('signatureName', 'signatureDate')
}


# Table name -> column mapping for that table.
mappings = {
    'ownership_non_derivative_transactions': ownership_non_derivative_transactions_dict,
    'ownership_derivative_transactions': derivative_transaction_ownership_dict,
    'ownership_non_derivative_holdings': non_derivative_holding_ownership_dict,
    'ownership_derivative_holdings': derivative_holding_ownership_dict,
    'ownership_reporting_owner': reporting_owner_ownership_dict,
    'ownership_metadata': metadata_ownership_dict,
    'ownership_owner_signature': owner_signature_ownership_dict,
}
datamule/submission.py CHANGED
@@ -251,7 +251,8 @@ class Submission:
251
251
  try:
252
252
  content = tar.extractfile(filename+'.zst').read()
253
253
  except:
254
- raise ValueError("Something went wrong with tar")
254
+ # Some of these issues are on the SEC data end; will fix when I set up cloud.
255
+ raise ValueError(f"Something went wrong with tar: {self.path}")
255
256
  # Decompress if compressed
256
257
  if filename.endswith('.gz'):
257
258
  content = gzip.decompress(content)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 1.5.9
3
+ Version: 1.6.0
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -15,6 +15,7 @@ Requires-Dist: selectolax
15
15
  Requires-Dist: pytz
16
16
  Requires-Dist: zstandard
17
17
  Requires-Dist: doc2dict
18
+ Requires-Dist: secxbrl
18
19
  Requires-Dist: secsgml
19
20
  Requires-Dist: websocket-client
20
21
 
@@ -5,12 +5,12 @@ datamule/index.py,sha256=Rrcna9FJV-Oh_K6O2IuUEIDmtay_7UZ4l4jgKCi7A7I,2079
5
5
  datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,958
6
6
  datamule/portfolio.py,sha256=eF1eDSwIg-CI8ZmZAHRjCGU0UhuPN4ijxPB0YDT4s2o,8023
7
7
  datamule/sheet.py,sha256=TvFqK9eAYuVoJ2uWdAlx5EN6vS9lke-aZf7FqtUiDBc,22304
8
- datamule/submission.py,sha256=6JIi-ayLL-jENVj6Q4IhmrYlAreJI7xBAHP_NYaDB6k,12918
8
+ datamule/submission.py,sha256=vAiYNas1YrWgm4Grw24peJbfSUVERySEko1zmdtG49s,13033
9
9
  datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
10
10
  datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  datamule/datamule/sec_connector.py,sha256=T3edE7I-d4oHysqj7zYlIOxH3Fuauj9tfw39UdFWvB8,2393
12
12
  datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- datamule/document/document.py,sha256=04Rivdphq0D1HEGIBjtl1LelJr-IyQU1qCMi8yNJajw,14038
13
+ datamule/document/document.py,sha256=YGo-Iz_sBXekUeKEAoNJV0BiLDtSOgD9OXFo2FocYq8,14439
14
14
  datamule/document/processing.py,sha256=jDCEzBFDSQtq7nQxRScIsbALnFcvMPOkNkMUCa7mFxg,31921
15
15
  datamule/document/table.py,sha256=73yUJKY82ap32jhLmZeTti-jQ_lyhcJGlGwyxLtgYOg,12944
16
16
  datamule/document/mappings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -37,6 +37,9 @@ datamule/document/mappings/ta.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
37
37
  datamule/document/mappings/thirteenfhr.py,sha256=XpYRIMPZnGLfEE4TqBI0BPXbyuq0xf3hut1fePOF6kU,4250
38
38
  datamule/document/mappings/twentyfivense.py,sha256=lKyj0ZBhkHX9gQJMTUPrQlxYFg3k-aBnWqtoS5bujZM,905
39
39
  datamule/document/mappings/twentyfourf2nt.py,sha256=Q7RPT3JgJHjYdjMuaSyAxclt6QPT_LgCQloxp-ByDuI,4118
40
+ datamule/document/mappings_new/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ datamule/document/mappings_new/mappings.py,sha256=sP94GK3-klMCTD6XFajAP9KxJ7Wq5YMMaXcHx1rQEKA,281
42
+ datamule/document/mappings_new/ownership.py,sha256=GVtyROefvEC_X5l6kayvZv57-kHxj8bHckAru8JtFOQ,10656
40
43
  datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
44
  datamule/mapping_dicts/html_mapping_dicts.py,sha256=G2PWB__FNg4VH9iFJFkflM0u-qOEtk67IWtGoqesb0k,5388
42
45
  datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
@@ -60,7 +63,7 @@ datamule/seclibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
60
63
  datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,18025
61
64
  datamule/seclibrary/downloader.py,sha256=ylv69VF22IVfrdeCkiGr5mVa2GKrPC9zFiDJU1fiBu8,17262
62
65
  datamule/seclibrary/query.py,sha256=qGuursTERRbOGfoDcYcpo4oWkW3PCBW6x1Qf1Puiak4,7352
63
- datamule-1.5.9.dist-info/METADATA,sha256=DkoMbTIImVjWfEkqwfe7BBqCpkvBC8CFRRF5v7PKyco,501
64
- datamule-1.5.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
65
- datamule-1.5.9.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
66
- datamule-1.5.9.dist-info/RECORD,,
66
+ datamule-1.6.0.dist-info/METADATA,sha256=E4F7MeBNWhHn19TH7eUyQN_vnONCvw-NiObNCRbsLE0,524
67
+ datamule-1.6.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
68
+ datamule-1.6.0.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
69
+ datamule-1.6.0.dist-info/RECORD,,