orbitkit 0.8.37__tar.gz → 0.8.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {orbitkit-0.8.37/orbitkit.egg-info → orbitkit-0.8.39}/PKG-INFO +1 -1
- orbitkit-0.8.39/orbitkit/VERSION +1 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/airflow_handler/data_preprocessing.py +34 -5
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/airflow_handler/file_flow_entry_process.py +76 -7
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/airflow_handler/file_handler_v2.py +1 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39/orbitkit.egg-info}/PKG-INFO +1 -1
- orbitkit-0.8.37/orbitkit/VERSION +0 -1
- {orbitkit-0.8.37 → orbitkit-0.8.39}/LICENSE +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/MANIFEST.in +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/README.md +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/airflow_handler/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/airflow_handler/file_flow_exit_process.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/airflow_handler/file_handler.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/audio_transcoder/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/audio_transcoder/netmind_extract_v1.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/constant/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/constant/report_schema.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/id_srv/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/id_srv/id_gen.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/id_srv/id_perm_like.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/lark_send/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/lark_send/lark.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/llm_tools/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/llm_tools/quick_rag_chat.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/orbit_type/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/orbit_type/doc_4_compile_rule.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/orbit_type/orbit_type_simple.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/orbit_type/tools.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_embedding/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_embedding/pdf_txt_embedding.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_embedding/pdf_txt_embedding_v2.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/a_stock_extractor_v1.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/exceptions.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/pdf_block_extractor_base.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/pdf_block_extractor_v1.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/pdf_block_extractor_v2.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/pdf_extractor_azure.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/pdf_extractor_minerU_v1.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/pdf_extractor_netmind_v1.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/pdf_extractor_netmind_v2.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/pdf_extractor_netmind_v3.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor/pdf_extractor_orbit.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor_simple/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor_simple/base.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor_simple/cloud_provider.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor_simple/core.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor_simple/exceptions.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor_simple/extractors.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_extractor_simple/utils.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_writer/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/pdf_writer/pdf_writer_simple.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/__init__.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/cache_asset_downloader.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/common.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/customize_regix_manager.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/secret_manager.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_aliyun.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_aliyun_oss_simple.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_aws.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_aws_s3_wrapper.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_date.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_html.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_kafka.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_md5.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_selenium.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_simple_timer.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_str.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_type_mapping.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit/util/util_url.py +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit.egg-info/SOURCES.txt +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit.egg-info/dependency_links.txt +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit.egg-info/not-zip-safe +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit.egg-info/requires.txt +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/orbitkit.egg-info/top_level.txt +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/setup.cfg +0 -0
- {orbitkit-0.8.37 → orbitkit-0.8.39}/setup.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.8.39
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
import datetime
|
|
2
3
|
from collections import defaultdict
|
|
3
4
|
|
|
@@ -102,19 +103,43 @@ class DocumentProcessor:
|
|
|
102
103
|
return "extract" if report_type_ids == ['19999'] else target_stage
|
|
103
104
|
|
|
104
105
|
@staticmethod
|
|
105
|
-
def
|
|
106
|
+
def update_target_stage_for_reported_at(doc, target_stage):
|
|
107
|
+
date_str = doc.get('x_reported_at_utc_date', '1970-01-01')
|
|
108
|
+
try:
|
|
109
|
+
reported_date = datetime.datetime.strptime(date_str, "%Y-%m-%d")
|
|
110
|
+
except ValueError:
|
|
111
|
+
reported_date = datetime.datetime(1970, 1, 1)
|
|
112
|
+
return "extract" if reported_date < datetime.datetime(2020, 1, 1) else target_stage
|
|
113
|
+
|
|
114
|
+
@classmethod
|
|
115
|
+
def create_record(cls, doc, start_stage):
|
|
106
116
|
attachments = doc.get('x_attachments', []) if start_stage == 'convert' else doc.get('x_attachments_pdf', [])
|
|
107
117
|
s3_path_info = []
|
|
118
|
+
add_extends = {}
|
|
108
119
|
for att in attachments:
|
|
109
120
|
if len(att['file_name']) > 2000 or len(att['file_name'].encode('utf-8')) > 2000:
|
|
110
121
|
return False, None
|
|
122
|
+
|
|
123
|
+
if start_stage == 'convert' and not add_extends:
|
|
124
|
+
_, ext = os.path.splitext(att['store_path'])
|
|
125
|
+
if ext in cls.AUDIO_SUFFIXES or ext in cls.VIDEO_SUFFIXES:
|
|
126
|
+
add_extends = {
|
|
127
|
+
"title": doc['x_orbit_data']['report_title'],
|
|
128
|
+
"published": doc['x_reported_at_utc_date'],
|
|
129
|
+
"tickers": [],
|
|
130
|
+
"perm_id_list": doc['x_orbit_data']['perm_id_list'],
|
|
131
|
+
"report_type_id_list_str": doc['x_orbit_data']['report_type_id_list']
|
|
132
|
+
}
|
|
133
|
+
|
|
111
134
|
s3_path_info.append({
|
|
112
135
|
'store_path': f"s3://{att['bucket']}/{att['store_path']}" if start_stage == 'convert' else att[
|
|
113
136
|
'store_path'],
|
|
114
137
|
'file_name': att['file_name']
|
|
115
138
|
})
|
|
116
|
-
|
|
117
|
-
|
|
139
|
+
result_dict = {'id': doc['_id'], 's3_path_info': s3_path_info}
|
|
140
|
+
if add_extends:
|
|
141
|
+
result_dict['extends'] = add_extends
|
|
142
|
+
return True, result_dict
|
|
118
143
|
|
|
119
144
|
@staticmethod
|
|
120
145
|
def create_result_info(process_type, message, result_data):
|
|
@@ -125,7 +150,7 @@ class DocumentProcessor:
|
|
|
125
150
|
}
|
|
126
151
|
|
|
127
152
|
@classmethod
|
|
128
|
-
def process(cls, doc):
|
|
153
|
+
def process(cls, doc, check_doc):
|
|
129
154
|
report_id = doc['_id']
|
|
130
155
|
# 筛选文件
|
|
131
156
|
doc = cls.stock_us_filter_by_is_primary(doc)
|
|
@@ -140,7 +165,11 @@ class DocumentProcessor:
|
|
|
140
165
|
return cls.create_result_info("xbrl", "XBRL format cannot be processed.", xbrl_data)
|
|
141
166
|
|
|
142
167
|
start_stage, target_stage, x_spider_name = cls.get_start_stage_target_stage(doc)
|
|
143
|
-
|
|
168
|
+
|
|
169
|
+
# 特殊条件下的数据不做embedding ('19999'类型和报告日期小于2020-01-01)
|
|
170
|
+
if target_stage == 'embedding' and check_doc:
|
|
171
|
+
target_stage = cls.update_target_stage_for_report_type(doc, target_stage)
|
|
172
|
+
target_stage = cls.update_target_stage_for_reported_at(doc, target_stage)
|
|
144
173
|
|
|
145
174
|
if cls.DATA_PROCESS_STEPS.index(target_stage) < cls.DATA_PROCESS_STEPS.index(start_stage):
|
|
146
175
|
return cls.create_result_info("step_error",
|
|
@@ -5,31 +5,40 @@ from typing import Literal
|
|
|
5
5
|
import logging
|
|
6
6
|
import pymongo
|
|
7
7
|
import pytz
|
|
8
|
+
import boto3
|
|
9
|
+
from sqlalchemy import create_engine, Table, MetaData, select
|
|
10
|
+
from sqlalchemy.orm import sessionmaker, scoped_session
|
|
11
|
+
from contextlib import contextmanager
|
|
12
|
+
|
|
8
13
|
from orbitkit.airflow_handler.file_handler_v2 import FileFlowHandleV2
|
|
9
14
|
from orbitkit.airflow_handler.data_preprocessing import DocumentProcessor
|
|
15
|
+
from orbitkit.orbit_type import OrbitTypeMatcher
|
|
10
16
|
|
|
11
17
|
logger = logging.getLogger(__name__)
|
|
12
18
|
|
|
13
19
|
|
|
14
20
|
class FilingOfficialProcessor:
|
|
15
21
|
|
|
16
|
-
def __init__(self, mongo_uri=None, postgres_uri=None):
|
|
22
|
+
def __init__(self, mongo_uri=None, postgres_uri=None, aws_access_key_id=None, aws_secret_access_key=None, pi2_postgres_uri=None, pi2_database_name=None):
|
|
17
23
|
mongo_uri = os.environ.get('MONGO_URI_MAIN_USER_APP') if not mongo_uri else mongo_uri
|
|
18
24
|
if not mongo_uri:
|
|
19
25
|
raise KeyError('mongo_uri not set.')
|
|
20
26
|
|
|
27
|
+
if not aws_secret_access_key or not aws_access_key_id:
|
|
28
|
+
raise KeyError('aws_access_key_id and aws_secret_access_key not set.')
|
|
29
|
+
|
|
21
30
|
self.mongo_client = pymongo.MongoClient(mongo_uri)
|
|
22
31
|
self.data_xbrl_convert_collection = self.mongo_client['filing_reports']['data_xbrl_convert']
|
|
23
32
|
self.filing_data_collection = self.mongo_client['filing_reports']['filing_data']
|
|
24
|
-
self.
|
|
33
|
+
self.filing_reports_astock_test0822_collection = self.mongo_client['filing_reports']['filing_reports_astock_test0822']
|
|
25
34
|
self.annotation_reports_view_rows_collection = self.mongo_client['filing_reports'][
|
|
26
35
|
'annotation_reports_view_rows']
|
|
27
36
|
self.source_map = {
|
|
28
37
|
'filing_data': (self.filing_data_collection, 'filing_data'),
|
|
29
|
-
'
|
|
38
|
+
'filing_reports_astock_test0822': (self.filing_reports_astock_test0822_collection, 'filing_reports_astock_test0822'),
|
|
30
39
|
'reports_view': [
|
|
31
40
|
(self.filing_data_collection, 'filing_data'),
|
|
32
|
-
(self.
|
|
41
|
+
(self.filing_reports_astock_test0822_collection, 'filing_reports_astock_test0822')
|
|
33
42
|
]
|
|
34
43
|
}
|
|
35
44
|
self.only_low_important_set = {'internal_seekingalpha'}
|
|
@@ -41,6 +50,36 @@ class FilingOfficialProcessor:
|
|
|
41
50
|
self.max_batch_size = 10000
|
|
42
51
|
self.all_stat_count = {'all': 0, 'skip': 0, 'step_error': 0, 'xbrl': 0, 'file_flow': 0}
|
|
43
52
|
|
|
53
|
+
self.s3_client = boto3.client('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
|
|
54
|
+
self.matcher = OrbitTypeMatcher(self.s3_client)
|
|
55
|
+
self.report_type_id_name_map = {i["lv3_id"]: i["lv3_name"] for i in self.matcher.get_full_type_list()}
|
|
56
|
+
|
|
57
|
+
self.pi2_postgres_uri = pi2_postgres_uri or os.environ['PG_URI_CX45_USER_GLAUUIADMIN']
|
|
58
|
+
if not self.pi2_postgres_uri:
|
|
59
|
+
raise KeyError('pie_postgres_uri not set.')
|
|
60
|
+
self.databases = pi2_database_name or 'newsfeedsite'
|
|
61
|
+
self.postgres_engine = create_engine(f"{self.pi2_postgres_uri}/{self.databases}", connect_args={"sslmode": "require"})
|
|
62
|
+
self.postgres_session = sessionmaker(bind=self.postgres_engine)
|
|
63
|
+
self.Session = scoped_session(self.postgres_session)
|
|
64
|
+
self.postgres_metadata = MetaData()
|
|
65
|
+
|
|
66
|
+
self.pi2_table = Table(
|
|
67
|
+
'primary_instrument_2_release', self.postgres_metadata,
|
|
68
|
+
autoload_with=self.postgres_engine, schema='security_master'
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
@contextmanager
|
|
72
|
+
def session_scope(self):
|
|
73
|
+
session = self.Session()
|
|
74
|
+
try:
|
|
75
|
+
yield session
|
|
76
|
+
session.commit()
|
|
77
|
+
except Exception:
|
|
78
|
+
session.rollback()
|
|
79
|
+
raise
|
|
80
|
+
finally:
|
|
81
|
+
self.Session.remove()
|
|
82
|
+
|
|
44
83
|
def create_spider_name_source_type_map(self, collections):
|
|
45
84
|
|
|
46
85
|
def find_duplicates(keys):
|
|
@@ -85,6 +124,29 @@ class FilingOfficialProcessor:
|
|
|
85
124
|
}})
|
|
86
125
|
logger.info(f'The document file type cannot be converted.')
|
|
87
126
|
|
|
127
|
+
def update_extends_fields(self, perm_id_list, file_flow_info):
|
|
128
|
+
stmt = select(self.pi2_table.c.orbit_entity_id, self.pi2_table.c.ticker).where(self.pi2_table.c.orbit_entity_id.in_(perm_id_list))
|
|
129
|
+
orbit_entity_id_ticker_map = {}
|
|
130
|
+
with self.session_scope() as session:
|
|
131
|
+
result = session.execute(stmt)
|
|
132
|
+
for row in result:
|
|
133
|
+
if row.orbit_entity_id not in orbit_entity_id_ticker_map:
|
|
134
|
+
orbit_entity_id_ticker_map[row.orbit_entity_id] = []
|
|
135
|
+
|
|
136
|
+
if row.ticker is not None:
|
|
137
|
+
orbit_entity_id_ticker_map[row.orbit_entity_id].append(row.ticker)
|
|
138
|
+
for step_info, records in file_flow_info.items():
|
|
139
|
+
for record in records:
|
|
140
|
+
if 'extends' in record and record.get('extends') is not None:
|
|
141
|
+
tickers = []
|
|
142
|
+
for i in record['extends']['perm_id_list']:
|
|
143
|
+
tickers.extend(orbit_entity_id_ticker_map.get(i, []))
|
|
144
|
+
record['extends']['tickers'] = tickers
|
|
145
|
+
|
|
146
|
+
record['extends']['report_type_id_list_str'] = [self.report_type_id_name_map.get(i) for i in record['extends']['report_type_id_list_str']]
|
|
147
|
+
|
|
148
|
+
return file_flow_info
|
|
149
|
+
|
|
88
150
|
def send_task(self, file_flow_info, tags, is_important, priority, spider_name_source_type):
|
|
89
151
|
for step_str, records in file_flow_info.items():
|
|
90
152
|
steps = step_str.split('@__@')
|
|
@@ -116,9 +178,9 @@ class FilingOfficialProcessor:
|
|
|
116
178
|
logger.info(f"{len(records)}--{start_stage}-{target_stage}-{x_spider_name} status: {status}, message: {message}")
|
|
117
179
|
|
|
118
180
|
|
|
119
|
-
def process_task_entry(self, source: Literal["filing_data", "
|
|
181
|
+
def process_task_entry(self, source: Literal["filing_data", "filing_reports_astock_test0822", "reports_view"],
|
|
120
182
|
query: dict, tags: list[str], priority: str,
|
|
121
|
-
is_important: bool = False):
|
|
183
|
+
is_important: bool = False, check_doc: bool = True):
|
|
122
184
|
|
|
123
185
|
if source == 'reports_view':
|
|
124
186
|
collections = self.source_map[source]
|
|
@@ -128,16 +190,20 @@ class FilingOfficialProcessor:
|
|
|
128
190
|
spider_name_source_type = self.create_spider_name_source_type_map(collections)
|
|
129
191
|
|
|
130
192
|
process_data = []
|
|
193
|
+
perm_id_set = set()
|
|
131
194
|
for collection, label in collections:
|
|
132
195
|
logger.info(f"load {label} data.")
|
|
133
196
|
docs = collection.find(query).batch_size(1000)
|
|
134
197
|
|
|
135
198
|
for doc in docs:
|
|
136
199
|
self.all_stat_count['all'] += 1
|
|
137
|
-
|
|
200
|
+
for orbit_entity_id in doc['x_orbit_data']['perm_id_list']:
|
|
201
|
+
perm_id_set.add(orbit_entity_id)
|
|
202
|
+
process_data.append(self.data_processor.process(doc, check_doc))
|
|
138
203
|
if len(process_data) >= self.max_batch_size:
|
|
139
204
|
file_flow_info, xbrl_data, except_id_list, doc_error_list = self.data_processor.split_data_by_spider_name_and_step(
|
|
140
205
|
process_data)
|
|
206
|
+
file_flow_info = self.update_extends_fields(list(perm_id_set), file_flow_info)
|
|
141
207
|
self.all_stat_count['skip'] += len(doc_error_list)
|
|
142
208
|
self.all_stat_count['step_error'] += len(except_id_list)
|
|
143
209
|
self.all_stat_count['xbrl'] += len(xbrl_data)
|
|
@@ -145,10 +211,12 @@ class FilingOfficialProcessor:
|
|
|
145
211
|
self.send_xbrl_data_to_mongo(xbrl_data)
|
|
146
212
|
self.update_doc_status_to_convert(collection, doc_error_list)
|
|
147
213
|
process_data.clear()
|
|
214
|
+
perm_id_set.clear()
|
|
148
215
|
|
|
149
216
|
if process_data:
|
|
150
217
|
file_flow_info, xbrl_data, except_id_list, doc_error_list = self.data_processor.split_data_by_spider_name_and_step(
|
|
151
218
|
process_data)
|
|
219
|
+
file_flow_info = self.update_extends_fields(list(perm_id_set), file_flow_info)
|
|
152
220
|
self.all_stat_count['skip'] += len(doc_error_list)
|
|
153
221
|
self.all_stat_count['step_error'] += len(except_id_list)
|
|
154
222
|
self.all_stat_count['xbrl'] += len(xbrl_data)
|
|
@@ -156,5 +224,6 @@ class FilingOfficialProcessor:
|
|
|
156
224
|
self.send_xbrl_data_to_mongo(xbrl_data)
|
|
157
225
|
self.update_doc_status_to_convert(collection, doc_error_list)
|
|
158
226
|
process_data.clear()
|
|
227
|
+
perm_id_set.clear()
|
|
159
228
|
|
|
160
229
|
logger.info(f"finish processing {self.all_stat_count}.")
|
|
@@ -132,6 +132,7 @@ class FileFlowHandleV2:
|
|
|
132
132
|
'current_stage': params['current_stage'],
|
|
133
133
|
'target_stage': params['target_stage'],
|
|
134
134
|
'data_source': params['source_type'],
|
|
135
|
+
'extends': record.get('extends', {}),
|
|
135
136
|
'created_at': now,
|
|
136
137
|
'updated_at': now,
|
|
137
138
|
'tags': params['tags'],
|
orbitkit-0.8.37/orbitkit/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.8.37
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|