orbitkit 0.8.45__tar.gz → 0.8.46__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {orbitkit-0.8.45/orbitkit.egg-info → orbitkit-0.8.46}/PKG-INFO +1 -1
- orbitkit-0.8.46/orbitkit/VERSION +1 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/data_preprocessing.py +4 -4
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/file_flow_entry_process.py +8 -2
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/file_handler_v2.py +2 -1
- {orbitkit-0.8.45 → orbitkit-0.8.46/orbitkit.egg-info}/PKG-INFO +1 -1
- orbitkit-0.8.45/orbitkit/VERSION +0 -1
- {orbitkit-0.8.45 → orbitkit-0.8.46}/LICENSE +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/MANIFEST.in +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/README.md +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/file_flow_exit_process.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/file_handler.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/audio_transcoder/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/audio_transcoder/netmind_extract_v1.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/constant/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/constant/report_schema.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/id_srv/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/id_srv/id_gen.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/id_srv/id_perm_like.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/lark_send/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/lark_send/lark.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/llm_tools/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/llm_tools/quick_rag_chat.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/orbit_type/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/orbit_type/doc_4_compile_rule.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/orbit_type/orbit_type_simple.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/orbit_type/tools.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_embedding/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_embedding/pdf_txt_embedding.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_embedding/pdf_txt_embedding_v2.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/a_stock_extractor_v1.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/exceptions.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_block_extractor_base.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_block_extractor_v1.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_block_extractor_v2.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_azure.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_minerU_v1.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_netmind_v1.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_netmind_v2.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_netmind_v3.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_orbit.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/base.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/cloud_provider.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/core.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/exceptions.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/extractors.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/utils.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_writer/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_writer/pdf_writer_simple.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/__init__.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/cache_asset_downloader.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/common.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/customize_regix_manager.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/secret_manager.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_aliyun.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_aliyun_oss_simple.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_aws.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_aws_s3_wrapper.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_date.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_html.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_kafka.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_md5.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_selenium.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_simple_timer.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_str.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_type_mapping.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_url.py +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit.egg-info/SOURCES.txt +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit.egg-info/dependency_links.txt +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit.egg-info/not-zip-safe +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit.egg-info/requires.txt +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit.egg-info/top_level.txt +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/setup.cfg +0 -0
- {orbitkit-0.8.45 → orbitkit-0.8.46}/setup.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.8.46
|
|
@@ -126,7 +126,7 @@ class DocumentProcessor:
|
|
|
126
126
|
return "extract" if reported_date < datetime.datetime(2020, 1, 1) else target_stage
|
|
127
127
|
|
|
128
128
|
@classmethod
|
|
129
|
-
async def create_record(cls, doc, start_stage):
|
|
129
|
+
async def create_record(cls, doc, start_stage, important_level):
|
|
130
130
|
attachments = doc.get('x_attachments', []) if start_stage == 'convert' else doc.get('x_attachments_pdf', [])
|
|
131
131
|
s3_path_info = []
|
|
132
132
|
add_extends = {}
|
|
@@ -151,7 +151,7 @@ class DocumentProcessor:
|
|
|
151
151
|
'store_path'],
|
|
152
152
|
'file_name': att['file_name']
|
|
153
153
|
})
|
|
154
|
-
result_dict = {'id': doc['_id'], 's3_path_info': s3_path_info}
|
|
154
|
+
result_dict = {'id': doc['_id'], 's3_path_info': s3_path_info, 'important_level': important_level}
|
|
155
155
|
if add_extends:
|
|
156
156
|
result_dict['extends'] = add_extends
|
|
157
157
|
return True, result_dict
|
|
@@ -165,7 +165,7 @@ class DocumentProcessor:
|
|
|
165
165
|
}
|
|
166
166
|
|
|
167
167
|
@classmethod
|
|
168
|
-
async def process(cls, doc, custom_process_step):
|
|
168
|
+
async def process(cls, doc, custom_process_step, important_level):
|
|
169
169
|
report_id = doc['_id']
|
|
170
170
|
# 筛选文件
|
|
171
171
|
doc = cls.stock_us_filter_by_is_primary(doc)
|
|
@@ -195,7 +195,7 @@ class DocumentProcessor:
|
|
|
195
195
|
"Invalid process sequence: 'start_stage' occurs before 'target_stage'.",
|
|
196
196
|
doc['_id'])
|
|
197
197
|
|
|
198
|
-
file_name_check_status, record = await cls.create_record(doc, start_stage)
|
|
198
|
+
file_name_check_status, record = await cls.create_record(doc, start_stage, important_level)
|
|
199
199
|
if not file_name_check_status:
|
|
200
200
|
return cls.create_result_info("step_error", "Document file name too lang.", report_id)
|
|
201
201
|
|
|
@@ -177,7 +177,13 @@ class FilingOfficialProcessor:
|
|
|
177
177
|
|
|
178
178
|
async def process_task_entry(self, source: Literal["filing_data", "reports_view", "G7_demo"],
|
|
179
179
|
query: dict, tags: list[str], priority: str,
|
|
180
|
-
is_important: bool = False, custom_step: Optional[list[str]] = None):
|
|
180
|
+
is_important: bool = False, custom_step: Optional[list[str]] = None, important_level = None):
|
|
181
|
+
|
|
182
|
+
if not important_level or not isinstance(important_level, int):
|
|
183
|
+
important_level = 0
|
|
184
|
+
|
|
185
|
+
if important_level == 0:
|
|
186
|
+
raise ValueError(f'important_level must be an integer (int) greater than 0. {important_level}')
|
|
181
187
|
|
|
182
188
|
allowed_steps = {"convert", "extract", "embedding"}
|
|
183
189
|
if custom_step is not None:
|
|
@@ -206,7 +212,7 @@ class FilingOfficialProcessor:
|
|
|
206
212
|
self.all_stat_count['all'] += 1
|
|
207
213
|
for orbit_entity_id in doc['x_orbit_data']['perm_id_list']:
|
|
208
214
|
perm_id_set.add(orbit_entity_id)
|
|
209
|
-
result_record = await self.data_processor.process(doc=doc, custom_process_step=custom_step)
|
|
215
|
+
result_record = await self.data_processor.process(doc=doc, custom_process_step=custom_step, important_level=important_level)
|
|
210
216
|
process_data.append(result_record)
|
|
211
217
|
if len(process_data) >= self.max_batch_size:
|
|
212
218
|
file_flow_info, xbrl_data, except_id_list, doc_error_list = self.data_processor.split_data_by_spider_name_and_step(
|
orbitkit-0.8.45/orbitkit/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.8.45
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|