orbitkit 0.8.58__tar.gz → 0.8.59__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {orbitkit-0.8.58/orbitkit.egg-info → orbitkit-0.8.59}/PKG-INFO +1 -1
- orbitkit-0.8.59/orbitkit/VERSION +1 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/airflow_handler/file_flow_entry_process.py +8 -6
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/airflow_handler/file_handler_v2.py +10 -5
- {orbitkit-0.8.58 → orbitkit-0.8.59/orbitkit.egg-info}/PKG-INFO +1 -1
- orbitkit-0.8.58/orbitkit/VERSION +0 -1
- {orbitkit-0.8.58 → orbitkit-0.8.59}/LICENSE +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/MANIFEST.in +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/README.md +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/airflow_handler/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/airflow_handler/data_preprocessing.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/airflow_handler/file_flow_exit_process.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/airflow_handler/file_handler.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/audio_transcoder/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/audio_transcoder/netmind_extract_v1.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/constant/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/constant/report_schema.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/id_srv/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/id_srv/id_gen.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/id_srv/id_perm_like.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/lark_send/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/lark_send/lark.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/llm_tools/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/llm_tools/quick_rag_chat.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/orbit_type/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/orbit_type/doc_4_compile_rule.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/orbit_type/orbit_type_simple.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/orbit_type/tools.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_embedding/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_embedding/pdf_txt_embedding.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_embedding/pdf_txt_embedding_v2.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/a_stock_extractor_v1.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/exceptions.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/pdf_block_extractor_base.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/pdf_block_extractor_v1.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/pdf_block_extractor_v2.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/pdf_extractor_azure.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/pdf_extractor_minerU_v1.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/pdf_extractor_netmind_v1.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/pdf_extractor_netmind_v2.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/pdf_extractor_netmind_v3.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor/pdf_extractor_orbit.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor_simple/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor_simple/base.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor_simple/cloud_provider.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor_simple/core.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor_simple/exceptions.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor_simple/extractors.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_extractor_simple/utils.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_writer/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/pdf_writer/pdf_writer_simple.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/__init__.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/cache_asset_downloader.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/common.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/customize_regix_manager.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/secret_manager.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/universal_extractor.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_aliyun.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_aliyun_oss_simple.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_aws.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_aws_s3_wrapper.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_date.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_html.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_kafka.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_md5.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_selenium.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_simple_timer.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_str.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_type_mapping.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit/util/util_url.py +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit.egg-info/SOURCES.txt +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit.egg-info/dependency_links.txt +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit.egg-info/not-zip-safe +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit.egg-info/requires.txt +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/orbitkit.egg-info/top_level.txt +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/setup.cfg +0 -0
- {orbitkit-0.8.58 → orbitkit-0.8.59}/setup.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.8.59
|
|
@@ -151,7 +151,7 @@ class FilingOfficialProcessor:
|
|
|
151
151
|
|
|
152
152
|
return file_flow_info
|
|
153
153
|
|
|
154
|
-
def send_task(self, file_flow_info, tags, is_important, priority, spider_name_source_type):
|
|
154
|
+
def send_task(self, file_flow_info, tags, is_important, priority, spider_name_source_type, queue_type):
|
|
155
155
|
for step_str, records in file_flow_info.items():
|
|
156
156
|
steps = step_str.split('@__@')
|
|
157
157
|
start_stage = steps[0]
|
|
@@ -172,12 +172,14 @@ class FilingOfficialProcessor:
|
|
|
172
172
|
tag=x_spider_name,
|
|
173
173
|
priority=priority,
|
|
174
174
|
source_type=spider_name_source_type[
|
|
175
|
-
x_spider_name]
|
|
175
|
+
x_spider_name],
|
|
176
|
+
queue_type=queue_type)
|
|
176
177
|
else:
|
|
177
178
|
status, ids, message = self.file_handler.entry_point(records=records, start_stage=start_stage,
|
|
178
179
|
target_stage=target_stage, tags=tags,tag=x_spider_name,
|
|
179
180
|
priority=priority,
|
|
180
|
-
source_type=spider_name_source_type[x_spider_name]
|
|
181
|
+
source_type=spider_name_source_type[x_spider_name],
|
|
182
|
+
queue_type=queue_type)
|
|
181
183
|
self.all_stat_count['file_flow'] += len(records)
|
|
182
184
|
logger.info(f"{len(records)}--{start_stage}-{target_stage}-{x_spider_name} status: {status}, message: {message}")
|
|
183
185
|
|
|
@@ -213,7 +215,7 @@ class FilingOfficialProcessor:
|
|
|
213
215
|
|
|
214
216
|
async def process_task_entry(self, source: str,
|
|
215
217
|
query: dict, tags: list[str], priority: str,
|
|
216
|
-
is_important: bool = False, custom_step: Optional[list[str]] = None, important_level = None, db_name: str = None):
|
|
218
|
+
is_important: bool = False, custom_step: Optional[list[str]] = None, important_level = None, db_name: str = None, queue_type: str = 'Default'):
|
|
217
219
|
|
|
218
220
|
if not important_level or not isinstance(important_level, int):
|
|
219
221
|
important_level = 0
|
|
@@ -251,7 +253,7 @@ class FilingOfficialProcessor:
|
|
|
251
253
|
self.all_stat_count['doc_error'] += len(doc_error_list)
|
|
252
254
|
self.all_stat_count['step_error'] += len(except_id_list)
|
|
253
255
|
self.all_stat_count['xbrl'] += len(xbrl_data)
|
|
254
|
-
self.send_task(file_flow_info, tags, is_important, priority, spider_name_source_type)
|
|
256
|
+
self.send_task(file_flow_info, tags, is_important, priority, spider_name_source_type, queue_type)
|
|
255
257
|
self.send_xbrl_data_to_mongo(xbrl_data)
|
|
256
258
|
self.update_doc_status_to_convert(collection, doc_error_list)
|
|
257
259
|
process_data.clear()
|
|
@@ -264,7 +266,7 @@ class FilingOfficialProcessor:
|
|
|
264
266
|
self.all_stat_count['doc_error'] += len(doc_error_list)
|
|
265
267
|
self.all_stat_count['step_error'] += len(except_id_list)
|
|
266
268
|
self.all_stat_count['xbrl'] += len(xbrl_data)
|
|
267
|
-
self.send_task(file_flow_info, tags, is_important, priority, spider_name_source_type)
|
|
269
|
+
self.send_task(file_flow_info, tags, is_important, priority, spider_name_source_type, queue_type)
|
|
268
270
|
self.send_xbrl_data_to_mongo(xbrl_data)
|
|
269
271
|
self.update_doc_status_to_convert(collection, doc_error_list)
|
|
270
272
|
process_data.clear()
|
|
@@ -141,7 +141,8 @@ class FileFlowHandleV2:
|
|
|
141
141
|
'updated_at': now,
|
|
142
142
|
'tags': params['tags'],
|
|
143
143
|
'tag': params['tag'],
|
|
144
|
-
'important_level': record.get('important_level', 0)
|
|
144
|
+
'important_level': record.get('important_level', 0),
|
|
145
|
+
'queue': params['queue']
|
|
145
146
|
}
|
|
146
147
|
|
|
147
148
|
step = {
|
|
@@ -225,6 +226,7 @@ class FileFlowHandleV2:
|
|
|
225
226
|
tag: str = None,
|
|
226
227
|
priority: str = '1',
|
|
227
228
|
source_type: Optional[str] = None,
|
|
229
|
+
queue_type: str = 'Default'
|
|
228
230
|
) -> Tuple[bool, Any, str]:
|
|
229
231
|
"""
|
|
230
232
|
普通任务接口
|
|
@@ -252,7 +254,7 @@ class FileFlowHandleV2:
|
|
|
252
254
|
"""
|
|
253
255
|
return self._file_flow_entry_point_internal(
|
|
254
256
|
records, start_stage, target_stage, tags, priority, source_type,tag,
|
|
255
|
-
urgent=False, clean_exist_data=False
|
|
257
|
+
urgent=False, clean_exist_data=False, queue_type=queue_type
|
|
256
258
|
)
|
|
257
259
|
|
|
258
260
|
def entry_point_urgent(
|
|
@@ -264,6 +266,7 @@ class FileFlowHandleV2:
|
|
|
264
266
|
tag: str = None,
|
|
265
267
|
priority: str = '1',
|
|
266
268
|
source_type: Optional[str] = None,
|
|
269
|
+
queue_type: str = 'Default'
|
|
267
270
|
) -> Tuple[bool, Any, str]:
|
|
268
271
|
"""
|
|
269
272
|
加急任务接口
|
|
@@ -291,7 +294,7 @@ class FileFlowHandleV2:
|
|
|
291
294
|
"""
|
|
292
295
|
return self._file_flow_entry_point_internal(
|
|
293
296
|
records, start_stage, target_stage, tags, priority, source_type, tag,
|
|
294
|
-
urgent=True, clean_exist_data=True
|
|
297
|
+
urgent=True, clean_exist_data=True, queue_type=queue_type
|
|
295
298
|
)
|
|
296
299
|
|
|
297
300
|
def _file_flow_entry_point_internal(
|
|
@@ -304,7 +307,8 @@ class FileFlowHandleV2:
|
|
|
304
307
|
source_type: Optional[str],
|
|
305
308
|
tag: str,
|
|
306
309
|
urgent: bool,
|
|
307
|
-
clean_exist_data: bool = False
|
|
310
|
+
clean_exist_data: bool = False,
|
|
311
|
+
queue_type: str = 'Default'
|
|
308
312
|
) -> Tuple[bool, Any, str]:
|
|
309
313
|
"""核心处理逻辑"""
|
|
310
314
|
params = {
|
|
@@ -315,7 +319,8 @@ class FileFlowHandleV2:
|
|
|
315
319
|
'current_stage': start_stage,
|
|
316
320
|
'source_type': source_type,
|
|
317
321
|
'tag': tag,
|
|
318
|
-
'urgent': urgent
|
|
322
|
+
'urgent': urgent,
|
|
323
|
+
'queue': queue_type
|
|
319
324
|
}
|
|
320
325
|
|
|
321
326
|
is_valid, msg = self._validate_params(params)
|
orbitkit-0.8.58/orbitkit/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.8.58
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|