orbitkit 0.8.74__tar.gz → 0.8.75__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {orbitkit-0.8.74/orbitkit.egg-info → orbitkit-0.8.75}/PKG-INFO +1 -1
- orbitkit-0.8.75/orbitkit/VERSION +1 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/airflow_handler/data_preprocessing.py +11 -11
- {orbitkit-0.8.74 → orbitkit-0.8.75/orbitkit.egg-info}/PKG-INFO +1 -1
- orbitkit-0.8.74/orbitkit/VERSION +0 -1
- {orbitkit-0.8.74 → orbitkit-0.8.75}/LICENSE +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/MANIFEST.in +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/README.md +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/airflow_handler/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/airflow_handler/file_flow_entry_process.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/airflow_handler/file_flow_exit_process.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/airflow_handler/file_handler.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/airflow_handler/file_handler_v2.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/audio_transcoder/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/audio_transcoder/netmind_extract_v1.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/constant/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/constant/report_schema.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/id_srv/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/id_srv/id_gen.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/id_srv/id_perm_like.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/lark_send/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/lark_send/lark.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/llm_tools/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/llm_tools/quick_rag_chat.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/orbit_type/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/orbit_type/doc_4_compile_rule.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/orbit_type/orbit_type_simple.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/orbit_type/tools.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_embedding/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_embedding/pdf_txt_embedding.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_embedding/pdf_txt_embedding_v2.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/a_stock_extractor_v1.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/exceptions.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/mineru_demo.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/pdf_block_extractor_base.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/pdf_block_extractor_v1.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/pdf_block_extractor_v2.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/pdf_extractor_azure.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/pdf_extractor_minerU_v1.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/pdf_extractor_netmind_v1.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/pdf_extractor_netmind_v2.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/pdf_extractor_netmind_v3.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor/pdf_extractor_orbit.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor_simple/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor_simple/base.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor_simple/cloud_provider.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor_simple/core.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor_simple/exceptions.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor_simple/extractors.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_extractor_simple/utils.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_writer/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/pdf_writer/pdf_writer_simple.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/__init__.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/cache_asset_downloader.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/common.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/customize_regix_manager.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/is_xbrl_structure.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/secret_manager.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/universal_extractor.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_aliyun.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_aliyun_oss_simple.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_aws.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_aws_s3_wrapper.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_date.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_html.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_kafka.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_md5.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_selenium.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_simple_timer.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_str.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_type_mapping.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit/util/util_url.py +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit.egg-info/SOURCES.txt +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit.egg-info/dependency_links.txt +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit.egg-info/not-zip-safe +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit.egg-info/requires.txt +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/orbitkit.egg-info/top_level.txt +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/setup.cfg +0 -0
- {orbitkit-0.8.74 → orbitkit-0.8.75}/setup.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.8.75
|
|
@@ -121,16 +121,16 @@ class DocumentProcessor:
|
|
|
121
121
|
allow_embedding_types = {'10002', '10085', '10076', '10122', '10311', '10178', '10075', '10090', '10050'}
|
|
122
122
|
return target_stage if any(rid in allow_embedding_types for rid in report_type_ids) else "extract"
|
|
123
123
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
124
|
+
@staticmethod
|
|
125
|
+
def update_target_stage_by_reported_at(doc, target_stage):
|
|
126
|
+
date_str = doc.get('x_reported_at_utc_date', '1970-01-01')
|
|
127
|
+
now = datetime.datetime.now()
|
|
128
|
+
one_year_ago = now - relativedelta(years=1)
|
|
129
|
+
try:
|
|
130
|
+
reported_date = datetime.datetime.strptime(date_str, "%Y-%m-%d")
|
|
131
|
+
except ValueError:
|
|
132
|
+
reported_date = datetime.datetime(1970, 1, 1)
|
|
133
|
+
return "extract" if reported_date < one_year_ago else target_stage
|
|
134
134
|
|
|
135
135
|
@staticmethod
|
|
136
136
|
def update_target_stage_by_perm_match(doc, target_stage):
|
|
@@ -201,7 +201,7 @@ class DocumentProcessor:
|
|
|
201
201
|
# 判断 特殊条件下的数据不做embedding ('19999'类型和报告日期小于2023-01-01)
|
|
202
202
|
if target_stage == 'embedding' and not custom_process_step:
|
|
203
203
|
target_stage = cls.update_target_stage_by_report_type(doc, target_stage)
|
|
204
|
-
|
|
204
|
+
target_stage = cls.update_target_stage_by_reported_at(doc, target_stage)
|
|
205
205
|
target_stage = cls.update_target_stage_by_perm_match(doc, target_stage)
|
|
206
206
|
# 特殊情况下只需要做embedding 但是这个数据被条件限制为只做到提取时状态异常
|
|
207
207
|
if start_stage == 'embedding' and target_stage == 'extract':
|
orbitkit-0.8.74/orbitkit/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.8.74
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|