orbitkit 0.8.52__tar.gz → 0.8.53__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {orbitkit-0.8.52/orbitkit.egg-info → orbitkit-0.8.53}/PKG-INFO +1 -1
- orbitkit-0.8.53/orbitkit/VERSION +1 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/universal_extractor.py +41 -13
- {orbitkit-0.8.52 → orbitkit-0.8.53/orbitkit.egg-info}/PKG-INFO +1 -1
- orbitkit-0.8.52/orbitkit/VERSION +0 -1
- {orbitkit-0.8.52 → orbitkit-0.8.53}/LICENSE +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/MANIFEST.in +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/README.md +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/airflow_handler/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/airflow_handler/data_preprocessing.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/airflow_handler/file_flow_entry_process.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/airflow_handler/file_flow_exit_process.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/airflow_handler/file_handler.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/airflow_handler/file_handler_v2.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/audio_transcoder/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/audio_transcoder/netmind_extract_v1.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/constant/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/constant/report_schema.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/id_srv/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/id_srv/id_gen.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/id_srv/id_perm_like.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/lark_send/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/lark_send/lark.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/llm_tools/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/llm_tools/quick_rag_chat.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/orbit_type/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/orbit_type/doc_4_compile_rule.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/orbit_type/orbit_type_simple.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/orbit_type/tools.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_embedding/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_embedding/pdf_txt_embedding.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_embedding/pdf_txt_embedding_v2.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/a_stock_extractor_v1.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/exceptions.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/pdf_block_extractor_base.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/pdf_block_extractor_v1.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/pdf_block_extractor_v2.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/pdf_extractor_azure.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/pdf_extractor_minerU_v1.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/pdf_extractor_netmind_v1.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/pdf_extractor_netmind_v2.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/pdf_extractor_netmind_v3.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor/pdf_extractor_orbit.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor_simple/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor_simple/base.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor_simple/cloud_provider.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor_simple/core.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor_simple/exceptions.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor_simple/extractors.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_extractor_simple/utils.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_writer/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/pdf_writer/pdf_writer_simple.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/__init__.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/cache_asset_downloader.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/common.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/customize_regix_manager.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/secret_manager.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_aliyun.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_aliyun_oss_simple.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_aws.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_aws_s3_wrapper.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_date.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_html.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_kafka.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_md5.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_selenium.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_simple_timer.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_str.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_type_mapping.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit/util/util_url.py +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit.egg-info/SOURCES.txt +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit.egg-info/dependency_links.txt +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit.egg-info/not-zip-safe +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit.egg-info/requires.txt +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/orbitkit.egg-info/top_level.txt +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/setup.cfg +0 -0
- {orbitkit-0.8.52 → orbitkit-0.8.53}/setup.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.8.53
|
|
@@ -8,21 +8,47 @@ from typing import Optional, List, Union, Set
|
|
|
8
8
|
import logging
|
|
9
9
|
import platform
|
|
10
10
|
|
|
11
|
-
try:
|
|
12
|
-
import rarfile
|
|
13
|
-
import py7zr
|
|
14
|
-
except ImportError:
|
|
15
|
-
raise ValueError(
|
|
16
|
-
"Please install below packages before using this function.\n"
|
|
17
|
-
"- rarfile\n"
|
|
18
|
-
"- py7zr\n"
|
|
19
|
-
)
|
|
20
|
-
|
|
21
11
|
logger = logging.getLogger(__name__)
|
|
22
12
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
13
|
+
# Lazy import flags
|
|
14
|
+
_rarfile = None
|
|
15
|
+
_py7zr = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _ensure_rarfile():
|
|
19
|
+
"""Lazy import rarfile module"""
|
|
20
|
+
global _rarfile
|
|
21
|
+
if _rarfile is None:
|
|
22
|
+
try:
|
|
23
|
+
import rarfile
|
|
24
|
+
_rarfile = rarfile
|
|
25
|
+
# Windows specific warning
|
|
26
|
+
if platform.system() == 'Windows':
|
|
27
|
+
logger.warning(
|
|
28
|
+
"Windows system requires additional UnRAR tool to extract RAR files. "
|
|
29
|
+
"Please download and install UnRAR from https://www.rarlab.com/rar_add.htm"
|
|
30
|
+
)
|
|
31
|
+
except ImportError:
|
|
32
|
+
raise ImportError(
|
|
33
|
+
"Package 'rarfile' is required for RAR file extraction.\n"
|
|
34
|
+
"Please install it: pip install rarfile"
|
|
35
|
+
)
|
|
36
|
+
return _rarfile
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _ensure_py7zr():
|
|
40
|
+
"""Lazy import py7zr module"""
|
|
41
|
+
global _py7zr
|
|
42
|
+
if _py7zr is None:
|
|
43
|
+
try:
|
|
44
|
+
import py7zr
|
|
45
|
+
_py7zr = py7zr
|
|
46
|
+
except ImportError:
|
|
47
|
+
raise ImportError(
|
|
48
|
+
"Package 'py7zr' is required for 7z file extraction.\n"
|
|
49
|
+
"Please install it: pip install py7zr"
|
|
50
|
+
)
|
|
51
|
+
return _py7zr
|
|
26
52
|
|
|
27
53
|
|
|
28
54
|
class UniversalExtractor:
|
|
@@ -161,6 +187,7 @@ class UniversalExtractor:
|
|
|
161
187
|
|
|
162
188
|
def _extract_rar(self, archive_path: str, output_path: str, password: Optional[str] = None) -> List[str]:
|
|
163
189
|
"""解压RAR文件"""
|
|
190
|
+
rarfile = _ensure_rarfile()
|
|
164
191
|
extracted_files = []
|
|
165
192
|
|
|
166
193
|
try:
|
|
@@ -195,6 +222,7 @@ class UniversalExtractor:
|
|
|
195
222
|
|
|
196
223
|
def _extract_7z(self, archive_path: str, output_path: str, password: Optional[str] = None) -> List[str]:
|
|
197
224
|
"""解压7Z文件"""
|
|
225
|
+
py7zr = _ensure_py7zr()
|
|
198
226
|
extracted_files = []
|
|
199
227
|
|
|
200
228
|
try:
|
orbitkit-0.8.52/orbitkit/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
0.8.52
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|