deepdoc-lib 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoc/README.md +122 -0
- deepdoc/README_zh.md +116 -0
- deepdoc/__init__.py +43 -0
- deepdoc/_version.py +34 -0
- deepdoc/common/__init__.py +52 -0
- deepdoc/common/config_utils.py +63 -0
- deepdoc/common/connection_utils.py +73 -0
- deepdoc/common/file_utils.py +19 -0
- deepdoc/common/misc_utils.py +44 -0
- deepdoc/common/model_store.py +369 -0
- deepdoc/common/settings.py +42 -0
- deepdoc/common/tiktoken_cache.py +84 -0
- deepdoc/common/token_utils.py +96 -0
- deepdoc/config.py +149 -0
- deepdoc/depend/find_codec.py +42 -0
- deepdoc/depend/nltk_manager.py +114 -0
- deepdoc/depend/prompts/vision_llm_describe_prompt.md +23 -0
- deepdoc/depend/prompts/vision_llm_figure_describe_prompt.md +24 -0
- deepdoc/depend/prompts.py +35 -0
- deepdoc/depend/rag_tokenizer.py +578 -0
- deepdoc/depend/simple_cv_model.py +469 -0
- deepdoc/depend/surname.py +91 -0
- deepdoc/depend/timeout.py +73 -0
- deepdoc/depend/vision_llm_chunk.py +35 -0
- deepdoc/dict/README.md +19 -0
- deepdoc/dict/huqie.txt +555629 -0
- deepdoc/download_models.py +169 -0
- deepdoc/llm_adapter/__init__.py +15 -0
- deepdoc/llm_adapter/adapter.py +223 -0
- deepdoc/llm_adapter/utils.py +104 -0
- deepdoc/llm_adapter/vision.py +163 -0
- deepdoc/parser/__init__.py +42 -0
- deepdoc/parser/docling_parser.py +889 -0
- deepdoc/parser/docx_parser.py +150 -0
- deepdoc/parser/excel_parser.py +270 -0
- deepdoc/parser/figure_parser.py +182 -0
- deepdoc/parser/html_parser.py +221 -0
- deepdoc/parser/json_parser.py +179 -0
- deepdoc/parser/markdown_parser.py +321 -0
- deepdoc/parser/mineru_parser.py +646 -0
- deepdoc/parser/pdf_parser.py +1591 -0
- deepdoc/parser/ppt_parser.py +96 -0
- deepdoc/parser/resume/__init__.py +109 -0
- deepdoc/parser/resume/entities/__init__.py +15 -0
- deepdoc/parser/resume/entities/corporations.py +128 -0
- deepdoc/parser/resume/entities/degrees.py +44 -0
- deepdoc/parser/resume/entities/industries.py +712 -0
- deepdoc/parser/resume/entities/regions.py +789 -0
- deepdoc/parser/resume/entities/res/corp.tks.freq.json +65 -0
- deepdoc/parser/resume/entities/res/corp_baike_len.csv +31480 -0
- deepdoc/parser/resume/entities/res/corp_tag.json +14939 -0
- deepdoc/parser/resume/entities/res/good_corp.json +911 -0
- deepdoc/parser/resume/entities/res/good_sch.json +595 -0
- deepdoc/parser/resume/entities/res/school.rank.csv +1627 -0
- deepdoc/parser/resume/entities/res/schools.csv +5713 -0
- deepdoc/parser/resume/entities/schools.py +91 -0
- deepdoc/parser/resume/step_one.py +189 -0
- deepdoc/parser/resume/step_two.py +692 -0
- deepdoc/parser/tcadp_parser.py +538 -0
- deepdoc/parser/txt_parser.py +64 -0
- deepdoc/parser/utils.py +33 -0
- deepdoc/vision/__init__.py +90 -0
- deepdoc/vision/layout_recognizer.py +481 -0
- deepdoc/vision/ocr.py +757 -0
- deepdoc/vision/operators.py +733 -0
- deepdoc/vision/postprocess.py +370 -0
- deepdoc/vision/recognizer.py +451 -0
- deepdoc/vision/seeit.py +87 -0
- deepdoc/vision/t_ocr.py +101 -0
- deepdoc/vision/t_recognizer.py +186 -0
- deepdoc/vision/table_structure_recognizer.py +617 -0
- deepdoc_lib-0.2.0.dist-info/METADATA +246 -0
- deepdoc_lib-0.2.0.dist-info/RECORD +78 -0
- deepdoc_lib-0.2.0.dist-info/WHEEL +5 -0
- deepdoc_lib-0.2.0.dist-info/entry_points.txt +2 -0
- deepdoc_lib-0.2.0.dist-info/licenses/LICENSE +201 -0
- deepdoc_lib-0.2.0.dist-info/top_level.txt +2 -0
- scripts/download_models.py +10 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import logging
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the ``deepdoc-download-models`` command line.

    Args:
        argv: Command-line arguments, without the program name.

    Returns:
        The parsed namespace. ``bundle`` is always a non-empty selection:
        when no ``--bundle`` flag is given it is filled with every key of
        ``model_store.BUNDLES``.
    """
    # Imported inside the function, like every other deepdoc import in this module.
    from deepdoc.common import model_store

    known_bundles = tuple(model_store.BUNDLES.keys())

    cli = argparse.ArgumentParser(
        prog="deepdoc-download-models",
        description="Download/cache all Deepdoc model bundles, NLTK data, and tiktoken assets for offline use.",
    )
    add = cli.add_argument

    # With no arguments at all, everything is fetched through the remote
    # provider into the default cache directories
    # (~/.cache/deepdoc unless DEEPDOC_MODEL_HOME is set).
    add(
        "--provider",
        default="modelscope",
        choices=("auto", "local", "modelscope"),
        help="Model provider to use (default: %(default)s).",
    )
    add(
        "--model-home",
        default=None,
        help="Model cache root (default: $DEEPDOC_MODEL_HOME or ~/.cache/deepdoc).",
    )
    add(
        "--offline",
        action="store_true",
        help="Disable remote downloads (also disables NLTK auto-download).",
    )
    add(
        "--bundle",
        action="append",
        choices=known_bundles,
        help="Bundle(s) to download. Repeatable. Default: all bundles.",
    )
    add(
        "--no-nltk",
        action="store_true",
        help="Skip downloading required NLTK resources used by the tokenizer.",
    )
    add(
        "--nltk-data-dir",
        default=None,
        help="Where to store NLTK data (default: $DEEPDOC_NLTK_DATA_DIR, $NLTK_DATA, or ~/.cache/deepdoc/nltk_data).",
    )
    add(
        "--no-tiktoken",
        action="store_true",
        help="Skip downloading the cached cl100k_base tiktoken file used by token_utils.",
    )
    add(
        "--tiktoken-cache-dir",
        default=None,
        help="Where to store the cached tiktoken file (default: $DEEPDOC_TIKTOKEN_CACHE_DIR, $TIKTOKEN_CACHE_DIR, $DEEPDOC_MODEL_HOME/tiktoken_cache, or ~/.cache/deepdoc/tiktoken_cache).",
    )
    add(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Increase logging verbosity (repeatable).",
    )

    parsed = cli.parse_args(argv)
    # "append" leaves the attribute as None when the flag never appears.
    if not parsed.bundle:
        parsed.bundle = list(known_bundles)
    return parsed
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def download_all(*, provider: str = "modelscope", model_home: str | None = None, offline: bool = False) -> dict[str, str]:
    """Resolve every bundle listed in ``model_store.BUNDLES``.

    Args:
        provider: Forwarded to ``model_store.resolve_bundle_dir``.
        model_home: Forwarded to ``model_store.resolve_bundle_dir``.
        offline: Forwarded to ``model_store.resolve_bundle_dir``.

    Returns:
        Mapping of bundle name to the value returned by
        ``resolve_bundle_dir`` for that bundle.
    """
    from deepdoc.common import model_store

    return {
        name: model_store.resolve_bundle_dir(
            name,
            model_home=model_home,
            provider=provider,
            offline=offline,
        )
        for name in model_store.BUNDLES
    }
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the model download CLI.

    Resolves each requested bundle, then (unless disabled by flags) ensures
    the NLTK data and the cl100k_base tiktoken file. Every step is attempted
    even if an earlier one fails; failures are collected and reported at the
    end.

    Args:
        argv: Arguments without the program name; ``sys.argv[1:]`` when None.

    Returns:
        1 if any step raised, otherwise 0.
    """
    args = _parse_args(list(sys.argv[1:] if argv is None else argv))

    # -v -> INFO, -vv (or more) -> DEBUG, nothing -> WARNING.
    if args.verbose >= 2:
        level = logging.DEBUG
    elif args.verbose == 1:
        level = logging.INFO
    else:
        level = logging.WARNING
    logging.basicConfig(level=level, format="%(levelname)s %(message)s")

    model_home: str | None = (
        str(Path(args.model_home).expanduser().resolve()) if args.model_home else None
    )

    from deepdoc.common import model_store

    errors: list[str] = []
    bundle_dirs: dict[str, str] = {}
    tiktoken_path: str | None = None

    for name in args.bundle:
        try:
            bundle_dirs[name] = model_store.resolve_bundle_dir(
                name,
                model_home=model_home,
                provider=args.provider,
                offline=args.offline,
            )
        except Exception as exc:
            errors.append(f"{name}: {exc}")

    if not args.no_nltk:
        try:
            from deepdoc.depend.nltk_manager import ensure_nltk_data

            ensure_nltk_data(
                data_dir=args.nltk_data_dir,
                offline=args.offline,
            )
        except Exception as exc:
            errors.append(f"nltk: {exc}")

    if not args.no_tiktoken:
        try:
            from deepdoc.common.tiktoken_cache import configure_tiktoken_cache_env, download_cl100k_base

            downloaded = download_cl100k_base(
                cache_dir=args.tiktoken_cache_dir,
                model_home=model_home,
                offline=args.offline,
            )
            tiktoken_path = str(downloaded)
            configure_tiktoken_cache_env(
                cache_dir=args.tiktoken_cache_dir,
                model_home=model_home,
            )
        except Exception as exc:
            errors.append(f"tiktoken: {exc}")

    # Successful results go to stdout as "<name>\t<path>" lines, sorted by name.
    for name, location in sorted(bundle_dirs.items()):
        print(f"{name}\t{location}")

    if tiktoken_path:
        print(f"tiktoken\t{tiktoken_path}")

    # Failures go to stderr and turn the exit status non-zero.
    for item in errors:
        print(f"ERROR\t{item}", file=sys.stderr)

    return 1 if errors else 0
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# Script entry point: exit the process with the status returned by main().
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM Adapter Layer for DeepDoc
|
|
3
|
+
|
|
4
|
+
This module provides a thin adapter layer that handles LLM-related dependencies,
|
|
5
|
+
allowing DeepDoc to work in both FenixAOS environments and standalone configurations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .adapter import LLMAdapter, LLMType
|
|
9
|
+
from .vision import vision_llm_chunk
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"LLMAdapter",
|
|
13
|
+
"LLMType",
|
|
14
|
+
"vision_llm_chunk",
|
|
15
|
+
]
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM Adapter - Thin wrapper for LLM services
|
|
3
|
+
|
|
4
|
+
This module provides a unified interface for LLM services that works in both
|
|
5
|
+
FenixAOS environments and standalone configurations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import Any, Optional, Union
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
# Try to import from FenixAOS first, fallback to local implementations
|
|
15
|
+
try:
|
|
16
|
+
from fenixaos.core.model import LLMType as FenixLLMType
|
|
17
|
+
from fenixaos.core.model.chat.basic.adapter import create_base_llm
|
|
18
|
+
from fenixaos.core.model.model import ChatModelConfig, ImageModelConfig
|
|
19
|
+
FENIXAOS_AVAILABLE = True
|
|
20
|
+
logger.info("Using FenixAOS LLM services")
|
|
21
|
+
except ImportError:
|
|
22
|
+
FENIXAOS_AVAILABLE = False
|
|
23
|
+
logger.info("FenixAOS not available, using local LLM implementations")
|
|
24
|
+
|
|
25
|
+
# Local LLM implementations
|
|
26
|
+
try:
|
|
27
|
+
from ..depend.simple_cv_model import create_vision_model
|
|
28
|
+
LOCAL_VISION_AVAILABLE = True
|
|
29
|
+
except ImportError:
|
|
30
|
+
LOCAL_VISION_AVAILABLE = False
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class LLMType(str, Enum):
    """Unified LLM type enumeration.

    Members are ``str`` subclasses whose value is the lower-case type name.
    """

    CHAT = "chat"
    EMBEDDING = "embedding"
    IMAGE2TEXT = "image2text"
    SPEECH2TEXT = "speech2text"
    RERANK = "rerank"

    @classmethod
    def from_fenix(cls, fenix_type: Any) -> 'LLMType':
        """Convert a FenixAOS LLM type to the unified type.

        Args:
            fenix_type: An enum member (its ``value`` is used), a plain
                string such as ``"chat"``, or an enum-style string such as
                ``"LLMType.CHAT"``. Matching is case-insensitive.

        Returns:
            The matching member, or ``CHAT`` when nothing matches or when
            FenixAOS is not available.
        """
        if not FENIXAOS_AVAILABLE:
            return cls.CHAT

        # str() of an Enum member is "ClassName.MEMBER", which never matched
        # the lookup keys; read the underlying value instead.
        raw = getattr(fenix_type, "value", fenix_type)
        # Also accept "ClassName.MEMBER"-style strings by keeping the last part.
        key = str(raw).rsplit(".", 1)[-1].lower()
        try:
            return cls(key)
        except ValueError:
            return cls.CHAT
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class LLMServiceInterface:
    """Common contract shared by the concrete LLM service backends.

    Every method raises ``NotImplementedError`` here; subclasses override
    the operations they support.
    """

    def describe_with_prompt(self, image: Union[bytes, Any], prompt: Optional[str] = None) -> str:
        """Return a textual description of ``image``, optionally guided by ``prompt``."""
        raise NotImplementedError()

    def encode(self, texts: list) -> tuple:
        """Turn ``texts`` into embeddings."""
        raise NotImplementedError()

    def chat(self, messages: list, **kwargs) -> str:
        """Produce a chat completion for ``messages``."""
        raise NotImplementedError()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class FenixAOSLLMService(LLMServiceInterface):
    """LLM service backed by FenixAOS.

    Construction builds a FenixAOS ``ImageModelAdapter`` and every interface
    method is delegated to it. Any construction failure is logged and
    re-raised to the caller.
    """

    def __init__(self, tenant_id: Optional[str], llm_type: LLMType, llm_name: Optional[str] = None, **kwargs):
        """Store the settings and create the underlying FenixAOS service.

        Args:
            tenant_id: Stored on the instance.
            llm_type: Stored on the instance.
            llm_name: Model name; a default is substituted when falsy.
            **kwargs: Only ``api_key`` and ``base_url`` are read.

        Raises:
            Exception: Whatever ``_create_fenix_service`` raised.
        """
        self.tenant_id = tenant_id
        self.llm_type = llm_type
        self.llm_name = llm_name
        self._api_key = kwargs.get('api_key')
        self._base_url = kwargs.get('base_url')

        # Build the FenixAOS-backed service; log and propagate on failure.
        try:
            self._service = self._create_fenix_service()
        except Exception as e:
            logger.warning(f"Failed to create FenixAOS LLM service: {e}")
            raise

    def _create_fenix_service(self):
        """Build an ``ImageModelAdapter`` from an ``ImageModelConfig``.

        The provider is fixed to ``"openai"`` and the model name falls back
        to ``"gpt-4-vision-preview"`` when ``llm_name`` is falsy.
        """
        try:
            # FenixAOS pieces are imported here, at call time.
            from fenixaos.core.model.image.adapter import ImageModelAdapter
            from fenixaos.core.model.model import ImageModelConfig

            config_id = f"deepdoc_vision_{self.llm_name or 'default'}"
            model_name = self.llm_name or "gpt-4-vision-preview"
            vision_config = ImageModelConfig(
                id=config_id,
                model_name=model_name,
                model_provider="openai",  # Default to OpenAI
                api_key=getattr(self, '_api_key', None),
                base_url=getattr(self, '_base_url', None),
            )
            return ImageModelAdapter(vision_config)
        except Exception as e:
            logger.error(f"Failed to create FenixAOS vision service: {e}")
            raise

    def describe_with_prompt(self, image: Union[bytes, Any], prompt: Optional[str] = None) -> str:
        """Delegate image description to the wrapped service."""
        return self._service.describe_with_prompt(image, prompt)

    def encode(self, texts: list) -> tuple:
        """Delegate embedding to the wrapped service."""
        return self._service.encode(texts)

    def chat(self, messages: list, **kwargs) -> str:
        """Delegate chat completion to the wrapped service."""
        return self._service.chat(messages, **kwargs)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class LocalLLMService(LLMServiceInterface):
    """LLM service that relies on the package's local implementations.

    Only image description is backed by a model; ``encode`` and ``chat``
    are placeholders that log a warning and return fixed values.
    """

    def __init__(self, config: Optional[dict] = None, **kwargs):
        """Keep ``config`` and try to build the local vision model.

        A failure to build the model is logged and leaves the service
        without a vision model instead of raising.
        """
        self.config = config or {}
        self._vision_model = None

        if not LOCAL_VISION_AVAILABLE:
            return

        try:
            self._vision_model = create_vision_model()
            logger.info("Local vision model initialized")
        except Exception as e:
            logger.warning(f"Failed to initialize local vision model: {e}")

    def describe_with_prompt(self, image: Union[bytes, Any], prompt: Optional[str] = None) -> str:
        """Describe ``image`` with the local vision model.

        Returns the fixed text ``"Vision model not available"`` when no
        model was initialized.
        """
        if not self._vision_model:
            logger.warning("No vision model available")
            return "Vision model not available"
        return self._vision_model.describe_with_prompt(image, prompt)

    def encode(self, texts: list) -> tuple:
        """Placeholder: embeddings are not implemented; returns ``([], 0)``."""
        logger.warning("Local embedding not implemented")
        return [], 0

    def chat(self, messages: list, **kwargs) -> str:
        """Placeholder: chat is not implemented; returns a fixed message."""
        logger.warning("Local chat not implemented")
        return "Local chat not available"
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class LLMAdapter:
    """Single entry point that hides which LLM backend is in use.

    A FenixAOS-backed service is preferred when FenixAOS could be imported;
    otherwise, or when creating it fails, a ``LocalLLMService`` is used.
    """

    def __init__(self, tenant_id: Optional[str] = None, llm_type: LLMType = LLMType.IMAGE2TEXT,
                 llm_name: Optional[str] = None, **kwargs):
        """Remember the settings and pick the backing service."""
        self.tenant_id = tenant_id
        self.llm_type = llm_type
        self.llm_name = llm_name
        self.kwargs = kwargs

        # FenixAOS first, local implementation as the fallback.
        self._service = self._create_service()

    def _create_service(self) -> LLMServiceInterface:
        """Return the FenixAOS service when possible, else the local one."""
        service: Optional[LLMServiceInterface] = None

        # FenixAOS is attempted whenever it is importable, whatever tenant_id is.
        if FENIXAOS_AVAILABLE:
            try:
                service = FenixAOSLLMService(self.tenant_id, self.llm_type, self.llm_name, **self.kwargs)
            except Exception as e:
                logger.warning(f"FenixAOS LLM service creation failed: {e}, falling back to local")

        if service is None:
            service = LocalLLMService(**self.kwargs)
        return service

    def describe_with_prompt(self, image: Union[bytes, Any], prompt: Optional[str] = None) -> str:
        """Forward an image description request to the backing service."""
        return self._service.describe_with_prompt(image, prompt)

    def encode(self, texts: list) -> tuple:
        """Forward an embedding request to the backing service."""
        return self._service.encode(texts)

    def chat(self, messages: list, **kwargs) -> str:
        """Forward a chat completion request to the backing service."""
        return self._service.chat(messages, **kwargs)

    # The members below exist for compatibility with an LLMBundle-like interface.
    def bind_tools(self, toolcall_session, tools):
        """Accept a tool binding request; nothing is bound, it is only logged."""
        logger.debug("Tool binding not implemented in adapter")

    @property
    def is_tools(self) -> bool:
        """Whether tools are supported; always ``False``."""
        return False

    @property
    def max_length(self) -> int:
        """Maximum context length; a fixed default of 4096."""
        return 4096  # Default value
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def create_llm_service(tenant_id: Optional[str] = None, llm_type: LLMType = LLMType.IMAGE2TEXT,
                       llm_name: Optional[str] = None, **kwargs) -> LLMAdapter:
    """
    Build an ``LLMAdapter`` from the given settings.

    Args:
        tenant_id: Tenant ID (required for FenixAOS)
        llm_type: Kind of LLM service wanted
        llm_name: Name of a specific model
        **kwargs: Extra configuration, passed through to ``LLMAdapter``

    Returns:
        The newly created LLMAdapter
    """
    adapter = LLMAdapter(
        tenant_id=tenant_id,
        llm_type=llm_type,
        llm_name=llm_name,
        **kwargs,
    )
    return adapter
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for LLM adapter
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def clean_markdown_block(text: str) -> str:
    """
    Strip a surrounding markdown code fence from ``text``.

    A leading ```` ```markdown ```` marker and a trailing ```` ``` ```` marker
    are removed, then surrounding whitespace is trimmed.

    Args:
        text: Text that may be wrapped in a markdown code block

    Returns:
        str: The unwrapped text; ``""`` when ``text`` is empty or None
    """
    if not text:
        return ""

    # Opening fence first, closing fence second.
    without_opening = re.sub(r'^\s*```markdown\s*\n?', '', text)
    without_fences = re.sub(r'\n?\s*```\s*$', '', without_opening)

    return without_fences.strip()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def extract_image_description(text: str) -> str:
    """
    Extract the main description from vision model output.

    Markdown fences are removed first. If the text then starts with one of a
    few stock lead-ins (for example "The image shows"), the lead-in is
    dropped, but only when something follows it.

    Args:
        text: Raw output from vision model

    Returns:
        str: Cleaned description
    """
    # Clean markdown formatting
    text = clean_markdown_block(text)

    # Common lead-ins that vision models might add (matched case-insensitively).
    prefixes_to_remove = (
        "The image shows",
        "This image depicts",
        "The picture shows",
        "This is an image of",
        "The photo shows",
    )

    lowered = text.lower()
    for prefix in prefixes_to_remove:
        if not lowered.startswith(prefix.lower()):
            continue

        remaining = text[len(prefix):].strip()
        # Strip the lead-in only when real content follows it. Previously a
        # text consisting solely of the lead-in was reduced to "", discarding
        # the whole description.
        # NOTE(review): this is a plain character-prefix test, so any word
        # starting with "a" qualifies (e.g. "apples") - presumably whole
        # articles were intended; confirm before tightening.
        if remaining.startswith(("a", "an", "the")):
            text = remaining
        # No lead-in is a prefix of another, so at most one can match.
        break

    return text.strip()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def validate_image_data(image_data: bytes, max_size: int = 10 * 1024 * 1024) -> bool:
    """
    Check that ``image_data`` looks like a supported image.

    The data must be non-empty, at least 8 bytes long, no larger than
    ``max_size``, and start with a JPEG, PNG, GIF, BMP or WebP signature.

    Args:
        image_data: Raw image bytes
        max_size: Largest accepted size in bytes

    Returns:
        bool: True when every check passes
    """
    size = len(image_data) if image_data else 0
    # Rejects empty input, input too short to carry a signature, and oversized input.
    if size < 8 or size > max_size:
        return False

    # JPEG, PNG, GIF (both versions) and BMP are identified by their leading bytes.
    leading_signatures = (
        b'\xff\xd8\xff',
        b'\x89PNG\r\n\x1a\n',
        b'GIF87a',
        b'GIF89a',
        b'BM',
    )
    if image_data.startswith(leading_signatures):
        return True

    # WebP: "RIFF" at the start and "WEBP" at bytes 8-11.
    return image_data.startswith(b'RIFF') and size >= 12 and image_data[8:12] == b'WEBP'
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Vision LLM utilities for DeepDoc
|
|
3
|
+
|
|
4
|
+
Enhanced vision_llm_chunk with better error handling and format support.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import io
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Any, Callable, Optional, Union
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
# Try to import markdown cleaning function
|
|
14
|
+
try:
    from .utils import clean_markdown_block
except ImportError:
    def clean_markdown_block(text: str) -> str:
        """Fallback markdown cleaning function.

        Removes a leading ```` ```markdown ```` fence and a trailing
        ```` ``` ```` fence, then trims whitespace. Empty or None input
        yields ``""``, the same as the ``utils`` implementation this
        replaces, instead of raising inside ``re.sub``.
        """
        import re
        if not text:
            return ""
        text = re.sub(r'^\s*```markdown\s*\n?', '', text)
        text = re.sub(r'\n?\s*```\s*$', '', text)
        return text.strip()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def vision_llm_chunk(
    binary: Any,
    vision_model: Any,
    prompt: Optional[str] = None,
    callback: Optional[Callable] = None
) -> str:
    """
    Describe one image with a vision model and return cleaned markdown.

    The image is turned into bytes, passed to
    ``vision_model.describe_with_prompt`` together with ``prompt``, and the
    answer is stripped of markdown fences.

    Args:
        binary: The image: an object with a PIL-style ``save(fp, format=...)``
            method, raw ``bytes``/``bytearray``/``memoryview``, or a
            file-like object such as ``io.BytesIO``
        vision_model: Vision model instance with describe_with_prompt method
        prompt: Optional prompt for image description
        callback: Optional ``callback(progress, message)``; called with
            ``-1`` and the error message when processing fails

    Returns:
        str: ``"\\n"`` followed by the cleaned model answer, or ``""`` when
        anything fails (the error is logged and reported via ``callback``)
    """
    callback = callback or (lambda prog, msg: None)

    try:
        payload = _vision_image_to_bytes(binary)

        # Call vision model
        ans = clean_markdown_block(
            vision_model.describe_with_prompt(payload, prompt)
        )

        return "\n" + ans

    except Exception as e:
        error_msg = f"Vision model processing failed: {str(e)}"
        logger.error(error_msg)
        callback(-1, error_msg)
        return ""


def _vision_image_to_bytes(image: Any) -> bytes:
    """Return the encoded bytes for any input kind ``vision_llm_chunk`` accepts."""
    # Raw bytes are already encoded; previously they failed because only
    # objects with a save() method were handled.
    if isinstance(image, (bytes, bytearray, memoryview)):
        data = bytes(image)
    elif hasattr(image, "save"):
        buffer = io.BytesIO()
        # Try formats in order of preference; an image mode that one format
        # cannot store may still be storable in a later one.
        for fmt in ('JPEG', 'PNG', 'WEBP', 'BMP'):
            try:
                image.save(buffer, format=fmt)
            except Exception:
                # Discard any partial output before the next attempt.
                buffer.seek(0)
                buffer.truncate()
                continue
            return buffer.getvalue()
        raise ValueError("Unable to save image in any supported format")
    elif hasattr(image, "getvalue"):
        # BytesIO-like: take the whole buffer regardless of stream position.
        data = image.getvalue()
    elif hasattr(image, "read"):
        data = image.read()
    else:
        raise ValueError("Unable to save image in any supported format")

    if not data:
        raise ValueError("Image data is empty")
    return data
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def vision_llm_chunk_with_fallback(
    binary: Any,
    vision_model: Any,
    prompt: Optional[str] = None,
    callback: Optional[Callable] = None,
    fallback_text: str = "Image processing failed"
) -> str:
    """
    Run ``vision_llm_chunk`` and substitute ``fallback_text`` for a blank result.

    Args:
        binary: Image binary data
        vision_model: Vision model instance
        prompt: Optional prompt
        callback: Optional callback
        fallback_text: Returned when the processed text is empty or
            whitespace only

    Returns:
        str: The processed text, or ``fallback_text``
    """
    described = vision_llm_chunk(binary, vision_model, prompt, callback)
    if described.strip():
        return described
    return fallback_text
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def batch_vision_llm_chunk(
    images: list,
    vision_model: Any,
    prompts: Optional[list] = None,
    callback: Optional[Callable] = None,
    max_workers: int = 3
) -> list:
    """
    Run ``vision_llm_chunk`` over several images on a thread pool.

    Args:
        images: List of image binary data
        vision_model: Vision model instance
        prompts: Optional list of prompts, one per image
        callback: Optional callback function, passed to every call
        max_workers: Size of the thread pool

    Returns:
        list: One text result per image, in the same order as ``images``;
        an image whose processing raised contributes ``""``

    Raises:
        ValueError: If ``prompts`` is given with a different length than
            ``images``
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    if not images:
        return []

    if prompts and len(prompts) != len(images):
        raise ValueError("prompts list must have same length as images list")

    total = len(images)
    per_image_prompts = prompts or [None] * total
    outputs = [None] * total

    def describe_at(position: int, picture: Any, text_prompt: Optional[str]) -> tuple:
        # The position travels with the result so completion order does not matter.
        try:
            return position, vision_llm_chunk(picture, vision_model, text_prompt, callback)
        except Exception as e:
            logger.error(f"Failed to process image {position}: {e}")
            return position, ""

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = []
        for position, (picture, text_prompt) in enumerate(zip(images, per_image_prompts)):
            pending.append(pool.submit(describe_at, position, picture, text_prompt))

        for finished in as_completed(pending):
            position, text = finished.result()
            outputs[position] = text

    return outputs
|