MeUtils 2025.3.3.18.41.24__py3-none-any.whl → 2025.3.5.19.55.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/METADATA +264 -264
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/RECORD +61 -33
- examples/_openaisdk/open_router.py +2 -1
- examples/_openaisdk/openai_files.py +16 -5
- examples/_openaisdk/openai_images.py +1 -0
- examples/_openaisdk/openai_moon.py +22 -19
- examples/sh/__init__.py +11 -0
- meutils/apis/baidu/bdaitpzs.py +9 -17
- meutils/apis/chatglm/glm_video_api.py +2 -2
- meutils/apis/images/edits.py +7 -2
- meutils/apis/jimeng/common.py +1 -1
- meutils/apis/oneapi/common.py +4 -4
- meutils/apis/proxy/ips.py +2 -0
- meutils/caches/common.py +4 -0
- meutils/data/VERSION +1 -1
- meutils/data/oneapi/NOTICE.html +12 -0
- meutils/data/oneapi/__init__.py +1 -1
- meutils/data/oneapi/index.html +275 -0
- meutils/io/_openai_files.py +31 -0
- meutils/io/openai_files.py +138 -0
- meutils/io/parsers/__init__.py +10 -0
- meutils/io/parsers/fileparser/PDF/346/212/275/345/217/226.py +58 -0
- meutils/io/parsers/fileparser/__init__.py +11 -0
- meutils/io/parsers/fileparser/common.py +91 -0
- meutils/io/parsers/fileparser/demo.py +41 -0
- meutils/io/parsers/fileparser/filetype/__init__.py +10 -0
- meutils/io/parsers/fileparser/filetype/__main__.py +37 -0
- meutils/io/parsers/fileparser/filetype/filetype.py +98 -0
- meutils/io/parsers/fileparser/filetype/helpers.py +140 -0
- meutils/io/parsers/fileparser/filetype/match.py +155 -0
- meutils/io/parsers/fileparser/filetype/types/__init__.py +118 -0
- meutils/io/parsers/fileparser/filetype/types/application.py +22 -0
- meutils/io/parsers/fileparser/filetype/types/archive.py +687 -0
- meutils/io/parsers/fileparser/filetype/types/audio.py +212 -0
- meutils/io/parsers/fileparser/filetype/types/base.py +29 -0
- meutils/io/parsers/fileparser/filetype/types/document.py +256 -0
- meutils/io/parsers/fileparser/filetype/types/font.py +115 -0
- meutils/io/parsers/fileparser/filetype/types/image.py +383 -0
- meutils/io/parsers/fileparser/filetype/types/isobmff.py +33 -0
- meutils/io/parsers/fileparser/filetype/types/video.py +223 -0
- meutils/io/parsers/fileparser/filetype/utils.py +84 -0
- meutils/io/parsers/fileparser/filetype.py +41 -0
- meutils/io/parsers/fileparser/mineru.py +48 -0
- meutils/io/parsers/fileparser/pdf.py +30 -0
- meutils/io/parsers/fileparser//350/241/250/346/240/274/346/212/275/345/217/226.py +118 -0
- meutils/llm/check_utils.py +33 -2
- meutils/llm/clients.py +1 -0
- meutils/llm/completions/chat_gemini.py +72 -0
- meutils/llm/completions/chat_plus.py +78 -0
- meutils/llm/completions/{agents/file.py → chat_spark.py} +46 -26
- meutils/llm/completions/qwenllm.py +57 -16
- meutils/llm/completions/yuanbao.py +29 -3
- meutils/llm/openai_utils/common.py +2 -2
- meutils/schemas/oneapi/common.py +22 -19
- meutils/schemas/openai_types.py +65 -29
- meutils/schemas/yuanbao_types.py +6 -7
- meutils/types.py +2 -0
- meutils/data/oneapi/NOTICE.md +0 -1
- meutils/data/oneapi/_NOTICE.md +0 -140
- meutils/llm/completions/gemini.py +0 -69
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/LICENSE +0 -0
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/WHEEL +0 -0
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/entry_points.txt +0 -0
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Project : AI. @by PyCharm
|
4
|
+
# @File : filetype
|
5
|
+
# @Time : 2023/7/15 15:05
|
6
|
+
# @Author : betterme
|
7
|
+
# @WeChat : meutils
|
8
|
+
# @Software : PyCharm
|
9
|
+
# @Description : 文件或扩展名被更改
|
10
|
+
|
11
|
+
from meutils.pipe import *
|
12
|
+
|
13
|
+
import filetype
|
14
|
+
import magic
|
15
|
+
import mimetypes
|
16
|
+
|
17
|
+
import mimetypes
|
18
|
+
|
19
|
+
# mimetype, _ = mimetypes.guess_type('example.txt')
|
20
|
+
# print(mimetype)
|
21
|
+
#
|
22
|
+
# import magic
|
23
|
+
#
|
24
|
+
# file_type = magic.from_file('example.pdf', mime=True)
|
25
|
+
# print(file_type)
|
26
|
+
#
|
27
|
+
# import filetype
|
28
|
+
#
|
29
|
+
# kind = filetype.guess('example.jpg')
|
30
|
+
# if kind is None:
|
31
|
+
# print('Cannot guess file type!')
|
32
|
+
# else:
|
33
|
+
# print('File extension: %s' % kind.extension)
|
34
|
+
# print('File MIME type: %s' % kind.mime)
|
35
|
+
|
36
|
+
if __name__ == '__main__':
    # Demo: guess a MIME type purely from the extension found in a URL/path.
    # mimetype, _ = mimetypes.guess_type('example.mp3')
    sample_url = 'https://www.baidu.com/img/flexible/logo/pc/result@2.png'
    mimetype, _ = mimetypes.guess_type(sample_url)

    # First the guessed type, then the guessed encoding.
    for item in (mimetype, _):
        print(item)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Project : AI. @by PyCharm
|
4
|
+
# @File : mineru
|
5
|
+
# @Time : 2025/1/23 13:13
|
6
|
+
# @Author : betterme
|
7
|
+
# @WeChat : meutils
|
8
|
+
# @Software : PyCharm
|
9
|
+
# @Description :
|
10
|
+
|
11
|
+
from meutils.pipe import *
|
12
|
+
|
13
|
+
import requests
|
14
|
+
|
15
|
+
import os

# SECURITY: a bearer token (JWT) used to be embedded in this file. Credentials
# must not live in source control or in a published wheel, so the token is now
# read from the MINERU_TOKEN environment variable. The token that was
# previously published here should be treated as compromised and revoked.
token = os.getenv("MINERU_TOKEN", "")

# Without a timeout `requests` can block forever on a stalled connection.
REQUEST_TIMEOUT = 60

url = 'https://mineru.net/api/v4/extract/task'
header = {
    'Content-Type': 'application/json',
    "Authorization": f"Bearer {token.strip()}"
}
data = {
    'url': 'https://cdn-mineru.openxlab.org.cn/demo/example.pdf',
    'is_ocr': True,
    'enable_formula': False,
}

# Step 1: submit the extraction task.
res = requests.post(url, headers=header, json=data, timeout=REQUEST_TIMEOUT)
payload = res.json()
print(res.status_code)
print(payload)
# `.get` instead of `["data"]`: an error payload without "data" should be
# printed, not turned into a KeyError.
print(payload.get("data"))

# {'task_id': 'adb223f6-794b-4950-8d60-d766ebd0bf14'}

# Step 2: poll the task that was just created. Fall back to the task id
# recorded from an earlier run when the submit response carries none.
# NOTE(review): assumes the submit response looks like {"data": {"task_id": ...}},
# as the sample output above suggests - confirm against the MinerU API docs.
task_id = (payload.get("data") or {}).get("task_id") or 'adb223f6-794b-4950-8d60-d766ebd0bf14'

url = f'https://mineru.net/api/v4/extract/task/{task_id}'

res = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
payload = res.json()
print(res.status_code)
print(payload)
print(payload.get("data"))
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Project : AI. @by PyCharm
|
4
|
+
# @File : pdf
|
5
|
+
# @Time : 2023/5/18 16:39
|
6
|
+
# @Author : betterme
|
7
|
+
# @WeChat : meutils
|
8
|
+
# @Software : PyCharm
|
9
|
+
# @Description :
|
10
|
+
|
11
|
+
from meutils.pipe import *
|
12
|
+
|
13
|
+
|
14
|
+
class PageWord(BaseModel):
    """One word entry of a PDF page together with its bounding-box data.

    The defaults are sample values copied from a real document; they mainly
    illustrate the shape of a record.

    Every field carries an explicit type annotation. Pydantic v2 refuses
    non-annotated class attributes, and v1 merely infers the type from the
    default, so spelling the types out keeps the model valid on both.
    NOTE(review): assumes ``BaseModel`` (star-imported from ``meutils.pipe``)
    is pydantic's - confirm.
    """
    x0: float = 153.5
    x1: float = 441.72032
    top: float = 76.19035999999994
    doctop: float = 76.19035999999994
    bottom: float = 92.15035999999998
    upright: bool = True
    direction: int = 1
    text: str = "国投瑞银基金管理有限公司基金相关参数"
|
23
|
+
|
24
|
+
|
25
|
+
class PageWords(BaseModel):  # page.extract_words()
    """Container for the list of word entries of one page."""
    # Word records; presumably the output of pdfplumber's page.extract_words() - TODO confirm.
    pagewords: List[PageWord]
|
27
|
+
|
28
|
+
|
29
|
+
class Part(BaseModel):
    """Placeholder model; no fields are defined yet."""
    pass
|
@@ -0,0 +1,118 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Project : AI. @by PyCharm
|
4
|
+
# @File : 表格抽取
|
5
|
+
# @Time : 2023/7/17 18:14
|
6
|
+
# @Author : betterme
|
7
|
+
# @WeChat : meutils
|
8
|
+
# @Software : PyCharm
|
9
|
+
# @Description :
|
10
|
+
|
11
|
+
from meutils.pipe import *
|
12
|
+
import os
|
13
|
+
from dataclasses import dataclass
|
14
|
+
from typing import List
|
15
|
+
from collections import Counter
|
16
|
+
|
17
|
+
import pandas as pd
|
18
|
+
import pdfplumber
|
19
|
+
import tabulate
|
20
|
+
|
21
|
+
|
22
|
+
def extract_tables_with_text(pdf) -> List[str]:
    """Extract the text of a PDF with its tables embedded as Markdown.

    Each page is split into words lying outside any table and the tables
    themselves. Both are ordered by their ``top`` coordinate; words are emitted
    as plain strings and tables as pipe-formatted (Markdown) text.

    A table that reaches the end of the page (no characters below it) is held
    back and concatenated with the first table of a following page, so a table
    split across pages is rendered once.

    Args:
        pdf: An opened ``pdfplumber`` PDF (anything exposing ``.pages``).

    Returns:
        The distinct lines in first-seen order. Lines repeated as often as the
        last page index are treated as running headers/footers and dropped.
    """

    def clean_cell_text(text):
        """Remove newlines and surrounding whitespace from a table cell."""
        if text is None:
            return ""
        return text.replace("\n", "").strip()

    def check_bboxes(word, table_bbox):
        """Return True when *word* lies strictly inside *table_bbox*."""
        r = table_bbox
        return (word['x0'] > r[0] and word['top'] > r[1]
                and word['x1'] < r[2] and word['bottom'] < r[3])

    def keep_visible_lines(obj):
        """Keep every object except rects whose non-stroking color is not 0."""
        if obj['object_type'] == 'rect':
            return obj['non_stroking_color'] == 0
        return True

    def to_frame(rows):
        """Build a DataFrame from extracted rows with every cell cleaned."""
        # `applymap` is kept on purpose: the script pins pandas==2.0.3.
        return pd.DataFrame(rows).applymap(clean_cell_text)

    def render(frame):
        """Render a DataFrame as a pipe-formatted table."""
        return tabulate.tabulate(frame, tablefmt="pipe", showindex=False, headers="keys")

    lines = []
    page_counter = 0
    # Caches table fragments that were not completely shown on their page.
    pending_table = []
    for page_number, page in enumerate(pdf.pages, start=0):
        p = page.filter(keep_visible_lines)
        combine_flag = bool(pending_table)
        page_counter = page_number
        found = p.find_tables(
            table_settings={
                "vertical_strategy": "lines",
                "horizontal_strategy": "lines",
                # "explicit_vertical_lines": self.curves_to_edges(p.curves) + p.edges,
                # "explicit_horizontal_lines": self.curves_to_edges(p.curves) + p.edges,
            }
        )
        # The last table "runs over the footer" when the last character of the
        # page is not below it. Reuse `found` instead of calling find_tables()
        # again ("lines"/"lines" are pdfplumber's default strategies), and
        # guard against pages with tables but no characters (was an IndexError).
        is_over_footer = False
        if found and p.chars:
            is_over_footer = p.chars[-1].get('y0') >= p.bbox[3] - found[-1].bbox[3]

        bboxes = [table.bbox for table in found]
        tables = [{'table': t.extract(), 'top': t.bbox[1]} for t in found]
        tp = None
        if is_over_footer:
            # Hold the last table back; it may continue on the next page.
            tp = to_frame(tables[-1]['table'])
            tables = tables[:-1]
        non_table_words = [word for word in p.extract_words()
                           if not any(check_bboxes(word, bbox) for bbox in bboxes)]

        for cluster in pdfplumber.utils.cluster_objects(non_table_words + tables, 'top', tolerance=5):
            for c in cluster:
                # NOTE(review): the original also tested
                # `c['text'] != non_table_words[-1]`, comparing a string with a
                # word dict - always True, so it is dropped here. If the intent
                # was to skip the last word of the page, compare against
                # non_table_words[-1]['text'] instead.
                if 'text' in c and c['text'] != str(page_number):
                    lines.append(c['text'])
                elif 'table' in c:
                    frame = to_frame(c['table'])
                    if combine_flag:
                        # First table of the page continues the pending one.
                        frame = pd.concat([pd.concat(pending_table, axis=0), frame], axis=0)
                        combine_flag = False
                        pending_table = []
                    lines.append(render(frame))
        if is_over_footer:
            pending_table.append(tp)

    # A table still pending after the last page has no continuation; emit it
    # instead of silently losing it.
    if pending_table:
        lines.append(render(pd.concat(pending_table, axis=0)))

    content_counter = Counter(lines)
    if page_counter <= 1:
        # With one or two pages the repeat heuristic cannot tell headers from
        # content (for two pages it used to delete every unique line), so only
        # de-duplicate.
        return list(content_counter)
    return [word for word in content_counter if content_counter[word] != page_counter]
|
94
|
+
|
95
|
+
|
96
|
+
@dataclass
class Document:
    """Document structure: extracted content plus the name of its source."""
    # Extracted lines (plain text and rendered tables).
    text: List[str]
    # Name of the file the content came from.
    source: str
|
101
|
+
|
102
|
+
|
103
|
+
def extract_text(filepath) -> Document:
    """Open the PDF at *filepath* and wrap its extracted lines in a Document.

    The document's ``source`` is the base name of *filepath*.
    """
    with pdfplumber.open(filepath) as opened_pdf:
        extracted_lines = extract_tables_with_text(opened_pdf)

    source_name = os.path.basename(filepath)
    return Document(text=extracted_lines, source=source_name)
|
107
|
+
|
108
|
+
|
109
|
+
if __name__ == '__main__':
    # The bare string below only records the dependency versions this demo was
    # written against; it has no runtime effect.
    """
    pdfplumber==0.9.0
    tabulate[widechars]==0.9.0
    numpy==1.21.5
    pandas==2.0.3
    """
    # NOTE(review): machine-specific sample path; adjust before running.
    doc = extract_text("/Users/betterme/Downloads/H2_AN202303301584686196_1.pdf")
    for line in doc.text:
        print(line)
|
meutils/llm/check_utils.py
CHANGED
@@ -92,18 +92,49 @@ async def check_token_for_jina(api_key, threshold=1000):
|
|
92
92
|
return False
|
93
93
|
|
94
94
|
|
95
|
+
@retrying()
async def check_token_for_moonshot(api_key, threshold: float = 0):
    """Check whether a Moonshot API key still has enough balance.

    Args:
        api_key: A single key (``str``); any other value is handed to
            ``check_tokens`` together with this checker.
        threshold: Minimum ``available_balance`` for the key to count as valid.

    Returns:
        For a single key: ``True`` when the reported available balance is at
        least ``threshold``, ``False`` when the request fails or the response
        cannot be interpreted. Otherwise whatever ``check_tokens`` returns.
    """
    if not isinstance(api_key, str):
        # Delegate with *this* checker. The original passed
        # check_token_for_jina (copy-paste slip), which validated Moonshot
        # keys against the wrong service.
        # NOTE(review): `threshold` is not forwarded here, matching how the
        # checker is handed over elsewhere in this file - confirm whether
        # check_tokens should receive a partial with the threshold bound.
        return await check_tokens(api_key, check_token_for_moonshot)

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Accept": "application/json"
    }

    try:
        async with httpx.AsyncClient(base_url="https://api.moonshot.cn/v1", headers=headers, timeout=60) as client:
            response: httpx.Response = await client.get("/users/me/balance")
            # Raises for every non-success status, so no separate
            # `is_success` check is needed (it left an implicit `None` return).
            response.raise_for_status()

            logger.debug(response.text)
            logger.debug(response.status_code)

            data = response.json()
            logger.debug(data)
            balance = data['data']['available_balance']
            return float(balance) >= threshold

    except Exception as e:
        # Best-effort check: any failure means "not usable".
        logger.error(f"Error: {e}\n{api_key}")
        return False
|
122
|
+
|
123
|
+
|
95
124
|
if __name__ == '__main__':
|
96
125
|
from meutils.config_utils.lark_utils import get_next_token_for_polling
|
97
126
|
|
98
127
|
check_valid_token = partial(check_token_for_siliconflow, threshold=-1)
|
99
128
|
|
100
129
|
pass
|
101
|
-
arun(check_valid_token("sk-LlB4W38z9kv5Wy1c3ceeu4PHeIWs6bbWsjr8Om31jYvsucRv", threshold=0.1))
|
130
|
+
# arun(check_valid_token("sk-LlB4W38z9kv5Wy1c3ceeu4PHeIWs6bbWsjr8Om31jYvsucRv", threshold=0.1))
|
102
131
|
|
103
|
-
FEISHU_URL = "https://xchatllm.feishu.cn/sheets/Bmjtst2f6hfMqFttbhLcdfRJnNf?sheet=KVClcs"
|
132
|
+
# FEISHU_URL = "https://xchatllm.feishu.cn/sheets/Bmjtst2f6hfMqFttbhLcdfRJnNf?sheet=KVClcs"
|
104
133
|
|
105
134
|
# b = arun(check_token_for_openai(os.getenv("STEP_API_KEY")))
|
106
135
|
|
107
136
|
# arun(get_next_token_for_polling(check_token=check_token_for_openai, feishu_url=FEISHU_URL))
|
108
137
|
|
109
138
|
# arun(check_token_for_jina(["jina_c8da77fed9704d558c8def39837960edplTLkNYrsPTJHBF1HcYg_RkRVh0X"]*10))
|
139
|
+
|
140
|
+
arun(check_token_for_moonshot("sk-fWqLGmUtoGgoK9gx5IefO1mWrRF9QHaV7uVRrTcFv1lrJVvJ"))
|
meutils/llm/clients.py
CHANGED
@@ -0,0 +1,72 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Project : AI. @by PyCharm
|
4
|
+
# @File : gemini
|
5
|
+
# @Time : 2025/2/14 17:36
|
6
|
+
# @Author : betterme
|
7
|
+
# @WeChat : meutils
|
8
|
+
# @Software : PyCharm
|
9
|
+
# @Description :
|
10
|
+
|
11
|
+
|
12
|
+
from meutils.pipe import *
|
13
|
+
from meutils.llm.openai_utils import to_openai_params
|
14
|
+
from meutils.llm.clients import AsyncOpenAI
|
15
|
+
|
16
|
+
from meutils.schemas.openai_types import chat_completion, chat_completion_chunk, CompletionRequest, CompletionUsage
|
17
|
+
|
18
|
+
"""
|
19
|
+
image => file
|
20
|
+
|
21
|
+
"type": "image_url",
|
22
|
+
"image_url": {
|
23
|
+
|
24
|
+
|
25
|
+
"""
|
26
|
+
|
27
|
+
|
28
|
+
class Completions(object):
    """Chat-completions wrapper that re-attaches the request's URLs as image parts."""

    def __init__(self,
                 base_url: Optional[str] = None,
                 api_key: Optional[str] = None
                 ):
        """Create the underlying ``AsyncOpenAI`` client.

        Args:
            base_url: Endpoint passed through to ``AsyncOpenAI``.
            api_key: Key passed through to ``AsyncOpenAI``.
        """
        self.client = AsyncOpenAI(
            base_url=base_url,
            api_key=api_key,
        )

    async def create(self, request: CompletionRequest):
        """Append every URL of ``request.last_urls`` as an ``image_url`` part and send.

        The last message of ``request`` is modified in place.

        Args:
            request: The completion request. ``last_urls`` is presumably a
                mapping of content type to a list of URLs - TODO confirm.

        Returns:
            The result of ``client.chat.completions.create``.
        """
        # Flatten in one pass instead of the quadratic `sum(lists, [])`.
        urls = [url for group in request.last_urls.values() for url in group]

        if urls:
            last_message = request.messages[-1]
            content = last_message["content"]
            if isinstance(content, str):
                # Plain-string content has no `.append`; convert it to the
                # multi-part form so the URL parts can be added.
                content = [{"type": "text", "text": content}]
                last_message["content"] = content

            # NOTE(review): every URL is attached as "image_url", whatever its
            # original type - verify this is what the upstream model expects.
            for url in urls:
                content.append({"type": "image_url", "image_url": {"url": url}})

        data = to_openai_params(request)
        return await self.client.chat.completions.create(**data)
|
46
|
+
|
47
|
+
|
48
|
+
if __name__ == '__main__':
    # Manual smoke test: send one user message holding a text part and a
    # "video_url" part that points at a remote file.
    url = "https://oss.ffire.cc/files/lipsync.mp3"
    content = [
        {"type": "text", "text": "总结下"},
        # {"type": "image_url", "image_url": {"url": url}},

        {"type": "video_url", "video_url": {"url": url}}

    ]
    request = CompletionRequest(
        # model="qwen-turbo-2024-11-01",
        model="gemini-all",
        # model="qwen-plus-latest",

        messages=[
            {
                'role': 'user',

                'content': content
            },

        ],
        stream=False,
    )
    # Uses the default client settings (no base_url / api_key passed).
    arun(Completions().create(request))
|
@@ -0,0 +1,78 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Project : AI. @by PyCharm
|
4
|
+
# @File : chat_all
|
5
|
+
# @Time : 2025/3/4 13:25
|
6
|
+
# @Author : betterme
|
7
|
+
# @WeChat : meutils
|
8
|
+
# @Software : PyCharm
|
9
|
+
# @Description :
|
10
|
+
|
11
|
+
|
12
|
+
from openai import AsyncOpenAI
|
13
|
+
|
14
|
+
from meutils.pipe import *
|
15
|
+
from meutils.decorators.retry import retrying
|
16
|
+
from meutils.io.files_utils import to_bytes
|
17
|
+
from meutils.io.openai_files import file_extract
|
18
|
+
|
19
|
+
from meutils.llm.clients import qwen_client, chatfire_client
|
20
|
+
from meutils.llm.openai_utils import to_openai_params
|
21
|
+
|
22
|
+
from meutils.config_utils.lark_utils import get_next_token_for_polling
|
23
|
+
from meutils.schemas.openai_types import chat_completion, chat_completion_chunk, CompletionRequest, CompletionUsage
|
24
|
+
|
25
|
+
FEISHU_URL = "https://xchatllm.feishu.cn/sheets/Bmjtst2f6hfMqFttbhLcdfRJnNf?sheet=PP1PGr"
|
26
|
+
|
27
|
+
base_url = "https://chat.qwenlm.ai/api"
|
28
|
+
|
29
|
+
from fake_useragent import UserAgent
|
30
|
+
|
31
|
+
ua = UserAgent()
|
32
|
+
|
33
|
+
"""
|
34
|
+
# vision_model="doubao-1.5-vision-pro-32k"
|
35
|
+
# glm-4v-flash
|
36
|
+
|
37
|
+
1. 文件解析
|
38
|
+
|
39
|
+
"""
|
40
|
+
|
41
|
+
|
42
|
+
class Completions(object):
    """Chat-completions wrapper with file extraction (work in progress)."""

    def __init__(self,
                 vision_model: str = "doubao-1.5-vision-pro-32k",
                 base_url: Optional[str] = None,
                 api_key: Optional[str] = None,
                 ):
        # Stores the vision model name and builds the AsyncOpenAI client from
        # the given endpoint and key.
        self.vision_model = vision_model
        self.client = AsyncOpenAI(
            base_url=base_url,
            api_key=api_key,
        )

    async def create(self, request: CompletionRequest):
        """Adapt to any client.

        NOTE(review): unfinished - the extracted file contents are computed
        but not used, and the method returns ``None``.
        """

        last_message = request.last_message
        # URLs listed under "file_url" in the request's last URLs; empty list when absent.
        file_urls = request.last_urls.get("file_url", [])
        file_contents = await file_extract(file_urls)

        # Disabled draft of a vision-model branch, kept for reference:
        # if 'r1' in request.model: # vl
        # data = to_openai_params(request)
        # data['stream'] = False
        # data['model'] = self.vision_model
        #
        # data['messages'] = [
        # {"type": "text", "text": "图片描述:"}
        # ]
        #
        # # await chatfire_client.create(**data)
        #
        # return await chatfire_client.create(**data)
        #
        # request.last_message

        # "image_url": "http://ai.chatfire.cn/files/images/image-1725418399272-d7b71012f.png"
|
@@ -17,14 +17,12 @@ todo: 记录上下文日志
|
|
17
17
|
"""
|
18
18
|
|
19
19
|
from meutils.pipe import *
|
20
|
-
from meutils.
|
20
|
+
from meutils.io.openai_files import file_extract
|
21
21
|
|
22
22
|
from meutils.llm.clients import AsyncOpenAI
|
23
23
|
from meutils.llm.openai_utils import to_openai_params
|
24
24
|
|
25
|
-
from meutils.
|
26
|
-
|
27
|
-
from meutils.schemas.openai_types import chat_completion, chat_completion_chunk, ChatCompletionRequest, CompletionUsage
|
25
|
+
from meutils.schemas.openai_types import chat_completion, chat_completion_chunk, CompletionRequest, CompletionUsage
|
28
26
|
|
29
27
|
|
30
28
|
class Completions(object):
|
@@ -32,21 +30,36 @@ class Completions(object):
|
|
32
30
|
def __init__(self, api_key: Optional[str] = None):
|
33
31
|
self.api_key = api_key
|
34
32
|
|
35
|
-
async def create(self, request:
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
}
|
48
|
-
|
49
|
-
|
33
|
+
async def create(self, request: CompletionRequest):
|
34
|
+
|
35
|
+
if image_urls := request.last_urls.get("image_url"):
|
36
|
+
request.model = "doubao-1.5-vision-pro-32k" # 6月过期
|
37
|
+
|
38
|
+
request.messages = [
|
39
|
+
{
|
40
|
+
'role': 'user',
|
41
|
+
"content": [
|
42
|
+
{"type": "text", "text": request.last_user_content},
|
43
|
+
{"type": "image_url", "image_url": {"url": image_urls[-1]}}
|
44
|
+
]
|
45
|
+
}
|
46
|
+
]
|
47
|
+
else:
|
48
|
+
for i, message in enumerate(request.messages[::-1], 1):
|
49
|
+
if message.get("role") == "user": # 每一轮还要处理
|
50
|
+
content = message.get("content")
|
51
|
+
|
52
|
+
if content.startswith("http"):
|
53
|
+
file_url, content = content.split(maxsplit=1)
|
54
|
+
file_content = await file_extract(file_url)
|
55
|
+
|
56
|
+
request.messages[-i] = {
|
57
|
+
'role': 'user',
|
58
|
+
'content': f"""{json.dumps(file_content, ensure_ascii=False)}\n\n
|
59
|
+
{content}
|
60
|
+
"""
|
61
|
+
}
|
62
|
+
break
|
50
63
|
|
51
64
|
logger.debug(request)
|
52
65
|
|
@@ -58,16 +71,16 @@ class Completions(object):
|
|
58
71
|
if __name__ == '__main__':
|
59
72
|
c = Completions()
|
60
73
|
|
61
|
-
request =
|
74
|
+
request = CompletionRequest(
|
62
75
|
# model="qwen-turbo-2024-11-01",
|
63
76
|
# model="claude-3-5-sonnet-20241022",
|
64
77
|
model="gpt-4o-mini",
|
65
78
|
|
66
79
|
messages=[
|
67
|
-
{
|
68
|
-
|
69
|
-
|
70
|
-
},
|
80
|
+
# {
|
81
|
+
# 'role': 'system',
|
82
|
+
# 'content': '你是一个文件问答助手'
|
83
|
+
# },
|
71
84
|
# {
|
72
85
|
# 'role': 'user',
|
73
86
|
# # 'content': {
|
@@ -89,6 +102,13 @@ if __name__ == '__main__':
|
|
89
102
|
|
90
103
|
{
|
91
104
|
'role': 'user',
|
105
|
+
# "content": '你好',
|
106
|
+
# "content": [
|
107
|
+
# {"type": "text", "text": "描述"},
|
108
|
+
#
|
109
|
+
# {"type": "image_url", "image_url": "https://oss.ffire.cc/files/kling_watermark.png"}
|
110
|
+
# ],
|
111
|
+
|
92
112
|
# 'content': {
|
93
113
|
# "type": "file_url",
|
94
114
|
# "file_url": {"url": "https://mj101-1317487292.cos.ap-shanghai.myqcloud.com/ai/test.pdf", "detai": "auto"}
|
@@ -96,10 +116,10 @@ if __name__ == '__main__':
|
|
96
116
|
# 'content': "https://oss.ffire.cc/files/%E6%8B%9B%E6%A0%87%E6%96%87%E4%BB%B6%E5%A4%87%E6%A1%88%E8%A1%A8%EF%BC%88%E7%AC%AC%E4%BA%8C%E6%AC%A1%EF%BC%89.pdf 这个文件讲了什么?",
|
97
117
|
# 'content': "https://translate.google.com/?sl=zh-CN&tl=en&text=%E6%8F%90%E4%BE%9B%E6%96%B9&op=tr1anslate 这个文件讲了什么?",
|
98
118
|
|
99
|
-
|
119
|
+
"content": "https://oss.ffire.cc/files/百炼系列手机产品介绍.docx 总结下"
|
100
120
|
# "content": "https://mj101-1317487292.cos.ap-shanghai.myqcloud.com/ai/test.pdf\n\n总结下"
|
101
121
|
|
102
|
-
"content": "https://admin.ilovechatgpt.top/file/lunIMYAIzhinengzhushouduishenghuodocx_14905733.docx 总结"
|
122
|
+
# "content": "https://admin.ilovechatgpt.top/file/lunIMYAIzhinengzhushouduishenghuodocx_14905733.docx 总结"
|
103
123
|
|
104
124
|
},
|
105
125
|
|