chat2llms-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chat2llms-0.1.0/LICENSE +22 -0
- chat2llms-0.1.0/PKG-INFO +48 -0
- chat2llms-0.1.0/README.md +12 -0
- chat2llms-0.1.0/setup.cfg +4 -0
- chat2llms-0.1.0/setup.py +57 -0
- chat2llms-0.1.0/src/chat2llms/__init__.py +7 -0
- chat2llms-0.1.0/src/chat2llms/analyzer.py +271 -0
- chat2llms-0.1.0/src/chat2llms/base_client.py +84 -0
- chat2llms-0.1.0/src/chat2llms/model_response.py +295 -0
- chat2llms-0.1.0/src/chat2llms/output_files.py +239 -0
- chat2llms-0.1.0/src/chat2llms/utils.py +0 -0
- chat2llms-0.1.0/src/chat2llms.egg-info/PKG-INFO +48 -0
- chat2llms-0.1.0/src/chat2llms.egg-info/SOURCES.txt +16 -0
- chat2llms-0.1.0/src/chat2llms.egg-info/dependency_links.txt +1 -0
- chat2llms-0.1.0/src/chat2llms.egg-info/entry_points.txt +2 -0
- chat2llms-0.1.0/src/chat2llms.egg-info/requires.txt +1 -0
- chat2llms-0.1.0/src/chat2llms.egg-info/top_level.txt +1 -0
- chat2llms-0.1.0/tests/test_analyzer.py +35 -0
chat2llms-0.1.0/LICENSE
ADDED
@@ -0,0 +1,22 @@
+# LICENSE
+MIT License
+
+Copyright (c) 2025 Your Name
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
chat2llms-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,48 @@
+Metadata-Version: 2.4
+Name: chat2llms
+Version: 0.1.0
+Summary: A framework for comparing responses from different large language models.
+Home-page: https://github.com/goldollarch/chat2llms
+Author: scitao
+Author-email: goldollarch@gmail.com
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Text Processing :: Linguistic
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.7
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: requests
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+
+# chat2llms
+
+A Python library to compare responses from different large language models (e.g., DeepSeek, Gemini, OpenAI, Grok).
+
+[![PyPI version](https://badge.fury.io/py/chat2llms.svg)](https://badge.fury.io/py/chat2llms)
+[![Documentation Status](https://readthedocs.org/projects/chat2llms/badge/?version=latest)](https://chat2llms.readthedocs.io/en/latest/?badge=latest)
+
+## Installation
+
+```bash
+pip install chat2llms
+
chat2llms-0.1.0/README.md
ADDED
@@ -0,0 +1,12 @@
+# chat2llms
+
+A Python library to compare responses from different large language models (e.g., DeepSeek, Gemini, OpenAI, Grok).
+
+[![PyPI version](https://badge.fury.io/py/chat2llms.svg)](https://badge.fury.io/py/chat2llms)
+[![Documentation Status](https://readthedocs.org/projects/chat2llms/badge/?version=latest)](https://chat2llms.readthedocs.io/en/latest/?badge=latest)
+
+## Installation
+
+```bash
+pip install chat2llms
+
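The README stops at the install command, so here is a minimal quickstart sketch inferred from the modules in this diff, not taken from the package's own docs. It assumes a config.yaml with per-provider sections (see base_client.py below) and valid API keys; the submodule-style imports mirror tests/test_analyzer.py, since the package __init__ also imports a cli.cli_click module that does not appear in SOURCES.txt.

```python
# Minimal quickstart sketch, assuming a config.yaml with per-provider
# sections and valid API keys. All names come from the modules in this diff.
from src.chat2llms.base_client import BaseClient
from src.chat2llms.model_response import OpenAIResponse, GeminiResponse
from src.chat2llms.analyzer import AnswerAnalyzer

deepseek = BaseClient("deepseek")   # reads the "deepseek" section of config.yaml
gemini = BaseClient("gemini")       # reads the "gemini" section

question = "What is 2 + 2?"
analyzer = AnswerAnalyzer(GeminiResponse(gemini), OpenAIResponse(deepseek), question)

print(f"Text similarity:     {analyzer.compute_similarity():.2f}")
print(f"Semantic similarity: {analyzer.compute_semantic_similarity():.2f}")
```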
chat2llms-0.1.0/setup.py
ADDED
@@ -0,0 +1,57 @@
+import os
+from setuptools import setup, find_packages
+
+# Utility function to read the README file.
+# Used for the long_description. It's nice, because now 1) we have a top level
+# README file and 2) it's easier to type in the README file than to put a raw
+# string in below ...
+def read(fname):
+    return open(os.path.join(os.path.dirname(__file__), fname), encoding='utf-8').read()
+
+setup(
+    name='chat2llms',  # package name
+    version='0.1.0',  # package version - bump this when releasing a new version
+    author='scitao',  # author name
+    author_email='goldollarch@gmail.com',  # author email
+    description='A framework for comparing responses from different large language models.',  # short description
+    long_description=read('README.md'),  # long description, read from README.md
+    long_description_content_type='text/markdown',  # format of the long description
+    url='https://github.com/goldollarch/chat2llms',  # project homepage or other relevant URL
+    packages=find_packages(where='src'),  # find all packages under the src directory
+    package_dir={'': 'src'},  # the package root lives under src
+    install_requires=[  # third-party runtime dependencies
+        'requests',
+        # add other libraries as your project actually needs them;
+        # for example, if you need specific LLM clients, you may need their SDKs:
+        # 'openai>=1.0.0',
+        # 'google-generativeai>=0.5.0',
+        # 'deepseek>=1.0.0',  # assuming such a library exists
+    ],
+    classifiers=[  # classifiers help users find and understand the package
+        'Development Status :: 3 - Alpha',  # development status
+        'Intended Audience :: Developers',  # target audience
+        'License :: OSI Approved :: MIT License',  # license info (adjust to your actual license)
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+        'Programming Language :: Python :: 3.11',
+        'Programming Language :: Python :: 3.12',
+        'Operating System :: OS Independent',
+        'Topic :: Scientific/Engineering :: Artificial Intelligence',
+        'Topic :: Text Processing :: Linguistic',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+    python_requires='>=3.7',  # minimum supported Python version
+    entry_points={  # command-line entry points (if you ship a CLI tool)
+        'console_scripts': [
+            'chat2llms=chat2llms.cli.cli_click:main'  # example: CLI entry point
+        ]
+    },
+    # if the package ships non-code files (e.g. data files), you may need package_data or include_package_data
+    # include_package_data=True,
+    # package_data={
+    #     'chat2llms': ['data/*.json'],  # example: include json files under the package's data directory
+    # },
+)
chat2llms-0.1.0/src/chat2llms/__init__.py
ADDED
@@ -0,0 +1,7 @@
+# src/chat2llms/__init__.py
+__version__ = '0.1.0'
+
+from .cli.cli_click import main
+from .base_client import BaseClient, logger, load_config
+from .model_response import ModelResponse, OpenAIResponse, GeminiResponse
+from .analyzer import AnswerComparator, AnswerAnalyzer, MultiAnswerAnalyzer
chat2llms-0.1.0/src/chat2llms/analyzer.py
ADDED
@@ -0,0 +1,271 @@
+import os
+import re
+import time
+from dataclasses import dataclass
+from typing import Dict, List, Optional
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+import spacy
+import difflib
+from difflib import SequenceMatcher
+
+from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.feature_extraction.text import TfidfVectorizer
+# from sentence_transformers import SentenceTransformer, util
+
+from .model_response import ModelResponse, OpenAIResponse, GeminiResponse
+
+##################################
+
+@dataclass
+class ModelAnswer:
+    """Data structure for one model's answer."""
+
+    model_name: str
+    answer: str  # answer=response.text
+    latency: float  # latency=time.time() - start
+    tokens: int  # tokens=len(response.text.split())
+    error: Optional[str] = None
+
+class AnswerComparator:
+    """Class to compare responses from LLMs for the same question.
+
+    Args:
+        responses (list[ModelResponse]): The LLM responses.
+        question (str): The question to send to every model.
+
+    """
+
+    def __init__(self, responses: List[ModelResponse], question: str = None):
+
+        self.model_names = [r.get_model_name() for r in responses]
+        self.responses = responses
+        self.question = question
+
+        self.vectorizer = TfidfVectorizer(stop_words="english")
+
+        # # Initialize the model used for similarity scoring
+        # self.sim_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
+
+    def get_answers(self, question: str) -> List[ModelAnswer]:
+        """Collect the answers from all models."""
+        answers = [self.get_model_answer(question, r) for r in self.responses]
+        return answers
+
+    def get_model_answer(self, question: str, response: ModelResponse):
+        """Collect one model's answer."""
+        model_name = response.get_model_name()
+        try:
+            start = time.time()
+            answer = response.get_response(question)
+            return ModelAnswer(
+                model_name=model_name,
+                answer=answer,
+                latency=time.time() - start,
+                tokens=len(answer.split()),
+            )
+        except Exception as e:
+            return ModelAnswer(model_name, "", 0, 0, str(e))
+
+    def analyze_answers(self, responses: List[ModelAnswer]) -> Dict:
+        """Run the comparative analysis."""
+        valid_responses = [r for r in responses if not r.error]
+
+        # Semantic similarity analysis
+        answers = [r.answer for r in valid_responses]
+        tfidf = self.vectorizer.fit_transform(answers)
+        similarity = cosine_similarity(tfidf)
+
+        # Extract the key differences
+        differences = self._extract_differences(answers)
+
+        return {
+            "similarity_matrix": similarity,
+            "performance": {
+                r.model_name: {"latency": r.latency, "tokens": r.tokens}
+                for r in valid_responses
+            },
+            "key_differences": differences,
+            "errors": [r for r in responses if r.error],
+        }
+
+    def _extract_differences(self, answers: List[str]) -> List[str]:
+        """Extract the key points of difference."""
+        noun_phrases = []
+        for ans in answers:
+            phrases = re.findall(
+                r"\b[A-Z][a-z]+\s[A-Z][a-z]+\b|\b\w+ion\b|\b\w+ment\b", ans
+            )
+            noun_phrases.extend(phrases)
+
+        unique_phrases = list(set(noun_phrases))
+        return [
+            phrase
+            for phrase in unique_phrases
+            if sum(1 for ans in answers if phrase in ans) < len(answers)
+        ][:5]
+
+    def generate_report(self, analysis: Dict, question: str) -> str:
+        """Generate the analysis report."""
+        report = [
+            f"# Question Analysis Report\n**Question**: {question}\n",
+            "## Performance Comparison",
+            *self._format_performance(analysis["performance"]),
+            "\n## Semantic Similarity",
+            self._format_matrix(analysis["similarity_matrix"]),
+            "\n## Key Differences",
+            *[f"- {diff}" for diff in analysis["key_differences"]],
+            "\n## Error Log",
+            *self._format_errors(analysis["errors"]),
+        ]
+        return "\n".join(report)
+
+    def _format_performance(self, perf: Dict) -> List[str]:
+        return [
+            f"### {model}\n"
+            f"- Latency: {data['latency']:.2f}s\n"
+            f"- Token usage: {data['tokens']}"
+            for model, data in perf.items()
+        ]
+
+    def _format_matrix(self, matrix: np.ndarray) -> str:
+        return (
+            "```\n" + "\n".join(["\t".join(map(str, row)) for row in matrix]) + "\n```"
+        )
+
+    def _format_errors(self, errors: List[ModelAnswer]) -> List[str]:
+        return [f"- {err.model_name}: {err.error}" for err in errors if err.error]
+
+
+# #############################
+
+class AnswerAnalyzer:
+    """Class to compare responses from two LLMs for the same prompt.
+
+    Args:
+        response1 (ModelResponse): The first LLM response.
+        response2 (ModelResponse): The second LLM response.
+
+    Attributes:
+        response1 (ModelResponse): The first response object.
+        response2 (ModelResponse): The second response object.
+        nlp (spacy.language.Language): SpaCy NLP model (optional).
+    """
+
+    def __init__(self, response1: ModelResponse, response2: ModelResponse, question: str = None):
+
+        self.question = question
+        self.response1 = response1
+        self.response2 = response2
+
+        self.nlp: Optional[spacy.language.Language] = None
+
+        try:
+            self.nlp = spacy.load("en_core_web_sm")
+        except (ImportError, OSError):  # spacy.load raises OSError when the model is missing
+            print("SpaCy not installed or model not found. Install with: pip install spacy && python -m spacy download en_core_web_sm")
+
+    def compute_similarity(self) -> float:
+        """Compute similarity between two responses using SequenceMatcher.
+
+        Returns:
+            float: Similarity score between 0.0 and 1.0.
+        """
+        question = self.question
+        text1 = self.response1.get_response(question)
+        text2 = self.response2.get_response(question)
+
+        matcher = SequenceMatcher(None, text1, text2)
+        return matcher.ratio()
+
+    def compute_semantic_similarity(self) -> float:
+        """Compute semantic similarity using SpaCy.
+
+        Returns:
+            float: Semantic similarity score between 0.0 and 1.0, or -1.0 if SpaCy is unavailable.
+        """
+        if self.nlp is None:
+            return -1.0
+
+        question = self.question
+        text1 = self.response1.get_response(question)
+        text2 = self.response2.get_response(question)
+
+        doc1 = self.nlp(text1)
+        doc2 = self.nlp(text2)
+        return doc1.similarity(doc2)
+
+    def highlight_differences(self) -> str:
+        """Highlight differences between two responses.
+
+        Returns:
+            str: A formatted string showing both responses.
+        """
+
+        question = self.question
+        text1 = self.response1.get_response(question)
+        text2 = self.response2.get_response(question)
+
+        return f"Response 1 ({self.response1.get_model_name()}):\n{text1}\n\nResponse 2 ({self.response2.get_model_name()}):\n{text2}"
+
+    def save_comparison_to_csv(self, filename: str):
+        import csv
+        question = self.question
+        with open(filename, 'w', newline='') as f:
+            writer = csv.writer(f)
+            writer.writerow(['Prompt', 'Model1', 'Response1', 'Model2', 'Response2', 'Text Similarity', 'Semantic Similarity'])
+            writer.writerow([
+                question,
+                self.response1.get_model_name(),
+                self.response1.get_response(question),
+                self.response2.get_model_name(),
+                self.response2.get_response(question),
+                self.compute_similarity(),
+                self.compute_semantic_similarity()
+            ])
+
+    def plot_similarity(self):
+        similarities = [self.compute_similarity(), self.compute_semantic_similarity()]
+        labels = ['Text Similarity', 'Semantic Similarity']
+        plt.bar(labels, similarities)
+        plt.ylim(0, 1)
+        plt.title('Response Similarity Comparison')
+        plt.savefig('similarity_plot.png')
+        plt.show()
+
+class MultiAnswerAnalyzer:
+    """Class to compare responses from any number of LLMs for the same prompt.
+
+    Args:
+        responses (list[ModelResponse]): The LLM responses.
+        question (str): The prompt to send to every model.
+
+    Methods:
+        compare_all(): compare the responses pairwise.
+
+    """
+
+    def __init__(self, responses: list[ModelResponse], question: str = None):
+        """Initialize MultiAnswerAnalyzer with the responses to compare."""
+
+        self.question = question
+        self.responses = responses
+
+
+    def compare_all(self):
+        for i in range(len(self.responses)):
+            for j in range(i + 1, len(self.responses)):
+                analyzer = AnswerAnalyzer(self.responses[i], self.responses[j], self.question)
+                print(f"Comparing {self.responses[i].get_model_name()} vs {self.responses[j].get_model_name()}")
+                print(analyzer.highlight_differences())
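A sketch of driving the AnswerComparator pipeline above end to end, under the same config.yaml/API-key assumptions as elsewhere in this package; the question string is illustrative.

```python
# Fan one question out to several models, analyze, and render the report.
# Assumes config.yaml and API keys are in place; imports mirror the test file.
from src.chat2llms.base_client import BaseClient
from src.chat2llms.model_response import OpenAIResponse, GeminiResponse
from src.chat2llms.analyzer import AnswerComparator

question = "Explain the difference between TCP and UDP."
responses = [
    OpenAIResponse(BaseClient("deepseek")),
    GeminiResponse(BaseClient("gemini")),
]

comparator = AnswerComparator(responses, question)
answers = comparator.get_answers(question)       # one ModelAnswer per model
analysis = comparator.analyze_answers(answers)   # TF-IDF similarity, key differences, errors
print(comparator.generate_report(analysis, question))
```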
chat2llms-0.1.0/src/chat2llms/base_client.py
ADDED
@@ -0,0 +1,84 @@
+from abc import ABC, abstractmethod
+from typing import List, Dict, Tuple
+
+import requests
+import logging
+import yaml
+import os
+
+# Configure logging
+logging.basicConfig(
+    filename="chat_history.log",
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+# Load the configuration file
+def load_config(config_path: str = "config.yaml") -> Dict:
+    """Load the configuration file."""
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            return yaml.safe_load(f)
+    except (FileNotFoundError, yaml.YAMLError) as e:
+        logger.error(f"Failed to load the configuration file: {str(e)}")
+        raise
+
+# == Base class definition ==
+class BaseClient(ABC):
+    """Base class for large language model clients.
+
+    Args:
+        provider (str): The provider of the LLM model.
+        config (Dict): The model parameters of the LLM model.
+
+    Attributes:
+        provider (str): The provider.
+        config (Dict): Other parameters.
+    """
+
+    def __init__(self, provider: str, config: Dict = None):
+        """Initialize the client configuration."""
+
+        providers = {"gemini", "openai", "grok", "deepseek"}
+
+        if provider not in providers:
+            print("Unknown provider, using DeepSeek instead")
+            self.provider = "deepseek"
+        else:
+            self.provider = provider
+
+        config = config or load_config()  # fall back to config.yaml when no config dict is passed
+        self.timeout = config[self.provider].get("timeout", 30)
+        self.max_retries = config[self.provider].get("max_retries", 3)
+        self.max_history = config[self.provider].get("max_history", 10)  # keep the 10 most recent turns
+
+        if self.provider == "openai":
+            self.api_key = config[self.provider].get("api_key", os.getenv("OPENAI_API_KEY"))
+            self.base_url = config[self.provider].get("base_url", "https://api.openai.com/v1")
+            self.model_name = config[self.provider].get("model", "gpt-4")
+
+        elif self.provider == "gemini":
+            self.api_key = config[self.provider].get("api_key", os.getenv("GEMINI_API_KEY"))
+            self.base_url = config[self.provider].get(
+                "base_url", "https://generativelanguage.googleapis.com/v1beta"
+            )
+            self.model_name = config[self.provider].get("model", "gemini-1.5-pro")
+
+        elif self.provider == "grok":
+            self.api_key = config[self.provider].get("api_key", os.getenv("XAI_API_KEY"))
+            self.base_url = config[self.provider].get("base_url", "https://api.x.ai/v1")
+            self.model_name = config[self.provider].get("model", "grok-2-latest")
+
+        else:
+            self.api_key = config[self.provider].get("api_key", os.getenv("DEEPSEEK_API_KEY"))
+            self.base_url = config[self.provider].get("base_url", "https://api.deepseek.com")
+            self.model_name = config[self.provider].get("model", "deepseek-chat")
+
+        self.headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}",
+        }
+
+        if self.provider == "gemini":
+            self.headers.pop("Authorization")
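The tarball ships no sample config.yaml; the keys below are inferred from the .get(...) calls in BaseClient.__init__ and should be treated as assumptions rather than a documented schema.

```python
# Hypothetical config.yaml writer matching the keys BaseClient.__init__ reads.
# Every provider you instantiate needs its own section (config[provider] is
# indexed directly); omitted keys fall back to the defaults shown in the code.
import yaml

config = {
    "deepseek": {
        "api_key": "sk-...",                    # or omit and set DEEPSEEK_API_KEY instead
        "base_url": "https://api.deepseek.com",
        "model": "deepseek-chat",
        "timeout": 30,
        "max_retries": 3,
        "max_history": 10,
    },
    "gemini": {"api_key": "..."},               # other keys use the built-in defaults
}

with open("config.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(config, f)
```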
chat2llms-0.1.0/src/chat2llms/model_response.py
ADDED
@@ -0,0 +1,295 @@
+import os
+import datetime
+from markdown import markdown
+from typing import List, Dict, Tuple
+import requests
+import json
+
+from abc import ABC, abstractmethod
+
+from .base_client import BaseClient, logger
+
+import openai
+import google.generativeai as genai
+
+# === Response handling classes ===
+class ModelResponse(BaseClient):
+    """Class to handle responses from a specific LLM for a given prompt.
+
+    Args:
+        client (BaseClient): The LLM client instance.
+        prompt (str): The input prompt for the LLM.
+
+    Attributes:
+        client (BaseClient): The LLM client.
+        prompt (str): The input prompt.
+    """
+
+    def __init__(self, client: BaseClient, prompt: str = None):
+        self.client = client
+        self.prompt = prompt
+        self._response = None
+        # self.response = None
+
+        self.latency = 0  # latency=time.time() - start
+        self.tokens = 0  # tokens=len(response.text.split())
+        self.error = None
+
+        self.history: List[Dict] = []  # stores the full conversation history
+        self.max_history = 20  # maximum number of history turns to keep
+
+    """Responses without history"""
+
+    @abstractmethod
+    def call_api(self, prompt):
+        """Abstract method that calls the model API."""
+        raise NotImplementedError()
+
+    def get_model_name(self):
+        """Return the model name."""
+        return self.client.model_name
+
+    def get_prompt_str(self):
+        """Return the prompt."""
+        return self.prompt
+
+    def api_response(self, prompt: str):
+        """Return the model response."""
+        if not self._response:
+            try:
+                self.prompt = prompt
+                self._response = self.call_api(prompt)
+            except Exception as e:
+                self._response = f"Error: {str(e)}"
+        return self._response
+
+    """Responses with history"""
+    @abstractmethod
+    def get_response(
+        self,
+        prompt: str,
+        history: List[Dict] = None,
+        temperature: float = 0.7,
+        max_tokens: int = 4000,
+    ) -> str:
+        """Generate a reply synchronously."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def get_response_history(
+        self, prompt: str, history: List[Dict] = None
+    ) -> Tuple[str, List[Dict]]:
+        """Send a message and return the response plus the updated history."""
+        pass
+
+    def _build_messages(self, prompt: str, history: List[Dict]) -> List[Dict]:
+        """Build the conversation history (may be overridden by subclasses)."""
+        messages = history.copy()
+        messages.append({"role": "user", "content": prompt})
+        return messages
+
+
+class OpenAIResponse(ModelResponse):
+    """OpenAI API client implementation."""
+
+    def call_api(self, prompt):
+        client = openai.OpenAI(
+            api_key=self.client.api_key, base_url=self.client.base_url
+        )
+        response = client.chat.completions.create(
+            model=self.client.model_name, messages=[{"role": "user", "content": prompt}]
+        )
+        return response.choices[0].message.content
+
+    """Call the OpenAI API low-level HTTP interface directly."""
+
+    def get_response(self, prompt, history=None, temperature=0.7, max_tokens=4000):
+        if not self._response:
+            try:
+                self.prompt = prompt
+                messages = self._build_messages(prompt, history or [])
+                response = requests.post(
+                    f"{self.client.base_url}/chat/completions",
+                    headers=self.client.headers,
+                    json={
+                        "model": self.client.model_name,
+                        "messages": messages,
+                        "temperature": temperature,
+                        "max_tokens": max_tokens,
+                    },
+                )
+                result = response.json()
+                self._response = result["choices"][0]["message"]["content"]
+            except Exception as e:
+                self._response = f"Error: {str(e)}"
+        return self._response
+
+    def get_response_history(
+        self, message: str, history: List[Dict] = None
+    ) -> Tuple[str, List[Dict]]:
+
+        new_history = self._update_history(history or [], message, role="user")
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.client.api_key}",
+        }
+
+        payload = {
+            "model": self.client.model_name,
+            "messages": new_history[-self.client.max_history * 2 :],  # keep the most recent turns
+        }
+
+        for attempt in range(self.client.max_retries):
+            try:
+                response = requests.post(
+                    f"{self.client.base_url}/chat/completions",
+                    headers=headers,
+                    json=payload,
+                    timeout=self.client.timeout,
+                )
+                response.raise_for_status()
+                response_data = response.json()
+                assistant_message = response_data["choices"][0]["message"][
+                    "content"
+                ].strip()
+
+                updated_history = self._update_history(
+                    new_history, assistant_message, role="assistant"
+                )
+                return (
+                    assistant_message,
+                    updated_history[-self.client.max_history * 2 :],
+                )
+
+            except requests.exceptions.RequestException as e:
+                logger.warning(
+                    f"API request failed, attempt {attempt + 1}/{self.client.max_retries}: {str(e)}"
+                )
+                if attempt == self.client.max_retries - 1:
+                    raise RuntimeError("API request exceeded the maximum number of retries") from e
+
+            except (KeyError, IndexError, TypeError) as e:
+                logger.error(f"Response parsing error: {str(e)}")
+                raise RuntimeError("The server returned an invalid response format") from e
+
+        return "", history
+
+    def _build_messages(self, prompt: str, history: List[Dict]) -> List[Dict]:
+        return [*history, {"role": "user", "content": prompt}]
+
+    def _update_history(
+        self, history: List[Dict], content: str, role: str
+    ) -> List[Dict]:
+        """Update the conversation history."""
+        return history + [{"role": role, "content": content}]
+
+
+class GeminiResponse(ModelResponse):
+
+    def init_google_genai(self):
+        genai.configure(api_key=self.client.api_key)
+        self.model = genai.GenerativeModel(self.client.model_name)
+
+    """Gemini API client implementation."""
+
+    def call_api(self, prompt):
+        self.init_google_genai()
+        response = self.model.generate_content(prompt)
+        return response.text
+
+    # Call the Gemini API low-level interface directly with requests.
+    # Low-level interface: the HTTP REST or gRPC endpoints provided by Google, e.g.
+    # https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent
+    # Developers can call them with plain HTTP requests, without depending on a
+    # language-specific library. Suitable for advanced users who need flexibility, or for cross-language integration.
+
+    def get_response(self, prompt, history=None, temperature=0.7, max_tokens=4000):
+        if not self._response:
+            try:
+                self.prompt = prompt
+                messages = self._build_messages(prompt, history or [])
+                response = requests.post(
+                    f"{self.client.base_url}/models/{self.client.model_name}:generateContent?key={self.client.api_key}",
+                    headers=self.client.headers,
+                    json={
+                        "contents": messages,
+                        "generationConfig": {
+                            "temperature": temperature,
+                            "maxOutputTokens": max_tokens,
+                        },
+                    },
+                )
+                result = response.json()
+                self._response = result["candidates"][0]["content"]["parts"][0]["text"]
+            except Exception as e:
+                self._response = f"Error: {str(e)}"
+        return self._response
+
+    def get_response_history(self, prompt: str, history=None) -> Tuple[str, List[Dict]]:
+        """Run a conversation turn with history management."""
+        try:
+            # Build the request headers
+            headers = {
+                "Content-Type": "application/json",
+                "x-goog-api-key": self.client.api_key,
+            }
+
+            # Append the current turn to the history
+            self.history.append({"type": "chat", "role": "user", "content": prompt})
+
+            # Send the API request
+            response = requests.post(
+                f"{self.client.base_url}/models/{self.client.model_name}:generateContent",
+                headers=headers,
+                json=self._build_payload(prompt),
+            )
+            response.raise_for_status()
+
+            # Parse the response
+            ai_response = self._parse_response(response.json())
+
+            # Record the AI response in the history
+            self.history.append(
+                {"type": "chat", "role": "model", "content": ai_response}
+            )
+
+            return ai_response, self.history
+
+        except requests.exceptions.RequestException as e:
+            # Handle network errors
+            self.history.append({"type": "system", "content": f"Network error: {str(e)}"})
+            return f"API request failed: {str(e)}", self.history
+
+        except json.JSONDecodeError:
+            # Handle JSON parsing errors
+            return "Failed to parse the response", self.history
+
+    def _build_messages(self, prompt, history):
+        # Gemini uses a different messages structure
+        return [
+            {"parts": [{"text": msg["content"]}], "role": msg["role"].upper()}
+            for msg in super()._build_messages(prompt, history)
+        ]
+
+    def _build_payload(self, prompt: str) -> Dict:
+        """Build a request body that includes the chat history."""
+        parts = [{"text": prompt}]
+
+        # Append the history context (excluding system entries)
+        context = [
+            {"role": entry["role"], "parts": [{"text": entry["content"]}]}
+            for entry in self.history[-self.max_history :]
+            if entry["type"] == "chat"
+        ]
+
+        return {
+            "contents": context + [{"role": "user", "parts": parts}],
+            "generationConfig": {"temperature": 0.9, "maxOutputTokens": 4000},
+        }
+
+    def _parse_response(self, response: Dict) -> str:
+        """Parse the API response and extract the text content."""
+        try:
+            return response["candidates"][0]["content"]["parts"][0]["text"]
+        except (KeyError, IndexError) as e:
+            raise ValueError(f"Invalid API response structure: {str(e)}")
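To add a provider beyond the two implementations above, a subclass must supply call_api, get_response, and get_response_history. A minimal offline stub, purely illustrative and not part of the package:

```python
# Illustrative stub provider that echoes the prompt; handy for wiring tests.
# ModelResponse.__init__ only stores the client, so a simple namespace with a
# model_name attribute suffices here (a hypothetical shortcut, not package API).
from types import SimpleNamespace
from typing import Dict, List, Tuple

from src.chat2llms.model_response import ModelResponse

class EchoResponse(ModelResponse):
    def call_api(self, prompt):
        return f"echo: {prompt}"

    def get_response(self, prompt, history=None, temperature=0.7, max_tokens=4000):
        if not self._response:
            self.prompt = prompt
            self._response = self.call_api(prompt)
        return self._response

    def get_response_history(
        self, prompt: str, history: List[Dict] = None
    ) -> Tuple[str, List[Dict]]:
        history = (history or []) + [{"role": "user", "content": prompt}]
        reply = self.call_api(prompt)
        return reply, history + [{"role": "assistant", "content": reply}]

echo = EchoResponse(SimpleNamespace(model_name="echo"))
print(echo.get_response("hello"))  # -> echo: hello
```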
chat2llms-0.1.0/src/chat2llms/output_files.py
ADDED
@@ -0,0 +1,239 @@
+import datetime
+from typing import Dict, List
+
+from markdown import markdown
+
+import os
+import subprocess
+import re
+
+from difflib import Differ  # needed by add_diff_analysis below
+
+SAVE_PATH = "docs/refers/Chats"  # output directory
+
+class WriteMarkDown:
+    def __init__(self, filename: str):
+
+        self._init_md_structure(filename)
+
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        output_path = f"{SAVE_PATH}/{filename}_{timestamp}.md"
+        self.output_path = output_path
+
+    def _init_md_structure(self, filename: str):
+        """Initialize the Markdown document structure."""
+        self.md_content = [
+            f"# {filename}\n",
+            f"**Generated at**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
+        ]
+
+    def add_comparison_table(
+        self, metrics: Dict[str, List[float]], model_names: List[str]
+    ):
+        """
+        Add a metrics comparison table.
+        :param metrics: metrics dict {metric name: [model 1 value, model 2 value...]}
+        :param model_names: list of model names
+        """
+        # Build the header row
+        header = "| Metric " + " ".join([f"| {name} " for name in model_names]) + "|"
+        separator = "|-----" + " ".join(["|----" for _ in model_names]) + "|"
+
+        # Build the data rows
+        rows = []
+        for metric, values in metrics.items():
+            row = f"| **{metric}** " + " ".join([f"| {v} " for v in values]) + "|"
+            rows.append(row)
+
+        # Assemble the table
+        table = [header, separator] + rows
+        self.md_content.extend(["\n## Performance Comparison\n"] + table)
+
+    def add_conclusion(self, conclusions: List[str], highlight: str = None):
+        """
+        Add the analysis conclusions.
+        :param conclusions: list of conclusion items
+        :param highlight: key conclusion to highlight
+        """
+        conclusion_section = ["\n## Final Conclusions\n"]
+        for item in conclusions:
+            conclusion_section.append(f"- {item}")
+
+        if highlight:
+            conclusion_section.append(f"\n**Key conclusion**: {highlight}")
+
+        self.md_content.extend(conclusion_section)
+
+    def save(self, mode: str = "w"):
+        """Save the Markdown file."""
+        with open(self.output_path, mode, encoding="utf-8") as f:
+            f.write("\n".join(self.md_content))
+
+    def add_section(self, title: str, content: str, level: int = 2):
+        """Add a custom section."""
+        prefix = "#" * level
+        self.md_content.append(f"\n{prefix} {title}\n{content}")
+
+    def add_plot(self, plot_path: str, caption: str = ""):
+        """Insert a local image."""
+        self.md_content.append(f"\n![{caption}]({plot_path})")
+
+    def add_diff_analysis(self, text_a: str, text_b: str):
+        """Compare text differences."""
+        differ = Differ()
+        diff = list(differ.compare(text_a.split(), text_b.split()))
+        self.add_section("Response Difference Analysis", "```diff\n" + "\n".join(diff) + "\n```")
+
+    def add_response(self, question: str, model: str, answer: str):
+        """Save a conversation record to the Markdown file."""
+
+        self.add_section("User Question", question)
+        self.add_section(f"{model} Model Reply", answer)
+
+
+def convert_with_pandoc(input_path, output_path):
+    try:
+        subprocess.run(
+            [
+                "pandoc",
+                "-s",
+                input_path,
+                "-t",
+                "markdown+hard_line_breaks+pipe_tables",
+                "-o",
+                output_path,
+                "--extract-media=./images",
+            ],
+            check=True,
+        )
+        return True
+    except subprocess.CalledProcessError as e:
+        print(f"Conversion error: {str(e)}")
+        return False
+
+
+def docx_to_sphinx_rst(docx_path, rst_path):
+    # Step 1: convert the basic format with Pandoc
+    subprocess.run(
+        [
+            "pandoc",
+            "--extract-media=./_static",
+            "--shift-heading-level-by=1",
+            "-t",
+            "rst+auto_identifiers",
+            "-o",
+            rst_path,
+            docx_path,
+        ],
+        check=True,
+    )
+
+    # Step 2: post-processing fixes
+    with open(rst_path, "r+", encoding="utf-8") as f:
+        content = f.read()
+
+        # Fix table alignment issues
+        content = re.sub(r"(\+-+)+\+", lambda m: m.group().replace("-", "="), content)
+
+        # Convert Word comments to Sphinx warning boxes
+        content = re.sub(r"^\.\.\scomment::", ".. warning::", content, flags=re.M)
+
+        # Add docutils directive support
+        if ".. code-block::" in content:
+            content = ".. highlight:: python\n\n" + content
+
+        f.seek(0)
+        f.write(content)
+        f.truncate()
+
+
+def md_to_rst(md_path, rst_path):
+    # Step 1: convert with Pandoc
+    subprocess.run(
+        [
+            "pandoc",
+            "-f",
+            "markdown+yaml_metadata_block",
+            "-t",
+            "rst",
+            "-o",
+            rst_path,
+            md_path,
+        ],
+        check=True,
+    )
+
+    # Step 2: fix the formatting
+    with open(rst_path, "r+", encoding="utf-8") as f:
+        content = f.read()
+
+        # Fix code-block syntax
+        content = re.sub(r"^\.\. code::\s*$", ".. code-block::", content, flags=re.M)
+
+        # Convert table formatting
+        content = re.sub(r"(\+-+)+\+", lambda m: m.group().replace("-", "="), content)
+
+        # Fix image paths
+        content = re.sub(r"!\[(.*?)\]\((.*?)\)", r".. image:: \2\n   :alt: \1", content)
+
+        f.seek(0)
+        f.write(content)
+        f.truncate()
+
+
+# Usage example
+
+if __name__ == "__main__":
+    # # Initialize the analyzer
+    # analyzer = WriteMarkDown("comparison")
+
+    # # Add the performance comparison table
+    # metrics = {
+    #     "Accuracy (%)": [92.3, 89.7],
+    #     "Inference speed (ms)": [15.2, 28.4],
+    #     "Memory usage (GB)": [1.2, 0.8],
+    #     "F1 Score": [0.88, 0.85]
+    # }
+    # analyzer.add_comparison_table(metrics, ["GPT-4", "DeepSeek-R1"])
+
+    # # Add the conclusions
+    # conclusions = [
+    #     "GPT-4 leads across all accuracy metrics",
+    #     "DeepSeek-R1 excels in memory efficiency",
+    #     "Both respond fast enough for real-time use"
+    # ]
+    # analyzer.add_conclusion(conclusions,
+    #     highlight="GPT-4 is recommended for high-accuracy scenarios; choose DeepSeek-R1 in resource-constrained environments")
+
+    # # Add a custom analysis section
+    # analyzer.add_section(
+    #     "Error Case Analysis",
+    #     "```python\n# typical error example\nprint(f'1 + 1 = {1+1=}')  # DeepSeek-R1 output formatting issue\n```"
+    # )
+
+    # text_a = "GPT-4 leads across all accuracy metrics"
+    # text_b = "DeepSeek-R1 excels on memory metrics"
+    # analyzer.add_diff_analysis(text_a, text_b)
+
+    # caption = "cenjoy"
+    # plot_path = "docs/source/_static/cenjoy.png"
+    # analyzer.add_plot(plot_path, caption)
+
+    # # Generate the final report
+    # analyzer.save()
+
+    # # Document conversion usage examples
+    input_path = "xcenjoy.docx"
+    output_path = "xcenjoy.md"
+    convert_with_pandoc(input_path, output_path)
+
+    # input_path = "Letter.docx"
+    # output_path = "Letter.rst"
+    # docx_to_sphinx_rst(input_path, output_path)
+
+    # input_path = "Letter_cn.md"
+    # output_path = "Letter_cn.rst"
+    # md_to_rst(input_path, output_path)
+
+    # # rstfromdocx
+    # os.system("rstfromdocx -lurg 项目概要.docx")
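Since the in-file example above is commented out, here is a small runnable sketch of WriteMarkDown; the metric values are taken from that commented example. Note that __init__ does not create SAVE_PATH, so the directory must exist first.

```python
# Runnable sketch for WriteMarkDown; values mirror the commented in-file example.
import os
from src.chat2llms.output_files import WriteMarkDown, SAVE_PATH

os.makedirs(SAVE_PATH, exist_ok=True)  # __init__ does not create the directory

report = WriteMarkDown("comparison")
report.add_comparison_table(
    {"Accuracy (%)": [92.3, 89.7], "Inference speed (ms)": [15.2, 28.4]},
    ["GPT-4", "DeepSeek-R1"],
)
report.add_conclusion(
    ["GPT-4 leads on accuracy", "DeepSeek-R1 is lighter on memory"],
    highlight="Pick per accuracy vs. resource budget",
)
report.add_diff_analysis("GPT-4 leads on accuracy", "DeepSeek-R1 leads on memory")
report.save()
print(f"Report written to {report.output_path}")
```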
chat2llms-0.1.0/src/chat2llms/utils.py
ADDED
File without changes
chat2llms-0.1.0/src/chat2llms.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,48 @@
(48 lines, byte-identical to chat2llms-0.1.0/PKG-INFO above)
chat2llms-0.1.0/src/chat2llms.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,16 @@
+LICENSE
+README.md
+setup.py
+src/chat2llms/__init__.py
+src/chat2llms/analyzer.py
+src/chat2llms/base_client.py
+src/chat2llms/model_response.py
+src/chat2llms/output_files.py
+src/chat2llms/utils.py
+src/chat2llms.egg-info/PKG-INFO
+src/chat2llms.egg-info/SOURCES.txt
+src/chat2llms.egg-info/dependency_links.txt
+src/chat2llms.egg-info/entry_points.txt
+src/chat2llms.egg-info/requires.txt
+src/chat2llms.egg-info/top_level.txt
+tests/test_analyzer.py
chat2llms-0.1.0/src/chat2llms.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
+

chat2llms-0.1.0/src/chat2llms.egg-info/entry_points.txt
ADDED
@@ -0,0 +1,2 @@
+[console_scripts]
+chat2llms = chat2llms.cli.cli_click:main
chat2llms-0.1.0/src/chat2llms.egg-info/requires.txt
ADDED
@@ -0,0 +1 @@
+requests
chat2llms-0.1.0/src/chat2llms.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+chat2llms
chat2llms-0.1.0/tests/test_analyzer.py
ADDED
@@ -0,0 +1,35 @@
+import pytest
+
+import os, sys
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+
+from src.chat2llms.analyzer import AnswerAnalyzer
+from src.chat2llms.model_response import OpenAIResponse, GeminiResponse
+from src.chat2llms.base_client import BaseClient
+
+def test_semantic_similarity():
+    gemini = BaseClient("gemini")
+    deepseek = BaseClient("deepseek")
+    question = "What is 2 + 2?"
+    gemini_response = GeminiResponse(gemini)
+    deepseek_response = OpenAIResponse(deepseek)
+    analyzer = AnswerAnalyzer(gemini_response, deepseek_response, question)
+    semantic_sim = analyzer.compute_semantic_similarity()
+    assert semantic_sim >= -1.0  # -1.0 if SpaCy is unavailable
+
+# Usage example
+if __name__ == "__main__":
+
+    # Initialize the clients
+    gemini = BaseClient("gemini")
+    deepseek = BaseClient("deepseek")
+
+    # Get the responses
+    question = "What is 2 + 2?"
+    gemini_response = GeminiResponse(gemini)
+    deepseek_response = OpenAIResponse(deepseek)
+    analyzer = AnswerAnalyzer(gemini_response, deepseek_response, question)
+
+    print(f"Similarity: {analyzer.compute_similarity():.2f}")
+    print(f"semantic_sim: {analyzer.compute_semantic_similarity():.2f}")
+    print(analyzer.highlight_differences())