chat2llms 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
+ # LICENSE
+ MIT License
+
+ Copyright (c) 2025 Your Name
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,48 @@
+ Metadata-Version: 2.4
+ Name: chat2llms
+ Version: 0.1.0
+ Summary: A framework for comparing responses from different large language models.
+ Home-page: https://github.com/goldollarch/chat2llms
+ Author: scitao
+ Author-email: goldollarch@gmail.com
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.7
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Operating System :: OS Independent
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Text Processing :: Linguistic
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Requires-Python: >=3.7
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: requests
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license-file
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
+
+ # chat2llms
+
+ A Python library to compare responses from different large language models (e.g., DeepSeek, Gemini, OpenAI, Grok).
+
+ [![PyPI version](https://badge.fury.io/py/chat2llms.svg)](https://badge.fury.io/py/chat2llms)
+ [![Documentation Status](https://readthedocs.org/projects/chat2llms/badge/?version=latest)](https://chat2llms.readthedocs.io/en/latest/?badge=latest)
+
+ ## Installation
+
+ ```bash
+ pip install chat2llms
+ ```
@@ -0,0 +1,12 @@
+ # chat2llms
+
+ A Python library to compare responses from different large language models (e.g., DeepSeek, Gemini, OpenAI, Grok).
+
+ [![PyPI version](https://badge.fury.io/py/chat2llms.svg)](https://badge.fury.io/py/chat2llms)
+ [![Documentation Status](https://readthedocs.org/projects/chat2llms/badge/?version=latest)](https://chat2llms.readthedocs.io/en/latest/?badge=latest)
+
+ ## Installation
+
+ ```bash
+ pip install chat2llms
+ ```
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,57 @@
+ import os
+ from setuptools import setup, find_packages
+
+ # Utility function to read the README file.
+ # Used for the long_description. It's nice, because now 1) we have a top level
+ # README file and 2) it's easier to type in the README file than to put a raw
+ # string in below ...
+ def read(fname):
+     with open(os.path.join(os.path.dirname(__file__), fname), encoding='utf-8') as f:
+         return f.read()
+
+ setup(
+     name='chat2llms',  # package name
+     version='0.1.0',  # package version - bump this when releasing a new version
+     author='scitao',  # author name
+     author_email='goldollarch@gmail.com',  # author email
+     description='A framework for comparing responses from different large language models.',  # short description
+     long_description=read('README.md'),  # long description, read from README.md
+     long_description_content_type='text/markdown',  # format of the long description
+     url='https://github.com/goldollarch/chat2llms',  # project homepage or other related URL
+     packages=find_packages(where='src'),  # find all packages under src
+     package_dir={'': 'src'},  # the package root lives under src
+     install_requires=[  # third-party runtime dependencies
+         'requests',
+         # Add other libraries as the project actually needs them.
+         # For example, specific LLM clients may require their SDKs:
+         # 'openai>=1.0.0',
+         # 'google-generativeai>=0.5.0',
+         # 'deepseek>=1.0.0',  # assuming such a library exists
+     ],
+     classifiers=[  # package classifiers; they help users find and understand the package
+         'Development Status :: 3 - Alpha',  # development status
+         'Intended Audience :: Developers',  # target audience
+         'License :: OSI Approved :: MIT License',  # license info (adjust to the actual license)
+         'Programming Language :: Python :: 3',
+         'Programming Language :: Python :: 3.7',
+         'Programming Language :: Python :: 3.8',
+         'Programming Language :: Python :: 3.9',
+         'Programming Language :: Python :: 3.10',
+         'Programming Language :: Python :: 3.11',
+         'Programming Language :: Python :: 3.12',
+         'Operating System :: OS Independent',
+         'Topic :: Scientific/Engineering :: Artificial Intelligence',
+         'Topic :: Text Processing :: Linguistic',
+         'Topic :: Software Development :: Libraries :: Python Modules',
+     ],
+     python_requires='>=3.7',  # minimum required Python version
+     entry_points={  # command-line entry points (for a CLI tool)
+         'console_scripts': [
+             'chat2llms=chat2llms.cli.cli_click:main'  # example: the CLI entry point lives here
+         ]
+     },
+     # If the package ships non-code files (e.g. data files), package_data or
+     # include_package_data may be needed:
+     # include_package_data=True,
+     # package_data={
+     #     'chat2llms': ['data/*.json'],  # example: include JSON files under chat2llms/data
+     # },
+ )
@@ -0,0 +1,7 @@
+ # src/chat2llms/__init__.py
+ __version__ = '0.1.0'
+
+ from .cli.cli_click import main
+ from .base_client import BaseClient, logger, load_config
+ from .model_response import ModelResponse, OpenAIResponse, GeminiResponse
+ from .analyzer import AnswerComparator, AnswerAnalyzer, MultiAnswerAnalyzer
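
The `__init__.py` above defines the package's public surface. A minimal sketch of how the pieces compose, following the usage in `tests/test_analyzer.py`; it assumes a valid `config.yaml` with API keys, and note that `__init__.py` imports `chat2llms.cli.cli_click`, which does not appear in SOURCES.txt, so the top-level import below only succeeds if that module actually ships with the package:

```python
# Minimal usage sketch, mirroring tests/test_analyzer.py.
# Assumes config.yaml and the provider API keys are available.
from chat2llms import BaseClient, GeminiResponse, OpenAIResponse, AnswerAnalyzer

gemini = BaseClient("gemini")
deepseek = BaseClient("deepseek")  # DeepSeek is called through the OpenAI-style client

question = "What is 2 + 2?"
analyzer = AnswerAnalyzer(GeminiResponse(gemini), OpenAIResponse(deepseek), question)

print(f"Text similarity: {analyzer.compute_similarity():.2f}")
print(f"Semantic similarity: {analyzer.compute_semantic_similarity():.2f}")
print(analyzer.highlight_differences())
```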
@@ -0,0 +1,271 @@
+ import os
+ import re
+ import time
+ from dataclasses import dataclass
+ from typing import Dict, List, Optional
+
+ import numpy as np
+ import matplotlib.pyplot as plt
+
+ import spacy
+ import difflib
+ from difflib import SequenceMatcher
+
+ from sklearn.metrics.pairwise import cosine_similarity
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ # from sentence_transformers import SentenceTransformer, util
+
+ from .model_response import ModelResponse, OpenAIResponse, GeminiResponse
+
+ ##################################
+
+ @dataclass
+ class ModelAnswer:
+     """Data structure for a single model's answer."""
+
+     model_name: str
+     answer: str  # answer=response.text
+     latency: float  # latency=time.time() - start
+     tokens: int  # tokens=len(response.text.split())
+     error: Optional[str] = None
+
+ class AnswerComparator:
+     """Class to compare responses from LLMs for the same question.
+
+     Args:
+         responses (List[ModelResponse]): The LLM responses.
+         question (str, optional): The question to send to every model.
+     """
+
+     def __init__(self, responses: List[ModelResponse], question: str = None):
+
+         self.model_names = [r.get_model_name() for r in responses]
+         self.responses = responses
+         self.question = question
+
+         self.vectorizer = TfidfVectorizer(stop_words="english")
+
+         # # Initialize a model for semantic similarity computation
+         # self.sim_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
+
+     def get_answers(self, question: str) -> List[ModelAnswer]:
+         """Collect answers from all models."""
+         answers = [self.get_model_answer(question, r) for r in self.responses]
+         return answers
+
+     def get_model_answer(self, question: str, response: ModelResponse):
+         """Get a single model's answer."""
+         model_name = response.get_model_name()
+         try:
+             start = time.time()
+             answer = response.get_response(question)
+             return ModelAnswer(
+                 model_name=model_name,
+                 answer=answer,
+                 latency=time.time() - start,
+                 tokens=len(answer.split()),
+             )
+         except Exception as e:
+             return ModelAnswer(model_name, "", 0, 0, str(e))
+
+     def analyze_answers(self, responses: List[ModelAnswer]) -> Dict:
+         """Run the comparative analysis."""
+         valid_responses = [r for r in responses if not r.error]
+
+         # Semantic similarity analysis
+         answers = [r.answer for r in valid_responses]
+         tfidf = self.vectorizer.fit_transform(answers)
+         similarity = cosine_similarity(tfidf)
+
+         # Extract key differences
+         differences = self._extract_differences(answers)
+
+         return {
+             "similarity_matrix": similarity,
+             "performance": {
+                 r.model_name: {"latency": r.latency, "tokens": r.tokens}
+                 for r in valid_responses
+             },
+             "key_differences": differences,
+             "errors": [r for r in responses if r.error],
+         }
+
+     def _extract_differences(self, answers: List[str]) -> List[str]:
+         """Extract key points of difference."""
+         noun_phrases = []
+         for ans in answers:
+             phrases = re.findall(
+                 r"\b[A-Z][a-z]+\s[A-Z][a-z]+\b|\b\w+ion\b|\b\w+ment\b", ans
+             )
+             noun_phrases.extend(phrases)
+
+         unique_phrases = list(set(noun_phrases))
+         return [
+             phrase
+             for phrase in unique_phrases
+             if sum(1 for ans in answers if phrase in ans) < len(answers)
+         ][:5]
+
+     def generate_report(self, analysis: Dict, question: str) -> str:
+         """Generate the analysis report."""
+         report = [
+             f"# Question Analysis Report\n**Question**: {question}\n",
+             "## Performance Comparison",
+             *self._format_performance(analysis["performance"]),
+             "\n## Semantic Similarity",
+             self._format_matrix(analysis["similarity_matrix"]),
+             "\n## Key Differences",
+             *[f"- {diff}" for diff in analysis["key_differences"]],
+             "\n## Error Log",
+             *self._format_errors(analysis["errors"]),
+         ]
+         return "\n".join(report)
+
+     def _format_performance(self, perf: Dict) -> List[str]:
+         return [
+             f"### {model}\n"
+             f"- Latency: {data['latency']:.2f}s\n"
+             f"- Tokens used: {data['tokens']}"
+             for model, data in perf.items()
+         ]
+
+     def _format_matrix(self, matrix: np.ndarray) -> str:
+         return (
+             "```\n" + "\n".join(["\t".join(map(str, row)) for row in matrix]) + "\n```"
+         )
+
+     def _format_errors(self, errors: List[ModelAnswer]) -> List[str]:
+         return [f"- {err.model_name}: {err.error}" for err in errors if err.error]
+
+
+ # #############################
+
+ class AnswerAnalyzer:
+     """Class to compare responses from two LLMs for the same prompt.
+
+     Args:
+         response1 (ModelResponse): The first LLM response.
+         response2 (ModelResponse): The second LLM response.
+
+     Attributes:
+         response1 (ModelResponse): The first response object.
+         response2 (ModelResponse): The second response object.
+         nlp (spacy.language.Language): SpaCy NLP model (optional).
+     """
+
+     def __init__(self, response1: ModelResponse, response2: ModelResponse, question: str = None):
+
+         self.question = question
+         self.response1 = response1
+         self.response2 = response2
+
+         self.nlp: Optional[spacy.language.Language] = None
+
+         try:
+             self.nlp = spacy.load("en_core_web_sm")
+         except (ImportError, OSError):
+             # spacy.load raises OSError when the model is missing
+             print("SpaCy not installed or model not found. Install with: pip install spacy && python -m spacy download en_core_web_sm")
+
+     def compute_similarity(self) -> float:
+         """Compute similarity between two responses using SequenceMatcher.
+
+         Returns:
+             float: Similarity score between 0.0 and 1.0.
+         """
+         question = self.question
+         text1 = self.response1.get_response(question)
+         text2 = self.response2.get_response(question)
+
+         matcher = SequenceMatcher(None, text1, text2)
+         return matcher.ratio()
+
+     def compute_semantic_similarity(self) -> float:
+         """Compute semantic similarity using SpaCy.
+
+         Returns:
+             float: Semantic similarity score between 0.0 and 1.0, or -1.0 if SpaCy is unavailable.
+         """
+         if self.nlp is None:
+             return -1.0
+
+         question = self.question
+         text1 = self.response1.get_response(question)
+         text2 = self.response2.get_response(question)
+
+         doc1 = self.nlp(text1)
+         doc2 = self.nlp(text2)
+         return doc1.similarity(doc2)
+
+     def highlight_differences(self) -> str:
+         """Highlight differences between two responses.
+
+         Returns:
+             str: A formatted string showing both responses.
+         """
+
+         question = self.question
+         text1 = self.response1.get_response(question)
+         text2 = self.response2.get_response(question)
+
+         return f"Response 1 ({self.response1.get_model_name()}):\n{text1}\n\nResponse 2 ({self.response2.get_model_name()}):\n{text2}"
+
+     def save_comparison_to_csv(self, filename: str):
+         import csv
+         question = self.question
+         with open(filename, 'w', newline='') as f:
+             writer = csv.writer(f)
+             writer.writerow(['Prompt', 'Model1', 'Response1', 'Model2', 'Response2', 'Text Similarity', 'Semantic Similarity'])
+             writer.writerow([
+                 question,
+                 self.response1.get_model_name(),
+                 self.response1.get_response(question),
+                 self.response2.get_model_name(),
+                 self.response2.get_response(question),
+                 self.compute_similarity(),
+                 self.compute_semantic_similarity()
+             ])
+
+     def plot_similarity(self):
+         similarities = [self.compute_similarity(), self.compute_semantic_similarity()]
+         labels = ['Text Similarity', 'Semantic Similarity']
+         plt.bar(labels, similarities)
+         plt.ylim(0, 1)
+         plt.title('Response Similarity Comparison')
+         plt.savefig('similarity_plot.png')
+         plt.show()
+
+ class MultiAnswerAnalyzer:
+     """Class to compare responses from any number of LLMs for the same prompt.
+
+     Args:
+         responses (list[ModelResponse]): The LLM responses (from many LLMs, not only two).
+         question (str, optional): The prompt shared by all responses.
+
+     Methods:
+         compare_all(): Compare every pair of responses.
+     """
+
+     def __init__(self, responses: list[ModelResponse], question: str = None):
+         """Initialize MultiAnswerAnalyzer with the responses and the shared question."""
+
+         self.question = question
+         self.responses = responses
+
+     def compare_all(self):
+         for i in range(len(self.responses)):
+             for j in range(i + 1, len(self.responses)):
+                 analyzer = AnswerAnalyzer(self.responses[i], self.responses[j], self.question)
+                 print(f"Comparing {self.responses[i].get_model_name()} vs {self.responses[j].get_model_name()}")
+                 print(analyzer.highlight_differences())
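
For reference, a hypothetical end-to-end run of the `AnswerComparator` pipeline defined above — collect answers, analyze, render a Markdown report. The client setup is assumed, matching the two-model arrangement in the test suite:

```python
# Hypothetical AnswerComparator run; assumes configured clients (config.yaml + API keys).
from chat2llms import BaseClient, GeminiResponse, OpenAIResponse, AnswerComparator

responses = [
    GeminiResponse(BaseClient("gemini")),
    OpenAIResponse(BaseClient("deepseek")),
]
comparator = AnswerComparator(responses)

question = "Explain the difference between TCP and UDP."
answers = comparator.get_answers(question)       # List[ModelAnswer] with latency/token counts
analysis = comparator.analyze_answers(answers)   # TF-IDF cosine similarity + key differences
print(comparator.generate_report(analysis, question))
```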
@@ -0,0 +1,84 @@
+ from abc import ABC, abstractmethod
+ from typing import List, Dict, Tuple
+
+ import requests
+ import logging
+ import yaml
+ import os
+
+ # Configure logging
+ logging.basicConfig(
+     filename="chat_history.log",
+     level=logging.INFO,
+     format="%(asctime)s - %(levelname)s - %(message)s",
+ )
+ logger = logging.getLogger(__name__)
+
+ # Load the configuration file
+ def load_config(config_path: str = "config.yaml") -> Dict:
+     """Load the configuration file."""
+     try:
+         with open(config_path, "r", encoding="utf-8") as f:
+             return yaml.safe_load(f)
+     except (FileNotFoundError, yaml.YAMLError) as e:
+         logger.error(f"Failed to load the configuration file: {str(e)}")
+         raise
+
+ # == Base class definition ==
+ class BaseClient(ABC):
+     """Base class for large language model clients.
+
+     Args:
+         provider (str): The provider of the LLM model.
+         config (Dict): The model parameters of the LLM model.
+
+     Attributes:
+         provider (str): The provider.
+         config (Dict): Other parameters.
+     """
+
+     def __init__(self, provider: str, config: Dict = None):
+         """Initialize the client configuration."""
+
+         providers = {"gemini", "openai", "grok", "deepseek"}
+
+         if provider not in providers:
+             print("Unknown provider; using DeepSeek instead")
+             self.provider = "deepseek"
+         else:
+             self.provider = provider
+
+         config = config or load_config()  # fall back to config.yaml when no config dict is passed
+         self.timeout = config[self.provider].get("timeout", 30)
+         self.max_retries = config[self.provider].get("max_retries", 3)
+         self.max_history = config[self.provider].get("max_history", 10)  # keep the 10 most recent turns
+
+         if self.provider == "openai":
+             self.api_key = config[self.provider].get("api_key", os.getenv("OPENAI_API_KEY"))
+             self.base_url = config[self.provider].get("base_url", "https://api.openai.com/v1")
+             self.model_name = config[self.provider].get("model", "gpt-4")
+
+         elif self.provider == "gemini":
+             self.api_key = config[self.provider].get("api_key", os.getenv("GEMINI_API_KEY"))
+             self.base_url = config[self.provider].get(
+                 "base_url", "https://generativelanguage.googleapis.com/v1beta"
+             )
+             self.model_name = config[self.provider].get("model", "gemini-1.5-pro")
+
+         elif self.provider == "grok":
+             self.api_key = config[self.provider].get("api_key", os.getenv("XAI_API_KEY"))
+             self.base_url = config[self.provider].get("base_url", "https://api.x.ai/v1")
+             self.model_name = config[self.provider].get("model", "grok-2-latest")
+
+         else:
+             self.api_key = config[self.provider].get("api_key", os.getenv("DEEPSEEK_API_KEY"))
+             self.base_url = config[self.provider].get("base_url", "https://api.deepseek.com")
+             self.model_name = config[self.provider].get("model", "deepseek-chat")
+
+         self.headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {self.api_key}",
+         }
+
+         if self.provider == "gemini":
+             # Gemini authenticates via a URL key or x-goog-api-key header, not a Bearer token
+             self.headers.pop("Authorization")
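
`BaseClient` reads `config.yaml` via `load_config` whenever no config dict is supplied, and indexes it by provider name, so the file must contain a mapping for each provider you construct. A sketch of a minimal file, using only keys the constructor actually reads; all values are placeholders (note also that PyYAML, which `base_client.py` imports, is absent from `install_requires`):

```python
# Sketch of a minimal config.yaml for BaseClient; all values are placeholders.
# Only keys read in base_client.py appear: timeout, max_retries, max_history,
# api_key, base_url, model (each falls back to an env var or built-in default).
import yaml

config_text = """
deepseek:
  api_key: "sk-..."              # omit to fall back to the DEEPSEEK_API_KEY env var
  base_url: "https://api.deepseek.com"
  model: "deepseek-chat"
  timeout: 30
  max_retries: 3
  max_history: 10
gemini:
  model: "gemini-1.5-pro"        # api_key falls back to GEMINI_API_KEY
"""

with open("config.yaml", "w", encoding="utf-8") as f:
    f.write(config_text)

print(yaml.safe_load(config_text)["deepseek"]["model"])  # -> deepseek-chat
```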
@@ -0,0 +1,295 @@
+ import os
+ import datetime
+ from markdown import markdown
+ from typing import List, Dict, Tuple
+ import requests
+ import json
+
+ from abc import ABC, abstractmethod
+
+ from .base_client import BaseClient, logger
+
+ import openai
+ import google.generativeai as genai
+
+ # === Response handling classes ===
+ class ModelResponse(BaseClient):
+     """Class to handle responses from a specific LLM for a given prompt.
+
+     Args:
+         client (BaseClient): The LLM client instance.
+         prompt (str): The input prompt for the LLM.
+
+     Attributes:
+         client (BaseClient): The LLM client.
+         prompt (str): The input prompt.
+     """
+
+     def __init__(self, client: BaseClient, prompt: str = None):
+         self.client = client
+         self.prompt = prompt
+         self._response = None
+         # self.response = None
+
+         self.latency = 0  # latency=time.time() - start
+         self.tokens = 0  # tokens=len(response.text.split())
+         self.error = None
+
+         self.history: List[Dict] = []  # full conversation history
+         self.max_history = 20  # maximum number of turns to keep
+
+     # -- Responses without history --
+
+     @abstractmethod
+     def call_api(self, prompt):
+         """Abstract method for calling the model API."""
+         raise NotImplementedError()
+
+     def get_model_name(self):
+         """Get the model name."""
+         return self.client.model_name
+
+     def get_prompt_str(self):
+         """Get the prompt string."""
+         return self.prompt
+
+     def api_response(self, prompt: str):
+         """Get the model response."""
+         if not self._response:
+             try:
+                 self.prompt = prompt
+                 self._response = self.call_api(prompt)
+             except Exception as e:
+                 self._response = f"Error: {str(e)}"
+         return self._response
+
+     # -- Responses with history --
+     @abstractmethod
+     def get_response(
+         self,
+         prompt: str,
+         history: List[Dict] = None,
+         temperature: float = 0.7,
+         max_tokens: int = 4000,
+     ) -> str:
+         """Generate a reply synchronously."""
+         raise NotImplementedError()
+
+     @abstractmethod
+     def get_response_history(
+         self, prompt: str, history: List[Dict] = None
+     ) -> Tuple[str, List[Dict]]:
+         """Send a message and return the response plus the updated history."""
+         pass
+
+     def _build_messages(self, prompt: str, history: List[Dict]) -> List[Dict]:
+         """Construct the conversation history (may be overridden by subclasses)."""
+         messages = history.copy()
+         messages.append({"role": "user", "content": prompt})
+         return messages
+
+
+ class OpenAIResponse(ModelResponse):
+     """OpenAI API client implementation."""
+
+     def call_api(self, prompt):
+         client = openai.OpenAI(
+             api_key=self.client.api_key, base_url=self.client.base_url
+         )
+         response = client.chat.completions.create(
+             model=self.client.model_name, messages=[{"role": "user", "content": prompt}]
+         )
+         return response.choices[0].message.content
+
+     # Call the OpenAI HTTP API directly.
+
+     def get_response(self, prompt, history=None, temperature=0.7, max_tokens=4000):
+         if not self._response:
+             try:
+                 self.prompt = prompt
+                 messages = self._build_messages(prompt, history or [])
+                 response = requests.post(
+                     f"{self.client.base_url}/chat/completions",
+                     headers=self.client.headers,
+                     json={
+                         "model": self.client.model_name,
+                         "messages": messages,
+                         "temperature": temperature,
+                         "max_tokens": max_tokens,
+                     },
+                 )
+                 result = response.json()
+                 self._response = result["choices"][0]["message"]["content"]
+             except Exception as e:
+                 self._response = f"Error: {str(e)}"
+         return self._response
+
+     def get_response_history(
+         self, message: str, history: List[Dict] = []
+     ) -> Tuple[str, List[Dict]]:
+
+         new_history = self._update_history(history, message, role="user")
+
+         headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {self.client.api_key}",
+         }
+
+         payload = {
+             "model": self.client.model_name,
+             "messages": new_history[-self.client.max_history * 2 :],  # keep the most recent turns
+         }
+
+         for attempt in range(self.client.max_retries):
+             try:
+                 response = requests.post(
+                     f"{self.client.base_url}/chat/completions",
+                     headers=headers,
+                     json=payload,
+                     timeout=self.client.timeout,
+                 )
+                 response.raise_for_status()
+                 response_data = response.json()
+                 assistant_message = response_data["choices"][0]["message"]["content"].strip()
+
+                 updated_history = self._update_history(
+                     new_history, assistant_message, role="assistant"
+                 )
+                 return (
+                     assistant_message,
+                     updated_history[-self.client.max_history * 2 :],
+                 )
+
+             except requests.exceptions.RequestException as e:
+                 logger.warning(
+                     f"API request failed, attempt {attempt + 1}/{self.client.max_retries}: {str(e)}"
+                 )
+                 if attempt == self.client.max_retries - 1:
+                     raise RuntimeError("API request exceeded the maximum number of retries") from e
+
+             except (KeyError, IndexError, TypeError) as e:
+                 logger.error(f"Response parsing error: {str(e)}")
+                 raise RuntimeError("The server returned an invalid response format") from e
+
+         return "", history
+
+     def _build_messages(self, prompt: str, history: List[Dict]) -> List[Dict]:
+         return [*history, {"role": "user", "content": prompt}]
+
+     def _update_history(
+         self, history: List[Dict], content: str, role: str
+     ) -> List[Dict]:
+         """Update the conversation history."""
+         return history + [{"role": role, "content": content}]
+
+
+ class GeminiResponse(ModelResponse):
+     """Gemini API client implementation."""
+
+     def init_google_genai(self):
+         genai.configure(api_key=self.client.api_key)
+         self.model = genai.GenerativeModel(self.client.model_name)
+
+     def call_api(self, prompt):
+         self.init_google_genai()
+         response = self.model.generate_content(prompt)
+         return response.text
+
+     # Call the Gemini API's low-level interface directly with requests.
+     # Low-level interface: the HTTP REST or gRPC endpoints provided by Google, such as
+     # https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent
+     # Developers can call it with plain HTTP requests, without depending on a
+     # language-specific library. Suitable for advanced users who need flexibility,
+     # or for cross-language integration.
+
+     def get_response(self, prompt, history=None, temperature=0.7, max_tokens=4000):
+         if not self._response:
+             try:
+                 self.prompt = prompt
+                 messages = self._build_messages(prompt, history or [])
+                 response = requests.post(
+                     f"{self.client.base_url}/models/{self.client.model_name}:generateContent?key={self.client.api_key}",
+                     headers=self.client.headers,
+                     json={
+                         "contents": messages,
+                         "generationConfig": {
+                             "temperature": temperature,
+                             "maxOutputTokens": max_tokens,
+                         },
+                     },
+                 )
+                 result = response.json()
+                 self._response = result["candidates"][0]["content"]["parts"][0]["text"]
+             except Exception as e:
+                 self._response = f"Error: {str(e)}"
+         return self._response
+
+     def get_response_history(self, prompt: str, history=None) -> Tuple[str, List[Dict]]:
+         """Run a conversation turn with history management."""
+         try:
+             # Build request headers
+             headers = {
+                 "Content-Type": "application/json",
+                 "x-goog-api-key": self.client.api_key,
+             }
+
+             # Build the request body before recording the prompt in the history,
+             # so the current prompt is not duplicated inside "contents"
+             payload = self._build_payload(prompt)
+             self.history.append({"type": "chat", "role": "user", "content": prompt})
+
+             # Send the API request
+             response = requests.post(
+                 f"{self.client.base_url}/models/{self.client.model_name}:generateContent",
+                 headers=headers,
+                 json=payload,
+             )
+             response.raise_for_status()
+
+             # Parse the response
+             ai_response = self._parse_response(response.json())
+
+             # Record the model's reply in the history
+             self.history.append(
+                 {"type": "chat", "role": "model", "content": ai_response}
+             )
+
+             return ai_response, self.history
+
+         except requests.exceptions.RequestException as e:
+             # Network error handling
+             self.history.append({"type": "system", "content": f"Network error: {str(e)}"})
+             return f"API request failed: {str(e)}", self.history
+
+         except json.JSONDecodeError:
+             # JSON parsing error
+             return "Failed to parse the response", self.history
+
+     def _build_messages(self, prompt, history):
+         # Gemini uses a different message structure and expects the lowercase
+         # roles "user" and "model" rather than OpenAI-style roles
+         return [
+             {
+                 "parts": [{"text": msg["content"]}],
+                 "role": "model" if msg["role"] == "assistant" else "user",
+             }
+             for msg in super()._build_messages(prompt, history)
+         ]
+
+     def _build_payload(self, prompt: str) -> Dict:
+         """Build a request body that includes the chat history."""
+         parts = [{"text": prompt}]
+
+         # Add historical context (excluding system entries)
+         context = [
+             {"role": entry["role"], "parts": [{"text": entry["content"]}]}
+             for entry in self.history[-self.max_history :]
+             if entry["type"] == "chat"
+         ]
+
+         return {
+             "contents": context + [{"role": "user", "parts": parts}],
+             "generationConfig": {"temperature": 0.9, "maxOutputTokens": 4000},
+         }
+
+     def _parse_response(self, response: Dict) -> str:
+         """Parse the API response and extract the text content."""
+         try:
+             return response["candidates"][0]["content"]["parts"][0]["text"]
+         except (KeyError, IndexError) as e:
+             raise ValueError(f"Invalid API response structure: {str(e)}")
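
The module offers two calling styles: the cached one-shot `get_response`, and `get_response_history`, which retries on failure and threads the conversation history through each turn. A hypothetical two-turn exchange over the OpenAI-compatible path (a configured client is assumed):

```python
# Hypothetical two-turn chat via OpenAIResponse.get_response_history
# (assumes config.yaml and an API key for the "deepseek" provider).
from chat2llms import BaseClient, OpenAIResponse

chat = OpenAIResponse(BaseClient("deepseek"))

# One-shot call; the result is cached in _response after the first use.
print(chat.get_response("Name three sorting algorithms."))

# History-threading calls: each returns (reply, updated_history).
reply, history = chat.get_response_history("Name three sorting algorithms.", [])
reply, history = chat.get_response_history("Which of them are stable?", history)
print(reply)
```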
@@ -0,0 +1,239 @@
+ import datetime
+ from typing import Dict, List
+
+ from markdown import markdown
+
+ import os
+ import subprocess
+ import re
+
+ from difflib import Differ  # used by add_diff_analysis below
+
+ SAVE_PATH = "docs/refers/Chats"  # output directory
+
+ class WriteMarkDown:
+     def __init__(self, filename: str):
+
+         self._init_md_structure(filename)
+
+         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+         output_path = f"{SAVE_PATH}/{filename}_{timestamp}.md"
+         self.output_path = output_path
+
+     def _init_md_structure(self, filename: str):
+         """Initialize the Markdown document structure."""
+         self.md_content = [
+             f"# {filename}\n",
+             f"**Generated at**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
+         ]
+
+     def add_comparison_table(
+         self, metrics: Dict[str, List[float]], model_names: List[str]
+     ):
+         """
+         Add a metric comparison table.
+         :param metrics: metric dictionary {metric name: [model 1 value, model 2 value, ...]}
+         :param model_names: list of model names
+         """
+         # Build the header row
+         header = "| Metric " + " ".join([f"| {name} " for name in model_names]) + "|"
+         separator = "|-----" + " ".join(["|----" for _ in model_names]) + "|"
+
+         # Build the data rows
+         rows = []
+         for metric, values in metrics.items():
+             row = f"| **{metric}** " + " ".join([f"| {v} " for v in values]) + "|"
+             rows.append(row)
+
+         # Assemble the table
+         table = [header, separator] + rows
+         self.md_content.extend(["\n## Performance Comparison\n"] + table)
+
+     def add_conclusion(self, conclusions: List[str], highlight: str = None):
+         """
+         Add the analysis conclusions.
+         :param conclusions: list of conclusion items
+         :param highlight: key conclusion to highlight
+         """
+         conclusion_section = ["\n## Final Conclusions\n"]
+         for item in conclusions:
+             conclusion_section.append(f"- {item}")
+
+         if highlight:
+             conclusion_section.append(f"\n**Key conclusion**: {highlight}")
+
+         self.md_content.extend(conclusion_section)
+
+     def save(self, mode: str = "w"):
+         """Save the Markdown file."""
+         with open(self.output_path, mode, encoding="utf-8") as f:
+             f.write("\n".join(self.md_content))
+
+     def add_section(self, title: str, content: str, level: int = 2):
+         """Add a custom section."""
+         prefix = "#" * level
+         self.md_content.append(f"\n{prefix} {title}\n{content}")
+
+     def add_plot(self, plot_path: str, caption: str = ""):
+         """Insert a local image."""
+         self.md_content.append(f"\n![{caption}]({plot_path})")
+
+     def add_diff_analysis(self, text_a: str, text_b: str):
+         """Compare two texts and record the differences."""
+         differ = Differ()
+         diff = list(differ.compare(text_a.split(), text_b.split()))
+         self.add_section("Response Difference Analysis", "```diff\n" + "\n".join(diff) + "\n```")
+
+     def add_response(self, question: str, model: str, answer: str):
+         """Save a conversation record to the Markdown file."""
+
+         self.add_section("User Question", question)
+         self.add_section(f"{model} Model Reply", answer)
+
+
+ def convert_with_pandoc(input_path, output_path):
+     try:
+         subprocess.run(
+             [
+                 "pandoc",
+                 "-s",
+                 input_path,
+                 "-t",
+                 "markdown+hard_line_breaks+pipe_tables",
+                 "-o",
+                 output_path,
+                 "--extract-media=./images",
+             ],
+             check=True,
+         )
+         return True
+     except subprocess.CalledProcessError as e:
+         print(f"Conversion error: {str(e)}")
+         return False
+
+
+ def docx_to_sphinx_rst(docx_path, rst_path):
+     # Step 1: use Pandoc for the basic format conversion
+     subprocess.run(
+         [
+             "pandoc",
+             "--extract-media=./_static",
+             "--shift-heading-level-by=1",
+             "-t",
+             "rst+auto_identifiers",
+             "-o",
+             rst_path,
+             docx_path,
+         ],
+         check=True,
+     )
+
+     # Step 2: post-processing fixes
+     with open(rst_path, "r+", encoding="utf-8") as f:
+         content = f.read()
+
+         # Fix table alignment issues
+         content = re.sub(r"(\+-+)+\+", lambda m: m.group().replace("-", "="), content)
+
+         # Convert Word comments into Sphinx warning boxes
+         content = re.sub(r"^\.\.\scomment::", ".. warning::", content, flags=re.M)
+
+         # Add docutils special-directive support
+         if ".. code-block::" in content:
+             content = ".. highlight:: python\n\n" + content
+
+         f.seek(0)
+         f.write(content)
+         f.truncate()
+
+
+ def md_to_rst(md_path, rst_path):
+     # Step 1: convert with Pandoc
+     subprocess.run(
+         [
+             "pandoc",
+             "-f",
+             "markdown+yaml_metadata_block",
+             "-t",
+             "rst",
+             "-o",
+             rst_path,
+             md_path,
+         ],
+         check=True,
+     )
+
+     # Step 2: formatting fixes
+     with open(rst_path, "r+", encoding="utf-8") as f:
+         content = f.read()
+
+         # Fix code-block syntax
+         content = re.sub(r"^\.\. code::\s*$", ".. code-block::", content, flags=re.M)
+
+         # Convert table formatting
+         content = re.sub(r"(\+-+)+\+", lambda m: m.group().replace("-", "="), content)
+
+         # Fix image paths (Markdown image syntax -> RST image directive)
+         content = re.sub(r"!\[(.*?)\]\((.*?)\)", r".. image:: \2\n   :alt: \1", content)
+
+         f.seek(0)
+         f.write(content)
+         f.truncate()
+
+
+ # Usage example
+
+ if __name__ == "__main__":
+     # # Initialize the report writer
+     # analyzer = WriteMarkDown("comparison")
+
+     # # Add a performance comparison table
+     # metrics = {
+     #     "Accuracy (%)": [92.3, 89.7],
+     #     "Inference speed (ms)": [15.2, 28.4],
+     #     "Memory usage (GB)": [1.2, 0.8],
+     #     "F1 Score": [0.88, 0.85]
+     # }
+     # analyzer.add_comparison_table(metrics, ["GPT-4", "DeepSeek-R1"])
+
+     # # Add conclusions
+     # conclusions = [
+     #     "GPT-4 leads across all accuracy metrics",
+     #     "DeepSeek-R1 excels in memory efficiency",
+     #     "Both models' response speed meets real-time requirements"
+     # ]
+     # analyzer.add_conclusion(conclusions,
+     #     highlight="Recommend GPT-4 for high-accuracy scenarios and DeepSeek-R1 for resource-constrained environments")
+
+     # # Add a custom analysis section
+     # analyzer.add_section(
+     #     "Error Case Analysis",
+     #     "```python\n# Typical error example\nprint(f'1 + 1 = {1+1=}')  # DeepSeek-R1 output formatting issue\n```"
+     # )
+
+     # text_a = "GPT-4 leads across all accuracy metrics"
+     # text_b = "DeepSeek-R1 excels in memory metrics"
+     # analyzer.add_diff_analysis(text_a, text_b)
+
+     # caption = "cenjoy"
+     # plot_path = "docs/source/_static/cenjoy.png"
+     # analyzer.add_plot(plot_path, caption)
+
+     # # Generate the final report
+     # analyzer.save()
+
+     # # Document conversion usage example
+     input_path = "xcenjoy.docx"
+     output_path = "xcenjoy.md"
+     convert_with_pandoc(input_path, output_path)
+
+     # input_path = "Letter.docx"
+     # output_path = "Letter.rst"
+     # docx_to_sphinx_rst(input_path, output_path)
+
+     # input_path = "Letter_cn.md"
+     # output_path = "Letter_cn.rst"
+     # md_to_rst(input_path, output_path)
+
+     # # rstfromdocx
+     # os.system("rstfromdocx -lurg 项目概要.docx")
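
The commented-out `__main__` block already walks through most of `WriteMarkDown`; a condensed, runnable version of that flow follows (note that the `SAVE_PATH` directory must exist before `save()` writes the report — the class does not create it):

```python
# Condensed version of the commented-out WriteMarkDown example above.
import os
from chat2llms.output_files import WriteMarkDown, SAVE_PATH

os.makedirs(SAVE_PATH, exist_ok=True)  # WriteMarkDown does not create the directory

report = WriteMarkDown("comparison")
report.add_comparison_table(
    {"Accuracy (%)": [92.3, 89.7], "Inference speed (ms)": [15.2, 28.4]},
    ["GPT-4", "DeepSeek-R1"],
)
report.add_diff_analysis("GPT-4 leads on accuracy", "DeepSeek-R1 leads on memory")
report.add_conclusion(["Both models meet real-time requirements"])
report.save()
print(f"Report written to {report.output_path}")
```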
File without changes
@@ -0,0 +1,48 @@
+ Metadata-Version: 2.4
+ Name: chat2llms
+ Version: 0.1.0
+ Summary: A framework for comparing responses from different large language models.
+ Home-page: https://github.com/goldollarch/chat2llms
+ Author: scitao
+ Author-email: goldollarch@gmail.com
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.7
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Operating System :: OS Independent
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Text Processing :: Linguistic
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Requires-Python: >=3.7
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: requests
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license-file
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
+
+ # chat2llms
+
+ A Python library to compare responses from different large language models (e.g., DeepSeek, Gemini, OpenAI, Grok).
+
+ [![PyPI version](https://badge.fury.io/py/chat2llms.svg)](https://badge.fury.io/py/chat2llms)
+ [![Documentation Status](https://readthedocs.org/projects/chat2llms/badge/?version=latest)](https://chat2llms.readthedocs.io/en/latest/?badge=latest)
+
+ ## Installation
+
+ ```bash
+ pip install chat2llms
+ ```
@@ -0,0 +1,16 @@
+ LICENSE
+ README.md
+ setup.py
+ src/chat2llms/__init__.py
+ src/chat2llms/analyzer.py
+ src/chat2llms/base_client.py
+ src/chat2llms/model_response.py
+ src/chat2llms/output_files.py
+ src/chat2llms/utils.py
+ src/chat2llms.egg-info/PKG-INFO
+ src/chat2llms.egg-info/SOURCES.txt
+ src/chat2llms.egg-info/dependency_links.txt
+ src/chat2llms.egg-info/entry_points.txt
+ src/chat2llms.egg-info/requires.txt
+ src/chat2llms.egg-info/top_level.txt
+ tests/test_analyzer.py
1
+ [console_scripts]
2
+ chat2llms = chat2llms.cli.cli_click:main
@@ -0,0 +1 @@
+ requests
@@ -0,0 +1 @@
+ chat2llms
@@ -0,0 +1,35 @@
+ import pytest
+
+ import os, sys
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+
+ from src.chat2llms.analyzer import AnswerAnalyzer
+ from src.chat2llms.model_response import OpenAIResponse, GeminiResponse
+ from src.chat2llms.base_client import BaseClient
+
+ def test_semantic_similarity():
+     gemini = BaseClient("gemini")
+     deepseek = BaseClient("deepseek")
+     question = "What is 2 + 2?"
+     gemini_response = GeminiResponse(gemini)
+     deepseek_response = OpenAIResponse(deepseek)
+     analyzer = AnswerAnalyzer(gemini_response, deepseek_response, question)
+     semantic_sim = analyzer.compute_semantic_similarity()
+     assert semantic_sim >= -1.0  # -1.0 if SpaCy is unavailable
+
+ # Usage example
+ if __name__ == "__main__":
+
+     # Initialize the clients
+     gemini = BaseClient("gemini")
+     deepseek = BaseClient("deepseek")
+
+     # Get the responses
+     question = "What is 2 + 2?"
+     gemini_response = GeminiResponse(gemini)
+     deepseek_response = OpenAIResponse(deepseek)
+     analyzer = AnswerAnalyzer(gemini_response, deepseek_response, question)
+
+     print(f"Similarity: {analyzer.compute_similarity():.2f}")
+     print(f"semantic_sim: {analyzer.compute_semantic_similarity():.2f}")
+     print(analyzer.highlight_differences())