auto-coder 0.1.205__py3-none-any.whl → 0.1.206__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.

autocoder/benchmark.py ADDED
@@ -0,0 +1,135 @@
+from openai import AsyncOpenAI
+import asyncio
+import time
+from rich.console import Console
+from rich.table import Table
+import numpy as np
+import ray
+from loguru import logger
+import byzerllm
+from concurrent.futures import ThreadPoolExecutor
+
+async def benchmark_openai(model: str, parallel: int, api_key: str, base_url: str = None, rounds: int = 1):
+    client = AsyncOpenAI(api_key=api_key, base_url=base_url if base_url else None)
+    start_time = time.time()
+
+    async def single_request():
+        try:
+            t1 = time.time()
+            response = await client.chat.completions.create(
+                model=model,
+                messages=[{"role": "user", "content": "Hello, how are you?"}]
+            )
+            t2 = time.time()
+            return t2 - t1
+        except Exception as e:
+            logger.error(f"Request failed: {e}")
+            return None
+
+    all_results = []
+    for round_num in range(rounds):
+        print(f"Running round {round_num + 1}/{rounds}")
+        tasks = [single_request() for _ in range(parallel)]
+        results = await asyncio.gather(*tasks)
+        all_results.extend(results)
+
+    results = all_results
+
+    # Filter out None values from failed requests
+    results = [r for r in results if r is not None]
+
+    end_time = time.time()
+    total_time = end_time - start_time
+
+    if not results:
+        print("All requests failed")
+        return
+
+    # Calculate statistics
+    avg_time = np.mean(results)
+    p50 = np.percentile(results, 50)
+    p90 = np.percentile(results, 90)
+    p95 = np.percentile(results, 95)
+    p99 = np.percentile(results, 99)
+
+    # Create rich table for output
+    console = Console()
+    table = Table(title=f"OpenAI Client Benchmark Results (Parallel={parallel})")
+
+    table.add_column("Metric", style="cyan")
+    table.add_column("Value (seconds)", style="magenta")
+
+    table.add_row("Total Time", f"{total_time:.2f}")
+    table.add_row("Average Response Time", f"{avg_time:.2f}")
+    table.add_row("Median (P50)", f"{p50:.2f}")
+    table.add_row("P90", f"{p90:.2f}")
+    table.add_row("P95", f"{p95:.2f}")
+    table.add_row("P99", f"{p99:.2f}")
+    table.add_row("Requests/Second", f"{parallel/total_time:.2f}")
+
+    console.print(table)
+
+def benchmark_byzerllm(model: str, parallel: int, rounds: int = 1):
+    byzerllm.connect_cluster(address="auto")
+    llm = byzerllm.ByzerLLM()
+    llm.setup_default_model_name(model)
+
+    def single_request(llm):
+        try:
+            t1 = time.time()
+            llm.chat_oai(conversations=[{
+                "role": "user",
+                "content": "Hello, how are you?"
+            }])
+            t2 = time.time()
+            return t2 - t1
+        except Exception as e:
+            logger.error(f"Request failed: {e}")
+            return None
+
+    start_time = time.time()
+    all_results = []
+    for round_num in range(rounds):
+        print(f"Running round {round_num + 1}/{rounds}")
+        with ThreadPoolExecutor(max_workers=parallel) as executor:
+            # submit tasks to the executor
+            futures = [executor.submit(single_request, llm) for _ in range(parallel)]
+            # get results from futures
+            results = [future.result() for future in futures]
+        all_results.extend(results)
+
+    results = all_results
+
+    # Filter out None values from failed requests
+    results = [r for r in results if r is not None]
+
+    end_time = time.time()
+    total_time = end_time - start_time
+
+    if not results:
+        print("All requests failed")
+        return
+
+    # Calculate statistics
+    avg_time = np.mean(results)
+    p50 = np.percentile(results, 50)
+    p90 = np.percentile(results, 90)
+    p95 = np.percentile(results, 95)
+    p99 = np.percentile(results, 99)
+
+    # Create rich table for output
+    console = Console()
+    table = Table(title=f"ByzerLLM Client Benchmark Results (Parallel={parallel})")
+
+    table.add_column("Metric", style="cyan")
+    table.add_column("Value (seconds)", style="magenta")
+
+    table.add_row("Total Time", f"{total_time:.2f}")
+    table.add_row("Average Response Time", f"{avg_time:.2f}")
+    table.add_row("Median (P50)", f"{p50:.2f}")
+    table.add_row("P90", f"{p90:.2f}")
+    table.add_row("P95", f"{p95:.2f}")
+    table.add_row("P99", f"{p99:.2f}")
+    table.add_row("Requests/Second", f"{parallel/total_time:.2f}")
+
+    console.print(table)
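
For reference, a minimal sketch of how these two new benchmark helpers might be invoked. The model names and API key below are placeholders, not values shipped with the package; the import path follows from the new autocoder/benchmark.py module above.

```python
import asyncio

from autocoder.benchmark import benchmark_openai, benchmark_byzerllm

# Benchmark an OpenAI-compatible endpoint: 8 concurrent requests per round, 3 rounds.
asyncio.run(
    benchmark_openai(
        model="gpt-4o-mini",   # placeholder model name
        parallel=8,
        api_key="sk-...",      # placeholder API key
        base_url=None,         # or the URL of a self-hosted OpenAI-compatible server
        rounds=3,
    )
)

# Benchmark a model already deployed on a ByzerLLM/Ray cluster (synchronous).
benchmark_byzerllm(model="deepseek_chat", parallel=8, rounds=3)
```

Both helpers print a rich table of latency percentiles rather than returning a value.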
@@ -58,6 +58,7 @@ from prompt_toolkit.patch_stdout import patch_stdout
 import byzerllm
 from byzerllm.utils import format_str_jinja2
 from autocoder.chat_auto_coder_lang import get_message
+from autocoder.utils import operate_config_api
 
 
 class SymbolItem(BaseModel):
@@ -0,0 +1,91 @@
+from typing import List, Optional
+import byzerllm
+from loguru import logger
+import json
+from byzerllm.utils.client.code_utils import extract_code
+
+## This function is generated by auto-coder.chat in Korea
+def validate_chunk(llm: byzerllm.ByzerLLM, content: Optional[List[str]] = None, query: Optional[str] = None) -> str:
+    """
+    Validate the effectiveness of the text chunking model.
+
+    Args:
+        llm: ByzerLLM instance
+        content: list of documents to validate
+        query: a related question
+
+    Returns:
+        The validation result.
+    """
+    if content is None:
+        content = [
+            """
+            class TokenLimiter:
+                def __init__(
+                    self,
+                    count_tokens: Callable[[str], int],
+                    full_text_limit: int,
+                    segment_limit: int,
+                    buff_limit: int,
+                    llm: ByzerLLM,
+                    disable_segment_reorder: bool,
+                ):
+                    self.count_tokens = count_tokens
+                    self.full_text_limit = full_text_limit
+                    self.segment_limit = segment_limit
+                    self.buff_limit = buff_limit
+                    self.llm = llm
+
+                def limit_tokens(self, relevant_docs: List[SourceCode]):
+                    pass
+            """
+        ]
+
+    if query is None:
+        query = "What are the main methods in TokenLimiter class?"
+
+    try:
+        from autocoder.rag.token_limiter import TokenLimiter
+        def count_tokens(text: str):
+            return 0
+        token_limiter = TokenLimiter(
+            llm=llm,
+            count_tokens=count_tokens,
+            full_text_limit=1000,
+            segment_limit=1000,
+            buff_limit=1000,
+            disable_segment_reorder=False
+        )
+        conversations = [
+            {"role": "user", "content": query}
+        ]
+        result = token_limiter.extract_relevance_range_from_docs_with_conversation.with_llm(llm).run(conversations, content)
+
+        # Validate and parse the results
+        validation_result = []
+        for doc_idx, doc in enumerate(content):
+            doc_lines = doc.split('\n')
+            source_code_with_line_number = ""
+            for idx, line in enumerate(doc_lines):
+                source_code_with_line_number += f"{idx+1} {line}\n"
+
+            json_str = extract_code(result)[0][1]
+            json_objs = json.loads(json_str)
+
+            for json_obj in json_objs:
+                start_line = json_obj["start_line"] - 1
+                end_line = json_obj["end_line"]
+                if start_line >= 0 and end_line > start_line and end_line <= len(doc_lines):
+                    chunk = "\n".join(doc_lines[start_line:end_line])
+                    validation_result.append(
+                        f"Document {doc_idx + 1} - Extracted Range (lines {json_obj['start_line']}-{json_obj['end_line']}):\n{chunk}"
+                    )
+
+        if not validation_result:
+            return "No valid ranges extracted from the documents."
+
+        return "\n\n".join(validation_result)
+
+    except Exception as e:
+        logger.error(f"Error validating chunk model: {str(e)}")
+        return f"Error: {str(e)}"
@@ -0,0 +1,58 @@
+from typing import List, Optional
+import byzerllm
+from loguru import logger
+from autocoder.rag.doc_filter import _check_relevance_with_conversation
+from autocoder.rag.relevant_utils import parse_relevance
+
+def validate_recall(llm: byzerllm.ByzerLLM, content: Optional[List[str]] = None, query: Optional[str] = None) -> bool:
+    """
+    Validate the effectiveness of the recall model.
+
+    Args:
+        llm: ByzerLLM instance
+        content: list of documents to validate
+        query: query string
+
+    Returns:
+        True if validation succeeds, False otherwise.
+    """
+    if content is None:
+        content = [
+            """
+            # ByzerLLM API Guide
+
+            ByzerLLM provides a simple API for interacting with language models.
+            Here's how to use it:
+
+            1. Initialize the client
+            2. Send requests
+            3. Process responses
+
+            Example:
+            ```python
+            import byzerllm
+            llm = byzerllm.ByzerLLM()
+            response = llm.chat(prompt="Hello")
+            ```
+            """
+        ]
+
+    if query is None:
+        query = "How do I use the ByzerLLM API?"
+
+    conversations = [
+        {"role": "user", "content": query}
+    ]
+
+    try:
+        relevance_str = _check_relevance_with_conversation.with_llm(llm).run(conversations, content)
+        relevance = parse_relevance(relevance_str)
+
+        if relevance is None:
+            logger.error("Failed to parse relevance result")
+            return False
+
+        return relevance.is_relevant
+    except Exception as e:
+        logger.error(f"Error validating recall: {str(e)}")
+        return False
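
Similarly, a hedged sketch of calling validate_recall, again assuming the function is importable since the diff omits its module path, and using a placeholder model name.

```python
import byzerllm
# from autocoder.<module> import validate_recall  # module path not shown in this diff

byzerllm.connect_cluster(address="auto")
llm = byzerllm.ByzerLLM()
llm.setup_default_model_name("deepseek_chat")  # placeholder model name

# Uses the bundled ByzerLLM API guide snippet and default question;
# returns True only if the recall model judges the document relevant.
print(validate_recall(llm))
```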