auto-coder 0.1.208__py3-none-any.whl → 0.1.211__py3-none-any.whl

This diff shows the changes between publicly available package versions released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. See the release details page for more information.

@@ -5,6 +5,7 @@ from autocoder.common import (
5
5
  split_code_into_segments,
6
6
  SourceCode,
7
7
  )
8
+ from autocoder.common.buildin_tokenizer import BuildinTokenizer
8
9
  from autocoder.pyproject import PyProject, Level1PyProject
9
10
  from autocoder.tsproject import TSProject
10
11
  from autocoder.suffixproject import SuffixProject
@@ -23,9 +24,19 @@ import os
23
24
  from autocoder.common.image_to_page import ImageToPage, ImageToPageDirectly
24
25
  from autocoder.utils.conversation_store import store_code_model_conversation
25
26
  from loguru import logger
27
+ import time
26
28
 
27
29
 
28
- class ActionTSProject:
30
+ class BaseAction:
31
+ def _get_content_length(self, content: str) -> int:
32
+ try:
33
+ tokenizer = BuildinTokenizer()
34
+ return tokenizer.count_tokens(content)
35
+ except Exception as e:
36
+ logger.warning(f"Failed to use tokenizer to count tokens, fallback to len(): {e}")
37
+ return len(content)
38
+
39
+ class ActionTSProject(BaseAction):
29
40
  def __init__(
30
41
  self, args: AutoCoderArgs, llm: Optional[byzerllm.ByzerLLM] = None
31
42
  ) -> None:
@@ -74,14 +85,15 @@ class ActionTSProject:
74
85
  args = self.args
75
86
 
76
87
  if args.execute and self.llm and not args.human_as_model:
77
- if len(content) > self.args.model_max_input_length:
88
+ content_length = self._get_content_length(content)
89
+ if content_length > self.args.model_max_input_length:
78
90
  logger.warning(
79
- f"Content length is {len(content)}, which is larger than the maximum input length {self.args.model_max_input_length}. chunk it..."
80
- )
81
- content = content[: self.args.model_max_input_length]
91
+ f"Content(send to model) is {content_length} tokens, which is larger than the maximum input length {self.args.model_max_input_length}"
92
+ )
82
93
 
83
- if args.execute:
94
+ if args.execute:
84
95
  logger.info("Auto generate the code...")
96
+ start_time = time.time()
85
97
  if args.auto_merge == "diff":
86
98
  generate = CodeAutoGenerateDiff(
87
99
  llm=self.llm, args=self.args, action=self
@@ -104,6 +116,7 @@ class ActionTSProject:
104
116
  generate_result = generate.single_round_run(
105
117
  query=args.query, source_content=content
106
118
  )
119
+ logger.info(f"Code generation completed in {time.time() - start_time:.2f} seconds")
107
120
  merge_result = None
108
121
  if args.execute and args.auto_merge:
109
122
  logger.info("Auto merge the code...")
@@ -141,7 +154,7 @@ class ActionTSProject:
141
154
  file.write(content)
142
155
 
143
156
 
144
- class ActionPyScriptProject:
157
+ class ActionPyScriptProject(BaseAction):
145
158
  def __init__(
146
159
  self, args: AutoCoderArgs, llm: Optional[byzerllm.ByzerLLM] = None
147
160
  ) -> None:
@@ -162,6 +175,8 @@ class ActionPyScriptProject:
162
175
  def process_content(self, content: str):
163
176
  args = self.args
164
177
  if args.execute:
178
+ logger.info("Auto generate the code...")
179
+ start_time = time.time()
165
180
  if args.auto_merge == "diff":
166
181
  generate = CodeAutoGenerateDiff(
167
182
  llm=self.llm, args=self.args, action=self
@@ -184,6 +199,8 @@ class ActionPyScriptProject:
184
199
  generate_result = generate.single_round_run(
185
200
  query=args.query, source_content=content
186
201
  )
202
+
203
+ logger.info(f"Code generation completed in {time.time() - start_time:.2f} seconds")
187
204
  merge_result = None
188
205
  if args.execute and args.auto_merge:
189
206
  logger.info("Auto merge the code...")
@@ -218,11 +235,13 @@ class ActionPyScriptProject:
218
235
  model=self.llm.default_model_name,
219
236
  )
220
237
 
238
+ end_time = time.time()
239
+ logger.info(f"Code generation completed in {end_time - start_time:.2f} seconds")
221
240
  with open(self.args.target_file, "w") as file:
222
241
  file.write(content)
223
242
 
224
243
 
225
- class ActionPyProject:
244
+ class ActionPyProject(BaseAction):
226
245
  def __init__(
227
246
  self, args: AutoCoderArgs, llm: Optional[byzerllm.ByzerLLM] = None
228
247
  ) -> None:
@@ -250,15 +269,15 @@ class ActionPyProject:
250
269
  args = self.args
251
270
 
252
271
  if args.execute and self.llm and not args.human_as_model:
253
- if len(content) > self.args.model_max_input_length:
272
+ content_length = self._get_content_length(content)
273
+ if content_length > self.args.model_max_input_length:
254
274
  logger.warning(
255
- f'''Content length is {len(content)}(you may collect too much files), which is larger than the maximum input length {self.args.model_max_input_length}. chunk it...'''
275
+ f'''Content(send to model) is {content_length} tokens (you may collect too much files), which is larger than the maximum input length {self.args.model_max_input_length}'''
256
276
  )
257
- content = content[: self.args.model_max_input_length]
258
277
 
259
278
  if args.execute:
260
279
  logger.info("Auto generate the code...")
261
-
280
+ start_time = time.time()
262
281
  if args.auto_merge == "diff":
263
282
  generate = CodeAutoGenerateDiff(
264
283
  llm=self.llm, args=self.args, action=self
@@ -283,7 +302,7 @@ class ActionPyProject:
283
302
  generate_result = generate.single_round_run(
284
303
  query=args.query, source_content=content
285
304
  )
286
-
305
+ logger.info(f"Code generation completed in {time.time() - start_time:.2f} seconds")
287
306
  merge_result = None
288
307
  if args.execute and args.auto_merge:
289
308
  logger.info("Auto merge the code...")
@@ -316,13 +335,12 @@ class ActionPyProject:
316
335
  instruction=self.args.query,
317
336
  conversations=generate_result.conversations[0],
318
337
  model=self.llm.default_model_name,
319
- )
320
-
338
+ )
321
339
  with open(args.target_file, "w") as file:
322
340
  file.write(content)
323
341
 
324
342
 
325
- class ActionSuffixProject:
343
+ class ActionSuffixProject(BaseAction):
326
344
  def __init__(
327
345
  self, args: AutoCoderArgs, llm: Optional[byzerllm.ByzerLLM] = None
328
346
  ) -> None:
@@ -346,14 +364,15 @@ class ActionSuffixProject:
346
364
  args = self.args
347
365
 
348
366
  if args.execute and self.llm and not args.human_as_model:
349
- if len(content) > self.args.model_max_input_length:
367
+ content_length = self._get_content_length(content)
368
+ if content_length > self.args.model_max_input_length:
350
369
  logger.warning(
351
- f"Content length is {len(content)}, which is larger than the maximum input length {self.args.model_max_input_length}. chunk it..."
352
- )
353
- content = content[: self.args.model_max_input_length]
370
+ f"Content(send to model) is {content_length} tokens, which is larger than the maximum input length {self.args.model_max_input_length}"
371
+ )
354
372
 
355
373
  if args.execute:
356
374
  logger.info("Auto generate the code...")
375
+ start_time = time.time()
357
376
  if args.auto_merge == "diff":
358
377
  generate = CodeAutoGenerateDiff(
359
378
  llm=self.llm, args=self.args, action=self
@@ -377,7 +396,7 @@ class ActionSuffixProject:
377
396
  query=args.query, source_content=content
378
397
  )
379
398
 
380
-
399
+ logger.info(f"Code generation completed in {time.time() - start_time:.2f} seconds")
381
400
  merge_result = None
382
401
  if args.execute and args.auto_merge:
383
402
  logger.info("Auto merge the code...")
@@ -98,7 +98,10 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
98
98
  process_file_in_multi_process, files_to_process)
99
99
 
100
100
  for file_info, result in zip(files_to_process, results):
101
- self.update_cache(file_info, result)
101
+ if result: # 只有当result不为空时才更新缓存
102
+ self.update_cache(file_info, result)
103
+ else:
104
+ logger.warning(f"Empty result for file: {file_info[0]}, skipping cache update")
102
105
 
103
106
  self.write_cache()
104
107
 
@@ -138,7 +141,10 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
138
141
  logger.info(f"{file_info[0]} is detected to be updated")
139
142
  try:
140
143
  result = process_file_local(file_info[0])
141
- self.update_cache(file_info, result)
144
+ if result: # 只有当result不为空时才更新缓存
145
+ self.update_cache(file_info, result)
146
+ else:
147
+ logger.warning(f"Empty result for file: {file_info[0]}, skipping cache update")
142
148
  except Exception as e:
143
149
  logger.error(
144
150
  f"SimpleCache Error in process_queue: {e}")
@@ -1,7 +1,7 @@
1
1
  from io import BytesIO
2
2
  import docx2txt
3
3
  from autocoder.utils._markitdown import MarkItDown
4
-
4
+ import traceback
5
5
 
6
6
  def extract_text_from_docx_old(docx_path):
7
7
  with open(docx_path, "rb") as f:
@@ -16,5 +16,6 @@ def extract_text_from_docx(docx_path):
16
16
  md_converter = MarkItDown()
17
17
  result = md_converter.convert(docx_path)
18
18
  return result.text_content
19
- except Exception as e:
19
+ except (BaseException, Exception) as e:
20
+ traceback.print_exc()
20
21
  return extract_text_from_docx_old(docx_path)
@@ -1,6 +1,7 @@
1
1
  from io import BytesIO
2
2
  from pypdf import PdfReader
3
3
  from autocoder.utils._markitdown import MarkItDown
4
+ import traceback
4
5
 
5
6
 
6
7
  def extract_text_from_pdf_old(file_path):
@@ -18,5 +19,6 @@ def extract_text_from_pdf(file_path):
18
19
  md_converter = MarkItDown()
19
20
  result = md_converter.convert(file_path)
20
21
  return result.text_content
21
- except Exception as e:
22
+ except (BaseException, Exception) as e:
23
+ traceback.print_exc()
22
24
  return extract_text_from_pdf_old(file_path)
@@ -182,19 +182,25 @@ class LongContextRAG:
182
182
  使用以下文档和对话历史来提取相关信息。
183
183
 
184
184
  文档:
185
+ <documents>
185
186
  {% for doc in documents %}
186
187
  {{ doc }}
187
188
  {% endfor %}
189
+ </documents>
188
190
 
189
191
  对话历史:
192
+ <conversations>
190
193
  {% for msg in conversations %}
191
- <{{ msg.role }}>: {{ msg.content }}
194
+ [{{ msg.role }}]:
195
+ {{ msg.content }}
196
+
192
197
  {% endfor %}
198
+ </conversations>
193
199
 
194
200
  请根据提供的文档内容、用户对话历史以及最后一个问题,提取并总结文档中与问题相关的重要信息。
195
201
  如果文档中没有相关信息,请回复"该文档中没有与问题相关的信息"。
196
202
  提取的信息尽量保持和原文中的一样,并且只输出这些信息。
197
- """
203
+ """
198
204
 
199
205
  @byzerllm.prompt()
200
206
  def _answer_question(
@@ -202,9 +208,11 @@ class LongContextRAG:
202
208
  ) -> Generator[str, None, None]:
203
209
  """
204
210
  文档:
211
+ <documents>
205
212
  {% for doc in relevant_docs %}
206
213
  {{ doc }}
207
214
  {% endfor %}
215
+ </documents>
208
216
 
209
217
  使用以上文档来回答用户的问题。回答要求:
210
218
 
autocoder/rag/utils.py CHANGED
@@ -9,13 +9,14 @@ import time
9
9
  from loguru import logger
10
10
  import traceback
11
11
 
12
+
12
13
  def process_file_in_multi_process(
13
14
  file_info: Tuple[str, str, float]
14
15
  ) -> List[SourceCode]:
15
16
  start_time = time.time()
16
17
  file_path, relative_path, _, _ = file_info
17
18
  try:
18
- if file_path.endswith(".pdf"):
19
+ if file_path.endswith(".pdf"):
19
20
  content = extract_text_from_pdf(file_path)
20
21
  v = [
21
22
  SourceCode(
@@ -24,7 +25,7 @@ def process_file_in_multi_process(
24
25
  tokens=count_tokens_worker(content),
25
26
  )
26
27
  ]
27
- elif file_path.endswith(".docx"):
28
+ elif file_path.endswith(".docx"):
28
29
  content = extract_text_from_docx(file_path)
29
30
  v = [
30
31
  SourceCode(
@@ -45,7 +46,8 @@ def process_file_in_multi_process(
45
46
  ]
46
47
  elif file_path.endswith(".pptx"):
47
48
  slides = extract_text_from_ppt(file_path)
48
- content = "".join(f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
49
+ content = "".join(
50
+ f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
49
51
  v = [
50
52
  SourceCode(
51
53
  module_name=f"##File: {file_path}",
@@ -65,15 +67,16 @@ def process_file_in_multi_process(
65
67
  ]
66
68
  logger.info(f"Load file {file_path} in {time.time() - start_time}")
67
69
  return v
68
- except Exception as e:
70
+ except (BaseException, Exception) as e:
69
71
  logger.error(f"Error processing file {file_path}: {str(e)}")
72
+ logger.error(f"Error type: {type(e).__name__}")
70
73
  return []
71
74
 
72
75
 
73
76
  def process_file_local(file_path: str) -> List[SourceCode]:
74
77
  start_time = time.time()
75
78
  try:
76
- if file_path.endswith(".pdf"):
79
+ if file_path.endswith(".pdf"):
77
80
  content = extract_text_from_pdf(file_path)
78
81
  v = [
79
82
  SourceCode(
@@ -82,7 +85,7 @@ def process_file_local(file_path: str) -> List[SourceCode]:
82
85
  tokens=count_tokens(content),
83
86
  )
84
87
  ]
85
- elif file_path.endswith(".docx"):
88
+ elif file_path.endswith(".docx"):
86
89
  content = extract_text_from_docx(file_path)
87
90
  v = [
88
91
  SourceCode(
@@ -103,7 +106,8 @@ def process_file_local(file_path: str) -> List[SourceCode]:
103
106
  ]
104
107
  elif file_path.endswith(".pptx"):
105
108
  slides = extract_text_from_ppt(file_path)
106
- content = "".join(f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
109
+ content = "".join(
110
+ f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
107
111
  v = [
108
112
  SourceCode(
109
113
  module_name=f"##File: {file_path}",
@@ -123,7 +127,8 @@ def process_file_local(file_path: str) -> List[SourceCode]:
123
127
  ]
124
128
  logger.info(f"Load file {file_path} in {time.time() - start_time}")
125
129
  return v
126
- except Exception as e:
130
+ except (BaseException, Exception) as e:
127
131
  logger.error(f"Error processing file {file_path}: {str(e)}")
132
+ logger.error(f"Error type: {type(e).__name__}")
128
133
  traceback.print_exc()
129
- return []
134
+ return []
autocoder/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.208"
1
+ __version__ = "0.1.211"