auto-coder 0.1.207__py3-none-any.whl → 0.1.209__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic.

Files changed (37)
  1. {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/METADATA +4 -3
  2. {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/RECORD +37 -34
  3. autocoder/agent/auto_demand_organizer.py +212 -0
  4. autocoder/agent/auto_guess_query.py +284 -0
  5. autocoder/auto_coder.py +64 -19
  6. autocoder/auto_coder_rag.py +6 -0
  7. autocoder/chat_auto_coder.py +119 -16
  8. autocoder/command_args.py +21 -5
  9. autocoder/common/__init__.py +7 -1
  10. autocoder/common/code_auto_generate.py +32 -10
  11. autocoder/common/code_auto_generate_diff.py +85 -47
  12. autocoder/common/code_auto_generate_editblock.py +50 -28
  13. autocoder/common/code_auto_generate_strict_diff.py +79 -45
  14. autocoder/common/code_auto_merge.py +51 -15
  15. autocoder/common/code_auto_merge_diff.py +55 -2
  16. autocoder/common/code_auto_merge_editblock.py +84 -14
  17. autocoder/common/code_auto_merge_strict_diff.py +69 -32
  18. autocoder/common/code_modification_ranker.py +100 -0
  19. autocoder/common/command_completer.py +6 -4
  20. autocoder/common/types.py +10 -2
  21. autocoder/dispacher/actions/action.py +141 -94
  22. autocoder/dispacher/actions/plugins/action_regex_project.py +35 -25
  23. autocoder/lang.py +9 -1
  24. autocoder/pyproject/__init__.py +4 -0
  25. autocoder/rag/cache/simple_cache.py +8 -2
  26. autocoder/rag/loaders/docx_loader.py +3 -2
  27. autocoder/rag/loaders/pdf_loader.py +3 -1
  28. autocoder/rag/long_context_rag.py +12 -2
  29. autocoder/rag/rag_entry.py +2 -2
  30. autocoder/rag/utils.py +14 -9
  31. autocoder/suffixproject/__init__.py +2 -0
  32. autocoder/tsproject/__init__.py +4 -0
  33. autocoder/version.py +1 -1
  34. {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/LICENSE +0 -0
  35. {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/WHEEL +0 -0
  36. {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/entry_points.txt +0 -0
  37. {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/top_level.txt +0 -0
autocoder/dispacher/actions/action.py CHANGED
@@ -97,38 +97,48 @@ class ActionTSProject:
 else:
     generate = CodeAutoGenerate(llm=self.llm, args=self.args, action=self)
 if self.args.enable_multi_round_generate:
-    result, conversations = generate.multi_round_run(
+    generate_result = generate.multi_round_run(
         query=args.query, source_content=content
     )
 else:
-    result, conversations = generate.single_round_run(
+    generate_result = generate.single_round_run(
         query=args.query, source_content=content
     )
-content = "\n\n".join(result)
-
-store_code_model_conversation(
-    args=self.args,
-    instruction=self.args.query,
-    conversations=conversations,
-    model=self.llm.default_model_name,
-)
-with open(args.target_file, "w") as file:
-    file.write(content)
-
-if args.execute and args.auto_merge:
-    logger.info("Auto merge the code...")
-    if args.auto_merge == "diff":
-        code_merge = CodeAutoMergeDiff(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    elif args.auto_merge == "strict_diff":
-        code_merge = CodeAutoMergeStrictDiff(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    elif args.auto_merge == "editblock":
-        code_merge = CodeAutoMergeEditBlock(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    else:
-        code_merge = CodeAutoMerge(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
+merge_result = None
+if args.execute and args.auto_merge:
+    logger.info("Auto merge the code...")
+    if args.auto_merge == "diff":
+        code_merge = CodeAutoMergeDiff(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    elif args.auto_merge == "strict_diff":
+        code_merge = CodeAutoMergeStrictDiff(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    elif args.auto_merge == "editblock":
+        code_merge = CodeAutoMergeEditBlock(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    else:
+        code_merge = CodeAutoMerge(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+
+if merge_result is not None:
+    content = merge_result.contents[0]
+    store_code_model_conversation(
+        args=self.args,
+        instruction=self.args.query,
+        conversations=merge_result.conversations[0],
+        model=self.llm.default_model_name,
+    )
+else:
+    content = generate_result.contents[0]
+    store_code_model_conversation(
+        args=self.args,
+        instruction=self.args.query,
+        conversations=generate_result.conversations[0],
+        model=self.llm.default_model_name,
+    )
+
+with open(args.target_file, "w") as file:
+    file.write(content)


 class ActionPyScriptProject:
@@ -167,38 +177,49 @@ class ActionPyScriptProject:
 else:
     generate = CodeAutoGenerate(llm=self.llm, args=self.args, action=self)
 if self.args.enable_multi_round_generate:
-    result, conversations = generate.multi_round_run(
+    generate_result = generate.multi_round_run(
         query=args.query, source_content=content
     )
 else:
-    result, conversations = generate.single_round_run(
+    generate_result = generate.single_round_run(
         query=args.query, source_content=content
     )
-content = "\n\n".join(result)
+merge_result = None
+if args.execute and args.auto_merge:
+    logger.info("Auto merge the code...")
+    if args.auto_merge == "diff":
+        code_merge = CodeAutoMergeDiff(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    elif args.auto_merge == "strict_diff":
+        code_merge = CodeAutoMergeStrictDiff(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    elif args.auto_merge == "editblock":
+        code_merge = CodeAutoMergeEditBlock(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    else:
+        code_merge = CodeAutoMerge(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+
+    content = merge_result.contents[0]
+
+    store_code_model_conversation(
+        args=self.args,
+        instruction=self.args.query,
+        conversations=merge_result.conversations[0],
+        model=self.llm.default_model_name,
+    )
+else:
+    content = generate_result.contents[0]

-store_code_model_conversation(
-    args=self.args,
-    instruction=self.args.query,
-    conversations=conversations,
-    model=self.llm.default_model_name,
-)
-with open(self.args.target_file, "w") as file:
-    file.write(content)
+    store_code_model_conversation(
+        args=self.args,
+        instruction=self.args.query,
+        conversations=generate_result.conversations[0],
+        model=self.llm.default_model_name,
+    )

-if args.execute and args.auto_merge:
-    logger.info("Auto merge the code...")
-    if args.auto_merge == "diff":
-        code_merge = CodeAutoMergeDiff(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    elif args.auto_merge == "strict_diff":
-        code_merge = CodeAutoMergeStrictDiff(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    elif args.auto_merge == "editblock":
-        code_merge = CodeAutoMergeEditBlock(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    else:
-        code_merge = CodeAutoMerge(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
+with open(self.args.target_file, "w") as file:
+    file.write(content)


 class ActionPyProject:
@@ -255,39 +276,50 @@ class ActionPyProject:


 if self.args.enable_multi_round_generate:
-    result, conversations = generate.multi_round_run(
+    generate_result = generate.multi_round_run(
         query=args.query, source_content=content
     )
 else:
-    result, conversations = generate.single_round_run(
+    generate_result = generate.single_round_run(
         query=args.query, source_content=content
     )

-content = "\n\n".join(result)
+merge_result = None
+if args.execute and args.auto_merge:
+    logger.info("Auto merge the code...")
+    if args.auto_merge == "diff":
+        code_merge = CodeAutoMergeDiff(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    elif args.auto_merge == "strict_diff":
+        code_merge = CodeAutoMergeStrictDiff(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    elif args.auto_merge == "editblock":
+        code_merge = CodeAutoMergeEditBlock(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    else:
+        code_merge = CodeAutoMerge(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+
+    content = merge_result.contents[0]
+
+    store_code_model_conversation(
+        args=self.args,
+        instruction=self.args.query,
+        conversations=merge_result.conversations[0],
+        model=self.llm.default_model_name,
+    )
+else:
+    content = generate_result.contents[0]

-store_code_model_conversation(
-    args=self.args,
-    instruction=self.args.query,
-    conversations=conversations,
-    model=self.llm.default_model_name,
-)
-with open(args.target_file, "w") as file:
-    file.write(content)
+    store_code_model_conversation(
+        args=self.args,
+        instruction=self.args.query,
+        conversations=generate_result.conversations[0],
+        model=self.llm.default_model_name,
+    )

-if args.execute and args.auto_merge:
-    logger.info("Auto merge the code...")
-    if args.auto_merge == "diff":
-        code_merge = CodeAutoMergeDiff(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    elif args.auto_merge == "strict_diff":
-        code_merge = CodeAutoMergeStrictDiff(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    elif args.auto_merge == "editblock":
-        code_merge = CodeAutoMergeEditBlock(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    else:
-        code_merge = CodeAutoMerge(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
+with open(args.target_file, "w") as file:
+    file.write(content)


 class ActionSuffixProject:
@@ -337,36 +369,51 @@ class ActionSuffixProject:
 else:
     generate = CodeAutoGenerate(llm=self.llm, args=self.args, action=self)
 if self.args.enable_multi_round_generate:
-    result, conversations = generate.multi_round_run(
+    generate_result = generate.multi_round_run(
         query=args.query, source_content=content
     )
 else:
-    result, conversations = generate.single_round_run(
+    generate_result = generate.single_round_run(
         query=args.query, source_content=content
     )
-content = "\n\n".join(result)
-
-store_code_model_conversation(
-    args=self.args,
-    instruction=self.args.query,
-    conversations=conversations,
-    model=self.llm.default_model_name,
-)
-
-with open(args.target_file, "w") as file:
-    file.write(content)
+

+merge_result = None
 if args.execute and args.auto_merge:
     logger.info("Auto merge the code...")
     if args.auto_merge == "diff":
         code_merge = CodeAutoMergeDiff(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
     elif args.auto_merge == "strict_diff":
         code_merge = CodeAutoMergeStrictDiff(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
     elif args.auto_merge == "editblock":
         code_merge = CodeAutoMergeEditBlock(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
     else:
         code_merge = CodeAutoMerge(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+
+if merge_result is not None:
+    content = merge_result.contents[0]
+    store_code_model_conversation(
+        args=self.args,
+        instruction=self.args.query,
+        conversations=merge_result.conversations[0],
+        model=self.llm.default_model_name,
+    )
+    with open(args.target_file, "w") as file:
+        file.write(content)
+else:
+    content = generate_result.contents[0]
+
+    store_code_model_conversation(
+        args=self.args,
+        instruction=self.args.query,
+        conversations=generate_result.conversations[0],
+        model=self.llm.default_model_name,
+    )
+
+    with open(args.target_file, "w") as file:
+        file.write(content)
+
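The four hunks above share one refactor: the old (result, conversations) tuple becomes a single generate result object, auto-merge now consumes that whole object via merge_code(generate_result=...), and the first candidate of whichever result is available gets written to the target file. A minimal, self-contained sketch of that selection step follows; the GenerationResult dataclass and pick_output helper here are hypothetical stand-ins (the package defines its own result types, e.g. in autocoder/common/types.py), but the .contents/.conversations shape mirrors the attributes used in the diff.

    from dataclasses import dataclass, field
    from typing import Dict, List, Optional, Tuple


    @dataclass
    class GenerationResult:
        # One generated candidate per entry; conversations[i] is the chat
        # history that produced contents[i].
        contents: List[str] = field(default_factory=list)
        conversations: List[List[Dict[str, str]]] = field(default_factory=list)


    def pick_output(generate_result: GenerationResult,
                    merge_result: Optional[GenerationResult]) -> Tuple[str, List[Dict[str, str]]]:
        # Prefer the merged result when auto-merge ran; otherwise fall back to
        # the first raw generation, mirroring the if/else added in the hunks above.
        chosen = merge_result if merge_result is not None else generate_result
        return chosen.contents[0], chosen.conversations[0]


    # Example: auto-merge disabled, so the first generated candidate is written out.
    gen = GenerationResult(contents=["print('hello')"],
                           conversations=[[{"role": "user", "content": "demo request"}]])
    content, conversations = pick_output(gen, merge_result=None)
    print(content)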
autocoder/dispacher/actions/plugins/action_regex_project.py CHANGED
@@ -68,35 +68,45 @@ class ActionRegexProject:
 else:
     generate = CodeAutoGenerate(llm=self.llm, args=self.args, action=self)
 if self.args.enable_multi_round_generate:
-    result, conversations = generate.multi_round_run(
+    generate_result = generate.multi_round_run(
         query=args.query, source_content=content
     )
 else:
-    result, conversations = generate.single_round_run(
+    generate_result = generate.single_round_run(
         query=args.query, source_content=content
     )
-content = "\n\n".join(result)
+merge_result = None
+if args.execute and args.auto_merge:
+    logger.info("Auto merge the code...")
+    if args.auto_merge == "diff":
+        code_merge = CodeAutoMergeDiff(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    elif args.auto_merge == "strict_diff":
+        code_merge = CodeAutoMergeStrictDiff(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    elif args.auto_merge == "editblock":
+        code_merge = CodeAutoMergeEditBlock(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)
+    else:
+        code_merge = CodeAutoMerge(llm=self.llm, args=self.args)
+        merge_result = code_merge.merge_code(generate_result=generate_result)

-store_code_model_conversation(
-    args=self.args,
-    instruction=self.args.query,
-    conversations=conversations,
-    model=self.llm.default_model_name,
-)
-with open(args.target_file, "w") as file:
-    file.write(content)
-
-if args.execute and args.auto_merge:
-    logger.info("Auto merge the code...")
-    if args.auto_merge == "diff":
-        code_merge = CodeAutoMergeDiff(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    elif args.auto_merge == "strict_diff":
-        code_merge = CodeAutoMergeStrictDiff(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
-    elif args.auto_merge == "editblock":
-        code_merge = CodeAutoMergeEditBlock(llm=self.llm, args=self.args)
-        code_merge.merge_code(content=content)
+if merge_result is not None:
+    content = merge_result.contents[0]
+    store_code_model_conversation(
+        args=self.args,
+        instruction=self.args.query,
+        conversations=merge_result.conversations[0],
+        model=self.llm.default_model_name,
+    )
 else:
-    code_merge = CodeAutoMerge(llm=self.llm, args=self.args)
-    code_merge.merge_code(content=content)
+    content = generate_result.contents[0]
+    store_code_model_conversation(
+        args=self.args,
+        instruction=self.args.query,
+        conversations=generate_result.conversations[0],
+        model=self.llm.default_model_name,
+    )
+
+with open(args.target_file, "w") as file:
+    file.write(content)
autocoder/lang.py CHANGED
@@ -21,6 +21,7 @@ lang_desc = {
         "cmd_args_title": "Command Line Arguments:",
         "py_packages": "The Python packages added to context, only works for py project type. Default is empty.",
         "human_as_model": "Use human as model or not. Default is False",
+        "human_model_num": "Number of human models to use. Default is 1",
         "urls": "The urls to crawl and extract text from, separated by comma",
         "search_engine": "The search engine to use. Supported engines: bing, google. Default is empty",
         "search_engine_token": "The token for the search engine API. Default is empty",
@@ -62,6 +63,8 @@ lang_desc = {
         "screenshot_url": "The URL of the webpage to capture",
         "screenshot_output": "The directory to save the screenshots",
         "code_model": "The name of the code model to use. Default is empty",
+        "generate_rerank_model": "The name of the generate rerank model to use. Default is empty",
+        "inference_model": "The name of the inference model to use. Default is empty",
         "system_prompt": "The system prompt for the model. Default is empty",
         "planner_model": "The name of the planner model to use. Default is empty",
         "designer_model": "The name of the designer model to use. Default is empty",
@@ -78,6 +81,7 @@ lang_desc = {
         "rag_token": "The token for the RAG service. Default is empty",
         "rag_type": "RAG type (simple/storage), default is storage",
         "rag_params_max_tokens": "The maximum number of tokens for RAG parameters. Default is 4096",
+        "generate_times_same_model": "Number of times to generate using the same model. Default is 1",
     },
     "zh": {
         "request_id": "Request ID",
@@ -101,6 +105,7 @@ lang_desc = {
         "cmd_args_title": "命令行参数:",
         "py_packages": "添加到上下文的Python包,仅适用于py项目类型。默认为空。",
         "human_as_model": "是否使用人工作为模型。默认为False",
+        "human_model_num": "使用的人工模型数量。默认为1",
         "urls": "要爬取并提取文本的URL,多个URL以逗号分隔",
         "search_engine": "要使用的搜索引擎。支持的引擎:bing、google。默认为空",
         "search_engine_token": "搜索引擎API的令牌。默认为空",
@@ -142,6 +147,8 @@ lang_desc = {
         "screenshot_url": "要捕获的网页的URL",
         "screenshot_output": "保存截图的目录",
         "code_model": "要使用的代码模型的名称。默认为空",
+        "generate_rerank_model": "要使用的生成重排序模型的名称。默认为空",
+        "inference_model": "要使用的推理模型的名称。默认为空",
         "system_prompt": "模型使用的系统提示词。默认为空",
         "next_desc": "基于上一个action文件创建一个新的action文件",
         "planner_model": "要使用的规划模型的名称。默认为空",
@@ -157,6 +164,7 @@ lang_desc = {
         "rag_url": "RAG服务的URL",
         "rag_token": "RAG服务的令牌",
         "rag_type": "RAG类型(simple/storage),默认是storage",
-        "rag_params_max_tokens": "RAG参数的最大token数。默认为4096",
+        "rag_params_max_tokens": "RAG参数的最大token数。默认为4096",
+        "generate_times_same_model": "使用相同模型生成的次数。默认为1",
     }
 }
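The new entries describe options added in this release (human_model_num, generate_rerank_model, inference_model, generate_times_same_model). A minimal sketch of how such a locale table can be queried for help text; the describe helper and its fallback behavior are assumptions, not the package's actual lookup code, only the lang_desc structure is taken from the hunks above.

    lang_desc = {
        "en": {"generate_times_same_model": "Number of times to generate using the same model. Default is 1"},
        "zh": {"generate_times_same_model": "使用相同模型生成的次数。默认为1"},
    }


    def describe(key: str, lang: str = "en") -> str:
        # Fall back to the English entry (or the raw key) when a locale lacks the key.
        return lang_desc.get(lang, {}).get(key) or lang_desc["en"].get(key, key)


    print(describe("generate_times_same_model", lang="zh"))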
autocoder/pyproject/__init__.py CHANGED
@@ -114,6 +114,10 @@ class PyProject:
             "dist",
             "__pycache__",
             "node_modules",
+            ".auto-coder",
+            "actions",
+            ".vscode",
+            ".idea",
         ]

     @byzerllm.prompt()
autocoder/rag/cache/simple_cache.py CHANGED
@@ -98,7 +98,10 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
             process_file_in_multi_process, files_to_process)

         for file_info, result in zip(files_to_process, results):
-            self.update_cache(file_info, result)
+            if result:  # 只有当result不为空时才更新缓存
+                self.update_cache(file_info, result)
+            else:
+                logger.warning(f"Empty result for file: {file_info[0]}, skipping cache update")

         self.write_cache()

@@ -138,7 +141,10 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
             logger.info(f"{file_info[0]} is detected to be updated")
             try:
                 result = process_file_local(file_info[0])
-                self.update_cache(file_info, result)
+                if result:  # 只有当result不为空时才更新缓存
+                    self.update_cache(file_info, result)
+                else:
+                    logger.warning(f"Empty result for file: {file_info[0]}, skipping cache update")
             except Exception as e:
                 logger.error(
                     f"SimpleCache Error in process_queue: {e}")
autocoder/rag/loaders/docx_loader.py CHANGED
@@ -1,7 +1,7 @@
 from io import BytesIO
 import docx2txt
 from autocoder.utils._markitdown import MarkItDown
-
+import traceback

 def extract_text_from_docx_old(docx_path):
     with open(docx_path, "rb") as f:
@@ -16,5 +16,6 @@ def extract_text_from_docx(docx_path):
         md_converter = MarkItDown()
         result = md_converter.convert(docx_path)
         return result.text_content
-    except Exception as e:
+    except (BaseException, Exception) as e:
+        traceback.print_exc()
         return extract_text_from_docx_old(docx_path)
autocoder/rag/loaders/pdf_loader.py CHANGED
@@ -1,6 +1,7 @@
 from io import BytesIO
 from pypdf import PdfReader
 from autocoder.utils._markitdown import MarkItDown
+import traceback


 def extract_text_from_pdf_old(file_path):
@@ -18,5 +19,6 @@ def extract_text_from_pdf(file_path):
         md_converter = MarkItDown()
         result = md_converter.convert(file_path)
         return result.text_content
-    except Exception as e:
+    except (BaseException, Exception) as e:
+        traceback.print_exc()
         return extract_text_from_pdf_old(file_path)
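Both loaders follow the same shape: try the MarkItDown-based converter first, print the traceback on failure, and fall back to the legacy extractor. Since Exception already derives from BaseException, the (BaseException, Exception) clause effectively catches BaseException. A generic sketch of the fallback, with placeholder converter functions standing in for the real MarkItDown, pypdf and docx2txt calls.

    import traceback
    from typing import Callable


    def extract_with_fallback(path: str,
                              primary: Callable[[str], str],
                              fallback: Callable[[str], str]) -> str:
        # Same shape as the loaders above: try the preferred converter first,
        # print the traceback, then fall back to the legacy extractor.
        try:
            return primary(path)
        except (BaseException, Exception):
            traceback.print_exc()
            return fallback(path)


    def failing_converter(path: str) -> str:
        raise RuntimeError(f"could not convert {path}")


    def legacy_converter(path: str) -> str:
        return f"plain text extracted from {path}"


    print(extract_with_fallback("doc.pdf", failing_converter, legacy_converter))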
autocoder/rag/long_context_rag.py CHANGED
@@ -182,19 +182,25 @@ class LongContextRAG:
         使用以下文档和对话历史来提取相关信息。

         文档:
+        <documents>
         {% for doc in documents %}
         {{ doc }}
         {% endfor %}
+        </documents>

         对话历史:
+        <conversations>
         {% for msg in conversations %}
-        <{{ msg.role }}>: {{ msg.content }}
+        [{{ msg.role }}]:
+        {{ msg.content }}
+
         {% endfor %}
+        </conversations>

         请根据提供的文档内容、用户对话历史以及最后一个问题,提取并总结文档中与问题相关的重要信息。
         如果文档中没有相关信息,请回复"该文档中没有与问题相关的信息"。
         提取的信息尽量保持和原文中的一样,并且只输出这些信息。
-        """
+        """

     @byzerllm.prompt()
     def _answer_question(
@@ -202,9 +208,11 @@ class LongContextRAG:
     ) -> Generator[str, None, None]:
         """
         文档:
+        <documents>
         {% for doc in relevant_docs %}
         {{ doc }}
         {% endfor %}
+        </documents>

         使用以上文档来回答用户的问题。回答要求:

@@ -395,6 +403,7 @@ class LongContextRAG:
             inference_deep_thought=self.args.inference_deep_thought,
             inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
             precision=self.args.inference_compute_precision,
+            data_cells_max_num=self.args.data_cells_max_num,
         )
         conversations = conversations[:-1]
         new_conversations = llm_compute_engine.process_conversation(
@@ -576,6 +585,7 @@ class LongContextRAG:
             inference_enhance=not self.args.disable_inference_enhance,
             inference_deep_thought=self.args.inference_deep_thought,
             precision=self.args.inference_compute_precision,
+            data_cells_max_num=self.args.data_cells_max_num,
             debug=False,
         )
         new_conversations = llm_compute_engine.process_conversation(
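The prompt changes wrap the injected documents and chat history in <documents>/<conversations> tags and switch the speaker marker from <role>: to [role]:. A rough sketch of how such a template renders, using plain Jinja2 rather than the package's @byzerllm.prompt() decorator; the decorator renders the method docstring in a similar way, but the exact mechanics shown here are an assumption.

    from jinja2 import Template  # plain Jinja2 stand-in for the byzerllm prompt rendering

    template = Template(
        "文档:\n"
        "<documents>\n"
        "{% for doc in documents %}{{ doc }}\n{% endfor %}"
        "</documents>\n\n"
        "对话历史:\n"
        "<conversations>\n"
        "{% for msg in conversations %}[{{ msg.role }}]:\n{{ msg.content }}\n\n{% endfor %}"
        "</conversations>"
    )

    rendered = template.render(
        documents=["doc one", "doc two"],
        conversations=[{"role": "user", "content": "总结要点"},
                       {"role": "assistant", "content": "好的"}],
    )
    print(rendered)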
autocoder/rag/rag_entry.py CHANGED
@@ -1,4 +1,4 @@
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional,Union
 from autocoder.common import AutoCoderArgs, SourceCode
 from byzerllm import ByzerLLM
 from .simple_rag import SimpleRAG
@@ -7,7 +7,7 @@ class RAGFactory:


     @staticmethod
-    def get_rag(llm: ByzerLLM, args: AutoCoderArgs, path: str,**kargs) -> SimpleRAG | LongContextRAG:
+    def get_rag(llm: ByzerLLM, args: AutoCoderArgs, path: str,**kargs) -> Union[SimpleRAG, LongContextRAG]:
        """
        Factory method to get the appropriate RAG implementation based on arguments.

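The annotation change trades the X | Y union syntax (PEP 604, Python 3.10+) for typing.Union, which also works on Python 3.8/3.9 interpreters. The two spellings are equivalent to a type checker; a quick illustration with stub classes standing in for the real SimpleRAG and LongContextRAG:

    from typing import Union


    class SimpleRAG:  # stub stand-in for the real class
        ...


    class LongContextRAG:  # stub stand-in for the real class
        ...


    def get_rag_compat() -> Union[SimpleRAG, LongContextRAG]:  # runs on Python 3.8+
        return SimpleRAG()


    # def get_rag_modern() -> SimpleRAG | LongContextRAG:  # needs Python 3.10+ (PEP 604)
    #     ...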
autocoder/rag/utils.py CHANGED
@@ -9,13 +9,14 @@ import time
 from loguru import logger
 import traceback

+
 def process_file_in_multi_process(
     file_info: Tuple[str, str, float]
 ) -> List[SourceCode]:
     start_time = time.time()
     file_path, relative_path, _, _ = file_info
     try:
-        if file_path.endswith(".pdf"):
+        if file_path.endswith(".pdf"):
             content = extract_text_from_pdf(file_path)
             v = [
                 SourceCode(
@@ -24,7 +25,7 @@ def process_file_in_multi_process(
                     tokens=count_tokens_worker(content),
                 )
             ]
-        elif file_path.endswith(".docx"):
+        elif file_path.endswith(".docx"):
             content = extract_text_from_docx(file_path)
             v = [
                 SourceCode(
@@ -45,7 +46,8 @@
             ]
         elif file_path.endswith(".pptx"):
             slides = extract_text_from_ppt(file_path)
-            content = "".join(f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
+            content = "".join(
+                f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
             v = [
                 SourceCode(
                     module_name=f"##File: {file_path}",
@@ -65,15 +67,16 @@
             ]
         logger.info(f"Load file {file_path} in {time.time() - start_time}")
         return v
-    except Exception as e:
+    except (BaseException, Exception) as e:
         logger.error(f"Error processing file {file_path}: {str(e)}")
+        logger.error(f"Error type: {type(e).__name__}")
         return []


 def process_file_local(file_path: str) -> List[SourceCode]:
     start_time = time.time()
     try:
-        if file_path.endswith(".pdf"):
+        if file_path.endswith(".pdf"):
             content = extract_text_from_pdf(file_path)
             v = [
                 SourceCode(
@@ -82,7 +85,7 @@ def process_file_local(file_path: str) -> List[SourceCode]:
                     tokens=count_tokens(content),
                 )
             ]
-        elif file_path.endswith(".docx"):
+        elif file_path.endswith(".docx"):
             content = extract_text_from_docx(file_path)
             v = [
                 SourceCode(
@@ -103,7 +106,8 @@ def process_file_local(file_path: str) -> List[SourceCode]:
             ]
         elif file_path.endswith(".pptx"):
             slides = extract_text_from_ppt(file_path)
-            content = "".join(f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
+            content = "".join(
+                f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
             v = [
                 SourceCode(
                     module_name=f"##File: {file_path}",
@@ -123,7 +127,8 @@ def process_file_local(file_path: str) -> List[SourceCode]:
             ]
         logger.info(f"Load file {file_path} in {time.time() - start_time}")
         return v
-    except Exception as e:
+    except (BaseException, Exception) as e:
         logger.error(f"Error processing file {file_path}: {str(e)}")
+        logger.error(f"Error type: {type(e).__name__}")
         traceback.print_exc()
-        return []
+        return []
autocoder/suffixproject/__init__.py CHANGED
@@ -55,6 +55,8 @@ class SuffixProject:
             "node_modules",
             ".auto-coder",
             ".vscode",
+            "actions",
+            ".idea",
         ]

     @byzerllm.prompt()