auto-coder 0.1.176__py3-none-any.whl → 0.1.177__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: auto-coder
- Version: 0.1.176
+ Version: 0.1.177
  Summary: AutoCoder: AutoCoder
  Author: allwefantasy
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -7,7 +7,7 @@ autocoder/chat_auto_coder.py,sha256=i5xIuWlTqF0pJz8kXoa-_bW3Ic3SfCFvU2WJIMxrUHU,
  autocoder/chat_auto_coder_lang.py,sha256=QYtu5gWEQmWKVovR_qUZ8plySZarNFX_Onk-1vN9IiA,8524
  autocoder/command_args.py,sha256=ftWw6HnFUZPiQPt1oV-SfpHQe69XN3knaFy1lpROBcU,26854
  autocoder/lang.py,sha256=e-07rYTgimpxS8sm-AxKSmH4kKQX4N05YFHJBg9trVs,12598
- autocoder/version.py,sha256=yiACry4Tn-v8T0DYTTygfQmb9WG4pVkXXkB6IB4a1yg,23
+ autocoder/version.py,sha256=jOsxKVrfpzRExZ84ji63BWpk4HGAHjIouu90Hi2jqGI,23
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/agent/auto_tool.py,sha256=DBzip-P_T6ZtT2eHexPcusmKYD0h7ufzp7TLwXAY10E,11554
  autocoder/agent/coder.py,sha256=dnITYHqkcOip8zV4lywbkYNH9w7Q3qyYaUArJ4WPrTs,866
@@ -60,9 +60,9 @@ autocoder/pyproject/__init__.py,sha256=-2-ImQVw6e3NQZQOyDlHEP5b4xVs5ur2G5izB-JCa
  autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/rag/api_server.py,sha256=zokIlDJlk7ucRorSLQm80uICO1mecfmn4J2zVqEBskE,6786
  autocoder/rag/doc_filter.py,sha256=LqU8Wi6klwpY9WTHVtkioSHpmo9IWhRz39dzV1gvp6E,9315
- autocoder/rag/document_retriever.py,sha256=plwm8BpC55VJTUWCZyG4HsXYm-niqUsXaBMDLrLgYj0,23348
+ autocoder/rag/document_retriever.py,sha256=itypkUdY9vUITMAGhxHvWe-IZpxLi5h0A1mJuIVW6QA,23406
  autocoder/rag/llm_wrapper.py,sha256=xRbTBpLUH43Ah5jplL8WWWU-kjKfNgEJoUntLGBq5F4,2484
- autocoder/rag/long_context_rag.py,sha256=626f5-XFyTxmnbUJ_a9GiaMPuqWhTDVMcg0b0ePW_mQ,19471
+ autocoder/rag/long_context_rag.py,sha256=n3HkjIYZqjrHHLcGDb1u0SVEae7qChu6py30x-Jp-KQ,20160
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
  autocoder/rag/rag_entry.py,sha256=V1RJ8RGqM30DNPmzymv64rZjNRGWn6kfc8sRy_LECg0,2451
  autocoder/rag/raw_rag.py,sha256=yS2Ur6kG0IRjhCj2_VonwxjY_xls_E62jO5Gz5j2nqE,2952
@@ -71,7 +71,7 @@ autocoder/rag/simple_directory_reader.py,sha256=LkKreCkNdEOoL4fNhc3_hDoyyWTQUte4
  autocoder/rag/simple_rag.py,sha256=I902EUqOK1WM0Y2WFd7RzDJYofElvTZNLVCBtX5A9rc,14885
  autocoder/rag/token_checker.py,sha256=jc76x6KWmvVxds6W8juZfQGaoErudc2HenG3sNQfSLs,2819
  autocoder/rag/token_counter.py,sha256=9ujfI5xQvwzKpN9XFWQGnXpm0h1sL7kgIJxgposcxNo,2096
- autocoder/rag/token_limiter.py,sha256=nUxaaKJTWEi4J5c5Tz4BkwU4G1B74VxLlMinqu5s41A,10660
+ autocoder/rag/token_limiter.py,sha256=4cGy2kFCvbsM5CtONfuvLmXgpK_1HZTHehLTob08eks,10959
  autocoder/rag/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/rag/variable_holder.py,sha256=pDayuCnlKj7-bkn4iUHX5gea9UObddbi3ZnXotmxCs4,45
  autocoder/rag/loaders/__init__.py,sha256=EQHEZ5Cmz-mGP2SllUTvcIbYCnF7W149dNpNItfs0yE,304
@@ -95,9 +95,9 @@ autocoder/utils/request_event_queue.py,sha256=r3lo5qGsB1dIjzVQ05dnr0z_9Z3zOkBdP1
  autocoder/utils/request_queue.py,sha256=nwp6PMtgTCiuwJI24p8OLNZjUiprC-TsefQrhMI-yPE,3889
  autocoder/utils/rest.py,sha256=3tXA8KZG6jKz_tddHNLGx77Icee88WcUeesfNsgPno4,8790
  autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
- auto_coder-0.1.176.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
- auto_coder-0.1.176.dist-info/METADATA,sha256=-Jm1GW-7-Htzi_6l3MGRGTvl0ytk1ZyMGB2ZpiZoYa8,2352
- auto_coder-0.1.176.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- auto_coder-0.1.176.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
- auto_coder-0.1.176.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
- auto_coder-0.1.176.dist-info/RECORD,,
+ auto_coder-0.1.177.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ auto_coder-0.1.177.dist-info/METADATA,sha256=IBRPsgVVwqup2c8xX7d5xeLh2ay5kIiPBrxQthG2JBM,2352
+ auto_coder-0.1.177.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ auto_coder-0.1.177.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+ auto_coder-0.1.177.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+ auto_coder-0.1.177.dist-info/RECORD,,
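
The first hunk above is the package METADATA; the four sha256 hunks are the wheel's RECORD file, which lists every installed file with its size and a `sha256=` digest stored as URL-safe base64 with the trailing `=` padding stripped. A version bump therefore touches RECORD twice over: changed sources get new digests, and the `dist-info` paths are renamed. A quick sketch to reproduce a digest; its output should match the `autocoder/version.py` entry above if that file is exactly those 23 bytes:

```python
import base64
import hashlib

def record_hash(payload: bytes) -> str:
    # RECORD digests are sha256, urlsafe-base64-encoded, '=' padding removed.
    digest = hashlib.sha256(payload).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# 23 bytes, matching the size recorded for autocoder/version.py.
content = b'__version__ = "0.1.177"'
print(record_hash(content), len(content))
```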
autocoder/rag/document_retriever.py CHANGED
@@ -159,6 +159,8 @@ def process_file_local(file_path: str) -> List[SourceCode]:
  return v
  except Exception as e:
  logger.error(f"Error processing file {file_path}: {str(e)}")
+ import traceback
+ traceback.print_exc()
  return []


@@ -392,7 +394,7 @@ class AutoCoderRAGAsyncUpdateQueue:
  elif isinstance(file_list, AddOrUpdateEvent):
  for file_info in file_list.file_infos:
  logger.info(f"{file_info[0]} is detected to be updated")
- result = process_file_local(file_info)
+ result = process_file_local(file_info[0])
  self.update_cache(file_info, result)

  self.write_cache()
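
Two small robustness fixes land in document_retriever.py: failures now print a full traceback rather than only the exception message, and `process_file_local` receives `file_info[0]` instead of the whole tuple. The adjacent log line already indexes `file_info[0]`, which suggests `file_infos` holds (path, ...) tuples; a minimal sketch of the failure mode under that assumption:

```python
import tempfile
from typing import List, Tuple

def process_file_local(file_path: str) -> List[str]:
    # Stand-in for the real loader: it only works on a path string.
    with open(file_path, "r", encoding="utf-8") as f:
        return [f.read()]

with tempfile.NamedTemporaryFile("w", suffix=".md", delete=False) as tmp:
    tmp.write("hello")
    path = tmp.name

# Assumed (path, modify_time) layout; only the path matters here.
file_infos: List[Tuple[str, float]] = [(path, 0.0)]

for file_info in file_infos:
    # Old call: process_file_local(file_info) raised a TypeError,
    # because open() received a tuple instead of a str.
    print(process_file_local(file_info[0]))  # the 0.1.177 fix
```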
autocoder/rag/long_context_rag.py CHANGED
@@ -7,7 +7,6 @@ import byzerllm
  import pandas as pd
  import pathspec
  from byzerllm import ByzerLLM
- from jinja2 import Template
  from loguru import logger
  from openai import OpenAI
  from rich.console import Console
@@ -30,6 +29,11 @@ from autocoder.rag.token_limiter import TokenLimiter
  from tokenizers import Tokenizer
  from autocoder.rag import variable_holder

+ try:
+ from autocoder_pro.rag.llm_compute import LLMComputeEngine
+ except ImportError:
+ LLMComputeEngine = None
+

  class LongContextRAG:
  def __init__(
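
The deleted `jinja2` import was unused, and the new `try/except` block is the usual optional-dependency pattern: bind `LLMComputeEngine` from the separate `autocoder_pro` package when it is installed, otherwise bind `None` and branch at call time (see the `if LLMComputeEngine is not None:` hunk further down). A minimal sketch of the pattern; the `run` method is hypothetical, not the real autocoder_pro API:

```python
try:
    from autocoder_pro.rag.llm_compute import LLMComputeEngine  # optional extra
except ImportError:
    LLMComputeEngine = None  # base install: feature degrades gracefully

def answer(question: str) -> str:
    if LLMComputeEngine is not None:
        # Pro path: delegate to the optional engine (hypothetical usage).
        return LLMComputeEngine().run(question)
    # Fallback path shipped with the base package.
    return f"(base pipeline) {question}"

print(answer("What changed in 0.1.177?"))
```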
@@ -119,13 +123,13 @@
  self.monitor_mode,
  ## Make sure the full-text zone can fit at least one file
  single_file_token_limit=self.full_text_limit - 100,
- disable_auto_window=self.args.disable_auto_window
+ disable_auto_window=self.args.disable_auto_window,
  )

  self.doc_filter = DocFilter(
  self.index_model, self.args, on_ray=self.on_ray, path=self.path
  )
-
+
  doc_num = 0
  token_num = 0
  token_counts = []
@@ -151,7 +155,7 @@
  f" Max doc tokens: {max(token_counts) if token_counts else 0}\n"
  f" Min doc tokens: {min(token_counts) if token_counts else 0}\n"
  f" Avg doc tokens: {avg_tokens:.2f}\n"
- f" Median doc tokens: {median_tokens:.2f}\n"
+ f" Median doc tokens: {median_tokens:.2f}\n"
  )

  def count_tokens(self, text: str) -> int:
@@ -391,8 +395,13 @@
  relevant_docs_info = []
  for doc in relevant_docs:
  info = f"- {doc.module_name.replace(self.path,'',1)}"
- if 'original_docs' in doc.metadata:
- original_docs = ", ".join([doc.replace(self.path,"",1) for doc in doc.metadata['original_docs']])
+ if "original_docs" in doc.metadata:
+ original_docs = ", ".join(
+ [
+ doc.replace(self.path, "", 1)
+ for doc in doc.metadata["original_docs"]
+ ]
+ )
  info += f" (Original docs: {original_docs})"
  relevant_docs_info.append(info)

@@ -411,7 +420,7 @@
  segment_limit=self.segment_limit,
  buff_limit=self.buff_limit,
  llm=self.llm,
- disable_segment_reorder = self.args.disable_segment_reorder
+ disable_segment_reorder=self.args.disable_segment_reorder,
  )
  final_relevant_docs = token_limiter.limit_tokens(
  relevant_docs=relevant_docs,
@@ -443,13 +452,20 @@

  # Add relevant docs information
  final_relevant_docs_info = []
- for doc in relevant_docs:
+ for doc in relevant_docs:
  info = f"- {doc.module_name.replace(self.path,'',1)}"
- if 'original_docs' in doc.metadata:
- original_docs = ", ".join([doc.replace(self.path,"",1) for doc in doc.metadata['original_docs']])
+ if "original_docs" in doc.metadata:
+ original_docs = ", ".join(
+ [
+ doc.replace(self.path, "", 1)
+ for doc in doc.metadata["original_docs"]
+ ]
+ )
  info += f" (Original docs: {original_docs})"
  if "chunk_ranges" in doc.metadata:
- chunk_ranges = json.dumps(doc.metadata['chunk_ranges'],ensure_ascii=False)
+ chunk_ranges = json.dumps(
+ doc.metadata["chunk_ranges"], ensure_ascii=False
+ )
  info += f" (Chunk ranges: {chunk_ranges})"
  final_relevant_docs_info.append(info)

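
Both reformatted blocks above build the same human-readable summary: strip the workspace prefix from each path with `str.replace(prefix, '', 1)`, then append `Original docs` and `Chunk ranges` details when the metadata carries them. A sketch with plain dicts standing in for the package's `SourceCode` objects (the `chunk_ranges` shape here is assumed):

```python
import json

path = "/workspace/project"
docs = [
    {
        "module_name": "/workspace/project/docs/guide.md",
        "metadata": {
            "original_docs": ["/workspace/project/docs/guide.md"],
            "chunk_ranges": [[0, 120], [121, 250]],  # hypothetical shape
        },
    }
]

info_lines = []
for doc in docs:
    # replace(..., 1) removes only the leading occurrence of the prefix.
    info = f"- {doc['module_name'].replace(path, '', 1)}"
    if "original_docs" in doc["metadata"]:
        original_docs = ", ".join(
            d.replace(path, "", 1) for d in doc["metadata"]["original_docs"]
        )
        info += f" (Original docs: {original_docs})"
    if "chunk_ranges" in doc["metadata"]:
        chunk_ranges = json.dumps(doc["metadata"]["chunk_ranges"], ensure_ascii=False)
        info += f" (Chunk ranges: {chunk_ranges})"
    info_lines.append(info)

print("\n".join(info_lines))
```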
@@ -465,10 +481,29 @@

  # Log the panel using rich
  console.print(panel)
-
+
  request_tokens = sum([doc.tokens for doc in relevant_docs])
- target_model = model or self.llm.default_model_name
- logger.info(f"Start to send to model {target_model} with {request_tokens} tokens")
+ target_model = model or self.llm.default_model_name
+ logger.info(
+ f"Start to send to model {target_model} with {request_tokens} tokens"
+ )
+
+ if LLMComputeEngine is not None:
+ llm_compute_engine = LLMComputeEngine(llm=self.llm)
+ new_conversations = llm_compute_engine.process_conversation(
+ conversations, query, [doc.source_code for doc in relevant_docs]
+ )
+
+ return (
+ llm_compute_engine.stream_chat_oai(
+ conversations=new_conversations,
+ model=model,
+ role_mapping=role_mapping,
+ llm_config=llm_config,
+ delta_mode=True,
+ ),
+ context,
+ )

  new_conversations = conversations[:-1] + [
  {
@@ -480,10 +515,6 @@
  }
  ]

- # # Serialize new_conversations to JSON and write it to a file
- # with open('/tmp/rag.json', 'w', encoding='utf-8') as f:
- # json.dump(new_conversations, f, ensure_ascii=False, indent=2)
-
  chunks = self.llm.stream_chat_oai(
  conversations=new_conversations,
  model=model,
@@ -491,4 +522,5 @@
  llm_config=llm_config,
  delta_mode=True,
  )
+
  return (chunk[0] for chunk in chunks), context
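
Whichever branch runs, the method hands back a `(stream, context)` pair whose stream yields incremental text deltas (`delta_mode=True`); the final line keeps only `chunk[0]` because the base `stream_chat_oai` chunks appear to be `(text, metadata)`-style tuples. A hedged sketch of draining such a pair; `rag_chat` is a hypothetical stand-in, not the package's API:

```python
from typing import Generator, Tuple

def rag_chat(question: str) -> Tuple[Generator[str, None, None], str]:
    # Stand-in for the method above: yields incremental text deltas
    # plus an opaque context value.
    def stream() -> Generator[str, None, None]:
        for delta in ["The ", "answer ", "is 42."]:
            yield delta
    return stream(), "context-id-123"

stream, context = rag_chat("What is the answer?")
answer = "".join(stream)  # concatenate delta_mode pieces into the full reply
print(answer, context)
```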
autocoder/rag/token_limiter.py CHANGED
@@ -17,7 +17,7 @@ class TokenLimiter:
  segment_limit: int,
  buff_limit: int,
  llm,
- disable_segment_reorder:bool
+ disable_segment_reorder: bool,
  ):
  self.count_tokens = count_tokens
  self.full_text_limit = full_text_limit
@@ -96,8 +96,7 @@
  doc_num_count = 0

  reorder_relevant_docs = []
- added_docs = set()
-
+
  ## Document segmentation (for oversized single documents) and reordering logic
  ## 1. Background: during retrieval, many documents get split into multiple segments
  ## 2. Problem: recalled segments are ranked by relevance score, so they may be out of their original order; this reinforces LLM hallucinations.
@@ -106,20 +105,33 @@
  ## a) Option 1 (keep positions): swap a document's segments into place according to chunk_index
  ## b) Option 2 (current implementation): iterate over the documents; on finding segment A of a document, immediately collect all of that document's other segments,
  ## sort them, and insert the sorted segments at segment A's current position.
- ## TODO:
- ## 1. Later, decide via parameters whether to enable reordering and which strategy to use
- if not self.disable_segment_reorder:
+ ## TODO:
+ ## 1. Later, decide via parameters whether to enable reordering and which strategy to use
+ if not self.disable_segment_reorder:
+ num_count = 0
  for doc in relevant_docs:
- if doc.metadata.get('original_doc') and doc.metadata.get('chunk_index'):
- if doc.metadata['original_doc'] not in added_docs:
- original_doc = doc.metadata['original_doc']
- chunks = [d for d in relevant_docs if d.metadata.get('original_doc') == original_doc]
- chunks.sort(key=lambda x: x.metadata['chunk_index'])
- reorder_relevant_docs.extend(chunks)
- added_docs.add(original_doc)
- elif doc not in added_docs:
- reorder_relevant_docs.append(doc)
- added_docs.add(doc.module_name)
+ num_count += 1
+ reorder_relevant_docs.append(doc)
+ if "original_doc" in doc.metadata and "chunk_index" in doc.metadata:
+ original_doc_name = doc.metadata["original_doc"].module_name
+
+ temp_docs = []
+ for temp_doc in relevant_docs[num_count:]:
+ if (
+ "original_doc" in temp_doc.metadata
+ and "chunk_index" in temp_doc.metadata
+ ):
+ if (
+ temp_doc.metadata["original_doc"].module_name
+ == original_doc_name
+ ):
+ if temp_doc not in reorder_relevant_docs:
+ temp_docs.append(temp_doc)
+
+ temp_docs.sort(key=lambda x: x.metadata["chunk_index"])
+ reorder_relevant_docs.extend(temp_docs)
+ else:
+ reorder_relevant_docs = relevant_docs

  ## Non-windowed partitioning implementation
  for doc in reorder_relevant_docs:
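
The rewritten loop implements option (b) from the comments: walk the relevance-ordered list and, on the first segment of any document, splice that document's remaining segments in right after it, sorted by `chunk_index`. A self-contained sketch of the same idea, using dicts in place of `SourceCode` and an explicit `seen` set so no segment is emitted twice:

```python
from typing import Dict, List

def reorder_segments(relevant_docs: List[Dict]) -> List[Dict]:
    # Keep the overall relevance order, but group each document's
    # segments together, sorted by their position in the original file.
    reordered: List[Dict] = []
    seen = set()  # object identities already emitted
    for i, doc in enumerate(relevant_docs):
        if id(doc) in seen:
            continue
        reordered.append(doc)
        seen.add(id(doc))
        meta = doc["metadata"]
        if "original_doc" in meta and "chunk_index" in meta:
            # Pull the later segments of the same document forward.
            siblings = [
                d
                for d in relevant_docs[i + 1:]
                if id(d) not in seen
                and d["metadata"].get("original_doc") == meta["original_doc"]
                and "chunk_index" in d["metadata"]
            ]
            siblings.sort(key=lambda d: d["metadata"]["chunk_index"])
            reordered.extend(siblings)
            seen.update(id(d) for d in siblings)
    return reordered

docs = [
    {"name": "a#2", "metadata": {"original_doc": "a.md", "chunk_index": 2}},
    {"name": "b#1", "metadata": {"original_doc": "b.md", "chunk_index": 1}},
    {"name": "a#1", "metadata": {"original_doc": "a.md", "chunk_index": 1}},
]
print([d["name"] for d in reorder_segments(docs)])  # ['a#2', 'a#1', 'b#1']
```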
@@ -247,7 +259,5 @@
  f"Failed to process doc {doc.module_name} after {max_retries} attempts: {str(e)}"
  )
  return SourceCode(
- module_name=doc.module_name,
- source_code="",
- tokens= 0
+ module_name=doc.module_name, source_code="", tokens=0
  )
autocoder/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.176"
+ __version__ = "0.1.177"