auto-coder 0.1.172__py3-none-any.whl → 0.1.175__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.

@@ -13,11 +13,15 @@ class TokenLimiter:
     def __init__(
         self,
         count_tokens: Callable[[str], int],
-        token_limit: int,
+        full_text_limit: int,
+        segment_limit: int,
+        buff_limit: int,
         llm,
     ):
         self.count_tokens = count_tokens
-        self.token_limit = token_limit
+        self.full_text_limit = full_text_limit
+        self.segment_limit = segment_limit
+        self.buff_limit = buff_limit
         self.llm = llm
         self.first_round_full_docs = []
         self.second_round_extracted_docs = []
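For orientation, here is a minimal usage sketch of the new constructor. The import path, the stub count_tokens, and all limit values are assumptions for illustration; only the parameter names come from the diff.

    # Hypothetical wiring; only the parameter names are taken from the diff.
    from autocoder.rag.token_limiter import TokenLimiter  # import path is an assumption

    def count_tokens(text: str) -> int:
        # crude whitespace stand-in for the real tokenizer
        return len(text.split())

    limiter = TokenLimiter(
        count_tokens=count_tokens,
        full_text_limit=30000,  # budget for docs kept verbatim
        segment_limit=20000,    # budget for LLM-extracted segments
        buff_limit=5000,        # reserve left for the rest of the prompt
        llm=None,               # a real LLM client is expected here
    )

The single token_limit is gone: callers now split the context window into a full-text area, a segment area, and a buffer.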
@@ -88,19 +92,22 @@ class TokenLimiter:
         final_relevant_docs = []
         token_count = 0
         doc_num_count = 0
+
+        ## Implementation without window partitioning
         for doc in relevant_docs:
             doc_tokens = self.count_tokens(doc.source_code)
             doc_num_count += 1
-            if token_count + doc_tokens <= self.token_limit:
+            if token_count + doc_tokens <= self.full_text_limit + self.segment_limit:
                 final_relevant_docs.append(doc)
                 token_count += doc_tokens
             else:
                 break
 
+        ## If the window cannot hold all relevant docs, partitioning is needed
         if len(final_relevant_docs) < len(relevant_docs):
-
+            ## Fill the full_text partition first
            token_count = 0
-            new_token_limit = self.token_limit * 0.8
+            new_token_limit = self.full_text_limit
             doc_num_count = 0
             for doc in relevant_docs:
                 doc_tokens = self.count_tokens(doc.source_code)
@@ -111,8 +118,18 @@ class TokenLimiter:
                 else:
                     break
 
+            if len(self.first_round_full_docs) > 0:
+                remaining_tokens = (
+                    self.full_text_limit + self.segment_limit - token_count
+                )
+            else:
+                logger.warning(
+                    "Full text area is empty; this may be caused by a single doc being too long"
+                )
+                remaining_tokens = self.full_text_limit + self.segment_limit
+
+            ## Continue filling the segment partition
             sencond_round_start_time = time.time()
-            remaining_tokens = self.token_limit - new_token_limit
             remaining_docs = relevant_docs[len(self.first_round_full_docs) :]
             logger.info(
                 f"first round docs: {len(self.first_round_full_docs)} remaining docs: {len(remaining_docs)} index_filter_workers: {index_filter_workers}"
@@ -130,7 +147,7 @@ class TokenLimiter:
                     result = future.result()
                     if result and remaining_tokens > 0:
                         self.second_round_extracted_docs.append(result)
-                        tokens = self.count_tokens(result.source_code)
+                        tokens = result.tokens
                         if tokens > 0:
                             remaining_tokens -= tokens
                         else:
@@ -184,7 +201,13 @@ class TokenLimiter:
                     content += chunk + "\n"
 
                 return SourceCode(
-                    module_name=doc.module_name, source_code=content.strip()
+                    module_name=doc.module_name,
+                    source_code=content.strip(),
+                    tokens=self.count_tokens(content),
+                    metadata={
+                        "original_doc": doc.module_name,
+                        "chunk_ranges": json_objs,
+                    },
                 )
             except Exception as e:
                 if attempt < max_retries - 1:
@@ -196,5 +219,7 @@ class TokenLimiter:
                         f"Failed to process doc {doc.module_name} after {max_retries} attempts: {str(e)}"
                     )
                     return SourceCode(
-                        module_name=doc.module_name, source_code=content.strip()
+                        module_name=doc.module_name,
+                        source_code="",
+                        tokens=0
                     )
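Both return paths now populate the SourceCode fields added in this release: a successful extraction records its token count and chunk provenance, while the failure path returns an empty body with tokens=0. A hedged reconstruction of the shape the hunks imply; the field names come from the diff, the class definition itself is assumed:

    from dataclasses import dataclass, field
    from typing import Any, Dict

    # Reconstruction of the fields the diff uses; the real SourceCode class
    # in auto-coder may be defined differently (e.g. as a pydantic model).
    @dataclass
    class SourceCode:
        module_name: str
        source_code: str
        tokens: int = -1                                   # default is an assumption
        metadata: Dict[str, Any] = field(default_factory=dict)

    ok = SourceCode(
        module_name="docs/guide.md",
        source_code="...extracted content...",
        tokens=42,
        metadata={"original_doc": "docs/guide.md", "chunk_ranges": []},
    )
    failed = SourceCode(module_name="docs/broken.md", source_code="", tokens=0)

Carrying tokens on the object is also what lets the limiter read result.tokens in the earlier hunk instead of re-tokenizing the extracted text.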
@@ -0,0 +1,2 @@
+TOKENIZER_PATH = None
+TOKENIZER_MODEL = None
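The release also adds a tiny new module holding two process-wide globals. Presumably they are assigned once at startup and read wherever token counting is needed; the setter and reader below are illustrative assumptions, only the two names come from the diff.

    # Pattern sketch: module-level globals as a shared tokenizer holder.
    # Only TOKENIZER_PATH / TOKENIZER_MODEL appear in the diff; the rest
    # of this file is hypothetical.
    TOKENIZER_PATH = None
    TOKENIZER_MODEL = None

    def init_tokenizer(path: str, model) -> None:
        # called once during startup so all consumers share one tokenizer
        global TOKENIZER_PATH, TOKENIZER_MODEL
        TOKENIZER_PATH = path
        TOKENIZER_MODEL = model

    def count_tokens(text: str) -> int:
        if TOKENIZER_MODEL is None:
            raise RuntimeError("tokenizer not initialized")
        return len(TOKENIZER_MODEL.encode(text))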
@@ -121,7 +121,11 @@ class SuffixProject:
 
     def convert_to_source_code(self, file_path):
         module_name = file_path
-        source_code = self.read_file_content(file_path)
+        try:
+            source_code = self.read_file_content(file_path)
+        except Exception as e:
+            logger.warning(f"Failed to read file: {file_path}. Error: {str(e)}")
+            return None
         return SourceCode(module_name=module_name, source_code=source_code)
 
     def get_source_codes(self) -> Generator[SourceCode, None, None]:
@@ -152,7 +152,11 @@ class TSProject:
             return None
 
         module_name = file_path
-        source_code = self.read_file_content(file_path)
+        try:
+            source_code = self.read_file_content(file_path)
+        except Exception as e:
+            logger.warning(f"Failed to read file: {file_path}. Error: {str(e)}")
+            return None
 
         if not FileUtils.has_sufficient_content(source_code, min_line_count=1):
             return None
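Both SuffixProject and TSProject now degrade gracefully when a file cannot be read: convert_to_source_code logs a warning and returns None instead of raising, so callers are expected to filter. A minimal sketch of the consuming side; the generator body is an assumption based only on the get_source_codes signature visible above:

    from typing import Generator, Iterable

    def get_source_codes(paths: Iterable[str], convert) -> Generator:
        # Skip files whose conversion failed (convert returns None on
        # unreadable files, per the hunks above) and yield the rest.
        for file_path in paths:
            source_code = convert(file_path)
            if source_code is None:
                continue
            yield source_code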
autocoder/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.1.172"
+__version__ = "0.1.175"