auto-coder 0.1.270__py3-none-any.whl → 0.1.272__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.

@@ -1,7 +1,7 @@
  from autocoder.common import AutoCoderArgs, SourceCode
  from pydantic import BaseModel
  import re
- from typing import Optional
+ from typing import Optional, List


  class DocRelevance(BaseModel):
@@ -23,6 +23,17 @@ class FilterDoc(BaseModel):
      task_timing: TaskTiming


+ class DocFilterResult(BaseModel):
+     # Note: docs only keeps the documents from the most recent successful filter pass
+     docs: List[FilterDoc]
+     # Note: raw_docs keeps all documents
+     raw_docs: List[FilterDoc]
+     input_tokens_counts: List[int]
+     generated_tokens_counts: List[int]
+     durations: List[float]
+     model_name: str = "unknown"
+
+
  def parse_relevance(text: Optional[str]) -> Optional[DocRelevance]:
      if text is None:
          return None
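
The new DocFilterResult model bundles the filtered documents together with per-call token accounting. A minimal usage sketch follows, assuming this first file is autocoder/rag/relevant_utils.py (which the new import added in the TokenLimiter hunks below suggests); build_filter_result and its arguments are illustrative, not part of the package:

from typing import List
from autocoder.rag.relevant_utils import DocFilterResult, FilterDoc

def build_filter_result(kept: List[FilterDoc], examined: List[FilterDoc]) -> DocFilterResult:
    # docs keeps only the documents from the most recent successful filter pass;
    # raw_docs keeps every document that was examined.
    return DocFilterResult(
        docs=kept,
        raw_docs=examined,
        input_tokens_counts=[],   # per-LLM-call stats can be appended as filtering runs
        generated_tokens_counts=[],
        durations=[],
        model_name="unknown",
    )
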
@@ -7,7 +7,9 @@ from autocoder.common import SourceCode
  from byzerllm.utils.client.code_utils import extract_code
  import byzerllm
  from byzerllm import ByzerLLM
-
+ from autocoder.rag.relevant_utils import TaskTiming
+ from byzerllm import MetaHolder
+ from autocoder.rag.token_limiter_utils import TokenLimiterResult

  class TokenLimiter:
      def __init__(
@@ -94,10 +96,24 @@ class TokenLimiter:
          relevant_docs: List[SourceCode],
          conversations: List[Dict[str, str]],
          index_filter_workers: int,
-     ) -> List[SourceCode]:
+     ) -> TokenLimiterResult:
+         logger.info(f"=== TokenLimiter Starting ===")
+         logger.info(f"Configuration: full_text_limit={self.full_text_limit}, segment_limit={self.segment_limit}, buff_limit={self.buff_limit}")
+         logger.info(f"Processing {len(relevant_docs)} source code documents")
+
+         start_time = time.time()
          final_relevant_docs = []
          token_count = 0
          doc_num_count = 0
+         model_name = self.chunk_llm.default_model_name or "unknown"
+         token_limiter_result = TokenLimiterResult(
+             docs=[],
+             raw_docs=[],
+             input_tokens_counts=[],
+             generated_tokens_counts=[],
+             durations=[],
+             model_name=model_name
+         )

          reorder_relevant_docs = []

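
Note that the hunk above is a breaking change for callers: limit_tokens used to return a plain List[SourceCode] and now returns a TokenLimiterResult, with the selected documents moved into its docs field. A rough caller-side migration sketch; token_limiter, docs, and conversations are assumed to already exist in the caller's scope:

# Before this diff: a plain list came back.
# selected_docs = token_limiter.limit_tokens(docs, conversations, index_filter_workers=5)

# After this diff: unwrap the documents and the new telemetry from the result model.
result = token_limiter.limit_tokens(docs, conversations, index_filter_workers=5)
selected_docs = result.docs
stats = {
    "model": result.model_name,
    "llm_calls": len(result.durations),
    "input_tokens": sum(result.input_tokens_counts),
    "generated_tokens": sum(result.generated_tokens_counts),
}
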
@@ -112,6 +128,7 @@ class TokenLimiter:
          ## TODO:
          ## 1. In the future, use parameters to decide whether to enable reordering and which reordering strategy to use
          if not self.disable_segment_reorder:
+             logger.info("Document reordering enabled - organizing segments by original document order")
              num_count = 0
              for doc in relevant_docs:
                  num_count += 1
@@ -135,8 +152,11 @@ class TokenLimiter:
                  temp_docs.sort(key=lambda x: x.metadata["chunk_index"])
                  reorder_relevant_docs.extend(temp_docs)
          else:
+             logger.info("Document reordering disabled - using original retrieval order")
              reorder_relevant_docs = relevant_docs

+         logger.info(f"After reordering: {len(reorder_relevant_docs)} documents to process")
+
          ## Non-window partition implementation
          for doc in reorder_relevant_docs:
              doc_tokens = self.count_tokens(doc.source_code)
@@ -149,10 +169,15 @@ class TokenLimiter:

          ## If the window cannot hold all the relevant docs, we need to partition
          if len(final_relevant_docs) < len(reorder_relevant_docs):
+             logger.info(f"Token limit exceeded: {len(final_relevant_docs)}/{len(reorder_relevant_docs)} docs fit in window")
+             logger.info(f"=== Starting First Round: Full Text Loading ===")
+
              ## Fill the full_text partition first
              token_count = 0
              new_token_limit = self.full_text_limit
              doc_num_count = 0
+             first_round_start_time = time.time()
+
              for doc in reorder_relevant_docs:
                  doc_tokens = self.count_tokens(doc.source_code)
                  doc_num_count += 1
@@ -161,11 +186,18 @@ class TokenLimiter:
                      token_count += doc_tokens
                  else:
                      break
+
+             first_round_duration = time.time() - first_round_start_time
+             logger.info(
+                 f"First round complete: loaded {len(self.first_round_full_docs)} documents"
+                 f" ({token_count} tokens) in {first_round_duration:.2f}s"
+             )

              if len(self.first_round_full_docs) > 0:
                  remaining_tokens = (
                      self.full_text_limit + self.segment_limit - token_count
                  )
+                 logger.info(f"Remaining token budget: {remaining_tokens}")
              else:
                  logger.warning(
                      "Full text area is empty, this is may caused by the single doc is too long"
@@ -175,49 +207,117 @@ class TokenLimiter:
              ## Continue filling the segment partition
              sencond_round_start_time = time.time()
              remaining_docs = reorder_relevant_docs[len(self.first_round_full_docs) :]
+
              logger.info(
-                 f"first round docs: {len(self.first_round_full_docs)} remaining docs: {len(remaining_docs)} index_filter_workers: {index_filter_workers}"
+                 f"=== Starting Second Round: Chunk Extraction ==="
+                 f"\n * Documents to process: {len(remaining_docs)}"
+                 f"\n * Remaining token budget: {remaining_tokens}"
+                 f"\n * Thread pool size: {index_filter_workers or 5}"
              )

+             total_processed = 0
+             successful_extractions = 0
+
+
              with ThreadPoolExecutor(max_workers=index_filter_workers or 5) as executor:
-                 future_to_doc = {
-                     executor.submit(self.process_range_doc, doc, conversations): doc
-                     for doc in remaining_docs
-                 }
+                 future_to_doc = {}
+                 for doc in remaining_docs:
+                     submit_time = time.time()
+                     future = executor.submit(self.process_range_doc, doc, conversations)
+                     future_to_doc[future] = (doc, submit_time)

                  for future in as_completed(future_to_doc):
-                     doc = future_to_doc[future]
+                     doc, submit_time = future_to_doc[future]
+                     end_time = time.time()
+                     total_processed += 1
+                     progress_percent = (total_processed / len(remaining_docs)) * 100
+
                      try:
                          result = future.result()
+                         task_duration = end_time - submit_time
+
                          if result and remaining_tokens > 0:
                              self.second_round_extracted_docs.append(result)
+                             token_limiter_result.raw_docs.append(result)
+
+                             if "rag" in result.metadata and "chunk" in result.metadata["rag"]:
+                                 chunk_meta = result.metadata["rag"]["chunk"]
+                                 token_limiter_result.input_tokens_counts.append(chunk_meta.get("input_tokens_count", 0))
+                                 token_limiter_result.generated_tokens_counts.append(chunk_meta.get("generated_tokens_count", 0))
+                                 token_limiter_result.durations.append(chunk_meta.get("duration", 0))
+
                              tokens = result.tokens
+                             successful_extractions += 1
+
+                             logger.info(
+                                 f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+                                 f"\n - File: {doc.module_name}"
+                                 f"\n - Chunks: {len(result.metadata.get('chunk_ranges', []))}"
+                                 f"\n - Extracted tokens: {tokens}"
+                                 f"\n - Remaining tokens: {remaining_tokens - tokens if tokens > 0 else remaining_tokens}"
+                                 f"\n - Processing time: {task_duration:.2f}s"
+                             )
+
                              if tokens > 0:
                                  remaining_tokens -= tokens
                              else:
                                  logger.warning(
                                      f"Token count for doc {doc.module_name} is 0 or negative"
                                  )
+                         elif result:
+                             logger.info(
+                                 f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+                                 f"\n - File: {doc.module_name}"
+                                 f"\n - Skipped: Token budget exhausted ({remaining_tokens} remaining)"
+                                 f"\n - Processing time: {task_duration:.2f}s"
+                             )
+                         else:
+                             logger.warning(
+                                 f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+                                 f"\n - File: {doc.module_name}"
+                                 f"\n - Result: No content extracted"
+                                 f"\n - Processing time: {task_duration:.2f}s"
+                             )
                      except Exception as exc:
                          logger.error(
-                             f"Processing doc {doc.module_name} generated an exception: {exc}"
+                             f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+                             f"\n - File: {doc.module_name}"
+                             f"\n - Error: {exc}"
+                             f"\n - Processing time: {end_time - submit_time:.2f}s"
                          )

              final_relevant_docs = (
                  self.first_round_full_docs + self.second_round_extracted_docs
              )
              self.sencond_round_time = time.time() - sencond_round_start_time
+             total_time = time.time() - start_time
+
              logger.info(
-                 f"Second round processing time: {self.sencond_round_time:.2f} seconds"
+                 f"=== Second round complete ==="
+                 f"\n * Time: {self.sencond_round_time:.2f}s"
+                 f"\n * Documents processed: {total_processed}/{len(remaining_docs)}"
+                 f"\n * Successful extractions: {successful_extractions}"
+                 f"\n * Extracted tokens: {sum(doc.tokens for doc in self.second_round_extracted_docs)}"
              )
-
-         return final_relevant_docs
+         else:
+             logger.info(f"All {len(reorder_relevant_docs)} documents fit within token limits")
+             total_time = time.time() - start_time
+
+         logger.info(
+             f"=== TokenLimiter Complete ==="
+             f"\n * Total time: {total_time:.2f}s"
+             f"\n * Documents selected: {len(final_relevant_docs)}/{len(relevant_docs)}"
+             f"\n * Total tokens: {sum(doc.tokens for doc in final_relevant_docs)}"
+         )
+         token_limiter_result.docs = final_relevant_docs
+         return token_limiter_result

      def process_range_doc(
          self, doc: SourceCode, conversations: List[Dict[str, str]], max_retries=3
      ) -> SourceCode:
          for attempt in range(max_retries):
              content = ""
+             start_time = time.time()
              try:
                  source_code_with_line_number = ""
                  source_code_lines = doc.source_code.split("\n")
@@ -225,14 +325,19 @@ class TokenLimiter:
                      source_code_with_line_number += f"{idx+1} {line}\n"

                  llm = self.chunk_llm
+                 model_name = llm.default_model_name or "unknown"
+                 meta_holder = MetaHolder()

+                 extraction_start_time = time.time()
                  extracted_info = (
                      self.extract_relevance_range_from_docs_with_conversation.options(
                          {"llm_config": {"max_length": 100}}
                      )
-                     .with_llm(llm)
+                     .with_llm(llm).with_meta(meta_holder)
                      .run(conversations, [source_code_with_line_number])
                  )
+                 extraction_duration = time.time() - extraction_start_time
+
                  json_str = extract_code(extracted_info)[0][1]
                  json_objs = json.loads(json_str)
@@ -242,23 +347,59 @@ class TokenLimiter:
                      chunk = "\n".join(source_code_lines[start_line:end_line])
                      content += chunk + "\n"

+                 total_duration = time.time() - start_time
+
+
+                 meta = meta_holder.get_meta_model()
+
+                 input_tokens_count = 0
+                 generated_tokens_count = 0
+
+                 if meta:
+                     input_tokens_count = meta.input_tokens_count
+                     generated_tokens_count = meta.generated_tokens_count
+
+                 logger.debug(
+                     f"Document {doc.module_name} chunk extraction details:"
+                     f"\n - Chunks found: {len(json_objs)}"
+                     f"\n - Input tokens: {input_tokens_count}"
+                     f"\n - Generated tokens: {generated_tokens_count}"
+                     f"\n - LLM time: {extraction_duration:.2f}s"
+                     f"\n - Total processing time: {total_duration:.2f}s"
+                 )
+
+                 if "rag" not in doc.metadata:
+                     doc.metadata["rag"] = {}
+
+                 doc.metadata["rag"]["chunk"] = {
+                     "original_doc": doc.module_name,
+                     "chunk_ranges": json_objs,
+                     "processing_time": total_duration,
+                     "llm_time": extraction_duration,
+
+                     "input_tokens_count": input_tokens_count,
+                     "generated_tokens_count": generated_tokens_count,
+                     "duration": extraction_duration,
+                     "chunk_model":model_name
+                 }
+
                  return SourceCode(
                      module_name=doc.module_name,
                      source_code=content.strip(),
-                     tokens=self.count_tokens(content),
+                     tokens=input_tokens_count + generated_tokens_count,
                      metadata={
-                         "original_doc": doc.module_name,
-                         "chunk_ranges": json_objs,
+                         **doc.metadata
                      },
                  )
              except Exception as e:
+                 err_duration = time.time() - start_time
                  if attempt < max_retries - 1:
                      logger.warning(
-                         f"Error processing doc {doc.module_name}, retrying... (Attempt {attempt + 1}) Error: {str(e)}"
+                         f"Error processing doc {doc.module_name}, retrying... (Attempt {attempt + 1}) Error: {str(e)}, duration: {err_duration:.2f}s"
                      )
                  else:
                      logger.error(
-                         f"Failed to process doc {doc.module_name} after {max_retries} attempts: {str(e)}"
+                         f"Failed to process doc {doc.module_name} after {max_retries} attempts: {str(e)}, total duration: {err_duration:.2f}s"
                      )
                      return SourceCode(
                          module_name=doc.module_name, source_code="", tokens=0
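
The process_range_doc changes above establish the per-call accounting pattern used throughout this release: a byzerllm MetaHolder is attached to the prompt call with .with_meta(...), and input/generated token counts are read back from get_meta_model() afterwards. A reduced sketch of just that pattern, using only the calls visible in this diff; prompt_func stands in for a prompt callable such as extract_relevance_range_from_docs_with_conversation:

import time
from byzerllm import MetaHolder

def run_with_accounting(prompt_func, llm, *run_args):
    # Attach a MetaHolder so the call records token usage, then time the call.
    meta_holder = MetaHolder()
    started = time.time()
    output = prompt_func.with_llm(llm).with_meta(meta_holder).run(*run_args)
    duration = time.time() - started

    meta = meta_holder.get_meta_model()
    input_tokens = meta.input_tokens_count if meta else 0
    generated_tokens = meta.generated_tokens_count if meta else 0
    return output, input_tokens, generated_tokens, duration
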
@@ -0,0 +1,13 @@
+ import pydantic
+ from typing import List
+ from autocoder.common import SourceCode
+
+ class TokenLimiterResult(pydantic.BaseModel):
+     # Note: docs only keeps the result documents
+     docs: List[SourceCode]
+     # Note: raw_docs keeps all documents
+     raw_docs: List[SourceCode]
+     input_tokens_counts: List[int]
+     generated_tokens_counts: List[int]
+     durations: List[float]
+     model_name: str = "unknown"
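
This new model (imported above as autocoder.rag.token_limiter_utils) mirrors DocFilterResult but carries SourceCode entries; the three parallel lists hold roughly one entry per second-round chunk-extraction call. A small illustrative helper for summarizing it; summarize_limit is not part of the package:

from autocoder.rag.token_limiter_utils import TokenLimiterResult

def summarize_limit(result: TokenLimiterResult) -> str:
    # docs = documents selected for the final context window,
    # raw_docs = every chunk-extracted document that came back from the LLM.
    return (
        f"model={result.model_name} "
        f"docs={len(result.docs)} raw_docs={len(result.raw_docs)} "
        f"llm_calls={len(result.durations)} "
        f"input_tokens={sum(result.input_tokens_counts)} "
        f"generated_tokens={sum(result.generated_tokens_counts)} "
        f"llm_time={sum(result.durations):.2f}s"
    )
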
autocoder/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.270"
+ __version__ = "0.1.272"