auto-coder 0.1.374__py3-none-any.whl → 0.1.376__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

Files changed (61)
  1. {auto_coder-0.1.374.dist-info → auto_coder-0.1.376.dist-info}/METADATA +2 -2
  2. {auto_coder-0.1.374.dist-info → auto_coder-0.1.376.dist-info}/RECORD +27 -57
  3. autocoder/agent/base_agentic/base_agent.py +202 -52
  4. autocoder/agent/base_agentic/default_tools.py +38 -6
  5. autocoder/agent/base_agentic/tools/list_files_tool_resolver.py +83 -43
  6. autocoder/agent/base_agentic/tools/read_file_tool_resolver.py +88 -25
  7. autocoder/agent/base_agentic/tools/replace_in_file_tool_resolver.py +171 -62
  8. autocoder/agent/base_agentic/tools/search_files_tool_resolver.py +101 -56
  9. autocoder/agent/base_agentic/tools/talk_to_group_tool_resolver.py +5 -0
  10. autocoder/agent/base_agentic/tools/talk_to_tool_resolver.py +5 -0
  11. autocoder/agent/base_agentic/tools/write_to_file_tool_resolver.py +145 -32
  12. autocoder/auto_coder_rag.py +80 -11
  13. autocoder/models.py +2 -2
  14. autocoder/rag/agentic_rag.py +217 -0
  15. autocoder/rag/cache/local_duckdb_storage_cache.py +63 -33
  16. autocoder/rag/conversation_to_queries.py +37 -5
  17. autocoder/rag/long_context_rag.py +161 -41
  18. autocoder/rag/tools/__init__.py +10 -0
  19. autocoder/rag/tools/recall_tool.py +163 -0
  20. autocoder/rag/tools/search_tool.py +126 -0
  21. autocoder/rag/types.py +36 -0
  22. autocoder/utils/_markitdown.py +59 -13
  23. autocoder/version.py +1 -1
  24. autocoder/agent/agentic_edit.py +0 -833
  25. autocoder/agent/agentic_edit_tools/__init__.py +0 -28
  26. autocoder/agent/agentic_edit_tools/ask_followup_question_tool_resolver.py +0 -32
  27. autocoder/agent/agentic_edit_tools/attempt_completion_tool_resolver.py +0 -29
  28. autocoder/agent/agentic_edit_tools/base_tool_resolver.py +0 -29
  29. autocoder/agent/agentic_edit_tools/execute_command_tool_resolver.py +0 -84
  30. autocoder/agent/agentic_edit_tools/list_code_definition_names_tool_resolver.py +0 -75
  31. autocoder/agent/agentic_edit_tools/list_files_tool_resolver.py +0 -62
  32. autocoder/agent/agentic_edit_tools/plan_mode_respond_tool_resolver.py +0 -30
  33. autocoder/agent/agentic_edit_tools/read_file_tool_resolver.py +0 -36
  34. autocoder/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +0 -95
  35. autocoder/agent/agentic_edit_tools/search_files_tool_resolver.py +0 -70
  36. autocoder/agent/agentic_edit_tools/use_mcp_tool_resolver.py +0 -55
  37. autocoder/agent/agentic_edit_tools/write_to_file_tool_resolver.py +0 -98
  38. autocoder/agent/agentic_edit_types.py +0 -124
  39. autocoder/auto_coder_lang.py +0 -60
  40. autocoder/auto_coder_rag_client_mcp.py +0 -170
  41. autocoder/auto_coder_rag_mcp.py +0 -193
  42. autocoder/common/llm_rerank.py +0 -84
  43. autocoder/common/model_speed_test.py +0 -392
  44. autocoder/common/v2/agent/agentic_edit_conversation.py +0 -188
  45. autocoder/common/v2/agent/ignore_utils.py +0 -50
  46. autocoder/dispacher/actions/plugins/action_translate.py +0 -214
  47. autocoder/ignorefiles/__init__.py +0 -4
  48. autocoder/ignorefiles/ignore_file_utils.py +0 -63
  49. autocoder/ignorefiles/test_ignore_file_utils.py +0 -91
  50. autocoder/linters/code_linter.py +0 -588
  51. autocoder/rag/loaders/test_image_loader.py +0 -209
  52. autocoder/rag/raw_rag.py +0 -96
  53. autocoder/rag/simple_directory_reader.py +0 -646
  54. autocoder/rag/simple_rag.py +0 -404
  55. autocoder/regex_project/__init__.py +0 -162
  56. autocoder/utils/coder.py +0 -125
  57. autocoder/utils/tests.py +0 -37
  58. {auto_coder-0.1.374.dist-info → auto_coder-0.1.376.dist-info}/LICENSE +0 -0
  59. {auto_coder-0.1.374.dist-info → auto_coder-0.1.376.dist-info}/WHEEL +0 -0
  60. {auto_coder-0.1.374.dist-info → auto_coder-0.1.376.dist-info}/entry_points.txt +0 -0
  61. {auto_coder-0.1.374.dist-info → auto_coder-0.1.376.dist-info}/top_level.txt +0 -0
autocoder/rag/simple_rag.py DELETED
@@ -1,404 +0,0 @@
1
- from typing import Any, Dict, List, Optional, Tuple
2
- from autocoder.common import SourceCode, AutoCoderArgs
3
- from autocoder.common.llm_rerank import LLMRerank
4
- from autocoder.rag.simple_directory_reader import AutoCoderSimpleDirectoryReader
5
- import fsspec
6
-
7
- from byzerllm.apps.llama_index.simple_retrieval import SimpleRetrieval
8
- from byzerllm.apps.llama_index.collection_manager import (
9
- CollectionManager,
10
- CollectionItem,
11
- )
12
- from byzerllm.utils.ray_utils import is_ray_in_client_mode
13
-
14
- from llama_index.core import QueryBundle, StorageContext
15
- from llama_index.core.readers.file.base import default_file_metadata_func
16
- from llama_index.core import VectorStoreIndex
17
- from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes
18
- from llama_index.core.base.llms.types import ChatMessage, MessageRole
19
- from llama_index.core.retrievers import AutoMergingRetriever
20
- from llama_index.core.query_engine import RetrieverQueryEngine, RouterQueryEngine
21
- from llama_index.core.tools import QueryEngineTool
22
- from llama_index.core.selectors import (
23
- LLMSingleSelector,
24
- )
25
-
26
- import byzerllm
27
- from loguru import logger
28
- import hashlib
29
- import json
30
- from openai import OpenAI
31
-
32
-
33
- def file_metadata_func(
34
- file_path: str, fs: Optional[fsspec.AbstractFileSystem] = None
35
- ) -> Dict:
36
- """Get some handy metadata from filesystem.
37
-
38
- Args:
39
- file_path: str: file path in str
40
- """
41
-
42
- def generate_file_md5(file_path: str) -> str:
43
- md5_hash = hashlib.md5()
44
- with open(file_path, "rb") as f:
45
- for chunk in iter(lambda: f.read(4096), b""):
46
- md5_hash.update(chunk)
47
- return md5_hash.hexdigest()
48
-
49
- return {
50
- **default_file_metadata_func(file_path=file_path, fs=fs),
51
- "md5": generate_file_md5(file_path=file_path),
52
- }
53
-
54
-
55
- class SimpleRAG:
56
- def __init__(self, llm: byzerllm.ByzerLLM, args: AutoCoderArgs, path: str) -> None:
57
- from byzerllm.apps.llama_index import get_service_context, get_storage_context
58
-
59
- self.llm = llm
60
- self.args = args
61
- self.path = path
62
- self.collection_manager = CollectionManager(args.base_dir)
63
-
64
- self.collections = self.args.collection
65
- if self.args.collections:
66
- self.collections = self.args.collections
67
-
68
- self.collections = self.collections.split(",") if self.collections else []
69
-
70
- if not self.collections:
71
- logger.warning(
72
- """No RAG collection is set, we will use the `default` collection.
73
- You can set the collection by passing the `--collections`argument in command line or set the `collections` attribute in the config file."""
74
- )
75
- self.collections = ["default"]
76
-
77
- if not self.llm.default_emb_model_name:
78
- raise ValueError("emb_model should be set")
79
-
80
- self.retrieval = byzerllm.ByzerRetrieval()
81
-
82
- if not is_ray_in_client_mode():
83
- self.retrieval.launch_gateway()
84
- self.service_context = get_service_context(self.llm)
85
- self.storage_context_dict: Dict[str, StorageContext] = {}
86
- for collection in self.collections:
87
- self.storage_context_dict[collection] = get_storage_context(
88
- self.llm,
89
- self.retrieval,
90
- chunk_collection=collection,
91
- namespace=collection,
92
- )
93
-
94
- self.simple_retrieval_dict: Dict[str, SimpleRetrieval] = {}
95
- for collection in self.collections:
96
- self.simple_retrieval_dict[collection] = SimpleRetrieval(
97
- llm=llm, retrieval=self.retrieval, chunk_collection=collection
98
- )
99
- else:
100
- if not args.rag_url:
101
- raise ValueError(
102
- "You are in client mode, please provide the RAG URL. e.g. rag_url: http://localhost:8000/v1"
103
- )
104
-
105
- if not args.rag_url.startswith("http://"):
106
- raise ValueError("The RAG URL should start with http://")
107
-
108
- self.client = OpenAI(api_key=args.rag_token, base_url=args.rag_url)
109
-
110
- def _get_indices(self) -> List[Tuple[CollectionItem, VectorStoreIndex]]:
111
- indices = []
112
- for collection in self.collections:
113
- storage_context = self.storage_context_dict[collection]
114
- index = VectorStoreIndex.from_vector_store(
115
- vector_store=storage_context.vector_store,
116
- service_context=self.service_context,
117
- )
118
- collection_item = self.collection_manager.get_collection(collection)
119
- indices.append((collection_item, index))
120
-
121
- return indices
122
-
123
- def _get_query_engine(self, streaming: bool = False):
124
- indices = self._get_indices()
125
- retrievers = []
126
-
127
- for collection_item, index in indices:
128
- retriever = AutoMergingRetriever(
129
- index.as_retriever(),
130
- storage_context=self.storage_context_dict[collection_item.name],
131
- )
132
- retrievers.append(retriever)
133
-
134
- query_engines = [
135
- RetrieverQueryEngine.from_args(
136
- retriever, service_context=self.service_context, streaming=streaming
137
- )
138
- for retriever in retrievers
139
- ]
140
-
141
- if len(query_engines) == 1:
142
- return query_engines[0]
143
-
144
- tools = []
145
- for (collection_item, index), query_engine in zip(indices, query_engines):
146
- tool = QueryEngineTool.from_defaults(
147
- query_engine=query_engine,
148
- description=collection_item.description,
149
- )
150
- tools.append(tool)
151
-
152
- query_engine = RouterQueryEngine(
153
- selector=LLMSingleSelector.from_defaults(
154
- service_context=self.service_context
155
- ),
156
- query_engine_tools=tools,
157
- llm=self.service_context.llm,
158
- service_context=self.service_context,
159
- verbose=True,
160
- )
161
- return query_engine
162
-
163
- def _get_retriever(self):
164
- indices = self._get_indices()
165
- retrievers = []
166
-
167
- for collection_item, index in indices:
168
- retriever = AutoMergingRetriever(
169
- index.as_retriever(),
170
- storage_context=self.storage_context_dict[collection_item.name],
171
- )
172
- retrievers.append(retriever)
173
-
174
- return retrievers
175
-
176
- def stream_search(self, query: str):
177
- query_bundle = QueryBundle(query_str=query)
178
- query_engine = self._get_query_engine(streaming=True)
179
- streaming_response = query_engine.query(query_bundle)
180
- contexts = []
181
- for node in streaming_response.source_nodes:
182
- contexts.append(
183
- {
184
- "raw_chunk": node.node.text,
185
- "doc_url": node.node.metadata["file_path"],
186
- "_id": node.node.id_,
187
- }
188
- )
189
- return streaming_response.response_gen, contexts
190
-
191
- def retrieve(self, query: str) -> List[Dict[str, Any]]:
192
- query_bundle = QueryBundle(query_str=query)
193
- retrievers = self._get_retriever()
194
-
195
- result = []
196
-
197
- for retriever in retrievers:
198
- nodes = retriever.retrieve(query_bundle)
199
-
200
- reranker = LLMRerank(llm=self.llm)
201
- retrieved_nodes = reranker.postprocess_nodes(
202
- nodes, query_bundle, choice_batch_size=5, top_n=1
203
- )
204
- result.extend(
205
- [
206
- {
207
- "raw_chunk": node.node.text,
208
- "doc_url": node.node.metadata["file_path"],
209
- "_id": node.node.id_,
210
- }
211
- for node in retrieved_nodes
212
- ]
213
- )
214
- return result
215
-
216
- def stream_chat_oai(
217
- self,
218
- conversations,
219
- model: Optional[str] = None,
220
- role_mapping=None,
221
- llm_config: Dict[str, Any] = {},
222
- ):
223
- if len(self.collections) != 1:
224
- raise ValueError("When chat mode, only one collection can be set")
225
-
226
- index = VectorStoreIndex.from_vector_store(
227
- vector_store=self.storage_context_dict[self.collections[0]].vector_store,
228
- service_context=self.service_context,
229
- )
230
- chat_engine = index.as_chat_engine(
231
- chat_mode="condense_plus_context",
232
- verbose=False,
233
- )
234
- history = []
235
- for conv in conversations[:-1]:
236
- if conv["role"] == "user":
237
- role = MessageRole.USER
238
- elif conv["role"] == "assistant":
239
- role = MessageRole.ASSISTANT
240
- else:
241
- role = MessageRole.SYSTEM
242
- history.append(ChatMessage(role=role, content=conv["content"]))
243
- return (
244
- chat_engine.stream_chat(
245
- conversations[-1]["content"], chat_history=history
246
- ).response_gen,
247
- [],
248
- )
249
-
250
- def stream_chat_repl(self, query: str):
251
- if len(self.collections) != 1:
252
- raise ValueError("When chat mode, only one collection can be set")
253
- from llama_index.core.memory import ChatMemoryBuffer
254
-
255
- memory = ChatMemoryBuffer.from_defaults(token_limit=8092)
256
- index = VectorStoreIndex.from_vector_store(
257
- vector_store=self.storage_context_dict[self.collections[0]].vector_store,
258
- service_context=self.service_context,
259
- )
260
- chat_engine = index.as_chat_engine(
261
- chat_mode="condense_plus_context",
262
- memory=memory,
263
- verbose=False,
264
- )
265
- chat_engine.streaming_chat_repl()
266
-
267
- def search(self, query: str) -> List[SourceCode]:
268
- if not is_ray_in_client_mode():
269
- return self.inner_search(query)
270
-
271
- target_query = query
272
-
273
- if isinstance(self.args.enable_rag_search, str):
274
- target_query = self.args.enable_rag_search
275
-
276
- response = self.client.chat.completions.create(
277
- messages=[{"role": "user", "content": target_query}],
278
- model="xxxx",
279
- )
280
- return [
281
- SourceCode(
282
- module_name=f"RAG:{target_query}",
283
- source_code=response.choices[0].message.content,
284
- )
285
- ]
286
-
287
- def inner_search(self, query: str) -> List[SourceCode]:
288
- if self.args.enable_rag_search:
289
- target_query = query
290
- if isinstance(self.args.enable_rag_search, str):
291
- target_query = self.args.enable_rag_search
292
- texts, contexts = self.stream_search(target_query)
293
- s = "".join([text for text in texts])
294
- urls = ",".join(set([context["doc_url"] for context in contexts]))
295
- ## append RAG: prefix is used to protect avoid the source code is modified by the code auto execute
296
- return [SourceCode(module_name=f"RAG:{urls}", source_code=s)]
297
- elif self.args.enable_rag_context:
298
- target_query = query
299
- if isinstance(self.args.enable_rag_context, str):
300
- target_query = self.args.enable_rag_context
301
- contexts = self.retrieve(target_query)
302
- for context in contexts:
303
- context["raw_chunk"]
304
- try:
305
- with open(context["doc_url"], "r") as f:
306
- context["raw_chunk"] = f.read()
307
- except Exception as e:
308
- logger.warning(f"Error reading file {context['doc_url']}")
309
- pass
310
-
311
- return [
312
- SourceCode(
313
- module_name=context["doc_url"], source_code=context["raw_chunk"]
314
- )
315
- for context in contexts
316
- ]
317
- return []
318
-
319
- def build(self):
320
-
321
- if len(self.collections) != 1:
322
- raise ValueError("When build, only one collection should be set")
323
-
324
- if is_ray_in_client_mode():
325
- raise ValueError(
326
- "You are in client mode, please run the build in the server."
327
- )
328
-
329
- collection = self.collections[0]
330
-
331
- collection_exists = self.collection_manager.check_collection_exists(collection)
332
- if not collection_exists:
333
- logger.warning(
334
- f"Collection {collection} not found, creating it automatically"
335
- )
336
- if not self.args.description:
337
- logger.error("Please provide a description for the collection")
338
- return
339
- item = CollectionItem(
340
- name=collection, description=self.args.description or ""
341
- )
342
- self.collection_manager.add_collection(item)
343
-
344
- retrieval_client = self.simple_retrieval_dict[collection]
345
- # retrieval_client.delete_from_doc_collection(collection)
346
- # retrieval_client.delete_from_chunk_collection(collection)
347
-
348
- required_exts = self.args.required_exts or None
349
-
350
- if required_exts:
351
- required_exts = required_exts.split(",")
352
-
353
- documents = AutoCoderSimpleDirectoryReader(
354
- self.path,
355
- recursive=True,
356
- filename_as_id=True,
357
- required_exts=required_exts,
358
- file_metadata=file_metadata_func,
359
- ).load_data()
360
- docs_keep = []
361
- for document in documents:
362
- doc = retrieval_client.get_doc(
363
- f"ref_doc_info/{document.doc_id}", collection
364
- )
365
- if doc:
366
- md5 = json.loads(doc["json_data"])["metadata"].get("md5", "")
367
- file_path = document.metadata["file_path"]
368
- new__md5 = document.metadata["md5"]
369
- if md5 != new__md5:
370
- retrieval_client.delete_doc_and_chunks_by_filename(
371
- collection, file_path
372
- )
373
- docs_keep.append(document)
374
- else:
375
- docs_keep.append(document)
376
-
377
- retrieval_client.commit_doc()
378
- retrieval_client.commit_chunk()
379
-
380
- for document in docs_keep:
381
- logger.info(f"\nUpsert {document.doc_id}")
382
-
383
- if docs_keep:
384
- hirerachical = HierarchicalNodeParser.from_defaults(
385
- chunk_sizes=[6000, 3000, 1024]
386
- )
387
- # sp = SentenceSplitter(chunk_size=1024, chunk_overlap=0)
388
-
389
- nodes = hirerachical.get_nodes_from_documents(docs_keep, show_progress=True)
390
-
391
- leaf_nodes = get_leaf_nodes(nodes)
392
- self.storage_context_dict[collection].docstore.add_documents(nodes)
393
-
394
- _ = VectorStoreIndex(
395
- nodes=leaf_nodes,
396
- store_nodes_override=True,
397
- storage_context=self.storage_context_dict[collection],
398
- service_context=self.service_context,
399
- )
400
-
401
- retrieval_client.commit_doc()
402
- retrieval_client.commit_chunk()
403
- else:
404
- logger.info("There is no new doc to build")
autocoder/regex_project/__init__.py DELETED
@@ -1,162 +0,0 @@
1
- import re
2
- from autocoder.common import SourceCode,AutoCoderArgs
3
- from autocoder import common as FileUtils
4
- from autocoder.utils.rest import HttpDoc
5
- import os
6
- from typing import Optional, Generator, List, Dict, Any, Callable
7
- from git import Repo
8
- import byzerllm
9
- from autocoder.common.search import Search,SearchEngine
10
- from autocoder.rag.simple_rag import SimpleRAG
11
- from loguru import logger
12
- from pydantic import BaseModel,Field
13
-
14
- class RegPattern(BaseModel):
15
- pattern: str = Field(..., title="Pattern", description="The regex pattern can be used by `re.search` in python.")
16
-
17
- class RegexProject():
18
-
19
- def __init__(self, args: AutoCoderArgs, llm: Optional[byzerllm.ByzerLLM] = None,file_filter=None):
20
- self.args = args
21
- self.directory = args.source_dir
22
- self.git_url = args.git_url
23
- self.target_file = args.target_file
24
- self.project_type = args.project_type
25
- self.file_filter = file_filter
26
- self.sources = []
27
- self.llm = llm
28
- self.regex_pattern = self.extract_regex_pattern(self.project_type)
29
-
30
- @byzerllm.prompt()
31
- def generate_regex_pattern(self,desc:str)->RegPattern:
32
- '''
33
- Generate a regex pattern based on the following description:
34
-
35
- {{ desc }}
36
- '''
37
-
38
-
39
- def extract_regex_pattern(self, project_type):
40
- project_type = project_type.strip()
41
- if project_type.startswith("regex://"):
42
- return project_type[8:]
43
- if project_type.startswith("human://"):
44
- desc = project_type[8:]
45
- v = self.generate_regex_pattern.with_llm(self.llm).run(desc=desc)
46
- if not v:
47
- raise ValueError("Fail to generate regex pattern, try again.")
48
- logger.info(f"Generated regex pattern: {v.pattern}")
49
- return v.pattern
50
- else:
51
- raise ValueError("Invalid project_type format. Expected 'regex//<pattern>'")
52
-
53
- def output(self):
54
- return open(self.target_file, "r").read()
55
-
56
- def is_regex_match(self, file_path):
57
- return re.search(self.regex_pattern, file_path) is not None
58
-
59
- def read_file_content(self, file_path):
60
- with open(file_path, "r") as file:
61
- return file.read()
62
-
63
- def convert_to_source_code(self, file_path):
64
- module_name = file_path
65
- source_code = self.read_file_content(file_path)
66
- return SourceCode(module_name=module_name, source_code=source_code)
67
-
68
- def get_source_codes(self) -> Generator[SourceCode, None, None]:
69
- for root, dirs, files in os.walk(self.directory):
70
- for file in files:
71
- file_path = os.path.join(root, file)
72
- if self.is_regex_match(file_path):
73
- if self.file_filter is None or self.file_filter(file_path, [self.regex_pattern]):
74
- logger.info(f"collect file: {file_path}")
75
- source_code = self.convert_to_source_code(file_path)
76
- if source_code is not None:
77
- yield source_code
78
-
79
- def get_rest_source_codes(self) -> Generator[SourceCode, None, None]:
80
- if self.args.urls:
81
- urls = self.args.urls
82
- if isinstance(self.args.urls, str):
83
- urls = self.args.urls.split(",")
84
- http_doc = HttpDoc(args =self.args, llm=self.llm,urls=urls)
85
- sources = http_doc.crawl_urls()
86
- for source in sources:
87
- source.tag = "REST"
88
- return sources
89
- return []
90
-
91
- def get_rag_source_codes(self):
92
- if not self.args.enable_rag_search and not self.args.enable_rag_context:
93
- return []
94
- rag = SimpleRAG(self.llm,self.args,self.args.source_dir)
95
- docs = rag.search(self.args.query)
96
- for doc in docs:
97
- doc.tag = "RAG"
98
- return docs
99
-
100
- def get_search_source_codes(self):
101
- temp = self.get_rag_source_codes()
102
- if self.args.search_engine and self.args.search_engine_token:
103
- if self.args.search_engine == "bing":
104
- search_engine = SearchEngine.BING
105
- else:
106
- search_engine = SearchEngine.GOOGLE
107
-
108
- searcher=Search(args=self.args,llm=self.llm,search_engine=search_engine,subscription_key=self.args.search_engine_token)
109
- search_query = self.args.search or self.args.query
110
- search_context = searcher.answer_with_the_most_related_context(search_query)
111
- return temp + [SourceCode(module_name="SEARCH_ENGINE", source_code=search_context,tag="SEARCH")]
112
- return temp + []
113
-
114
- def run(self):
115
- if self.git_url is not None:
116
- self.clone_repository()
117
-
118
- if self.target_file is None:
119
- for code in self.get_source_codes():
120
- self.sources.append(code)
121
- print(f"##File: {code.module_name}")
122
- print(code.source_code)
123
-
124
- for code in self.get_rest_source_codes():
125
- self.sources.append(code)
126
- print(f"##File: {code.module_name}")
127
- print(code.source_code)
128
-
129
- for code in self.get_search_source_codes():
130
- self.sources.append(code)
131
- print(f"##File: {code.module_name}")
132
- print(code.source_code)
133
-
134
-
135
- else:
136
- with open(self.target_file, "w") as file:
137
- for code in self.get_source_codes():
138
- self.sources.append(code)
139
- file.write(f"##File: {code.module_name}\n")
140
- file.write(f"{code.source_code}\n\n")
141
-
142
- for code in self.get_rest_source_codes():
143
- self.sources.append(code)
144
- file.write(f"##File: {code.module_name}\n")
145
- file.write(f"{code.source_code}\n\n")
146
-
147
- for code in self.get_search_source_codes():
148
- self.sources.append(code)
149
- file.write(f"##File: {code.module_name}\n")
150
- file.write(f"{code.source_code}\n\n")
151
-
152
-
153
-
154
- def clone_repository(self):
155
- if self.git_url is None:
156
- raise ValueError("git_url is required to clone the repository")
157
-
158
- if os.path.exists(self.directory):
159
- print(f"Directory {self.directory} already exists. Skipping cloning.")
160
- else:
161
- print(f"Cloning repository {self.git_url} into {self.directory}")
162
- Repo.clone_from(self.git_url, self.directory)
autocoder/utils/coder.py DELETED
@@ -1,125 +0,0 @@
1
-
2
- from autocoder.common import AutoCoderArgs
3
- import byzerllm
4
- import pydantic
5
- from enum import Enum
6
-
7
- class TaskTypeDef(pydantic.BaseModel):
8
- name: str
9
- desc: str = ""
10
- guidance: str = ""
11
-
12
-
13
- class TaskType(Enum):
14
- """By identifying specific types of tasks, we can inject human priors (guidance) to help task solving"""
15
-
16
- @byzerllm.prompt(render="jinja2")
17
- def data_preprocess_prompt():
18
- '''
19
- The current task is about data preprocessing, please note the following:
20
- - Monitor data types per column, applying appropriate methods.
21
- - Ensure operations are on existing dataset columns.
22
- - Avoid writing processed data to files.
23
- - Avoid any change to label column, such as standardization, etc.
24
- - Prefer alternatives to one-hot encoding for categorical data.
25
- - Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.
26
- - Each step do data preprocessing to train, must do same for test separately at the same time.
27
- - Always copy the DataFrame before processing it and use the copy to process.
28
- '''
29
-
30
- @byzerllm.prompt(render="jinja2")
31
- def image2webpage_prompt():
32
- '''
33
- The current task is about converting image into webpage code. please note the following:
34
- - Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow.
35
- - Save webpages: Be sure to use the save method provided.
36
- '''
37
-
38
- DATA_PREPROCESS = TaskTypeDef(
39
- name="data preprocessing",
40
- desc="For preprocessing dataset in a data analysis or machine learning task ONLY,"
41
- "general data operation doesn't fall into this type",
42
- guidance=data_preprocess_prompt(),
43
- )
44
-
45
- IMAGE2WEBPAGE = TaskTypeDef(
46
- name="image2webpage",
47
- desc="For converting image into webpage code.",
48
- guidance=image2webpage_prompt(),
49
- )
50
- OTHER = TaskTypeDef(name="other", desc="Any tasks not in the defined categories")
51
-
52
- @property
53
- def type_name(self):
54
- return self.value.name
55
-
56
- @classmethod
57
- def get_type(cls, type_name):
58
- for member in cls:
59
- if member.type_name == type_name:
60
- return member.value
61
- return None
62
-
63
- class Thought(pydantic.BaseModel):
64
- thoughts:str = pydantic.Field(...,description="Thoughts on current situation, reflect on how you should proceed to fulfill the user requirement")
65
- state:bool = pydantic.Field(...,description="Decide whether you need to take more actions to complete the user requirement. Return true if you think so. Return false if you think the requirement has been completely fulfilled.")
66
-
67
- class Plan(pydantic.BaseModel):
68
- task_id:str = pydantic.Field(...,description="unique identifier for a task in plan, can be an ordinal")
69
- dependent_task_ids:list[str] = pydantic.Field(...,description="ids of tasks prerequisite to this task")
70
- instruction:str = pydantic.Field(...,description="what you should do in this task, one short phrase or sentence")
71
- task_type:str = pydantic.Field(...,description="type of this task, should be one of Available Task Types")
72
-
73
- class Coder:
74
- def __init__(self,llm:byzerllm.ByzerLLM,args:AutoCoderArgs) -> None:
75
- self.llm = llm
76
- self.args = args
77
- self.working_memory = []
78
-
79
- def get_task_type_desc(self):
80
- task_type_desc = "\n".join([f"- **{tt.type_name}**: {tt.value.desc}" for tt in TaskType])
81
- return task_type_desc
82
-
83
- @byzerllm.prompt(llm=lambda self:self.llm,render="jinja2")
84
- def write_plan(self,context:str,task_type_desc:str,max_tasks:int=3):
85
- '''
86
- # Context:
87
- {{ context }}
88
- # Available Task Types:
89
- {{ task_type_desc}}
90
- # Task:
91
- Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to {{ max_tasks }} tasks.
92
- If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
93
- If you encounter errors on the current task, revise and output the current single task only.
94
- Output a list of jsons following the format:
95
- ```json
96
- [
97
- {
98
- "task_id": str = "unique identifier for a task in plan, can be an ordinal",
99
- "dependent_task_ids": list[str] = "ids of tasks prerequisite to this task",
100
- "instruction": "what you should do in this task, one short phrase or sentence",
101
- "task_type": "type of this task, should be one of Available Task Types",
102
- },
103
- ...
104
- ]
105
- ```
106
- '''
107
- pass
108
-
109
- @byzerllm.prompt(llm=lambda self:self.llm,render="jinja2")
110
- def react_think(self,user_requirement:str,context:str)->str:
111
- '''
112
- # User Requirement
113
- {{ user_requirement }}
114
- # Context
115
- {{ context }}
116
-
117
- Output a json following the format:
118
- ```json
119
- {
120
- "thoughts": str = "Thoughts on current situation, reflect on how you should proceed to fulfill the user requirement",
121
- "state": bool = "Decide whether you need to take more actions to complete the user requirement. Return true if you think so. Return false if you think the requirement has been completely fulfilled."
122
- }
123
- ```
124
- '''
125
- pass