auto-coder 0.1.183__py3-none-any.whl → 0.1.184__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- {auto_coder-0.1.183.dist-info → auto_coder-0.1.184.dist-info}/METADATA +2 -2
- {auto_coder-0.1.183.dist-info → auto_coder-0.1.184.dist-info}/RECORD +20 -14
- autocoder/auto_coder.py +136 -17
- autocoder/auto_coder_lang.py +14 -2
- autocoder/auto_coder_rag.py +92 -1
- autocoder/chat_auto_coder.py +25 -32
- autocoder/common/__init__.py +2 -0
- autocoder/rag/cache/__init__.py +0 -0
- autocoder/rag/cache/base_cache.py +14 -0
- autocoder/rag/cache/byzer_storage_cache.py +394 -0
- autocoder/rag/cache/file_monitor_cache.py +146 -0
- autocoder/rag/cache/simple_cache.py +204 -0
- autocoder/rag/document_retriever.py +56 -475
- autocoder/rag/long_context_rag.py +16 -6
- autocoder/rag/utils.py +133 -0
- autocoder/version.py +1 -1
- {auto_coder-0.1.183.dist-info → auto_coder-0.1.184.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.183.dist-info → auto_coder-0.1.184.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.183.dist-info → auto_coder-0.1.184.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.183.dist-info → auto_coder-0.1.184.dist-info}/top_level.txt +0 -0
autocoder/rag/utils.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
from autocoder.common import SourceCode
|
|
2
|
+
from autocoder.rag.token_counter import count_tokens_worker, count_tokens
|
|
3
|
+
from autocoder.rag.loaders.pdf_loader import extract_text_from_pdf
|
|
4
|
+
from autocoder.rag.loaders.docx_loader import extract_text_from_docx
|
|
5
|
+
from autocoder.rag.loaders.excel_loader import extract_text_from_excel
|
|
6
|
+
from autocoder.rag.loaders.ppt_loader import extract_text_from_ppt
|
|
7
|
+
from typing import List, Tuple
|
|
8
|
+
import time
|
|
9
|
+
from loguru import logger
|
|
10
|
+
import traceback
|
|
11
|
+
|
|
12
|
+
def process_file_in_multi_process(
    file_info: Tuple[str, str, float]
) -> List[SourceCode]:
    """Load a single file and convert it into ``SourceCode`` entries.

    This is the variant that counts tokens with ``count_tokens_worker``
    (``process_file_local`` uses ``count_tokens`` instead).

    The loader is chosen from the file extension:

    * ``.pdf``  -> ``extract_text_from_pdf`` on the raw bytes, one entry.
    * ``.docx`` -> ``extract_text_from_docx`` on the raw bytes, one entry.
    * ``.xlsx`` / ``.xls`` -> ``extract_text_from_excel``; one entry per
      returned item, using item ``[0]`` in the module name and item ``[1]``
      as the text.
    * ``.pptx`` -> ``extract_text_from_ppt``; all returned items are joined
      into a single entry.
    * anything else -> read as UTF-8 text, one entry.

    Args:
        file_info: A 3-tuple whose first element is the path of the file to
            load. The second and third elements are not used here.

    Returns:
        The list of ``SourceCode`` objects built from the file, or an empty
        list if anything went wrong while loading it (the error is logged
        and its traceback printed instead of being raised).
    """
    start_time = time.time()
    file_path, _relative_path, _ = file_info
    try:
        if file_path.endswith(".pdf"):
            with open(file_path, "rb") as f:
                content = extract_text_from_pdf(f.read())
            # NOTE(review): unlike every other branch, the PDF module name
            # has no "##File: " prefix. Left as-is because callers may rely
            # on it -- confirm whether this is intentional.
            v = [
                SourceCode(
                    module_name=file_path,
                    source_code=content,
                    tokens=count_tokens_worker(content),
                )
            ]
        elif file_path.endswith(".docx"):
            with open(file_path, "rb") as f:
                content = extract_text_from_docx(f.read())
            v = [
                SourceCode(
                    module_name=f"##File: {file_path}",
                    source_code=content,
                    tokens=count_tokens_worker(content),
                )
            ]
        elif file_path.endswith((".xlsx", ".xls")):
            sheets = extract_text_from_excel(file_path)
            v = [
                SourceCode(
                    module_name=f"##File: {file_path}#{sheet[0]}",
                    source_code=sheet[1],
                    tokens=count_tokens_worker(sheet[1]),
                )
                for sheet in sheets
            ]
        elif file_path.endswith(".pptx"):
            slides = extract_text_from_ppt(file_path)
            content = "".join(f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
            v = [
                SourceCode(
                    module_name=f"##File: {file_path}",
                    source_code=content,
                    tokens=count_tokens_worker(content),
                )
            ]
        else:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            v = [
                SourceCode(
                    module_name=f"##File: {file_path}",
                    source_code=content,
                    tokens=count_tokens_worker(content),
                )
            ]
        logger.info(f"Load file {file_path} in {time.time() - start_time}")
        return v
    except Exception as e:
        # Best-effort loading: a broken file must not abort the whole batch,
        # so report the failure and return nothing for this file.
        logger.error(f"Error processing file {file_path}: {str(e)}")
        # Print the stack trace as process_file_local does, so failures are
        # diagnosable rather than reduced to a one-line message.
        traceback.print_exc()
        return []
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def process_file_local(file_path: str) -> List[SourceCode]:
    """Read ``file_path`` and wrap its text in ``SourceCode`` objects.

    Tokens are counted with ``count_tokens``. The extension of ``file_path``
    selects how the text is extracted: PDF and DOCX files are read as bytes
    and handed to their extractors, Excel files yield one entry per item
    returned by ``extract_text_from_excel``, PPTX items are concatenated
    into one entry, and every other file is read as UTF-8 text.

    Args:
        file_path: Path of the file to load.

    Returns:
        The ``SourceCode`` entries for the file. If any exception occurs it
        is logged, its traceback is printed, and an empty list is returned.
    """
    began_at = time.time()

    def build(name: str, text: str) -> SourceCode:
        # Single place where a SourceCode entry and its token count are made.
        return SourceCode(
            module_name=name,
            source_code=text,
            tokens=count_tokens(text),
        )

    try:
        if file_path.endswith(".pdf"):
            with open(file_path, "rb") as handle:
                text = extract_text_from_pdf(handle.read())
            # The PDF entry is named by the bare path (no "##File: " prefix).
            documents = [build(file_path, text)]
        elif file_path.endswith(".docx"):
            with open(file_path, "rb") as handle:
                text = extract_text_from_docx(handle.read())
            documents = [build(f"##File: {file_path}", text)]
        elif file_path.endswith((".xlsx", ".xls")):
            documents = []
            for entry in extract_text_from_excel(file_path):
                # entry[0] goes into the module name, entry[1] is the text.
                documents.append(
                    build(f"##File: {file_path}#{entry[0]}", entry[1])
                )
        elif file_path.endswith(".pptx"):
            pieces = [
                f"#{slide[0]}\n{slide[1]}\n\n"
                for slide in extract_text_from_ppt(file_path)
            ]
            documents = [build(f"##File: {file_path}", "".join(pieces))]
        else:
            with open(file_path, "r", encoding="utf-8") as handle:
                text = handle.read()
            documents = [build(f"##File: {file_path}", text)]
    except Exception as e:
        logger.error(f"Error processing file {file_path}: {str(e)}")
        traceback.print_exc()
        return []
    else:
        logger.info(f"Load file {file_path} in {time.time() - began_at}")
        return documents
|
autocoder/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.183"
|
|
1
|
+
__version__ = "0.1.184"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|