PraisonAI 0.0.57.tar.gz → 0.0.59.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PraisonAI might be problematic.
- {praisonai-0.0.57 → praisonai-0.0.59}/PKG-INFO +2 -1
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/cli.py +86 -1
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/deploy.py +1 -1
- praisonai-0.0.59/praisonai/inc/config.py +96 -0
- praisonai-0.0.59/praisonai/setup/__init__.py +0 -0
- praisonai-0.0.59/praisonai/setup/build.py +21 -0
- praisonai-0.0.59/praisonai/setup/config.yaml +60 -0
- praisonai-0.0.59/praisonai/setup/post_install.py +20 -0
- praisonai-0.0.59/praisonai/setup/setup_conda_env.py +25 -0
- praisonai-0.0.59/praisonai/setup/setup_conda_env.sh +72 -0
- praisonai-0.0.59/praisonai/train.py +276 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/context.py +87 -51
- {praisonai-0.0.57 → praisonai-0.0.59}/pyproject.toml +15 -3
- {praisonai-0.0.57 → praisonai-0.0.59}/LICENSE +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/README.md +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/__init__.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/__main__.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/agents_generator.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/auto.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/chainlit_ui.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/inbuilt_tools/__init__.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/inbuilt_tools/autogen_tools.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/inc/__init__.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/inc/models.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/android-chrome-192x192.png +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/android-chrome-512x512.png +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/apple-touch-icon.png +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/fantasy.svg +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/favicon-16x16.png +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/favicon-32x32.png +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/favicon.ico +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/game.svg +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/logo_dark.png +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/logo_light.png +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/movie.svg +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/thriller.svg +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/test.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/chat.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/code.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/fantasy.svg +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/game.svg +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/logo_dark.png +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/logo_light.png +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/movie.svg +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/thriller.svg +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/sql_alchemy.py +0 -0
- {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/version.py +0 -0

{praisonai-0.0.57 → praisonai-0.0.59}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: PraisonAI
-Version: 0.0.57
+Version: 0.0.59
 Summary: PraisonAI application combines AutoGen and CrewAI or similar frameworks into a low-code solution for building and managing multi-agent LLM systems, focusing on simplicity, customization, and efficient human-agent collaboration.
 Author: Mervin Praison
 Requires-Python: >=3.10,<3.13

@@ -17,6 +17,7 @@ Provides-Extra: cohere
 Provides-Extra: google
 Provides-Extra: gradio
 Provides-Extra: openai
+Provides-Extra: train
 Provides-Extra: ui
 Requires-Dist: agentops (>=0.2.6) ; extra == "agentops"
 Requires-Dist: aiosqlite (>=0.20.0) ; extra == "chat" or extra == "code"

{praisonai-0.0.57 → praisonai-0.0.59}/praisonai/cli.py

@@ -12,7 +12,9 @@ import argparse
 from .auto import AutoGenerator
 from .agents_generator import AgentsGenerator
 from .inbuilt_tools import *
+from .inc.config import generate_config
 import shutil
+import subprocess
 import logging
 logging.basicConfig(level=os.environ.get('LOGLEVEL', 'INFO'), format='%(asctime)s - %(levelname)s - %(message)s')


@@ -27,6 +29,34 @@ try:
     GRADIO_AVAILABLE = True
 except ImportError:
     GRADIO_AVAILABLE = False
+
+def stream_subprocess(command, env=None):
+    """
+    Execute a subprocess command and stream the output to the terminal in real-time.
+
+    Args:
+        command (list): A list containing the command and its arguments.
+        env (dict, optional): Environment variables for the subprocess.
+    """
+    process = subprocess.Popen(
+        command,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        bufsize=1,
+        universal_newlines=True,
+        env=env
+    )
+
+    for line in iter(process.stdout.readline, ''):
+        print(line, end='')
+        sys.stdout.flush()  # Ensure output is flushed immediately
+
+    process.stdout.close()
+    return_code = process.wait()
+
+    if return_code != 0:
+        raise subprocess.CalledProcessError(return_code, command)

 class PraisonAI:
     def __init__(self, agent_file="agents.yaml", framework="", auto=False, init=False, agent_yaml=None):

@@ -99,6 +129,57 @@ class PraisonAI:
             self.create_code_interface()
             return

+        if args.agent_file == 'train':
+            package_root = os.path.dirname(os.path.abspath(__file__))
+            config_yaml_destination = os.path.join(os.getcwd(), 'config.yaml')
+
+            # Create config.yaml only if it doesn't exist or --model or --dataset is provided
+            if not os.path.exists(config_yaml_destination) or args.model or args.dataset:
+                config = generate_config(
+                    model_name=args.model,
+                    hf_model_name=args.hf,
+                    ollama_model_name=args.ollama,
+                    dataset=[{
+                        "name": args.dataset
+                    }]
+                )
+                with open('config.yaml', 'w') as f:
+                    yaml.dump(config, f, default_flow_style=False, indent=2)
+
+                # Overwrite huggingface_save and ollama_save if --hf or --ollama are provided
+                if args.hf:
+                    config["huggingface_save"] = "true"
+                if args.ollama:
+                    config["ollama_save"] = "true"
+
+            if 'init' in sys.argv:
+                from praisonai.setup.setup_conda_env import main as setup_conda_main
+                setup_conda_main()
+                print("All packages installed")
+                return
+
+            try:
+                result = subprocess.check_output(['conda', 'env', 'list'])
+                if 'praison_env' in result.decode('utf-8'):
+                    print("Conda environment 'praison_env' found.")
+                else:
+                    raise subprocess.CalledProcessError(1, 'grep')
+            except subprocess.CalledProcessError:
+                print("Conda environment 'praison_env' not found. Setting it up...")
+                from praisonai.setup.setup_conda_env import main as setup_conda_main
+                setup_conda_main()
+                print("All packages installed.")
+
+            train_args = sys.argv[2:]  # Get all arguments after 'train'
+            train_script_path = os.path.join(package_root, 'train.py')
+
+            # Set environment variables
+            env = os.environ.copy()
+            env['PYTHONUNBUFFERED'] = '1'
+
+            stream_subprocess(['conda', 'run', '--no-capture-output', '--name', 'praison_env', 'python', '-u', train_script_path, 'train'] + train_args, env=env)
+            return
+
         invocation_cmd = "praisonai"
         version_string = f"PraisonAI version {__version__}"


@@ -169,7 +250,11 @@ class PraisonAI:
         parser.add_argument("--auto", nargs=argparse.REMAINDER, help="Enable auto mode and pass arguments for it")
         parser.add_argument("--init", nargs=argparse.REMAINDER, help="Enable auto mode and pass arguments for it")
         parser.add_argument("agent_file", nargs="?", help="Specify the agent file")
-        parser.add_argument("--deploy", action="store_true", help="Deploy the application")
+        parser.add_argument("--deploy", action="store_true", help="Deploy the application")
+        parser.add_argument("--model", type=str, help="Model name")
+        parser.add_argument("--hf", type=str, help="Hugging Face model name")
+        parser.add_argument("--ollama", type=str, help="Ollama model name")
+        parser.add_argument("--dataset", type=str, help="Dataset name for training", default="yahma/alpaca-cleaned")
         args, unknown_args = parser.parse_known_args()

         if unknown_args and unknown_args[0] == '-b' and unknown_args[1] == 'api:app':
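
Taken together, the cli.py hunks above wire up a new `praisonai train` path: the CLI writes a config.yaml (unless one already exists and no --model/--dataset override is given), makes sure a `praison_env` conda environment exists, and then streams praisonai/train.py through the new stream_subprocess helper. A minimal sketch of that flow, using only names introduced in this diff (this is an internal code path rather than a documented public API, so treat the imports as assumptions):

# Roughly what `praisonai train --dataset yahma/alpaca-cleaned` does internally,
# based on the cli.py hunks above; assumes praisonai 0.0.59 is installed and the
# 'praison_env' conda environment has already been created by setup_conda_env.sh.
import os
import yaml
from praisonai.inc.config import generate_config
from praisonai.cli import stream_subprocess

config = generate_config(dataset=[{"name": "yahma/alpaca-cleaned"}])
with open("config.yaml", "w") as f:
    yaml.dump(config, f, default_flow_style=False, indent=2)

env = os.environ.copy()
env["PYTHONUNBUFFERED"] = "1"  # unbuffered output so training progress streams line by line
stream_subprocess(
    ["conda", "run", "--no-capture-output", "--name", "praison_env",
     "python", "-u", "train.py", "train"],
    env=env,
)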

{praisonai-0.0.57 → praisonai-0.0.59}/praisonai/deploy.py

@@ -56,7 +56,7 @@ class CloudDeployer:
         file.write("FROM python:3.11-slim\n")
         file.write("WORKDIR /app\n")
         file.write("COPY . .\n")
-        file.write("RUN pip install flask praisonai==0.0.57 gunicorn markdown\n")
+        file.write("RUN pip install flask praisonai==0.0.59 gunicorn markdown\n")
         file.write("EXPOSE 8080\n")
         file.write('CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]\n')

praisonai-0.0.59/praisonai/inc/config.py (new file)

@@ -0,0 +1,96 @@
+def generate_config(
+    ollama_save=None,
+    huggingface_save=None,
+    train=None,
+    model_name=None,
+    hf_model_name=None,
+    ollama_model_name=None,
+    model_parameters=None,
+    max_seq_length=None,
+    load_in_4bit=None,
+    lora_r=None,
+    lora_target_modules=None,
+    lora_alpha=None,
+    lora_dropout=None,
+    lora_bias=None,
+    use_gradient_checkpointing=None,
+    random_state=None,
+    use_rslora=None,
+    loftq_config=None,
+    dataset=None,
+    dataset_text_field=None,
+    dataset_num_proc=None,
+    packing=None,
+    per_device_train_batch_size=None,
+    gradient_accumulation_steps=None,
+    warmup_steps=None,
+    num_train_epochs=None,
+    max_steps=None,
+    learning_rate=None,
+    logging_steps=None,
+    optim=None,
+    weight_decay=None,
+    lr_scheduler_type=None,
+    seed=None,
+    output_dir=None,
+    quantization_method=None
+):
+    """Generates the configuration for PraisonAI with dynamic overrides."""
+
+    config = {
+        "ollama_save": ollama_save or "true",
+        "huggingface_save": huggingface_save or "true",
+        "train": train or "true",
+
+        "model_name": model_name or "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
+        "hf_model_name": hf_model_name or "mervinpraison/llama-3.1-tamilan-8B-test",
+        "ollama_model": ollama_model_name or "mervinpraison/llama3.1-tamilan-test",
+        "model_parameters": model_parameters or "8b",
+
+        "dataset": dataset or [
+            {
+                "name": "yahma/alpaca-cleaned",
+                "split_type": "train",
+                "processing_func": "format_prompts",
+                "rename": {"input": "input", "output": "output", "instruction": "instruction"},
+                "filter_data": False,
+                "filter_column_value": "id",
+                "filter_value": "alpaca",
+                "num_samples": 20000
+            }
+        ],
+
+        "dataset_text_field": dataset_text_field or "text",
+        "dataset_num_proc": dataset_num_proc or 2,
+        "packing": packing or False,
+
+        "max_seq_length": max_seq_length or 2048,
+        "load_in_4bit": load_in_4bit or True,
+        "lora_r": lora_r or 16,
+        "lora_target_modules": lora_target_modules or [
+            "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"
+        ],
+        "lora_alpha": lora_alpha or 16,
+        "lora_dropout": lora_dropout or 0,
+        "lora_bias": lora_bias or "none",
+        "use_gradient_checkpointing": use_gradient_checkpointing or "unsloth",
+        "random_state": random_state or 3407,
+        "use_rslora": use_rslora or False,
+        "loftq_config": loftq_config or None,
+
+        "per_device_train_batch_size": per_device_train_batch_size or 2,
+        "gradient_accumulation_steps": gradient_accumulation_steps or 2,
+        "warmup_steps": warmup_steps or 5,
+        "num_train_epochs": num_train_epochs or 1,
+        "max_steps": max_steps or 10,
+        "learning_rate": learning_rate or 2.0e-4,
+        "logging_steps": logging_steps or 1,
+        "optim": optim or "adamw_8bit",
+        "weight_decay": weight_decay or 0.01,
+        "lr_scheduler_type": lr_scheduler_type or "linear",
+        "seed": seed or 3407,
+        "output_dir": output_dir or "outputs",
+
+        "quantization_method": quantization_method or ["q4_k_m"]
+    }
+    return config
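
generate_config merges keyword overrides into a fixed set of defaults with the `value or default` idiom, so only truthy overrides take effect. A small usage sketch (the override values are illustrative, not package defaults):

from praisonai.inc.config import generate_config

cfg = generate_config(model_name="unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", max_steps=100)
print(cfg["max_steps"])                 # 100 — explicit truthy override wins
print(generate_config()["max_steps"])   # 10  — built-in default
# Because every field is `override or default`, falsy overrides such as
# load_in_4bit=False or lora_dropout=0 silently fall back to the defaults.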

praisonai-0.0.59/praisonai/setup/__init__.py
File without changes (new, empty file)

praisonai-0.0.59/praisonai/setup/build.py (new file)

@@ -0,0 +1,21 @@
+import subprocess
+import sys
+import os
+
+def build(setup_kwargs):
+    try:
+        # Get the directory of the current script
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+
+        # Construct the path to post_install.py
+        post_install_script = os.path.join(script_dir, 'post_install.py')
+
+        # Run the post_install.py script
+        subprocess.check_call([sys.executable, post_install_script])
+    except subprocess.CalledProcessError as e:
+        print(f"Error occurred while running the post-install script: {e}")
+        sys.exit(1)
+    return setup_kwargs
+
+if __name__ == "__main__":
+    build({})

praisonai-0.0.59/praisonai/setup/config.yaml (new file)

@@ -0,0 +1,60 @@
+ollama_save: "true"
+huggingface_save: "true"
+train: "true"
+
+model_name: "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
+hf_model_name: "mervinpraison/llama-3.1-tamilan-8B-test"
+ollama_model: "mervinpraison/llama3.1-tamilan-test"
+model_parameters: "8b"
+
+max_seq_length: 2048
+load_in_4bit: true
+lora_r: 16
+lora_target_modules:
+  - "q_proj"
+  - "k_proj"
+  - "v_proj"
+  - "o_proj"
+  - "gate_proj"
+  - "up_proj"
+  - "down_proj"
+lora_alpha: 16
+lora_dropout: 0
+lora_bias: "none"
+use_gradient_checkpointing: "unsloth"
+random_state: 3407
+use_rslora: false
+loftq_config: null
+
+dataset:
+  - name: "yahma/alpaca-cleaned"
+    split_type: "train"
+    processing_func: "format_prompts"
+    rename:
+      input: "input"
+      output: "output"
+      instruction: "instruction"
+    filter_data: false
+    filter_column_value: "id"
+    filter_value: "alpaca"
+    num_samples: 20000
+
+dataset_text_field: "text"
+dataset_num_proc: 2
+packing: false
+
+per_device_train_batch_size: 2
+gradient_accumulation_steps: 2
+warmup_steps: 5
+num_train_epochs: 1
+max_steps: 10
+learning_rate: 2.0e-4
+logging_steps: 1
+optim: "adamw_8bit"
+weight_decay: 0.01
+lr_scheduler_type: "linear"
+seed: 3407
+output_dir: "outputs"
+
+quantization_method:
+  - "q4_k_m"
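
This bundled config.yaml mirrors the defaults produced by generate_config and is the file train.py reads at start-up. A quick sketch of loading it the same way train.load_config does:

import yaml

with open("config.yaml") as f:
    config = yaml.safe_load(f)

print(config["model_name"])                     # unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit
for entry in config["dataset"]:
    print(entry["name"], entry["num_samples"])  # yahma/alpaca-cleaned 20000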

praisonai-0.0.59/praisonai/setup/post_install.py (new file)

@@ -0,0 +1,20 @@
+import subprocess
+import sys
+import os
+
+def main():
+    try:
+        # Get the absolute path of the current file
+        current_file = os.path.abspath(__file__)
+
+        # Get the directory of the current file
+        script_dir = os.path.dirname(current_file)
+
+        # Construct the path to setup_conda_env.py
+        setup_script = os.path.join(script_dir, 'setup_conda_env.py')
+    except subprocess.CalledProcessError as e:
+        print(f"Error occurred while running the setup script: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()

praisonai-0.0.59/praisonai/setup/setup_conda_env.py (new file)

@@ -0,0 +1,25 @@
+import subprocess
+import os
+import sys
+import platform
+
+def main():
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    script_path = os.path.join(script_dir, 'setup_conda_env.sh')
+
+    if platform.system() == 'Windows':
+        print("Windows detected. Please run the setup_conda_env.sh script manually in Git Bash or WSL.")
+        print(f"Script location: {script_path}")
+        sys.exit(1)
+
+    try:
+        subprocess.check_call(['bash', script_path])
+    except subprocess.CalledProcessError as e:
+        print(f"Error occurred while running the setup script: {e}")
+        print("Setup failed. Please check the error message above and try to resolve the issue.")
+        sys.exit(1)
+
+    print("Conda environment setup completed successfully!")
+
+if __name__ == "__main__":
+    main()

praisonai-0.0.59/praisonai/setup/setup_conda_env.sh (new file)

@@ -0,0 +1,72 @@
+#!/bin/bash
+
+# Detect OS and architecture
+if [[ "$OSTYPE" == "darwin"* ]]; then
+    # macOS
+    if [[ $(uname -m) == 'arm64' ]]; then
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh"
+    else
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh"
+    fi
+elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
+    # Linux
+    MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+elif [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then
+    # Windows
+    MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe"
+    echo "Windows detected. Please run this script in Git Bash or WSL."
+    exit 1
+else
+    echo "Unsupported operating system: $OSTYPE"
+    exit 1
+fi
+
+# Check if conda is already installed
+if ! command -v conda &> /dev/null; then
+    echo "Conda is not installed. Installing Miniconda..."
+    wget $MINICONDA_URL -O ~/miniconda.sh
+    bash ~/miniconda.sh -b -p $HOME/miniconda
+    source $HOME/miniconda/bin/activate
+    conda init
+else
+    echo "Conda is already installed."
+fi
+
+# Create and activate the Conda environment
+ENV_NAME="praison_env"
+if conda info --envs | grep -q $ENV_NAME; then
+    echo "Environment $ENV_NAME already exists. Recreating..."
+    conda env remove -y -n $ENV_NAME  # Remove existing environment
+    if [[ "$OSTYPE" == "darwin"* ]]; then
+        # macOS (both Intel and M1/M2)
+        conda create --name $ENV_NAME python=3.10 pytorch=2.3.0 -c pytorch -y
+    elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
+        # Linux
+        conda create --name $ENV_NAME python=3.10 pytorch=2.3.0 cudatoolkit=11.8 -c pytorch -c nvidia -y
+    fi
+    # conda activate $ENV_NAME
+else
+    echo "Creating new environment $ENV_NAME..."
+    if [[ "$OSTYPE" == "darwin"* ]]; then
+        # macOS (both Intel and M1/M2)
+        conda create --name $ENV_NAME python=3.10 pytorch=2.3.0 -c pytorch -y
+    elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
+        # Linux
+        conda create --name $ENV_NAME python=3.10 pytorch=2.3.0 cudatoolkit=11.8 -c pytorch -c nvidia -y
+    fi
+    # conda activate $ENV_NAME
+fi
+
+# source $HOME/miniconda/bin/activate $ENV_NAME
+
+# Get full path of pip
+PIP_FULL_PATH=$(conda run -n $ENV_NAME which pip)
+
+# Install other packages within the activated environment
+# Use PIP_FULL_PATH to run pip commands
+$PIP_FULL_PATH install --upgrade pip
+$PIP_FULL_PATH install "xformers==0.0.26.post1"
+$PIP_FULL_PATH install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@4e570be9ae4ced8cdc64e498125708e34942befc"
+$PIP_FULL_PATH install --no-deps "trl<0.9.0" peft accelerate bitsandbytes
+
+echo "Setup completed successfully!"

praisonai-0.0.59/praisonai/train.py (new file)

@@ -0,0 +1,276 @@
+import subprocess
+import os
+import sys
+import yaml
+import torch
+import shutil
+from transformers import TextStreamer
+from unsloth import FastLanguageModel, is_bfloat16_supported
+from trl import SFTTrainer
+from transformers import TrainingArguments
+from datasets import load_dataset, concatenate_datasets, Dataset
+from psutil import virtual_memory
+
+class train:
+    def __init__(self, config_path="config.yaml"):
+        self.load_config(config_path)
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model, self.tokenizer = None, None
+
+    def load_config(self, path):
+        with open(path, "r") as file:
+            self.config = yaml.safe_load(file)
+
+    def print_system_info(self):
+        print(f"PyTorch version: {torch.__version__}")
+        print(f"CUDA version: {torch.version.cuda}")
+        if torch.cuda.is_available():
+            device_capability = torch.cuda.get_device_capability()
+            print(f"CUDA Device Capability: {device_capability}")
+        else:
+            print("CUDA is not available")
+
+        python_version = sys.version
+        pip_version = subprocess.check_output(['pip', '--version']).decode().strip()
+        python_path = sys.executable
+        pip_path = subprocess.check_output(['which', 'pip']).decode().strip()
+        print(f"Python Version: {python_version}")
+        print(f"Pip Version: {pip_version}")
+        print(f"Python Path: {python_path}")
+        print(f"Pip Path: {pip_path}")
+
+    def check_gpu(self):
+        gpu_stats = torch.cuda.get_device_properties(0)
+        print(f"GPU = {gpu_stats.name}. Max memory = {round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)} GB.")
+
+    def check_ram(self):
+        ram_gb = virtual_memory().total / 1e9
+        print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))
+        if ram_gb < 20:
+            print('Not using a high-RAM runtime')
+        else:
+            print('You are using a high-RAM runtime!')
+
+    # def install_packages(self):
+    #     subprocess.run(["pip", "install", "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@4e570be9ae4ced8cdc64e498125708e34942befc"])
+    #     subprocess.run(["pip", "install", "--no-deps", "trl<0.9.0", "peft==0.12.0", "accelerate==0.33.0", "bitsandbytes==0.43.3"])
+
+    def prepare_model(self):
+        self.model, self.tokenizer = FastLanguageModel.from_pretrained(
+            model_name=self.config["model_name"],
+            max_seq_length=self.config["max_seq_length"],
+            dtype=None,
+            load_in_4bit=self.config["load_in_4bit"]
+        )
+        self.model = FastLanguageModel.get_peft_model(
+            self.model,
+            r=self.config["lora_r"],
+            target_modules=self.config["lora_target_modules"],
+            lora_alpha=self.config["lora_alpha"],
+            lora_dropout=self.config["lora_dropout"],
+            bias=self.config["lora_bias"],
+            use_gradient_checkpointing=self.config["use_gradient_checkpointing"],
+            random_state=self.config["random_state"],
+            use_rslora=self.config["use_rslora"],
+            loftq_config=self.config["loftq_config"],
+        )
+
+    def process_dataset(self, dataset_info):
+        dataset_name = dataset_info["name"]
+        split_type = dataset_info.get("split_type", "train")
+        processing_func = getattr(self, dataset_info.get("processing_func", "format_prompts"))
+        rename = dataset_info.get("rename", {})
+        filter_data = dataset_info.get("filter_data", False)
+        filter_column_value = dataset_info.get("filter_column_value", "id")
+        filter_value = dataset_info.get("filter_value", "alpaca")
+        num_samples = dataset_info.get("num_samples", 20000)
+
+        dataset = load_dataset(dataset_name, split=split_type)
+
+        if rename:
+            dataset = dataset.rename_columns(rename)
+        if filter_data:
+            dataset = dataset.filter(lambda example: filter_value in example[filter_column_value]).shuffle(seed=42).select(range(num_samples))
+        dataset = dataset.map(processing_func, batched=True)
+        return dataset
+
+    def format_prompts(self, examples):
+        alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+### Instruction:
+{}
+
+### Input:
+{}
+
+### Response:
+{}"""
+        texts = [alpaca_prompt.format(ins, inp, out) + self.tokenizer.eos_token for ins, inp, out in zip(examples["instruction"], examples["input"], examples["output"])]
+        return {"text": texts}
+
+    def load_datasets(self):
+        datasets = []
+        for dataset_info in self.config["dataset"]:
+            datasets.append(self.process_dataset(dataset_info))
+        return concatenate_datasets(datasets)
+
+    def train_model(self):
+        dataset = self.load_datasets()
+        trainer = SFTTrainer(
+            model=self.model,
+            tokenizer=self.tokenizer,
+            train_dataset=dataset,
+            dataset_text_field=self.config["dataset_text_field"],
+            max_seq_length=self.config["max_seq_length"],
+            dataset_num_proc=self.config["dataset_num_proc"],
+            packing=self.config["packing"],
+            args=TrainingArguments(
+                per_device_train_batch_size=self.config["per_device_train_batch_size"],
+                gradient_accumulation_steps=self.config["gradient_accumulation_steps"],
+                warmup_steps=self.config["warmup_steps"],
+                num_train_epochs=self.config["num_train_epochs"],
+                max_steps=self.config["max_steps"],
+                learning_rate=self.config["learning_rate"],
+                fp16=not is_bfloat16_supported(),
+                bf16=is_bfloat16_supported(),
+                logging_steps=self.config["logging_steps"],
+                optim=self.config["optim"],
+                weight_decay=self.config["weight_decay"],
+                lr_scheduler_type=self.config["lr_scheduler_type"],
+                seed=self.config["seed"],
+                output_dir=self.config["output_dir"],
+            ),
+        )
+        trainer.train()
+        self.model.save_pretrained("lora_model")  # Local saving
+        self.tokenizer.save_pretrained("lora_model")
+
+    def inference(self, instruction, input_text):
+        FastLanguageModel.for_inference(self.model)
+        alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+### Instruction:
+{}
+
+### Input:
+{}
+
+### Response:
+{}"""
+        inputs = self.tokenizer([alpaca_prompt.format(instruction, input_text, "")], return_tensors="pt").to("cuda")
+        outputs = self.model.generate(**inputs, max_new_tokens=64, use_cache=True)
+        print(self.tokenizer.batch_decode(outputs))
+
+    def load_model(self):
+        """Loads the model and tokenizer using the FastLanguageModel library."""
+        from unsloth import FastLanguageModel
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name=self.config["output_dir"],
+            max_seq_length=2048,
+            dtype=None,
+            load_in_4bit=self.config["load_in_4bit"],
+        )
+        return model, tokenizer
+
+    def save_model_merged(self):
+        if os.path.exists(self.config["hf_model_name"]):
+            shutil.rmtree(self.config["hf_model_name"])
+        self.model.push_to_hub_merged(
+            self.config["hf_model_name"],
+            self.tokenizer,
+            save_method="merged_16bit",
+            token=os.getenv('HF_TOKEN')
+        )
+
+    def push_model_gguf(self):
+        self.model.push_to_hub_gguf(
+            self.config["hf_model_name"],
+            self.tokenizer,
+            quantization_method=self.config["quantization_method"],
+            token=os.getenv('HF_TOKEN')
+        )
+
+    def save_model_gguf(self):
+        self.model.save_pretrained_gguf(
+            self.config["hf_model_name"],
+            self.tokenizer,
+            quantization_method="q4_k_m"
+        )
+
+    def prepare_modelfile_content(self):
+        output_model = self.config["hf_model_name"]
+        gguf_path = f"{output_model}/unsloth.Q4_K_M.gguf"
+
+        # Check if the GGUF file exists. If not, generate it ## TODO Multiple Quantisation other than Q4_K_M.gguf
+        if not os.path.exists(gguf_path):
+            self.model, self.tokenizer = self.load_model()
+            self.save_model_gguf()
+        return f"""FROM {output_model}/unsloth.Q4_K_M.gguf
+
+TEMPLATE \"\"\"Below are some instructions that describe some tasks. Write responses that appropriately complete each request.{{{{ if .Prompt }}}}
+
+### Instruction:
+{{{{ .Prompt }}}}
+
+{{{{ end }}}}### Response:
+{{{{ .Response }}}}\"\"\"
+
+PARAMETER stop ""
+PARAMETER stop ""
+PARAMETER stop ""
+PARAMETER stop ""
+PARAMETER stop "<|reserved_special_token_"
+"""
+
+    def create_and_push_ollama_model(self):
+        modelfile_content = self.prepare_modelfile_content()
+        with open('Modelfile', 'w') as file:
+            file.write(modelfile_content)
+
+        subprocess.run(["ollama", "serve"])
+        subprocess.run(["ollama", "create", f"{self.config['ollama_model']}:{self.config['model_parameters']}", "-f", "Modelfile"])
+        subprocess.run(["ollama", "push", f"{self.config['ollama_model']}:{self.config['model_parameters']}"])
+
+    def run(self):
+        self.print_system_info()
+        self.check_gpu()
+        self.check_ram()
+        # self.install_packages()
+        if self.config.get("train", "true").lower() == "true":
+            self.prepare_model()
+            self.train_model()
+
+        if self.config.get("huggingface_save", "true").lower() == "true":
+            self.model, self.tokenizer = self.load_model()
+            self.save_model_merged()
+
+        if self.config.get("huggingface_save_gguf", "true").lower() == "true":
+            self.model, self.tokenizer = self.load_model()
+            self.push_model_gguf()
+
+        # if self.config.get("save_gguf", "true").lower() == "true": ## TODO
+        #     self.model, self.tokenizer = self.load_model()
+        #     self.save_model_gguf()
+
+        # if self.config.get("save_merged", "true").lower() == "true": ## TODO
+        #     self.model, self.tokenizer = self.load_model()
+        #     self.save_model_merged()
+
+        if self.config.get("ollama_save", "true").lower() == "true":
+            self.create_and_push_ollama_model()
+
+
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description='PraisonAI Training Script')
+    parser.add_argument('command', choices=['train'], help='Command to execute')
+    parser.add_argument('--config', default='config.yaml', help='Path to configuration file')
+    args = parser.parse_args()
+
+    if args.command == 'train':
+        ai = train(config_path=args.config)
+        ai.run()
+
+
+if __name__ == '__main__':
+    main()
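
The CLI runs this script as `python train.py train --config config.yaml` inside the conda environment, but the class can presumably also be driven directly. A hedged sketch (the import path assumes praisonai/train.py is importable as praisonai.train, which matches the file listing but is not documented as a public API):

# Drive the trainer programmatically instead of via `praisonai train`.
from praisonai.train import train  # assumption: praisonai/train.py is on the import path

trainer = train(config_path="config.yaml")  # loads the YAML shown above
trainer.run()  # fine-tunes via Unsloth/TRL, then optionally pushes to Hugging Face and Ollama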
{praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/context.py

@@ -97,17 +97,36 @@ class ContextGatherer:
         return modified_ignore_patterns

     def get_include_paths(self):
+        """
+        Loads include paths from:
+        1. .praisoninclude (includes ONLY files/directories listed)
+        2. .praisoncontext (if .praisoninclude doesn't exist, this is used
+           to include all other relevant files, excluding ignore patterns)
+        """
         include_paths = []
- [2 removed lines not rendered in this diff view]
-        include_file = os.path.join(self.directory, '.
+        include_all = False  # Flag to indicate if we need to include all files
+
+        include_file = os.path.join(self.directory, '.praisoncontext')
         if os.path.exists(include_file):
             with open(include_file, 'r') as f:
                 include_paths.extend(
                     line.strip() for line in f
                     if line.strip() and not line.startswith('#')
                 )
-
+
+        # If .praisoncontext doesn't exist, fall back to .praisoninclude
+        # for including all relevant files
+        if not include_paths:
+            include_file = os.path.join(self.directory, '.praisoninclude')
+            if os.path.exists(include_file):
+                with open(include_file, 'r') as f:
+                    include_paths.extend(
+                        line.strip() for line in f
+                        if line.strip() and not line.startswith('#')
+                    )
+                include_all = True  # Include all files along with specified paths
+
+        return include_paths, include_all

     def should_ignore(self, file_path):
         """

@@ -130,61 +149,78 @@ class ContextGatherer:
             any(file_path.endswith(ext) for ext in self.relevant_extensions)

     def gather_context(self):
-        """
+        """
+        Gather context from relevant files, respecting ignore patterns
+        and include options from .praisoninclude and .praisoncontext.
+        """
         context = []
         total_files = 0
         processed_files = 0
+        self.include_paths, include_all = self.get_include_paths()

- [19 removed lines not rendered in this diff view]
+        def add_file_content(file_path):
+            """Helper function to add file content to context."""
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+                context.append(
+                    f"File: {file_path}\n\n{content}\n\n{'=' * 50}\n"
+                )
+                self.included_files.append(
+                    Path(file_path).relative_to(self.directory)
+                )
+            except Exception as e:
+                logger.error(f"Error reading {file_path}: {e}")
+
+        def process_path(path):
+            """Helper function to process a single path (file or directory)."""
+            nonlocal total_files, processed_files
+            if os.path.isdir(path):
+                for root, dirs, files in os.walk(path):
+                    total_files += len(files)
+                    dirs[:] = [
+                        d
+                        for d in dirs
+                        if not self.should_ignore(os.path.join(root, d))
+                    ]
+                    for file in files:
+                        file_path = os.path.join(root, file)
+                        if not self.should_ignore(file_path) and self.is_relevant_file(file_path):
+                            add_file_content(file_path)
+                            processed_files += 1
+                            print(
+                                f"\rProcessed {processed_files}/{total_files} files",
+                                end="",
+                                flush=True,
+                            )
+            elif os.path.isfile(path) and self.is_relevant_file(path):
+                add_file_content(path)
+                processed_files += 1
+                print(
+                    f"\rProcessed {processed_files}/1 files",
+                    end="",
+                    flush=True,
+                )
+
+        if include_all:
+            # Include ALL relevant files from the entire directory
+            process_path(self.directory)
+
+            # Include files from .praisoninclude specifically
+            for include_path in self.include_paths:
+                full_path = os.path.join(self.directory, include_path)
+                process_path(full_path)
+        elif self.include_paths:
+            # Include only files specified in .praisoncontext
            for include_path in self.include_paths:
                full_path = os.path.join(self.directory, include_path)
- [4 removed lines not rendered in this diff view]
-                for file in files:
-                    file_path = os.path.join(root, file)
-                    if not self.should_ignore(file_path) and self.is_relevant_file(file_path):
-                        try:
-                            with open(file_path, 'r', encoding='utf-8') as f:
-                                content = f.read()
-                            context.append(f"File: {file_path}\n\n{content}\n\n{'='*50}\n")
-                            self.included_files.append(Path(file_path).relative_to(self.directory))
-                        except Exception as e:
-                            logger.error(f"Error reading {file_path}: {e}")
-                        processed_files += 1
-                        print(f"\rProcessed {processed_files}/{total_files} files", end="", flush=True)
-                elif os.path.isfile(full_path) and self.is_relevant_file(full_path):
-                    try:
-                        with open(full_path, 'r', encoding='utf-8') as f:
-                            content = f.read()
-                        context.append(f"File: {full_path}\n\n{content}\n\n{'='*50}\n")
-                        self.included_files.append(Path(full_path).relative_to(self.directory))
-                    except Exception as e:
-                        logger.error(f"Error reading {full_path}: {e}")
-                    processed_files += 1
-                    print(f"\rProcessed {processed_files}/{total_files} files", end="", flush=True)
+                process_path(full_path)
+        else:
+            # No include options, process the entire directory
+            process_path(self.directory)

         print()  # New line after progress indicator
-        return
+        return "\n".join(context)

     def count_tokens(self, text):
         """Count tokens using a simple whitespace-based tokenizer."""
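
Net effect of the ContextGatherer changes: a .praisoncontext file restricts the gathered context to exactly the paths it lists, while a .praisoninclude file adds its listed paths on top of a full directory scan; with neither file present the whole directory is processed as before. A standalone sketch of that precedence (an illustrative re-implementation, not the class itself):

import os

def resolve_includes(directory):
    """Mirror of get_include_paths(): .praisoncontext wins; .praisoninclude implies include_all."""
    paths, include_all = [], False
    context_file = os.path.join(directory, ".praisoncontext")
    if os.path.exists(context_file):
        with open(context_file) as f:
            paths = [line.strip() for line in f if line.strip() and not line.startswith("#")]
    if not paths:
        include_file = os.path.join(directory, ".praisoninclude")
        if os.path.exists(include_file):
            with open(include_file) as f:
                paths = [line.strip() for line in f if line.strip() and not line.startswith("#")]
            include_all = True
    return paths, include_all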

{praisonai-0.0.57 → praisonai-0.0.59}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "PraisonAI"
-version = "0.0.57"
+version = "0.0.59"
 description = "PraisonAI application combines AutoGen and CrewAI or similar frameworks into a low-code solution for building and managing multi-agent LLM systems, focusing on simplicity, customization, and efficient human-agent collaboration."
 authors = ["Mervin Praison"]
 license = ""

@@ -20,7 +20,7 @@ pyautogen = ">=0.2.19"
 crewai = ">=0.32.0"
 markdown = ">=3.5"
 praisonai-tools = ">=0.0.7"
-pyparsing = ">=3.0.0"
+pyparsing = ">=3.0.0"
 chainlit = {version = "^1.1.301", optional = true}
 gradio = {version = ">=4.26.0", optional = true}
 flask = {version = ">=3.0.0", optional = true}

@@ -89,6 +89,8 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry.scripts]
 praisonai = "praisonai.__main__:main"
+setup-conda-env = "setup.setup_conda_env:main"
+post-install = "setup.post_install:main"

 [tool.poetry.extras]
 ui = ["chainlit"]

@@ -100,4 +102,14 @@ openai = ["langchain-openai"]
 anthropic = ["langchain-anthropic"]
 cohere = ["langchain-cohere"]
 chat = ["chainlit", "litellm", "aiosqlite", "greenlet"]
-code = ["chainlit", "litellm", "aiosqlite", "greenlet"]
+code = ["chainlit", "litellm", "aiosqlite", "greenlet"]
+train = ["setup-conda-env"]
+
+[tool.poetry-dynamic-versioning]
+enable = true
+vcs = "git"
+style = "semver"
+
+[tool.poetry.build]
+generate-setup-file = false
+script = "praisonai/setup/post_install.py"

All remaining files listed above with +0 -0 are unchanged between 0.0.57 and 0.0.59.