PraisonAI 0.0.57__tar.gz → 0.0.59__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of PraisonAI has been flagged as possibly problematic.

Files changed (47)
  1. {praisonai-0.0.57 → praisonai-0.0.59}/PKG-INFO +2 -1
  2. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/cli.py +86 -1
  3. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/deploy.py +1 -1
  4. praisonai-0.0.59/praisonai/inc/config.py +96 -0
  5. praisonai-0.0.59/praisonai/setup/__init__.py +0 -0
  6. praisonai-0.0.59/praisonai/setup/build.py +21 -0
  7. praisonai-0.0.59/praisonai/setup/config.yaml +60 -0
  8. praisonai-0.0.59/praisonai/setup/post_install.py +20 -0
  9. praisonai-0.0.59/praisonai/setup/setup_conda_env.py +25 -0
  10. praisonai-0.0.59/praisonai/setup/setup_conda_env.sh +72 -0
  11. praisonai-0.0.59/praisonai/train.py +276 -0
  12. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/context.py +87 -51
  13. {praisonai-0.0.57 → praisonai-0.0.59}/pyproject.toml +15 -3
  14. {praisonai-0.0.57 → praisonai-0.0.59}/LICENSE +0 -0
  15. {praisonai-0.0.57 → praisonai-0.0.59}/README.md +0 -0
  16. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/__init__.py +0 -0
  17. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/__main__.py +0 -0
  18. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/agents_generator.py +0 -0
  19. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/auto.py +0 -0
  20. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/chainlit_ui.py +0 -0
  21. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/inbuilt_tools/__init__.py +0 -0
  22. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/inbuilt_tools/autogen_tools.py +0 -0
  23. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/inc/__init__.py +0 -0
  24. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/inc/models.py +0 -0
  25. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/android-chrome-192x192.png +0 -0
  26. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/android-chrome-512x512.png +0 -0
  27. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/apple-touch-icon.png +0 -0
  28. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/fantasy.svg +0 -0
  29. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/favicon-16x16.png +0 -0
  30. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/favicon-32x32.png +0 -0
  31. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/favicon.ico +0 -0
  32. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/game.svg +0 -0
  33. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/logo_dark.png +0 -0
  34. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/logo_light.png +0 -0
  35. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/movie.svg +0 -0
  36. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/public/thriller.svg +0 -0
  37. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/test.py +0 -0
  38. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/chat.py +0 -0
  39. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/code.py +0 -0
  40. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/fantasy.svg +0 -0
  41. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/game.svg +0 -0
  42. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/logo_dark.png +0 -0
  43. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/logo_light.png +0 -0
  44. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/movie.svg +0 -0
  45. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/public/thriller.svg +0 -0
  46. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/sql_alchemy.py +0 -0
  47. {praisonai-0.0.57 → praisonai-0.0.59}/praisonai/version.py +0 -0
{praisonai-0.0.57 → praisonai-0.0.59}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: PraisonAI
- Version: 0.0.57
+ Version: 0.0.59
  Summary: PraisonAI application combines AutoGen and CrewAI or similar frameworks into a low-code solution for building and managing multi-agent LLM systems, focusing on simplicity, customization, and efficient human-agent collaboration.
  Author: Mervin Praison
  Requires-Python: >=3.10,<3.13
@@ -17,6 +17,7 @@ Provides-Extra: cohere
  Provides-Extra: google
  Provides-Extra: gradio
  Provides-Extra: openai
+ Provides-Extra: train
  Provides-Extra: ui
  Requires-Dist: agentops (>=0.2.6) ; extra == "agentops"
  Requires-Dist: aiosqlite (>=0.20.0) ; extra == "chat" or extra == "code"
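The new `train` extra is visible in the installed distribution's metadata. As a quick check, a minimal sketch using only the standard library (it assumes PraisonAI is installed in the current environment):

```python
from importlib.metadata import metadata

# Read the installed package's METADATA (the same fields as PKG-INFO above)
meta = metadata("praisonai")
print(meta["Version"])                 # e.g. "0.0.59"
print(meta.get_all("Provides-Extra"))  # should now include "train"
```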
{praisonai-0.0.57 → praisonai-0.0.59}/praisonai/cli.py
@@ -12,7 +12,9 @@ import argparse
  from .auto import AutoGenerator
  from .agents_generator import AgentsGenerator
  from .inbuilt_tools import *
+ from .inc.config import generate_config
  import shutil
+ import subprocess
  import logging
  logging.basicConfig(level=os.environ.get('LOGLEVEL', 'INFO'), format='%(asctime)s - %(levelname)s - %(message)s')
 
@@ -27,6 +29,34 @@ try:
      GRADIO_AVAILABLE = True
  except ImportError:
      GRADIO_AVAILABLE = False
+
+ def stream_subprocess(command, env=None):
+     """
+     Execute a subprocess command and stream the output to the terminal in real time.
+
+     Args:
+         command (list): A list containing the command and its arguments.
+         env (dict, optional): Environment variables for the subprocess.
+     """
+     process = subprocess.Popen(
+         command,
+         stdout=subprocess.PIPE,
+         stderr=subprocess.STDOUT,
+         text=True,
+         bufsize=1,
+         universal_newlines=True,
+         env=env
+     )
+
+     for line in iter(process.stdout.readline, ''):
+         print(line, end='')
+         sys.stdout.flush()  # Ensure output is flushed immediately
+
+     process.stdout.close()
+     return_code = process.wait()
+
+     if return_code != 0:
+         raise subprocess.CalledProcessError(return_code, command)
 
  class PraisonAI:
      def __init__(self, agent_file="agents.yaml", framework="", auto=False, init=False, agent_yaml=None):
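A minimal usage sketch for the new `stream_subprocess` helper, assuming praisonai 0.0.59 is installed so the function can be imported from `praisonai.cli`; the child command here is a throwaway illustration:

```python
import subprocess
import sys
from praisonai.cli import stream_subprocess

# Streams the child's combined stdout/stderr line by line and raises
# subprocess.CalledProcessError on a non-zero exit code.
try:
    stream_subprocess([sys.executable, "-c", "print('hello from a child process')"])
except subprocess.CalledProcessError as e:
    print(f"Command failed: {e}")
```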
@@ -99,6 +129,57 @@ class PraisonAI:
              self.create_code_interface()
              return
 
+         if args.agent_file == 'train':
+             package_root = os.path.dirname(os.path.abspath(__file__))
+             config_yaml_destination = os.path.join(os.getcwd(), 'config.yaml')
+
+             # Create config.yaml only if it doesn't exist or --model or --dataset is provided
+             if not os.path.exists(config_yaml_destination) or args.model or args.dataset:
+                 config = generate_config(
+                     model_name=args.model,
+                     hf_model_name=args.hf,
+                     ollama_model_name=args.ollama,
+                     dataset=[{
+                         "name": args.dataset
+                     }]
+                 )
+
+                 # Overwrite huggingface_save and ollama_save if --hf or --ollama are provided
+                 if args.hf:
+                     config["huggingface_save"] = "true"
+                 if args.ollama:
+                     config["ollama_save"] = "true"
+                 with open('config.yaml', 'w') as f:
+                     yaml.dump(config, f, default_flow_style=False, indent=2)
+
+             if 'init' in sys.argv:
+                 from praisonai.setup.setup_conda_env import main as setup_conda_main
+                 setup_conda_main()
+                 print("All packages installed")
+                 return
+
+             try:
+                 result = subprocess.check_output(['conda', 'env', 'list'])
+                 if 'praison_env' in result.decode('utf-8'):
+                     print("Conda environment 'praison_env' found.")
+                 else:
+                     raise subprocess.CalledProcessError(1, 'grep')
+             except subprocess.CalledProcessError:
+                 print("Conda environment 'praison_env' not found. Setting it up...")
+                 from praisonai.setup.setup_conda_env import main as setup_conda_main
+                 setup_conda_main()
+                 print("All packages installed.")
+
+             train_args = sys.argv[2:]  # Get all arguments after 'train'
+             train_script_path = os.path.join(package_root, 'train.py')
+
+             # Run train.py unbuffered inside the conda environment
+             env = os.environ.copy()
+             env['PYTHONUNBUFFERED'] = '1'
+
+             stream_subprocess(['conda', 'run', '--no-capture-output', '--name', 'praison_env', 'python', '-u', train_script_path, 'train'] + train_args, env=env)
+             return
+
          invocation_cmd = "praisonai"
          version_string = f"PraisonAI version {__version__}"
 
@@ -169,7 +250,11 @@ class PraisonAI:
          parser.add_argument("--auto", nargs=argparse.REMAINDER, help="Enable auto mode and pass arguments for it")
          parser.add_argument("--init", nargs=argparse.REMAINDER, help="Enable auto mode and pass arguments for it")
          parser.add_argument("agent_file", nargs="?", help="Specify the agent file")
-         parser.add_argument("--deploy", action="store_true", help="Deploy the application")  # New argument
+         parser.add_argument("--deploy", action="store_true", help="Deploy the application")
+         parser.add_argument("--model", type=str, help="Model name")
+         parser.add_argument("--hf", type=str, help="Hugging Face model name")
+         parser.add_argument("--ollama", type=str, help="Ollama model name")
+         parser.add_argument("--dataset", type=str, help="Dataset name for training", default="yahma/alpaca-cleaned")
          args, unknown_args = parser.parse_known_args()
 
          if unknown_args and unknown_args[0] == '-b' and unknown_args[1] == 'api:app':
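For reference, a standalone sketch that mirrors only the newly added flags (it is not the full PraisonAI parser) and shows what a `praisonai train ...` command line parses to; the model and repo names are placeholders:

```python
import argparse

parser = argparse.ArgumentParser(description="Mirror of the new train-related flags")
parser.add_argument("agent_file", nargs="?", help="Specify the agent file")
parser.add_argument("--model", type=str, help="Model name")
parser.add_argument("--hf", type=str, help="Hugging Face model name")
parser.add_argument("--ollama", type=str, help="Ollama model name")
parser.add_argument("--dataset", type=str, default="yahma/alpaca-cleaned",
                    help="Dataset name for training")

# Equivalent of: praisonai train --model unsloth/llama-3-8b-bnb-4bit --hf user/repo
args = parser.parse_args(["train", "--model", "unsloth/llama-3-8b-bnb-4bit",
                          "--hf", "user/repo"])
print(args.agent_file, args.model, args.hf, args.dataset)
```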
{praisonai-0.0.57 → praisonai-0.0.59}/praisonai/deploy.py
@@ -56,7 +56,7 @@ class CloudDeployer:
          file.write("FROM python:3.11-slim\n")
          file.write("WORKDIR /app\n")
          file.write("COPY . .\n")
-         file.write("RUN pip install flask praisonai==0.0.57 gunicorn markdown\n")
+         file.write("RUN pip install flask praisonai==0.0.59 gunicorn markdown\n")
          file.write("EXPOSE 8080\n")
          file.write('CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]\n')
 
praisonai-0.0.59/praisonai/inc/config.py
@@ -0,0 +1,96 @@
+ def generate_config(
+     ollama_save=None,
+     huggingface_save=None,
+     train=None,
+     model_name=None,
+     hf_model_name=None,
+     ollama_model_name=None,
+     model_parameters=None,
+     max_seq_length=None,
+     load_in_4bit=None,
+     lora_r=None,
+     lora_target_modules=None,
+     lora_alpha=None,
+     lora_dropout=None,
+     lora_bias=None,
+     use_gradient_checkpointing=None,
+     random_state=None,
+     use_rslora=None,
+     loftq_config=None,
+     dataset=None,
+     dataset_text_field=None,
+     dataset_num_proc=None,
+     packing=None,
+     per_device_train_batch_size=None,
+     gradient_accumulation_steps=None,
+     warmup_steps=None,
+     num_train_epochs=None,
+     max_steps=None,
+     learning_rate=None,
+     logging_steps=None,
+     optim=None,
+     weight_decay=None,
+     lr_scheduler_type=None,
+     seed=None,
+     output_dir=None,
+     quantization_method=None
+ ):
+     """Generates the configuration for PraisonAI with dynamic overrides."""
+
+     config = {
+         "ollama_save": ollama_save or "true",
+         "huggingface_save": huggingface_save or "true",
+         "train": train or "true",
+
+         "model_name": model_name or "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
+         "hf_model_name": hf_model_name or "mervinpraison/llama-3.1-tamilan-8B-test",
+         "ollama_model": ollama_model_name or "mervinpraison/llama3.1-tamilan-test",
+         "model_parameters": model_parameters or "8b",
+
+         "dataset": dataset or [
+             {
+                 "name": "yahma/alpaca-cleaned",
+                 "split_type": "train",
+                 "processing_func": "format_prompts",
+                 "rename": {"input": "input", "output": "output", "instruction": "instruction"},
+                 "filter_data": False,
+                 "filter_column_value": "id",
+                 "filter_value": "alpaca",
+                 "num_samples": 20000
+             }
+         ],
+
+         "dataset_text_field": dataset_text_field or "text",
+         "dataset_num_proc": dataset_num_proc or 2,
+         "packing": packing or False,
+
+         "max_seq_length": max_seq_length or 2048,
+         "load_in_4bit": load_in_4bit or True,
+         "lora_r": lora_r or 16,
+         "lora_target_modules": lora_target_modules or [
+             "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"
+         ],
+         "lora_alpha": lora_alpha or 16,
+         "lora_dropout": lora_dropout or 0,
+         "lora_bias": lora_bias or "none",
+         "use_gradient_checkpointing": use_gradient_checkpointing or "unsloth",
+         "random_state": random_state or 3407,
+         "use_rslora": use_rslora or False,
+         "loftq_config": loftq_config or None,
+
+         "per_device_train_batch_size": per_device_train_batch_size or 2,
+         "gradient_accumulation_steps": gradient_accumulation_steps or 2,
+         "warmup_steps": warmup_steps or 5,
+         "num_train_epochs": num_train_epochs or 1,
+         "max_steps": max_steps or 10,
+         "learning_rate": learning_rate or 2.0e-4,
+         "logging_steps": logging_steps or 1,
+         "optim": optim or "adamw_8bit",
+         "weight_decay": weight_decay or 0.01,
+         "lr_scheduler_type": lr_scheduler_type or "linear",
+         "seed": seed or 3407,
+         "output_dir": output_dir or "outputs",
+
+         "quantization_method": quantization_method or ["q4_k_m"]
+     }
+     return config
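A short sketch of driving `generate_config` with overrides and persisting the result, mirroring what the `train` branch in cli.py does; the override values are illustrative only:

```python
import yaml
from praisonai.inc.config import generate_config

# Every argument is optional; anything left as None falls back to the defaults above.
config = generate_config(
    model_name="unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    dataset=[{"name": "yahma/alpaca-cleaned"}],
    max_steps=50,
)
config["huggingface_save"] = "false"  # keep the run local

with open("config.yaml", "w") as f:
    yaml.dump(config, f, default_flow_style=False, indent=2)
```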
praisonai-0.0.59/praisonai/setup/__init__.py
File without changes
praisonai-0.0.59/praisonai/setup/build.py
@@ -0,0 +1,21 @@
+ import subprocess
+ import sys
+ import os
+
+ def build(setup_kwargs):
+     try:
+         # Get the directory of the current script
+         script_dir = os.path.dirname(os.path.abspath(__file__))
+
+         # Construct the path to post_install.py
+         post_install_script = os.path.join(script_dir, 'post_install.py')
+
+         # Run the post_install.py script
+         subprocess.check_call([sys.executable, post_install_script])
+     except subprocess.CalledProcessError as e:
+         print(f"Error occurred while running the post-install script: {e}")
+         sys.exit(1)
+     return setup_kwargs
+
+ if __name__ == "__main__":
+     build({})
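The hook can also be exercised by hand; a minimal sketch, assuming praisonai 0.0.59 is installed so that `praisonai.setup.build` is importable:

```python
from praisonai.setup.build import build

# Poetry invokes build(setup_kwargs) at build time; called directly it
# simply runs post_install.py and returns the kwargs unchanged.
setup_kwargs = build({})
print(setup_kwargs)  # -> {}
```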
praisonai-0.0.59/praisonai/setup/config.yaml
@@ -0,0 +1,60 @@
+ ollama_save: "true"
+ huggingface_save: "true"
+ train: "true"
+
+ model_name: "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
+ hf_model_name: "mervinpraison/llama-3.1-tamilan-8B-test"
+ ollama_model: "mervinpraison/llama3.1-tamilan-test"
+ model_parameters: "8b"
+
+ max_seq_length: 2048
+ load_in_4bit: true
+ lora_r: 16
+ lora_target_modules:
+   - "q_proj"
+   - "k_proj"
+   - "v_proj"
+   - "o_proj"
+   - "gate_proj"
+   - "up_proj"
+   - "down_proj"
+ lora_alpha: 16
+ lora_dropout: 0
+ lora_bias: "none"
+ use_gradient_checkpointing: "unsloth"
+ random_state: 3407
+ use_rslora: false
+ loftq_config: null
+
+ dataset:
+   - name: "yahma/alpaca-cleaned"
+     split_type: "train"
+     processing_func: "format_prompts"
+     rename:
+       input: "input"
+       output: "output"
+       instruction: "instruction"
+     filter_data: false
+     filter_column_value: "id"
+     filter_value: "alpaca"
+     num_samples: 20000
+
+ dataset_text_field: "text"
+ dataset_num_proc: 2
+ packing: false
+
+ per_device_train_batch_size: 2
+ gradient_accumulation_steps: 2
+ warmup_steps: 5
+ num_train_epochs: 1
+ max_steps: 10
+ learning_rate: 2.0e-4
+ logging_steps: 1
+ optim: "adamw_8bit"
+ weight_decay: 0.01
+ lr_scheduler_type: "linear"
+ seed: 3407
+ output_dir: "outputs"
+
+ quantization_method:
+   - "q4_k_m"
praisonai-0.0.59/praisonai/setup/post_install.py
@@ -0,0 +1,20 @@
+ import subprocess
+ import sys
+ import os
+
+ def main():
+     try:
+         # Get the directory of the current file
+         script_dir = os.path.dirname(os.path.abspath(__file__))
+
+         # Construct the path to setup_conda_env.py
+         setup_script = os.path.join(script_dir, 'setup_conda_env.py')
+
+         # Run the environment setup script
+         subprocess.check_call([sys.executable, setup_script])
+     except subprocess.CalledProcessError as e:
+         print(f"Error occurred while running the setup script: {e}")
+         sys.exit(1)
+
+ if __name__ == "__main__":
+     main()
praisonai-0.0.59/praisonai/setup/setup_conda_env.py
@@ -0,0 +1,25 @@
+ import subprocess
+ import os
+ import sys
+ import platform
+
+ def main():
+     script_dir = os.path.dirname(os.path.abspath(__file__))
+     script_path = os.path.join(script_dir, 'setup_conda_env.sh')
+
+     if platform.system() == 'Windows':
+         print("Windows detected. Please run the setup_conda_env.sh script manually in Git Bash or WSL.")
+         print(f"Script location: {script_path}")
+         sys.exit(1)
+
+     try:
+         subprocess.check_call(['bash', script_path])
+     except subprocess.CalledProcessError as e:
+         print(f"Error occurred while running the setup script: {e}")
+         print("Setup failed. Please check the error message above and try to resolve the issue.")
+         sys.exit(1)
+
+     print("Conda environment setup completed successfully!")
+
+ if __name__ == "__main__":
+     main()
praisonai-0.0.59/praisonai/setup/setup_conda_env.sh
@@ -0,0 +1,72 @@
+ #!/bin/bash
+
+ # Detect OS and architecture
+ if [[ "$OSTYPE" == "darwin"* ]]; then
+     # macOS
+     if [[ $(uname -m) == 'arm64' ]]; then
+         MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh"
+     else
+         MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh"
+     fi
+ elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
+     # Linux
+     MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+ elif [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then
+     # Windows
+     MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe"
+     echo "Windows detected. Please run this script in Git Bash or WSL."
+     exit 1
+ else
+     echo "Unsupported operating system: $OSTYPE"
+     exit 1
+ fi
+
+ # Check if conda is already installed
+ if ! command -v conda &> /dev/null; then
+     echo "Conda is not installed. Installing Miniconda..."
+     wget $MINICONDA_URL -O ~/miniconda.sh
+     bash ~/miniconda.sh -b -p $HOME/miniconda
+     source $HOME/miniconda/bin/activate
+     conda init
+ else
+     echo "Conda is already installed."
+ fi
+
+ # Create and activate the Conda environment
+ ENV_NAME="praison_env"
+ if conda info --envs | grep -q $ENV_NAME; then
+     echo "Environment $ENV_NAME already exists. Recreating..."
+     conda env remove -y -n $ENV_NAME  # Remove existing environment
+     if [[ "$OSTYPE" == "darwin"* ]]; then
+         # macOS (both Intel and M1/M2)
+         conda create --name $ENV_NAME python=3.10 pytorch=2.3.0 -c pytorch -y
+     elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
+         # Linux
+         conda create --name $ENV_NAME python=3.10 pytorch=2.3.0 cudatoolkit=11.8 -c pytorch -c nvidia -y
+     fi
+     # conda activate $ENV_NAME
+ else
+     echo "Creating new environment $ENV_NAME..."
+     if [[ "$OSTYPE" == "darwin"* ]]; then
+         # macOS (both Intel and M1/M2)
+         conda create --name $ENV_NAME python=3.10 pytorch=2.3.0 -c pytorch -y
+     elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
+         # Linux
+         conda create --name $ENV_NAME python=3.10 pytorch=2.3.0 cudatoolkit=11.8 -c pytorch -c nvidia -y
+     fi
+     # conda activate $ENV_NAME
+ fi
+
+ # source $HOME/miniconda/bin/activate $ENV_NAME
+
+ # Get full path of pip
+ PIP_FULL_PATH=$(conda run -n $ENV_NAME which pip)
+
+ # Install other packages within the activated environment
+ # Use PIP_FULL_PATH to run pip commands
+ $PIP_FULL_PATH install --upgrade pip
+ $PIP_FULL_PATH install "xformers==0.0.26.post1"
+ $PIP_FULL_PATH install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@4e570be9ae4ced8cdc64e498125708e34942befc"
+ $PIP_FULL_PATH install --no-deps "trl<0.9.0" peft accelerate bitsandbytes
+
+ echo "Setup completed successfully!"
praisonai-0.0.59/praisonai/train.py
@@ -0,0 +1,276 @@
+ import subprocess
+ import os
+ import sys
+ import yaml
+ import torch
+ import shutil
+ from transformers import TextStreamer
+ from unsloth import FastLanguageModel, is_bfloat16_supported
+ from trl import SFTTrainer
+ from transformers import TrainingArguments
+ from datasets import load_dataset, concatenate_datasets, Dataset
+ from psutil import virtual_memory
+
+ class train:
+     def __init__(self, config_path="config.yaml"):
+         self.load_config(config_path)
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.model, self.tokenizer = None, None
+
+     def load_config(self, path):
+         with open(path, "r") as file:
+             self.config = yaml.safe_load(file)
+
+     def print_system_info(self):
+         print(f"PyTorch version: {torch.__version__}")
+         print(f"CUDA version: {torch.version.cuda}")
+         if torch.cuda.is_available():
+             device_capability = torch.cuda.get_device_capability()
+             print(f"CUDA Device Capability: {device_capability}")
+         else:
+             print("CUDA is not available")
+
+         python_version = sys.version
+         pip_version = subprocess.check_output(['pip', '--version']).decode().strip()
+         python_path = sys.executable
+         pip_path = subprocess.check_output(['which', 'pip']).decode().strip()
+         print(f"Python Version: {python_version}")
+         print(f"Pip Version: {pip_version}")
+         print(f"Python Path: {python_path}")
+         print(f"Pip Path: {pip_path}")
+
+     def check_gpu(self):
+         gpu_stats = torch.cuda.get_device_properties(0)
+         print(f"GPU = {gpu_stats.name}. Max memory = {round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)} GB.")
+
+     def check_ram(self):
+         ram_gb = virtual_memory().total / 1e9
+         print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))
+         if ram_gb < 20:
+             print('Not using a high-RAM runtime')
+         else:
+             print('You are using a high-RAM runtime!')
+
+     # def install_packages(self):
+     #     subprocess.run(["pip", "install", "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@4e570be9ae4ced8cdc64e498125708e34942befc"])
+     #     subprocess.run(["pip", "install", "--no-deps", "trl<0.9.0", "peft==0.12.0", "accelerate==0.33.0", "bitsandbytes==0.43.3"])
+
+     def prepare_model(self):
+         self.model, self.tokenizer = FastLanguageModel.from_pretrained(
+             model_name=self.config["model_name"],
+             max_seq_length=self.config["max_seq_length"],
+             dtype=None,
+             load_in_4bit=self.config["load_in_4bit"]
+         )
+         self.model = FastLanguageModel.get_peft_model(
+             self.model,
+             r=self.config["lora_r"],
+             target_modules=self.config["lora_target_modules"],
+             lora_alpha=self.config["lora_alpha"],
+             lora_dropout=self.config["lora_dropout"],
+             bias=self.config["lora_bias"],
+             use_gradient_checkpointing=self.config["use_gradient_checkpointing"],
+             random_state=self.config["random_state"],
+             use_rslora=self.config["use_rslora"],
+             loftq_config=self.config["loftq_config"],
+         )
+
+     def process_dataset(self, dataset_info):
+         dataset_name = dataset_info["name"]
+         split_type = dataset_info.get("split_type", "train")
+         processing_func = getattr(self, dataset_info.get("processing_func", "format_prompts"))
+         rename = dataset_info.get("rename", {})
+         filter_data = dataset_info.get("filter_data", False)
+         filter_column_value = dataset_info.get("filter_column_value", "id")
+         filter_value = dataset_info.get("filter_value", "alpaca")
+         num_samples = dataset_info.get("num_samples", 20000)
+
+         dataset = load_dataset(dataset_name, split=split_type)
+
+         if rename:
+             dataset = dataset.rename_columns(rename)
+         if filter_data:
+             dataset = dataset.filter(lambda example: filter_value in example[filter_column_value]).shuffle(seed=42).select(range(num_samples))
+         dataset = dataset.map(processing_func, batched=True)
+         return dataset
+
+     def format_prompts(self, examples):
+         alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+ ### Instruction:
+ {}
+
+ ### Input:
+ {}
+
+ ### Response:
+ {}"""
+         texts = [alpaca_prompt.format(ins, inp, out) + self.tokenizer.eos_token for ins, inp, out in zip(examples["instruction"], examples["input"], examples["output"])]
+         return {"text": texts}
+
+     def load_datasets(self):
+         datasets = []
+         for dataset_info in self.config["dataset"]:
+             datasets.append(self.process_dataset(dataset_info))
+         return concatenate_datasets(datasets)
+
+     def train_model(self):
+         dataset = self.load_datasets()
+         trainer = SFTTrainer(
+             model=self.model,
+             tokenizer=self.tokenizer,
+             train_dataset=dataset,
+             dataset_text_field=self.config["dataset_text_field"],
+             max_seq_length=self.config["max_seq_length"],
+             dataset_num_proc=self.config["dataset_num_proc"],
+             packing=self.config["packing"],
+             args=TrainingArguments(
+                 per_device_train_batch_size=self.config["per_device_train_batch_size"],
+                 gradient_accumulation_steps=self.config["gradient_accumulation_steps"],
+                 warmup_steps=self.config["warmup_steps"],
+                 num_train_epochs=self.config["num_train_epochs"],
+                 max_steps=self.config["max_steps"],
+                 learning_rate=self.config["learning_rate"],
+                 fp16=not is_bfloat16_supported(),
+                 bf16=is_bfloat16_supported(),
+                 logging_steps=self.config["logging_steps"],
+                 optim=self.config["optim"],
+                 weight_decay=self.config["weight_decay"],
+                 lr_scheduler_type=self.config["lr_scheduler_type"],
+                 seed=self.config["seed"],
+                 output_dir=self.config["output_dir"],
+             ),
+         )
+         trainer.train()
+         self.model.save_pretrained("lora_model")  # Local saving
+         self.tokenizer.save_pretrained("lora_model")
+
+     def inference(self, instruction, input_text):
+         FastLanguageModel.for_inference(self.model)
+         alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+ ### Instruction:
+ {}
+
+ ### Input:
+ {}
+
+ ### Response:
+ {}"""
+         inputs = self.tokenizer([alpaca_prompt.format(instruction, input_text, "")], return_tensors="pt").to("cuda")
+         outputs = self.model.generate(**inputs, max_new_tokens=64, use_cache=True)
+         print(self.tokenizer.batch_decode(outputs))
+
+     def load_model(self):
+         """Loads the model and tokenizer using the FastLanguageModel library."""
+         from unsloth import FastLanguageModel
+         model, tokenizer = FastLanguageModel.from_pretrained(
+             model_name=self.config["output_dir"],
+             max_seq_length=2048,
+             dtype=None,
+             load_in_4bit=self.config["load_in_4bit"],
+         )
+         return model, tokenizer
+
+     def save_model_merged(self):
+         if os.path.exists(self.config["hf_model_name"]):
+             shutil.rmtree(self.config["hf_model_name"])
+         self.model.push_to_hub_merged(
+             self.config["hf_model_name"],
+             self.tokenizer,
+             save_method="merged_16bit",
+             token=os.getenv('HF_TOKEN')
+         )
+
+     def push_model_gguf(self):
+         self.model.push_to_hub_gguf(
+             self.config["hf_model_name"],
+             self.tokenizer,
+             quantization_method=self.config["quantization_method"],
+             token=os.getenv('HF_TOKEN')
+         )
+
+     def save_model_gguf(self):
+         self.model.save_pretrained_gguf(
+             self.config["hf_model_name"],
+             self.tokenizer,
+             quantization_method="q4_k_m"
+         )
+
+     def prepare_modelfile_content(self):
+         output_model = self.config["hf_model_name"]
+         gguf_path = f"{output_model}/unsloth.Q4_K_M.gguf"
+
+         # Check if the GGUF file exists. If not, generate it  ## TODO: quantisations other than Q4_K_M.gguf
+         if not os.path.exists(gguf_path):
+             self.model, self.tokenizer = self.load_model()
+             self.save_model_gguf()
+         return f"""FROM {output_model}/unsloth.Q4_K_M.gguf
+
+ TEMPLATE \"\"\"Below are some instructions that describe some tasks. Write responses that appropriately complete each request.{{{{ if .Prompt }}}}
+
+ ### Instruction:
+ {{{{ .Prompt }}}}
+
+ {{{{ end }}}}### Response:
+ {{{{ .Response }}}}\"\"\"
+
+ PARAMETER stop "<|start_header_id|>"
+ PARAMETER stop "<|end_header_id|>"
+ PARAMETER stop "<|eot_id|>"
+ PARAMETER stop "<|end_of_text|>"
+ PARAMETER stop "<|reserved_special_token_"
+ """
+
+     def create_and_push_ollama_model(self):
+         modelfile_content = self.prepare_modelfile_content()
+         with open('Modelfile', 'w') as file:
+             file.write(modelfile_content)
+
+         subprocess.Popen(["ollama", "serve"])  # start the server in the background; a blocking run() would never return
+         subprocess.run(["ollama", "create", f"{self.config['ollama_model']}:{self.config['model_parameters']}", "-f", "Modelfile"])
+         subprocess.run(["ollama", "push", f"{self.config['ollama_model']}:{self.config['model_parameters']}"])
+
+     def run(self):
+         self.print_system_info()
+         self.check_gpu()
+         self.check_ram()
+         # self.install_packages()
+         if self.config.get("train", "true").lower() == "true":
+             self.prepare_model()
+             self.train_model()
+
+         if self.config.get("huggingface_save", "true").lower() == "true":
+             self.model, self.tokenizer = self.load_model()
+             self.save_model_merged()
+
+         if self.config.get("huggingface_save_gguf", "true").lower() == "true":
+             self.model, self.tokenizer = self.load_model()
+             self.push_model_gguf()
+
+         # if self.config.get("save_gguf", "true").lower() == "true":  ## TODO
+         #     self.model, self.tokenizer = self.load_model()
+         #     self.save_model_gguf()
+
+         # if self.config.get("save_merged", "true").lower() == "true":  ## TODO
+         #     self.model, self.tokenizer = self.load_model()
+         #     self.save_model_merged()
+
+         if self.config.get("ollama_save", "true").lower() == "true":
+             self.create_and_push_ollama_model()
+
+
+ def main():
+     import argparse
+     parser = argparse.ArgumentParser(description='PraisonAI Training Script')
+     parser.add_argument('command', choices=['train'], help='Command to execute')
+     parser.add_argument('--config', default='config.yaml', help='Path to configuration file')
+     args = parser.parse_args()
+
+     if args.command == 'train':
+         ai = train(config_path=args.config)
+         ai.run()
+
+
+ if __name__ == '__main__':
+     main()
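To see what `format_prompts` produces, a self-contained sketch applying the same template to a toy batch; the `<eos>` string is a stand-in for the tokenizer's real EOS token:

```python
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

examples = {
    "instruction": ["Translate to French"],
    "input": ["Good morning"],
    "output": ["Bonjour"],
}
eos_token = "<eos>"  # placeholder; train.py appends self.tokenizer.eos_token

texts = [
    alpaca_prompt.format(ins, inp, out) + eos_token
    for ins, inp, out in zip(examples["instruction"], examples["input"], examples["output"])
]
print(texts[0])
```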
{praisonai-0.0.57 → praisonai-0.0.59}/praisonai/ui/context.py
@@ -97,17 +97,36 @@ class ContextGatherer:
          return modified_ignore_patterns
 
      def get_include_paths(self):
+         """
+         Loads include paths from:
+         1. .praisoncontext (includes ONLY the files/directories listed)
+         2. .praisoninclude (if .praisoncontext doesn't exist, the listed
+            paths are included along with all other relevant files,
+            excluding ignore patterns)
+         """
          include_paths = []
-
-         # 1. Load from .praisoninclude
-         include_file = os.path.join(self.directory, '.praisoninclude')
+         include_all = False  # Flag to indicate if we need to include all files
+
+         include_file = os.path.join(self.directory, '.praisoncontext')
          if os.path.exists(include_file):
              with open(include_file, 'r') as f:
                  include_paths.extend(
                      line.strip() for line in f
                      if line.strip() and not line.startswith('#')
                  )
-         return include_paths
+
+         # If .praisoncontext doesn't exist, fall back to .praisoninclude
+         # for including all relevant files
+         if not include_paths:
+             include_file = os.path.join(self.directory, '.praisoninclude')
+             if os.path.exists(include_file):
+                 with open(include_file, 'r') as f:
+                     include_paths.extend(
+                         line.strip() for line in f
+                         if line.strip() and not line.startswith('#')
+                     )
+                 include_all = True  # Include all files along with specified paths
+
+         return include_paths, include_all
 
      def should_ignore(self, file_path):
          """
@@ -130,61 +149,78 @@ class ContextGatherer:
          any(file_path.endswith(ext) for ext in self.relevant_extensions)
 
      def gather_context(self):
-         """Gather context from relevant files, respecting ignore patterns and include paths."""
+         """
+         Gather context from relevant files, respecting ignore patterns
+         and include options from .praisoninclude and .praisoncontext.
+         """
          context = []
          total_files = 0
          processed_files = 0
+         self.include_paths, include_all = self.get_include_paths()
 
-         if not self.include_paths:
-             # No include paths specified, process the entire directory
-             for root, dirs, files in os.walk(self.directory):
-                 total_files += len(files)
-                 dirs[:] = [d for d in dirs if not self.should_ignore(os.path.join(root, d))]
-                 for file in files:
-                     file_path = os.path.join(root, file)
-                     if not self.should_ignore(file_path) and self.is_relevant_file(file_path):
-                         try:
-                             with open(file_path, 'r', encoding='utf-8') as f:
-                                 content = f.read()
-                             context.append(f"File: {file_path}\n\n{content}\n\n{'='*50}\n")
-                             self.included_files.append(Path(file_path).relative_to(self.directory))
-                         except Exception as e:
-                             logger.error(f"Error reading {file_path}: {e}")
-                     processed_files += 1
-                     print(f"\rProcessed {processed_files}/{total_files} files", end="", flush=True)
-         else:
-             # Process specified include paths
+         def add_file_content(file_path):
+             """Helper function to add file content to context."""
+             try:
+                 with open(file_path, 'r', encoding='utf-8') as f:
+                     content = f.read()
+                 context.append(
+                     f"File: {file_path}\n\n{content}\n\n{'=' * 50}\n"
+                 )
+                 self.included_files.append(
+                     Path(file_path).relative_to(self.directory)
+                 )
+             except Exception as e:
+                 logger.error(f"Error reading {file_path}: {e}")
+
+         def process_path(path):
+             """Helper function to process a single path (file or directory)."""
+             nonlocal total_files, processed_files
+             if os.path.isdir(path):
+                 for root, dirs, files in os.walk(path):
+                     total_files += len(files)
+                     dirs[:] = [
+                         d
+                         for d in dirs
+                         if not self.should_ignore(os.path.join(root, d))
+                     ]
+                     for file in files:
+                         file_path = os.path.join(root, file)
+                         if not self.should_ignore(file_path) and self.is_relevant_file(file_path):
+                             add_file_content(file_path)
+                         processed_files += 1
+                         print(
+                             f"\rProcessed {processed_files}/{total_files} files",
+                             end="",
+                             flush=True,
+                         )
+             elif os.path.isfile(path) and self.is_relevant_file(path):
+                 add_file_content(path)
+                 processed_files += 1
+                 print(
+                     f"\rProcessed {processed_files}/1 files",
+                     end="",
+                     flush=True,
+                 )
+
+         if include_all:
+             # Include ALL relevant files from the entire directory
+             process_path(self.directory)
+
+             # Include files from .praisoninclude specifically
+             for include_path in self.include_paths:
+                 full_path = os.path.join(self.directory, include_path)
+                 process_path(full_path)
+         elif self.include_paths:
+             # Include only files specified in .praisoncontext
              for include_path in self.include_paths:
                  full_path = os.path.join(self.directory, include_path)
-                 if os.path.isdir(full_path):
-                     for root, dirs, files in os.walk(full_path):
-                         total_files += len(files)
-                         dirs[:] = [d for d in dirs if not self.should_ignore(os.path.join(root, d))]
-                         for file in files:
-                             file_path = os.path.join(root, file)
-                             if not self.should_ignore(file_path) and self.is_relevant_file(file_path):
-                                 try:
-                                     with open(file_path, 'r', encoding='utf-8') as f:
-                                         content = f.read()
-                                     context.append(f"File: {file_path}\n\n{content}\n\n{'='*50}\n")
-                                     self.included_files.append(Path(file_path).relative_to(self.directory))
-                                 except Exception as e:
-                                     logger.error(f"Error reading {file_path}: {e}")
-                             processed_files += 1
-                             print(f"\rProcessed {processed_files}/{total_files} files", end="", flush=True)
-                 elif os.path.isfile(full_path) and self.is_relevant_file(full_path):
-                     try:
-                         with open(full_path, 'r', encoding='utf-8') as f:
-                             content = f.read()
-                         context.append(f"File: {full_path}\n\n{content}\n\n{'='*50}\n")
-                         self.included_files.append(Path(full_path).relative_to(self.directory))
-                     except Exception as e:
-                         logger.error(f"Error reading {full_path}: {e}")
-                     processed_files += 1
-                     print(f"\rProcessed {processed_files}/{total_files} files", end="", flush=True)
+                 process_path(full_path)
+         else:
+             # No include options, process the entire directory
+             process_path(self.directory)
 
          print()  # New line after progress indicator
-         return '\n'.join(context)
+         return "\n".join(context)
 
      def count_tokens(self, text):
          """Count tokens using a simple whitespace-based tokenizer."""
{praisonai-0.0.57 → praisonai-0.0.59}/pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "PraisonAI"
- version = "0.0.57"
+ version = "0.0.59"
  description = "PraisonAI application combines AutoGen and CrewAI or similar frameworks into a low-code solution for building and managing multi-agent LLM systems, focusing on simplicity, customization, and efficient human-agent collaboration."
  authors = ["Mervin Praison"]
  license = ""
@@ -20,7 +20,7 @@ pyautogen = ">=0.2.19"
  crewai = ">=0.32.0"
  markdown = ">=3.5"
  praisonai-tools = ">=0.0.7"
- pyparsing = ">=3.0.0" # Added to fix a colab issue. Temp Fix.
+ pyparsing = ">=3.0.0"
  chainlit = {version = "^1.1.301", optional = true}
  gradio = {version = ">=4.26.0", optional = true}
  flask = {version = ">=3.0.0", optional = true}
@@ -89,6 +89,8 @@ build-backend = "poetry.core.masonry.api"
 
  [tool.poetry.scripts]
  praisonai = "praisonai.__main__:main"
+ setup-conda-env = "setup.setup_conda_env:main"
+ post-install = "setup.post_install:main"
 
  [tool.poetry.extras]
  ui = ["chainlit"]
@@ -100,4 +102,14 @@ openai = ["langchain-openai"]
  anthropic = ["langchain-anthropic"]
  cohere = ["langchain-cohere"]
  chat = ["chainlit", "litellm", "aiosqlite", "greenlet"]
- code = ["chainlit", "litellm", "aiosqlite", "greenlet"]
+ code = ["chainlit", "litellm", "aiosqlite", "greenlet"]
+ train = ["setup-conda-env"]
+
+ [tool.poetry-dynamic-versioning]
+ enable = true
+ vcs = "git"
+ style = "semver"
+
+ [tool.poetry.build]
+ generate-setup-file = false
+ script = "praisonai/setup/post_install.py"