amd-gaia 0.15.2__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gaia/cli.py CHANGED
@@ -144,6 +144,7 @@ def initialize_lemonade_for_agent(
144
144
  "docker": 32768,
145
145
  "talk": 32768,
146
146
  "rag": 32768,
147
+ "sd": 8192, # SD agent needs 8K for image + story workflow
147
148
  "mcp": 4096,
148
149
  "minimal": 4096,
149
150
  "vlm": 8192,
@@ -987,6 +988,62 @@ def main():
987
988
  help="Port for the Blender MCP server (default: 9876)",
988
989
  )
989
990
 
991
+ # Add SD (Stable Diffusion) image generation command
992
+ sd_parser = subparsers.add_parser(
993
+ "sd",
994
+ help="Generate images using Stable Diffusion",
995
+ parents=[parent_parser],
996
+ )
997
+ sd_parser.add_argument(
998
+ "prompt",
999
+ nargs="?",
1000
+ help="Text description of the image to generate",
1001
+ )
1002
+ sd_parser.add_argument(
1003
+ "-i",
1004
+ "--interactive",
1005
+ action="store_true",
1006
+ help="Run in interactive mode",
1007
+ )
1008
+ sd_parser.add_argument(
1009
+ "--sd-model",
1010
+ dest="sd_model",
1011
+ choices=["SD-1.5", "SD-Turbo", "SDXL-Base-1.0", "SDXL-Turbo"],
1012
+ default="SDXL-Turbo",
1013
+ help="SD model: SDXL-Turbo (fast, good quality, default), SD-Turbo (faster but lower quality), SDXL-Base-1.0 (photorealistic, slow)",
1014
+ )
1015
+ sd_parser.add_argument(
1016
+ "--size",
1017
+ choices=["512x512", "768x768", "1024x1024"],
1018
+ help="Image size (auto-selected if not specified: 512px for SD-1.5/Turbo, 1024px for SDXL)",
1019
+ )
1020
+ sd_parser.add_argument(
1021
+ "--steps",
1022
+ type=int,
1023
+ help="Inference steps (auto-selected if not specified: 4 for Turbo, 20 for Base)",
1024
+ )
1025
+ sd_parser.add_argument(
1026
+ "--cfg-scale",
1027
+ dest="cfg_scale",
1028
+ type=float,
1029
+ help="CFG scale (auto-selected if not specified: 1.0 for Turbo, 7.5 for Base)",
1030
+ )
1031
+ sd_parser.add_argument(
1032
+ "--output-dir",
1033
+ default=".gaia/cache/sd/images",
1034
+ help="Directory to save generated images",
1035
+ )
1036
+ sd_parser.add_argument(
1037
+ "--seed",
1038
+ type=int,
1039
+ help="Random seed for reproducibility",
1040
+ )
1041
+ sd_parser.add_argument(
1042
+ "--no-open",
1043
+ action="store_true",
1044
+ help="Skip prompt to open image in viewer (for automation/scripting)",
1045
+ )
1046
+
990
1047
  # Add Jira app command
991
1048
  jira_parser = subparsers.add_parser(
992
1049
  "jira",
@@ -2034,8 +2091,8 @@ Examples:
2034
2091
  "--profile",
2035
2092
  "-p",
2036
2093
  default="chat",
2037
- choices=["minimal", "chat", "code", "rag", "all"],
2038
- help="Profile to initialize: minimal, chat, code, rag, all (default: chat)",
2094
+ choices=["minimal", "sd", "chat", "code", "rag", "all"],
2095
+ help="Profile to initialize: minimal, sd (image gen), chat, code, rag, all (default: chat)",
2039
2096
  )
2040
2097
  init_parser.add_argument(
2041
2098
  "--minimal",
@@ -2047,6 +2104,11 @@ Examples:
2047
2104
  action="store_true",
2048
2105
  help="Skip model downloads (only install Lemonade)",
2049
2106
  )
2107
+ init_parser.add_argument(
2108
+ "--skip-lemonade",
2109
+ action="store_true",
2110
+ help="Skip Lemonade installation check (for CI with pre-installed Lemonade)",
2111
+ )
2050
2112
  init_parser.add_argument(
2051
2113
  "--force-reinstall",
2052
2114
  action="store_true",
@@ -4003,6 +4065,11 @@ Let me know your answer!
4003
4065
  handle_blender_command(args)
4004
4066
  return
4005
4067
 
4068
+ # Handle SD (image generation) command
4069
+ if args.action == "sd":
4070
+ handle_sd_command(args)
4071
+ return
4072
+
4006
4073
  # Handle Jira command
4007
4074
  if args.action == "jira":
4008
4075
  handle_jira_command(args)
@@ -4037,6 +4104,7 @@ Let me know your answer!
4037
4104
  exit_code = run_init(
4038
4105
  profile=profile,
4039
4106
  skip_models=args.skip_models,
4107
+ skip_lemonade=getattr(args, "skip_lemonade", False),
4040
4108
  force_reinstall=args.force_reinstall,
4041
4109
  force_models=args.force_models,
4042
4110
  yes=args.yes,
@@ -4892,6 +4960,178 @@ def handle_visualize_command(args):
4892
4960
  print(f"⚠️ Error stopping server: {e}")
4893
4961
 
4894
4962
 
4963
def _open_in_default_viewer(path):
    """Open ``path`` with the operating system's default application."""
    if sys.platform == "win32":
        os.startfile(path)  # pylint: disable=no-member
    elif sys.platform == "darwin":
        subprocess.run(["open", path], check=False)
    else:
        subprocess.run(["xdg-open", path], check=False)


def _offer_to_open_images(new_images):
    """
    Ask the user whether to open freshly generated images, then open them.

    Args:
        new_images: Sequence of generation records; each record is indexed
            with ``"image_path"`` to obtain the file to open.
    """
    if not new_images:
        return

    num_images = len(new_images)
    if num_images > 1:
        prompt_text = f"Open {num_images} images in default viewer? [Y/n]: "
    else:
        prompt_text = "Open image in default viewer? [Y/n]: "

    try:
        response = input(prompt_text).strip().lower()
        if response not in ("", "y", "yes"):
            return

        opened = 0
        for img in new_images:
            path = img["image_path"]
            try:
                _open_in_default_viewer(path)
            except OSError as err:
                # A missing viewer (e.g. no xdg-open on a headless machine)
                # must not crash the command after a successful generation.
                print(f"Could not open {path}: {err}")
            else:
                opened += 1

        if opened:
            plural = "s" if opened > 1 else ""
            print(f"[{opened} image{plural} opened]\n")
    except (KeyboardInterrupt, EOFError):
        # Opening is best-effort: Ctrl-C / closed stdin just skips it.
        pass


def handle_sd_command(args):
    """
    Handle the SD (Stable Diffusion) image generation command.

    With neither a prompt nor ``--interactive`` a short usage text is printed
    and the function returns without contacting any server. Otherwise Lemonade
    is initialized for the "sd" agent, an ``SDAgent`` is created and health
    checked, and the prompt(s) are run through ``agent.process_query()``.
    Unless ``--no-open`` was given, the user is offered to open the images
    produced by each query.

    Args:
        args: Parsed command line arguments for the sd command

    Raises:
        SystemExit: Exit code 1 when Lemonade cannot be initialized (and no
            Claude/ChatGPT backend was requested) or when the SD health check
            does not report "healthy".
    """
    # No prompt and not interactive - show help (no server needed)
    if not args.prompt and not args.interactive:
        print("Usage: gaia sd <prompt> [options]")
        print(" gaia sd -i (interactive mode)")
        print()
        print("Examples:")
        print(' gaia sd "a sunset over mountains"')
        print(' gaia sd "cyberpunk city" --sd-model SDXL-Turbo --size 1024x1024')
        print(" gaia sd -i")
        return

    from gaia.agents.sd import SDAgent, SDAgentConfig

    use_claude = getattr(args, "use_claude", False)
    use_chatgpt = getattr(args, "use_chatgpt", False)

    # Ensure Lemonade is ready with proper context size for SD agent
    # SD agent needs 8K context for image + story workflow
    success, _ = initialize_lemonade_for_agent(
        agent="sd",
        use_claude=use_claude,
        use_chatgpt=use_chatgpt,
        quiet=False,
    )

    # A failed local init is only fatal when no hosted backend was requested.
    if not success and not (use_claude or use_chatgpt):
        print("Failed to initialize Lemonade Server with required 8K context.")
        print("Try: lemonade-server serve --ctx-size 8192")
        sys.exit(1)

    # Create config - ensure LLM model is set (default LLM for prompt enhancement)
    llm_model = getattr(args, "model", None) or "Qwen3-8B-GGUF"

    # NOTE(review): --size, --steps, --cfg-scale and --seed are parsed by the
    # CLI but not forwarded here; confirm whether SDAgentConfig accepts them.
    config = SDAgentConfig(
        sd_model=args.sd_model,
        output_dir=args.output_dir,
        prompt_to_open=not args.no_open,
        show_stats=getattr(args, "stats", False),
        use_claude=use_claude,
        use_chatgpt=use_chatgpt,
        base_url=getattr(args, "base_url", "http://localhost:8000/api/v1"),
        model_id=llm_model,
    )

    # Create agent with LLM prompt enhancement
    agent = SDAgent(config)

    # Check health
    health = agent.sd_health_check()
    if health["status"] != "healthy":
        print(f"Error: {health.get('error', 'SD endpoint unavailable')}")
        print("Make sure Lemonade Server is running and SD model is available:")
        print(" lemonade-server serve")
        # Point at the model the user actually selected, not a fixed one.
        print(f" lemonade-server pull {args.sd_model}")
        sys.exit(1)

    print()
    print("=" * 80)
    print(f"🖼️ SD Image Generator - {args.sd_model}")
    print("=" * 80)
    print("LLM-powered prompt enhancement for better image quality")
    print(f"Output: {args.output_dir}")
    if not args.no_open:
        print("You'll be prompted to open images after generation")
    print("=" * 80)
    print()

    # Interactive mode
    if args.interactive:
        print("Type 'quit' to exit.")
        print()

        while True:
            try:
                user_prompt = input("You: ").strip()
                if not user_prompt:
                    continue
                if user_prompt.lower() in ("quit", "exit", "q"):
                    print("Goodbye!")
                    break

                # Track images before this query
                initial_count = len(agent.sd_generations)

                # Use agent.process_query() for LLM enhancement
                result = agent.process_query(user_prompt)
                if result.get("final_answer"):
                    print(f"\nAgent: {result['final_answer']}\n")
                else:
                    print("\nAgent: Generation complete\n")

                # Prompt to open image(s) after agent completes
                if not args.no_open and result.get("status") != "error":
                    _offer_to_open_images(agent.sd_generations[initial_count:])

            except (KeyboardInterrupt, EOFError):
                # EOF (Ctrl-D / closed stdin) ends the session like Ctrl-C
                # instead of surfacing as an unhandled traceback.
                print("\nGoodbye!")
                break
        return

    # Single prompt mode
    # Track images before this command
    initial_count = len(agent.sd_generations)

    # Use agent.process_query() for LLM enhancement
    result = agent.process_query(args.prompt)
    if result.get("final_answer"):
        print(f"\n{result['final_answer']}\n")

    # Prompt to open image(s) after agent completes
    if not args.no_open and result.get("status") != "error":
        _offer_to_open_images(agent.sd_generations[initial_count:])
5133
+
5134
+
4895
5135
  def handle_blender_command(args):
4896
5136
  """
4897
5137
  Handle the Blender agent command.