prompture 0.0.32.dev1__py3-none-any.whl → 0.0.33.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
prompture/__init__.py CHANGED
@@ -13,7 +13,7 @@ from .core import (
13
13
  extract_from_pandas,
14
14
  render_output,
15
15
  )
16
- from .drivers import get_driver, get_driver_for_model, OpenAIDriver, LocalHTTPDriver, OllamaDriver, ClaudeDriver, LMStudioDriver, AzureDriver, GoogleDriver, GroqDriver, OpenRouterDriver, GrokDriver
16
+ from .drivers import get_driver, get_driver_for_model, OpenAIDriver, LocalHTTPDriver, OllamaDriver, ClaudeDriver, LMStudioDriver, AzureDriver, GoogleDriver, GroqDriver, OpenRouterDriver, GrokDriver, AirLLMDriver
17
17
  from .tools import clean_json_text, clean_toon_text
18
18
  from .field_definitions import (
19
19
  FIELD_DEFINITIONS, get_field_definition, get_required_fields, get_field_names,
@@ -87,6 +87,7 @@ __all__ = [
87
87
  "GroqDriver",
88
88
  "OpenRouterDriver",
89
89
  "GrokDriver",
90
+ "AirLLMDriver",
90
91
  # Discovery
91
92
  "get_available_models",
92
93
  ]
@@ -8,6 +8,7 @@ from .google_driver import GoogleDriver
8
8
  from .groq_driver import GroqDriver
9
9
  from .openrouter_driver import OpenRouterDriver
10
10
  from .grok_driver import GrokDriver
11
+ from .airllm_driver import AirLLMDriver
11
12
  from ..settings import settings
12
13
 
13
14
 
@@ -54,6 +55,10 @@ DRIVER_REGISTRY = {
54
55
  api_key=settings.grok_api_key,
55
56
  model=model or settings.grok_model
56
57
  ),
58
+ "airllm": lambda model=None: AirLLMDriver(
59
+ model=model or settings.airllm_model,
60
+ compression=settings.airllm_compression,
61
+ ),
57
62
  }
58
63
 
59
64
 
@@ -115,6 +120,7 @@ __all__ = [
115
120
  "GroqDriver",
116
121
  "OpenRouterDriver",
117
122
  "GrokDriver",
123
+ "AirLLMDriver",
118
124
  "get_driver",
119
125
  "get_driver_for_model",
120
126
  ]
@@ -0,0 +1,116 @@
1
+ import logging
2
+ from ..driver import Driver
3
+ from typing import Any, Dict, Optional
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class AirLLMDriver(Driver):
    """Driver for AirLLM — run large models (70B+) on consumer GPUs via
    layer-by-layer memory management.

    The ``airllm`` package is a lazy dependency: it is imported on first
    ``generate()`` call so the rest of Prompture works without it installed.
    """

    # Every entry is priced at zero; ``generate()`` always reports a cost of 0.0.
    MODEL_PRICING = {
        "default": {"prompt": 0.0, "completion": 0.0}
    }

    def __init__(self, model: str = "meta-llama/Llama-2-7b-hf",
                 compression: Optional[str] = None):
        """
        Args:
            model: HuggingFace repo ID (e.g. ``"meta-llama/Llama-2-70b-hf"``).
            compression: Optional quantization mode — ``"4bit"`` or ``"8bit"``.
        """
        self.model = model
        self.compression = compression
        # Instance-level default options; per-call options override these.
        self.options: Dict[str, Any] = {}
        # Populated together by ``_ensure_loaded()`` on first use.
        self._llm = None
        self._tokenizer = None

    # ------------------------------------------------------------------
    # Lazy model loading
    # ------------------------------------------------------------------
    def _ensure_loaded(self):
        """Load the AirLLM model and tokenizer on first use.

        The model and tokenizer are stored on the instance only after both
        have loaded, so a failure in either step leaves the driver unloaded
        and the next call retries instead of running with a missing tokenizer.

        Raises:
            ImportError: If ``airllm`` or ``transformers`` is not installed.
        """
        if self._llm is not None:
            return

        try:
            from airllm import AutoModel
        except ImportError as exc:
            raise ImportError(
                "The 'airllm' package is required for the AirLLM driver. "
                "Install it with: pip install prompture[airllm]"
            ) from exc

        try:
            from transformers import AutoTokenizer
        except ImportError as exc:
            raise ImportError(
                "The 'transformers' package is required for the AirLLM driver. "
                "Install it with: pip install transformers"
            ) from exc

        logger.info("Loading AirLLM model: %s (compression=%s)",
                    self.model, self.compression)

        # Only forward ``compression`` when it is set, so the loader's own
        # default applies otherwise.
        load_kwargs: Dict[str, Any] = {}
        if self.compression:
            load_kwargs["compression"] = self.compression

        # Load into locals first and publish both at once: ``self._llm`` is the
        # "already loaded" flag, so it must never be set without a tokenizer.
        llm = AutoModel.from_pretrained(self.model, **load_kwargs)
        tokenizer = AutoTokenizer.from_pretrained(self.model)
        self._llm, self._tokenizer = llm, tokenizer
        logger.info("AirLLM model loaded successfully")

    # ------------------------------------------------------------------
    # Driver interface
    # ------------------------------------------------------------------
    def generate(self, prompt: str,
                 options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Generate a completion for ``prompt`` with the lazily loaded model.

        Args:
            prompt: Text to tokenize and feed to the model.
            options: Per-call overrides merged on top of ``self.options``.
                Only ``"max_new_tokens"`` (default 256) is read here.

        Returns:
            A dict with ``"text"`` (the decoded newly generated tokens) and
            ``"meta"`` (token counts, a cost of 0.0, ``raw_response`` and
            ``model_name``).

        Raises:
            ImportError: If ``airllm`` or ``transformers`` is not installed.
        """
        self._ensure_loaded()

        merged_options = self.options.copy()
        if options:
            merged_options.update(options)

        max_new_tokens = merged_options.get("max_new_tokens", 256)

        # Tokenize
        input_ids = self._tokenizer(
            prompt, return_tensors="pt"
        ).input_ids

        prompt_tokens = input_ids.shape[1]

        logger.debug("AirLLM generating with max_new_tokens=%s, "
                     "prompt_tokens=%s", max_new_tokens, prompt_tokens)

        # Generate
        output_ids = self._llm.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
        )

        # Decode only the newly generated tokens (strip the prompt prefix).
        # NOTE(review): assumes generate() returns a 2-D id tensor whose first
        # row starts with the prompt ids — confirm against the airllm version.
        new_tokens = output_ids[0, prompt_tokens:]
        completion_tokens = len(new_tokens)
        text = self._tokenizer.decode(new_tokens, skip_special_tokens=True)

        total_tokens = prompt_tokens + completion_tokens

        meta = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
            "cost": 0.0,
            "raw_response": {
                "model": self.model,
                "compression": self.compression,
                "max_new_tokens": max_new_tokens,
            },
            "model_name": self.model,
        }

        return {"text": text, "meta": meta}
prompture/settings.py CHANGED
@@ -48,6 +48,10 @@ class Settings(BaseSettings):
48
48
  grok_api_key: Optional[str] = None
49
49
  grok_model: str = "grok-4-fast-reasoning"
50
50
 
51
+ # AirLLM
52
+ airllm_model: str = "meta-llama/Llama-2-7b-hf"
53
+ airllm_compression: Optional[str] = None # "4bit" or "8bit"
54
+
51
55
  model_config = SettingsConfigDict(
52
56
  env_file=".env",
53
57
  extra="ignore",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: prompture
3
- Version: 0.0.32.dev1
3
+ Version: 0.0.33.dev1
4
4
  Summary: Ask LLMs to return structured JSON and run cross-model tests. API-first.
5
5
  Home-page: https://github.com/jhd3197/prompture
6
6
  Author: Juan Denis
@@ -29,6 +29,8 @@ Requires-Dist: tukuy>=0.0.6
29
29
  Requires-Dist: pyyaml>=6.0
30
30
  Provides-Extra: test
31
31
  Requires-Dist: pytest>=7.0; extra == "test"
32
+ Provides-Extra: airllm
33
+ Requires-Dist: airllm>=2.8.0; extra == "airllm"
32
34
  Dynamic: author
33
35
  Dynamic: author-email
34
36
  Dynamic: classifier
@@ -1,14 +1,15 @@
1
- prompture/__init__.py,sha256=kCcOseMTHaJkl-vtzXVbbBdWRQlIWWBr-C-l9E2mScU,2689
1
+ prompture/__init__.py,sha256=Bs3nDZWEl9tFrS-zH7RThl_svKwPM47Q8bIuD8qKlGM,2723
2
2
  prompture/cli.py,sha256=vA86GNjtKSHz8eRMl5YDaT9HHIWuhkeJtfx8jqTaqtM,809
3
3
  prompture/core.py,sha256=x_FhOY37ygQVHo4zHUyiWsV4BuOClkELsVhEV-K4jJ0,53689
4
4
  prompture/discovery.py,sha256=qQ7Quz0Tqo0f2h9DqMlV7RqMP4XOeue_ZwzXq4bf6B8,6788
5
5
  prompture/driver.py,sha256=w8pdXHujImIGF3ee8rkG8f6-UD0h2jLHhucSPInRrYI,989
6
6
  prompture/field_definitions.py,sha256=6kDMYNedccTK5l2L_I8_NI3_av-iYHqGPwkKDy8214c,21731
7
7
  prompture/runner.py,sha256=5xwal3iBQQj4_q7l3Rjr0e3RrUMJPaPDLiEchO0mmHo,4192
8
- prompture/settings.py,sha256=vHRkBAZNP6yRsI2Sm4FMa_FCw0Zxy2VX97ooiVYWvks,1500
8
+ prompture/settings.py,sha256=-S1AJa2AUblwGyzY1lOvEI_ceNRwgPeGvJYsbGulwv4,1630
9
9
  prompture/tools.py,sha256=qyT8oJl_v9GolABkflW0SvEx22yNkEJZKTu-40nJbs0,40329
10
10
  prompture/validator.py,sha256=oLzVsNveHuF-N_uOd11_uDa9Q5rFyo0wrk_l1N4zqDk,996
11
- prompture/drivers/__init__.py,sha256=IQ7DsWC_FP45h2CprWRhQ7lKi3-9ZO6CgweNX6IxTUA,3896
11
+ prompture/drivers/__init__.py,sha256=hi2u4Z2KQFfgqce1QvjRlDKRzB2xfJZpidGNMsQ82oI,4105
12
+ prompture/drivers/airllm_driver.py,sha256=g1WmQDwSfK0BIyG96JrZY7W_VHXOS7wDSeegE7B1q4Y,3956
12
13
  prompture/drivers/azure_driver.py,sha256=GROhK3hqMfMurnEgpAawa1DPS-FhOU0YQcgy9SNGTzM,4622
13
14
  prompture/drivers/claude_driver.py,sha256=ZEHQNqNThLZ0p-WmGVuKiNyiudGYGP07xIzbgZhLY1g,3293
14
15
  prompture/drivers/google_driver.py,sha256=bCsCSuCRise0L_HOmw-jBh1hrpd8glNBkVFlOZeP0DM,6338
@@ -20,9 +21,9 @@ prompture/drivers/local_http_driver.py,sha256=S2diikvtQOQHF7fB07zU2X0QWkej4Of__r
20
21
  prompture/drivers/ollama_driver.py,sha256=fq_eFgwmCT3SK1D-ICHjxLjcm_An0suwkFIWC38xsS0,4681
21
22
  prompture/drivers/openai_driver.py,sha256=9q9OjQslquRFvIl1Hd9JVmFFFVh6OBIWrFulw1mkYWg,3976
22
23
  prompture/drivers/openrouter_driver.py,sha256=GKvLOFDhsyopH-k3iaD3VWllm7xbGuopRSA02MfCKoM,5031
23
- prompture-0.0.32.dev1.dist-info/licenses/LICENSE,sha256=0HgDepH7aaHNFhHF-iXuW6_GqDfYPnVkjtiCAZ4yS8I,1060
24
- prompture-0.0.32.dev1.dist-info/METADATA,sha256=3oNb4hhkYR7ZuLsrG5wrRxJjbuLnazBaQHKaW2yAM0Y,18043
25
- prompture-0.0.32.dev1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
26
- prompture-0.0.32.dev1.dist-info/entry_points.txt,sha256=AFPG3lJR86g4IJMoWQUW5Ph7G6MLNWG3A2u2Tp9zkp8,48
27
- prompture-0.0.32.dev1.dist-info/top_level.txt,sha256=to86zq_kjfdoLeAxQNr420UWqT0WzkKoZ509J7Qr2t4,10
28
- prompture-0.0.32.dev1.dist-info/RECORD,,
24
+ prompture-0.0.33.dev1.dist-info/licenses/LICENSE,sha256=0HgDepH7aaHNFhHF-iXuW6_GqDfYPnVkjtiCAZ4yS8I,1060
25
+ prompture-0.0.33.dev1.dist-info/METADATA,sha256=1W2vCL_ZzJvAcxi78mbYfdaIb__mudc44Ryb_b0GlIU,18114
26
+ prompture-0.0.33.dev1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
27
+ prompture-0.0.33.dev1.dist-info/entry_points.txt,sha256=AFPG3lJR86g4IJMoWQUW5Ph7G6MLNWG3A2u2Tp9zkp8,48
28
+ prompture-0.0.33.dev1.dist-info/top_level.txt,sha256=to86zq_kjfdoLeAxQNr420UWqT0WzkKoZ509J7Qr2t4,10
29
+ prompture-0.0.33.dev1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5