prompture 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Juan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,122 @@
1
+ Metadata-Version: 2.4
2
+ Name: prompture
3
+ Version: 0.0.1
4
+ Summary: Ask LLMs to return structured JSON and run cross-model tests. API-first.
5
+ Home-page: https://github.com/jhd3197/prompture
6
+ Author: Juan Denis
7
+ Author-email: juan@vene.co
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: requests>=2.28
15
+ Requires-Dist: jsonschema>=4.0
16
+ Requires-Dist: pydantic>=1.10
17
+ Requires-Dist: pydantic-settings>=2.0
18
+ Requires-Dist: click>=8.0
19
+ Provides-Extra: test
20
+ Requires-Dist: pytest>=7.0; extra == "test"
21
+ Dynamic: author
22
+ Dynamic: author-email
23
+ Dynamic: classifier
24
+ Dynamic: description
25
+ Dynamic: description-content-type
26
+ Dynamic: home-page
27
+ Dynamic: license-file
28
+ Dynamic: provides-extra
29
+ Dynamic: requires-dist
30
+ Dynamic: requires-python
31
+ Dynamic: summary
32
+
33
+ # Prompture
34
+
35
+ `Prompture` is an API-first library for requesting structured **JSON** output from LLMs (or any structure), validating it against a schema, and running comparative tests between models.
36
+
37
+ ## ✨ Features
38
+
39
+ - ✅ **Structured Output**: Request models to return JSON only
40
+ - ✅ **Validation**: Automatic validation with `jsonschema`
41
+ - ✅ **Multi-driver**: Run the same specification against multiple drivers (OpenAI, Ollama, HTTP, mock)
42
+ - ✅ **Reports**: Generate JSON reports with results
43
+ - ✅ **Usage Tracking**: **NEW** - Automatic token and cost monitoring for all calls
44
+
45
+ ## 🆕 Token and Cost Tracking (New)
46
+
47
+ Starting with this version, `extract_and_jsonify` and `ask_for_json` automatically include token usage and cost information:
48
+
49
+ ```python
50
+ from prompture import extract_and_jsonify
51
+ from prompture.drivers import OllamaDriver
52
+
53
+ driver = OllamaDriver(endpoint="http://localhost:11434/api/generate", model="gemma3")
54
+ result = extract_and_jsonify(driver, "Text to process", json_schema)
55
+
56
+ # Now returns both the response and usage information
57
+ json_output = result["json_string"]
58
+ usage = result["usage"]
59
+
60
+ print(f"Tokens used: {usage['total_tokens']}")
61
+ print(f"Cost: ${usage['cost']:.6f}")
62
+ ```
63
+
64
+ ### Return Structure
65
+
66
+ The main functions now return:
67
+ ```python
68
+ {
69
+ "json_string": str, # The original JSON string
70
+ "json_object": dict, # The parsed JSON object
71
+ "usage": {
72
+ "prompt_tokens": int,
73
+ "completion_tokens": int,
74
+ "total_tokens": int,
75
+ "cost": float # Cost in USD (0.0 for free models)
76
+ }
77
+ }
78
+ ```
79
+
80
+ ### Supported Drivers
81
+
82
+ - **OllamaDriver**: Cost = $0.00 (free local models)
83
+ - **OpenAIDriver**: Cost automatically calculated based on the model
84
+
85
+ ## Batch Running and Testing Prompts
86
+
87
+ `run_suite_from_spec` enables you to define and run test suites against multiple models using a specification file. This powerful feature allows you to systematically test and compare different models using a consistent set of prompts and validation criteria. Here's how it works:
88
+
89
+ ```python
90
+ from prompture import run_suite_from_spec
91
+ from prompture.drivers import MockDriver
92
+
93
+ spec = {
94
+ "meta": {"project": "test"},
95
+ "models": [{"id": "mock1", "driver": "mock", "options": {}}],
96
+ "tests": [
97
+ {
98
+ "id": "t1",
99
+ "prompt_template": "Extract user info: '{text}'",
100
+ "inputs": [{"text": "Juan is 28 and lives in Miami. He likes basketball and coding."}],
101
+ "schema": {"type": "object", "required": ["name", "interests"]}
102
+ }
103
+ ]
104
+ }
105
+ drivers = {"mock": MockDriver()}
106
+ report = run_suite_from_spec(spec, drivers)
107
+ print(report)
108
+ ```
109
+
110
+ The generated report includes comprehensive results for each test, model, and input combination:
111
+ - Validation status for each response
112
+ - Usage statistics (tokens, costs) per model
113
+ - Execution times
114
+ - Generated JSON responses
115
+
116
+ ## Quick Usage (example):
117
+
118
+ ```py
119
+ from prompture import run_suite_from_spec, drivers
120
+ spec = { ... }
121
+ report = run_suite_from_spec(spec, drivers={"mock": drivers.MockDriver()})
122
+ print(report)
@@ -0,0 +1,122 @@
1
+ Metadata-Version: 2.4
2
+ Name: prompture
3
+ Version: 0.0.1
4
+ Summary: Ask LLMs to return structured JSON and run cross-model tests. API-first.
5
+ Home-page: https://github.com/jhd3197/prompture
6
+ Author: Juan Denis
7
+ Author-email: juan@vene.co
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: requests>=2.28
15
+ Requires-Dist: jsonschema>=4.0
16
+ Requires-Dist: pydantic>=1.10
17
+ Requires-Dist: pydantic-settings>=2.0
18
+ Requires-Dist: click>=8.0
19
+ Provides-Extra: test
20
+ Requires-Dist: pytest>=7.0; extra == "test"
21
+ Dynamic: author
22
+ Dynamic: author-email
23
+ Dynamic: classifier
24
+ Dynamic: description
25
+ Dynamic: description-content-type
26
+ Dynamic: home-page
27
+ Dynamic: license-file
28
+ Dynamic: provides-extra
29
+ Dynamic: requires-dist
30
+ Dynamic: requires-python
31
+ Dynamic: summary
32
+
33
+ # Prompture
34
+
35
+ `Prompture` is an API-first library for requesting structured **JSON** output from LLMs (or any structure), validating it against a schema, and running comparative tests between models.
36
+
37
+ ## ✨ Features
38
+
39
+ - ✅ **Structured Output**: Request models to return JSON only
40
+ - ✅ **Validation**: Automatic validation with `jsonschema`
41
+ - ✅ **Multi-driver**: Run the same specification against multiple drivers (OpenAI, Ollama, HTTP, mock)
42
+ - ✅ **Reports**: Generate JSON reports with results
43
+ - ✅ **Usage Tracking**: **NEW** - Automatic token and cost monitoring for all calls
44
+
45
+ ## 🆕 Token and Cost Tracking (New)
46
+
47
+ Starting with this version, `extract_and_jsonify` and `ask_for_json` automatically include token usage and cost information:
48
+
49
+ ```python
50
+ from prompture import extract_and_jsonify
51
+ from prompture.drivers import OllamaDriver
52
+
53
+ driver = OllamaDriver(endpoint="http://localhost:11434/api/generate", model="gemma3")
54
+ result = extract_and_jsonify(driver, "Text to process", json_schema)
55
+
56
+ # Now returns both the response and usage information
57
+ json_output = result["json_string"]
58
+ usage = result["usage"]
59
+
60
+ print(f"Tokens used: {usage['total_tokens']}")
61
+ print(f"Cost: ${usage['cost']:.6f}")
62
+ ```
63
+
64
+ ### Return Structure
65
+
66
+ The main functions now return:
67
+ ```python
68
+ {
69
+ "json_string": str, # The original JSON string
70
+ "json_object": dict, # The parsed JSON object
71
+ "usage": {
72
+ "prompt_tokens": int,
73
+ "completion_tokens": int,
74
+ "total_tokens": int,
75
+ "cost": float # Cost in USD (0.0 for free models)
76
+ }
77
+ }
78
+ ```
79
+
80
+ ### Supported Drivers
81
+
82
+ - **OllamaDriver**: Cost = $0.00 (free local models)
83
+ - **OpenAIDriver**: Cost automatically calculated based on the model
84
+
85
+ ## Batch Running and Testing Prompts
86
+
87
+ `run_suite_from_spec` enables you to define and run test suites against multiple models using a specification file. This powerful feature allows you to systematically test and compare different models using a consistent set of prompts and validation criteria. Here's how it works:
88
+
89
+ ```python
90
+ from prompture import run_suite_from_spec
91
+ from prompture.drivers import MockDriver
92
+
93
+ spec = {
94
+ "meta": {"project": "test"},
95
+ "models": [{"id": "mock1", "driver": "mock", "options": {}}],
96
+ "tests": [
97
+ {
98
+ "id": "t1",
99
+ "prompt_template": "Extract user info: '{text}'",
100
+ "inputs": [{"text": "Juan is 28 and lives in Miami. He likes basketball and coding."}],
101
+ "schema": {"type": "object", "required": ["name", "interests"]}
102
+ }
103
+ ]
104
+ }
105
+ drivers = {"mock": MockDriver()}
106
+ report = run_suite_from_spec(spec, drivers)
107
+ print(report)
108
+ ```
109
+
110
+ The generated report includes comprehensive results for each test, model, and input combination:
111
+ - Validation status for each response
112
+ - Usage statistics (tokens, costs) per model
113
+ - Execution times
114
+ - Generated JSON responses
115
+
116
+ ## Quick Usage (example):
117
+
118
+ ```py
119
+ from prompture import run_suite_from_spec, drivers
120
+ spec = { ... }
121
+ report = run_suite_from_spec(spec, drivers={"mock": drivers.MockDriver()})
122
+ print(report)
@@ -0,0 +1,31 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ Prompture.egg-info/PKG-INFO
6
+ Prompture.egg-info/SOURCES.txt
7
+ Prompture.egg-info/dependency_links.txt
8
+ Prompture.egg-info/entry_points.txt
9
+ Prompture.egg-info/requires.txt
10
+ Prompture.egg-info/top_level.txt
11
+ prompture/__init__.py
12
+ prompture/cli.py
13
+ prompture/core.py
14
+ prompture/runner.py
15
+ prompture/settings.py
16
+ prompture/validator.py
17
+ prompture.egg-info/PKG-INFO
18
+ prompture.egg-info/SOURCES.txt
19
+ prompture.egg-info/dependency_links.txt
20
+ prompture.egg-info/entry_points.txt
21
+ prompture.egg-info/requires.txt
22
+ prompture.egg-info/top_level.txt
23
+ prompture/drivers/__init__.py
24
+ prompture/drivers/local_http_driver.py
25
+ prompture/drivers/mock_driver.py
26
+ prompture/drivers/ollama_driver.py
27
+ prompture/drivers/openai_driver.py
28
+ tests/test_cli.py
29
+ tests/test_core.py
30
+ tests/test_drivers.py
31
+ tests/test_runner.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ prompture = prompture.cli:cli
@@ -0,0 +1,8 @@
1
+ requests>=2.28
2
+ jsonschema>=4.0
3
+ pydantic>=1.10
4
+ pydantic-settings>=2.0
5
+ click>=8.0
6
+
7
+ [test]
8
+ pytest>=7.0
@@ -0,0 +1 @@
1
+ prompture
@@ -0,0 +1,90 @@
1
+ # Prompture
2
+
3
+ `Prompture` is an API-first library for requesting structured **JSON** output (or any other structured format) from LLMs, validating it against a schema, and running comparative tests between models.
4
+
5
+ ## ✨ Features
6
+
7
+ - ✅ **Structured Output**: Request models to return JSON only
8
+ - ✅ **Validation**: Automatic validation with `jsonschema`
9
+ - ✅ **Multi-driver**: Run the same specification against multiple drivers (OpenAI, Ollama, HTTP, mock)
10
+ - ✅ **Reports**: Generate JSON reports with results
11
+ - ✅ **Usage Tracking**: **NEW** - Automatic token and cost monitoring for all calls
12
+
13
+ ## 🆕 Token and Cost Tracking (New)
14
+
15
+ Starting with this version, `extract_and_jsonify` and `ask_for_json` automatically include token usage and cost information:
16
+
17
+ ```python
18
+ from prompture import extract_and_jsonify
19
+ from prompture.drivers import OllamaDriver
20
+
21
+ driver = OllamaDriver(endpoint="http://localhost:11434/api/generate", model="gemma3")
22
+ result = extract_and_jsonify(driver, "Text to process", json_schema)
23
+
24
+ # Now returns both the response and usage information
25
+ json_output = result["json_string"]
26
+ usage = result["usage"]
27
+
28
+ print(f"Tokens used: {usage['total_tokens']}")
29
+ print(f"Cost: ${usage['cost']:.6f}")
30
+ ```
31
+
32
+ ### Return Structure
33
+
34
+ The main functions now return:
35
+ ```python
36
+ {
37
+ "json_string": str, # The original JSON string
38
+ "json_object": dict, # The parsed JSON object
39
+ "usage": {
40
+ "prompt_tokens": int,
41
+ "completion_tokens": int,
42
+ "total_tokens": int,
43
+ "cost": float # Cost in USD (0.0 for free models)
44
+ }
45
+ }
46
+ ```
47
+
48
+ ### Supported Drivers
49
+
50
+ - **OllamaDriver**: Cost = $0.00 (free local models)
51
+ - **OpenAIDriver**: Cost automatically calculated based on the model
52
+
53
+ ## Batch Running and Testing Prompts
54
+
55
+ `run_suite_from_spec` enables you to define and run test suites against multiple models using a specification file. This powerful feature allows you to systematically test and compare different models using a consistent set of prompts and validation criteria. Here's how it works:
56
+
57
+ ```python
58
+ from prompture import run_suite_from_spec
59
+ from prompture.drivers import MockDriver
60
+
61
+ spec = {
62
+ "meta": {"project": "test"},
63
+ "models": [{"id": "mock1", "driver": "mock", "options": {}}],
64
+ "tests": [
65
+ {
66
+ "id": "t1",
67
+ "prompt_template": "Extract user info: '{text}'",
68
+ "inputs": [{"text": "Juan is 28 and lives in Miami. He likes basketball and coding."}],
69
+ "schema": {"type": "object", "required": ["name", "interests"]}
70
+ }
71
+ ]
72
+ }
73
+ drivers = {"mock": MockDriver()}
74
+ report = run_suite_from_spec(spec, drivers)
75
+ print(report)
76
+ ```
77
+
78
+ The generated report includes comprehensive results for each test, model, and input combination:
79
+ - Validation status for each response
80
+ - Usage statistics (tokens, costs) per model
81
+ - Execution times
82
+ - Generated JSON responses
83
+
84
+ ## Quick Usage (example):
85
+
86
+ ```py
87
+ from prompture import run_suite_from_spec, drivers
88
+ spec = { ... }
89
+ report = run_suite_from_spec(spec, drivers={"mock": drivers.MockDriver()})
90
+ print(report)
+ ```
@@ -0,0 +1,8 @@
1
+ """prompture - API package to convert LLM outputs into JSON + test harness."""
2
+
3
+ from .core import ask_for_json, extract_and_jsonify, Driver, clean_json_text, clean_json_text_with_ai
4
+ from .runner import run_suite_from_spec
5
+ from .validator import validate_against_schema
6
+
7
+ __all__ = ["ask_for_json", "extract_and_jsonify", "run_suite_from_spec", "validate_against_schema", "Driver", "clean_json_text", "clean_json_text_with_ai"]
8
+ __version__ = "0.0.1"
@@ -0,0 +1,23 @@
1
+ import json
2
+ import click
3
+ from .runner import run_suite_from_spec
4
+
5
# Root click command group; subcommands register on it via ``@cli.command()``.
# NOTE: click surfaces the docstring below as the ``--help`` text, so it is
# runtime output and is kept verbatim (Spanish: "Simple CLI to run JSON specs").
@click.group()
def cli():
    """CLI simple para correr specs JSON"""
9
+
10
# ``run SPECFILE OUTFILE``: load a JSON spec, execute it with the mock driver
# and write the resulting report as pretty-printed UTF-8 JSON.
# The docstring doubles as the click help text, so it is left unchanged.
@cli.command()
@click.argument("specfile", type=click.Path(exists=True))
@click.argument("outfile", type=click.Path())
def run(specfile, outfile):
    """Run a spec JSON and save report."""
    with open(specfile, "r", encoding="utf-8") as spec_handle:
        suite_spec = json.load(spec_handle)

    # Minimal driver set: only the mock driver is available by default.
    from .drivers import MockDriver

    available_drivers = {"mock": MockDriver()}
    suite_report = run_suite_from_spec(suite_spec, available_drivers)

    with open(outfile, "w", encoding="utf-8") as report_handle:
        json.dump(suite_report, report_handle, indent=2, ensure_ascii=False)
    click.echo(f"Report saved to {outfile}")
@@ -0,0 +1,172 @@
1
+ """Core utilities: Driver base class y helper para pedir JSON al LLM.
2
+ Comentarios en español.
3
+ """
4
+ from __future__ import annotations
5
+ import json
6
+ from typing import Any, Dict, Optional
7
+
8
class Driver:
    """Base adapter for LLM backends.

    Subclasses implement ``generate(prompt, options)`` and return a dict shaped
    like ``{"text": ..., "meta": {...}}``.

    The ``meta`` object in the response should have a standardized structure::

        {
            "prompt_tokens": int,      # Number of tokens in the prompt
            "completion_tokens": int,  # Number of tokens in the completion
            "total_tokens": int,       # Total tokens used (prompt + completion)
            "cost": float,             # Cost in USD (0.0 for free models)
            "raw_response": dict       # Raw response from LLM provider
        }

    All drivers must populate these fields. ``raw_response`` may carry extra
    provider-specific metadata, while the core fields give standardized access
    to token usage and cost information.
    """

    def generate(self, prompt: str, options: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a response for ``prompt``; concrete drivers override this.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError
27
+
28
+
29
def clean_json_text(text: str) -> str:
    """Best-effort extraction of JSON from text wrapped in ``` fences or prose.

    This is not perfect; prompts with examples are recommended to force the
    model to emit valid JSON in the first place.

    Returns the content of the first fenced block when the stripped text
    starts with a fence that is followed by a newline; otherwise the span from
    the first ``{`` to the last ``}``; otherwise the stripped text unchanged.
    """
    fence = "```"
    stripped = text.strip()

    if stripped.startswith(fence):
        # Discard the opening fence line (fence plus optional language tag).
        _opening_line, newline, remainder = stripped.partition("\n")
        if newline:
            # Keep everything up to the first closing fence, or the whole
            # remainder when the block is never closed.
            return remainder.partition(fence)[0].strip()

    # Fall back to the outermost brace-delimited span.
    first_brace = stripped.find("{")
    last_brace = stripped.rfind("}")
    if -1 < first_brace < last_brace:
        return stripped[first_brace:last_brace + 1]
    return stripped
59
+
60
def clean_json_text_with_ai(driver: Driver, text: str, options: Optional[Dict[str, Any]] = None) -> str:
    """Use LLM to fix malformed JSON strings.

    Creates a specialized prompt instructing the LLM to correct the provided text
    into a valid JSON object, then cleans the response to ensure no markdown fences remain.

    Args:
        driver: adapter that implements generate(prompt, options)
        text: the malformed JSON text to repair
        options: additional options to pass to the driver; an empty dict is
            used when omitted

    Returns:
        The driver's response text after passing through ``clean_json_text``.
        The result is not parsed here, so it may still be invalid JSON.
    """
    # Build a fresh dict per call: a shared ``{}`` default would let a driver
    # that mutates its options leak state into every later call.
    options = {} if options is None else options
    prompt = (
        "The following text is supposed to be a single JSON object, but it is malformed. "
        "Please correct it and return only the valid JSON object. Do not add any explanations or markdown. "
        f"The text to correct is:\n\n{text}"
    )
    resp = driver.generate(prompt, options)
    return clean_json_text(resp.get("text", ""))
75
+
76
def ask_for_json(driver: Driver, content_prompt: str, json_schema: Dict[str, Any], ai_cleanup: bool = False, options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Sends a prompt to the driver and returns both JSON output and usage metadata.

    This function enforces a schema-first approach by requiring a json_schema parameter
    and automatically generating instructions for the LLM to return valid JSON matching the schema.

    Args:
        driver: adapter that implements generate(prompt, options)
        content_prompt: main prompt content (may include examples)
        json_schema: required JSON schema dictionary defining the expected structure
        ai_cleanup: whether to attempt AI-based cleanup if JSON parsing fails
        options: additional options to pass to the driver; an empty dict is
            used when omitted

    Returns:
        A dictionary containing:
        - json_string: the JSON string output
        - json_object: the parsed JSON object
        - usage: the driver's ``meta`` object on the direct path, or a zeroed
          placeholder when the result came from the AI cleanup path

    Raises:
        json.JSONDecodeError: if the cleaned response is not valid JSON and
            ai_cleanup is False, or if the AI-repaired text still fails to parse.
    """
    # Build a fresh dict per call instead of sharing one mutable ``{}`` default.
    options = {} if options is None else options

    schema_string = json.dumps(json_schema, indent=2)
    instruct = (
        "Return only a single JSON object (no markdown, no extra text) that validates against this JSON schema:\n"
        f"{schema_string}\n\n"
        "If a value is unknown use null. Use double quotes for keys and strings."
    )
    full_prompt = f"{content_prompt}\n\n{instruct}"
    resp = driver.generate(full_prompt, options)
    cleaned = clean_json_text(resp.get("text", ""))

    try:
        json_obj = json.loads(cleaned)
    except json.JSONDecodeError:
        if not ai_cleanup:
            raise
        # Second attempt: ask the model to repair its own output. A parse
        # failure here propagates to the caller, chained to the first error.
        cleaned_fixed = clean_json_text_with_ai(driver, cleaned, options)
        json_obj = json.loads(cleaned_fixed)
        return {
            "json_string": cleaned_fixed,
            "json_object": json_obj,
            # NOTE(review): clean_json_text_with_ai returns only the cleaned
            # string, so no metadata is available for the cleanup call. This
            # zeroed placeholder is reported, and the tokens spent on both the
            # original call and the cleanup call are not counted.
            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "cost": 0.0},
        }

    return {
        "json_string": cleaned,
        "json_object": json_obj,
        "usage": resp.get("meta", {}),
    }
127
+
128
def extract_and_jsonify(
    driver: Driver,
    text: str,
    json_schema: Dict[str, Any],
    instruction_template: str = "Extract information from the following text:",
    ai_cleanup: bool = False,
    options: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Extracts structured information from text and returns it as a JSON object with usage metadata.

    This is a higher-level function that simplifies the process of extracting information
    into JSON by automatically constructing the content prompt and calling ask_for_json.

    Args:
        driver: The LLM driver instance to use for generation
        text: The raw text to extract information from
        json_schema: JSON schema dictionary defining the expected structure
        instruction_template: Template string for the extraction instruction
            (default: "Extract information from the following text:")
        ai_cleanup: Whether to attempt AI-based cleanup if JSON parsing fails
        options: Additional options to pass to the driver; an empty dict is
            used when omitted

    Returns:
        A dictionary containing:
        - json_string: the JSON string output
        - json_object: the parsed JSON object
        - usage: token usage and cost information as reported by ask_for_json

    Raises:
        ValueError: If text is empty, whitespace-only or None
        json.JSONDecodeError: If the response cannot be parsed as JSON and
            ai_cleanup is False, or if the AI cleanup output is still invalid

    Example:
        >>> schema = {"type": "object", "properties": {"name": {"type": "string"}}}
        >>> result = extract_and_jsonify(driver, "John is a developer", schema)
        >>> result["json_string"]
        '{"name": "John"}'
        >>> result["usage"]["total_tokens"]
        150
    """
    if not text or not text.strip():
        raise ValueError("Text input cannot be empty")

    # Normalize here so ask_for_json always receives a real dict, and so no
    # mutable default object is shared between calls.
    options = {} if options is None else options

    content_prompt = f"{instruction_template} {text}"
    return ask_for_json(driver, content_prompt, json_schema, ai_cleanup, options)
@@ -0,0 +1,20 @@
1
+ from .mock_driver import MockDriver
2
+ from .openai_driver import OpenAIDriver
3
+ from .local_http_driver import LocalHTTPDriver
4
+ from .ollama_driver import OllamaDriver
5
+ from ..settings import settings
6
+
7
+ # Factory to get a driver instance
8
def get_driver(provider_name: str = None):
    """Build a driver instance for the requested provider.

    When ``provider_name`` is falsy, ``settings.default_provider`` is used.
    Recognized providers are ``"mock"``, ``"openai"``, ``"local_http"`` and
    ``"ollama"``; their constructor arguments are read from ``settings``.

    Raises:
        ValueError: if the resolved provider is not one of the names above.
    """
    provider = provider_name if provider_name else settings.default_provider

    if provider == "mock":
        driver = MockDriver()
    elif provider == "openai":
        driver = OpenAIDriver(api_key=settings.openai_api_key, model=settings.openai_model)
    elif provider == "local_http":
        driver = LocalHTTPDriver(endpoint=settings.hf_endpoint)
    elif provider == "ollama":
        driver = OllamaDriver(endpoint=settings.ollama_endpoint, model=settings.ollama_model)
    else:
        raise ValueError(f"Unknown provider: {provider}")
    return driver
19
+
20
+ __all__ = ["MockDriver", "OpenAIDriver", "LocalHTTPDriver", "OllamaDriver", "get_driver"]
@@ -0,0 +1,14 @@
1
+ import requests
2
+ from ..core import Driver
3
+ from typing import Any, Dict
4
+
5
class LocalHTTPDriver(Driver):
    """Driver that forwards prompts to an HTTP endpoint as a JSON POST.

    Args:
        endpoint: URL that receives the POST request.
        timeout: Value passed as ``timeout`` to ``requests.post``. Defaults to
            30, the value that was previously hard-coded.
    """

    # Class-level fallback so subclasses that override ``__init__`` without
    # setting ``timeout`` keep the original 30-second behaviour.
    timeout: float = 30

    def __init__(self, endpoint: str, timeout: float = 30):
        self.endpoint = endpoint
        self.timeout = timeout

    def generate(self, prompt: str, options: Dict[str, Any]) -> Dict[str, Any]:
        """POST ``{"prompt": ..., "options": ...}`` and return the decoded JSON body.

        ``raise_for_status()`` is called on the response before decoding.
        """
        payload = {"prompt": prompt, "options": options}
        response = requests.post(self.endpoint, json=payload, timeout=self.timeout)
        response.raise_for_status()
        # The endpoint is expected to answer with {"text": "...", "meta": {...}};
        # the decoded body is returned as-is, without validation.
        return response.json()