prompture 0.0.30.dev1__py3-none-any.whl → 0.0.31.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
prompture/__init__.py CHANGED
@@ -6,12 +6,13 @@ from .core import (
6
6
  extract_and_jsonify,
7
7
  manual_extract_and_jsonify,
8
8
  Driver,
9
- clean_json_text,
10
- clean_toon_text,
11
9
  clean_json_text_with_ai,
12
10
  extract_with_model,
13
11
  stepwise_extract_with_model,
12
+ extract_from_data,
13
+ extract_from_pandas,
14
14
  )
15
+ from .tools import clean_json_text, clean_toon_text
15
16
  from .field_definitions import (
16
17
  FIELD_DEFINITIONS, get_field_definition, get_required_fields, get_field_names,
17
18
  field_from_registry, register_field, add_field_definition, add_field_definitions,
@@ -50,6 +51,9 @@ __all__ = [
50
51
  "clean_json_text_with_ai",
51
52
  "extract_with_model",
52
53
  "stepwise_extract_with_model",
54
+ # TOON Data Extraction Functions
55
+ "extract_from_data",
56
+ "extract_from_pandas",
53
57
  # Field Definitions
54
58
  "FIELD_DEFINITIONS",
55
59
  "get_field_definition",
prompture/core.py CHANGED
@@ -23,7 +23,6 @@ from .tools import (
23
23
  convert_value,
24
24
  log_debug,
25
25
  clean_json_text,
26
- clean_toon_text,
27
26
  LogLevel,
28
27
  get_field_default,
29
28
  )
@@ -168,41 +167,32 @@ def ask_for_json(
168
167
  if output_format not in ("json", "toon"):
169
168
  raise ValueError(f"Unsupported output_format '{output_format}'. Use 'json' or 'toon'.")
170
169
 
171
- if output_format == "toon":
172
- if toon is None:
173
- raise RuntimeError(
174
- "TOON requested but 'python-toon' is not installed. "
175
- "Install it with 'pip install python-toon'."
176
- )
177
- instruct = (
178
- "Reply only in TOON (Token-Oriented Object Notation).\n"
179
- "- Scalars: key: value\n"
180
- "- Lists: list[count]: item1,item2 (comma separated, no semicolons or line breaks)\n"
181
- "- Object arrays: name[count,]{f1,f2}:\n"
182
- " value1,value2\n"
183
- "Use two spaces before each table row, lowercase true/false/null, include every field from the schema (use null if unknown), and output no markdown, prose, or braces beyond the headers.\n"
184
- f"Schema:\n{json.dumps(json_schema, separators=(',', ':'))}"
185
- )
186
- else:
187
- schema_string = json.dumps(json_schema, indent=2)
188
- instruct = (
189
- "Return only a single JSON object (no markdown, no extra text) that validates against this JSON schema:\n"
190
- f"{schema_string}\n\n"
191
- "If a value is unknown use null. Use double quotes for keys and strings."
170
+ schema_string = json.dumps(json_schema, indent=2)
171
+ if output_format == "toon" and toon is None:
172
+ raise RuntimeError(
173
+ "TOON requested but 'python-toon' is not installed. "
174
+ "Install it with 'pip install python-toon'."
192
175
  )
193
176
 
177
+ instruct = (
178
+ "Return only a single JSON object (no markdown, no extra text) that validates against this JSON schema:\n"
179
+ f"{schema_string}\n\n"
180
+ "If a value is unknown use null. Use double quotes for keys and strings."
181
+ )
182
+ if output_format == "toon":
183
+ instruct += "\n\n(Respond with JSON only; Prompture will convert to TOON.)"
184
+
194
185
  full_prompt = f"{content_prompt}\n\n{instruct}"
195
186
  resp = driver.generate(full_prompt, options)
196
187
  raw = resp.get("text", "")
197
- cleaned = clean_toon_text(raw) if output_format == "toon" else clean_json_text(raw)
188
+ cleaned = clean_json_text(raw)
198
189
 
199
190
  try:
191
+ json_obj = json.loads(cleaned)
192
+ json_string = cleaned
193
+ toon_string = None
200
194
  if output_format == "toon":
201
- json_obj = toon.decode(cleaned)
202
- json_string = json.dumps(json_obj)
203
- else:
204
- json_obj = json.loads(cleaned)
205
- json_string = cleaned
195
+ toon_string = toon.encode(json_obj)
206
196
 
207
197
  usage = {
208
198
  **resp.get("meta", {}),
@@ -213,37 +203,42 @@ def ask_for_json(
213
203
  "cost": resp.get("meta", {}).get("cost", 0.0),
214
204
  "model_name": model_name or getattr(driver, "model", "")
215
205
  }
216
- return {
206
+ result = {
217
207
  "json_string": json_string,
218
208
  "json_object": json_obj,
219
209
  "usage": usage
220
210
  }
221
- except Exception as e:
222
- if output_format == "toon":
223
- raise ValueError(f"Failed to parse TOON: {e}\nRaw: {cleaned}") from e
224
-
225
- if isinstance(e, json.JSONDecodeError):
226
- if ai_cleanup:
227
- cleaned_fixed = clean_json_text_with_ai(driver, cleaned, model_name, options)
228
- try:
229
- json_obj = json.loads(cleaned_fixed)
230
- return {
231
- "json_string": cleaned_fixed,
232
- "json_object": json_obj,
233
- "usage": {
234
- "prompt_tokens": 0,
235
- "completion_tokens": 0,
236
- "total_tokens": 0,
237
- "cost": 0.0,
238
- "model_name": options.get("model", getattr(driver, "model", "")),
239
- "raw_response": {}
240
- },
241
- }
242
- except json.JSONDecodeError:
243
- raise e
244
- else:
211
+ if toon_string is not None:
212
+ result["toon_string"] = toon_string
213
+ result["output_format"] = "toon"
214
+ else:
215
+ result["output_format"] = "json"
216
+ return result
217
+ except json.JSONDecodeError as e:
218
+ if ai_cleanup:
219
+ cleaned_fixed = clean_json_text_with_ai(driver, cleaned, model_name, options)
220
+ try:
221
+ json_obj = json.loads(cleaned_fixed)
222
+ result = {
223
+ "json_string": cleaned_fixed,
224
+ "json_object": json_obj,
225
+ "usage": {
226
+ "prompt_tokens": 0,
227
+ "completion_tokens": 0,
228
+ "total_tokens": 0,
229
+ "cost": 0.0,
230
+ "model_name": options.get("model", getattr(driver, "model", "")),
231
+ "raw_response": {}
232
+ },
233
+ "output_format": "json" if output_format != "toon" else "toon",
234
+ }
235
+ if output_format == "toon":
236
+ result["toon_string"] = toon.encode(json_obj)
237
+ return result
238
+ except json.JSONDecodeError:
245
239
  raise e
246
- raise e
240
+ else:
241
+ raise e
247
242
 
248
243
  def extract_and_jsonify(
249
244
  text: Union[str, Driver], # Can be either text or driver for backward compatibility
@@ -862,3 +857,345 @@ def stepwise_extract_with_model(
862
857
  "__getattr__": lambda self, key: self.get(key),
863
858
  "__call__": lambda self: None # Return None when called if validation failed
864
859
  })(error_result)
860
+
861
+
862
+
863
+ def _json_to_toon(data: Union[List[Dict[str, Any]], Dict[str, Any]], data_key: Optional[str] = None) -> str:
864
+ """Convert JSON array or dict containing array to TOON format.
865
+
866
+ Args:
867
+ data: List of dicts (uniform array) or dict containing array under a key
868
+ data_key: If data is a dict, the key containing the array
869
+
870
+ Returns:
871
+ TOON formatted string
872
+
873
+ Raises:
874
+ ValueError: If TOON conversion fails or data format is invalid
875
+ RuntimeError: If python-toon is not installed
876
+ """
877
+ if toon is None:
878
+ raise RuntimeError(
879
+ "TOON conversion requested but 'python-toon' is not installed. "
880
+ "Install it with 'pip install python-toon'."
881
+ )
882
+
883
+ # Handle different data formats
884
+ if isinstance(data, list):
885
+ array_data = data
886
+ elif isinstance(data, dict):
887
+ if data_key:
888
+ if data_key not in data:
889
+ raise ValueError(f"Key '{data_key}' not found in data")
890
+ array_data = data[data_key]
891
+ else:
892
+ # Try to find the first array value in the dict
893
+ array_data = None
894
+ for key, value in data.items():
895
+ if isinstance(value, list) and value:
896
+ array_data = value
897
+ break
898
+ if array_data is None:
899
+ raise ValueError("No array found in data. Specify data_key or provide a list directly.")
900
+ else:
901
+ raise ValueError("Data must be a list of dicts or a dict containing an array")
902
+
903
+ if not isinstance(array_data, list):
904
+ raise ValueError("Array data must be a list")
905
+
906
+ if not array_data:
907
+ raise ValueError("Array data cannot be empty")
908
+
909
+ # Validate that all items in array are dicts (uniform structure)
910
+ if not all(isinstance(item, dict) for item in array_data):
911
+ raise ValueError("All items in array must be dictionaries for TOON conversion")
912
+
913
+ try:
914
+ return toon.encode(array_data)
915
+ except Exception as e:
916
+ raise ValueError(f"Failed to convert data to TOON format: {e}")
917
+
918
+
919
+ def _dataframe_to_toon(df) -> str:
920
+ """Convert Pandas DataFrame to TOON format.
921
+
922
+ Args:
923
+ df: Pandas DataFrame to convert
924
+
925
+ Returns:
926
+ TOON formatted string
927
+
928
+ Raises:
929
+ ValueError: If DataFrame conversion fails
930
+ RuntimeError: If pandas or python-toon is not installed
931
+ """
932
+ try:
933
+ import pandas as pd
934
+ except ImportError:
935
+ raise RuntimeError(
936
+ "Pandas DataFrame conversion requested but 'pandas' is not installed. "
937
+ "Install it with 'pip install pandas' or 'pip install prompture[pandas]'."
938
+ )
939
+
940
+ if toon is None:
941
+ raise RuntimeError(
942
+ "TOON conversion requested but 'python-toon' is not installed. "
943
+ "Install it with 'pip install python-toon'."
944
+ )
945
+
946
+ dataframe_type = getattr(pd, "DataFrame", None)
947
+ if isinstance(dataframe_type, type):
948
+ if not isinstance(df, dataframe_type):
949
+ raise ValueError("Input must be a pandas DataFrame")
950
+ else:
951
+ # Duck-type fallback for tests that provide a lightweight mock
952
+ if not hasattr(df, "to_dict") or not hasattr(df, "empty"):
953
+ raise ValueError("Input must be a pandas DataFrame")
954
+
955
+ if df.empty:
956
+ raise ValueError("DataFrame cannot be empty")
957
+
958
+ try:
959
+ # Convert DataFrame to list of dicts
960
+ data = df.to_dict('records')
961
+ return toon.encode(data)
962
+ except Exception as e:
963
+ raise ValueError(f"Failed to convert DataFrame to TOON format: {e}")
964
+
965
+
966
+ def _calculate_token_savings(json_text: str, toon_text: str) -> Dict[str, Any]:
967
+ """Calculate estimated token savings between JSON and TOON formats.
968
+
969
+ This is a rough estimation based on character count ratios.
970
+ Actual token counts may vary by model and tokenizer.
971
+
972
+ Args:
973
+ json_text: JSON formatted text
974
+ toon_text: TOON formatted text
975
+
976
+ Returns:
977
+ Dict containing savings statistics
978
+ """
979
+ json_chars = len(json_text)
980
+ toon_chars = len(toon_text)
981
+
982
+ # Rough estimation: 4 characters ≈ 1 token (varies by model)
983
+ json_tokens_est = json_chars // 4
984
+ toon_tokens_est = toon_chars // 4
985
+
986
+ savings_chars = json_chars - toon_chars
987
+ savings_tokens_est = json_tokens_est - toon_tokens_est
988
+
989
+ percentage_saved = (savings_chars / json_chars * 100) if json_chars > 0 else 0
990
+
991
+ return {
992
+ "json_characters": json_chars,
993
+ "toon_characters": toon_chars,
994
+ "saved_characters": savings_chars,
995
+ "estimated_json_tokens": json_tokens_est,
996
+ "estimated_toon_tokens": toon_tokens_est,
997
+ "estimated_saved_tokens": savings_tokens_est,
998
+ "percentage_saved": round(percentage_saved, 1)
999
+ }
1000
+
1001
+
1002
+ def extract_from_data(
1003
+ data: Union[List[Dict[str, Any]], Dict[str, Any]],
1004
+ question: str,
1005
+ json_schema: Dict[str, Any],
1006
+ *,
1007
+ model_name: str,
1008
+ data_key: Optional[str] = None,
1009
+ instruction_template: str = "Analyze the following data and answer: {question}",
1010
+ ai_cleanup: bool = True,
1011
+ options: Optional[Dict[str, Any]] = None,
1012
+ ) -> Dict[str, Any]:
1013
+ """Extract information from structured data by converting to TOON format for token efficiency.
1014
+
1015
+ This function takes JSON array data, converts it to TOON format to reduce tokens,
1016
+ sends it to the LLM with a question, and returns the JSON response.
1017
+
1018
+ Args:
1019
+ data: List of dicts (uniform array) or dict containing array under a key
1020
+ question: The question to ask about the data
1021
+ json_schema: Expected JSON schema for the response
1022
+ model_name: Model identifier in format "provider/model" (e.g., "openai/gpt-4")
1023
+ data_key: If data is a dict, the key containing the array (e.g., "products")
1024
+ instruction_template: Template with {question} placeholder
1025
+ ai_cleanup: Whether to attempt AI-based cleanup if JSON parsing fails
1026
+ options: Additional options to pass to the driver
1027
+
1028
+ Returns:
1029
+ Dict containing:
1030
+ - json_object: The parsed JSON response
1031
+ - json_string: The JSON string response
1032
+ - usage: Token usage and cost information for the LLM call (token savings are reported separately under token_savings)
1033
+ - toon_data: The TOON formatted input data
1034
+ - token_savings: Statistics about token savings vs JSON input
1035
+
1036
+ Raises:
1037
+ ValueError: If data format is invalid or conversion fails
1038
+ RuntimeError: If required dependencies are missing
1039
+
1040
+ Example:
1041
+ >>> products = [
1042
+ ... {"id": 1, "name": "Laptop", "price": 999.99, "category": "electronics"},
1043
+ ... {"id": 2, "name": "Book", "price": 19.99, "category": "books"}
1044
+ ... ]
1045
+ >>> schema = {
1046
+ ... "type": "object",
1047
+ ... "properties": {
1048
+ ... "average_price": {"type": "number"},
1049
+ ... "total_items": {"type": "integer"}
1050
+ ... }
1051
+ ... }
1052
+ >>> result = extract_from_data(
1053
+ ... data=products,
1054
+ ... question="What is the average price and total number of items?",
1055
+ ... json_schema=schema,
1056
+ ... model_name="openai/gpt-4"
1057
+ ... )
1058
+ >>> print(result["json_object"])
1059
+ {'average_price': 509.99, 'total_items': 2}
1060
+ """
1061
+ if not question or not question.strip():
1062
+ raise ValueError("Question cannot be empty")
1063
+
1064
+ if not json_schema:
1065
+ raise ValueError("JSON schema cannot be empty")
1066
+
1067
+ if options is None:
1068
+ options = {}
1069
+
1070
+ # Convert data to TOON format
1071
+ toon_data = _json_to_toon(data, data_key)
1072
+
1073
+ # Calculate token savings (for comparison with JSON)
1074
+ json_data = json.dumps(data if isinstance(data, list) else data.get(data_key, data), indent=2)
1075
+ token_savings = _calculate_token_savings(json_data, toon_data)
1076
+
1077
+ # Build the prompt with TOON data
1078
+ content_prompt = instruction_template.format(question=question)
1079
+ full_prompt = f"{content_prompt}\n\nData (in TOON format):\n{toon_data}"
1080
+
1081
+ # Call the LLM
1082
+ result = ask_for_json(
1083
+ driver=get_driver_for_model(model_name),
1084
+ content_prompt=full_prompt,
1085
+ json_schema=json_schema,
1086
+ ai_cleanup=ai_cleanup,
1087
+ model_name=model_name.split('/')[-1] if '/' in model_name else model_name,
1088
+ options=options,
1089
+ output_format="json" # Always return JSON, not TOON
1090
+ )
1091
+
1092
+ # Add our additional data to the result
1093
+ result["toon_data"] = toon_data
1094
+ result["token_savings"] = token_savings
1095
+
1096
+ return result
1097
+
1098
+
1099
+ def extract_from_pandas(
1100
+ df, # pandas.DataFrame - optional import
1101
+ question: str,
1102
+ json_schema: Dict[str, Any],
1103
+ *,
1104
+ model_name: str,
1105
+ instruction_template: str = "Analyze the following data and answer: {question}",
1106
+ ai_cleanup: bool = True,
1107
+ options: Optional[Dict[str, Any]] = None,
1108
+ ) -> Dict[str, Any]:
1109
+ """Extract information from Pandas DataFrame by converting to TOON format for token efficiency.
1110
+
1111
+ This function takes a Pandas DataFrame, converts it to TOON format to reduce tokens,
1112
+ sends it to the LLM with a question, and returns the JSON response.
1113
+
1114
+ Args:
1115
+ df: Pandas DataFrame to analyze
1116
+ question: The question to ask about the data
1117
+ json_schema: Expected JSON schema for the response
1118
+ model_name: Model identifier in format "provider/model" (e.g., "openai/gpt-4")
1119
+ instruction_template: Template with {question} placeholder
1120
+ ai_cleanup: Whether to attempt AI-based cleanup if JSON parsing fails
1121
+ options: Additional options to pass to the driver
1122
+
1123
+ Returns:
1124
+ Dict containing:
1125
+ - json_object: The parsed JSON response
1126
+ - json_string: The JSON string response
1127
+ - usage: Token usage and cost information for the LLM call (token savings are reported separately under token_savings)
1128
+ - toon_data: The TOON formatted input data
1129
+ - token_savings: Statistics about token savings vs JSON input
1130
+ - dataframe_info: Basic info about the original DataFrame
1131
+
1132
+ Raises:
1133
+ ValueError: If DataFrame is invalid or conversion fails
1134
+ RuntimeError: If required dependencies are missing
1135
+
1136
+ Example:
1137
+ >>> import pandas as pd
1138
+ >>> df = pd.DataFrame([
1139
+ ... {"id": 1, "name": "Laptop", "price": 999.99, "category": "electronics"},
1140
+ ... {"id": 2, "name": "Book", "price": 19.99, "category": "books"}
1141
+ ... ])
1142
+ >>> schema = {
1143
+ ... "type": "object",
1144
+ ... "properties": {
1145
+ ... "highest_priced_item": {"type": "string"},
1146
+ ... "price_range": {"type": "number"}
1147
+ ... }
1148
+ ... }
1149
+ >>> result = extract_from_pandas(
1150
+ ... df=df,
1151
+ ... question="What is the highest priced item and price range?",
1152
+ ... json_schema=schema,
1153
+ ... model_name="openai/gpt-4"
1154
+ ... )
1155
+ >>> print(result["json_object"])
1156
+ {'highest_priced_item': 'Laptop', 'price_range': 980.0}
1157
+ """
1158
+ if not question or not question.strip():
1159
+ raise ValueError("Question cannot be empty")
1160
+
1161
+ if not json_schema:
1162
+ raise ValueError("JSON schema cannot be empty")
1163
+
1164
+ if options is None:
1165
+ options = {}
1166
+
1167
+ # Convert DataFrame to TOON format
1168
+ toon_data = _dataframe_to_toon(df)
1169
+
1170
+ # Calculate token savings (for comparison with JSON)
1171
+ json_data = df.to_json(indent=2, orient='records')
1172
+ token_savings = _calculate_token_savings(json_data, toon_data)
1173
+
1174
+ # Get basic DataFrame info
1175
+ dataframe_info = {
1176
+ "shape": df.shape,
1177
+ "columns": list(df.columns),
1178
+ "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()}
1179
+ }
1180
+
1181
+ # Build the prompt with TOON data
1182
+ content_prompt = instruction_template.format(question=question)
1183
+ full_prompt = f"{content_prompt}\n\nData (in TOON format):\n{toon_data}"
1184
+
1185
+ # Call the LLM
1186
+ result = ask_for_json(
1187
+ driver=get_driver_for_model(model_name),
1188
+ content_prompt=full_prompt,
1189
+ json_schema=json_schema,
1190
+ ai_cleanup=ai_cleanup,
1191
+ model_name=model_name.split('/')[-1] if '/' in model_name else model_name,
1192
+ options=options,
1193
+ output_format="json" # Always return JSON, not TOON
1194
+ )
1195
+
1196
+ # Add our additional data to the result
1197
+ result["toon_data"] = toon_data
1198
+ result["token_savings"] = token_savings
1199
+ result["dataframe_info"] = dataframe_info
1200
+
1201
+ return result
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: prompture
3
- Version: 0.0.30.dev1
3
+ Version: 0.0.31.dev1
4
4
  Summary: Ask LLMs to return structured JSON and run cross-model tests. API-first.
5
5
  Home-page: https://github.com/jhd3197/prompture
6
6
  Author: Juan Denis
@@ -18,9 +18,11 @@ Requires-Dist: groq>=0.4.0
18
18
  Requires-Dist: httpx>=0.25.0
19
19
  Requires-Dist: jsonschema>=4.0
20
20
  Requires-Dist: openai>=1.0.0
21
+ Requires-Dist: pandas>=1.3.0
21
22
  Requires-Dist: pydantic>=1.10
22
23
  Requires-Dist: pydantic-settings>=2.0
23
24
  Requires-Dist: python-dotenv>=0.19.0
25
+ Requires-Dist: python-toon>=0.1.0
24
26
  Requires-Dist: requests>=2.28
25
27
  Requires-Dist: python-dateutil>=2.9.0
26
28
  Requires-Dist: tukuy>=0.0.6
@@ -53,6 +55,7 @@ Dynamic: summary
53
55
  ## ✨ Features
54
56
 
55
57
  - ✅ **Structured output** → JSON schema enforcement, or direct **Pydantic** instances
58
+ - ✅ **TOON input conversion** → 45-60% token savings for structured data analysis with `extract_from_data()` and `extract_from_pandas()`
56
59
  - ✅ **Stepwise extraction** → Per-field prompts, with smart type conversion (incl. shorthand numbers)
57
60
  - ✅ **Multi-driver** → OpenAI, Azure, Claude, Ollama, LM Studio, Google, Groq, OpenRouter, Grok, HTTP, Mock, HuggingFace (via `get_driver()`)
58
61
  - ✅ **Usage & cost** → Token + $ tracking on every call (`usage` from driver meta)
@@ -145,6 +148,55 @@ print(person.dict())
145
148
 
146
149
  **Why start here?** It's fast (one call), cost-efficient, and returns a validated Pydantic instance.
147
150
 
151
+
152
+ ## 🚀 TOON Input Conversion: 45-60% Token Savings
153
+
154
+ Analyze structured data with automatic TOON (Token-Oriented Object Notation) conversion of the input, which typically cuts input tokens by 45-60% compared with sending the same data as JSON.
155
+
156
+ ```python
157
+ from prompture import extract_from_data, extract_from_pandas
158
+
159
+ # Your product data
160
+ products = [
161
+ {"id": 1, "name": "Laptop", "price": 999.99, "rating": 4.5},
162
+ {"id": 2, "name": "Book", "price": 19.99, "rating": 4.2},
163
+ {"id": 3, "name": "Headphones", "price": 149.99, "rating": 4.7}
164
+ ]
165
+
166
+ # Ask questions about your data - the input is converted to TOON automatically (typically 45-60% fewer tokens than JSON)
167
+ result = extract_from_data(
168
+ data=products,
169
+ question="What is the average price and highest rated product?",
170
+ json_schema={
171
+ "type": "object",
172
+ "properties": {
173
+ "average_price": {"type": "number"},
174
+ "highest_rated": {"type": "string"}
175
+ }
176
+ },
177
+ model_name="openai/gpt-4"
178
+ )
179
+
180
+ print(result["json_object"])
181
+ # {"average_price": 389.96, "highest_rated": "Headphones"}
182
+
183
+ print(f"Token savings: {result['token_savings']['percentage_saved']}%")
184
+ # Token savings: 62.3%
185
+
186
+ # Works with Pandas DataFrames too!
187
+ import pandas as pd
188
+ df = pd.DataFrame(products)
189
+ result = extract_from_pandas(df=df, question="...", json_schema=schema, model_name="openai/gpt-4")
190
+ ```
191
+
192
+ **Preview token savings without LLM calls:**
193
+ ```bash
194
+ python examples/token_comparison_utility.py
195
+ ```
196
+
197
+ > **Note:** Both `python-toon` and `pandas` are now included by default when you install Prompture!
198
+
199
+ ---
148
200
  ---
149
201
 
150
202
  ## 📋 Field Definitions
@@ -241,7 +293,7 @@ print(resp2["json_object"], resp2["usage"])
241
293
 
242
294
  ### Experimental TOON output
243
295
 
244
- Prompture can ask for TOON (Token-Oriented Object Notation) instead of JSON by setting `output_format="toon"` on `ask_for_json`, `extract_and_jsonify`, `manual_extract_and_jsonify`, or `extract_with_model`. Responses are parsed back into Python dicts, so your downstream code still receives JSON-compatible structures.
296
+ Prompture can emit TOON (Token-Oriented Object Notation) alongside JSON when you set `output_format="toon"` on `ask_for_json`, `extract_and_jsonify`, `manual_extract_and_jsonify`, or `extract_with_model`. The LLM is still instructed to return JSON (for reliability); Prompture parses that JSON and generates the TOON string locally via `python-toon`, returning it as `toon_string` next to the usual `json_string` and `json_object`.
245
297
 
246
298
  ```python
247
299
  result = extract_and_jsonify(
@@ -250,8 +302,9 @@ result = extract_and_jsonify(
250
302
  model_name="lmstudio/deepseek/deepseek-r1-0528-qwen3-8b",
251
303
  output_format="toon",
252
304
  )
253
- print(result["json_string"]) # TOON text
254
- print(result["json_object"]) # regular dict
305
+ print(result["toon_string"]) # TOON text generated locally
306
+ print(result["json_object"]) # regular dict parsed from the JSON response
307
+ # result["json_string"] still contains the original JSON text
255
308
  ```
256
309
 
257
310
  > [!IMPORTANT]
@@ -1,6 +1,6 @@
1
- prompture/__init__.py,sha256=lHcVL7ICLeJIx2pzezOkNR3cx85V8R04VkqOTOIcWyA,1952
1
+ prompture/__init__.py,sha256=aWlRheKlKjFh2v0FifG52hgEXthsFZLcMd7qlYzdnoc,2099
2
2
  prompture/cli.py,sha256=vA86GNjtKSHz8eRMl5YDaT9HHIWuhkeJtfx8jqTaqtM,809
3
- prompture/core.py,sha256=6QfWi7qmO2Bg6RrtnD5Da0mLmBb3z9FQPQOBpxxovuE,38393
3
+ prompture/core.py,sha256=Er08vBlkL7Ho4LmqR6-B9q7ILLNMJB059yx6TBg_Xu4,50448
4
4
  prompture/driver.py,sha256=w8pdXHujImIGF3ee8rkG8f6-UD0h2jLHhucSPInRrYI,989
5
5
  prompture/field_definitions.py,sha256=6kDMYNedccTK5l2L_I8_NI3_av-iYHqGPwkKDy8214c,21731
6
6
  prompture/runner.py,sha256=5xwal3iBQQj4_q7l3Rjr0e3RrUMJPaPDLiEchO0mmHo,4192
@@ -19,9 +19,9 @@ prompture/drivers/local_http_driver.py,sha256=S2diikvtQOQHF7fB07zU2X0QWkej4Of__r
19
19
  prompture/drivers/ollama_driver.py,sha256=fq_eFgwmCT3SK1D-ICHjxLjcm_An0suwkFIWC38xsS0,4681
20
20
  prompture/drivers/openai_driver.py,sha256=9q9OjQslquRFvIl1Hd9JVmFFFVh6OBIWrFulw1mkYWg,3976
21
21
  prompture/drivers/openrouter_driver.py,sha256=GKvLOFDhsyopH-k3iaD3VWllm7xbGuopRSA02MfCKoM,5031
22
- prompture-0.0.30.dev1.dist-info/licenses/LICENSE,sha256=0HgDepH7aaHNFhHF-iXuW6_GqDfYPnVkjtiCAZ4yS8I,1060
23
- prompture-0.0.30.dev1.dist-info/METADATA,sha256=fLbeG7qnNAzSVjjuei3DE6ytu45Bft1pHNhyaVNhEiE,15521
24
- prompture-0.0.30.dev1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
- prompture-0.0.30.dev1.dist-info/entry_points.txt,sha256=AFPG3lJR86g4IJMoWQUW5Ph7G6MLNWG3A2u2Tp9zkp8,48
26
- prompture-0.0.30.dev1.dist-info/top_level.txt,sha256=to86zq_kjfdoLeAxQNr420UWqT0WzkKoZ509J7Qr2t4,10
27
- prompture-0.0.30.dev1.dist-info/RECORD,,
22
+ prompture-0.0.31.dev1.dist-info/licenses/LICENSE,sha256=0HgDepH7aaHNFhHF-iXuW6_GqDfYPnVkjtiCAZ4yS8I,1060
23
+ prompture-0.0.31.dev1.dist-info/METADATA,sha256=jjKlX4Bjrf5zA3_QJcMwBcO0fVFn8eIVkjIcJZslTCI,17320
24
+ prompture-0.0.31.dev1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
+ prompture-0.0.31.dev1.dist-info/entry_points.txt,sha256=AFPG3lJR86g4IJMoWQUW5Ph7G6MLNWG3A2u2Tp9zkp8,48
26
+ prompture-0.0.31.dev1.dist-info/top_level.txt,sha256=to86zq_kjfdoLeAxQNr420UWqT0WzkKoZ509J7Qr2t4,10
27
+ prompture-0.0.31.dev1.dist-info/RECORD,,