headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
"""JSON structure handler.
|
|
2
|
+
|
|
3
|
+
Extracts structural elements from JSON content:
|
|
4
|
+
- Keys (navigational - tells LLM what fields exist)
|
|
5
|
+
- Brackets and colons (structural syntax)
|
|
6
|
+
- Short values like booleans, nulls, small numbers
|
|
7
|
+
|
|
8
|
+
Values (strings, long numbers, nested content) are marked as compressible.
|
|
9
|
+
|
|
10
|
+
This enables the LLM to see the full schema while values are compressed.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import Any, cast
|
|
19
|
+
|
|
20
|
+
from headroom.compression.handlers.base import BaseStructureHandler, HandlerResult
|
|
21
|
+
from headroom.compression.masks import EntropyScore, StructureMask
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class JSONTokenType(Enum):
    """Lexical categories assigned to each token of a JSON document.

    The handler uses the category to decide whether a token belongs to the
    document's skeleton (kept) or to its payload (eligible for compression).
    """

    # Object key - always structural.
    KEY = "key"
    # String appearing in value position - compressible.
    STRING_VALUE = "string_value"
    # Numeric literal - preserved when short.
    NUMBER = "number"
    # The literals true / false - always structural.
    BOOLEAN = "boolean"
    # The literal null - always structural.
    NULL = "null"
    # One of { } [ ] - always structural.
    BRACKET = "bracket"
    # The key/value separator ":" - always structural.
    COLON = "colon"
    # The element separator "," - always structural.
    COMMA = "comma"
    # Runs of spaces, tabs and newlines - compressible.
    WHITESPACE = "whitespace"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class JSONToken:
    """One lexical token of a JSON document.

    ``text`` is the exact source slice, ``token_type`` its category, and
    ``start`` / ``end`` the half-open character span it occupies in the
    original content.
    """

    text: str
    token_type: JSONTokenType
    start: int
    end: int

    @property
    def is_structural(self) -> bool:
        """Return True when the token is part of the JSON skeleton.

        Keys, booleans, nulls, brackets, colons and commas are structural;
        string values, numbers and whitespace are not.
        """
        structural_kinds = (
            JSONTokenType.KEY,
            JSONTokenType.BOOLEAN,
            JSONTokenType.NULL,
            JSONTokenType.BRACKET,
            JSONTokenType.COLON,
            JSONTokenType.COMMA,
        )
        return self.token_type in structural_kinds
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class JSONStructureHandler(BaseStructureHandler):
    """Handler for JSON content.

    Preserves:
    - All keys (navigational - LLM sees what fields exist)
    - Structural syntax ({, }, [, ], :, ,)
    - Booleans and nulls (small, semantically important)
    - High-entropy strings (UUIDs, hashes - identifiers)
    - Short numbers (often IDs or important values)

    Compresses:
    - Long string values (descriptions, content)
    - Whitespace
    - Redundant array elements (after first few)

    Example:
        >>> handler = JSONStructureHandler()
        >>> result = handler.get_mask('{"name": "Alice", "id": "usr_123"}')
        >>> # Keys "name" and "id" preserved, values may be compressed
    """

    # Characters the tokenizer treats as insignificant whitespace.
    _WHITESPACE = " \t\n\r"
    _DIGITS = "0123456789"

    def __init__(
        self,
        preserve_short_values: bool = True,
        short_value_threshold: int = 20,
        preserve_high_entropy: bool = True,
        entropy_threshold: float = 0.85,
        max_array_items_full: int = 3,  # Keep first N items fully
        max_number_digits: int = 10,  # Preserve numbers up to N digits
    ) -> None:
        """Initialize the JSON handler.

        Args:
            preserve_short_values: Preserve short string values.
            short_value_threshold: Max length for "short" values. Compared
                against the token text, which includes the surrounding quotes.
            preserve_high_entropy: Preserve high-entropy strings (UUIDs, etc.).
            entropy_threshold: Entropy threshold for preservation.
            max_array_items_full: Number of array items to keep in full.
            max_number_digits: Max length of a number literal to preserve
                (often IDs). Compared against the whole literal, so a sign,
                decimal point or exponent counts toward the limit.
        """
        super().__init__(name="json")
        self.preserve_short_values = preserve_short_values
        self.short_value_threshold = short_value_threshold
        self.preserve_high_entropy = preserve_high_entropy
        self.entropy_threshold = entropy_threshold
        self.max_array_items_full = max_array_items_full
        self.max_number_digits = max_number_digits

    def can_handle(self, content: str) -> bool:
        """Check if content is a valid JSON object or array.

        Args:
            content: Candidate text.

        Returns:
            True when the stripped text starts with ``{`` or ``[`` and parses
            with ``json.loads``; False otherwise.
        """
        stripped = content.strip()
        if not stripped.startswith(("{", "[")):
            return False
        try:
            json.loads(stripped)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError. RecursionError is raised
            # for pathologically deep nesting; report that as "cannot handle"
            # instead of letting a predicate crash.
            return False
        return True

    def _extract_mask(
        self,
        content: str,
        tokens: list[str],
        **kwargs: Any,
    ) -> HandlerResult:
        """Extract structure mask from JSON content.

        Args:
            content: JSON content.
            tokens: Character-level tokens.
            **kwargs: Additional options (not used by this handler).

        Returns:
            HandlerResult with a per-character mask marking the elements to
            preserve, plus ``token_count`` and ``key_count`` metadata.
        """
        json_tokens = self._tokenize_json(content)
        mask = [False] * len(content)

        # Open containers, innermost last ("{" or "["), and, in parallel, the
        # number of commas seen directly inside each open array. Only commas
        # whose innermost container is an array separate array items; commas
        # between the members of an object nested in an array must not
        # advance the array's item index.
        containers: list[str] = []
        array_counts: list[int] = []
        key_count = 0

        for token in json_tokens:
            kind = token.token_type

            if kind == JSONTokenType.KEY:
                key_count += 1
            elif kind == JSONTokenType.BRACKET:
                bracket = token.text
                if bracket == "[":
                    containers.append("[")
                    array_counts.append(0)
                elif bracket == "{":
                    containers.append("{")
                elif bracket == "]":
                    # Ignore closers that do not match the innermost opener.
                    if containers and containers[-1] == "[":
                        containers.pop()
                        array_counts.pop()
                elif containers and containers[-1] == "{":
                    containers.pop()
            elif kind == JSONTokenType.COMMA:
                if containers and containers[-1] == "[":
                    array_counts[-1] += 1

            # Item index within the nearest enclosing array (0 outside arrays).
            item_index = array_counts[-1] if array_counts else 0

            if self._should_preserve_token(token, len(array_counts), item_index):
                stop = min(token.end, len(mask))
                mask[token.start : stop] = [True] * (stop - token.start)

        # The mask has one entry per character of ``content``. If the caller's
        # tokens are not exactly that character list, substitute one so the
        # token list and the mask stay parallel.
        # NOTE(review): assumes StructureMask expects tokens and mask to be
        # the same length - confirm against its definition.
        characters = list(content)
        char_tokens = tokens if tokens == characters else characters

        return HandlerResult(
            mask=StructureMask(tokens=char_tokens, mask=mask),
            handler_name=self.name,
            confidence=1.0,
            metadata={
                "token_count": len(json_tokens),
                "key_count": key_count,
            },
        )

    def _should_preserve_token(
        self,
        token: JSONToken,
        array_depth: int,
        array_item_index: int,
    ) -> bool:
        """Determine if a token should be preserved.

        Args:
            token: The JSON token.
            array_depth: Current array nesting depth.
            array_item_index: Index of current item in array.

        Returns:
            True if token should be preserved.
        """
        # Keys, brackets, separators, booleans and nulls are always kept.
        if token.is_structural:
            return True

        kind = token.token_type

        # Numbers: preserve short ones (often IDs).
        if kind == JSONTokenType.NUMBER:
            return len(token.text) <= self.max_number_digits

        # Whitespace (and anything else unclassified) is never preserved.
        if kind != JSONTokenType.STRING_VALUE:
            return False

        # Past the first few array items every string value is dropped,
        # regardless of its length or entropy.
        if array_depth > 0 and array_item_index >= self.max_array_items_full:
            return False

        if self.preserve_short_values and len(token.text) <= self.short_value_threshold:
            return True

        if not self.preserve_high_entropy:
            return False

        # Score the string body only; the delimiting quotes are not content.
        score = EntropyScore.compute(self._unquote(token.text), self.entropy_threshold)
        return bool(score.should_preserve)

    @staticmethod
    def _unquote(text: str) -> str:
        """Remove only the delimiting quotes of a JSON string token.

        Unlike ``text.strip('"')`` this keeps an escaped quote that ends the
        string body (as in ``"say \\"hi\\""``).
        """
        if len(text) >= 2 and text[0] == '"' and text[-1] == '"':
            return text[1:-1]
        # Unterminated string: there is only an opening quote to drop.
        return text[1:] if text.startswith('"') else text

    def _skip_digits(self, content: str, i: int) -> int:
        """Return the index of the first non-digit at or after ``i``."""
        n = len(content)
        while i < n and content[i] in self._DIGITS:
            i += 1
        return i

    def _scan_number_end(self, content: str, start: int) -> int:
        """Return the end index of the number literal beginning at ``start``.

        Accepts an optional leading ``-``, integer digits, an optional
        fraction and an optional exponent. Always advances by at least one
        character when ``content[start]`` is ``-`` or a digit.
        """
        n = len(content)
        i = start
        if content[i] == "-":
            i += 1
        i = self._skip_digits(content, i)
        if i < n and content[i] == ".":
            i = self._skip_digits(content, i + 1)
        if i < n and content[i] in "eE":
            i += 1
            if i < n and content[i] in "+-":
                i += 1
            i = self._skip_digits(content, i)
        return i

    @staticmethod
    def _scan_string_end(content: str, start: int) -> int:
        """Return the end index (exclusive) of the string opened at ``start``.

        Backslash escapes are skipped as two-character units. The result is
        clamped to ``len(content)`` so an unterminated string, or one ending
        in a lone backslash, never yields a span past the end of the content.
        """
        n = len(content)
        i = start + 1
        while i < n and content[i] != '"':
            i += 2 if content[i] == "\\" else 1
        return min(i + 1, n)  # +1 includes the closing quote when present

    def _tokenize_json(self, content: str) -> list[JSONToken]:
        """Tokenize JSON content into typed tokens.

        This is a simple tokenizer that identifies JSON structure.
        It's not a full parser - just enough to identify keys vs values.
        Characters it does not recognise are skipped without producing a
        token.

        Args:
            content: JSON content.

        Returns:
            List of JSONToken objects.
        """
        literals = (
            ("true", JSONTokenType.BOOLEAN),
            ("false", JSONTokenType.BOOLEAN),
            ("null", JSONTokenType.NULL),
        )
        whitespace = self._WHITESPACE

        tokens: list[JSONToken] = []
        n = len(content)
        i = 0

        # A string is a key only where one is expected: right after "{" or
        # after a "," whose innermost open container is an object.
        expect_key = False
        open_brackets: list[str] = []

        while i < n:
            char = content[i]

            # Whitespace run
            if char in whitespace:
                start = i
                while i < n and content[i] in whitespace:
                    i += 1
                tokens.append(JSONToken(content[start:i], JSONTokenType.WHITESPACE, start, i))
                continue

            # Brackets
            if char in "{}[]":
                tokens.append(JSONToken(char, JSONTokenType.BRACKET, i, i + 1))
                if char in "{[":
                    open_brackets.append(char)
                    expect_key = char == "{"
                else:
                    opener = "{" if char == "}" else "["
                    if open_brackets and open_brackets[-1] == opener:
                        open_brackets.pop()
                    if char == "}":
                        expect_key = False
                i += 1
                continue

            # Colon
            if char == ":":
                tokens.append(JSONToken(char, JSONTokenType.COLON, i, i + 1))
                expect_key = False
                i += 1
                continue

            # Comma
            if char == ",":
                tokens.append(JSONToken(char, JSONTokenType.COMMA, i, i + 1))
                # After a comma inside an object the next string is a key.
                if open_brackets and open_brackets[-1] == "{":
                    expect_key = True
                i += 1
                continue

            # String (key or value)
            if char == '"':
                start = i
                i = self._scan_string_end(content, start)
                text = content[start:i]

                # A key is a string in key position that is followed by a
                # colon (skipping whitespace).
                j = i
                while j < n and content[j] in whitespace:
                    j += 1

                if expect_key and j < n and content[j] == ":":
                    tokens.append(JSONToken(text, JSONTokenType.KEY, start, i))
                    expect_key = False
                else:
                    tokens.append(JSONToken(text, JSONTokenType.STRING_VALUE, start, i))
                continue

            # Number
            if char == "-" or char in self._DIGITS:
                start = i
                i = self._scan_number_end(content, start)
                tokens.append(JSONToken(content[start:i], JSONTokenType.NUMBER, start, i))
                continue

            # Boolean or null
            for word, kind in literals:
                if content.startswith(word, i):
                    tokens.append(JSONToken(word, kind, i, i + len(word)))
                    i += len(word)
                    break
            else:
                # Unknown character - skip
                i += 1

        return tokens
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def extract_json_schema(content: str) -> dict[str, Any] | list[Any]:
|
|
374
|
+
"""Extract the schema (keys only) from JSON content.
|
|
375
|
+
|
|
376
|
+
Useful for understanding the structure without the values.
|
|
377
|
+
|
|
378
|
+
Args:
|
|
379
|
+
content: JSON content.
|
|
380
|
+
|
|
381
|
+
Returns:
|
|
382
|
+
Schema dictionary with keys and types (no values).
|
|
383
|
+
|
|
384
|
+
Example:
|
|
385
|
+
>>> extract_json_schema('{"name": "Alice", "age": 30}')
|
|
386
|
+
{'name': 'string', 'age': 'number'}
|
|
387
|
+
"""
|
|
388
|
+
|
|
389
|
+
def _extract(obj: Any) -> Any:
|
|
390
|
+
if isinstance(obj, dict):
|
|
391
|
+
return {k: _extract(v) for k, v in obj.items()}
|
|
392
|
+
elif isinstance(obj, list):
|
|
393
|
+
if obj:
|
|
394
|
+
return [_extract(obj[0])] # Schema of first item
|
|
395
|
+
return []
|
|
396
|
+
elif isinstance(obj, str):
|
|
397
|
+
return "string"
|
|
398
|
+
elif isinstance(obj, bool):
|
|
399
|
+
return "boolean"
|
|
400
|
+
elif isinstance(obj, int):
|
|
401
|
+
return "integer"
|
|
402
|
+
elif isinstance(obj, float):
|
|
403
|
+
return "number"
|
|
404
|
+
elif obj is None:
|
|
405
|
+
return "null"
|
|
406
|
+
else:
|
|
407
|
+
return "unknown"
|
|
408
|
+
|
|
409
|
+
try:
|
|
410
|
+
parsed = json.loads(content)
|
|
411
|
+
result = _extract(parsed)
|
|
412
|
+
if isinstance(result, dict):
|
|
413
|
+
return cast(dict[str, Any], result)
|
|
414
|
+
elif isinstance(result, list):
|
|
415
|
+
return cast(list[Any], result)
|
|
416
|
+
return {}
|
|
417
|
+
except (json.JSONDecodeError, ValueError):
|
|
418
|
+
return {}
|