headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
@@ -0,0 +1,418 @@
1
+ """JSON structure handler.
2
+
3
+ Extracts structural elements from JSON content:
4
+ - Keys (navigational - tells LLM what fields exist)
5
+ - Brackets and colons (structural syntax)
6
+ - Short values like booleans, nulls, small numbers
7
+
8
+ Values (strings, long numbers, nested content) are marked as compressible.
9
+
10
+ This enables the LLM to see the full schema while values are compressed.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from dataclasses import dataclass
17
+ from enum import Enum
18
+ from typing import Any, cast
19
+
20
+ from headroom.compression.handlers.base import BaseStructureHandler, HandlerResult
21
+ from headroom.compression.masks import EntropyScore, StructureMask
22
+
23
+
24
class JSONTokenType(Enum):
    """Lexical categories assigned to JSON tokens during structure detection.

    The member values are plain strings; the per-member notes below describe
    how each category is intended to be treated when building a mask.
    """

    # Object key -- always structural.
    KEY = "key"
    # String value -- compressible.
    STRING_VALUE = "string_value"
    # Numeric value -- preserved if short.
    NUMBER = "number"
    # true / false -- always structural.
    BOOLEAN = "boolean"
    # null -- always structural.
    NULL = "null"
    # One of {, }, [, ] -- always structural.
    BRACKET = "bracket"
    # The ":" between a key and its value -- always structural.
    COLON = "colon"
    # The "," separator -- always structural.
    COMMA = "comma"
    # Spaces, tabs and newlines -- compressible.
    WHITESPACE = "whitespace"
36
+
37
+
38
@dataclass
class JSONToken:
    """One lexical token of a JSON document.

    Attributes are, in order: the token's text, its ``JSONTokenType``
    category, and the ``start`` / ``end`` offsets of the token within the
    source string (``end`` is exclusive).
    """

    text: str
    token_type: JSONTokenType
    start: int
    end: int

    @property
    def is_structural(self) -> bool:
        """Return True when the token's category is one that is always kept.

        Keys, booleans, nulls, brackets, colons and commas are structural;
        string values, numbers and whitespace are not.
        """
        always_kept = (
            JSONTokenType.KEY,
            JSONTokenType.BOOLEAN,
            JSONTokenType.NULL,
            JSONTokenType.BRACKET,
            JSONTokenType.COLON,
            JSONTokenType.COMMA,
        )
        return self.token_type in always_kept
58
+
59
+
60
class JSONStructureHandler(BaseStructureHandler):
    """Handler for JSON content.

    Preserves:
    - All keys (navigational - LLM sees what fields exist)
    - Structural syntax ({, }, [, ], :, ,)
    - Booleans and nulls (small, semantically important)
    - High-entropy strings (UUIDs, hashes - identifiers)
    - Short numbers (often IDs or important values)
    - Short string values

    Compresses:
    - Long string values (descriptions, content)
    - Whitespace
    - String values of array elements after the first few

    Example:
        >>> handler = JSONStructureHandler()
        >>> result = handler.get_mask('{"name": "Alice", "id": "usr_123"}')
        >>> # Keys "name" and "id" preserved, values may be compressed
    """

    def __init__(
        self,
        preserve_short_values: bool = True,
        short_value_threshold: int = 20,
        preserve_high_entropy: bool = True,
        entropy_threshold: float = 0.85,
        max_array_items_full: int = 3,  # Keep first N items fully
        max_number_digits: int = 10,  # Preserve numbers up to N characters
    ) -> None:
        """Initialize the JSON handler.

        Args:
            preserve_short_values: Preserve short string values.
            short_value_threshold: Max length for "short" values. The length
                is measured on the raw token, i.e. including both quotes.
            preserve_high_entropy: Preserve high-entropy strings (UUIDs, etc.).
            entropy_threshold: Entropy threshold for preservation.
            max_array_items_full: Number of array items to keep in full.
            max_number_digits: Max length of a number token to preserve
                (often IDs). The length is measured on the raw token text, so
                a sign, decimal point or exponent counts towards it.
        """
        super().__init__(name="json")
        self.preserve_short_values = preserve_short_values
        self.short_value_threshold = short_value_threshold
        self.preserve_high_entropy = preserve_high_entropy
        self.entropy_threshold = entropy_threshold
        self.max_array_items_full = max_array_items_full
        self.max_number_digits = max_number_digits

    def can_handle(self, content: str) -> bool:
        """Check if content is a valid JSON object or array.

        Args:
            content: Candidate text.

        Returns:
            True if the stripped content starts with ``{`` or ``[`` and
            ``json.loads`` accepts it; False otherwise.
        """
        stripped = content.strip()
        if not stripped.startswith(("{", "[")):
            return False
        try:
            json.loads(stripped)
        except (json.JSONDecodeError, ValueError, RecursionError):
            # RecursionError: pathologically deep nesting makes json.loads
            # bail out; treat such input as "cannot handle" rather than
            # letting a detection predicate crash the caller.
            return False
        return True

    def _extract_mask(
        self,
        content: str,
        tokens: list[str],
        **kwargs: Any,
    ) -> HandlerResult:
        """Extract structure mask from JSON content.

        Args:
            content: JSON content.
            tokens: Character-level tokens.
            **kwargs: Additional options (unused here).

        Returns:
            HandlerResult with mask marking structural elements. The mask has
            one boolean per character of ``content``.
        """
        # Tokenize JSON to identify structure
        json_tokens = self._tokenize_json(content)

        # Build character-level mask
        mask = [False] * len(content)
        mask_len = len(mask)

        # Innermost-first stack of open containers ("[" or "{"). It is needed
        # so that a comma is only counted as an array-item separator when the
        # array is the comma's *direct* container; commas between the members
        # of an object nested inside an array must not advance the item index.
        containers: list[str] = []
        array_depth = 0
        array_item_counts: dict[int, int] = {}  # depth -> count
        key_count = 0

        for token in json_tokens:
            kind = token.token_type

            if kind == JSONTokenType.KEY:
                key_count += 1
            elif kind == JSONTokenType.BRACKET:
                if token.text == "[":
                    containers.append("[")
                    array_depth += 1
                    array_item_counts[array_depth] = 0
                elif token.text == "{":
                    containers.append("{")
                elif token.text == "]":
                    # Only pop on a matching opener so malformed input cannot
                    # unbalance the stack.
                    if containers and containers[-1] == "[":
                        containers.pop()
                    array_item_counts.pop(array_depth, None)
                    array_depth = max(0, array_depth - 1)
                elif containers and containers[-1] == "{":  # token is "}"
                    containers.pop()
            elif (
                kind == JSONTokenType.COMMA
                and array_depth > 0
                and containers
                and containers[-1] == "["
            ):
                # Count array items at commas
                array_item_counts[array_depth] = array_item_counts.get(array_depth, 0) + 1

            # Determine if this token should be preserved
            preserve = self._should_preserve_token(
                token,
                array_depth,
                array_item_counts.get(array_depth, 0),
            )

            # Mark in mask (clamped so an out-of-range token cannot grow it)
            if preserve:
                stop = min(token.end, mask_len)
                if stop > token.start:
                    mask[token.start : stop] = [True] * (stop - token.start)

        # NOTE(review): the mask is always per character of `content`. When
        # `tokens` is not the character list it is passed through unchanged,
        # so its length may differ from the mask's -- confirm that
        # StructureMask tolerates this or that callers always pass characters.
        chars = list(content)
        char_tokens = chars if tokens == chars else tokens

        return HandlerResult(
            mask=StructureMask(tokens=char_tokens, mask=mask),
            handler_name=self.name,
            confidence=1.0,
            metadata={
                "token_count": len(json_tokens),
                "key_count": key_count,
            },
        )

    def _should_preserve_token(
        self,
        token: JSONToken,
        array_depth: int,
        array_item_index: int,
    ) -> bool:
        """Determine if a token should be preserved.

        Args:
            token: The JSON token.
            array_depth: Current array nesting depth.
            array_item_index: Index of current item in array.

        Returns:
            True if token should be preserved.
        """
        # Always preserve structural tokens
        if token.is_structural:
            return True

        # Numbers: preserve short ones (often IDs)
        if token.token_type == JSONTokenType.NUMBER:
            return len(token.text) <= self.max_number_digits

        # Whitespace (and anything that is not a string value) is not preserved
        if token.token_type != JSONTokenType.STRING_VALUE:
            return False

        # String values: selective preservation.
        # Past the max array items threshold, be more aggressive.
        if array_depth > 0 and array_item_index >= self.max_array_items_full:
            return False

        # Preserve short values
        if self.preserve_short_values and len(token.text) <= self.short_value_threshold:
            return True

        # Preserve high-entropy values (UUIDs, hashes)
        if self.preserve_high_entropy:
            # Remove only the delimiting quotes for the entropy calculation;
            # quotes that are part of the value itself must stay.
            value = token.text[1:]
            if value.endswith('"'):
                value = value[:-1]
            score = EntropyScore.compute(value, self.entropy_threshold)
            if score.should_preserve:
                return True

        return False

    def _tokenize_json(self, content: str) -> list[JSONToken]:
        """Tokenize JSON content into typed tokens.

        This is a simple tokenizer that identifies JSON structure.
        It's not a full parser - just enough to identify keys vs values.
        Characters that do not start any known token are skipped.

        Args:
            content: JSON content.

        Returns:
            List of JSONToken objects whose offsets lie within ``content``.
        """
        whitespace = " \t\n\r"
        digits = "0123456789"
        literals = (
            ("true", JSONTokenType.BOOLEAN),
            ("false", JSONTokenType.BOOLEAN),
            ("null", JSONTokenType.NULL),
        )

        tokens: list[JSONToken] = []
        i = 0
        n = len(content)

        # Track if we're expecting a key (after { or ,)
        expect_key = False
        brace_stack: list[str] = []

        while i < n:
            char = content[i]

            # Whitespace
            if char in whitespace:
                start = i
                while i < n and content[i] in whitespace:
                    i += 1
                tokens.append(JSONToken(content[start:i], JSONTokenType.WHITESPACE, start, i))
                continue

            # Brackets
            if char in "{}[]":
                tokens.append(JSONToken(char, JSONTokenType.BRACKET, i, i + 1))
                if char == "{":
                    brace_stack.append("{")
                    expect_key = True
                elif char == "}":
                    if brace_stack and brace_stack[-1] == "{":
                        brace_stack.pop()
                    expect_key = False
                elif char == "[":
                    brace_stack.append("[")
                    expect_key = False
                elif brace_stack and brace_stack[-1] == "[":  # char is "]"
                    brace_stack.pop()
                i += 1
                continue

            # Colon
            if char == ":":
                tokens.append(JSONToken(char, JSONTokenType.COLON, i, i + 1))
                expect_key = False
                i += 1
                continue

            # Comma
            if char == ",":
                tokens.append(JSONToken(char, JSONTokenType.COMMA, i, i + 1))
                # After comma in object, expect key
                if brace_stack and brace_stack[-1] == "{":
                    expect_key = True
                i += 1
                continue

            # String (key or value)
            if char == '"':
                start = i
                i += 1
                while i < n and content[i] != '"':
                    # A backslash escapes the next character, so skip both.
                    i += 2 if content[i] == "\\" else 1
                # Step past the closing quote. Clamp to the content length so
                # an unterminated string or a trailing backslash cannot give
                # the token an end offset beyond the input.
                i = min(i + 1, n)

                text = content[start:i]

                # Determine if this is a key or value:
                # look ahead for a colon (skipping whitespace).
                j = i
                while j < n and content[j] in whitespace:
                    j += 1

                if expect_key and j < n and content[j] == ":":
                    tokens.append(JSONToken(text, JSONTokenType.KEY, start, i))
                    expect_key = False
                else:
                    tokens.append(JSONToken(text, JSONTokenType.STRING_VALUE, start, i))
                continue

            # Number
            if char in "-0123456789":
                start = i
                # Match JSON number pattern: sign, integer, fraction, exponent
                if char == "-":
                    i += 1
                while i < n and content[i] in digits:
                    i += 1
                if i < n and content[i] == ".":
                    i += 1
                    while i < n and content[i] in digits:
                        i += 1
                if i < n and content[i] in "eE":
                    i += 1
                    if i < n and content[i] in "+-":
                        i += 1
                    while i < n and content[i] in digits:
                        i += 1

                tokens.append(JSONToken(content[start:i], JSONTokenType.NUMBER, start, i))
                continue

            # Boolean or null; anything else is an unknown character - skip
            for literal, literal_type in literals:
                if content.startswith(literal, i):
                    tokens.append(JSONToken(literal, literal_type, i, i + len(literal)))
                    i += len(literal)
                    break
            else:
                i += 1

        return tokens
371
+
372
+
373
def extract_json_schema(content: str) -> dict[str, Any] | list[Any]:
    """Extract the schema (keys and value types, no values) from JSON content.

    Useful for understanding the structure without the values. Objects map
    each key to the schema of its value; a non-empty array is described by a
    one-element list holding the schema of its first item; scalars become one
    of ``"string"``, ``"boolean"``, ``"integer"``, ``"number"`` or ``"null"``.

    Args:
        content: JSON content.

    Returns:
        Schema dictionary (or list, for a top-level array) with keys and
        types but no values. An empty dict is returned when the content is
        not valid JSON, is nested too deeply to process, or is a bare scalar.

    Example:
        >>> extract_json_schema('{"name": "Alice", "age": 30}')
        {'name': 'string', 'age': 'integer'}
    """

    def _extract(obj: Any) -> Any:
        if isinstance(obj, dict):
            return {k: _extract(v) for k, v in obj.items()}
        if isinstance(obj, list):
            # Schema of first item stands in for the whole array
            return [_extract(obj[0])] if obj else []
        if isinstance(obj, str):
            return "string"
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(obj, bool):
            return "boolean"
        if isinstance(obj, int):
            return "integer"
        if isinstance(obj, float):
            return "number"
        if obj is None:
            return "null"
        return "unknown"

    try:
        result = _extract(json.loads(content))
    except (json.JSONDecodeError, ValueError, RecursionError):
        # RecursionError: extremely deep nesting is treated like any other
        # unparseable input instead of escaping to the caller.
        return {}

    if isinstance(result, dict):
        return cast(dict[str, Any], result)
    if isinstance(result, list):
        return cast(list[Any], result)
    return {}