kweaver-dolphin 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DolphinLanguageSDK/__init__.py +58 -0
- dolphin/__init__.py +62 -0
- dolphin/cli/__init__.py +20 -0
- dolphin/cli/args/__init__.py +9 -0
- dolphin/cli/args/parser.py +567 -0
- dolphin/cli/builtin_agents/__init__.py +22 -0
- dolphin/cli/commands/__init__.py +4 -0
- dolphin/cli/interrupt/__init__.py +8 -0
- dolphin/cli/interrupt/handler.py +205 -0
- dolphin/cli/interrupt/keyboard.py +82 -0
- dolphin/cli/main.py +49 -0
- dolphin/cli/multimodal/__init__.py +34 -0
- dolphin/cli/multimodal/clipboard.py +327 -0
- dolphin/cli/multimodal/handler.py +249 -0
- dolphin/cli/multimodal/image_processor.py +214 -0
- dolphin/cli/multimodal/input_parser.py +149 -0
- dolphin/cli/runner/__init__.py +8 -0
- dolphin/cli/runner/runner.py +989 -0
- dolphin/cli/ui/__init__.py +10 -0
- dolphin/cli/ui/console.py +2795 -0
- dolphin/cli/ui/input.py +340 -0
- dolphin/cli/ui/layout.py +425 -0
- dolphin/cli/ui/stream_renderer.py +302 -0
- dolphin/cli/utils/__init__.py +8 -0
- dolphin/cli/utils/helpers.py +135 -0
- dolphin/cli/utils/version.py +49 -0
- dolphin/core/__init__.py +107 -0
- dolphin/core/agent/__init__.py +10 -0
- dolphin/core/agent/agent_state.py +69 -0
- dolphin/core/agent/base_agent.py +970 -0
- dolphin/core/code_block/__init__.py +0 -0
- dolphin/core/code_block/agent_init_block.py +0 -0
- dolphin/core/code_block/assign_block.py +98 -0
- dolphin/core/code_block/basic_code_block.py +1865 -0
- dolphin/core/code_block/explore_block.py +1327 -0
- dolphin/core/code_block/explore_block_v2.py +712 -0
- dolphin/core/code_block/explore_strategy.py +672 -0
- dolphin/core/code_block/judge_block.py +220 -0
- dolphin/core/code_block/prompt_block.py +32 -0
- dolphin/core/code_block/skill_call_deduplicator.py +291 -0
- dolphin/core/code_block/tool_block.py +129 -0
- dolphin/core/common/__init__.py +17 -0
- dolphin/core/common/constants.py +176 -0
- dolphin/core/common/enums.py +1173 -0
- dolphin/core/common/exceptions.py +133 -0
- dolphin/core/common/multimodal.py +539 -0
- dolphin/core/common/object_type.py +165 -0
- dolphin/core/common/output_format.py +432 -0
- dolphin/core/common/types.py +36 -0
- dolphin/core/config/__init__.py +16 -0
- dolphin/core/config/global_config.py +1289 -0
- dolphin/core/config/ontology_config.py +133 -0
- dolphin/core/context/__init__.py +12 -0
- dolphin/core/context/context.py +1580 -0
- dolphin/core/context/context_manager.py +161 -0
- dolphin/core/context/var_output.py +82 -0
- dolphin/core/context/variable_pool.py +356 -0
- dolphin/core/context_engineer/__init__.py +41 -0
- dolphin/core/context_engineer/config/__init__.py +5 -0
- dolphin/core/context_engineer/config/settings.py +402 -0
- dolphin/core/context_engineer/core/__init__.py +7 -0
- dolphin/core/context_engineer/core/budget_manager.py +327 -0
- dolphin/core/context_engineer/core/context_assembler.py +583 -0
- dolphin/core/context_engineer/core/context_manager.py +637 -0
- dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
- dolphin/core/context_engineer/example/incremental_example.py +267 -0
- dolphin/core/context_engineer/example/traditional_example.py +334 -0
- dolphin/core/context_engineer/services/__init__.py +5 -0
- dolphin/core/context_engineer/services/compressor.py +399 -0
- dolphin/core/context_engineer/utils/__init__.py +6 -0
- dolphin/core/context_engineer/utils/context_utils.py +441 -0
- dolphin/core/context_engineer/utils/message_formatter.py +270 -0
- dolphin/core/context_engineer/utils/token_utils.py +139 -0
- dolphin/core/coroutine/__init__.py +15 -0
- dolphin/core/coroutine/context_snapshot.py +154 -0
- dolphin/core/coroutine/context_snapshot_profile.py +922 -0
- dolphin/core/coroutine/context_snapshot_store.py +268 -0
- dolphin/core/coroutine/execution_frame.py +145 -0
- dolphin/core/coroutine/execution_state_registry.py +161 -0
- dolphin/core/coroutine/resume_handle.py +101 -0
- dolphin/core/coroutine/step_result.py +101 -0
- dolphin/core/executor/__init__.py +18 -0
- dolphin/core/executor/debug_controller.py +630 -0
- dolphin/core/executor/dolphin_executor.py +1063 -0
- dolphin/core/executor/executor.py +624 -0
- dolphin/core/flags/__init__.py +27 -0
- dolphin/core/flags/definitions.py +49 -0
- dolphin/core/flags/manager.py +113 -0
- dolphin/core/hook/__init__.py +95 -0
- dolphin/core/hook/expression_evaluator.py +499 -0
- dolphin/core/hook/hook_dispatcher.py +380 -0
- dolphin/core/hook/hook_types.py +248 -0
- dolphin/core/hook/isolated_variable_pool.py +284 -0
- dolphin/core/interfaces.py +53 -0
- dolphin/core/llm/__init__.py +0 -0
- dolphin/core/llm/llm.py +495 -0
- dolphin/core/llm/llm_call.py +100 -0
- dolphin/core/llm/llm_client.py +1285 -0
- dolphin/core/llm/message_sanitizer.py +120 -0
- dolphin/core/logging/__init__.py +20 -0
- dolphin/core/logging/logger.py +526 -0
- dolphin/core/message/__init__.py +8 -0
- dolphin/core/message/compressor.py +749 -0
- dolphin/core/parser/__init__.py +8 -0
- dolphin/core/parser/parser.py +405 -0
- dolphin/core/runtime/__init__.py +10 -0
- dolphin/core/runtime/runtime_graph.py +926 -0
- dolphin/core/runtime/runtime_instance.py +446 -0
- dolphin/core/skill/__init__.py +14 -0
- dolphin/core/skill/context_retention.py +157 -0
- dolphin/core/skill/skill_function.py +686 -0
- dolphin/core/skill/skill_matcher.py +282 -0
- dolphin/core/skill/skillkit.py +700 -0
- dolphin/core/skill/skillset.py +72 -0
- dolphin/core/trajectory/__init__.py +10 -0
- dolphin/core/trajectory/recorder.py +189 -0
- dolphin/core/trajectory/trajectory.py +522 -0
- dolphin/core/utils/__init__.py +9 -0
- dolphin/core/utils/cache_kv.py +212 -0
- dolphin/core/utils/tools.py +340 -0
- dolphin/lib/__init__.py +93 -0
- dolphin/lib/debug/__init__.py +8 -0
- dolphin/lib/debug/visualizer.py +409 -0
- dolphin/lib/memory/__init__.py +28 -0
- dolphin/lib/memory/async_processor.py +220 -0
- dolphin/lib/memory/llm_calls.py +195 -0
- dolphin/lib/memory/manager.py +78 -0
- dolphin/lib/memory/sandbox.py +46 -0
- dolphin/lib/memory/storage.py +245 -0
- dolphin/lib/memory/utils.py +51 -0
- dolphin/lib/ontology/__init__.py +12 -0
- dolphin/lib/ontology/basic/__init__.py +0 -0
- dolphin/lib/ontology/basic/base.py +102 -0
- dolphin/lib/ontology/basic/concept.py +130 -0
- dolphin/lib/ontology/basic/object.py +11 -0
- dolphin/lib/ontology/basic/relation.py +63 -0
- dolphin/lib/ontology/datasource/__init__.py +27 -0
- dolphin/lib/ontology/datasource/datasource.py +66 -0
- dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
- dolphin/lib/ontology/datasource/sql.py +845 -0
- dolphin/lib/ontology/mapping.py +177 -0
- dolphin/lib/ontology/ontology.py +733 -0
- dolphin/lib/ontology/ontology_context.py +16 -0
- dolphin/lib/ontology/ontology_manager.py +107 -0
- dolphin/lib/skill_results/__init__.py +31 -0
- dolphin/lib/skill_results/cache_backend.py +559 -0
- dolphin/lib/skill_results/result_processor.py +181 -0
- dolphin/lib/skill_results/result_reference.py +179 -0
- dolphin/lib/skill_results/skillkit_hook.py +324 -0
- dolphin/lib/skill_results/strategies.py +328 -0
- dolphin/lib/skill_results/strategy_registry.py +150 -0
- dolphin/lib/skillkits/__init__.py +44 -0
- dolphin/lib/skillkits/agent_skillkit.py +155 -0
- dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
- dolphin/lib/skillkits/env_skillkit.py +250 -0
- dolphin/lib/skillkits/mcp_adapter.py +616 -0
- dolphin/lib/skillkits/mcp_skillkit.py +771 -0
- dolphin/lib/skillkits/memory_skillkit.py +650 -0
- dolphin/lib/skillkits/noop_skillkit.py +31 -0
- dolphin/lib/skillkits/ontology_skillkit.py +89 -0
- dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
- dolphin/lib/skillkits/resource/__init__.py +52 -0
- dolphin/lib/skillkits/resource/models/__init__.py +6 -0
- dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
- dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
- dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
- dolphin/lib/skillkits/resource/skill_cache.py +215 -0
- dolphin/lib/skillkits/resource/skill_loader.py +395 -0
- dolphin/lib/skillkits/resource/skill_validator.py +406 -0
- dolphin/lib/skillkits/resource_skillkit.py +11 -0
- dolphin/lib/skillkits/search_skillkit.py +163 -0
- dolphin/lib/skillkits/sql_skillkit.py +274 -0
- dolphin/lib/skillkits/system_skillkit.py +509 -0
- dolphin/lib/skillkits/vm_skillkit.py +65 -0
- dolphin/lib/utils/__init__.py +9 -0
- dolphin/lib/utils/data_process.py +207 -0
- dolphin/lib/utils/handle_progress.py +178 -0
- dolphin/lib/utils/security.py +139 -0
- dolphin/lib/utils/text_retrieval.py +462 -0
- dolphin/lib/vm/__init__.py +11 -0
- dolphin/lib/vm/env_executor.py +895 -0
- dolphin/lib/vm/python_session_manager.py +453 -0
- dolphin/lib/vm/vm.py +610 -0
- dolphin/sdk/__init__.py +60 -0
- dolphin/sdk/agent/__init__.py +12 -0
- dolphin/sdk/agent/agent_factory.py +236 -0
- dolphin/sdk/agent/dolphin_agent.py +1106 -0
- dolphin/sdk/api/__init__.py +4 -0
- dolphin/sdk/runtime/__init__.py +8 -0
- dolphin/sdk/runtime/env.py +363 -0
- dolphin/sdk/skill/__init__.py +10 -0
- dolphin/sdk/skill/global_skills.py +706 -0
- dolphin/sdk/skill/traditional_toolkit.py +260 -0
- kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
- kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
- kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
- kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
- kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
- kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,922 @@
|
|
|
1
|
+
"""ContextSnapshot Profile - Snapshot Visualization Analysis Tool
|
|
2
|
+
Follows Design Document v1.1 Specification
|
|
3
|
+
"""
|
|
4
|
+
import json
|
|
5
|
+
import zlib
|
|
6
|
+
from dataclasses import dataclass, field, asdict
|
|
7
|
+
from typing import Dict, List, Any, Optional, Tuple
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class VariableInfo:
    """Per-variable record used for Top-N size ranking in VariableBuckets."""
    name: str  # variable name as stored in the snapshot's variable map
    type: str  # Python type name of the value (type(value).__name__)
    size_bytes: int  # JSON-serialized size of the value, in bytes
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ComponentCompression:
    """Compression metrics for a single top-level snapshot component."""
    original_bytes: int  # size of the JSON-serialized component
    compressed_bytes: int  # size after zlib compression
    compression_ratio: float  # compressed_bytes / original_bytes
    space_saved_ratio: float  # 1.0 - compression_ratio
    compressibility: str  # high / medium / low
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class MessageBuckets:
    """Bucketed statistics over the snapshot's messages.

    Populated by SnapshotProfileAnalyzer._analyze_messages(); all *_size_bytes
    values are JSON-serialized sizes.
    """
    by_role: Dict[str, int] = field(default_factory=dict)  # message count per role
    by_role_size_bytes: Dict[str, int] = field(default_factory=dict)
    by_size: Dict[str, int] = field(default_factory=dict)  # count per size-range label
    by_size_bytes: Dict[str, int] = field(default_factory=dict)
    by_content_type: Dict[str, int] = field(default_factory=dict)  # text / tool_call / tool_response
    by_content_type_size_bytes: Dict[str, int] = field(default_factory=dict)
    by_token_range: Dict[str, int] = field(default_factory=dict)  # count per token-range label
    tool_calls_count: int = 0  # total number of tool calls across messages
    estimated_total_tokens: int = 0  # heuristic token estimate, not exact
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class VariableBuckets:
    """Bucketed statistics over the snapshot's variables.

    Populated by SnapshotProfileAnalyzer._analyze_variables().
    """
    by_type: Dict[str, int] = field(default_factory=dict)  # count per Python type name
    by_type_size_bytes: Dict[str, int] = field(default_factory=dict)
    by_size: Dict[str, int] = field(default_factory=dict)  # count per size-range label
    by_size_bytes: Dict[str, int] = field(default_factory=dict)
    by_namespace: Dict[str, int] = field(default_factory=dict)  # count per name-prefix namespace
    by_namespace_size_bytes: Dict[str, int] = field(default_factory=dict)
    top_variables: List[VariableInfo] = field(default_factory=list)  # largest first, Top-N only
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class CompressionBuckets:
    """Per-component compression statistics.

    Populated by SnapshotProfileAnalyzer._analyze_compression().
    """
    components: Dict[str, ComponentCompression] = field(default_factory=dict)
    # (component name, original bytes, human-readable ratio) for ratio < 0.4
    highly_compressible: List[Tuple[str, int, str]] = field(default_factory=list)
    # same tuple shape, for ratio >= 0.7
    poorly_compressible: List[Tuple[str, int, str]] = field(default_factory=list)
    optimization_suggestions: List[str] = field(default_factory=list)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class SnapshotProfile:
    """Aggregated profile of one ContextSnapshot (schema v1.1).

    Produced by SnapshotProfileAnalyzer.analyze(). All *_size_bytes fields are
    JSON-serialized sizes; compression figures use zlib level 6.
    """
    # Metadata identifying the profiled snapshot
    snapshot_id: str
    frame_id: str
    timestamp: float
    schema_version: str = "1.1"

    # Message Statistics
    message_count: int = 0
    message_size_bytes: int = 0
    message_buckets: Optional[MessageBuckets] = None

    # Variable Statistics
    variable_count: int = 0
    variable_size_bytes: int = 0
    variable_buckets: Optional[VariableBuckets] = None

    # Compression information for the whole encoded snapshot
    original_size_bytes: int = 0
    compressed_size_bytes: int = 0
    compression_ratio: float = 0.0  # compressed / original (0.0 when original is empty)
    space_saved_ratio: float = 0.0  # 1.0 - compression_ratio
    compression_buckets: Optional[CompressionBuckets] = None

    # State sizes and memory estimate
    runtime_state_size_bytes: int = 0
    skillkit_state_size_bytes: int = 0
    estimated_memory_mb: float = 0.0  # ~3x serialized size (see _estimate_memory)
    optimization_suggestions: List[str] = field(default_factory=list)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class ProfileOptions:
    """Tunable options for snapshot profiling: rendering, bucketing
    thresholds, privacy redaction, and performance switches."""

    def __init__(
        self,
        # Rendering Options
        ascii: bool = True,
        bar_width: int = 12,
        max_output_kb: int = 100,
        # Threshold Configuration
        size_thresholds: Optional[Dict[str, List[int]]] = None,
        token_ranges: Optional[List[int]] = None,
        max_rows: Optional[Dict[str, int]] = None,
        # Security and Privacy
        redact_sensitive: bool = False,
        sensitive_patterns: Optional[List[str]] = None,
        redact_values: bool = False,
        # Performance Optimization
        enable_sampling: bool = False,
        sampling_threshold_mb: int = 5,
        cache_encoded: bool = True,
    ):
        # Rendering.
        self.ascii, self.bar_width = ascii, bar_width
        self.max_output_kb = max_output_kb

        # Bucketing thresholds. Falsy arguments (None or empty) fall back to
        # the built-in defaults ("value or default" semantics).
        self.size_thresholds = size_thresholds or {
            'message_bytes': [1000, 10000],
            'variable_bytes': [1000, 10000]
        }
        self.token_ranges = token_ranges or [100, 1000]
        self.max_rows = max_rows or {'top_variables': 10}

        # Privacy.
        self.redact_sensitive, self.redact_values = redact_sensitive, redact_values
        self.sensitive_patterns = sensitive_patterns or ['password', 'api_key', 'secret']

        # Performance.
        self.enable_sampling = enable_sampling
        self.sampling_threshold_mb = sampling_threshold_mb
        self.cache_encoded = cache_encoded
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class SnapshotProfileAnalyzer:
    """Core analysis logic: turns a ContextSnapshot into a SnapshotProfile."""

    def __init__(self, snapshot: 'ContextSnapshot', options: Optional[ProfileOptions] = None):
        self.snapshot = snapshot
        self.options = options if options else ProfileOptions()
        # Lazily built JSON encoding of the snapshot, shared by the size and
        # compression calculations when options.cache_encoded is enabled.
        self._encoded_cache: Optional[bytes] = None
|
|
145
|
+
|
|
146
|
+
def analyze(self) -> SnapshotProfile:
|
|
147
|
+
"""Analyze snapshots and generate Profile"""
|
|
148
|
+
# Calculate size
|
|
149
|
+
original_bytes = self._calc_total_size()
|
|
150
|
+
compressed_bytes = self._calc_compressed_size()
|
|
151
|
+
|
|
152
|
+
# Analyze message
|
|
153
|
+
message_buckets = self._analyze_messages()
|
|
154
|
+
message_size = self._calc_component_size(self.snapshot.messages)
|
|
155
|
+
|
|
156
|
+
# Analyze variables
|
|
157
|
+
variable_buckets = self._analyze_variables()
|
|
158
|
+
variable_size = self._calc_component_size(self.snapshot.variables)
|
|
159
|
+
|
|
160
|
+
# Analyze Compression
|
|
161
|
+
compression_buckets = self._analyze_compression()
|
|
162
|
+
|
|
163
|
+
# Calculate compression ratio
|
|
164
|
+
compression_ratio = compressed_bytes / original_bytes if original_bytes > 0 else 0.0
|
|
165
|
+
space_saved_ratio = 1.0 - compression_ratio
|
|
166
|
+
|
|
167
|
+
# State size
|
|
168
|
+
runtime_size = self._calc_component_size(self.snapshot.runtime_state)
|
|
169
|
+
skillkit_size = self._calc_component_size(self.snapshot.skillkit_state)
|
|
170
|
+
|
|
171
|
+
# Memory Estimation
|
|
172
|
+
estimated_memory = self._estimate_memory(original_bytes)
|
|
173
|
+
|
|
174
|
+
# Generate optimization suggestions
|
|
175
|
+
suggestions = self._generate_suggestions(variable_buckets, message_buckets)
|
|
176
|
+
|
|
177
|
+
return SnapshotProfile(
|
|
178
|
+
snapshot_id=self.snapshot.snapshot_id,
|
|
179
|
+
frame_id=self.snapshot.frame_id,
|
|
180
|
+
timestamp=self.snapshot.timestamp,
|
|
181
|
+
schema_version="1.1",
|
|
182
|
+
message_count=len(self.snapshot.messages),
|
|
183
|
+
message_size_bytes=message_size,
|
|
184
|
+
message_buckets=message_buckets,
|
|
185
|
+
variable_count=len(self.snapshot.variables),
|
|
186
|
+
variable_size_bytes=variable_size,
|
|
187
|
+
variable_buckets=variable_buckets,
|
|
188
|
+
original_size_bytes=original_bytes,
|
|
189
|
+
compressed_size_bytes=compressed_bytes,
|
|
190
|
+
compression_ratio=compression_ratio,
|
|
191
|
+
space_saved_ratio=space_saved_ratio,
|
|
192
|
+
compression_buckets=compression_buckets,
|
|
193
|
+
runtime_state_size_bytes=runtime_size,
|
|
194
|
+
skillkit_state_size_bytes=skillkit_size,
|
|
195
|
+
estimated_memory_mb=estimated_memory,
|
|
196
|
+
optimization_suggestions=suggestions,
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
def _calc_size(self, obj: Any) -> int:
|
|
200
|
+
"""Calculate the byte size after serializing an object"""
|
|
201
|
+
try:
|
|
202
|
+
serialized = json.dumps(obj, ensure_ascii=False)
|
|
203
|
+
return len(serialized.encode('utf-8'))
|
|
204
|
+
except (TypeError, ValueError):
|
|
205
|
+
# Unserializable object, attempted repr
|
|
206
|
+
try:
|
|
207
|
+
repr_str = repr(obj)[:1000] # Up to 1000 characters
|
|
208
|
+
return len(f"<non-serializable: {type(obj).__name__}>".encode('utf-8'))
|
|
209
|
+
except:
|
|
210
|
+
return 50 # Default placeholder
|
|
211
|
+
|
|
212
|
+
def _calc_component_size(self, component: Any) -> int:
|
|
213
|
+
"""Calculate component size"""
|
|
214
|
+
return self._calc_size(component)
|
|
215
|
+
|
|
216
|
+
def _calc_total_size(self) -> int:
|
|
217
|
+
"""Calculate total size"""
|
|
218
|
+
if self.options.cache_encoded and self._encoded_cache is None:
|
|
219
|
+
encoded = self.snapshot.encode()
|
|
220
|
+
self._encoded_cache = json.dumps(encoded, ensure_ascii=False).encode('utf-8')
|
|
221
|
+
|
|
222
|
+
if self._encoded_cache:
|
|
223
|
+
return len(self._encoded_cache)
|
|
224
|
+
|
|
225
|
+
return self._calc_size(self.snapshot.encode())
|
|
226
|
+
|
|
227
|
+
def _calc_compressed_size(self) -> int:
|
|
228
|
+
"""Calculate compressed size"""
|
|
229
|
+
if self.options.cache_encoded and self._encoded_cache:
|
|
230
|
+
return len(zlib.compress(self._encoded_cache, level=6))
|
|
231
|
+
|
|
232
|
+
encoded = json.dumps(self.snapshot.encode(), ensure_ascii=False).encode('utf-8')
|
|
233
|
+
return len(zlib.compress(encoded, level=6))
|
|
234
|
+
|
|
235
|
+
def _estimate_memory(self, size_bytes: int) -> float:
|
|
236
|
+
"""Estimate memory usage (MB)"""
|
|
237
|
+
return size_bytes * 3.0 / 1e6
|
|
238
|
+
|
|
239
|
+
def _estimate_tokens(self, text: str) -> int:
|
|
240
|
+
"""Estimate token count (approximate)"""
|
|
241
|
+
if not text:
|
|
242
|
+
return 0
|
|
243
|
+
|
|
244
|
+
# Count character types
|
|
245
|
+
ascii_count = sum(1 for c in text if ord(c) < 128)
|
|
246
|
+
non_ascii_count = len(text) - ascii_count
|
|
247
|
+
|
|
248
|
+
# Mixed text adopts a conservative estimate
|
|
249
|
+
if non_ascii_count > 0:
|
|
250
|
+
return len(text) // 2 # Chinese/Japanese/Korean
|
|
251
|
+
else:
|
|
252
|
+
return len(text) // 4 # Pure ASCII
|
|
253
|
+
|
|
254
|
+
def _estimate_message_tokens(self, message: Dict) -> int:
|
|
255
|
+
"""Estimate the number of tokens in a message"""
|
|
256
|
+
text_parts = []
|
|
257
|
+
|
|
258
|
+
# Extract text content
|
|
259
|
+
if 'content' in message:
|
|
260
|
+
content = message['content']
|
|
261
|
+
if isinstance(content, str):
|
|
262
|
+
text_parts.append(content)
|
|
263
|
+
elif isinstance(content, list):
|
|
264
|
+
for item in content:
|
|
265
|
+
if isinstance(item, dict) and 'text' in item:
|
|
266
|
+
text_parts.append(item['text'])
|
|
267
|
+
|
|
268
|
+
# Tool Calling
|
|
269
|
+
if 'tool_calls' in message:
|
|
270
|
+
text_parts.append(json.dumps(message['tool_calls']))
|
|
271
|
+
|
|
272
|
+
combined_text = ' '.join(text_parts)
|
|
273
|
+
return self._estimate_tokens(combined_text)
|
|
274
|
+
|
|
275
|
+
def _extract_namespace(self, var_name: str) -> str:
|
|
276
|
+
"""Extract variable namespace"""
|
|
277
|
+
# Handling None or empty strings
|
|
278
|
+
if not var_name:
|
|
279
|
+
return '(unnamed)'
|
|
280
|
+
|
|
281
|
+
if var_name.startswith('__'):
|
|
282
|
+
return '__builtin__'
|
|
283
|
+
if var_name.startswith('_'):
|
|
284
|
+
return '_private'
|
|
285
|
+
|
|
286
|
+
if '_' in var_name:
|
|
287
|
+
return var_name.split('_')[0] + '_'
|
|
288
|
+
|
|
289
|
+
return '(other)'
|
|
290
|
+
|
|
291
|
+
def _get_size_bucket(self, size_bytes: int, thresholds: List[int]) -> str:
|
|
292
|
+
"""Get size buckets"""
|
|
293
|
+
if size_bytes < thresholds[0]:
|
|
294
|
+
return f'<{thresholds[0] // 1000} KB'
|
|
295
|
+
elif size_bytes < thresholds[1]:
|
|
296
|
+
return f'{thresholds[0] // 1000}–{thresholds[1] // 1000} KB'
|
|
297
|
+
else:
|
|
298
|
+
return f'>={thresholds[1] // 1000} KB'
|
|
299
|
+
|
|
300
|
+
def _get_token_bucket(self, tokens: int) -> str:
|
|
301
|
+
"""Get token buckets"""
|
|
302
|
+
ranges = self.options.token_ranges
|
|
303
|
+
if tokens < ranges[0]:
|
|
304
|
+
return f'<{ranges[0]}'
|
|
305
|
+
elif tokens < ranges[1]:
|
|
306
|
+
return f'{ranges[0]}–{ranges[1]}'
|
|
307
|
+
else:
|
|
308
|
+
return f'>={ranges[1]}'
|
|
309
|
+
|
|
310
|
+
def _get_compressibility(self, compression_ratio: float) -> str:
|
|
311
|
+
"""Get compressibility classification"""
|
|
312
|
+
if compression_ratio < 0.4:
|
|
313
|
+
return 'high'
|
|
314
|
+
elif compression_ratio < 0.7:
|
|
315
|
+
return 'medium'
|
|
316
|
+
else:
|
|
317
|
+
return 'low'
|
|
318
|
+
|
|
319
|
+
def _analyze_messages(self) -> MessageBuckets:
|
|
320
|
+
"""Analyze message"""
|
|
321
|
+
buckets = MessageBuckets()
|
|
322
|
+
|
|
323
|
+
for msg in self.snapshot.messages:
|
|
324
|
+
# Role Bucket
|
|
325
|
+
role = msg.get('role', 'unknown')
|
|
326
|
+
buckets.by_role[role] = buckets.by_role.get(role, 0) + 1
|
|
327
|
+
|
|
328
|
+
# Size
|
|
329
|
+
msg_size = self._calc_size(msg)
|
|
330
|
+
buckets.by_role_size_bytes[role] = buckets.by_role_size_bytes.get(role, 0) + msg_size
|
|
331
|
+
|
|
332
|
+
# Bucket Size
|
|
333
|
+
size_bucket = self._get_size_bucket(msg_size, self.options.size_thresholds['message_bytes'])
|
|
334
|
+
buckets.by_size[size_bucket] = buckets.by_size.get(size_bucket, 0) + 1
|
|
335
|
+
buckets.by_size_bytes[size_bucket] = buckets.by_size_bytes.get(size_bucket, 0) + msg_size
|
|
336
|
+
|
|
337
|
+
# Content Type
|
|
338
|
+
content_type = 'text'
|
|
339
|
+
if 'tool_calls' in msg:
|
|
340
|
+
content_type = 'tool_call'
|
|
341
|
+
buckets.tool_calls_count += len(msg['tool_calls'])
|
|
342
|
+
elif role == 'tool':
|
|
343
|
+
content_type = 'tool_response'
|
|
344
|
+
|
|
345
|
+
buckets.by_content_type[content_type] = buckets.by_content_type.get(content_type, 0) + 1
|
|
346
|
+
buckets.by_content_type_size_bytes[content_type] = buckets.by_content_type_size_bytes.get(content_type, 0) + msg_size
|
|
347
|
+
|
|
348
|
+
# Token Estimation
|
|
349
|
+
tokens = self._estimate_message_tokens(msg)
|
|
350
|
+
buckets.estimated_total_tokens += tokens
|
|
351
|
+
|
|
352
|
+
token_bucket = self._get_token_bucket(tokens)
|
|
353
|
+
buckets.by_token_range[token_bucket] = buckets.by_token_range.get(token_bucket, 0) + 1
|
|
354
|
+
|
|
355
|
+
return buckets
|
|
356
|
+
|
|
357
|
+
def _analyze_variables(self) -> VariableBuckets:
|
|
358
|
+
"""Analyze variables"""
|
|
359
|
+
buckets = VariableBuckets()
|
|
360
|
+
var_info_list = []
|
|
361
|
+
|
|
362
|
+
for name, value in self.snapshot.variables.items():
|
|
363
|
+
# Type
|
|
364
|
+
var_type = type(value).__name__
|
|
365
|
+
buckets.by_type[var_type] = buckets.by_type.get(var_type, 0) + 1
|
|
366
|
+
|
|
367
|
+
# Size
|
|
368
|
+
var_size = self._calc_size(value)
|
|
369
|
+
buckets.by_type_size_bytes[var_type] = buckets.by_type_size_bytes.get(var_type, 0) + var_size
|
|
370
|
+
|
|
371
|
+
# Bucket Size
|
|
372
|
+
size_bucket = self._get_size_bucket(var_size, self.options.size_thresholds['variable_bytes'])
|
|
373
|
+
buckets.by_size[size_bucket] = buckets.by_size.get(size_bucket, 0) + 1
|
|
374
|
+
buckets.by_size_bytes[size_bucket] = buckets.by_size_bytes.get(size_bucket, 0) + var_size
|
|
375
|
+
|
|
376
|
+
# Namespace
|
|
377
|
+
namespace = self._extract_namespace(name)
|
|
378
|
+
buckets.by_namespace[namespace] = buckets.by_namespace.get(namespace, 0) + 1
|
|
379
|
+
buckets.by_namespace_size_bytes[namespace] = buckets.by_namespace_size_bytes.get(namespace, 0) + var_size
|
|
380
|
+
|
|
381
|
+
# Collect information for Top N
|
|
382
|
+
var_info_list.append(VariableInfo(name=name, type=var_type, size_bytes=var_size))
|
|
383
|
+
|
|
384
|
+
# Top N
|
|
385
|
+
var_info_list.sort(key=lambda x: x.size_bytes, reverse=True)
|
|
386
|
+
max_top = self.options.max_rows.get('top_variables', 10)
|
|
387
|
+
buckets.top_variables = var_info_list[:max_top]
|
|
388
|
+
|
|
389
|
+
return buckets
|
|
390
|
+
|
|
391
|
+
def _analyze_compression(self) -> CompressionBuckets:
|
|
392
|
+
"""Analyze Compression"""
|
|
393
|
+
buckets = CompressionBuckets()
|
|
394
|
+
|
|
395
|
+
# Analyze each component (including context_manager_state)
|
|
396
|
+
components = {
|
|
397
|
+
'messages': self.snapshot.messages,
|
|
398
|
+
'variables': self.snapshot.variables,
|
|
399
|
+
'runtime_state': self.snapshot.runtime_state,
|
|
400
|
+
'skillkit_state': self.snapshot.skillkit_state,
|
|
401
|
+
'context_manager_state': self.snapshot.context_manager_state,
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
for name, component in components.items():
|
|
405
|
+
original = self._calc_component_size(component)
|
|
406
|
+
compressed = len(zlib.compress(json.dumps(component, ensure_ascii=False).encode('utf-8'), level=6))
|
|
407
|
+
|
|
408
|
+
ratio = compressed / original if original > 0 else 1.0
|
|
409
|
+
saved = 1.0 - ratio
|
|
410
|
+
compressibility = self._get_compressibility(ratio)
|
|
411
|
+
|
|
412
|
+
buckets.components[name] = ComponentCompression(
|
|
413
|
+
original_bytes=original,
|
|
414
|
+
compressed_bytes=compressed,
|
|
415
|
+
compression_ratio=ratio,
|
|
416
|
+
space_saved_ratio=saved,
|
|
417
|
+
compressibility=compressibility,
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
# High/Low Compressibility
|
|
421
|
+
if compressibility == 'high':
|
|
422
|
+
buckets.highly_compressible.append((name, original, f"{ratio:.1%} compression ratio"))
|
|
423
|
+
elif compressibility == 'low':
|
|
424
|
+
buckets.poorly_compressible.append((name, original, f"{ratio:.1%} compression ratio"))
|
|
425
|
+
|
|
426
|
+
return buckets
|
|
427
|
+
|
|
428
|
+
def _generate_suggestions(self, var_buckets: VariableBuckets, msg_buckets: MessageBuckets) -> List[str]:
|
|
429
|
+
"""Generate optimization suggestions"""
|
|
430
|
+
suggestions = []
|
|
431
|
+
|
|
432
|
+
# Top Variable Suggestions
|
|
433
|
+
if var_buckets.top_variables:
|
|
434
|
+
top_var = var_buckets.top_variables[0]
|
|
435
|
+
if top_var.size_bytes > 3000: # > 3KB
|
|
436
|
+
suggestions.append(
|
|
437
|
+
f"{top_var.name} ({top_var.size_bytes / 1000:.2f} KB) - Consider incremental snapshot or partial loading"
|
|
438
|
+
)
|
|
439
|
+
|
|
440
|
+
# Temporary Variable Cleanup
|
|
441
|
+
temp_count = var_buckets.by_namespace.get('temp_', 0)
|
|
442
|
+
if temp_count > 0:
|
|
443
|
+
temp_size = var_buckets.by_namespace_size_bytes.get('temp_', 0)
|
|
444
|
+
suggestions.append(
|
|
445
|
+
f"{temp_count} temp_* variables ({temp_size / 1000:.2f} KB) - Cleanup before snapshot"
|
|
446
|
+
)
|
|
447
|
+
|
|
448
|
+
# Message Suggestions
|
|
449
|
+
assistant_size = msg_buckets.by_role_size_bytes.get('assistant', 0)
|
|
450
|
+
if assistant_size > 5000: # > 5KB
|
|
451
|
+
suggestions.append(
|
|
452
|
+
f"Assistant messages ({assistant_size / 1000:.2f} KB) - Consider archiving older messages"
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
return suggestions
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def dataclass_to_dict(obj: Any) -> Any:
    """Recursively convert dataclass instances — possibly nested inside
    dicts, lists or tuples — into plain dictionaries.

    Fix: the original called ``asdict(obj)`` and then re-walked its output
    with this function. ``asdict`` already recurses into nested dataclasses
    and containers, so the second pass was pure redundant work; a single
    ``asdict`` call yields the identical result.
    """
    if hasattr(obj, '__dataclass_fields__'):
        return asdict(obj)
    if isinstance(obj, dict):
        return {key: dataclass_to_dict(val) for key, val in obj.items()}
    if isinstance(obj, list):
        return [dataclass_to_dict(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(dataclass_to_dict(item) for item in obj)
    return obj
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
class MarkdownFormatter:
    """Renders a SnapshotProfile as a Markdown report."""

    def __init__(self, profile: SnapshotProfile, title: Optional[str] = None, options: Optional[ProfileOptions] = None):
        self.profile = profile
        # Falsy title/options (None or empty) fall back to the defaults.
        self.title = title if title else "ContextSnapshot Profile"
        self.options = options if options else ProfileOptions()
|
|
482
|
+
|
|
483
|
+
def format(self) -> str:
|
|
484
|
+
"""Generate Markdown output"""
|
|
485
|
+
sections = [
|
|
486
|
+
self._format_header(),
|
|
487
|
+
self._format_overview(),
|
|
488
|
+
self._format_messages(),
|
|
489
|
+
self._format_variables(),
|
|
490
|
+
self._format_compression(),
|
|
491
|
+
self._format_runtime_skillkit(),
|
|
492
|
+
self._format_summary(),
|
|
493
|
+
]
|
|
494
|
+
return '\n\n'.join(sections)
|
|
495
|
+
|
|
496
|
+
def _format_header(self) -> str:
|
|
497
|
+
"""Formatting Titles"""
|
|
498
|
+
timestamp_str = datetime.fromtimestamp(self.profile.timestamp).strftime('%Y-%m-%d %H:%M:%S')
|
|
499
|
+
return f"""# {self.title}
|
|
500
|
+
|
|
501
|
+
Snapshot ID: `{self.profile.snapshot_id[:8]}` | Frame ID: `{self.profile.frame_id[:8]}`
|
|
502
|
+
Timestamp: {timestamp_str} | Schema Version: {self.profile.schema_version}
|
|
503
|
+
|
|
504
|
+
---"""
|
|
505
|
+
|
|
506
|
+
def _format_overview(self) -> str:
|
|
507
|
+
"""Formatting Overview"""
|
|
508
|
+
p = self.profile
|
|
509
|
+
return f"""## Overview
|
|
510
|
+
|
|
511
|
+
| Metric | Value |
|
|
512
|
+
|-----------------------|--------------|
|
|
513
|
+
| Original Size | {self._format_bytes(p.original_size_bytes)} |
|
|
514
|
+
| Compressed Size | {self._format_bytes(p.compressed_size_bytes)} |
|
|
515
|
+
| Compression Ratio | {p.compression_ratio:.1%} |
|
|
516
|
+
| Space Saved Ratio | {p.space_saved_ratio:.1%} |
|
|
517
|
+
| Estimated Memory (MB) | ~{p.estimated_memory_mb:.3f} MB |
|
|
518
|
+
|
|
519
|
+
---"""
|
|
520
|
+
|
|
521
|
+
def _format_messages(self) -> str:
    """Render the "Messages" section: bucket tables by role, size,
    content type and token range, plus an aggregate stats line.
    """
    p = self.profile
    b = p.message_buckets

    # No buckets or an empty snapshot short-circuits to a placeholder.
    if not b or p.message_count == 0:
        return "## Messages\n\nNo messages in snapshot."

    # Each bucket view is a small Markdown table; blank strings become
    # blank lines between tables once joined below.
    sections = [
        f"## Messages ({p.message_count} items, {self._format_bytes(p.message_size_bytes)})",
        "",
        "### Bucket: By Role",
        "",
        self._format_table(
            ['Role', 'Count', '% (by count)', 'Size (KB)', '% (by bytes)'],
            self._build_role_rows(b)
        ),
        "",
        "### Bucket: By Size",
        "",
        self._format_table(
            ['Range', 'Count', '% (by count)', 'Size (KB)'],
            self._build_size_rows(b.by_size, b.by_size_bytes, p.message_count, 'message_bytes')
        ),
        "",
        "### Bucket: By Content Type",
        "",
        self._format_table(
            ['Type', 'Count', '% (by count)', 'Size (KB)'],
            self._build_content_type_rows(b)
        ),
        "",
        "### Bucket: By Token Range",
        "",
        self._format_table(
            ['Range', 'Count', '% (by count)'],
            self._build_token_range_rows(b)
        ),
        "",
        f"Stats: tool_calls = {b.tool_calls_count} • estimated_total_tokens ≈ {b.estimated_total_tokens:,}",
    ]

    return '\n'.join(sections) + "\n\n---"
|
|
564
|
+
|
|
565
|
+
def _format_variables(self) -> str:
    """Render the "Variables" section: bucket tables by type, size and
    namespace, plus (when recorded) the largest-variables table.
    """
    p = self.profile
    b = p.variable_buckets

    # No buckets or an empty snapshot short-circuits to a placeholder.
    if not b or p.variable_count == 0:
        return "## Variables\n\nNo variables in snapshot."

    sections = [
        f"## Variables ({p.variable_count} items, {self._format_bytes(p.variable_size_bytes)})",
        "",
        "### Bucket: By Type",
        "",
        self._format_table(
            ['Type', 'Count', '% (by count)', 'Size (KB)', '% (by bytes)'],
            self._build_type_rows(b, p.variable_size_bytes)
        ),
        "",
        "### Bucket: By Size",
        "",
        self._format_table(
            ['Range', 'Count', '% (by count)', 'Size (KB)'],
            self._build_size_rows(b.by_size, b.by_size_bytes, p.variable_count, 'variable_bytes')
        ),
        "",
        "### Bucket: By Namespace",
        "",
        self._format_table(
            ['Namespace', 'Count', '% (by count)', 'Size (KB)', 'Note'],
            self._build_namespace_rows(b)
        ),
    ]

    # Top Variables — only emitted when the collector recorded any.
    if b.top_variables:
        sections.extend([
            "",
            "### Top 10 Largest Variables",
            "",
            self._format_table(
                ['Rank', 'Name', 'Type', 'Size (KB)', '% (of total)'],
                self._build_top_variables_rows(b.top_variables, p.variable_size_bytes)
            ),
        ])

    return '\n'.join(sections) + "\n\n---"
|
|
611
|
+
|
|
612
|
+
def _format_compression(self) -> str:
    """Render the "Compression" section: a per-component compression table
    plus any profiler-supplied optimization suggestions.

    Returns an empty string when no compression stats were collected, so
    the section is omitted from the joined report.
    """
    p = self.profile
    b = p.compression_buckets

    if not b:
        return ""

    sections = [
        f"## Compression ({self._format_bytes(p.original_size_bytes)} → {self._format_bytes(p.compressed_size_bytes)})",
        "",
        "### By Component",
        "",
        self._format_table(
            ['Component', 'Original (KB)', 'Compressed (KB)', 'Compression Ratio', 'Space Saved Ratio', 'Compressibility'],
            self._build_compression_rows(b)
        ),
    ]

    # Optimization Suggestions — free-form strings rendered as bullets.
    if p.optimization_suggestions:
        sections.extend([
            "",
            "### Optimization Suggestions",
            "",
            *[f"- {suggestion}" for suggestion in p.optimization_suggestions]
        ])

    return '\n'.join(sections) + "\n\n---"
|
|
641
|
+
|
|
642
|
+
def _format_runtime_skillkit(self) -> str:
    """Render the "Runtime & Skillkit" section from the per-component
    compression stats; either entry is skipped when not recorded.
    """
    p = self.profile
    b = p.compression_buckets

    # Both components are optional; missing ones simply produce no line.
    runtime_comp = b.components.get('runtime_state') if b else None
    skillkit_comp = b.components.get('skillkit_state') if b else None

    lines = ["## Runtime & Skillkit", ""]

    if runtime_comp:
        lines.append(f"- Runtime State: {self._format_bytes(runtime_comp.original_bytes)} → "
                     f"{self._format_bytes(runtime_comp.compressed_bytes)} "
                     f"(compression_ratio {runtime_comp.compression_ratio:.1%}, {runtime_comp.compressibility})")

    if skillkit_comp:
        # A ratio of ~1.0 means compression gained essentially nothing,
        # so the state is reported as uncompressed instead.
        if skillkit_comp.compression_ratio >= 0.99:
            lines.append(f"- Skillkit State: {self._format_bytes(skillkit_comp.original_bytes)} "
                         f"(uncompressed, {skillkit_comp.compressibility} compressibility)")
        else:
            lines.append(f"- Skillkit State: {self._format_bytes(skillkit_comp.original_bytes)} → "
                         f"{self._format_bytes(skillkit_comp.compressed_bytes)} "
                         f"(compression_ratio {skillkit_comp.compression_ratio:.1%}, {skillkit_comp.compressibility})")

    return '\n'.join(lines) + "\n\n---"
|
|
667
|
+
|
|
668
|
+
def _format_summary(self) -> str:
    """Render the closing "Summary" section with headline counts and sizes."""
    p = self.profile
    b = p.message_buckets

    # All three fall back to zero when the corresponding buckets are absent.
    ns_count = len(p.variable_buckets.by_namespace) if p.variable_buckets else 0
    tool_calls = b.tool_calls_count if b else 0
    total_tokens = b.estimated_total_tokens if b else 0

    # Flush-left f-string body: indentation would leak into the Markdown.
    return f"""## Summary

- Messages: {p.message_count} (tool_calls={tool_calls}, ~{total_tokens / 1000:.1f}K tokens)
- Variables: {p.variable_count} (namespaces={ns_count})
- Size: {self._format_bytes(p.original_size_bytes)} → {self._format_bytes(p.compressed_size_bytes)} (space_saved_ratio {p.space_saved_ratio:.1%})
- Estimated Memory: ~{p.estimated_memory_mb:.3f} MB"""
|
|
683
|
+
|
|
684
|
+
def _format_bytes(self, bytes_count: int) -> str:
|
|
685
|
+
"""Format byte count as KB/MB"""
|
|
686
|
+
if bytes_count < 1000:
|
|
687
|
+
return f"{bytes_count} B"
|
|
688
|
+
elif bytes_count < 1000000:
|
|
689
|
+
return f"{bytes_count / 1000:.2f} KB"
|
|
690
|
+
else:
|
|
691
|
+
return f"{bytes_count / 1000000:.2f} MB"
|
|
692
|
+
|
|
693
|
+
def _format_table(self, headers: List[str], rows: List[List[str]]) -> str:
|
|
694
|
+
"""Formatted Table"""
|
|
695
|
+
if not rows:
|
|
696
|
+
return "*(no data)*"
|
|
697
|
+
|
|
698
|
+
# Calculate column width
|
|
699
|
+
col_widths = [len(h) for h in headers]
|
|
700
|
+
for row in rows:
|
|
701
|
+
for i, cell in enumerate(row):
|
|
702
|
+
col_widths[i] = max(col_widths[i], len(str(cell)))
|
|
703
|
+
|
|
704
|
+
# Formatting Header
|
|
705
|
+
header_line = '| ' + ' | '.join(h.ljust(col_widths[i]) for i, h in enumerate(headers)) + ' |'
|
|
706
|
+
separator = '|' + '|'.join('-' * (w + 2) for w in col_widths) + '|'
|
|
707
|
+
|
|
708
|
+
# Format data line
|
|
709
|
+
data_lines = []
|
|
710
|
+
for row in rows:
|
|
711
|
+
line = '| ' + ' | '.join(str(cell).ljust(col_widths[i]) for i, cell in enumerate(row)) + ' |'
|
|
712
|
+
data_lines.append(line)
|
|
713
|
+
|
|
714
|
+
return '\n'.join([header_line, separator] + data_lines)
|
|
715
|
+
|
|
716
|
+
def _build_role_rows(self, buckets: MessageBuckets) -> List[List[str]]:
|
|
717
|
+
"""Build character bucket table row"""
|
|
718
|
+
rows = []
|
|
719
|
+
total_count = sum(buckets.by_role.values())
|
|
720
|
+
total_size = sum(buckets.by_role_size_bytes.values())
|
|
721
|
+
|
|
722
|
+
for role in sorted(buckets.by_role.keys()):
|
|
723
|
+
count = buckets.by_role[role]
|
|
724
|
+
size = buckets.by_role_size_bytes.get(role, 0)
|
|
725
|
+
rows.append([
|
|
726
|
+
role,
|
|
727
|
+
str(count),
|
|
728
|
+
f"{count / total_count * 100:.1f}%" if total_count > 0 else "0%",
|
|
729
|
+
f"{size / 1000:.2f}",
|
|
730
|
+
f"{size / total_size * 100:.1f}%" if total_size > 0 else "0%",
|
|
731
|
+
])
|
|
732
|
+
|
|
733
|
+
# Total lines
|
|
734
|
+
rows.append([
|
|
735
|
+
'Total',
|
|
736
|
+
str(total_count),
|
|
737
|
+
'100%',
|
|
738
|
+
f"{total_size / 1000:.2f}",
|
|
739
|
+
'100%',
|
|
740
|
+
])
|
|
741
|
+
|
|
742
|
+
return rows
|
|
743
|
+
|
|
744
|
+
def _get_size_bucket_order(self, thresholds: List[int]) -> List[str]:
|
|
745
|
+
"""Dynamically generate bucket order by threshold"""
|
|
746
|
+
return [
|
|
747
|
+
f'<{thresholds[0] // 1000} KB',
|
|
748
|
+
f'{thresholds[0] // 1000}–{thresholds[1] // 1000} KB',
|
|
749
|
+
f'>={thresholds[1] // 1000} KB'
|
|
750
|
+
]
|
|
751
|
+
|
|
752
|
+
def _get_token_bucket_order(self, ranges: List[int]) -> List[str]:
|
|
753
|
+
"""Dynamically generate token bucket order according to configuration"""
|
|
754
|
+
return [
|
|
755
|
+
f'<{ranges[0]}',
|
|
756
|
+
f'{ranges[0]}–{ranges[1]}',
|
|
757
|
+
f'>={ranges[1]}'
|
|
758
|
+
]
|
|
759
|
+
|
|
760
|
+
def _build_size_rows(self, by_size: Dict, by_size_bytes: Dict, total_count: int, threshold_key: str = 'message_bytes') -> List[List[str]]:
|
|
761
|
+
"""Build size bucket table row"""
|
|
762
|
+
rows = []
|
|
763
|
+
thresholds = self.options.size_thresholds.get(threshold_key, [1000, 10000])
|
|
764
|
+
order = self._get_size_bucket_order(thresholds)
|
|
765
|
+
|
|
766
|
+
for bucket in order:
|
|
767
|
+
count = by_size.get(bucket, 0)
|
|
768
|
+
size = by_size_bytes.get(bucket, 0)
|
|
769
|
+
if count > 0 or bucket in by_size:
|
|
770
|
+
rows.append([
|
|
771
|
+
bucket,
|
|
772
|
+
str(count),
|
|
773
|
+
f"{count / total_count * 100:.1f}%" if total_count > 0 else "0%",
|
|
774
|
+
f"{size / 1000:.2f}",
|
|
775
|
+
])
|
|
776
|
+
|
|
777
|
+
return rows
|
|
778
|
+
|
|
779
|
+
def _build_content_type_rows(self, buckets: MessageBuckets) -> List[List[str]]:
|
|
780
|
+
"""Build content type table row"""
|
|
781
|
+
rows = []
|
|
782
|
+
total_count = sum(buckets.by_content_type.values())
|
|
783
|
+
|
|
784
|
+
for content_type in sorted(buckets.by_content_type.keys()):
|
|
785
|
+
count = buckets.by_content_type[content_type]
|
|
786
|
+
size = buckets.by_content_type_size_bytes.get(content_type, 0)
|
|
787
|
+
rows.append([
|
|
788
|
+
content_type,
|
|
789
|
+
str(count),
|
|
790
|
+
f"{count / total_count * 100:.1f}%" if total_count > 0 else "0%",
|
|
791
|
+
f"{size / 1000:.2f}",
|
|
792
|
+
])
|
|
793
|
+
|
|
794
|
+
return rows
|
|
795
|
+
|
|
796
|
+
def _build_token_range_rows(self, buckets: MessageBuckets) -> List[List[str]]:
|
|
797
|
+
"""Build token range table row"""
|
|
798
|
+
rows = []
|
|
799
|
+
total_count = sum(buckets.by_token_range.values())
|
|
800
|
+
|
|
801
|
+
order = self._get_token_bucket_order(self.options.token_ranges)
|
|
802
|
+
for bucket in order:
|
|
803
|
+
count = buckets.by_token_range.get(bucket, 0)
|
|
804
|
+
if count > 0 or bucket in buckets.by_token_range:
|
|
805
|
+
rows.append([
|
|
806
|
+
bucket,
|
|
807
|
+
str(count),
|
|
808
|
+
f"{count / total_count * 100:.1f}%" if total_count > 0 else "0%",
|
|
809
|
+
])
|
|
810
|
+
|
|
811
|
+
return rows
|
|
812
|
+
|
|
813
|
+
def _build_type_rows(self, buckets: VariableBuckets, total_size: int) -> List[List[str]]:
|
|
814
|
+
"""Build type bucket table row"""
|
|
815
|
+
rows = []
|
|
816
|
+
total_count = sum(buckets.by_type.values())
|
|
817
|
+
|
|
818
|
+
for var_type in sorted(buckets.by_type.keys()):
|
|
819
|
+
count = buckets.by_type[var_type]
|
|
820
|
+
size = buckets.by_type_size_bytes.get(var_type, 0)
|
|
821
|
+
rows.append([
|
|
822
|
+
var_type,
|
|
823
|
+
str(count),
|
|
824
|
+
f"{count / total_count * 100:.1f}%" if total_count > 0 else "0%",
|
|
825
|
+
f"{size / 1000:.2f}",
|
|
826
|
+
f"{size / total_size * 100:.1f}%" if total_size > 0 else "0%",
|
|
827
|
+
])
|
|
828
|
+
|
|
829
|
+
# Total lines
|
|
830
|
+
rows.append([
|
|
831
|
+
'Total',
|
|
832
|
+
str(total_count),
|
|
833
|
+
'100%',
|
|
834
|
+
f"{total_size / 1000:.2f}",
|
|
835
|
+
'100%',
|
|
836
|
+
])
|
|
837
|
+
|
|
838
|
+
return rows
|
|
839
|
+
|
|
840
|
+
def _build_namespace_rows(self, buckets: VariableBuckets) -> List[List[str]]:
|
|
841
|
+
"""Build namespace table row"""
|
|
842
|
+
rows = []
|
|
843
|
+
total_count = sum(buckets.by_namespace.values())
|
|
844
|
+
|
|
845
|
+
# Special namespace priority
|
|
846
|
+
priority = ['temp_', 'result_', 'cache_', '_private']
|
|
847
|
+
other_namespaces = [ns for ns in buckets.by_namespace.keys() if ns not in priority and ns != '(other)']
|
|
848
|
+
|
|
849
|
+
for ns in priority + sorted(other_namespaces):
|
|
850
|
+
if ns not in buckets.by_namespace:
|
|
851
|
+
continue
|
|
852
|
+
|
|
853
|
+
count = buckets.by_namespace[ns]
|
|
854
|
+
size = buckets.by_namespace_size_bytes.get(ns, 0)
|
|
855
|
+
note = "Can cleanup" if ns == 'temp_' else "-"
|
|
856
|
+
|
|
857
|
+
rows.append([
|
|
858
|
+
ns,
|
|
859
|
+
str(count),
|
|
860
|
+
f"{count / total_count * 100:.1f}%" if total_count > 0 else "0%",
|
|
861
|
+
f"{size / 1000:.2f}",
|
|
862
|
+
note,
|
|
863
|
+
])
|
|
864
|
+
|
|
865
|
+
# (other)
|
|
866
|
+
if '(other)' in buckets.by_namespace:
|
|
867
|
+
count = buckets.by_namespace['(other)']
|
|
868
|
+
size = buckets.by_namespace_size_bytes.get('(other)', 0)
|
|
869
|
+
rows.append([
|
|
870
|
+
'(other)',
|
|
871
|
+
str(count),
|
|
872
|
+
f"{count / total_count * 100:.1f}%" if total_count > 0 else "0%",
|
|
873
|
+
f"{size / 1000:.2f}",
|
|
874
|
+
"-",
|
|
875
|
+
])
|
|
876
|
+
|
|
877
|
+
return rows
|
|
878
|
+
|
|
879
|
+
def _build_top_variables_rows(self, top_vars: List[VariableInfo], total_size: int) -> List[List[str]]:
|
|
880
|
+
"""Build Top Variables table row"""
|
|
881
|
+
rows = []
|
|
882
|
+
for i, var in enumerate(top_vars, 1):
|
|
883
|
+
rows.append([
|
|
884
|
+
str(i),
|
|
885
|
+
var.name,
|
|
886
|
+
var.type,
|
|
887
|
+
f"{var.size_bytes / 1000:.2f}",
|
|
888
|
+
f"{var.size_bytes / total_size * 100:.1f}%" if total_size > 0 else "0%",
|
|
889
|
+
])
|
|
890
|
+
return rows
|
|
891
|
+
|
|
892
|
+
def _build_compression_rows(self, buckets: CompressionBuckets) -> List[List[str]]:
|
|
893
|
+
"""Build compressed component table row"""
|
|
894
|
+
rows = []
|
|
895
|
+
order = ['messages', 'variables', 'runtime_state', 'skillkit_state', 'context_manager_state']
|
|
896
|
+
|
|
897
|
+
for name in order:
|
|
898
|
+
if name not in buckets.components:
|
|
899
|
+
continue
|
|
900
|
+
|
|
901
|
+
comp = buckets.components[name]
|
|
902
|
+
rows.append([
|
|
903
|
+
name,
|
|
904
|
+
f"{comp.original_bytes / 1000:.2f}",
|
|
905
|
+
f"{comp.compressed_bytes / 1000:.2f}",
|
|
906
|
+
f"{comp.compression_ratio:.1%}",
|
|
907
|
+
f"{comp.space_saved_ratio:.1%}",
|
|
908
|
+
comp.compressibility,
|
|
909
|
+
])
|
|
910
|
+
|
|
911
|
+
return rows
|
|
912
|
+
|
|
913
|
+
|
|
914
|
+
class JSONFormatter:
    """Produce a JSON-compatible (plain dict) rendering of a profile."""

    def __init__(self, profile: SnapshotProfile):
        # Stored as-is; conversion happens lazily in format().
        self.profile = profile

    def format(self) -> Dict:
        """Recursively convert the profile dataclass tree into nested dicts."""
        # Delegates to the module-level dataclass_to_dict helper.
        return dataclass_to_dict(self.profile)
|