ostruct-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct/__init__.py +0 -0
- ostruct/cli/__init__.py +19 -0
- ostruct/cli/cache_manager.py +175 -0
- ostruct/cli/cli.py +2033 -0
- ostruct/cli/errors.py +329 -0
- ostruct/cli/file_info.py +316 -0
- ostruct/cli/file_list.py +151 -0
- ostruct/cli/file_utils.py +518 -0
- ostruct/cli/path_utils.py +123 -0
- ostruct/cli/progress.py +105 -0
- ostruct/cli/security.py +311 -0
- ostruct/cli/security_types.py +49 -0
- ostruct/cli/template_env.py +55 -0
- ostruct/cli/template_extensions.py +51 -0
- ostruct/cli/template_filters.py +650 -0
- ostruct/cli/template_io.py +261 -0
- ostruct/cli/template_rendering.py +347 -0
- ostruct/cli/template_schema.py +565 -0
- ostruct/cli/template_utils.py +288 -0
- ostruct/cli/template_validation.py +375 -0
- ostruct/cli/utils.py +31 -0
- ostruct/py.typed +0 -0
- ostruct_cli-0.1.0.dist-info/LICENSE +21 -0
- ostruct_cli-0.1.0.dist-info/METADATA +182 -0
- ostruct_cli-0.1.0.dist-info/RECORD +27 -0
- ostruct_cli-0.1.0.dist-info/WHEEL +4 -0
- ostruct_cli-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,650 @@
|
|
1
|
+
"""Template filters for Jinja2 environment."""
|
2
|
+
|
3
|
+
import datetime
|
4
|
+
import itertools
|
5
|
+
import json
|
6
|
+
import logging
|
7
|
+
import re
|
8
|
+
import textwrap
|
9
|
+
from collections import Counter
|
10
|
+
from typing import Any, Dict, List, Optional, Sequence, TypeVar, Union
|
11
|
+
|
12
|
+
import tiktoken
|
13
|
+
from jinja2 import Environment
|
14
|
+
from pygments import highlight
|
15
|
+
from pygments.formatters import HtmlFormatter, NullFormatter, TerminalFormatter
|
16
|
+
from pygments.lexers import TextLexer, get_lexer_by_name, guess_lexer
|
17
|
+
from pygments.util import ClassNotFound
|
18
|
+
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
T = TypeVar("T")
|
22
|
+
|
23
|
+
|
24
|
+
def extract_keywords(text: str) -> List[str]:
    """Split *text* on whitespace and return the resulting tokens."""
    tokens = text.split()
    return tokens
|
27
|
+
|
28
|
+
|
29
|
+
def word_count(text: str) -> int:
    """Return how many whitespace-separated words *text* contains."""
    return sum(1 for _ in text.split())
|
32
|
+
|
33
|
+
|
34
|
+
def char_count(text: str) -> int:
    """Return the total number of characters in *text*."""
    length = len(text)
    return length
|
37
|
+
|
38
|
+
|
39
|
+
def to_json(obj: Any) -> str:
    """Serialize *obj* to a JSON string with 2-space indentation.

    Raises TypeError when *obj* contains values json cannot serialize.
    """
    rendered = json.dumps(obj, indent=2)
    return rendered
|
42
|
+
|
43
|
+
|
44
|
+
def from_json(text: str) -> Any:
    """Parse the JSON document in *text* into Python objects.

    Raises json.JSONDecodeError on malformed input.
    """
    parsed = json.loads(text)
    return parsed
|
47
|
+
|
48
|
+
|
49
|
+
def remove_comments(text: str) -> str:
    """Strip #-style, //-style and /* */ comments from *text*.

    NOTE(review): comment markers inside string literals are removed
    too; callers must tolerate that.
    """
    comment_pattern = re.compile(
        r"#.*$|//.*$|/\*[\s\S]*?\*/", flags=re.MULTILINE
    )
    return comment_pattern.sub("", text)
|
52
|
+
|
53
|
+
|
54
|
+
def wrap_text(text: str, width: int = 80) -> str:
    """Re-flow *text* so that no output line exceeds *width* columns."""
    # textwrap.fill(text, width) is documented shorthand for exactly this.
    wrapped = textwrap.wrap(text, width)
    return "\n".join(wrapped)
|
57
|
+
|
58
|
+
|
59
|
+
def indent_text(text: str, width: int = 4) -> str:
    """Prefix lines of *text* with *width* spaces.

    Whitespace-only lines are left untouched (textwrap.indent default).
    """
    prefix = width * " "
    return textwrap.indent(text, prefix)
|
62
|
+
|
63
|
+
|
64
|
+
def dedent_text(text: str) -> str:
    """Strip the whitespace prefix common to every line of *text*."""
    dedented = textwrap.dedent(text)
    return dedented
|
67
|
+
|
68
|
+
|
69
|
+
def normalize_text(text: str) -> str:
    """Collapse every run of whitespace in *text* to a single space."""
    parts = text.split()
    return " ".join(parts)
|
72
|
+
|
73
|
+
|
74
|
+
def strip_markdown(text: str) -> str:
    """Delete the markdown punctuation characters #, *, `, _ and ~."""
    # One C-level pass with str.translate instead of a regex substitution.
    deletions = str.maketrans("", "", "#*`_~")
    return text.translate(deletions)
|
77
|
+
|
78
|
+
|
79
|
+
def format_table(headers: Sequence[Any], rows: Sequence[Sequence[Any]]) -> str:
    """Render *headers* and *rows* as a markdown table.

    The divider cells are padded to at least three dashes so the
    delimiter row stays conventional for short headings.
    """
    header_cells = [str(h) for h in headers]
    header_line = "| " + " | ".join(header_cells) + " |\n"
    divider_line = (
        "| " + " | ".join("-" * max(len(c), 3) for c in header_cells) + " |\n"
    )
    body = "\n".join(
        "| " + " | ".join(str(cell) for cell in row) + " |" for row in rows
    )
    return header_line + divider_line + body
|
88
|
+
|
89
|
+
|
90
|
+
def align_table(
    headers: Sequence[Any],
    rows: Sequence[Sequence[Any]],
    alignments: Optional[Sequence[str]] = None,
) -> str:
    """Render a markdown table whose delimiter row carries alignments.

    Args:
        headers: Column headings.
        rows: Table body; each row is a sequence of cells.
        alignments: Optional per-column names ("left", "center",
            "right"); anything else falls back to a plain "---"
            divider.  Defaults to "left" for every column.

    Returns:
        The markdown table as a single string.
    """
    marker_for = {"center": ":---:", "left": ":---", "right": "---:"}
    columns = alignments if alignments else ["left"] * len(headers)
    markers = [marker_for.get(name, "---") for name in columns]

    header_line = f"| {' | '.join(str(h) for h in headers)} |\n"
    divider_line = f"| {' | '.join(markers)} |\n"
    body = "\n".join(
        f"| {' | '.join(str(cell) for cell in row)} |" for row in rows
    )
    return header_line + divider_line + body
|
115
|
+
|
116
|
+
|
117
|
+
def dict_to_table(data: Dict[Any, Any]) -> str:
    """Render a mapping as a two-column markdown table (Key / Value)."""
    body = "\n".join(f"| {key} | {val} |" for key, val in data.items())
    return "| Key | Value |\n| --- | --- |\n" + body
|
122
|
+
|
123
|
+
|
124
|
+
def list_to_table(
    items: Sequence[Any], headers: Optional[Sequence[str]] = None
) -> str:
    """Convert a list to a markdown table.

    Without *headers*, each item becomes a numbered "| # | Value |"
    row.  With *headers*, each item is treated as a row (a sequence of
    cells) rendered under those headings.

    Args:
        items: Scalar values (no headers) or row sequences (headers).
        headers: Optional column headings.

    Returns:
        The markdown table as a single string.
    """
    if not headers:
        return "| # | Value |\n| --- | --- |\n" + "\n".join(
            f"| {i + 1} | {item} |" for i, item in enumerate(items)
        )
    # Pad divider cells to at least three dashes, matching format_table,
    # so one/two-character headers still get a conventional delimiter row.
    divider = " | ".join("-" * max(len(h), 3) for h in headers)
    return (
        f"| {' | '.join(headers)} |\n| {divider} |\n"
        + "\n".join(
            f"| {' | '.join(str(cell) for cell in row)} |" for row in items
        )
    )
|
138
|
+
|
139
|
+
|
140
|
+
def escape_special(text: str) -> str:
    """Backslash-escape braces, brackets, quotes and backslashes."""
    specials = '{}[]"\'\\'
    return "".join(
        "\\" + ch if ch in specials else ch for ch in text
    )
|
143
|
+
|
144
|
+
|
145
|
+
def debug_print(x: Any) -> None:
    """Write *x* to stdout with a "DEBUG: " prefix (template debugging aid)."""
    print("DEBUG: {}".format(x))
|
148
|
+
|
149
|
+
|
150
|
+
def type_of(x: Any) -> str:
    """Return the name of *x*'s concrete type."""
    cls = type(x)
    return cls.__name__
|
153
|
+
|
154
|
+
|
155
|
+
def dir_of(x: Any) -> List[str]:
    """Return the attribute names of *x*, as reported by dir()."""
    attributes = dir(x)
    return attributes
|
158
|
+
|
159
|
+
|
160
|
+
def len_of(x: Any) -> Optional[int]:
    """Return len(x), or None when *x* does not support len()."""
    if hasattr(x, "__len__"):
        return len(x)
    return None
|
163
|
+
|
164
|
+
|
165
|
+
def validate_json(text: str) -> bool:
    """Return True when *text* is a non-empty, parseable JSON document."""
    if not text:
        return False
    try:
        json.loads(text)
    except json.JSONDecodeError:
        return False
    return True
|
174
|
+
|
175
|
+
|
176
|
+
def format_error(e: Exception) -> str:
    """Render an exception as "TypeName: message"."""
    return "{}: {}".format(type(e).__name__, e)
|
179
|
+
|
180
|
+
|
181
|
+
def estimate_tokens(text: str) -> int:
    """Estimate the token count of *text* with the gpt-4 tokenizer.

    Falls back to a plain whitespace word count when tiktoken fails
    for any reason; the failure is logged at WARNING level.
    """
    as_text = str(text)
    try:
        encoder = tiktoken.encoding_for_model("gpt-4")
        return len(encoder.encode(as_text))
    except Exception as e:
        logger.warning(f"Failed to estimate tokens: {e}")
        return len(as_text.split())
|
189
|
+
|
190
|
+
|
191
|
+
def format_json(obj: Any) -> str:
    """Pretty-print *obj* as 2-space-indented JSON.

    Values json cannot serialize are stringified via ``default=str``
    rather than raising.
    """
    return json.dumps(obj, default=str, indent=2)
|
194
|
+
|
195
|
+
|
196
|
+
def auto_table(data: Any) -> str:
    """Dispatch *data* to the matching markdown-table formatter.

    Mappings go to dict_to_table, lists/tuples to list_to_table and
    anything else is simply stringified.
    """
    if isinstance(data, dict):
        return dict_to_table(data)
    elif isinstance(data, (list, tuple)):
        return list_to_table(data)
    else:
        return str(data)
|
203
|
+
|
204
|
+
|
205
|
+
def sort_by(items: Sequence[T], key: str) -> List[T]:
    """Return *items* sorted by the dict entry or attribute *key*.

    Items missing the key sort as 0, so they must be comparable with
    the present values.
    """
    return sorted(
        items,
        key=lambda item: item.get(key, 0)
        if isinstance(item, dict)
        else getattr(item, key, 0),
    )
|
214
|
+
|
215
|
+
|
216
|
+
def group_by(items: Sequence[T], key: str) -> Dict[Any, List[T]]:
    """Group *items* by the dict entry or attribute *key*.

    Args:
        items: Dicts or objects to group.
        key: Dict key / attribute name to group on; items missing it
            fall into the ``None`` group.

    Returns:
        Mapping of key value -> list of matching items, each list
        preserving the items' original relative order.
    """

    def get_group_key(item: T) -> Any:
        if isinstance(item, dict):
            return item.get(key)
        return getattr(item, key, None)

    # Single-pass dict grouping instead of sort + itertools.groupby:
    # it is O(n) and no longer raises TypeError when key values are not
    # mutually orderable (e.g. None mixed with str).
    groups: Dict[Any, List[T]] = {}
    for item in items:
        groups.setdefault(get_group_key(item), []).append(item)
    return groups
|
229
|
+
|
230
|
+
|
231
|
+
def filter_by(items: Sequence[T], key: str, value: Any) -> List[T]:
    """Return only the items whose *key* field equals *value*."""
    matches: List[T] = []
    for item in items:
        if isinstance(item, dict):
            field = item.get(key)
        else:
            field = getattr(item, key, None)
        if field == value:
            matches.append(item)
    return matches
|
239
|
+
|
240
|
+
|
241
|
+
def extract_field(items: Sequence[Any], key: str) -> List[Any]:
    """Collect the *key* field from every item (None when absent)."""

    def read(item: Any) -> Any:
        if isinstance(item, dict):
            return item.get(key)
        return getattr(item, key, None)

    return [read(item) for item in items]
|
247
|
+
|
248
|
+
|
249
|
+
def frequency(items: Sequence[T]) -> Dict[T, int]:
    """Map each distinct item to its number of occurrences."""
    counts: Dict[T, int] = {}
    for item in items:
        counts[item] = counts.get(item, 0) + 1
    return counts
|
252
|
+
|
253
|
+
|
254
|
+
def aggregate(
    items: Sequence[Any], key: Optional[str] = None
) -> Dict[str, Union[int, float]]:
    """Compute count/sum/avg/min/max over *items*.

    Args:
        items: Plain numbers (when *key* is None) or dicts/objects
            holding a numeric field named *key*.  Missing or None
            fields count as 0.
        key: Optional field name to aggregate over.

    Returns:
        Dict with "count", "sum", "avg", "min" and "max" entries;
        all zeros for empty input.

    Raises:
        ValueError: when *key* is None and an item is not numeric, or
            a field value cannot be converted to float.
    """
    if not items:
        return {"count": 0, "sum": 0, "avg": 0, "min": 0, "max": 0}

    def to_number(item: Any) -> float:
        if key is None:
            if isinstance(item, (int, float)):
                return float(item)
            raise ValueError(f"Cannot convert {type(item)} to float")
        field = (
            item.get(key) if isinstance(item, dict) else getattr(item, key, 0)
        )
        return 0.0 if field is None else float(field)

    numbers = [to_number(item) for item in items]
    total = sum(numbers)
    return {
        "count": len(numbers),
        "sum": total,
        "avg": total / len(numbers),
        "min": min(numbers),
        "max": max(numbers),
    }
|
279
|
+
|
280
|
+
|
281
|
+
def unique(items: Sequence[Any]) -> List[Any]:
    """Drop duplicates from *items*, keeping first occurrences in order."""
    seen = set()
    distinct: List[Any] = []
    for item in items:
        if item not in seen:
            seen.add(item)
            distinct.append(item)
    return distinct
|
284
|
+
|
285
|
+
|
286
|
+
def pivot_table(
    data: Sequence[Dict[str, Any]],
    index: str,
    value: str,
    aggfunc: str = "sum",
) -> Dict[str, Dict[str, Any]]:
    """Create pivot table from data.

    Groups rows by the stringified *index* column and aggregates the
    *value* column with *aggfunc*.

    Args:
        data: Rows as dictionaries.
        index: Column to group by; a missing index stringifies to "".
        value: Column to aggregate; non-convertible values are treated
            as 0 and counted in metadata["invalid_values"].
        aggfunc: One of "sum", "mean" or "count".

    Returns:
        {"aggregates": {index value -> {"value": number}},
         "metadata": {"total_records", "null_index_count",
                      "invalid_values"}}.
        NOTE(review): the empty-data return omits "invalid_values"
        from metadata — confirm callers tolerate that asymmetry.

    Raises:
        ValueError: for an unknown *aggfunc*, or when the FIRST row is
            missing *index* or *value* (later rows are not validated).
    """
    if not data:
        logger.debug("Empty data provided to pivot_table")
        return {
            "aggregates": {},
            "metadata": {"total_records": 0, "null_index_count": 0},
        }

    # Validate aggfunc
    valid_aggfuncs = {"sum", "mean", "count"}
    if aggfunc not in valid_aggfuncs:
        raise ValueError(
            f"Invalid aggfunc: {aggfunc}. Must be one of {valid_aggfuncs}"
        )

    # Validate columns exist in first row only (cheap schema check).
    if data and (index not in data[0] or value not in data[0]):
        missing = []
        if index not in data[0]:
            missing.append(f"index column '{index}'")
        if value not in data[0]:
            missing.append(f"value column '{value}'")
        raise ValueError(f"Missing required columns: {', '.join(missing)}")

    # Count records with null index (they still participate, under the
    # stringified key "None").
    null_index_count = sum(1 for row in data if row.get(index) is None)
    if null_index_count:
        logger.warning(f"Found {null_index_count} rows with null index values")

    # Group by index
    groups: Dict[str, List[float]] = {}
    invalid_values = 0
    for row in data:
        idx = str(row.get(index, ""))
        try:
            val = float(row.get(value, 0))
        except (TypeError, ValueError):
            # Non-numeric value: count it, warn, and fall back to 0 so
            # the row still contributes to "count" aggregation.
            invalid_values += 1
            logger.warning(
                f"Invalid value for {value} in row with index {idx}, using 0"
            )
            val = 0.0

        if idx not in groups:
            groups[idx] = []
        groups[idx].append(val)

    if invalid_values:
        logger.warning(
            f"Found {invalid_values} invalid values in column {value}"
        )

    # Apply the requested aggregation per group.
    result: Dict[str, Dict[str, Any]] = {"aggregates": {}, "metadata": {}}
    for idx, values in groups.items():
        if aggfunc == "sum":
            result["aggregates"][idx] = {"value": sum(values)}
        elif aggfunc == "mean":
            result["aggregates"][idx] = {"value": sum(values) / len(values)}
        else:  # count
            result["aggregates"][idx] = {"value": len(values)}

    result["metadata"] = {
        "total_records": len(data),
        "null_index_count": null_index_count,
        "invalid_values": invalid_values,
    }
    return result
|
359
|
+
|
360
|
+
|
361
|
+
def summarize(
    data: Sequence[Any], keys: Optional[Sequence[str]] = None
) -> Dict[str, Any]:
    """Generate summary statistics for data fields.

    Args:
        data: Sequence of dicts or attribute-bearing objects.
        keys: Optional explicit field names; when omitted, dict keys of
            the first row (or its non-underscore dir() attributes) are
            analyzed.

    Returns:
        {"total_records": int, "fields": {name -> per-field stats}}
        where each stats dict has "type", "total", "null_count",
        "unique", plus min/max/avg for numeric fields and up to five
        "most_common" values.

    Raises:
        TypeError: when the first item is neither a dict nor an object.
        ValueError: when no keys can be determined, or wrapping any
            other analysis failure (see the broad except at the end).
    """
    if not data:
        logger.debug("Empty data provided to summarize")
        return {"total_records": 0, "fields": {}}

    # Validate data type using only the first item (later items are
    # assumed to have the same shape).
    if not isinstance(data[0], dict) and not hasattr(data[0], "__dict__"):
        raise TypeError("Data items must be dictionaries or objects")

    def get_field_value(item: Any, field: str) -> Any:
        """Read *field* from a dict or object; None (with a warning) on failure."""
        try:
            if isinstance(item, dict):
                return item.get(field)
            return getattr(item, field, None)
        except Exception as e:
            logger.warning(f"Error accessing field {field}: {e}")
            return None

    def get_field_type(values: List[Any]) -> str:
        """Determine field type from non-null values."""
        non_null = [v for v in values if v is not None]
        if not non_null:
            return "NoneType"

        # Check if all values are of the same type
        types = {type(v) for v in non_null}
        if len(types) == 1:
            return next(iter(types)).__name__

        # Handle mixed numeric types
        if all(isinstance(v, (int, float)) for v in non_null):
            return "number"

        # Default to most specific common ancestor type
        return "mixed"

    def analyze_field(field: str) -> Dict[str, Any]:
        """Build the per-field statistics dict for *field*."""
        logger.debug(f"Analyzing field: {field}")
        values = [get_field_value(x, field) for x in data]
        non_null = [v for v in values if v is not None]

        stats = {
            "type": get_field_type(values),
            "total": len(values),
            "null_count": len(values) - len(non_null),
            # NOTE: set() requires hashable values; unhashable fields
            # propagate a TypeError to the outer wrapper below.
            "unique": len(set(non_null)),
        }

        # Add numeric statistics if applicable
        if stats["type"] in ("int", "float", "number"):
            try:
                nums = [float(x) for x in non_null]
                stats.update(
                    {
                        "min": min(nums) if nums else None,
                        "max": max(nums) if nums else None,
                        "avg": sum(nums) / len(nums) if nums else None,
                    }
                )
            except (ValueError, TypeError) as e:
                logger.warning(
                    f"Error calculating numeric stats for {field}: {e}"
                )

        # Add most common values
        if non_null:
            try:
                most_common = Counter(non_null).most_common(5)
                stats["most_common"] = [
                    {"value": str(v), "count": c} for v, c in most_common
                ]
            except TypeError as e:
                logger.warning(
                    f"Error calculating most common values for {field}: {e}"
                )

        return stats

    try:
        # Field discovery: explicit keys win; otherwise inspect the
        # first item only.
        available_keys = keys or (
            list(data[0].keys())
            if isinstance(data[0], dict)
            else [k for k in dir(data[0]) if not k.startswith("_")]
        )

        if not available_keys:
            raise ValueError("No valid keys found in data")

        logger.debug(
            f"Analyzing {len(data)} records with {len(available_keys)} fields"
        )
        result = {
            "total_records": len(data),
            "fields": {k: analyze_field(k) for k in available_keys},
        }
        logger.debug("Analysis complete")
        return result

    except Exception as e:
        # Deliberate broad catch: every analysis failure (including the
        # ValueError raised above, which gets re-wrapped) surfaces to
        # callers as a single ValueError with the original message.
        logger.error(f"Failed to analyze data: {e}", exc_info=True)
        raise ValueError(f"Failed to analyze data: {str(e)}")
|
465
|
+
|
466
|
+
|
467
|
+
def strip_comments(text: str, lang: str = "python") -> str:
    """Remove comments from code text based on language.

    Args:
        text: Code text to process
        lang: Programming language

    Returns:
        Text with comments removed if language is supported,
        otherwise returns original text with a warning

    NOTE(review): removal is purely regex-based — comment markers that
    appear inside string literals are stripped too, and lines left
    empty by single-line comment removal are dropped entirely.
    """
    # Define comment patterns for different languages
    single_line_comments = {
        "python": "#",
        "javascript": "//",
        "typescript": "//",
        "java": "//",
        "c": "//",
        "cpp": "//",
        "go": "//",
        "rust": "//",
        "swift": "//",
        "ruby": "#",
        "perl": "#",
        "shell": "#",
        "bash": "#",
        "php": "//",
    }

    multi_line_comments = {
        "javascript": ("/*", "*/"),
        "typescript": ("/*", "*/"),
        "java": ("/*", "*/"),
        "c": ("/*", "*/"),
        "cpp": ("/*", "*/"),
        "go": ("/*", "*/"),
        "rust": ("/*", "*/"),
        "swift": ("/*", "*/"),
        "php": ("/*", "*/"),
    }

    # Return original text if language is not supported
    if lang not in single_line_comments and lang not in multi_line_comments:
        logger.debug(
            f"Language '{lang}' is not supported for comment removal. "
            f"Comments will be preserved in the output."
        )
        return text

    lines = text.splitlines()
    cleaned_lines = []

    # Handle single-line comments first; the multi-line pass below then
    # runs on the already-filtered text.
    if lang in single_line_comments:
        comment_char = single_line_comments[lang]
        for line in lines:
            # Remove inline comments (and any whitespace before them)
            line = re.sub(f"\\s*{re.escape(comment_char)}.*$", "", line)
            # Keep non-empty lines
            if line.strip():
                cleaned_lines.append(line)
        text = "\n".join(cleaned_lines)

    # Handle multi-line comments
    if lang in multi_line_comments:
        start, end = multi_line_comments[lang]
        # Remove multi-line comments; DOTALL lets the pattern span lines,
        # and the non-greedy quantifier stops at the first terminator.
        text = re.sub(
            f"{re.escape(start)}.*?{re.escape(end)}", "", text, flags=re.DOTALL
        )

    return text
|
539
|
+
|
540
|
+
|
541
|
+
def format_code(
    text: str, output_format: str = "terminal", language: str = "python"
) -> str:
    """Format code with syntax highlighting.

    Args:
        text (str): The code text to format
        output_format (str): The output format ('terminal', 'html', or 'plain')
        language (str): The programming language for syntax highlighting

    Returns:
        str: Formatted code string

    Raises:
        ValueError: If output_format is not one of 'terminal', 'html', or 'plain'
    """
    if not text:
        return ""

    if output_format not in ["terminal", "html", "plain"]:
        raise ValueError(
            "output_format must be one of 'terminal', 'html', or 'plain'"
        )

    # Lexer resolution: explicit language name, then content-based
    # guess, then plain text as a last resort.
    try:
        lexer = get_lexer_by_name(language)
    except ClassNotFound:
        try:
            lexer = guess_lexer(text)
        except ClassNotFound:
            lexer = TextLexer()

    try:
        # The [str] subscripts use pygments' generic formatter typing;
        # the Union annotation keeps the single `formatter` variable
        # valid across all three branches for the type checker.
        if output_format == "terminal":
            formatter: Union[
                TerminalFormatter[str], HtmlFormatter[str], NullFormatter[str]
            ] = TerminalFormatter[str]()
        elif output_format == "html":
            formatter = HtmlFormatter[str]()
        else:  # plain
            formatter = NullFormatter[str]()

        return highlight(text, lexer, formatter)
    except Exception as e:
        # Best-effort: any highlighting failure falls back to the
        # unformatted input rather than breaking template rendering.
        logger.error(f"Error formatting code: {e}")
        return text
|
587
|
+
|
588
|
+
|
589
|
+
def register_template_filters(env: Environment) -> None:
    """Register all template filters with the Jinja2 environment.

    Filters are registered under their template-facing names (note the
    renames: wrap_text -> "wrap", indent_text -> "indent", dedent_text
    -> "dedent", normalize_text -> "normalize", format_table ->
    "table").  A second set of callables is installed as template
    globals; auto_table is deliberately available both as a filter and
    as a global.

    Args:
        env: The Jinja2 environment to register filters with.
    """
    filters = {
        # Text processing
        "extract_keywords": extract_keywords,
        "word_count": word_count,
        "char_count": char_count,
        "to_json": to_json,
        "from_json": from_json,
        "remove_comments": remove_comments,
        "wrap": wrap_text,
        "indent": indent_text,
        "dedent": dedent_text,
        "normalize": normalize_text,
        "strip_markdown": strip_markdown,
        # Data processing
        "sort_by": sort_by,
        "group_by": group_by,
        "filter_by": filter_by,
        "extract_field": extract_field,
        "unique": unique,
        "frequency": frequency,
        "aggregate": aggregate,
        # Table formatting
        "table": format_table,
        "align_table": align_table,
        "dict_to_table": dict_to_table,
        "list_to_table": list_to_table,
        # Code processing
        "format_code": format_code,
        "strip_comments": strip_comments,
        # Special character handling
        "escape_special": escape_special,
        # Table utilities
        "auto_table": auto_table,
    }

    env.filters.update(filters)

    # Add template globals
    env.globals.update(
        {
            "estimate_tokens": estimate_tokens,
            "format_json": format_json,
            # Naive local time, not UTC; templates call now() directly.
            "now": datetime.datetime.now,
            "debug": debug_print,
            "type_of": type_of,
            "dir_of": dir_of,
            "len_of": len_of,
            "validate_json": validate_json,
            "format_error": format_error,
            # Data analysis globals
            "summarize": summarize,
            "pivot_table": pivot_table,
            # Table utilities
            "auto_table": auto_table,
        }
    )
|