contpress 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ from contpress.budgets import TokenBudget
2
+ from contpress.builder import PromptBuilder
3
+ from contpress.compressors.extractive import ExtractiveCompressor
4
+ from contpress.contracts import OutputContract
5
+ from contpress.core import ContextPress, OptimizedPrompt
6
+ from contpress.formatters import compact_json, compact_table, drop_nulls, json_to_csv_if_tabular, shorten_keys
7
+ from contpress.memory.conversation import ConversationPruner
8
+ from contpress.prompt_cache import PromptCacheLayout
9
+ from contpress.rag.filter import ContextFilter
10
+ from contpress.reports import UsageReport
11
+ from contpress.tokenizer import TokenCounter
12
+ from contpress.tools import AgentTraceCompactor, ToolSchemaCompactor
13
+
14
+ __all__ = [
15
+ "ContextFilter",
16
+ "ContextPress",
17
+ "ConversationPruner",
18
+ "ExtractiveCompressor",
19
+ "OptimizedPrompt",
20
+ "OutputContract",
21
+ "PromptBuilder",
22
+ "PromptCacheLayout",
23
+ "TokenBudget",
24
+ "TokenCounter",
25
+ "ToolSchemaCompactor",
26
+ "UsageReport",
27
+ "AgentTraceCompactor",
28
+ "compact_json",
29
+ "compact_table",
30
+ "drop_nulls",
31
+ "json_to_csv_if_tabular",
32
+ "shorten_keys",
33
+ ]
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+
5
+ from contpress.tokenizer import TokenCounter
6
+
7
+
8
+ MODEL_CONTEXT_WINDOWS = {
9
+ "gpt-4o": 128_000,
10
+ "gpt-4o-mini": 128_000,
11
+ "gpt-4.1": 1_047_576,
12
+ "gpt-4.1-mini": 1_047_576,
13
+ "gpt-4.1-nano": 1_047_576,
14
+ "o3": 200_000,
15
+ "o4-mini": 200_000,
16
+ }
17
+
18
+
19
+ @dataclass(slots=True)
20
+ class TokenBudget:
21
+ model: str = "gpt-4o-mini"
22
+ max_input_tokens: int | None = None
23
+ reserve_output_tokens: int = 0
24
+ system_prompt: str = ""
25
+ tool_schema: str = ""
26
+ rag_context_ratio: float = 0.6
27
+ history_ratio: float = 0.3
28
+ counter: TokenCounter = field(init=False, repr=False)
29
+
30
+ def __post_init__(self) -> None:
31
+ if self.reserve_output_tokens < 0:
32
+ raise ValueError("reserve_output_tokens must be >= 0")
33
+ if self.max_input_tokens is not None and self.max_input_tokens < 0:
34
+ raise ValueError("max_input_tokens must be >= 0")
35
+ if not 0 <= self.rag_context_ratio <= 1:
36
+ raise ValueError("rag_context_ratio must be between 0 and 1")
37
+ if not 0 <= self.history_ratio <= 1:
38
+ raise ValueError("history_ratio must be between 0 and 1")
39
+ self.counter = TokenCounter(self.model)
40
+
41
+ @property
42
+ def context_window(self) -> int:
43
+ return MODEL_CONTEXT_WINDOWS.get(self.model, 128_000)
44
+
45
+ @property
46
+ def overhead_tokens(self) -> int:
47
+ return self.counter.count(self.system_prompt) + self.counter.count(self.tool_schema)
48
+
49
+ @property
50
+ def input_budget(self) -> int:
51
+ ceiling = self.max_input_tokens if self.max_input_tokens is not None else self.context_window
52
+ return max(0, ceiling - self.reserve_output_tokens - self.overhead_tokens)
53
+
54
+ @property
55
+ def rag_context_budget(self) -> int:
56
+ return int(self.input_budget * self.rag_context_ratio)
57
+
58
+ @property
59
+ def conversation_history_budget(self) -> int:
60
+ return int(self.input_budget * self.history_ratio)
61
+
62
+ def enforce(self, text: str) -> str:
63
+ return self.counter.trim(text, self.input_budget)
@@ -0,0 +1,49 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Iterable
5
+
6
+
7
def _bullets(items: Iterable[str]) -> str:
    """Render *items* as ``- item`` lines, skipping entries that are blank."""
    return "\n".join(f"- {item}" for item in items if str(item).strip())


def _as_body(value: str | Iterable[str]) -> str:
    """Return a string unchanged; render any other iterable as a bullet list.

    A plain string is itself iterable, so passing it to ``_bullets`` would
    produce one bullet per character. Every list-accepting builder method goes
    through this helper to avoid that.
    """
    if isinstance(value, str):
        return value
    return _bullets(value)


@dataclass
class PromptBuilder:
    """Fluent builder that assembles titled sections into one prompt string.

    Each section method returns the builder so calls can be chained. Sections
    whose body is empty after stripping are dropped.
    """

    # Ordered (title, body) pairs; bodies are already stripped and non-empty.
    _blocks: list[tuple[str, str]] = field(default_factory=list)

    def role(self, value: str) -> "PromptBuilder":
        """Add a ``Role`` section."""
        return self._add("Role", value)

    def task(self, value: str) -> "PromptBuilder":
        """Add a ``Task`` section."""
        return self._add("Task", value)

    def constraints(self, values: Iterable[str] | str) -> "PromptBuilder":
        """Add a ``Constraints`` section from a bullet list or a single string."""
        return self._add("Constraints", _as_body(values))

    def instructions(self, values: Iterable[str] | str) -> "PromptBuilder":
        """Add an ``Instructions`` section from a bullet list or a single string."""
        return self._add("Instructions", _as_body(values))

    def context(self, value: str) -> "PromptBuilder":
        """Add a ``Context`` section."""
        return self._add("Context", value)

    def output(self, values: Iterable[str] | str) -> "PromptBuilder":
        """Add an ``Output`` section from a bullet list or a single string."""
        return self._add("Output", _as_body(values))

    def block(self, title: str, value: str | Iterable[str]) -> "PromptBuilder":
        """Add a section with a caller-chosen *title*."""
        return self._add(title, _as_body(value))

    def build(self) -> str:
        """Join all sections with newlines.

        Multi-line bodies are placed under a ``Title:`` line; single-line
        bodies are rendered inline as ``Title: body``.
        """
        return "\n".join(
            f"{title}:\n{body}" if "\n" in body else f"{title}: {body}"
            for title, body in self._blocks
            if body
        )

    def _add(self, title: str, body: str) -> "PromptBuilder":
        """Append a section unless its stripped body is empty; return ``self``."""
        clean = (body or "").strip()
        if clean:
            self._blocks.append((title.strip(), clean))
        return self
@@ -0,0 +1,5 @@
1
+ from contpress.cache.exact import ExactPromptCache
2
+ from contpress.cache.semantic import SemanticCache
3
+ from contpress.cache.stores import InMemoryStore, RedisStore
4
+
5
+ __all__ = ["ExactPromptCache", "InMemoryStore", "RedisStore", "SemanticCache"]
@@ -0,0 +1,21 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from dataclasses import dataclass, field
5
+ from typing import Any
6
+
7
+
8
@dataclass
class ExactPromptCache:
    """In-process cache keyed by the SHA-256 digest of the exact prompt text."""

    # Prefix placed in front of every key.
    namespace: str = "contpress"
    # Backing dictionary: namespaced digest -> cached value.
    _data: dict[str, Any] = field(default_factory=dict)

    def lookup(self, prompt: str, default: Any = None) -> Any:
        """Return the value cached for *prompt*, or *default* on a miss.

        Pass a sentinel as *default* to distinguish a miss from a cached
        ``None``; with the default of ``None`` the two look the same.
        """
        return self._data.get(self._key(prompt), default)

    def add(self, prompt: str, value: Any) -> None:
        """Store *value* for *prompt*, replacing any earlier entry."""
        self._data[self._key(prompt)] = value

    def _key(self, prompt: str) -> str:
        """Build ``<namespace>:<sha256 hex>``; a falsy prompt hashes as ``""``."""
        digest = hashlib.sha256((prompt or "").encode("utf-8")).hexdigest()
        return f"{self.namespace}:{digest}"
@@ -0,0 +1,12 @@
1
+ from __future__ import annotations
2
+
3
+
4
class SemanticCache:
    """Placeholder for a semantic cache; construction always raises.

    Raises:
        ImportError: if ``diskcache``, ``faiss`` or ``sentence_transformers``
            cannot be imported.
        NotImplementedError: otherwise, because no implementation exists yet.
    """

    def __init__(self, *args, **kwargs) -> None:
        install_hint = "Install with: pip install contpress[semantic]"
        not_ready = "SemanticCache requires a vector store implementation; install extras and configure a store."
        try:
            # Imported only to verify that the optional extras are present.
            import diskcache  # noqa: F401
            import faiss  # noqa: F401
            from sentence_transformers import SentenceTransformer  # noqa: F401
        except ImportError as exc:
            raise ImportError(install_hint) from exc
        raise NotImplementedError(not_ready)
@@ -0,0 +1,31 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any
5
+
6
+
7
@dataclass
class InMemoryStore:
    """Key/value store backed by a plain dictionary."""

    # Backing mapping; a fresh dict per instance unless one is supplied.
    data: dict[str, Any] = field(default_factory=dict)

    def get(self, key: str) -> Any:
        """Return the value stored under *key*, or ``None`` when absent."""
        backing = self.data
        return backing.get(key)

    def set(self, key: str, value: Any) -> None:
        """Store *value* under *key*, overwriting any existing entry."""
        backing = self.data
        backing[key] = value
16
+
17
+
18
class RedisStore:
    """Key/value store that delegates to a Redis client, prefixing every key.

    Raises:
        ImportError: if the ``redis`` package is not installed.
    """

    def __init__(self, url: str = "redis://localhost:6379/0", prefix: str = "contpress") -> None:
        try:
            import redis
        except ImportError as exc:
            raise ImportError("Install redis separately to use RedisStore: pip install redis") from exc
        self.client = redis.Redis.from_url(url)
        self.prefix = prefix

    def _namespaced(self, key: str) -> str:
        """Return *key* with the store prefix and a colon in front."""
        return f"{self.prefix}:{key}"

    def get(self, key: str) -> bytes | None:
        """Fetch the value stored under the prefixed *key* from the client."""
        full_key = self._namespaced(key)
        return self.client.get(full_key)

    def set(self, key: str, value: bytes | str) -> None:
        """Write *value* under the prefixed *key* through the client."""
        full_key = self._namespaced(key)
        self.client.set(full_key, value)
contextpress/cli.py ADDED
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from rich.console import Console
8
+
9
+ from contpress.core import ContextPress
10
+ from contpress.formatters import compact_json
11
+ from contpress.reports import UsageReport
12
+ from contpress.tokenizer import TokenCounter
13
+
14
+
15
+ console = Console()
16
+
17
+
18
def _read(path: str) -> str:
    """Return the full contents of the file at *path*, decoded as UTF-8."""
    with Path(path).open(encoding="utf-8") as handle:
        return handle.read()
20
+
21
+
22
def _budget_label(budget: int) -> str:
    """Return ``8k`` for whole thousands, otherwise the exact number (``8,500``)."""
    if budget >= 1_000 and budget % 1_000 == 0:
        return f"{budget // 1_000}k"
    return f"{budget:,}"


def _emit(text: str) -> None:
    """Print *text* verbatim through the shared console.

    Markup and highlighting are disabled so bracketed content from user files
    is not interpreted as Rich tags, and soft wrapping keeps long lines from
    being re-wrapped to the terminal width.
    """
    console.print(text, markup=False, highlight=False, soft_wrap=True)


def main(argv: list[str] | None = None) -> int:
    """Run the ``contpress`` command-line interface.

    Sub-commands: ``count``, ``trim``, ``compress``, ``compact`` and
    ``report``; each takes a file path as its positional argument.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        ``0`` when a command ran, ``1`` if no known command matched.
    """
    parser = argparse.ArgumentParser(prog="contpress")
    sub = parser.add_subparsers(dest="command", required=True)

    count = sub.add_parser("count")
    count.add_argument("file")
    count.add_argument("--model", default="gpt-4o-mini")
    count.add_argument("--budget", type=int, default=8_000)

    trim = sub.add_parser("trim")
    trim.add_argument("file")
    trim.add_argument("--model", default="gpt-4o-mini")
    trim.add_argument("--max-tokens", type=int, required=True)

    compress = sub.add_parser("compress")
    compress.add_argument("file")
    compress.add_argument("--model", default="gpt-4o-mini")
    compress.add_argument("--target-tokens", type=int, default=1_000)
    compress.add_argument("--task", default="")

    compact = sub.add_parser("compact")
    compact.add_argument("file")

    report = sub.add_parser("report")
    report.add_argument("file")
    report.add_argument("--model", default="gpt-4o-mini")
    report.add_argument("--budget", type=int, default=8_000)

    args = parser.parse_args(argv)

    if args.command == "count":
        counter = TokenCounter(args.model)
        tokens = counter.count(_read(args.file))
        # The path is user input and may contain brackets, so print it raw.
        _emit(f"File: {args.file}")
        console.print(f"Model: {args.model}")
        console.print(f"Tokens: {tokens:,}")
        console.print(f"Fits {_budget_label(args.budget)}: {'yes' if tokens <= args.budget else 'no'}")
        return 0

    if args.command == "trim":
        _emit(TokenCounter(args.model).trim(_read(args.file), args.max_tokens))
        return 0

    if args.command == "compress":
        cp = ContextPress(model=args.model, max_input_tokens=args.target_tokens)
        _emit(cp.optimize(task=args.task or "Compress this prompt.", context=_read(args.file)).text)
        return 0

    if args.command == "compact":
        _emit(compact_json(json.loads(_read(args.file))))
        return 0

    if args.command == "report":
        counter = TokenCounter(args.model)
        text = _read(args.file)
        before = counter.count(text)
        # "after" is an estimate: the token count capped at the budget.
        after = min(before, args.budget)
        # Left on the default print path: the summary may rely on Rich rendering.
        console.print(UsageReport(args.model, before, after).summary())
        return 0

    return 1
83
+
84
+
85
+ if __name__ == "__main__":
86
+ raise SystemExit(main())
@@ -0,0 +1,15 @@
1
+ from contpress.compressors.base import BaseCompressor
2
+ from contpress.compressors.extractive import ExtractiveCompressor
3
+ from contpress.compressors.llmlingua import LLMLinguaCompressor
4
+ from contpress.compressors.reports import CompressionReport, compression_diff, compression_report
5
+ from contpress.compressors.sentence_filter import SentenceFilterCompressor
6
+
7
+ __all__ = [
8
+ "BaseCompressor",
9
+ "CompressionReport",
10
+ "ExtractiveCompressor",
11
+ "LLMLinguaCompressor",
12
+ "SentenceFilterCompressor",
13
+ "compression_diff",
14
+ "compression_report",
15
+ ]
@@ -0,0 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Protocol
4
+
5
+
6
class BaseCompressor(Protocol):
    """Structural interface for objects exposing a ``compress`` method."""

    def compress(self, text: str, query: str = "", max_tokens: int = 1_000) -> str:
        """Return a version of *text* reduced toward *max_tokens*, guided by *query*."""
        ...
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass, field
5
+
6
+ from contpress.tokenizer import TokenCounter
7
+
8
+
9
+ _SENTENCE_RE = re.compile(r"(?<=[.!?])\s+|\n+")
10
+ _WORD_RE = re.compile(r"[A-Za-z0-9_#./:-]+")
11
+
12
+
13
+ @dataclass(slots=True)
14
+ class ExtractiveCompressor:
15
+ model: str = "gpt-4o-mini"
16
+ counter: TokenCounter = field(init=False, repr=False)
17
+
18
+ def __post_init__(self) -> None:
19
+ self.counter = TokenCounter(self.model)
20
+
21
+ def compress(self, text: str, query: str = "", max_tokens: int = 1_000) -> str:
22
+ if max_tokens < 0:
23
+ raise ValueError("max_tokens must be >= 0")
24
+ if self.counter.fits(text, max_tokens):
25
+ return text
26
+
27
+ sentences = [part.strip() for part in _SENTENCE_RE.split(text or "") if part.strip()]
28
+ scored = [(self._score(sentence, query), index, sentence) for index, sentence in enumerate(sentences)]
29
+ scored.sort(key=lambda item: (-item[0], item[1]))
30
+
31
+ chosen: list[tuple[int, str]] = []
32
+ used = 0
33
+ for score, index, sentence in scored:
34
+ sentence_tokens = self.counter.count(sentence)
35
+ if sentence_tokens == 0 or used + sentence_tokens > max_tokens:
36
+ continue
37
+ if score > 0 or not chosen:
38
+ chosen.append((index, sentence))
39
+ used += sentence_tokens
40
+
41
+ chosen.sort(key=lambda item: item[0])
42
+ result = " ".join(sentence for _, sentence in chosen)
43
+ return result if self.counter.fits(result, max_tokens) else self.counter.trim(result, max_tokens)
44
+
45
+ def _score(self, sentence: str, query: str) -> float:
46
+ lower = sentence.lower()
47
+ sentence_terms = set(_WORD_RE.findall(lower))
48
+ query_terms = set(_WORD_RE.findall((query or "").lower()))
49
+ score = len(sentence_terms & query_terms) * 3
50
+
51
+ if re.search(r"\d", sentence):
52
+ score += 2
53
+ if re.search(r"https?://|www\.", lower):
54
+ score += 2
55
+ if re.search(r"\b[A-Za-z_][A-Za-z0-9_]*\(", sentence) or re.search(r"\b[A-Z][A-Za-z0-9]+[A-Z][A-Za-z0-9]*\b", sentence):
56
+ score += 1.5
57
+ if sentence.endswith(":") or sentence.startswith(("#", "-", "*")):
58
+ score += 1.5
59
+ if re.search(r"\b(must|should|required|preserve|constraint|risk|warning|error|security)\b", lower):
60
+ score += 2
61
+ if re.search(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b", sentence):
62
+ score += 1
63
+ return score
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+
4
class LLMLinguaCompressor:
    """Adapter around ``llmlingua.PromptCompressor``.

    Constructor arguments are forwarded unchanged to ``PromptCompressor``.

    Raises:
        ImportError: if the ``llmlingua`` package is not installed.
    """

    def __init__(self, *args, **kwargs) -> None:
        try:
            from llmlingua import PromptCompressor
        except ImportError as exc:
            raise ImportError("Install with: pip install contpress[compress]") from exc
        self._compressor = PromptCompressor(*args, **kwargs)

    def compress(
        self,
        prompt: str | None = None,
        instruction: str = "Preserve code, numbers, entities, requirements, and constraints.",
        target_tokens: int = 1_000,
        **kwargs,
    ) -> str:
        """Compress a prompt and return the compressed text.

        Besides its own parameter names, this method accepts the keyword names
        used by ``BaseCompressor.compress`` and translates them before calling
        ``compress_prompt``:

        * ``text`` is used as the source when *prompt* is ``None``;
        * ``max_tokens`` overrides *target_tokens* when it is not ``None``;
        * ``query`` is forwarded as ``question`` unless ``question`` was given.

        Any remaining keyword arguments are passed through untouched.
        """
        # Always pop the aliases so they never reach compress_prompt raw.
        text = kwargs.pop("text", None)
        max_tokens = kwargs.pop("max_tokens", None)
        query = kwargs.pop("query", None)

        if prompt is not None:
            source = prompt
        else:
            source = text if text is not None else ""
        if max_tokens is not None:
            target_tokens = max_tokens
        if query and "question" not in kwargs:
            kwargs["question"] = query

        result = self._compressor.compress_prompt(
            source,
            instruction=instruction,
            target_token=target_tokens,
            **kwargs,
        )
        if isinstance(result, dict):
            return result.get("compressed_prompt", "")
        return str(result)
@@ -0,0 +1,49 @@
1
+ from __future__ import annotations
2
+
3
+ import difflib
4
+ from dataclasses import dataclass
5
+
6
+ from contpress.tokenizer import TokenCounter
7
+
8
+
9
+ @dataclass(slots=True)
10
+ class CompressionReport:
11
+ original_tokens: int
12
+ compressed_tokens: int
13
+ method: str
14
+
15
+ @property
16
+ def saved_tokens(self) -> int:
17
+ return max(0, self.original_tokens - self.compressed_tokens)
18
+
19
+ @property
20
+ def compression_ratio(self) -> float:
21
+ if self.original_tokens == 0:
22
+ return 1.0
23
+ return self.compressed_tokens / self.original_tokens
24
+
25
+ def as_dict(self) -> dict[str, object]:
26
+ return {
27
+ "original_tokens": self.original_tokens,
28
+ "compressed_tokens": self.compressed_tokens,
29
+ "saved_tokens": self.saved_tokens,
30
+ "compression_ratio": round(self.compression_ratio, 3),
31
+ "method": self.method,
32
+ }
33
+
34
+
35
def compression_report(original: str, compressed: str, method: str = "extractive", model: str = "gpt-4o-mini") -> CompressionReport:
    """Count tokens in both texts for *model* and wrap them in a CompressionReport."""
    counter = TokenCounter(model)
    tokens_before = counter.count(original)
    tokens_after = counter.count(compressed)
    return CompressionReport(tokens_before, tokens_after, method)
38
+
39
+
40
def compression_diff(original: str, compressed: str) -> str:
    """Return a unified diff from *original* to *compressed* as a single string.

    Both texts are compared line by line; the result is empty when they are
    identical.
    """
    before_lines = original.splitlines()
    after_lines = compressed.splitlines()
    diff_lines = difflib.unified_diff(
        before_lines,
        after_lines,
        fromfile="original",
        tofile="compressed",
        lineterm="",
    )
    return "\n".join(diff_lines)
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from contpress.compressors.extractive import ExtractiveCompressor
4
+
5
+
6
class SentenceFilterCompressor(ExtractiveCompressor):
    """Subclass of ExtractiveCompressor that adds no behavior of its own."""

    pass
@@ -0,0 +1,27 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any
5
+
6
+ from contpress.formatters import compact_json
7
+
8
+
9
+ @dataclass(slots=True)
10
+ class OutputContract:
11
+ fields: dict[str, str]
12
+ format: str = "json"
13
+ required: list[str] = field(default_factory=list)
14
+ additional_instructions: list[str] = field(default_factory=list)
15
+
16
+ def prompt(self) -> str:
17
+ if self.format == "json":
18
+ schema = {
19
+ "format": "json",
20
+ "required": self.required or list(self.fields),
21
+ "fields": self.fields,
22
+ }
23
+ body = compact_json(schema)
24
+ else:
25
+ body = f"Format: {self.format}\nFields: {', '.join(self.fields)}"
26
+ extras = "\n".join(f"- {item}" for item in self.additional_instructions)
27
+ return f"Output contract:\n{body}" + (f"\n{extras}" if extras else "")
contextpress/core.py ADDED
@@ -0,0 +1,109 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable, Sequence
4
+ from dataclasses import dataclass
5
+ from typing import Any
6
+
7
+ from contpress.budgets import TokenBudget
8
+ from contpress.builder import PromptBuilder
9
+ from contpress.compressors.extractive import ExtractiveCompressor
10
+ from contpress.formatters import compact_json
11
+ from contpress.rag.filter import ContextFilter
12
+ from contpress.reports import UsageReport
13
+ from contpress.tokenizer import TokenCounter
14
+
15
+
16
+ @dataclass(slots=True)
17
+ class OptimizedPrompt:
18
+ text: str
19
+ report: dict[str, Any]
20
+
21
+
22
class ContextPress:
    """Build a prompt from task/context parts and shrink it to a token budget."""

    def __init__(
        self,
        model: str = "gpt-4o-mini",
        max_input_tokens: int = 4_000,
        max_output_tokens: int | None = None,
        reserve_output_tokens: int | None = None,
        compression: str = "extractive",
    ) -> None:
        """Store the settings and create the token counter and budget.

        ``max_output_tokens`` takes precedence; when it is ``None`` the value of
        ``reserve_output_tokens`` is used, falling back to ``0``.
        """
        self.model = model
        self.max_input_tokens = max_input_tokens
        self.max_output_tokens = max_output_tokens if max_output_tokens is not None else reserve_output_tokens or 0
        self.compression = compression
        self.counter = TokenCounter(model)
        self.budget = TokenBudget(
            model=model,
            max_input_tokens=max_input_tokens,
            reserve_output_tokens=self.max_output_tokens,
        )

    def optimize(
        self,
        task: str,
        context: str | Sequence[str] | dict[str, Any] | list[Any] = "",
        instructions: Iterable[str] | None = None,
        output: Iterable[str] | str | None = None,
        role: str | None = None,
    ) -> OptimizedPrompt:
        """Assemble a prompt, reduce its context if needed, and report token usage.

        Returns an ``OptimizedPrompt`` whose ``report`` comes from
        ``UsageReport(...).as_dict()`` and lists the methods applied.
        """
        methods: list[str] = []
        # Baseline: the prompt as it would look with the unreduced context.
        original_context = self._stringify_context(context)
        original_prompt = self._build_prompt(task, original_context, instructions, output, role)
        original_tokens = self.counter.count(original_prompt)

        # The context section may use at most 75% of the input budget.
        context_budget = max(0, int(self.budget.input_budget * 0.75))
        optimized_context = original_context
        # A non-string sequence made only of strings is treated as separate
        # documents and passed to ContextFilter regardless of whether the
        # prompt already fits.
        if isinstance(context, Sequence) and not isinstance(context, (str, bytes, dict)) and all(isinstance(item, str) for item in context):
            # NOTE(review): presumably ContextFilter.filter returns a string — confirm.
            optimized_context = ContextFilter(self.model).filter(task, list(context), max_tokens=context_budget)
            methods.append("context_filter")
        elif self.compression in {"extractive", "sentence_filter"} and not self.counter.fits(original_prompt, self.budget.input_budget):
            optimized_context = ExtractiveCompressor(self.model).compress(original_context, query=task, max_tokens=context_budget)
            # The label "sentence_filter" is recorded for both compression modes.
            methods.append("sentence_filter")

        prompt = self._build_prompt(task, optimized_context, instructions, output, role)
        # "compact_format" is always recorded.
        methods.append("compact_format")
        # Last resort: trim the whole prompt if it still exceeds the budget.
        if not self.counter.fits(prompt, self.budget.input_budget):
            prompt = self.counter.trim(prompt, self.budget.input_budget)
            methods.append("trim")

        optimized_tokens = self.counter.count(prompt)
        report = UsageReport(
            model=self.model,
            input_tokens_before=original_tokens,
            input_tokens_after=optimized_tokens,
            output_tokens_limit=self.max_output_tokens,
            # dict.fromkeys removes duplicates while keeping first-seen order.
            methods=list(dict.fromkeys(methods)),
        ).as_dict()
        return OptimizedPrompt(text=prompt, report=report)

    def _build_prompt(
        self,
        task: str,
        context: str,
        instructions: Iterable[str] | None,
        output: Iterable[str] | str | None,
        role: str | None,
    ) -> str:
        """Build the prompt text; sections are added in the order role, task,
        instructions, context, output, and falsy optional parts are skipped."""
        builder = PromptBuilder()
        if role:
            builder.role(role)
        builder.task(task)
        if instructions:
            builder.instructions(instructions)
        if context:
            builder.context(context)
        if output:
            builder.output(output)
        return builder.build()

    def _stringify_context(self, context: str | Sequence[str] | dict[str, Any] | list[Any]) -> str:
        """Convert *context* to a single string.

        Strings pass through; dicts, empty lists and lists containing any
        non-string item become compact JSON; other sequences are joined with
        blank lines; anything else goes through ``str()``.
        """
        if isinstance(context, str):
            return context
        if isinstance(context, dict):
            return compact_json(context)
        if isinstance(context, list) and (not context or not all(isinstance(item, str) for item in context)):
            return compact_json(context)
        if isinstance(context, Sequence):
            return "\n\n".join(str(item) for item in context)
        return str(context)
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import io
5
+ import json
6
+ from collections.abc import Mapping, Sequence
7
+ from typing import Any
8
+
9
+
10
def compact_json(data: Any, ensure_ascii: bool = False) -> str:
    """Serialize *data* to JSON with no whitespace after ``,`` or ``:``.

    Non-ASCII characters are kept as-is unless *ensure_ascii* is true.
    """
    tight_separators = (",", ":")
    return json.dumps(data, ensure_ascii=ensure_ascii, separators=tight_separators)
12
+
13
+
14
def drop_nulls(data: Any) -> Any:
    """Recursively remove ``None`` values from mappings and lists.

    Mappings come back as plain dicts and lists as new lists; any other value
    is returned unchanged.
    """
    if isinstance(data, Mapping):
        cleaned: dict[Any, Any] = {}
        for key, value in data.items():
            if value is None:
                continue
            cleaned[key] = drop_nulls(value)
        return cleaned
    if isinstance(data, list):
        kept: list[Any] = []
        for item in data:
            if item is not None:
                kept.append(drop_nulls(item))
        return kept
    return data
20
+
21
+
22
def shorten_keys(data: Any, mapping: Mapping[str, str]) -> Any:
    """Recursively rename mapping keys according to *mapping*.

    A key is looked up by its string form; keys without an entry are kept as
    they are. Lists are processed element by element and other values are
    returned unchanged.
    """
    if isinstance(data, Mapping):
        renamed: dict[Any, Any] = {}
        for key, value in data.items():
            new_key = mapping.get(str(key), key)
            renamed[new_key] = shorten_keys(value, mapping)
        return renamed
    if isinstance(data, list):
        return [shorten_keys(element, mapping) for element in data]
    return data
28
+
29
+
30
def json_to_csv_if_tabular(data: Any) -> str | None:
    """Render a non-empty sequence of mappings as CSV text, else return ``None``.

    The header row is the union of all keys in first-seen order. Keys are
    converted to strings for both the header and the row lookup, so values
    stored under non-string keys (for example integers) are written rather
    than silently dropped. Missing cells are left empty.

    Returns:
        The CSV text with ``\\n`` line endings and surrounding whitespace
        stripped, or ``None`` when *data* is not tabular.
    """
    if not isinstance(data, Sequence) or isinstance(data, (str, bytes)) or not data:
        return None
    if not all(isinstance(row, Mapping) for row in data):
        return None

    # dict.fromkeys de-duplicates in first-seen order without a list scan.
    headers = list(dict.fromkeys(str(key) for row in data for key in row))
    # Rows must use the same string keys as the header for DictWriter lookups.
    normalized_rows = [{str(key): value for key, value in row.items()} for row in data]

    output = io.StringIO()
    writer = csv.DictWriter(output, fieldnames=headers, lineterminator="\n", extrasaction="ignore")
    writer.writeheader()
    writer.writerows(normalized_rows)
    return output.getvalue().strip()
47
+
48
+
49
def compact_table(rows: Sequence[Mapping[str, Any]], headers: Sequence[str] | None = None) -> str:
    """Render *rows* as pipe-separated lines with a header line first.

    Columns come from *headers* when given (and non-empty), otherwise from the
    keys of the first row. ``None`` and missing values become empty cells.
    Returns an empty string when there are no rows.
    """
    if not rows:
        return ""

    columns = list(headers or rows[0].keys())

    def render(row: Mapping[str, Any]) -> str:
        cells = []
        for column in columns:
            value = row.get(column)
            cells.append("" if value is None else str(value))
        return "|".join(cells)

    header_line = "|".join(map(str, columns))
    return "\n".join([header_line, *(render(row) for row in rows)])