kontra-0.5.2-py3-none-any.whl

This diff shows the contents of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package as it appears in its public registry.
Files changed (124)
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,39 @@
+ # src/kontra/scout/reporters/__init__.py
+ """
+ Kontra Scout reporters for different output formats.
+ """
+
+ from typing import Literal
+
+ from kontra.scout.types import DatasetProfile
+
+ from .json_reporter import render_json, render_llm
+ from .markdown_reporter import render_markdown
+ from .rich_reporter import render_rich
+
+
+ def render_profile(
+     profile: DatasetProfile,
+     format: Literal["rich", "json", "markdown", "llm"] = "rich",
+ ) -> str:
+     """
+     Render a DatasetProfile to the specified format.
+
+     Args:
+         profile: The DatasetProfile to render
+         format: Output format ("rich", "json", "markdown", "llm")
+
+     Returns:
+         Formatted string output
+     """
+     if format == "json":
+         return render_json(profile)
+     elif format == "markdown":
+         return render_markdown(profile)
+     elif format == "llm":
+         return render_llm(profile)
+     else:
+         return render_rich(profile)
+
+
+ __all__ = ["render_profile", "render_json", "render_markdown", "render_rich", "render_llm"]
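
For orientation, a minimal usage sketch of the `render_profile` dispatcher above. The `DatasetProfile` value is assumed to come from Kontra Scout's profiler elsewhere in the package; only `render_profile` and its format names are taken from this file:

```python
from kontra.scout.reporters import render_profile

# `profile` is assumed to be a DatasetProfile built by Kontra Scout's
# profiler (not shown here); this sketch only exercises the dispatcher.
markdown_report = render_profile(profile, format="markdown")
llm_context = render_profile(profile, format="llm")
print(render_profile(profile))  # default: Rich console rendering
```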
@@ -0,0 +1,165 @@
+ # src/kontra/scout/reporters/json_reporter.py
+ """
+ JSON reporter for Kontra Scout - optimized for LLM consumption.
+ """
+
+ from __future__ import annotations
+
+ import json
+ from typing import Any, Dict
+
+ from kontra.scout.types import DatasetProfile
+
+
+ def render_json(profile: DatasetProfile, indent: int = 2) -> str:
+     """
+     Render a DatasetProfile as JSON.
+
+     Args:
+         profile: The DatasetProfile to render
+         indent: JSON indentation (default: 2)
+
+     Returns:
+         JSON string
+     """
+     payload = profile.to_dict()
+     return json.dumps(payload, indent=indent, default=str, ensure_ascii=False)
+
+
+ def build_compact_json(profile: DatasetProfile) -> Dict[str, Any]:
+     """
+     Build a compact JSON representation optimized for LLM context.
+     Omits null/empty fields for minimal token usage.
+     """
+     d = profile.to_dict()
+     return _strip_nulls(d)
+
+
+ def _strip_nulls(obj: Any) -> Any:
+     """Recursively remove None values and empty lists/dicts."""
+     if isinstance(obj, dict):
+         return {
+             k: _strip_nulls(v)
+             for k, v in obj.items()
+             if v is not None and v != [] and v != {}
+         }
+     elif isinstance(obj, list):
+         return [_strip_nulls(item) for item in obj if item is not None]
+     return obj
+
+
+ def render_llm(profile: DatasetProfile) -> str:
+     """
+     Render a DatasetProfile in token-optimized format for LLM context.
+
+     Design goals:
+     - Minimal tokens while preserving signal
+     - Easy for LLM to parse and reason about
+     - Key info: schema, null rates, cardinality, semantic types
+     - Actionable: enough info to infer validation rules
+
+     Format:
+     ```
+     # Dataset: source_uri
+     rows=N cols=N
+
+     ## Columns
+     col_name: type | nulls=N% | distinct=N | semantic_type
+     values: [val1, val2, ...] or top: val1(N%), val2(N%)
+     ```
+     """
+     lines = []
+
+     # Header
+     lines.append(f"# Dataset: {profile.source_uri}")
+     lines.append(f"rows={profile.row_count:,} cols={profile.column_count}")
+     lines.append("")
+     lines.append("## Columns")
+
+     for col in profile.columns:
+         # Main column line: name: type | nulls | distinct | semantic
+         parts = [col.dtype]
+
+         # Null rate (only if > 0)
+         if col.null_rate > 0:
+             null_pct = col.null_rate * 100
+             if null_pct < 0.1:
+                 parts.append("nulls=<0.1%")
+             else:
+                 parts.append(f"nulls={null_pct:.1f}%")
+
+         # Distinct count with uniqueness hint
+         if col.uniqueness_ratio >= 0.99 and col.distinct_count > 100:
+             parts.append(f"distinct={col.distinct_count:,} (unique)")
+         elif col.distinct_count <= 20:
+             parts.append(f"distinct={col.distinct_count}")
+         else:
+             parts.append(f"distinct={col.distinct_count:,}")
+
+         # Semantic type
+         if col.semantic_type:
+             parts.append(col.semantic_type)
+
+         # Pattern detection
+         if col.detected_patterns:
+             parts.append(f"pattern:{col.detected_patterns[0]}")
+
+         lines.append(f"{col.name}: {' | '.join(parts)}")
+
+         # Values line (if low cardinality or has top values)
+         if col.values and col.is_low_cardinality:
+             # All values for low cardinality
+             vals_str = ", ".join(repr(v) for v in col.values[:10])
+             if len(col.values) > 10:
+                 vals_str += f", ... ({len(col.values)} total)"
+             lines.append(f" values: [{vals_str}]")
+         elif col.top_values:
+             # Top values with percentages
+             top_parts = []
+             for tv in col.top_values[:5]:
+                 val_repr = repr(tv.value) if isinstance(tv.value, str) else str(tv.value)
+                 top_parts.append(f"{val_repr}({tv.pct:.0f}%)")
+             lines.append(f" top: {', '.join(top_parts)}")
+
+         # Temporal range (useful for freshness rules)
+         if col.temporal and (col.temporal.date_min or col.temporal.date_max):
+             date_range = f"{col.temporal.date_min or '?'} to {col.temporal.date_max or '?'}"
+             lines.append(f" range: {date_range}")
+
+     # Footer with quick stats
+     lines.append("")
+     lines.append("## Summary")
+
+     # Count column types
+     type_counts: Dict[str, int] = {}
+     for col in profile.columns:
+         t = col.dtype
+         type_counts[t] = type_counts.get(t, 0) + 1
+     type_summary = ", ".join(f"{t}:{n}" for t, n in sorted(type_counts.items()))
+     lines.append(f"types: {type_summary}")
+
+     # Identify potential issues
+     issues = []
+     for col in profile.columns:
+         if col.null_rate > 0.1:  # >10% nulls
+             issues.append(f"{col.name}:{col.null_rate*100:.0f}%null")
+     if issues:
+         lines.append(f"high_nulls: {', '.join(issues[:5])}")
+
+     # Identify unique columns (likely identifiers)
+     unique_cols = [
+         col.name for col in profile.columns
+         if col.uniqueness_ratio >= 0.99 and col.distinct_count > 100
+     ]
+     if unique_cols:
+         lines.append(f"likely_ids: {', '.join(unique_cols[:5])}")
+
+     # Identify categorical columns
+     categorical = [
+         col.name for col in profile.columns
+         if col.is_low_cardinality or col.semantic_type == "category"
+     ]
+     if categorical:
+         lines.append(f"categorical: {', '.join(categorical[:5])}")
+
+     return "\n".join(lines)
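
A quick sketch of the compaction behavior implied by `_strip_nulls` and `build_compact_json` above: `None` values and empty containers are dropped from dicts, and `None` items are dropped from lists. The sample dict below is illustrative only:

```python
from kontra.scout.reporters.json_reporter import _strip_nulls

# Illustrative input; keys mirror the kind of fields a profile dict contains.
sample = {
    "name": "order_id",
    "semantic_type": None,               # dropped: None value
    "values": [],                        # dropped: empty list
    "numeric": {"min": 1, "max": None},  # kept, but "max" is stripped
}
print(_strip_nulls(sample))  # {'name': 'order_id', 'numeric': {'min': 1}}
```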
@@ -0,0 +1,152 @@
+ # src/kontra/scout/reporters/markdown_reporter.py
+ """
+ Markdown reporter for Kontra Scout - documentation-friendly output.
+ """
+
+ from __future__ import annotations
+
+ from typing import List
+
+ from kontra.scout.types import DatasetProfile, ColumnProfile
+
+
+ def render_markdown(profile: DatasetProfile) -> str:
+     """
+     Render a DatasetProfile as Markdown.
+
+     Returns Markdown string suitable for documentation or GitHub.
+     """
+     lines: List[str] = []
+
+     # Header
+     lines.append(f"# Data Profile: {profile.source_uri}")
+     lines.append("")
+
+     # Summary
+     lines.append("## Summary")
+     lines.append("")
+     lines.append(f"- **Format:** {profile.source_format}")
+     lines.append(f"- **Rows:** {profile.row_count:,}")
+     lines.append(f"- **Columns:** {profile.column_count}")
+     if profile.estimated_size_bytes:
+         size_mb = profile.estimated_size_bytes / (1024 * 1024)
+         lines.append(f"- **Size:** {size_mb:.1f} MB")
+     if profile.sampled:
+         lines.append(f"- **Sampled:** {profile.sample_size:,} rows")
+     lines.append(f"- **Profiled at:** {profile.profiled_at}")
+     lines.append(f"- **Duration:** {profile.profile_duration_ms} ms")
+     lines.append("")
+
+     # Schema table
+     lines.append("## Schema")
+     lines.append("")
+     lines.append("| Column | Type | Nulls | Distinct | Cardinality |")
+     lines.append("|--------|------|-------|----------|-------------|")
+
+     for col in profile.columns:
+         null_pct = f"{col.null_rate * 100:.1f}%"
+         distinct = f"{col.distinct_count:,}"
+         card = _cardinality_label(col)
+         lines.append(f"| {col.name} | {col.dtype} | {null_pct} | {distinct} | {card} |")
+
+     lines.append("")
+
+     # Low cardinality columns (categorical)
+     low_card_cols = [c for c in profile.columns if c.is_low_cardinality and c.values]
+     if low_card_cols:
+         lines.append("## Categorical Columns")
+         lines.append("")
+         for col in low_card_cols:
+             lines.append(f"### {col.name}")
+             lines.append("")
+             if col.values:
+                 lines.append(f"**Values ({len(col.values)}):** `{', '.join(str(v) for v in col.values)}`")
+             if col.top_values:
+                 lines.append("")
+                 lines.append("| Value | Count | % |")
+                 lines.append("|-------|-------|---|")
+                 for tv in col.top_values:
+                     lines.append(f"| {tv.value} | {tv.count:,} | {tv.pct:.1f}% |")
+             lines.append("")
+
+     # Numeric columns
+     numeric_cols = [c for c in profile.columns if c.numeric]
+     if numeric_cols:
+         lines.append("## Numeric Columns")
+         lines.append("")
+         lines.append("| Column | Min | Max | Mean | Median | Std |")
+         lines.append("|--------|-----|-----|------|--------|-----|")
+         for col in numeric_cols:
+             n = col.numeric
+             lines.append(
+                 f"| {col.name} | "
+                 f"{_fmt(n.min)} | {_fmt(n.max)} | "
+                 f"{_fmt(n.mean)} | {_fmt(n.median)} | {_fmt(n.std)} |"
+             )
+         lines.append("")
+
+     # String columns
+     string_cols = [c for c in profile.columns if c.string]
+     if string_cols:
+         lines.append("## String Columns")
+         lines.append("")
+         lines.append("| Column | Min Len | Max Len | Avg Len | Empty |")
+         lines.append("|--------|---------|---------|---------|-------|")
+         for col in string_cols:
+             s = col.string
+             lines.append(
+                 f"| {col.name} | "
+                 f"{s.min_length or 'N/A'} | {s.max_length or 'N/A'} | "
+                 f"{_fmt(s.avg_length)} | {s.empty_count:,} |"
+             )
+         lines.append("")
+
+     # Temporal columns
+     temporal_cols = [c for c in profile.columns if c.temporal]
+     if temporal_cols:
+         lines.append("## Temporal Columns")
+         lines.append("")
+         lines.append("| Column | Min Date | Max Date |")
+         lines.append("|--------|----------|----------|")
+         for col in temporal_cols:
+             t = col.temporal
+             lines.append(f"| {col.name} | {t.date_min or 'N/A'} | {t.date_max or 'N/A'} |")
+         lines.append("")
+
+     # Pattern detection
+     pattern_cols = [c for c in profile.columns if c.detected_patterns]
+     if pattern_cols:
+         lines.append("## Detected Patterns")
+         lines.append("")
+         for col in pattern_cols:
+             patterns = ", ".join(col.detected_patterns)
+             lines.append(f"- **{col.name}:** {patterns}")
+         lines.append("")
+
+     # Footer
+     lines.append("---")
+     lines.append(f"*Generated by Kontra Scout v{profile.engine_version}*")
+
+     return "\n".join(lines)
+
+
+ def _cardinality_label(col: ColumnProfile) -> str:
+     """Get cardinality label for a column."""
+     if col.uniqueness_ratio >= 0.99 and col.null_rate == 0:
+         return "unique"
+     if col.is_low_cardinality:
+         return "low"
+     if col.distinct_count < 100:
+         return "medium"
+     return "high"
+
+
+ def _fmt(val: float | None) -> str:
+     """Format a number for Markdown."""
+     if val is None:
+         return "N/A"
+     if abs(val) >= 1000:
+         return f"{val:,.0f}"
+     if abs(val) >= 1:
+         return f"{val:.2f}"
+     return f"{val:.4f}"
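
As a reference for the number formatting used in the Markdown tables above, `_fmt` rounds by magnitude; the sample values below are illustrative:

```python
from kontra.scout.reporters.markdown_reporter import _fmt

print(_fmt(None))        # "N/A"
print(_fmt(1234567.8))   # "1,234,568" -> thousands: grouped, no decimals
print(_fmt(3.14159))     # "3.14"      -> >= 1: two decimals
print(_fmt(0.004812))    # "0.0048"    -> < 1: four decimals
```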
@@ -0,0 +1,144 @@
+ # src/kontra/scout/reporters/rich_reporter.py
+ """
+ Rich console reporter for Kontra Scout.
+ """
+
+ from __future__ import annotations
+
+ from typing import List
+
+ from rich.console import Console
+ from rich.table import Table
+ from rich.panel import Panel
+ from rich.text import Text
+
+ from kontra.scout.types import DatasetProfile, ColumnProfile
+
+
+ def render_rich(profile: DatasetProfile) -> str:
+     """
+     Render a DatasetProfile as Rich console output.
+
+     Returns a string representation (for compatibility with other reporters).
+     For direct console output, use print_rich() instead.
+     """
+     # Use a string buffer to capture output without duplicating
+     from io import StringIO
+     string_io = StringIO()
+     console = Console(file=string_io, force_terminal=True, width=120)
+     _print_to_console(console, profile)
+     return string_io.getvalue()
+
+
+ def print_rich(profile: DatasetProfile) -> None:
+     """Print profile directly to console with Rich formatting."""
+     console = Console()
+     _print_to_console(console, profile)
+
+
+ def _print_to_console(console: Console, profile: DatasetProfile) -> None:
+     """Internal: render profile to a console instance."""
+     # Header
+     title = f"[bold cyan]Kontra Scout[/bold cyan] - {profile.source_uri}"
+     size_str = ""
+     if profile.estimated_size_bytes:
+         size_mb = profile.estimated_size_bytes / (1024 * 1024)
+         size_str = f" | Size: {size_mb:.1f} MB"
+     sample_str = f" (sampled: {profile.sample_size:,} rows)" if profile.sampled else ""
+
+     header = (
+         f"Rows: [bold]{profile.row_count:,}[/bold] | "
+         f"Columns: [bold]{profile.column_count}[/bold]{size_str} | "
+         f"Duration: [bold]{profile.profile_duration_ms}[/bold] ms{sample_str}"
+     )
+     console.print(Panel(header, title=title, border_style="cyan"))
+
+     # Column table
+     table = Table(show_header=True, header_style="bold magenta", expand=True)
+     table.add_column("Column", style="cyan", no_wrap=True)
+     table.add_column("Type", style="green")
+     table.add_column("Nulls", justify="right")
+     table.add_column("Distinct", justify="right")
+     table.add_column("Cardinality")
+     table.add_column("Info")
+
+     for col in profile.columns:
+         null_pct = f"{col.null_rate * 100:.1f}%"
+         distinct_str = f"{col.distinct_count:,}"
+
+         # Cardinality classification
+         if col.uniqueness_ratio >= 0.99 and col.null_rate == 0:
+             card = "[bold green]unique[/bold green]"
+         elif col.is_low_cardinality:
+             if col.values:
+                 vals = ", ".join(str(v) for v in col.values[:5])
+                 if len(col.values) > 5:
+                     vals += f" +{len(col.values) - 5} more"
+                 card = f"[yellow]low[/yellow]: [{vals}]"
+             else:
+                 card = "[yellow]low[/yellow]"
+         elif col.distinct_count < 100:
+             card = "[blue]medium[/blue]"
+         else:
+             card = "high"
+
+         # Info column
+         info_parts: List[str] = []
+         if col.semantic_type:
+             info_parts.append(f"[dim]{col.semantic_type}[/dim]")
+         if col.detected_patterns:
+             info_parts.append(f"[magenta]{', '.join(col.detected_patterns)}[/magenta]")
+         if col.numeric:
+             info_parts.append(
+                 f"[dim]min={_fmt_num(col.numeric.min)}, "
+                 f"max={_fmt_num(col.numeric.max)}, "
+                 f"mean={_fmt_num(col.numeric.mean)}[/dim]"
+             )
+         if col.temporal:
+             info_parts.append(f"[dim]{col.temporal.date_min} to {col.temporal.date_max}[/dim]")
+
+         table.add_row(
+             col.name,
+             col.dtype,
+             null_pct,
+             distinct_str,
+             card,
+             " | ".join(info_parts) if info_parts else "",
+         )
+
+     console.print(table)
+
+     # Top values section (if any columns have them)
+     cols_with_top = [c for c in profile.columns if c.top_values and c.is_low_cardinality]
+     if cols_with_top:
+         console.print()
+         console.print("[bold]Top Values:[/bold]")
+         for col in cols_with_top[:5]:  # Limit to 5 columns
+             vals = ", ".join(
+                 f"{tv.value} ({tv.pct:.1f}%)" for tv in col.top_values[:3]
+             )
+             console.print(f" [cyan]{col.name}[/cyan]: {vals}")
+
+     # Numeric summary
+     numeric_cols = [c for c in profile.columns if c.numeric]
+     if numeric_cols:
+         console.print()
+         console.print("[bold]Numeric Summary:[/bold]")
+         for col in numeric_cols[:5]:  # Limit to 5
+             n = col.numeric
+             console.print(
+                 f" [cyan]{col.name}[/cyan]: "
+                 f"min={_fmt_num(n.min)}, max={_fmt_num(n.max)}, "
+                 f"mean={_fmt_num(n.mean)}, median={_fmt_num(n.median)}"
+             )
+
+
+ def _fmt_num(val: float | None) -> str:
+     """Format a number for display."""
+     if val is None:
+         return "N/A"
+     if abs(val) >= 1000:
+         return f"{val:,.0f}"
+     if abs(val) >= 1:
+         return f"{val:.2f}"
+     return f"{val:.4f}"
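
A minimal sketch of the two entry points above: `print_rich` writes straight to the terminal, while `render_rich` captures the same output as a string (containing ANSI escape codes, since the capture console uses `force_terminal=True`). As before, `profile` is assumed to be a `DatasetProfile` produced by Kontra Scout's profiler:

```python
from kontra.scout.reporters.rich_reporter import print_rich, render_rich

# `profile` is assumed to be a DatasetProfile obtained elsewhere.
print_rich(profile)  # interactive use: render directly to the terminal

ansi_report = render_rich(profile)  # capture as a string (includes ANSI escapes)
with open("profile_report.txt", "w", encoding="utf-8") as fh:
    fh.write(ansi_report)
```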