semql 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semql/__init__.py +153 -0
- semql/__main__.py +110 -0
- semql/_resolve.py +63 -0
- semql/backend.py +446 -0
- semql/catalog.py +304 -0
- semql/compile.py +1139 -0
- semql/dialect.py +109 -0
- semql/docs.py +176 -0
- semql/errors.py +149 -0
- semql/introspect.py +442 -0
- semql/model.py +412 -0
- semql/plan.py +177 -0
- semql/prompt.py +571 -0
- semql/py.typed +0 -0
- semql/safe.py +50 -0
- semql/spec.py +170 -0
- semql/validate.py +316 -0
- semql/visualize.py +237 -0
- semql-0.1.0.dist-info/METADATA +126 -0
- semql-0.1.0.dist-info/RECORD +22 -0
- semql-0.1.0.dist-info/WHEEL +4 -0
- semql-0.1.0.dist-info/licenses/LICENSE +28 -0
semql/__init__.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Public surface of the semantic layer.
|
|
2
|
+
|
|
3
|
+
Most users only need ``Catalog``, ``Cube``, the field types
|
|
4
|
+
(``Measure`` / ``Dimension`` / ``TimeDimension``), and
|
|
5
|
+
``SemanticQuery``. The rest is exported for callers building their own
|
|
6
|
+
tooling on top of the compiler (custom validators, MCP servers,
|
|
7
|
+
prompt rendering, etc.).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from semql.catalog import Catalog
|
|
13
|
+
from semql.compile import MAX_UNGROUPED_ROWS, Compiled, compile_query
|
|
14
|
+
from semql.docs import render_catalog_markdown
|
|
15
|
+
from semql.errors import (
|
|
16
|
+
CompileError,
|
|
17
|
+
CrossBackendError,
|
|
18
|
+
FilterTypeError,
|
|
19
|
+
JoinPathError,
|
|
20
|
+
PhaseDeferredError,
|
|
21
|
+
PlaceholderError,
|
|
22
|
+
ResolveError,
|
|
23
|
+
SemQLError,
|
|
24
|
+
UnknownIdentifierError,
|
|
25
|
+
)
|
|
26
|
+
from semql.introspect import (
|
|
27
|
+
CATALOG_CUBES,
|
|
28
|
+
CATALOG_DIMENSIONS,
|
|
29
|
+
CATALOG_MEASURES,
|
|
30
|
+
META_CUBES,
|
|
31
|
+
ResolvedQuery,
|
|
32
|
+
iter_cubes,
|
|
33
|
+
iter_fields,
|
|
34
|
+
iter_joins,
|
|
35
|
+
resolve_field,
|
|
36
|
+
resolve_query,
|
|
37
|
+
)
|
|
38
|
+
from semql.model import (
|
|
39
|
+
AggLiteral,
|
|
40
|
+
AuthContext,
|
|
41
|
+
Backend,
|
|
42
|
+
BaseField,
|
|
43
|
+
ChartTypeLiteral,
|
|
44
|
+
Cube,
|
|
45
|
+
Dimension,
|
|
46
|
+
DimTypeLiteral,
|
|
47
|
+
FormatLiteral,
|
|
48
|
+
GranularityLiteral,
|
|
49
|
+
Join,
|
|
50
|
+
Measure,
|
|
51
|
+
ScopePredicate,
|
|
52
|
+
Segment,
|
|
53
|
+
TimeDimension,
|
|
54
|
+
View,
|
|
55
|
+
)
|
|
56
|
+
from semql.plan import (
|
|
57
|
+
DrilldownSuggestion,
|
|
58
|
+
DrilldownSuggestions,
|
|
59
|
+
Presentation,
|
|
60
|
+
QueryIntent,
|
|
61
|
+
QueryPlan,
|
|
62
|
+
QueryStep,
|
|
63
|
+
RouterDecision,
|
|
64
|
+
RouterPath,
|
|
65
|
+
)
|
|
66
|
+
from semql.prompt import (
|
|
67
|
+
build_drilldown_prompt_fragment,
|
|
68
|
+
build_planner_prompt_fragment,
|
|
69
|
+
build_presenter_prompt_fragment,
|
|
70
|
+
build_query_generator_prompt_fragment,
|
|
71
|
+
build_router_prompt_fragment,
|
|
72
|
+
render_catalogue_block,
|
|
73
|
+
)
|
|
74
|
+
from semql.safe import is_safe_select
|
|
75
|
+
from semql.spec import (
|
|
76
|
+
BoolExpr,
|
|
77
|
+
CompareWindow,
|
|
78
|
+
Filter,
|
|
79
|
+
FilterOp,
|
|
80
|
+
SemanticQuery,
|
|
81
|
+
TimeWindow,
|
|
82
|
+
)
|
|
83
|
+
from semql.validate import ValidationError, validate
|
|
84
|
+
from semql.visualize import VizColumn, VizDecision, decide_visualization
|
|
85
|
+
|
|
86
|
+
# Public API of the package: every name imported above is re-exported here.
# Entries are kept in ASCII sort order (uppercase names before lowercase).
__all__ = [
    "AggLiteral",
    "AuthContext",
    "Backend",
    "BaseField",
    "BoolExpr",
    "CATALOG_CUBES",
    "CATALOG_DIMENSIONS",
    "CATALOG_MEASURES",
    "Catalog",
    "ChartTypeLiteral",
    "CompareWindow",
    "CompileError",
    "Compiled",
    "CrossBackendError",
    "Cube",
    "DimTypeLiteral",
    "Dimension",
    "DrilldownSuggestion",
    "DrilldownSuggestions",
    "Filter",
    "FilterOp",
    "FilterTypeError",
    "FormatLiteral",
    "GranularityLiteral",
    "Join",
    "JoinPathError",
    "MAX_UNGROUPED_ROWS",
    "META_CUBES",
    "Measure",
    "PhaseDeferredError",
    "PlaceholderError",
    "Presentation",
    "QueryIntent",
    "QueryPlan",
    "QueryStep",
    "ResolveError",
    "ResolvedQuery",
    "RouterDecision",
    "RouterPath",
    "ScopePredicate",
    "Segment",
    "SemQLError",
    "SemanticQuery",
    "TimeDimension",
    "TimeWindow",
    "UnknownIdentifierError",
    "ValidationError",
    "View",
    "VizColumn",
    "VizDecision",
    "build_drilldown_prompt_fragment",
    "build_planner_prompt_fragment",
    "build_presenter_prompt_fragment",
    "build_query_generator_prompt_fragment",
    "build_router_prompt_fragment",
    "compile_query",
    "decide_visualization",
    "is_safe_select",
    "iter_cubes",
    "iter_fields",
    "iter_joins",
    "render_catalog_markdown",
    "render_catalogue_block",
    "resolve_field",
    "resolve_query",
    "validate",
]
|
semql/__main__.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""``python -m semql.compile`` CLI.
|
|
2
|
+
|
|
3
|
+
Compiles a ``SemanticQuery`` JSON spec against a Catalog declared
|
|
4
|
+
in a Python module and prints the SQL + params to stdout. Useful
|
|
5
|
+
for ad-hoc cube authoring (no need to write a runner script) and
|
|
6
|
+
as a smoke target in CI.
|
|
7
|
+
|
|
8
|
+
python -m semql.compile --catalog mypkg.catalogs:default \\
|
|
9
|
+
'{"measures": ["orders.revenue"], "dimensions": ["orders.region"]}'
|
|
10
|
+
|
|
11
|
+
The ``--catalog`` arg is ``module.path:attr`` — the module is
|
|
12
|
+
imported, the named attribute must be a ``Catalog`` instance.
|
|
13
|
+
Context substitutions for ``{schema}`` / ``{tenant}`` placeholders
|
|
14
|
+
go through repeated ``--context key=value`` flags.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import importlib
|
|
21
|
+
import json
|
|
22
|
+
import sys
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from semql import Catalog, SemanticQuery
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _load_catalog(spec: str) -> Catalog:
|
|
29
|
+
"""Resolve ``module.path:attr`` into the named ``Catalog``."""
|
|
30
|
+
if ":" not in spec:
|
|
31
|
+
raise SystemExit(
|
|
32
|
+
f"--catalog must be 'module.path:attr', got {spec!r}. "
|
|
33
|
+
"Example: --catalog mypkg.catalogs:default"
|
|
34
|
+
)
|
|
35
|
+
module_path, attr = spec.rsplit(":", 1)
|
|
36
|
+
try:
|
|
37
|
+
module = importlib.import_module(module_path)
|
|
38
|
+
except ModuleNotFoundError as exc:
|
|
39
|
+
raise SystemExit(f"Could not import {module_path!r}: {exc}") from exc
|
|
40
|
+
try:
|
|
41
|
+
catalog = getattr(module, attr)
|
|
42
|
+
except AttributeError as exc:
|
|
43
|
+
raise SystemExit(f"Module {module_path!r} has no attribute {attr!r}.") from exc
|
|
44
|
+
if not isinstance(catalog, Catalog):
|
|
45
|
+
raise SystemExit(f"{spec!r} resolved to {type(catalog).__name__}, not semql.Catalog.")
|
|
46
|
+
return catalog
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _parse_context(pairs: list[str]) -> dict[str, str]:
|
|
50
|
+
"""Parse repeated ``--context key=value`` flags into a dict."""
|
|
51
|
+
out: dict[str, str] = {}
|
|
52
|
+
for pair in pairs:
|
|
53
|
+
if "=" not in pair:
|
|
54
|
+
raise SystemExit(f"--context expects key=value, got {pair!r}.")
|
|
55
|
+
k, v = pair.split("=", 1)
|
|
56
|
+
out[k] = v
|
|
57
|
+
return out
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser used by ``main``."""
    cli = argparse.ArgumentParser(
        prog="python -m semql.compile",
        description="Compile a SemanticQuery JSON spec to SQL.",
    )
    cli.add_argument(
        "--catalog",
        required=True,
        help="Catalog locator: module.path:attr (e.g. mypkg.catalogs:default).",
    )
    cli.add_argument(
        "--context",
        action="append",
        default=[],
        help="Compile-time context pair (key=value). May be repeated.",
    )
    cli.add_argument(
        "--params-format",
        choices=("comment", "json"),
        default="comment",
        help="How to print the params: as a SQL comment (default) or a JSON line.",
    )
    cli.add_argument(
        "spec",
        help="SemanticQuery as a JSON string. Use '-' to read from stdin.",
    )
    return cli


def main(argv: list[str] | None = None) -> int:
    """Compile a JSON query spec against a catalog and print the result.

    Parses ``argv``, decodes the spec (read from stdin when it is ``-``),
    validates it as a ``SemanticQuery``, loads the catalog, and prints the
    compiled SQL, then the params, then the columns.

    Args:
        argv: Argument list handed to ``parse_args``. ``None`` is passed
            through unchanged.

    Returns:
        ``0`` once the output has been printed.

    Raises:
        SystemExit: If the spec is not valid JSON, or from argument
            parsing, catalog loading, or context parsing.
    """
    args = _build_parser().parse_args(argv)

    if args.spec == "-":
        raw_spec = sys.stdin.read()
    else:
        raw_spec = args.spec
    try:
        spec_dict: dict[str, Any] = json.loads(raw_spec)
    except json.JSONDecodeError as exc:
        raise SystemExit(f"--spec must be JSON: {exc}") from exc
    # The query is validated before the catalog module is imported.
    query = SemanticQuery.model_validate(spec_dict)

    catalog = _load_catalog(args.catalog)
    ctx = _parse_context(args.context)
    compiled = catalog.compile(query, context=ctx)

    print(compiled.sql)
    params_json = json.dumps(compiled.params, default=str)
    if args.params_format == "json":
        print(params_json)
    else:
        print(f"-- params: {params_json}")
    print(f"-- columns: {compiled.columns}")
    return 0
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
semql/_resolve.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Shared identifier resolution for the semantic layer.
|
|
2
|
+
|
|
3
|
+
Both `compile.py` and `visualize.py` parse the planner's `cube.field`
|
|
4
|
+
references against the catalogue. Keeping a single resolver here makes
|
|
5
|
+
the validation, regex shape, and error class consistent across both
|
|
6
|
+
modules.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
|
|
13
|
+
from semql.errors import ResolveError, UnknownIdentifierError, closest_match
|
|
14
|
+
from semql.model import Cube, Dimension, Measure, TimeDimension
|
|
15
|
+
|
|
16
|
+
# A qualified reference is two identifier-shaped parts joined by a single dot.
# Each part starts with a letter or underscore; matching ignores case.
_QUALIFIED_RE = re.compile(r"^([a-z_][a-z0-9_]*)\.([a-z_][a-z0-9_]*)$", re.IGNORECASE)


def split(qualified: str) -> tuple[str, str]:
    """Break a ``cube.field`` reference into ``(cube_name, field_name)``.

    Raises:
        ResolveError: If ``qualified`` does not have the ``cube.field`` shape.
    """
    matched = _QUALIFIED_RE.match(qualified)
    if matched is None:
        raise ResolveError(f"Field reference must be 'cube.field', got: {qualified!r}")
    cube_name, field_name = matched.groups()
    return cube_name, field_name
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def resolve_field(
    qualified: str,
    catalog: dict[str, Cube],
) -> tuple[Cube, Measure | Dimension | TimeDimension]:
    """Look up a ``cube.field`` reference in ``catalog``.

    Args:
        qualified: Reference in ``cube.field`` form.
        catalog: Mapping from cube name to ``Cube``.

    Returns:
        The owning cube and the first field whose ``name`` matches.

    Raises:
        ResolveError: From ``split`` when ``qualified`` is malformed.
        UnknownIdentifierError: When the cube is not in ``catalog`` or the
            cube has no field with that name. The message lists the known
            names and adds a suggestion when ``closest_match`` returns one.
    """
    cube_name, field_name = split(qualified)

    if cube_name not in catalog:
        suggestion = closest_match(cube_name, catalog.keys())
        known_cubes = ", ".join(sorted(catalog))
        did_you_mean = ""
        if suggestion:
            did_you_mean = f" Did you mean {suggestion!r}?"
        raise UnknownIdentifierError(
            f"Unknown cube: {cube_name!r}. Known cubes: {known_cubes}.{did_you_mean}",
            kind="cube",
            name=cube_name,
            hint=suggestion,
        )

    cube = catalog[cube_name]
    # Search order is measures, then dimensions, then time dimensions, so a
    # name that appears in more than one group resolves to the earliest.
    for group in (cube.measures, cube.dimensions, cube.time_dimensions):
        for candidate in group:
            if candidate.name == field_name:
                return cube, candidate

    suggestion = closest_match(field_name, cube.field_names())
    known_fields = ", ".join(sorted(cube.field_names()))
    did_you_mean = ""
    if suggestion:
        did_you_mean = f" Did you mean {suggestion!r}?"
    raise UnknownIdentifierError(
        f"Unknown field {field_name!r} on cube {cube_name!r}. Known fields: {known_fields}.{did_you_mean}",
        kind="field",
        name=field_name,
        cube=cube_name,
        hint=suggestion,
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Exports the two resolver helpers and re-exports the error classes they
# raise (imported above from semql.errors).
__all__ = ["ResolveError", "UnknownIdentifierError", "split", "resolve_field"]
|