semql 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semql-0.1.0/LICENSE +28 -0
- semql-0.1.0/PKG-INFO +126 -0
- semql-0.1.0/README.md +101 -0
- semql-0.1.0/pyproject.toml +35 -0
- semql-0.1.0/src/semql/__init__.py +153 -0
- semql-0.1.0/src/semql/__main__.py +110 -0
- semql-0.1.0/src/semql/_resolve.py +63 -0
- semql-0.1.0/src/semql/backend.py +446 -0
- semql-0.1.0/src/semql/catalog.py +304 -0
- semql-0.1.0/src/semql/compile.py +1139 -0
- semql-0.1.0/src/semql/dialect.py +109 -0
- semql-0.1.0/src/semql/docs.py +176 -0
- semql-0.1.0/src/semql/errors.py +149 -0
- semql-0.1.0/src/semql/introspect.py +442 -0
- semql-0.1.0/src/semql/model.py +412 -0
- semql-0.1.0/src/semql/plan.py +177 -0
- semql-0.1.0/src/semql/prompt.py +571 -0
- semql-0.1.0/src/semql/py.typed +0 -0
- semql-0.1.0/src/semql/safe.py +50 -0
- semql-0.1.0/src/semql/spec.py +170 -0
- semql-0.1.0/src/semql/validate.py +316 -0
- semql-0.1.0/src/semql/visualize.py +237 -0
semql-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026, Nikhil Pallamreddy
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
semql-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: semql
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Semantic data layer: SemanticQuery → backend SQL with authorisation, row-level scoping, time-spine fill, and a typed four-role LLM prompt pipeline.
|
|
5
|
+
Author: Nikhil Pallamreddy
|
|
6
|
+
Author-email: Nikhil Pallamreddy <nikhil.pallamreddy+git@gmail.com>
|
|
7
|
+
License-Expression: BSD-3-Clause
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Database
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Dist: pydantic>=2.13.4
|
|
19
|
+
Requires-Dist: sqlglot>=30.9.0
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Project-URL: Homepage, https://github.com/npalladium/semql
|
|
22
|
+
Project-URL: Repository, https://github.com/npalladium/semql
|
|
23
|
+
Project-URL: Issues, https://github.com/npalladium/semql/issues
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# semql
|
|
27
|
+
|
|
28
|
+
Pure-Python compiler from a semantic spec to backend SQL. Define
|
|
29
|
+
cubes (dimensions, measures, time-dimensions, joins) once; emit
|
|
30
|
+
correct, parameterised SQL for Postgres, ClickHouse, DuckDB and
|
|
31
|
+
(via the strategy seam) Snowflake / BigQuery.
|
|
32
|
+
|
|
33
|
+
`semql` does **no I/O**: catalogues are Python data; the compiler
|
|
34
|
+
returns SQL + bound params; running the SQL is the caller's job.
|
|
35
|
+
Prompt-fragment rendering for LLM planners ships in the core
|
|
36
|
+
(`semql.prompt`). Sibling packages add MCP exposure (`semql-mcp`)
|
|
37
|
+
and ER diagrams (`semql-erd`).
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```sh
|
|
42
|
+
pip install semql
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Quick start
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from semql import (
|
|
49
|
+
Backend,
|
|
50
|
+
Catalog,
|
|
51
|
+
Cube,
|
|
52
|
+
Dimension,
|
|
53
|
+
Measure,
|
|
54
|
+
SemanticQuery,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
orders = Cube(
|
|
58
|
+
name="orders",
|
|
59
|
+
backend=Backend.POSTGRES,
|
|
60
|
+
table="orders",
|
|
61
|
+
alias="o",
|
|
62
|
+
measures=[
|
|
63
|
+
Measure(name="revenue", sql="{o}.amount", agg="sum", unit="currency"),
|
|
64
|
+
],
|
|
65
|
+
dimensions=[
|
|
66
|
+
Dimension(name="region", sql="{o}.region", type="string"),
|
|
67
|
+
],
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
catalog = Catalog([orders])
|
|
71
|
+
compiled = catalog.compile(
|
|
72
|
+
SemanticQuery(measures=["orders.revenue"], dimensions=["orders.region"]),
|
|
73
|
+
)
|
|
74
|
+
# compiled.sql, compiled.params, compiled.columns, compiled.backend
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
The `{o}` placeholder in a cube's `sql` is its alias; the compiler
|
|
78
|
+
resolves it (along with `{schema}`-style context placeholders and
|
|
79
|
+
`{ctx.X}` row-level-security placeholders) at compile time.
|
|
80
|
+
|
|
81
|
+
## What lives in the box
|
|
82
|
+
|
|
83
|
+
| Surface | Module |
|
|
84
|
+
|---|---|
|
|
85
|
+
| Cube / Measure / Dimension / TimeDimension / Join | `semql.model` |
|
|
86
|
+
| SemanticQuery / Filter / TimeWindow / CompareWindow | `semql.spec` |
|
|
87
|
+
| Catalog wrapper (validation, prompt, compile entry) | `semql.catalog` |
|
|
88
|
+
| Compiler — sqlglot AST → dialect SQL | `semql.compile` |
|
|
89
|
+
| Collect-all static validator | `semql.validate` |
|
|
90
|
+
| Reflection cubes (catalog_cubes, ...) | `semql.introspect` |
|
|
91
|
+
| Planner / router prompt fragments | `semql.prompt` |
|
|
92
|
+
| Backend strategies + sqlglot dialect adapter | `semql.backend`, `semql.dialect` |
|
|
93
|
+
| Visualisation decision (chart type, axes, formats) | `semql.visualize` |
|
|
94
|
+
| `is_safe_select` post-hoc SQL guard | `semql.safe` |
|
|
95
|
+
| Structured error hierarchy | `semql.errors` |
|
|
96
|
+
|
|
97
|
+
## Features
|
|
98
|
+
|
|
99
|
+
- **Compare windows** — `CompareWindow(mode="previous_period")` wraps
|
|
100
|
+
the inner query in `current` / `prior` CTEs joined via `FULL OUTER
|
|
101
|
+
JOIN` and emits `{m}_current` / `{m}_prior` / `{m}_delta` /
|
|
102
|
+
`{m}_pct_change` columns per measure.
|
|
103
|
+
- **Tenancy** — per-cube `SCHEMA` (default; `{tenant_schema}`
|
|
104
|
+
substitution) or `DISCRIMINATOR` (compiler wraps the source in a
|
|
105
|
+
subquery with `WHERE tenancy_column = $tenant`).
|
|
106
|
+
- **Row-level security** — `Cube.security_sql` AND-composes with
|
|
107
|
+
tenancy inside the isolation subquery; `{ctx.X}` placeholders bind
|
|
108
|
+
as parameters, never inline as literals.
|
|
109
|
+
- **MCP-ready** — `Catalog.prompt()` produces the planner system-prompt
|
|
110
|
+
fragment; `semql-mcp` wraps it as a server.
|
|
111
|
+
- **Pluggable backends** — `BackendStrategy` Protocol lets out-of-tree
|
|
112
|
+
Snowflake / BigQuery adapters slot in without forking the compiler.
|
|
113
|
+
|
|
114
|
+
## Philosophy
|
|
115
|
+
|
|
116
|
+
See `PHILOSOPHY.md` at the repo root. Highlights:
|
|
117
|
+
- Correct SQL, not optimal. The query planner is the database's job.
|
|
118
|
+
- The emitted SQL must read like something a human could have written.
|
|
119
|
+
- `compile()` fails at the first problem; `validate()` collects them all.
|
|
120
|
+
- The catalogue is data; reflection isn't an afterthought.
|
|
121
|
+
|
|
122
|
+
## Status
|
|
123
|
+
|
|
124
|
+
Pre-v1. The shape is stable, but minor names / fields may move before
|
|
125
|
+
the v1 contract locks. Tests pin every public behaviour the README
|
|
126
|
+
documents.
|
semql-0.1.0/README.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# semql
|
|
2
|
+
|
|
3
|
+
Pure-Python compiler from a semantic spec to backend SQL. Define
|
|
4
|
+
cubes (dimensions, measures, time-dimensions, joins) once; emit
|
|
5
|
+
correct, parameterised SQL for Postgres, ClickHouse, DuckDB and
|
|
6
|
+
(via the strategy seam) Snowflake / BigQuery.
|
|
7
|
+
|
|
8
|
+
`semql` does **no I/O**: catalogues are Python data; the compiler
|
|
9
|
+
returns SQL + bound params; running the SQL is the caller's job.
|
|
10
|
+
Prompt-fragment rendering for LLM planners ships in the core
|
|
11
|
+
(`semql.prompt`). Sibling packages add MCP exposure (`semql-mcp`)
|
|
12
|
+
and ER diagrams (`semql-erd`).
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```sh
|
|
17
|
+
pip install semql
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Quick start
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
from semql import (
|
|
24
|
+
Backend,
|
|
25
|
+
Catalog,
|
|
26
|
+
Cube,
|
|
27
|
+
Dimension,
|
|
28
|
+
Measure,
|
|
29
|
+
SemanticQuery,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
orders = Cube(
|
|
33
|
+
name="orders",
|
|
34
|
+
backend=Backend.POSTGRES,
|
|
35
|
+
table="orders",
|
|
36
|
+
alias="o",
|
|
37
|
+
measures=[
|
|
38
|
+
Measure(name="revenue", sql="{o}.amount", agg="sum", unit="currency"),
|
|
39
|
+
],
|
|
40
|
+
dimensions=[
|
|
41
|
+
Dimension(name="region", sql="{o}.region", type="string"),
|
|
42
|
+
],
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
catalog = Catalog([orders])
|
|
46
|
+
compiled = catalog.compile(
|
|
47
|
+
SemanticQuery(measures=["orders.revenue"], dimensions=["orders.region"]),
|
|
48
|
+
)
|
|
49
|
+
# compiled.sql, compiled.params, compiled.columns, compiled.backend
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
The `{o}` placeholder in a cube's `sql` is its alias; the compiler
|
|
53
|
+
resolves it (along with `{schema}`-style context placeholders and
|
|
54
|
+
`{ctx.X}` row-level-security placeholders) at compile time.
|
|
55
|
+
|
|
56
|
+
## What lives in the box
|
|
57
|
+
|
|
58
|
+
| Surface | Module |
|
|
59
|
+
|---|---|
|
|
60
|
+
| Cube / Measure / Dimension / TimeDimension / Join | `semql.model` |
|
|
61
|
+
| SemanticQuery / Filter / TimeWindow / CompareWindow | `semql.spec` |
|
|
62
|
+
| Catalog wrapper (validation, prompt, compile entry) | `semql.catalog` |
|
|
63
|
+
| Compiler — sqlglot AST → dialect SQL | `semql.compile` |
|
|
64
|
+
| Collect-all static validator | `semql.validate` |
|
|
65
|
+
| Reflection cubes (catalog_cubes, ...) | `semql.introspect` |
|
|
66
|
+
| Planner / router prompt fragments | `semql.prompt` |
|
|
67
|
+
| Backend strategies + sqlglot dialect adapter | `semql.backend`, `semql.dialect` |
|
|
68
|
+
| Visualisation decision (chart type, axes, formats) | `semql.visualize` |
|
|
69
|
+
| `is_safe_select` post-hoc SQL guard | `semql.safe` |
|
|
70
|
+
| Structured error hierarchy | `semql.errors` |
|
|
71
|
+
|
|
72
|
+
## Features
|
|
73
|
+
|
|
74
|
+
- **Compare windows** — `CompareWindow(mode="previous_period")` wraps
|
|
75
|
+
the inner query in `current` / `prior` CTEs joined via `FULL OUTER
|
|
76
|
+
JOIN` and emits `{m}_current` / `{m}_prior` / `{m}_delta` /
|
|
77
|
+
`{m}_pct_change` columns per measure.
|
|
78
|
+
- **Tenancy** — per-cube `SCHEMA` (default; `{tenant_schema}`
|
|
79
|
+
substitution) or `DISCRIMINATOR` (compiler wraps the source in a
|
|
80
|
+
subquery with `WHERE tenancy_column = $tenant`).
|
|
81
|
+
- **Row-level security** — `Cube.security_sql` AND-composes with
|
|
82
|
+
tenancy inside the isolation subquery; `{ctx.X}` placeholders bind
|
|
83
|
+
as parameters, never inline as literals.
|
|
84
|
+
- **MCP-ready** — `Catalog.prompt()` produces the planner system-prompt
|
|
85
|
+
fragment; `semql-mcp` wraps it as a server.
|
|
86
|
+
- **Pluggable backends** — `BackendStrategy` Protocol lets out-of-tree
|
|
87
|
+
Snowflake / BigQuery adapters slot in without forking the compiler.
|
|
88
|
+
|
|
89
|
+
## Philosophy
|
|
90
|
+
|
|
91
|
+
See `PHILOSOPHY.md` at the repo root. Highlights:
|
|
92
|
+
- Correct SQL, not optimal. The query planner is the database's job.
|
|
93
|
+
- The emitted SQL must read like something a human could have written.
|
|
94
|
+
- `compile()` fails at the first problem; `validate()` collects them all.
|
|
95
|
+
- The catalogue is data; reflection isn't an afterthought.
|
|
96
|
+
|
|
97
|
+
## Status
|
|
98
|
+
|
|
99
|
+
Pre-v1. The shape is stable, but minor names / fields may move before
|
|
100
|
+
the v1 contract locks. Tests pin every public behaviour the README
|
|
101
|
+
documents.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "semql"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Semantic data layer: SemanticQuery → backend SQL with authorisation, row-level scoping, time-spine fill, and a typed four-role LLM prompt pipeline."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "BSD-3-Clause"
|
|
7
|
+
license-files = ["LICENSE"]
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Nikhil Pallamreddy", email = "nikhil.pallamreddy+git@gmail.com" }
|
|
10
|
+
]
|
|
11
|
+
requires-python = ">=3.12"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"pydantic>=2.13.4",
|
|
14
|
+
"sqlglot>=30.9.0",
|
|
15
|
+
]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Topic :: Database",
|
|
24
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
25
|
+
"Typing :: Typed",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/npalladium/semql"
|
|
30
|
+
Repository = "https://github.com/npalladium/semql"
|
|
31
|
+
Issues = "https://github.com/npalladium/semql/issues"
|
|
32
|
+
|
|
33
|
+
[build-system]
|
|
34
|
+
requires = ["uv_build>=0.11.19,<0.12.0"]
|
|
35
|
+
build-backend = "uv_build"
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Public surface of the semantic layer.
|
|
2
|
+
|
|
3
|
+
Most users only need ``Catalog``, ``Cube``, the field types
|
|
4
|
+
(``Measure`` / ``Dimension`` / ``TimeDimension``), and
|
|
5
|
+
``SemanticQuery``. The rest is exported for callers building their own
|
|
6
|
+
tooling on top of the compiler (custom validators, MCP servers,
|
|
7
|
+
prompt rendering, etc.).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from semql.catalog import Catalog
|
|
13
|
+
from semql.compile import MAX_UNGROUPED_ROWS, Compiled, compile_query
|
|
14
|
+
from semql.docs import render_catalog_markdown
|
|
15
|
+
from semql.errors import (
|
|
16
|
+
CompileError,
|
|
17
|
+
CrossBackendError,
|
|
18
|
+
FilterTypeError,
|
|
19
|
+
JoinPathError,
|
|
20
|
+
PhaseDeferredError,
|
|
21
|
+
PlaceholderError,
|
|
22
|
+
ResolveError,
|
|
23
|
+
SemQLError,
|
|
24
|
+
UnknownIdentifierError,
|
|
25
|
+
)
|
|
26
|
+
from semql.introspect import (
|
|
27
|
+
CATALOG_CUBES,
|
|
28
|
+
CATALOG_DIMENSIONS,
|
|
29
|
+
CATALOG_MEASURES,
|
|
30
|
+
META_CUBES,
|
|
31
|
+
ResolvedQuery,
|
|
32
|
+
iter_cubes,
|
|
33
|
+
iter_fields,
|
|
34
|
+
iter_joins,
|
|
35
|
+
resolve_field,
|
|
36
|
+
resolve_query,
|
|
37
|
+
)
|
|
38
|
+
from semql.model import (
|
|
39
|
+
AggLiteral,
|
|
40
|
+
AuthContext,
|
|
41
|
+
Backend,
|
|
42
|
+
BaseField,
|
|
43
|
+
ChartTypeLiteral,
|
|
44
|
+
Cube,
|
|
45
|
+
Dimension,
|
|
46
|
+
DimTypeLiteral,
|
|
47
|
+
FormatLiteral,
|
|
48
|
+
GranularityLiteral,
|
|
49
|
+
Join,
|
|
50
|
+
Measure,
|
|
51
|
+
ScopePredicate,
|
|
52
|
+
Segment,
|
|
53
|
+
TimeDimension,
|
|
54
|
+
View,
|
|
55
|
+
)
|
|
56
|
+
from semql.plan import (
|
|
57
|
+
DrilldownSuggestion,
|
|
58
|
+
DrilldownSuggestions,
|
|
59
|
+
Presentation,
|
|
60
|
+
QueryIntent,
|
|
61
|
+
QueryPlan,
|
|
62
|
+
QueryStep,
|
|
63
|
+
RouterDecision,
|
|
64
|
+
RouterPath,
|
|
65
|
+
)
|
|
66
|
+
from semql.prompt import (
|
|
67
|
+
build_drilldown_prompt_fragment,
|
|
68
|
+
build_planner_prompt_fragment,
|
|
69
|
+
build_presenter_prompt_fragment,
|
|
70
|
+
build_query_generator_prompt_fragment,
|
|
71
|
+
build_router_prompt_fragment,
|
|
72
|
+
render_catalogue_block,
|
|
73
|
+
)
|
|
74
|
+
from semql.safe import is_safe_select
|
|
75
|
+
from semql.spec import (
|
|
76
|
+
BoolExpr,
|
|
77
|
+
CompareWindow,
|
|
78
|
+
Filter,
|
|
79
|
+
FilterOp,
|
|
80
|
+
SemanticQuery,
|
|
81
|
+
TimeWindow,
|
|
82
|
+
)
|
|
83
|
+
from semql.validate import ValidationError, validate
|
|
84
|
+
from semql.visualize import VizColumn, VizDecision, decide_visualization
|
|
85
|
+
|
|
86
|
+
__all__ = [
|
|
87
|
+
"AggLiteral",
|
|
88
|
+
"AuthContext",
|
|
89
|
+
"Backend",
|
|
90
|
+
"BaseField",
|
|
91
|
+
"BoolExpr",
|
|
92
|
+
"CATALOG_CUBES",
|
|
93
|
+
"CATALOG_DIMENSIONS",
|
|
94
|
+
"CATALOG_MEASURES",
|
|
95
|
+
"Catalog",
|
|
96
|
+
"ChartTypeLiteral",
|
|
97
|
+
"CompareWindow",
|
|
98
|
+
"CompileError",
|
|
99
|
+
"Compiled",
|
|
100
|
+
"CrossBackendError",
|
|
101
|
+
"Cube",
|
|
102
|
+
"DimTypeLiteral",
|
|
103
|
+
"Dimension",
|
|
104
|
+
"DrilldownSuggestion",
|
|
105
|
+
"DrilldownSuggestions",
|
|
106
|
+
"Filter",
|
|
107
|
+
"FilterOp",
|
|
108
|
+
"FilterTypeError",
|
|
109
|
+
"FormatLiteral",
|
|
110
|
+
"GranularityLiteral",
|
|
111
|
+
"Join",
|
|
112
|
+
"JoinPathError",
|
|
113
|
+
"MAX_UNGROUPED_ROWS",
|
|
114
|
+
"META_CUBES",
|
|
115
|
+
"Measure",
|
|
116
|
+
"PhaseDeferredError",
|
|
117
|
+
"PlaceholderError",
|
|
118
|
+
"Presentation",
|
|
119
|
+
"QueryIntent",
|
|
120
|
+
"QueryPlan",
|
|
121
|
+
"QueryStep",
|
|
122
|
+
"ResolveError",
|
|
123
|
+
"ResolvedQuery",
|
|
124
|
+
"RouterDecision",
|
|
125
|
+
"RouterPath",
|
|
126
|
+
"ScopePredicate",
|
|
127
|
+
"Segment",
|
|
128
|
+
"SemQLError",
|
|
129
|
+
"SemanticQuery",
|
|
130
|
+
"TimeDimension",
|
|
131
|
+
"TimeWindow",
|
|
132
|
+
"UnknownIdentifierError",
|
|
133
|
+
"ValidationError",
|
|
134
|
+
"View",
|
|
135
|
+
"VizColumn",
|
|
136
|
+
"VizDecision",
|
|
137
|
+
"build_drilldown_prompt_fragment",
|
|
138
|
+
"build_planner_prompt_fragment",
|
|
139
|
+
"build_presenter_prompt_fragment",
|
|
140
|
+
"build_query_generator_prompt_fragment",
|
|
141
|
+
"build_router_prompt_fragment",
|
|
142
|
+
"compile_query",
|
|
143
|
+
"decide_visualization",
|
|
144
|
+
"is_safe_select",
|
|
145
|
+
"iter_cubes",
|
|
146
|
+
"iter_fields",
|
|
147
|
+
"iter_joins",
|
|
148
|
+
"render_catalog_markdown",
|
|
149
|
+
"render_catalogue_block",
|
|
150
|
+
"resolve_field",
|
|
151
|
+
"resolve_query",
|
|
152
|
+
"validate",
|
|
153
|
+
]
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""``python -m semql.compile`` CLI.
|
|
2
|
+
|
|
3
|
+
Compiles a ``SemanticQuery`` JSON spec against a Catalog declared
|
|
4
|
+
in a Python module and prints the SQL + params to stdout. Useful
|
|
5
|
+
for ad-hoc cube authoring (no need to write a runner script) and
|
|
6
|
+
as a smoke target in CI.
|
|
7
|
+
|
|
8
|
+
python -m semql.compile --catalog mypkg.catalogs:default \\
|
|
9
|
+
'{"measures": ["orders.revenue"], "dimensions": ["orders.region"]}'
|
|
10
|
+
|
|
11
|
+
The ``--catalog`` arg is ``module.path:attr`` — the module is
|
|
12
|
+
imported, the named attribute must be a ``Catalog`` instance.
|
|
13
|
+
Context substitutions for ``{schema}`` / ``{tenant}`` placeholders
|
|
14
|
+
go through repeated ``--context key=value`` flags.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import importlib
|
|
21
|
+
import json
|
|
22
|
+
import sys
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from semql import Catalog, SemanticQuery
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _load_catalog(spec: str) -> Catalog:
|
|
29
|
+
"""Resolve ``module.path:attr`` into the named ``Catalog``."""
|
|
30
|
+
if ":" not in spec:
|
|
31
|
+
raise SystemExit(
|
|
32
|
+
f"--catalog must be 'module.path:attr', got {spec!r}. "
|
|
33
|
+
"Example: --catalog mypkg.catalogs:default"
|
|
34
|
+
)
|
|
35
|
+
module_path, attr = spec.rsplit(":", 1)
|
|
36
|
+
try:
|
|
37
|
+
module = importlib.import_module(module_path)
|
|
38
|
+
except ModuleNotFoundError as exc:
|
|
39
|
+
raise SystemExit(f"Could not import {module_path!r}: {exc}") from exc
|
|
40
|
+
try:
|
|
41
|
+
catalog = getattr(module, attr)
|
|
42
|
+
except AttributeError as exc:
|
|
43
|
+
raise SystemExit(f"Module {module_path!r} has no attribute {attr!r}.") from exc
|
|
44
|
+
if not isinstance(catalog, Catalog):
|
|
45
|
+
raise SystemExit(f"{spec!r} resolved to {type(catalog).__name__}, not semql.Catalog.")
|
|
46
|
+
return catalog
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _parse_context(pairs: list[str]) -> dict[str, str]:
|
|
50
|
+
"""Parse repeated ``--context key=value`` flags into a dict."""
|
|
51
|
+
out: dict[str, str] = {}
|
|
52
|
+
for pair in pairs:
|
|
53
|
+
if "=" not in pair:
|
|
54
|
+
raise SystemExit(f"--context expects key=value, got {pair!r}.")
|
|
55
|
+
k, v = pair.split("=", 1)
|
|
56
|
+
out[k] = v
|
|
57
|
+
return out
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def main(argv: list[str] | None = None) -> int:
    """Compile a ``SemanticQuery`` JSON spec and print the resulting SQL.

    The spec is parsed and validated first, then the catalog is loaded from
    its ``--catalog`` locator and the query is compiled with the
    ``--context`` pairs as compile-time context. The SQL is printed first.
    With ``--params-format comment`` (the default) the params and columns
    follow as ``--`` SQL comments; with ``--params-format json`` only the
    params follow, as a single JSON line.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` read
            ``sys.argv``.

    Returns:
        ``0`` once the output has been printed.

    Raises:
        SystemExit: If the arguments are invalid, the spec is not valid
            JSON, or the catalog locator / context pairs are malformed.
    """
    parser = argparse.ArgumentParser(
        prog="python -m semql.compile",
        description="Compile a SemanticQuery JSON spec to SQL.",
    )
    parser.add_argument(
        "--catalog",
        required=True,
        help="Catalog locator: module.path:attr (e.g. mypkg.catalogs:default).",
    )
    parser.add_argument(
        "--context",
        action="append",
        default=[],
        help="Compile-time context pair (key=value). May be repeated.",
    )
    parser.add_argument(
        "--params-format",
        choices=("comment", "json"),
        default="comment",
        help="How to print the params: as a SQL comment (default) or a JSON line.",
    )
    parser.add_argument(
        "spec",
        help="SemanticQuery as a JSON string. Use '-' to read from stdin.",
    )
    args = parser.parse_args(argv)

    raw_spec = sys.stdin.read() if args.spec == "-" else args.spec
    try:
        spec_dict: dict[str, Any] = json.loads(raw_spec)
    except json.JSONDecodeError as exc:
        # ``spec`` is a positional argument; there is no ``--spec`` flag, so
        # the message must not point the user at one.
        raise SystemExit(f"spec must be JSON: {exc}") from exc
    query = SemanticQuery.model_validate(spec_dict)

    catalog = _load_catalog(args.catalog)
    ctx = _parse_context(args.context)

    compiled = catalog.compile(query, context=ctx)

    print(compiled.sql)
    # ``default=str`` stringifies any param value json cannot encode natively.
    if args.params_format == "json":
        print(json.dumps(compiled.params, default=str))
    else:
        print(f"-- params: {json.dumps(compiled.params, default=str)}")
        print(f"-- columns: {compiled.columns}")
    return 0
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
if __name__ == "__main__":
|
|
110
|
+
sys.exit(main())
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Shared identifier resolution for the semantic layer.
|
|
2
|
+
|
|
3
|
+
Both `compile.py` and `visualize.py` parse the planner's `cube.field`
|
|
4
|
+
references against the catalogue. Keeping a single resolver here makes
|
|
5
|
+
the validation, regex shape, and error class consistent across both
|
|
6
|
+
modules.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
|
|
13
|
+
from semql.errors import ResolveError, UnknownIdentifierError, closest_match
|
|
14
|
+
from semql.model import Cube, Dimension, Measure, TimeDimension
|
|
15
|
+
|
|
16
|
+
# A qualified ``cube.field`` reference: two identifiers separated by a single
# dot, each starting with a letter or underscore and continuing with letters,
# digits or underscores. Matching is case-insensitive; group 1 is the cube
# name and group 2 is the field name.
_QUALIFIED_RE = re.compile(r"^([a-z_][a-z0-9_]*)\.([a-z_][a-z0-9_]*)$", re.IGNORECASE)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def split(qualified: str) -> tuple[str, str]:
    """Split a ``cube.field`` reference into its cube and field names.

    Args:
        qualified: Reference of the form ``cube.field``.

    Returns:
        A ``(cube_name, field_name)`` tuple.

    Raises:
        ResolveError: If ``qualified`` is not exactly two dot-separated
            identifiers.
    """
    # ``fullmatch`` rather than ``match``: the pattern ends in ``$``, which
    # also matches just before a trailing newline, so ``match`` would
    # silently accept a reference such as "cube.field" followed by a newline.
    m = _QUALIFIED_RE.fullmatch(qualified)
    if not m:
        raise ResolveError(f"Field reference must be 'cube.field', got: {qualified!r}")
    return m.group(1), m.group(2)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def resolve_field(
    qualified: str,
    catalog: dict[str, Cube],
) -> tuple[Cube, Measure | Dimension | TimeDimension]:
    """Look up a ``cube.field`` reference in ``catalog``.

    The cube is found by name, then its measures, dimensions and time
    dimensions are searched in that order; the first field whose ``name``
    equals the requested one is returned.

    Args:
        qualified: Reference of the form ``cube.field``.
        catalog: Cubes keyed by cube name.

    Returns:
        The owning cube together with the matching field.

    Raises:
        ResolveError: Propagated from ``split`` when ``qualified`` is not a
            ``cube.field`` reference.
        UnknownIdentifierError: If the cube is not in ``catalog`` or the
            cube has no field of that name. The message lists the known
            names and includes the ``closest_match`` suggestion, if any.
    """
    cube_name, field_name = split(qualified)

    if cube_name not in catalog:
        hint = closest_match(cube_name, catalog.keys())
        known = ", ".join(sorted(catalog))
        if hint:
            suffix = f" Did you mean {hint!r}?"
        else:
            suffix = ""
        raise UnknownIdentifierError(
            f"Unknown cube: {cube_name!r}. Known cubes: {known}.{suffix}",
            kind="cube",
            name=cube_name,
            hint=hint,
        )

    cube = catalog[cube_name]

    # Search order matters when names collide across kinds: measures first,
    # then dimensions, then time dimensions.
    for group in (cube.measures, cube.dimensions, cube.time_dimensions):
        for candidate in group:
            if candidate.name == field_name:
                return cube, candidate

    hint = closest_match(field_name, cube.field_names())
    known = ", ".join(sorted(cube.field_names()))
    if hint:
        suffix = f" Did you mean {hint!r}?"
    else:
        suffix = ""
    raise UnknownIdentifierError(
        f"Unknown field {field_name!r} on cube {cube_name!r}. Known fields: {known}.{suffix}",
        kind="field",
        name=field_name,
        cube=cube_name,
        hint=hint,
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Public names of this module. The two error classes are imported from
# ``semql.errors`` and re-exported here next to the resolver functions.
__all__ = ["ResolveError", "UnknownIdentifierError", "split", "resolve_field"]
|