pytrilogy-0.3.149-cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
- pytrilogy-0.3.149.dist-info/METADATA +555 -0
- pytrilogy-0.3.149.dist-info/RECORD +207 -0
- pytrilogy-0.3.149.dist-info/WHEEL +4 -0
- pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +27 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +119 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +454 -0
- trilogy/core/env_processor.py +239 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1240 -0
- trilogy/core/graph_models.py +142 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2670 -0
- trilogy/core/models/build.py +2603 -0
- trilogy/core/models/build_environment.py +165 -0
- trilogy/core/models/core.py +506 -0
- trilogy/core/models/datasource.py +436 -0
- trilogy/core/models/environment.py +756 -0
- trilogy/core/models/execute.py +1213 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +270 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +207 -0
- trilogy/core/processing/node_generators/node_merge_node.py +695 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +846 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +522 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +604 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1432 -0
- trilogy/dialect/bigquery.py +314 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +159 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +397 -0
- trilogy/dialect/enums.py +151 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/__init__.py +17 -0
- trilogy/execution/config.py +119 -0
- trilogy/execution/state/__init__.py +0 -0
- trilogy/execution/state/exceptions.py +26 -0
- trilogy/execution/state/file_state_store.py +0 -0
- trilogy/execution/state/sqllite_state_store.py +0 -0
- trilogy/execution/state/state_store.py +406 -0
- trilogy/executor.py +692 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +135 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2876 -0
- trilogy/parsing/render.py +775 -0
- trilogy/parsing/trilogy.lark +546 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +45 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +306 -0
- trilogy/scripts/common.py +432 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +311 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +555 -0
- trilogy/scripts/environment.py +59 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +487 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +762 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/refresh.py +161 -0
- trilogy/scripts/run.py +79 -0
- trilogy/scripts/serve.py +202 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +143 -0
- trilogy/scripts/trilogy.py +75 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
trilogy/scripts/dependency/src/parser.rs
@@ -0,0 +1,435 @@

use pest::Parser;
use pest_derive::Parser;
use std::path::{Path, PathBuf};
use thiserror::Error;

#[derive(Parser)]
#[grammar = "preql.pest"]
pub struct PreqlParser;

/// Represents a parsed import statement
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ImportStatement {
    /// The raw path as written in the import (e.g., "models.customer")
    pub raw_path: String,
    /// Number of parent directory traversals (from leading dots)
    pub parent_dirs: usize,
    /// Optional alias for the import
    pub alias: Option<String>,
    /// Whether this is a stdlib import
    pub is_stdlib: bool,
}

impl ImportStatement {
    /// Resolve this import to an absolute file path
    pub fn resolve(&self, working_dir: &Path) -> Option<PathBuf> {
        if self.is_stdlib {
            return None; // Skip stdlib imports
        }

        let mut base = working_dir.to_path_buf();

        // Navigate up parent directories
        for _ in 0..self.parent_dirs {
            base = base.parent()?.to_path_buf();
        }

        // Convert dot-separated path to file path
        let parts: Vec<&str> = self.raw_path.split('.').collect();
        for part in &parts {
            base.push(part);
        }

        // Add .preql extension
        base.set_extension("preql");

        Some(base)
    }

    /// Get the effective alias (uses last path component if no explicit alias)
    pub fn effective_alias(&self) -> &str {
        self.alias
            .as_deref()
            .unwrap_or_else(|| self.raw_path.split('.').last().unwrap_or(&self.raw_path))
    }
}

/// Represents a datasource declaration
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DatasourceDeclaration {
    /// The name/identifier of the datasource
    pub name: String,
}

/// Represents a persist statement that updates a datasource
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PersistStatement {
    /// The mode of persistence (append, overwrite, persist)
    pub mode: PersistMode,
    /// The target datasource being updated
    pub target_datasource: String,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PersistMode {
    Append,
    Overwrite,
    Persist,
}

impl std::fmt::Display for PersistMode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            PersistMode::Append => write!(f, "append"),
            PersistMode::Overwrite => write!(f, "overwrite"),
            PersistMode::Persist => write!(f, "persist"),
        }
    }
}

/// All parsed elements from a PreQL file relevant to dependency resolution
#[derive(Debug, Clone, Default)]
pub struct ParsedFile {
    pub imports: Vec<ImportStatement>,
    pub datasources: Vec<DatasourceDeclaration>,
    pub persists: Vec<PersistStatement>,
}

#[derive(Error, Debug)]
pub enum ParseError {
    #[error("Failed to parse file: {0}")]
    PestError(#[from] pest::error::Error<Rule>),

    #[error("Invalid import statement structure")]
    InvalidImportStructure,

    #[error("Invalid datasource statement structure")]
    InvalidDatasourceStructure,

    #[error("Invalid persist statement structure")]
    InvalidPersistStructure,
}

/// Parse a PreQL file and extract all dependency-relevant statements
pub fn parse_file(content: &str) -> Result<ParsedFile, ParseError> {
    let pairs = PreqlParser::parse(Rule::file, content)?;
    let mut result = ParsedFile::default();

    for pair in pairs {
        if pair.as_rule() == Rule::file {
            for inner in pair.into_inner() {
                if inner.as_rule() == Rule::statement {
                    for stmt in inner.into_inner() {
                        match stmt.as_rule() {
                            Rule::import_statement => {
                                if let Some(import) = parse_import_statement(stmt)? {
                                    result.imports.push(import);
                                }
                            }
                            Rule::datasource_statement => {
                                if let Some(ds) = parse_datasource_statement(stmt)? {
                                    result.datasources.push(ds);
                                }
                            }
                            Rule::persist_statement => {
                                if let Some(persist) = parse_persist_statement(stmt)? {
                                    result.persists.push(persist);
                                }
                            }
                            _ => {}
                        }
                    }
                }
            }
        }
    }

    Ok(result)
}

/// Legacy function for backward compatibility
pub fn parse_imports(content: &str) -> Result<Vec<ImportStatement>, ParseError> {
    Ok(parse_file(content)?.imports)
}

fn parse_import_statement(
    pair: pest::iterators::Pair<Rule>,
) -> Result<Option<ImportStatement>, ParseError> {
    let mut parent_dirs: usize = 0;
    let mut raw_path = String::new();
    let mut alias = None;

    for inner in pair.into_inner() {
        match inner.as_rule() {
            Rule::relative_dots => {
                let dots = inner.as_str();
                // First dot is part of the syntax, each additional dot goes up one more dir
                parent_dirs = dots.len().saturating_sub(1);
            }
            Rule::import_path => {
                raw_path = inner.as_str().to_string();
            }
            Rule::import_alias => {
                for alias_inner in inner.into_inner() {
                    if alias_inner.as_rule() == Rule::identifier {
                        alias = Some(alias_inner.as_str().to_string());
                    }
                }
            }
            _ => {}
        }
    }

    if raw_path.is_empty() {
        return Err(ParseError::InvalidImportStructure);
    }

    // Check if it's a stdlib import
    let is_stdlib = raw_path.starts_with("std.");

    Ok(Some(ImportStatement {
        raw_path,
        parent_dirs,
        alias,
        is_stdlib,
    }))
}

fn parse_datasource_statement(
    pair: pest::iterators::Pair<Rule>,
) -> Result<Option<DatasourceDeclaration>, ParseError> {
    for inner in pair.into_inner() {
        if inner.as_rule() == Rule::identifier {
            let name = inner.as_str().to_string();
            return Ok(Some(DatasourceDeclaration { name }));
        }
    }
    Err(ParseError::InvalidDatasourceStructure)
}

fn parse_persist_statement(
    pair: pest::iterators::Pair<Rule>,
) -> Result<Option<PersistStatement>, ParseError> {
    for inner in pair.into_inner() {
        match inner.as_rule() {
            Rule::auto_persist => {
                return parse_auto_persist(inner);
            }
            Rule::full_persist => {
                return parse_full_persist(inner);
            }
            _ => {}
        }
    }
    Err(ParseError::InvalidPersistStructure)
}

fn parse_auto_persist(
    pair: pest::iterators::Pair<Rule>,
) -> Result<Option<PersistStatement>, ParseError> {
    let mut mode = None;
    let mut target = None;

    for inner in pair.into_inner() {
        match inner.as_rule() {
            Rule::persist_mode => {
                mode = Some(parse_persist_mode(inner.as_str()));
            }
            Rule::identifier => {
                if target.is_none() {
                    target = Some(inner.as_str().to_string());
                }
            }
            _ => {}
        }
    }

    match (mode, target) {
        (Some(mode), Some(target_datasource)) => Ok(Some(PersistStatement {
            mode,
            target_datasource,
        })),
        _ => Err(ParseError::InvalidPersistStructure),
    }
}

fn parse_full_persist(
    pair: pest::iterators::Pair<Rule>,
) -> Result<Option<PersistStatement>, ParseError> {
    let mut mode = None;
    let mut target = None;

    for inner in pair.into_inner() {
        match inner.as_rule() {
            Rule::persist_mode => {
                mode = Some(parse_persist_mode(inner.as_str()));
            }
            Rule::target_identifier => {
                // Get the identifier inside target_identifier
                for id in inner.into_inner() {
                    if id.as_rule() == Rule::identifier {
                        target = Some(id.as_str().to_string());
                    }
                }
            }
            _ => {}
        }
    }

    match (mode, target) {
        (Some(mode), Some(target_datasource)) => Ok(Some(PersistStatement {
            mode,
            target_datasource,
        })),
        _ => Err(ParseError::InvalidPersistStructure),
    }
}

fn parse_persist_mode(s: &str) -> PersistMode {
    match s.to_lowercase().as_str() {
        "append" => PersistMode::Append,
        "overwrite" => PersistMode::Overwrite,
        _ => PersistMode::Persist,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_import() {
        let content = "import models.customer;";
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.imports.len(), 1);
        assert_eq!(parsed.imports[0].raw_path, "models.customer");
        assert_eq!(parsed.imports[0].parent_dirs, 0);
        assert!(parsed.imports[0].alias.is_none());
    }

    #[test]
    fn test_import_with_alias() {
        let content = "import models.customer as cust;";
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.imports.len(), 1);
        assert_eq!(parsed.imports[0].raw_path, "models.customer");
        assert_eq!(parsed.imports[0].alias, Some("cust".to_string()));
    }

    #[test]
    fn test_relative_import() {
        let content = "import ..models.customer;";
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.imports.len(), 1);
        assert_eq!(parsed.imports[0].raw_path, "models.customer");
        assert_eq!(parsed.imports[0].parent_dirs, 1);
    }

    #[test]
    fn test_stdlib_import() {
        let content = "import std.aggregates;";
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.imports.len(), 1);
        assert!(parsed.imports[0].is_stdlib);
    }

    #[test]
    fn test_datasource_simple() {
        let content = r#"
        datasource orders (
            order_id: key,
            customer_id,
            amount: metric
        )
        address my_database.orders;
        "#;
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.datasources.len(), 1);
        assert_eq!(parsed.datasources[0].name, "orders");
    }

    #[test]
    fn test_datasource_with_grain() {
        let content = r#"
        datasource customers (
            id: customer_id,
            name: customer_name
        )
        grain (customer_id)
        address `my_db.customers`;
        "#;
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.datasources.len(), 1);
        assert_eq!(parsed.datasources[0].name, "customers");
    }

    #[test]
    fn test_auto_persist() {
        let content = "persist orders;";
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.persists.len(), 1);
        assert_eq!(parsed.persists[0].target_datasource, "orders");
        assert_eq!(parsed.persists[0].mode, PersistMode::Persist);
    }

    #[test]
    fn test_auto_persist_with_where() {
        let content = "append orders where status = 'active';";
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.persists.len(), 1);
        assert_eq!(parsed.persists[0].target_datasource, "orders");
        assert_eq!(parsed.persists[0].mode, PersistMode::Append);
    }

    #[test]
    fn test_full_persist() {
        let content = "overwrite into target_orders from select order_id, amount;";
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.persists.len(), 1);
        assert_eq!(parsed.persists[0].target_datasource, "target_orders");
        assert_eq!(parsed.persists[0].mode, PersistMode::Overwrite);
    }

    #[test]
    fn test_full_persist_with_source() {
        let content = "persist staging into final_orders by customer_id from select *;";
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.persists.len(), 1);
        assert_eq!(parsed.persists[0].target_datasource, "final_orders");
    }

    #[test]
    fn test_mixed_file() {
        let content = r#"
        import models.customer;
        import models.orders as ord;

        datasource local_orders (
            order_id: key,
            amount: metric
        )
        address local.orders;

        persist local_orders where date > '2024-01-01';

        overwrite into aggregated_orders from
        select customer_id, sum(amount) -> total_amount;
        "#;
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.imports.len(), 2);
        assert_eq!(parsed.datasources.len(), 1);
        assert_eq!(parsed.persists.len(), 2);
    }

    #[test]
    fn test_multiple_imports() {
        let content = r#"
        import models.customer;
        import models.orders as ord;
        // comment
        import ..shared.utils;
        "#;
        let parsed = parse_file(content).unwrap();
        assert_eq!(parsed.imports.len(), 3);
    }
}
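For orientation only (this is editorial commentary, not part of the package diff): a minimal sketch of how the API above might be driven from caller code, using only the items shown in the hunk (`parse_file`, `ImportStatement::resolve`, `effective_alias`) plus the standard library. The function name `print_import_edges` and the error-handling style are illustrative assumptions, not something present in the package.

// Illustrative sketch -- assumes it lives alongside the parser module above,
// so parse_file and ImportStatement are in scope.
use std::fs;
use std::path::Path;

fn print_import_edges(entry: &Path) -> Result<(), Box<dyn std::error::Error>> {
    let content = fs::read_to_string(entry)?;
    let parsed = parse_file(&content)?;

    // Imports resolve relative to the directory containing the file, mirroring
    // ImportStatement::resolve above; stdlib imports (std.*) resolve to None.
    let working_dir = entry.parent().unwrap_or_else(|| Path::new("."));
    for import in &parsed.imports {
        match import.resolve(working_dir) {
            Some(target) => println!(
                "{} -> {} (as {})",
                entry.display(),
                target.display(),
                import.effective_alias()
            ),
            None => println!("{} -> <stdlib: {}>", entry.display(), import.raw_path),
        }
    }
    Ok(())
}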
trilogy/scripts/dependency/src/preql.pest
@@ -0,0 +1,208 @@

// PreQL Grammar - Dependency Resolution Parser
// Parses imports, datasources, and persist statements for dependency ordering

// Whitespace and comments
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
COMMENT = _{ line_comment | block_comment }
line_comment = { ("#" | "//") ~ (!NEWLINE ~ ANY)* ~ NEWLINE? }
block_comment = { "/*" ~ (!"*/" ~ ANY)* ~ "*/" }
NEWLINE = _{ "\n" | "\r\n" }

// Entry point - a file can contain multiple statements
file = { SOI ~ statement* ~ EOI }

// Statements we care about for dependency resolution
statement = { (import_statement | datasource_statement | persist_statement | other_statement) ~ terminator? }
terminator = { ";" }

// =============================================================================
// IMPORT STATEMENT
// =============================================================================
// import path.to.module as alias
// Supports relative imports with leading dots
import_statement = {
    ^"import" ~
    relative_dots? ~
    import_path ~
    import_alias?
}

// Relative import dots (each dot goes up one directory after the first)
relative_dots = @{ "."+ }

// Import path: identifier separated by dots
import_path = @{ identifier ~ ("." ~ identifier)* }

// Optional alias: as identifier
import_alias = { ^"as" ~ identifier }

// =============================================================================
// DATASOURCE STATEMENT
// =============================================================================
// datasource name ( column_assignments ) grain? address|query where? ...
datasource_statement = {
    ^"datasource" ~
    identifier ~
    "(" ~ column_assignment_list? ~ ")" ~
    datasource_clauses*
}

// Column assignments inside datasource
column_assignment_list = { column_assignment ~ ("," ~ column_assignment)* ~ ","? }

column_assignment = {
    (raw_column_assignment | identifier | quoted_identifier | expr_content) ~
    (":" ~ concept_assignment)?
}

raw_column_assignment = { ^"raw" ~ "(" ~ multiline_string ~ ")" }

concept_assignment = { shorthand_modifier* ~ dotted_identifier }

shorthand_modifier = { "~" | "?" }

// Various clauses that can follow datasource column list
datasource_clauses = _{
    grain_clause |
    complete_clause |
    address_clause |
    query_clause |
    where_clause |
    increment_clause |
    partition_clause |
    status_clause
}

grain_clause = { ^"grain" ~ "(" ~ identifier_list ~ ")" }
complete_clause = { ^"complete" ~ where_clause }
address_clause = { ^"address" ~ (quoted_address | dotted_identifier) }
query_clause = { ^"query" ~ multiline_string }
where_clause = { ^"where" ~ conditional_content }
increment_clause = { ^"incremental" ~ ^"by" ~ identifier_list }
partition_clause = { ^"partition" ~ ^"by" ~ identifier_list }
status_clause = { ^"state" ~ (^"published" | ^"unpublished") }

identifier_list = { identifier ~ ("," ~ identifier)* ~ ","? }

quoted_address = { "`" ~ (!"`" ~ ANY)* ~ "`" }

// =============================================================================
// PERSIST STATEMENT
// =============================================================================
// Two forms:
// 1. auto_persist: (append|overwrite|persist) identifier where?
// 2. full_persist: (append|overwrite|persist) identifier? into identifier by? from select

persist_statement = { full_persist | auto_persist }

persist_mode = { ^"append" | ^"overwrite" | ^"persist" }

// Short form: persist datasource_name where?
auto_persist = {
    persist_mode ~
    identifier ~
    !^"into" ~
    where_clause?
}

// Full form: persist into target_datasource by? from select
full_persist = {
    persist_mode ~
    source_identifier? ~
    into_keyword ~
    target_identifier ~
    persist_partition_clause? ~
    ^"from" ~
    select_content
}

into_keyword = { ^"into" }
source_identifier = { identifier ~ &into_keyword }
target_identifier = { identifier }

persist_partition_clause = { ^"by" ~ identifier_list }

// =============================================================================
// SUPPORT RULES
// =============================================================================

// Conditional content - consume until we hit a keyword boundary
conditional_content = { conditional_unit+ }
conditional_unit = _{
    nested_parens |
    string_literal |
    multiline_string |
    // Stop at statement boundaries or clause keywords
    (!(terminator | ^"grain" | ^"complete" | ^"address" | ^"query" |
       ^"incremental" | ^"partition" | ^"state" | ^"select" | ^"from" |
       ^"into" | ^"order" | ^"limit" | ^"having" | ^"merge" | ^"align") ~ ANY)
}

// Select content - consume the select statement
select_content = { select_unit+ }
select_unit = _{
    nested_parens |
    nested_braces |
    nested_brackets |
    string_literal |
    multiline_string |
    (!(terminator) ~ ANY)
}

// Expression content for column assignments
expr_content = { expr_unit+ }
expr_unit = _{
    nested_parens |
    nested_braces |
    nested_brackets |
    string_literal |
    multiline_string |
    (!(":" | "," | ")" | terminator) ~ ANY)
}

// =============================================================================
// OTHER STATEMENTS (skip)
// =============================================================================
other_statement = { other_statement_content+ }

other_statement_content = _{
    nested_parens |
    nested_braces |
    nested_brackets |
    multiline_string |
    string_literal |
    (!(";" | "(" | ")" | "{" | "}" | "[" | "]" | "'''" | "\"" | "'") ~ ANY)
}

// =============================================================================
// NESTED STRUCTURES
// =============================================================================
nested_parens = { "(" ~ nested_content* ~ ")" }
nested_braces = { "{" ~ nested_content* ~ "}" }
nested_brackets = { "[" ~ nested_content* ~ "]" }

nested_content = _{
    nested_parens |
    nested_braces |
    nested_brackets |
    multiline_string |
    string_literal |
    (!")" ~ !"(" ~ !"{" ~ !"}" ~ !"[" ~ !"]" ~ !"'''" ~ !"\"" ~ !"'" ~ ANY)
}

// =============================================================================
// LITERALS AND IDENTIFIERS
// =============================================================================
multiline_string = { "'''" ~ (!"'''" ~ ANY)* ~ "'''" }

string_literal = {
    ("\"" ~ (!"\"" ~ !"\\" ~ ANY | "\\" ~ ANY)* ~ "\"") |
    ("'" ~ !"''" ~ (!"'" ~ !"\\" ~ ANY | "\\" ~ ANY)* ~ "'")
}

identifier = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }

// Dotted identifier for datasource addresses etc.
dotted_identifier = @{ identifier ~ ("." ~ identifier)* }

quoted_identifier = { "`" ~ (!"`" ~ ANY)* ~ "`" }