pytrilogy-0.3.142-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-313-x86_64-linux-gnu.so +0 -0
- pytrilogy-0.3.142.dist-info/METADATA +555 -0
- pytrilogy-0.3.142.dist-info/RECORD +200 -0
- pytrilogy-0.3.142.dist-info/WHEEL +5 -0
- pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +16 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2669 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +501 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +751 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1392 -0
- trilogy/dialect/bigquery.py +308 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +231 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +769 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +303 -0
- trilogy/scripts/common.py +355 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +303 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +512 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +471 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +713 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/run.py +63 -0
- trilogy/scripts/serve.py +140 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +119 -0
- trilogy/scripts/trilogy.py +68 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
trilogy/scripts/dependency/src/resolver.rs
@@ -0,0 +1,716 @@
+use crate::parser::{parse_file, DatasourceDeclaration, ImportStatement, ParseError, PersistStatement, ParsedFile};
+use serde::Serialize;
+use std::collections::{HashMap, HashSet, VecDeque};
+use std::fs;
+use std::path::{Path, PathBuf};
+use thiserror::Error;
+
+#[derive(Error, Debug)]
+pub enum ResolveError {
+    #[error("Failed to read file {path}: {source}")]
+    IoError {
+        path: PathBuf,
+        source: std::io::Error,
+    },
+
+    #[error("Parse error in {path}: {source}")]
+    ParseError { path: PathBuf, source: ParseError },
+
+    #[error("Circular dependency detected: {cycle}")]
+    CircularDependency { cycle: String },
+
+    #[error("Import not found: {import_path} (resolved to {resolved_path})")]
+    ImportNotFound {
+        import_path: String,
+        resolved_path: PathBuf,
+    },
+}
+
+/// Information about a single import
+#[derive(Debug, Clone, Serialize)]
+pub struct ImportInfo {
+    pub raw_path: String,
+    pub alias: Option<String>,
+    pub resolved_path: Option<PathBuf>,
+    pub is_stdlib: bool,
+}
+
+impl From<&ImportStatement> for ImportInfo {
+    fn from(stmt: &ImportStatement) -> Self {
+        ImportInfo {
+            raw_path: stmt.raw_path.clone(),
+            alias: stmt.alias.clone(),
+            resolved_path: None,
+            is_stdlib: stmt.is_stdlib,
+        }
+    }
+}
+
+/// Information about a datasource declaration
+#[derive(Debug, Clone, Serialize)]
+pub struct DatasourceInfo {
+    pub name: String,
+}
+
+impl From<&DatasourceDeclaration> for DatasourceInfo {
+    fn from(ds: &DatasourceDeclaration) -> Self {
+        DatasourceInfo {
+            name: ds.name.clone(),
+        }
+    }
+}
+
+/// Information about a persist statement
+#[derive(Debug, Clone, Serialize)]
+pub struct PersistInfo {
+    pub mode: String,
+    pub target_datasource: String,
+}
+
+impl From<&PersistStatement> for PersistInfo {
+    fn from(ps: &PersistStatement) -> Self {
+        PersistInfo {
+            mode: ps.mode.to_string(),
+            target_datasource: ps.target_datasource.clone(),
+        }
+    }
+}
+
+/// A node in the dependency graph
+#[derive(Debug, Clone, Serialize)]
+pub struct FileNode {
+    /// Absolute path to the file
+    pub path: PathBuf,
+    /// Relative path from the root
+    pub relative_path: PathBuf,
+    /// List of imports in this file
+    pub imports: Vec<ImportInfo>,
+    /// List of datasources declared in this file
+    pub datasources: Vec<DatasourceInfo>,
+    /// List of persist statements in this file
+    pub persists: Vec<PersistInfo>,
+    /// List of resolved import dependencies (paths to other files)
+    pub import_dependencies: Vec<PathBuf>,
+    /// Datasources this file updates (via persist)
+    pub updates_datasources: Vec<String>,
+    /// Datasources this file declares
+    pub declares_datasources: Vec<String>,
+    /// Datasources this file depends on (through imports)
+    pub depends_on_datasources: Vec<String>,
+}
+
+/// Result of dependency resolution
+#[derive(Debug, Clone, Serialize)]
+pub struct DependencyGraph {
+    /// Root file that was analyzed
+    pub root: PathBuf,
+    /// All files in dependency order (dependencies come before dependents)
+    pub order: Vec<PathBuf>,
+    /// Detailed information about each file
+    pub files: HashMap<PathBuf, FileNode>,
+    /// Mapping of datasource names to the file that declares them
+    pub datasource_declarations: HashMap<String, PathBuf>,
+    /// Mapping of datasource names to files that update them (via persist)
+    pub datasource_updaters: HashMap<String, Vec<PathBuf>>,
+    /// Any errors encountered (non-fatal)
+    pub warnings: Vec<String>,
+}
+
+/// Resolver for PreQL import dependencies
+pub struct ImportResolver {
+    /// Cache of parsed files
+    parsed_cache: HashMap<PathBuf, ParsedFile>,
+    /// Track files currently being processed (for cycle detection)
+    #[allow(dead_code)]
+    processing: HashSet<PathBuf>,
+    /// Track fully processed files
+    #[allow(dead_code)]
+    processed: HashSet<PathBuf>,
+    /// Warnings accumulated during resolution
+    warnings: Vec<String>,
+}
+
+impl ImportResolver {
+    pub fn new() -> Self {
+        Self {
+            parsed_cache: HashMap::new(),
+            processing: HashSet::new(),
+            processed: HashSet::new(),
+            warnings: Vec::new(),
+        }
+    }
+
+    /// Resolve all dependencies starting from a root file
+    pub fn resolve(&mut self, root_path: &Path) -> Result<DependencyGraph, ResolveError> {
+        let root_path = fs::canonicalize(root_path).map_err(|e| ResolveError::IoError {
+            path: root_path.to_path_buf(),
+            source: e,
+        })?;
+
+        let root_dir = root_path.parent().unwrap_or(Path::new("."));
+        let mut files: HashMap<PathBuf, FileNode> = HashMap::new();
+
+        // BFS to collect all files first
+        let mut queue: VecDeque<PathBuf> = VecDeque::new();
+        let mut seen: HashSet<PathBuf> = HashSet::new();
+
+        queue.push_back(root_path.clone());
+        seen.insert(root_path.clone());
+
+        while let Some(current_path) = queue.pop_front() {
+            let parsed = self.parse_file(&current_path)?;
+            let file_dir = current_path.parent().unwrap_or(Path::new("."));
+
+            let mut import_infos: Vec<ImportInfo> = Vec::new();
+            let mut import_dependencies: Vec<PathBuf> = Vec::new();
+
+            for import in &parsed.imports {
+                let mut info = ImportInfo::from(import);
+
+                if import.is_stdlib {
+                    import_infos.push(info);
+                    continue;
+                }
+
+                if let Some(resolved) = import.resolve(file_dir) {
+                    if resolved.exists() {
+                        let canonical =
+                            fs::canonicalize(&resolved).map_err(|e| ResolveError::IoError {
+                                path: resolved.clone(),
+                                source: e,
+                            })?;
+
+                        info.resolved_path = Some(canonical.clone());
+                        import_dependencies.push(canonical.clone());
+
+                        if !seen.contains(&canonical) {
+                            seen.insert(canonical.clone());
+                            queue.push_back(canonical);
+                        }
+                    } else {
+                        self.warnings.push(format!(
+                            "Import '{}' in {} resolved to non-existent file: {}",
+                            import.raw_path,
+                            current_path.display(),
+                            resolved.display()
+                        ));
+                    }
+                }
+
+                import_infos.push(info);
+            }
+
+            let datasource_infos: Vec<DatasourceInfo> =
+                parsed.datasources.iter().map(DatasourceInfo::from).collect();
+            let persist_infos: Vec<PersistInfo> =
+                parsed.persists.iter().map(PersistInfo::from).collect();
+
+            let declares_datasources: Vec<String> =
+                parsed.datasources.iter().map(|d| d.name.clone()).collect();
+            let updates_datasources: Vec<String> = parsed
+                .persists
+                .iter()
+                .map(|p| p.target_datasource.clone())
+                .collect();
+
+            let relative_path = pathdiff::diff_paths(&current_path, root_dir)
+                .unwrap_or_else(|| current_path.clone());
+
+            files.insert(
+                current_path.clone(),
+                FileNode {
+                    path: current_path,
+                    relative_path,
+                    imports: import_infos,
+                    datasources: datasource_infos,
+                    persists: persist_infos,
+                    import_dependencies,
+                    updates_datasources,
+                    declares_datasources,
+                    depends_on_datasources: Vec::new(), // Will be computed later
+                },
+            );
+        }
+
+        // Build datasource mappings
+        let mut datasource_declarations: HashMap<String, PathBuf> = HashMap::new();
+        let mut datasource_updaters: HashMap<String, Vec<PathBuf>> = HashMap::new();
+
+        for (path, node) in &files {
+            for ds_name in &node.declares_datasources {
+                if let Some(existing) = datasource_declarations.get(ds_name) {
+                    self.warnings.push(format!(
+                        "Datasource '{}' declared in multiple files: {} and {}",
+                        ds_name,
+                        existing.display(),
+                        path.display()
+                    ));
+                } else {
+                    datasource_declarations.insert(ds_name.clone(), path.clone());
+                }
+            }
+
+            for ds_name in &node.updates_datasources {
+                datasource_updaters
+                    .entry(ds_name.clone())
+                    .or_insert_with(Vec::new)
+                    .push(path.clone());
+            }
+        }
+
+        // Compute transitive datasource dependencies through imports
+        self.compute_datasource_dependencies(&mut files, &datasource_declarations);
+
+        // Topological sort with datasource-aware ordering
+        let order =
+            self.topological_sort_with_datasources(&files, &datasource_declarations, &datasource_updaters)?;
+
+        Ok(DependencyGraph {
+            root: root_path,
+            order,
+            files,
+            datasource_declarations,
+            datasource_updaters,
+            warnings: self.warnings.clone(),
+        })
+    }
+
+    fn parse_file(&mut self, path: &Path) -> Result<ParsedFile, ResolveError> {
+        if let Some(cached) = self.parsed_cache.get(path) {
+            return Ok(cached.clone());
+        }
+
+        let content = fs::read_to_string(path).map_err(|e| ResolveError::IoError {
+            path: path.to_path_buf(),
+            source: e,
+        })?;
+
+        let parsed = parse_file(&content).map_err(|e| ResolveError::ParseError {
+            path: path.to_path_buf(),
+            source: e,
+        })?;
+
+        self.parsed_cache.insert(path.to_path_buf(), parsed.clone());
+        Ok(parsed)
+    }
+
+    /// Compute which datasources each file depends on through its import chain
+    fn compute_datasource_dependencies(
+        &self,
+        files: &mut HashMap<PathBuf, FileNode>,
+        _datasource_declarations: &HashMap<String, PathBuf>,
+    ) {
+        // For each file, find all datasources reachable through imports
+        let paths: Vec<PathBuf> = files.keys().cloned().collect();
+
+        for path in paths {
+            let mut reachable_datasources: HashSet<String> = HashSet::new();
+            let mut visited: HashSet<PathBuf> = HashSet::new();
+            let mut stack: Vec<PathBuf> = vec![path.clone()];
+
+            while let Some(current) = stack.pop() {
+                if visited.contains(&current) {
+                    continue;
+                }
+                visited.insert(current.clone());
+
+                if let Some(node) = files.get(&current) {
+                    // Add datasources declared in imported files (not the file itself for the starting file)
+                    if current != path {
+                        for ds in &node.declares_datasources {
+                            reachable_datasources.insert(ds.clone());
+                        }
+                    }
+
+                    // Follow imports
+                    for dep in &node.import_dependencies {
+                        if !visited.contains(dep) {
+                            stack.push(dep.clone());
+                        }
+                    }
+                }
+            }
+
+            if let Some(node) = files.get_mut(&path) {
+                node.depends_on_datasources = reachable_datasources.into_iter().collect();
+            }
+        }
+    }
+
+    /// Topological sort with datasource-aware dependency edges
+    ///
+    /// The ordering rules are:
+    /// 1. Standard import dependencies (imported files run before importing files) - HIGHEST PRIORITY
+    /// 2. Files that UPDATE a datasource (via persist) must run BEFORE files that DECLARE that datasource
+    ///    BUT ONLY if the updater doesn't import the declarer (import takes precedence)
+    /// 3. Files that DECLARE a datasource must run BEFORE files that IMPORT something containing that datasource
+    fn topological_sort_with_datasources(
+        &self,
+        files: &HashMap<PathBuf, FileNode>,
+        datasource_declarations: &HashMap<String, PathBuf>,
+        datasource_updaters: &HashMap<String, Vec<PathBuf>>,
+    ) -> Result<Vec<PathBuf>, ResolveError> {
+        // Build adjacency list with all dependency edges
+        // Edge A -> B means A must be processed before B
+        let mut edges: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();
+
+        // Initialize
+        for path in files.keys() {
+            edges.insert(path.clone(), HashSet::new());
+        }
+
+        // Add edges for each dependency type
+        for (path, node) in files {
+            // Rule 1: Import dependencies - imported file must run before importing file (HIGHEST PRIORITY)
+            for dep in &node.import_dependencies {
+                if files.contains_key(dep) {
+                    edges.get_mut(dep).unwrap().insert(path.clone());
+                }
+            }
+
+            // Rule 2: Files that UPDATE a datasource must run BEFORE files that DECLARE it
+            // BUT ONLY if the updater doesn't import the declarer
+            // If this file declares a datasource, all files that update it must run first
+            // (unless they import this file, in which case import dependency takes precedence)
+            for ds_name in &node.declares_datasources {
+                if let Some(updaters) = datasource_updaters.get(ds_name) {
+                    for updater_path in updaters {
+                        if updater_path != path && files.contains_key(updater_path) {
+                            // Check if updater imports this file (directly or transitively)
+                            let updater_node = files.get(updater_path).unwrap();
+                            let imports_declarer = updater_node.import_dependencies.contains(path);
+
+                            // Only add persist-before-declare edge if there's no import dependency
+                            if !imports_declarer {
+                                // updater must run before declarer
+                                edges.get_mut(updater_path).unwrap().insert(path.clone());
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Rule 3: Files that DECLARE a datasource must run BEFORE files that depend on it (through imports)
+            // If this file depends on a datasource (through imports), the declaring file must run first
+            for ds_name in &node.depends_on_datasources {
+                if let Some(declaring_path) = datasource_declarations.get(ds_name) {
+                    if declaring_path != path && files.contains_key(declaring_path) {
+                        // declarer must run before dependent
+                        edges.get_mut(declaring_path).unwrap().insert(path.clone());
+                    }
+                }
+            }
+        }
+
+        // Kahn's algorithm
+        let mut in_degree: HashMap<PathBuf, usize> = HashMap::new();
+        for path in files.keys() {
+            in_degree.insert(path.clone(), 0);
+        }
+
+        for dependents in edges.values() {
+            for dep in dependents {
+                *in_degree.get_mut(dep).unwrap() += 1;
+            }
+        }
+
+        let mut queue: VecDeque<PathBuf> = VecDeque::new();
+        let mut result: Vec<PathBuf> = Vec::new();
+
+        // Start with nodes that have no incoming edges (no dependencies)
+        for (path, &degree) in &in_degree {
+            if degree == 0 {
+                queue.push_back(path.clone());
+            }
+        }
+
+        while let Some(current) = queue.pop_front() {
+            result.push(current.clone());
+
+            if let Some(dependents) = edges.get(&current) {
+                for dependent in dependents {
+                    let degree = in_degree.get_mut(dependent).unwrap();
+                    *degree -= 1;
+                    if *degree == 0 {
+                        queue.push_back(dependent.clone());
+                    }
+                }
+            }
+        }
+
+        // Check for cycles
+        if result.len() != files.len() {
+            let remaining: Vec<_> = files
+                .keys()
+                .filter(|p| !result.contains(p))
+                .map(|p| p.display().to_string())
+                .collect();
+            return Err(ResolveError::CircularDependency {
+                cycle: remaining.join(" -> "),
+            });
+        }
+
+        Ok(result)
+    }
+}
+
+impl Default for ImportResolver {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+    use tempfile::TempDir;
+
+    fn create_test_file(dir: &Path, name: &str, content: &str) -> PathBuf {
+        let path = dir.join(name);
+        if let Some(parent) = path.parent() {
+            fs::create_dir_all(parent).unwrap();
+        }
+        fs::write(&path, content).unwrap();
+        path
+    }
+
+    #[test]
+    fn test_simple_resolution() {
+        let temp = TempDir::new().unwrap();
+        let root = temp.path();
+
+        create_test_file(root, "a.preql", "import b;");
+        create_test_file(root, "b.preql", "// no imports");
+
+        let a_path = root.join("a.preql");
+        let mut resolver = ImportResolver::new();
+        let graph = resolver.resolve(&a_path).unwrap();
+
+        assert_eq!(graph.order.len(), 2);
+        // b should come before a
+        let b_idx = graph
+            .order
+            .iter()
+            .position(|p| p.ends_with("b.preql"))
+            .unwrap();
+        let a_idx = graph
+            .order
+            .iter()
+            .position(|p| p.ends_with("a.preql"))
+            .unwrap();
+        assert!(b_idx < a_idx, "b should come before a");
+    }
+
+    #[test]
+    fn test_datasource_declaration_ordering() {
+        let temp = TempDir::new().unwrap();
+        let root = temp.path();
+
+        // a.preql imports b, which declares datasource "orders"
+        // c.preql persists to "orders"
+        // Order should be: c (updates orders) -> b (declares orders) -> a (imports b which has orders)
+
+        create_test_file(root, "a.preql", "import b;");
+        create_test_file(
+            root,
+            "b.preql",
+            r#"
+datasource orders (
+    id: key
+)
+address db.orders;
+"#,
+        );
+        create_test_file(root, "c.preql", "persist orders;");
+
+        // Start from a file that imports all others (or just test individual relationships)
+        let a_path = root.join("a.preql");
+        let mut resolver = ImportResolver::new();
+        let graph = resolver.resolve(&a_path).unwrap();
+
+        // Verify datasource is tracked
+        assert!(graph.datasource_declarations.contains_key("orders"));
+
+        // b should come before a (import dependency)
+        let b_idx = graph.order.iter().position(|p| p.ends_with("b.preql")).unwrap();
+        let a_idx = graph.order.iter().position(|p| p.ends_with("a.preql")).unwrap();
+        assert!(b_idx < a_idx, "b should come before a due to import");
+    }
+
+    #[test]
+    fn test_persist_before_declare() {
+        let temp = TempDir::new().unwrap();
+        let root = temp.path();
+
+        // updater.preql persists to "orders"
+        // declarer.preql declares datasource "orders" and imports updater
+        // Order should be: updater (updates orders) -> declarer (declares orders)
+
+        create_test_file(root, "updater.preql", "persist orders;");
+        create_test_file(
+            root,
+            "declarer.preql",
+            r#"
+import updater;
+datasource orders (
+    id: key
+)
+address db.orders;
+"#,
+        );
+
+        let declarer_path = root.join("declarer.preql");
+        let mut resolver = ImportResolver::new();
+        let graph = resolver.resolve(&declarer_path).unwrap();
+
+        let updater_idx = graph
+            .order
+            .iter()
+            .position(|p| p.ends_with("updater.preql"))
+            .unwrap();
+        let declarer_idx = graph
+            .order
+            .iter()
+            .position(|p| p.ends_with("declarer.preql"))
+            .unwrap();
+
+        assert!(
+            updater_idx < declarer_idx,
+            "updater (persist) should come before declarer (datasource)"
+        );
+    }
+
+    #[test]
+    fn test_full_dependency_chain() {
+        let temp = TempDir::new().unwrap();
+        let root = temp.path();
+
+        // Setup:
+        // - base.preql: declares datasource "orders"
+        // - updater.preql: persists to "orders" (doesn't import base)
+        // - consumer.preql: imports base (uses orders datasource)
+        //
+        // Expected order: updater -> base -> consumer
+        // Because:
+        // - updater updates orders, so must run before base (which declares it)
+        // - base declares orders, so must run before consumer (which imports base and thus depends on orders)
+
+        create_test_file(
+            root,
+            "base.preql",
+            r#"
+datasource orders (
+    id: key,
+    amount: metric
+)
+address db.orders;
+"#,
+        );
+        create_test_file(
+            root,
+            "updater.preql",
+            r#"
+persist orders where amount > 100;
+"#,
+        );
+        create_test_file(
+            root,
+            "consumer.preql",
+            r#"
+import base;
+// uses orders datasource
+"#,
+        );
+
+        // Create an entry point that imports everything
+        create_test_file(
+            root,
+            "main.preql",
+            r#"
+import updater;
+import consumer;
+"#,
+        );
+
+        let main_path = root.join("main.preql");
+        let mut resolver = ImportResolver::new();
+        let graph = resolver.resolve(&main_path).unwrap();
+
+        assert_eq!(graph.order.len(), 4);
+
+        let updater_idx = graph
+            .order
+            .iter()
+            .position(|p| p.ends_with("updater.preql"))
+            .unwrap();
+        let base_idx = graph
+            .order
+            .iter()
+            .position(|p| p.ends_with("base.preql"))
+            .unwrap();
+        let consumer_idx = graph
+            .order
+            .iter()
+            .position(|p| p.ends_with("consumer.preql"))
+            .unwrap();
+        let main_idx = graph
+            .order
+            .iter()
+            .position(|p| p.ends_with("main.preql"))
+            .unwrap();
+
+        // updater must come before base (persist before declare)
+        assert!(
+            updater_idx < base_idx,
+            "updater should come before base: updater={}, base={}",
+            updater_idx,
+            base_idx
+        );
+
+        // base must come before consumer (consumer imports base which has the datasource)
+        assert!(
+            base_idx < consumer_idx,
+            "base should come before consumer: base={}, consumer={}",
+            base_idx,
+            consumer_idx
+        );
+
+        // main comes last (imports everything)
+        assert!(
+            updater_idx < main_idx && base_idx < main_idx && consumer_idx < main_idx,
+            "main should come after all others"
+        );
+    }
+
+    #[test]
+    fn test_multiple_datasources() {
+        let temp = TempDir::new().unwrap();
+        let root = temp.path();
+
+        create_test_file(
+            root,
+            "models.preql",
+            r#"
+datasource customers (
+    id: key
+)
+address db.customers;
+
+datasource orders (
+    id: key,
+    customer_id
+)
+address db.orders;
+"#,
+        );
+
+        let models_path = root.join("models.preql");
+        let mut resolver = ImportResolver::new();
+        let graph = resolver.resolve(&models_path).unwrap();
+
+        assert_eq!(graph.datasource_declarations.len(), 2);
+        assert!(graph.datasource_declarations.contains_key("customers"));
+        assert!(graph.datasource_declarations.contains_key("orders"));
+    }
+}
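For orientation, here is a minimal sketch of how the resolver shown in this diff might be driven. It is not part of the package: `main.preql` is a placeholder entry point, and the sketch assumes `ImportResolver` and `DependencyGraph` from the `resolver.rs` above are in scope.

```rust
use std::path::Path;

fn main() {
    // Assumes `ImportResolver` from the resolver.rs in this diff is in scope.
    let mut resolver = ImportResolver::new();
    // "main.preql" is a hypothetical entry-point file that must exist on disk,
    // since resolve() canonicalizes the path before parsing.
    match resolver.resolve(Path::new("main.preql")) {
        Ok(graph) => {
            // `order` lists every reachable file with dependencies before dependents.
            for path in &graph.order {
                println!("{}", path.display());
            }
            // Warnings (e.g. unresolvable imports, duplicate datasource
            // declarations) are accumulated rather than treated as fatal.
            for warning in &graph.warnings {
                eprintln!("warning: {warning}");
            }
        }
        Err(err) => eprintln!("resolution failed: {err}"),
    }
}
```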