pytrilogy 0.3.142__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp312-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.142.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.142.dist-info/RECORD +200 -0
  6. pytrilogy-0.3.142.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +16 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2669 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +501 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +751 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1392 -0
  112. trilogy/dialect/bigquery.py +308 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +231 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +769 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +9 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/agent.py +41 -0
  148. trilogy/scripts/agent_info.py +303 -0
  149. trilogy/scripts/common.py +355 -0
  150. trilogy/scripts/dependency/Cargo.lock +617 -0
  151. trilogy/scripts/dependency/Cargo.toml +39 -0
  152. trilogy/scripts/dependency/README.md +131 -0
  153. trilogy/scripts/dependency/build.sh +25 -0
  154. trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
  155. trilogy/scripts/dependency/src/lib.rs +16 -0
  156. trilogy/scripts/dependency/src/main.rs +770 -0
  157. trilogy/scripts/dependency/src/parser.rs +435 -0
  158. trilogy/scripts/dependency/src/preql.pest +208 -0
  159. trilogy/scripts/dependency/src/python_bindings.rs +303 -0
  160. trilogy/scripts/dependency/src/resolver.rs +716 -0
  161. trilogy/scripts/dependency/tests/base.preql +3 -0
  162. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  163. trilogy/scripts/dependency/tests/customer.preql +6 -0
  164. trilogy/scripts/dependency/tests/main.preql +9 -0
  165. trilogy/scripts/dependency/tests/orders.preql +7 -0
  166. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  167. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  168. trilogy/scripts/dependency.py +323 -0
  169. trilogy/scripts/display.py +512 -0
  170. trilogy/scripts/environment.py +46 -0
  171. trilogy/scripts/fmt.py +32 -0
  172. trilogy/scripts/ingest.py +471 -0
  173. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  174. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  175. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  176. trilogy/scripts/ingest_helpers/typing.py +161 -0
  177. trilogy/scripts/init.py +105 -0
  178. trilogy/scripts/parallel_execution.py +713 -0
  179. trilogy/scripts/plan.py +189 -0
  180. trilogy/scripts/run.py +63 -0
  181. trilogy/scripts/serve.py +140 -0
  182. trilogy/scripts/serve_helpers/__init__.py +41 -0
  183. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  184. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  185. trilogy/scripts/serve_helpers/models.py +38 -0
  186. trilogy/scripts/single_execution.py +131 -0
  187. trilogy/scripts/testing.py +119 -0
  188. trilogy/scripts/trilogy.py +68 -0
  189. trilogy/std/__init__.py +0 -0
  190. trilogy/std/color.preql +3 -0
  191. trilogy/std/date.preql +13 -0
  192. trilogy/std/display.preql +18 -0
  193. trilogy/std/geography.preql +22 -0
  194. trilogy/std/metric.preql +15 -0
  195. trilogy/std/money.preql +67 -0
  196. trilogy/std/net.preql +14 -0
  197. trilogy/std/ranking.preql +7 -0
  198. trilogy/std/report.preql +5 -0
  199. trilogy/std/semantic.preql +6 -0
  200. trilogy/utility.py +34 -0
@@ -0,0 +1,716 @@
1
+ use crate::parser::{parse_file, DatasourceDeclaration, ImportStatement, ParseError, PersistStatement, ParsedFile};
2
+ use serde::Serialize;
3
+ use std::collections::{HashMap, HashSet, VecDeque};
4
+ use std::fs;
5
+ use std::path::{Path, PathBuf};
6
+ use thiserror::Error;
7
+
8
/// Errors produced while resolving PreQL import dependencies.
#[derive(Error, Debug)]
pub enum ResolveError {
    /// A file could not be read or canonicalized on disk.
    #[error("Failed to read file {path}: {source}")]
    IoError {
        path: PathBuf,
        source: std::io::Error,
    },

    /// A file was read but its contents failed to parse as PreQL.
    #[error("Parse error in {path}: {source}")]
    ParseError { path: PathBuf, source: ParseError },

    /// The dependency graph could not be topologically ordered; `cycle`
    /// lists the files that remained unordered.
    #[error("Circular dependency detected: {cycle}")]
    CircularDependency { cycle: String },

    /// An import resolved to a path that does not exist.
    // NOTE(review): this variant is not constructed anywhere in this file —
    // missing imports are currently reported via `warnings` instead.
    #[error("Import not found: {import_path} (resolved to {resolved_path})")]
    ImportNotFound {
        import_path: String,
        resolved_path: PathBuf,
    },
}
28
+
29
/// Information about a single import
#[derive(Debug, Clone, Serialize)]
pub struct ImportInfo {
    /// The import path exactly as written in the source file.
    pub raw_path: String,
    /// Alias given to the import, if any.
    pub alias: Option<String>,
    /// Canonical path the import resolved to; `None` until resolution
    /// succeeds (and always `None` for stdlib imports).
    pub resolved_path: Option<PathBuf>,
    /// True for standard-library imports, which are never resolved to files.
    pub is_stdlib: bool,
}
37
+
38
+ impl From<&ImportStatement> for ImportInfo {
39
+ fn from(stmt: &ImportStatement) -> Self {
40
+ ImportInfo {
41
+ raw_path: stmt.raw_path.clone(),
42
+ alias: stmt.alias.clone(),
43
+ resolved_path: None,
44
+ is_stdlib: stmt.is_stdlib,
45
+ }
46
+ }
47
+ }
48
+
49
/// Information about a datasource declaration
#[derive(Debug, Clone, Serialize)]
pub struct DatasourceInfo {
    /// Name of the declared datasource.
    pub name: String,
}
54
+
55
+ impl From<&DatasourceDeclaration> for DatasourceInfo {
56
+ fn from(ds: &DatasourceDeclaration) -> Self {
57
+ DatasourceInfo {
58
+ name: ds.name.clone(),
59
+ }
60
+ }
61
+ }
62
+
63
/// Information about a persist statement
#[derive(Debug, Clone, Serialize)]
pub struct PersistInfo {
    /// Persist mode, stringified from the parsed statement's mode value.
    pub mode: String,
    /// Name of the datasource the persist writes into.
    pub target_datasource: String,
}
69
+
70
+ impl From<&PersistStatement> for PersistInfo {
71
+ fn from(ps: &PersistStatement) -> Self {
72
+ PersistInfo {
73
+ mode: ps.mode.to_string(),
74
+ target_datasource: ps.target_datasource.clone(),
75
+ }
76
+ }
77
+ }
78
+
79
/// A node in the dependency graph
#[derive(Debug, Clone, Serialize)]
pub struct FileNode {
    /// Absolute (canonical) path to the file
    pub path: PathBuf,
    /// Relative path from the root file's directory
    pub relative_path: PathBuf,
    /// List of imports in this file (stdlib imports included, unresolved)
    pub imports: Vec<ImportInfo>,
    /// List of datasources declared in this file
    pub datasources: Vec<DatasourceInfo>,
    /// List of persist statements in this file
    pub persists: Vec<PersistInfo>,
    /// List of resolved import dependencies (canonical paths to other files)
    pub import_dependencies: Vec<PathBuf>,
    /// Datasources this file updates (via persist)
    pub updates_datasources: Vec<String>,
    /// Datasources this file declares
    pub declares_datasources: Vec<String>,
    /// Datasources this file depends on (through imports, transitively);
    /// empty at construction, filled by `compute_datasource_dependencies`
    pub depends_on_datasources: Vec<String>,
}
101
+
102
/// Result of dependency resolution
#[derive(Debug, Clone, Serialize)]
pub struct DependencyGraph {
    /// Root file that was analyzed (canonical path)
    pub root: PathBuf,
    /// All files in dependency order (dependencies come before dependents)
    pub order: Vec<PathBuf>,
    /// Detailed information about each file, keyed by canonical path
    pub files: HashMap<PathBuf, FileNode>,
    /// Mapping of datasource names to the file that declares them
    /// (first declaration wins; duplicates produce warnings)
    pub datasource_declarations: HashMap<String, PathBuf>,
    /// Mapping of datasource names to files that update them (via persist)
    pub datasource_updaters: HashMap<String, Vec<PathBuf>>,
    /// Any errors encountered (non-fatal), e.g. missing imports or
    /// duplicate datasource declarations
    pub warnings: Vec<String>,
}
118
+
119
/// Resolver for PreQL import dependencies
pub struct ImportResolver {
    /// Cache of parsed files, keyed by path, so each file parses once
    parsed_cache: HashMap<PathBuf, ParsedFile>,
    /// Track files currently being processed (for cycle detection)
    // NOTE(review): unused in this file — cycle detection actually happens
    // in the topological sort; hence the dead_code allowance.
    #[allow(dead_code)]
    processing: HashSet<PathBuf>,
    /// Track fully processed files
    // NOTE(review): also unused in this file (see above).
    #[allow(dead_code)]
    processed: HashSet<PathBuf>,
    /// Warnings accumulated during resolution
    warnings: Vec<String>,
}
132
+
133
impl ImportResolver {
    /// Create an empty resolver with no cached or processed files.
    pub fn new() -> Self {
        Self {
            parsed_cache: HashMap::new(),
            processing: HashSet::new(),
            processed: HashSet::new(),
            warnings: Vec::new(),
        }
    }

    /// Resolve all dependencies starting from a root file
    ///
    /// Walks the import graph breadth-first from `root_path`, parsing each
    /// reachable file exactly once, then builds datasource declaration and
    /// updater mappings, computes transitive datasource dependencies, and
    /// produces a datasource-aware topological ordering.
    ///
    /// # Errors
    /// Returns `ResolveError` when a file cannot be read/canonicalized, when
    /// a file fails to parse, or when the graph contains a cycle. Missing
    /// import targets are NOT errors: they are recorded in `warnings`.
    pub fn resolve(&mut self, root_path: &Path) -> Result<DependencyGraph, ResolveError> {
        // Canonicalize first so that cache keys and graph nodes are stable
        // absolute paths regardless of how the caller spelled the path.
        let root_path = fs::canonicalize(root_path).map_err(|e| ResolveError::IoError {
            path: root_path.to_path_buf(),
            source: e,
        })?;

        let root_dir = root_path.parent().unwrap_or(Path::new("."));
        let mut files: HashMap<PathBuf, FileNode> = HashMap::new();

        // BFS to collect all files first
        let mut queue: VecDeque<PathBuf> = VecDeque::new();
        let mut seen: HashSet<PathBuf> = HashSet::new();

        queue.push_back(root_path.clone());
        seen.insert(root_path.clone());

        while let Some(current_path) = queue.pop_front() {
            let parsed = self.parse_file(&current_path)?;
            // Imports are resolved relative to the importing file's directory.
            let file_dir = current_path.parent().unwrap_or(Path::new("."));

            let mut import_infos: Vec<ImportInfo> = Vec::new();
            let mut import_dependencies: Vec<PathBuf> = Vec::new();

            for import in &parsed.imports {
                let mut info = ImportInfo::from(import);

                // Stdlib imports are recorded but never resolved to files.
                if import.is_stdlib {
                    import_infos.push(info);
                    continue;
                }

                // NOTE(review): if `import.resolve` returns None, the import
                // is kept unresolved with no warning emitted.
                if let Some(resolved) = import.resolve(file_dir) {
                    if resolved.exists() {
                        let canonical =
                            fs::canonicalize(&resolved).map_err(|e| ResolveError::IoError {
                                path: resolved.clone(),
                                source: e,
                            })?;

                        info.resolved_path = Some(canonical.clone());
                        import_dependencies.push(canonical.clone());

                        // Enqueue each file only once.
                        if !seen.contains(&canonical) {
                            seen.insert(canonical.clone());
                            queue.push_back(canonical);
                        }
                    } else {
                        // Missing import target is non-fatal: warn and continue.
                        self.warnings.push(format!(
                            "Import '{}' in {} resolved to non-existent file: {}",
                            import.raw_path,
                            current_path.display(),
                            resolved.display()
                        ));
                    }
                }

                import_infos.push(info);
            }

            let datasource_infos: Vec<DatasourceInfo> =
                parsed.datasources.iter().map(DatasourceInfo::from).collect();
            let persist_infos: Vec<PersistInfo> =
                parsed.persists.iter().map(PersistInfo::from).collect();

            let declares_datasources: Vec<String> =
                parsed.datasources.iter().map(|d| d.name.clone()).collect();
            let updates_datasources: Vec<String> = parsed
                .persists
                .iter()
                .map(|p| p.target_datasource.clone())
                .collect();

            // Fall back to the absolute path if no relative form exists
            // (e.g. paths on different drives).
            let relative_path = pathdiff::diff_paths(&current_path, root_dir)
                .unwrap_or_else(|| current_path.clone());

            files.insert(
                current_path.clone(),
                FileNode {
                    path: current_path,
                    relative_path,
                    imports: import_infos,
                    datasources: datasource_infos,
                    persists: persist_infos,
                    import_dependencies,
                    updates_datasources,
                    declares_datasources,
                    depends_on_datasources: Vec::new(), // Will be computed later
                },
            );
        }

        // Build datasource mappings
        let mut datasource_declarations: HashMap<String, PathBuf> = HashMap::new();
        let mut datasource_updaters: HashMap<String, Vec<PathBuf>> = HashMap::new();

        for (path, node) in &files {
            for ds_name in &node.declares_datasources {
                // First declaration wins; later duplicates only warn.
                if let Some(existing) = datasource_declarations.get(ds_name) {
                    self.warnings.push(format!(
                        "Datasource '{}' declared in multiple files: {} and {}",
                        ds_name,
                        existing.display(),
                        path.display()
                    ));
                } else {
                    datasource_declarations.insert(ds_name.clone(), path.clone());
                }
            }

            for ds_name in &node.updates_datasources {
                datasource_updaters
                    .entry(ds_name.clone())
                    .or_insert_with(Vec::new)
                    .push(path.clone());
            }
        }

        // Compute transitive datasource dependencies through imports
        self.compute_datasource_dependencies(&mut files, &datasource_declarations);

        // Topological sort with datasource-aware ordering
        let order =
            self.topological_sort_with_datasources(&files, &datasource_declarations, &datasource_updaters)?;

        Ok(DependencyGraph {
            root: root_path,
            order,
            files,
            datasource_declarations,
            datasource_updaters,
            warnings: self.warnings.clone(),
        })
    }

    /// Parse `path`, caching the result so each file is parsed at most once
    /// per resolver instance.
    ///
    /// # Errors
    /// `IoError` if the file cannot be read, `ParseError` if it cannot be
    /// parsed.
    fn parse_file(&mut self, path: &Path) -> Result<ParsedFile, ResolveError> {
        if let Some(cached) = self.parsed_cache.get(path) {
            return Ok(cached.clone());
        }

        let content = fs::read_to_string(path).map_err(|e| ResolveError::IoError {
            path: path.to_path_buf(),
            source: e,
        })?;

        let parsed = parse_file(&content).map_err(|e| ResolveError::ParseError {
            path: path.to_path_buf(),
            source: e,
        })?;

        self.parsed_cache.insert(path.to_path_buf(), parsed.clone());
        Ok(parsed)
    }

    /// Compute which datasources each file depends on through its import chain
    ///
    /// For every file, performs a DFS over `import_dependencies` and collects
    /// the datasources declared by every reachable file EXCEPT the starting
    /// file itself (a file does not "depend on" its own declarations).
    /// Results are written into each node's `depends_on_datasources`.
    fn compute_datasource_dependencies(
        &self,
        files: &mut HashMap<PathBuf, FileNode>,
        _datasource_declarations: &HashMap<String, PathBuf>,
    ) {
        // For each file, find all datasources reachable through imports
        let paths: Vec<PathBuf> = files.keys().cloned().collect();

        for path in paths {
            let mut reachable_datasources: HashSet<String> = HashSet::new();
            let mut visited: HashSet<PathBuf> = HashSet::new();
            let mut stack: Vec<PathBuf> = vec![path.clone()];

            // Iterative DFS; `visited` guards against import cycles.
            while let Some(current) = stack.pop() {
                if visited.contains(&current) {
                    continue;
                }
                visited.insert(current.clone());

                if let Some(node) = files.get(&current) {
                    // Add datasources declared in imported files (not the file itself for the starting file)
                    if current != path {
                        for ds in &node.declares_datasources {
                            reachable_datasources.insert(ds.clone());
                        }
                    }

                    // Follow imports
                    for dep in &node.import_dependencies {
                        if !visited.contains(dep) {
                            stack.push(dep.clone());
                        }
                    }
                }
            }

            if let Some(node) = files.get_mut(&path) {
                // HashSet iteration order is unspecified, so the resulting
                // Vec is in arbitrary order.
                node.depends_on_datasources = reachable_datasources.into_iter().collect();
            }
        }
    }

    /// Topological sort with datasource-aware dependency edges
    ///
    /// The ordering rules are:
    /// 1. Standard import dependencies (imported files run before importing files) - HIGHEST PRIORITY
    /// 2. Files that UPDATE a datasource (via persist) must run BEFORE files that DECLARE that datasource
    ///    BUT ONLY if the updater doesn't import the declarer (import takes precedence)
    /// 3. Files that DECLARE a datasource must run BEFORE files that IMPORT something containing that datasource
    ///
    /// # Errors
    /// Returns `CircularDependency` when Kahn's algorithm cannot order every
    /// file, listing the files left unordered.
    fn topological_sort_with_datasources(
        &self,
        files: &HashMap<PathBuf, FileNode>,
        datasource_declarations: &HashMap<String, PathBuf>,
        datasource_updaters: &HashMap<String, Vec<PathBuf>>,
    ) -> Result<Vec<PathBuf>, ResolveError> {
        // Build adjacency list with all dependency edges
        // Edge A -> B means A must be processed before B
        let mut edges: HashMap<PathBuf, HashSet<PathBuf>> = HashMap::new();

        // Initialize
        for path in files.keys() {
            edges.insert(path.clone(), HashSet::new());
        }

        // Add edges for each dependency type
        for (path, node) in files {
            // Rule 1: Import dependencies - imported file must run before importing file (HIGHEST PRIORITY)
            for dep in &node.import_dependencies {
                if files.contains_key(dep) {
                    edges.get_mut(dep).unwrap().insert(path.clone());
                }
            }

            // Rule 2: Files that UPDATE a datasource must run BEFORE files that DECLARE it
            // BUT ONLY if the updater doesn't import the declarer
            // If this file declares a datasource, all files that update it must run first
            // (unless they import this file, in which case import dependency takes precedence)
            for ds_name in &node.declares_datasources {
                if let Some(updaters) = datasource_updaters.get(ds_name) {
                    for updater_path in updaters {
                        if updater_path != path && files.contains_key(updater_path) {
                            // Check if updater imports this file (directly or transitively)
                            // NOTE(review): only DIRECT imports are checked here,
                            // despite the comment above — confirm whether a
                            // transitive check was intended.
                            let updater_node = files.get(updater_path).unwrap();
                            let imports_declarer = updater_node.import_dependencies.contains(path);

                            // Only add persist-before-declare edge if there's no import dependency
                            if !imports_declarer {
                                // updater must run before declarer
                                edges.get_mut(updater_path).unwrap().insert(path.clone());
                            }
                        }
                    }
                }
            }

            // Rule 3: Files that DECLARE a datasource must run BEFORE files that depend on it (through imports)
            // If this file depends on a datasource (through imports), the declaring file must run first
            for ds_name in &node.depends_on_datasources {
                if let Some(declaring_path) = datasource_declarations.get(ds_name) {
                    if declaring_path != path && files.contains_key(declaring_path) {
                        // declarer must run before dependent
                        edges.get_mut(declaring_path).unwrap().insert(path.clone());
                    }
                }
            }
        }

        // Kahn's algorithm
        let mut in_degree: HashMap<PathBuf, usize> = HashMap::new();
        for path in files.keys() {
            in_degree.insert(path.clone(), 0);
        }

        for dependents in edges.values() {
            for dep in dependents {
                *in_degree.get_mut(dep).unwrap() += 1;
            }
        }

        let mut queue: VecDeque<PathBuf> = VecDeque::new();
        let mut result: Vec<PathBuf> = Vec::new();

        // Start with nodes that have no incoming edges (no dependencies)
        for (path, &degree) in &in_degree {
            if degree == 0 {
                queue.push_back(path.clone());
            }
        }

        while let Some(current) = queue.pop_front() {
            result.push(current.clone());

            // Releasing `current` decrements every dependent; any dependent
            // whose last prerequisite this was becomes ready.
            if let Some(dependents) = edges.get(&current) {
                for dependent in dependents {
                    let degree = in_degree.get_mut(dependent).unwrap();
                    *degree -= 1;
                    if *degree == 0 {
                        queue.push_back(dependent.clone());
                    }
                }
            }
        }

        // Check for cycles: any file not emitted is part of (or downstream
        // of) a cycle.
        if result.len() != files.len() {
            let remaining: Vec<_> = files
                .keys()
                .filter(|p| !result.contains(p))
                .map(|p| p.display().to_string())
                .collect();
            return Err(ResolveError::CircularDependency {
                cycle: remaining.join(" -> "),
            });
        }

        Ok(result)
    }
}
456
+
457
+ impl Default for ImportResolver {
458
+ fn default() -> Self {
459
+ Self::new()
460
+ }
461
+ }
462
+
463
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Write `content` to `dir/name`, creating parent directories as needed,
    /// and return the full path.
    fn create_test_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&path, content).unwrap();
        path
    }

    /// A file's imports must be ordered before the file that imports them.
    #[test]
    fn test_simple_resolution() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        create_test_file(root, "a.preql", "import b;");
        create_test_file(root, "b.preql", "// no imports");

        let a_path = root.join("a.preql");
        let mut resolver = ImportResolver::new();
        let graph = resolver.resolve(&a_path).unwrap();

        assert_eq!(graph.order.len(), 2);
        // b should come before a
        let b_idx = graph
            .order
            .iter()
            .position(|p| p.ends_with("b.preql"))
            .unwrap();
        let a_idx = graph
            .order
            .iter()
            .position(|p| p.ends_with("a.preql"))
            .unwrap();
        assert!(b_idx < a_idx, "b should come before a");
    }

    /// Datasource declarations are tracked, and import ordering still holds
    /// when a datasource is involved.
    #[test]
    fn test_datasource_declaration_ordering() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // a.preql imports b, which declares datasource "orders"
        // c.preql persists to "orders"
        // Order should be: c (updates orders) -> b (declares orders) -> a (imports b which has orders)
        // NOTE(review): c.preql is unreachable from a.preql, so only a and b
        // end up in the graph here.

        create_test_file(root, "a.preql", "import b;");
        create_test_file(
            root,
            "b.preql",
            r#"
datasource orders (
id: key
)
address db.orders;
"#,
        );
        create_test_file(root, "c.preql", "persist orders;");

        // Start from a file that imports all others (or just test individual relationships)
        let a_path = root.join("a.preql");
        let mut resolver = ImportResolver::new();
        let graph = resolver.resolve(&a_path).unwrap();

        // Verify datasource is tracked
        assert!(graph.datasource_declarations.contains_key("orders"));

        // b should come before a (import dependency)
        let b_idx = graph.order.iter().position(|p| p.ends_with("b.preql")).unwrap();
        let a_idx = graph.order.iter().position(|p| p.ends_with("a.preql")).unwrap();
        assert!(b_idx < a_idx, "b should come before a due to import");
    }

    /// Rule 2: a persist-updater runs before the file declaring the same
    /// datasource — even when the declarer imports the updater.
    #[test]
    fn test_persist_before_declare() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // updater.preql persists to "orders"
        // declarer.preql declares datasource "orders" and imports updater
        // Order should be: updater (updates orders) -> declarer (declares orders)

        create_test_file(root, "updater.preql", "persist orders;");
        create_test_file(
            root,
            "declarer.preql",
            r#"
import updater;
datasource orders (
id: key
)
address db.orders;
"#,
        );

        let declarer_path = root.join("declarer.preql");
        let mut resolver = ImportResolver::new();
        let graph = resolver.resolve(&declarer_path).unwrap();

        let updater_idx = graph
            .order
            .iter()
            .position(|p| p.ends_with("updater.preql"))
            .unwrap();
        let declarer_idx = graph
            .order
            .iter()
            .position(|p| p.ends_with("declarer.preql"))
            .unwrap();

        assert!(
            updater_idx < declarer_idx,
            "updater (persist) should come before declarer (datasource)"
        );
    }

    /// All three ordering rules together: updater -> base -> consumer -> main.
    #[test]
    fn test_full_dependency_chain() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // Setup:
        // - base.preql: declares datasource "orders"
        // - updater.preql: persists to "orders" (doesn't import base)
        // - consumer.preql: imports base (uses orders datasource)
        //
        // Expected order: updater -> base -> consumer
        // Because:
        // - updater updates orders, so must run before base (which declares it)
        // - base declares orders, so must run before consumer (which imports base and thus depends on orders)

        create_test_file(
            root,
            "base.preql",
            r#"
datasource orders (
id: key,
amount: metric
)
address db.orders;
"#,
        );
        create_test_file(
            root,
            "updater.preql",
            r#"
persist orders where amount > 100;
"#,
        );
        create_test_file(
            root,
            "consumer.preql",
            r#"
import base;
// uses orders datasource
"#,
        );

        // Create an entry point that imports everything
        create_test_file(
            root,
            "main.preql",
            r#"
import updater;
import consumer;
"#,
        );

        let main_path = root.join("main.preql");
        let mut resolver = ImportResolver::new();
        let graph = resolver.resolve(&main_path).unwrap();

        assert_eq!(graph.order.len(), 4);

        let updater_idx = graph
            .order
            .iter()
            .position(|p| p.ends_with("updater.preql"))
            .unwrap();
        let base_idx = graph
            .order
            .iter()
            .position(|p| p.ends_with("base.preql"))
            .unwrap();
        let consumer_idx = graph
            .order
            .iter()
            .position(|p| p.ends_with("consumer.preql"))
            .unwrap();
        let main_idx = graph
            .order
            .iter()
            .position(|p| p.ends_with("main.preql"))
            .unwrap();

        // updater must come before base (persist before declare)
        assert!(
            updater_idx < base_idx,
            "updater should come before base: updater={}, base={}",
            updater_idx,
            base_idx
        );

        // base must come before consumer (consumer imports base which has the datasource)
        assert!(
            base_idx < consumer_idx,
            "base should come before consumer: base={}, consumer={}",
            base_idx,
            consumer_idx
        );

        // main comes last (imports everything)
        assert!(
            updater_idx < main_idx && base_idx < main_idx && consumer_idx < main_idx,
            "main should come after all others"
        );
    }

    /// Multiple datasource declarations in one file are all tracked.
    #[test]
    fn test_multiple_datasources() {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        create_test_file(
            root,
            "models.preql",
            r#"
datasource customers (
id: key
)
address db.customers;

datasource orders (
id: key,
customer_id
)
address db.orders;
"#,
        );

        let models_path = root.join("models.preql");
        let mut resolver = ImportResolver::new();
        let graph = resolver.resolve(&models_path).unwrap();

        assert_eq!(graph.datasource_declarations.len(), 2);
        assert!(graph.datasource_declarations.contains_key("customers"));
        assert!(graph.datasource_declarations.contains_key("orders"));
    }
}