@sqlanvil/core 0.0.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/configs.proto ADDED
@@ -0,0 +1,1028 @@
1
+ // Copyright 2023 Google LLC
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ syntax = "proto3";
16
+
17
+ package sqlanvil;
18
+
19
+ option java_package = "com.sqlanvil.protos";
20
+ option java_outer_classname = "ConfigsMeta";
21
+ option java_multiple_files = true;
22
+
23
+ option go_package = "github.com/sqlanvil/sqlanvil/protos/sqlanvil";
24
+
25
+ import "google/protobuf/struct.proto";
26
+ import "extension.proto";
27
+
28
+ // Workflow Settings defines the contents of the `workflow_settings.yaml`
29
+ // configuration file.
30
+ message WorkflowSettings {
31
+ // The desired sqlanvil core version to compile against.
32
+ string sqlanvil_core_version = 1;
33
+
34
+ // Required. The default Google Cloud project (database).
35
+ string default_project = 2;
36
+
37
+ // Required. The default dataset (schema).
38
+ string default_dataset = 3;
39
+
40
+ // Required. The default BigQuery location to use. For more information on
41
+ // BigQuery locations, see https://cloud.google.com/bigquery/docs/locations.
42
+ string default_location = 4;
43
+
44
+ // Required. The default dataset (schema) for assertions.
45
+ string default_assertion_dataset = 5;
46
+
47
+ // Optional. User-defined variables that are made available to project code
48
+ // during compilation. An object containing a list of "key": value pairs.
49
+ map<string, string> vars = 6;
50
+
51
+ // Optional. The suffix to append to all Google Cloud project references.
52
+ string project_suffix = 7;
53
+
54
+ // Optional. The suffix to append to all dataset references.
55
+ string dataset_suffix = 8;
56
+
57
+ // Optional. The prefix to prepend to all action names.
58
+ string name_prefix = 9;
59
+
60
+ // Optional. Default runtime options for Notebook actions.
61
+ NotebookRuntimeOptionsConfig default_notebook_runtime_options = 10;
62
+
63
+ // Optional. The prefix to prepend to built-in assertion names.
64
+ string builtin_assertion_name_prefix = 11;
65
+
66
+ // Optional. Default config options for Iceberg tables.
67
+ DefaultIcebergConfig default_iceberg_config = 12;
68
+
69
+ // Optional. Disables all assertions including built-in assertions
70
+ // (uniqueKey, nonNull, rowConditions) and manual assertions (type: assertion).
71
+ // When true, assertions will still be compiled but marked as disabled.
72
+ bool disable_assertions = 13;
73
+
74
+ // Optional. The default BigQuery reservation to use for execution.
75
+ // If unset, default BigQuery behavior applies.
76
+ // sqlanvil CLI only (GCP sqlanvil support pending).
77
+ string default_reservation = 14;
78
+
79
+ // Optional. An external package that provides an extension.
80
+ Extension extension = 15;
81
+
82
+ // Optional. If set to true, unit tests will be included in the compiled graph.
83
+ bool include_tests_in_compiled_graph = 16;
84
+
85
+ // Optional. The database warehouse to use, e.g. "bigquery", "postgres", "supabase".
86
+ string warehouse = 17;
87
+ }
88
+
89
+ message DefaultIcebergConfig {
90
+ // Optional. Bucket name used to construct a storage URI when creating an
91
+ // Iceberg table.
92
+ string bucket_name = 1;
93
+
94
+ // Optional. Table folder root used to construct a storage URI when creating
95
+ // an Iceberg table.
96
+ string table_folder_root = 2;
97
+
98
+ // Optional. Table folder subpath used to construct a storage URI when
99
+ // creating an Iceberg table.
100
+ string table_folder_subpath = 3;
101
+
102
+ // Optional. The connection specifying the credentials to be used to read and
103
+ // write to external storage, such as Cloud Storage.
104
+ string connection = 4;
105
+ }
106
+
107
+ // Action configs defines the contents of `actions.yaml` configuration files.
108
+ message ActionConfigs {
109
+ repeated ActionConfig actions = 1;
110
+ }
111
+
112
+ // Action config defines a single action entry of an `actions.yaml` configuration file.
113
+ message ActionConfig {
114
+ // Target represents a unique action identifier.
115
+ message Target {
116
+ // The Google Cloud project (database) of the action.
117
+ string project = 1;
118
+
119
+ // The dataset (schema) of the action. For notebooks, this is the location.
120
+ string dataset = 2;
121
+
122
+ // The name of the action.
123
+ string name = 4;
124
+
125
+ // Flag indicating that the assertions of this dependency should also be
126
+ // added to dependency_targets.
127
+ bool include_dependent_assertions = 5;
128
+ }
129
+
130
+ message ColumnDescriptor {
131
+ // The identifier for the column, using multiple parts for nested records.
132
+ repeated string path = 1;
133
+
134
+ // A text description of the column.
135
+ string description = 2;
136
+
137
+ // A list of BigQuery policy tags that will be applied to the column.
138
+ repeated string bigquery_policy_tags = 3;
139
+
140
+ // A list of tags for this column which will be applied.
141
+ repeated string tags = 4;
142
+ }
143
+
144
+ // Options for shorthand specifying assertions, useable for some table-based
145
+ // action types.
146
+ message TableAssertionsConfig {
147
+ // Column(s) which constitute the dataset's unique key index.
148
+ // If set, the resulting assertion will fail if there is more than one row
149
+ // in the dataset with the same values for all of these column(s).
150
+ repeated string unique_key = 1;
151
+
152
+ // Combinations of column(s), each of which should constitute a unique key
153
+ // index for the dataset. If set, the resulting assertion(s) will fail if
154
+ // there is more than one row in the dataset with the same values for all of
155
+ // the column(s) in the unique key(s).
156
+ message UniqueKey {
157
+ repeated string unique_key = 1;
158
+ }
159
+ repeated UniqueKey unique_keys = 2;
160
+
161
+ // Column(s) which may never be `NULL`.
162
+ // If set, the resulting assertion will fail if any row contains `NULL`
163
+ // values for these column(s).
164
+ repeated string non_null = 3;
165
+
166
+ // General condition(s) which should hold true for all rows in the dataset.
167
+ // If set, the resulting assertion will fail if any row violates any of
168
+ // these condition(s).
169
+ repeated string row_conditions = 4;
170
+ }
171
+
172
+ message IcebergTableConfig {
172
+ // Supported file formats for BigQuery tables.
173
+ enum FileFormat {
174
+ // Default value.
175
+ FILE_FORMAT_UNSPECIFIED = 0;
176
+ // Apache Parquet format.
177
+ PARQUET = 1;
178
+ }
179
+
180
+ // The file format for the BigQuery table.
181
+ FileFormat file_format = 1;
182
+
183
+ // The connection specifying the credentials to be used to read and write
184
+ // to external storage, such as Cloud Storage. The connection can have the
185
+ // form `{project}.{location}.{connection_id}` or
186
+ // `projects/{project}/locations/{location}/connections/{connection_id}`,
187
+ // or be set to DEFAULT.
188
+ string connection = 2;
189
+
190
+ // The name of the Cloud Storage bucket where table data is stored. This
191
+ // value will be used to construct the storage URI in the following way:
192
+ // `gs://{bucket_name}/{table_folder_root}/{table_folder_subpath}`.
193
+ // If `storage_uri` is provided, this value is ignored.
194
+ string bucket_name = 3;
195
+
196
+ // The name of the first-level folder inside the Cloud Storage bucket where
197
+ // table data is stored. This value will be used to construct the storage
198
+ // URI in the following way:
199
+ // `gs://{bucket_name}/{table_folder_root}/{table_folder_subpath}`.
200
+ // If `storage_uri` is provided, this value is ignored.
201
+ string table_folder_root = 4;
202
+
203
+ // The path under the first-level folder of the Cloud Storage bucket where
204
+ // table data is stored. This value will be used to construct the storage URI
205
+ // in the following way:
206
+ // `gs://{bucket_name}/{table_folder_root}/{table_folder_subpath}`.
207
+ // If `storage_uri` is provided, this value is ignored.
208
+ string table_folder_subpath = 5;
209
+ }
211
+
212
+ message TableConfig {
213
+ // The name of the table.
214
+ string name = 1;
215
+
216
+ // The dataset (schema) of the table.
217
+ string dataset = 2;
218
+
219
+ // The Google Cloud project (database) of the table.
220
+ string project = 3;
221
+
222
+ // Targets of actions that this action is dependent on.
223
+ repeated Target dependency_targets = 4;
224
+
225
+ // Path to the source file that the contents of the action is loaded from.
226
+ string filename = 5;
227
+
228
+ // A list of user-defined tags with which the action should be labeled.
229
+ repeated string tags = 6;
230
+
231
+ // If set to true, this action will not be executed. However, the action can
232
+ // still be depended upon. Useful for temporarily turning off broken
233
+ // actions.
234
+ bool disabled = 7;
235
+
236
+ // Queries to run before `query`. This can be useful for granting
237
+ // permissions.
238
+ repeated string pre_operations = 8;
239
+
240
+ // Queries to run after `query`.
241
+ repeated string post_operations = 9;
242
+
243
+ // Description of the table.
244
+ string description = 10;
245
+
246
+ // Descriptions of columns within the table.
247
+ repeated ColumnDescriptor columns = 11;
248
+
249
+ // The key by which to partition the table. Typically the name of a
250
+ // timestamp or the date column. See
251
+ // https://cloud.google.com/dataform/docs/partitions-clusters.
252
+ string partition_by = 12;
253
+
254
+ // The number of days for which BigQuery stores data in each partition.
255
+ // The setting applies to all partitions in a table, but is calculated
256
+ // independently for each partition based on the partition time.
257
+ int32 partition_expiration_days = 13;
258
+
259
+ // Declares whether the partitioned table requires a WHERE clause
260
+ // predicate filter that filters the partitioning column.
261
+ bool require_partition_filter = 14;
262
+
263
+ // The keys by which to cluster partitions. See
264
+ // https://cloud.google.com/dataform/docs/partitions-clusters.
265
+ repeated string cluster_by = 15;
266
+
267
+ // Key-value pairs for BigQuery labels.
268
+ map<string, string> labels = 16;
269
+
270
+ // Key-value pairs of additional options to pass to the BigQuery API. Some
271
+ // options, for example, partitionExpirationDays, have dedicated
272
+ // type/validity checked fields. For such options, use the dedicated fields.
273
+ map<string, string> additional_options = 17;
274
+
275
+ // When set to true, assertions dependent upon any dependency will
276
+ // be added as dependencies of this action.
277
+ bool depend_on_dependency_assertions = 18;
278
+
279
+ // Assertions to be run on the dataset.
280
+ // If configured, relevant assertions will automatically be created and run
281
+ // as a dependency of this dataset.
282
+ TableAssertionsConfig assertions = 19;
283
+
284
+ // If true, this indicates that the action only depends on data from
285
+ // explicitly-declared dependencies. Otherwise if false, it indicates that
286
+ // the action depends on data from a source which has not been declared as
287
+ // a dependency.
288
+ bool hermetic = 20;
289
+
290
+ // Configuration options for an Iceberg table.
291
+ IcebergTableConfig iceberg = 22;
292
+
293
+ // Metadata for this table.
294
+ Metadata metadata = 23;
295
+
296
+ // Optional. The BigQuery reservation to use for execution.
297
+ // If unset, the value from workflow_settings.yaml is used. If neither is set, default BigQuery behavior applies.
298
+ // sqlanvil CLI only (GCP sqlanvil support pending).
299
+ string reservation = 24;
300
+
301
+ // Postgres and Supabase specific options.
302
+ PostgresOptions postgres = 25;
303
+ SupabaseOptions supabase = 26;
304
+ }
305
+
306
+ message Metadata {
307
+ // A detailed description of the data object.
308
+ string overview = 1;
309
+ // Extra properties of the data object.
310
+ google.protobuf.Struct extra_properties = 2;
311
+ }
312
+
313
+ message ViewConfig {
314
+ // The name of the view.
315
+ string name = 1;
316
+
317
+ // The dataset (schema) of the view.
318
+ string dataset = 2;
319
+
320
+ // The Google Cloud project (database) of the view.
321
+ string project = 3;
322
+
323
+ // Targets of actions that this action is dependent on.
324
+ repeated Target dependency_targets = 4;
325
+
326
+ // Path to the source file that the contents of the action is loaded from.
327
+ string filename = 5;
328
+
329
+ // A list of user-defined tags with which the action should be labeled.
330
+ repeated string tags = 6;
331
+
332
+ // If set to true, this action will not be executed. However, the action can
333
+ // still be depended upon. Useful for temporarily turning off broken
334
+ // actions.
335
+ bool disabled = 7;
336
+
337
+ // Queries to run before `query`. This can be useful for granting
338
+ // permissions.
339
+ repeated string pre_operations = 8;
340
+
341
+ // Queries to run after `query`.
342
+ repeated string post_operations = 9;
343
+
344
+ // Applies the materialized view optimization, see
345
+ // https://cloud.google.com/bigquery/docs/materialized-views-intro.
346
+ bool materialized = 10;
347
+
348
+ // Optional. Applicable only to materialized view. The key by which to partition the materialized view. Typically the name of a
349
+ // timestamp or the date column. See https://cloud.google.com/bigquery/docs/materialized-views-create#partitioned_materialized_views.
350
+ string partition_by = 18;
351
+
352
+ // Optional. Applicable only to materialized view. The keys by which to cluster partitions. See
353
+ // https://cloud.google.com/bigquery/docs/materialized-views-create#cluster_materialized_views.
354
+ repeated string cluster_by = 19;
355
+
356
+ // Description of the view.
357
+ string description = 11;
358
+
359
+ // Descriptions of columns within the view.
360
+ repeated ColumnDescriptor columns = 12;
361
+
362
+ // Key-value pairs for BigQuery labels.
363
+ map<string, string> labels = 13;
364
+
365
+ // Key-value pairs of additional options to pass to the BigQuery API. Some
366
+ // options, for example, partitionExpirationDays, have dedicated
367
+ // type/validity checked fields. For such options, use the dedicated fields.
368
+ map<string, string> additional_options = 14;
369
+
370
+ // When set to true, assertions dependent upon any dependency will
371
+ // be added as dependencies of this action.
372
+ bool depend_on_dependency_assertions = 15;
373
+
374
+ // If true, this indicates that the action only depends on data from
375
+ // explicitly-declared dependencies. Otherwise if false, it indicates that
376
+ // the action depends on data from a source which has not been declared as
377
+ // a dependency.
378
+ bool hermetic = 16;
379
+
380
+ // Assertions to be run on the dataset.
381
+ // If configured, relevant assertions will automatically be created and run
382
+ // as a dependency of this dataset.
383
+ TableAssertionsConfig assertions = 17;
384
+
385
+ // Metadata for this view.
386
+ Metadata metadata = 20;
387
+
388
+ // Optional. The BigQuery reservation to use for execution.
389
+ // If unset, the value from workflow_settings.yaml is used. If neither is set, default BigQuery behavior applies.
390
+ // sqlanvil CLI only (GCP sqlanvil support pending).
391
+ string reservation = 21;
392
+
393
+ // Postgres-native options. For a materialized view (materialized: true),
394
+ // `no_data` (CREATE ... WITH NO DATA) and `refresh_policy`
395
+ // ("on_dependency_change" → in-place REFRESH instead of drop+recreate)
396
+ // apply; indexes also apply to materialized views.
397
+ PostgresOptions postgres = 22;
398
+ }
399
+
400
+ enum OnSchemaChange {
401
+ // Ignores any schema changes (default).
402
+ IGNORE = 0;
403
+ // Fails if the query would result in column(s) being added, deleted,
404
+ // or renamed.
405
+ FAIL = 1;
406
+ // Does not block new column(s) from being added.
407
+ EXTEND = 2;
408
+ // Does not block column(s) from being added, deleted, or renamed.
409
+ SYNCHRONIZE = 3;
410
+ }
411
+
412
+ message IncrementalTableConfig {
413
+ // The name of the incremental table.
414
+ string name = 1;
415
+
416
+ // The dataset (schema) of the incremental table.
417
+ string dataset = 2;
418
+
419
+ // The Google Cloud project (database) of the incremental table.
420
+ string project = 3;
421
+
422
+ // Targets of actions that this action is dependent on.
423
+ repeated Target dependency_targets = 4;
424
+
425
+ // Path to the source file that the contents of the action is loaded from.
426
+ string filename = 5;
427
+
428
+ // A list of user-defined tags with which the action should be labeled.
429
+ repeated string tags = 6;
430
+
431
+ // If set to true, this action will not be executed. However, the action can
432
+ // still be depended upon. Useful for temporarily turning off broken
433
+ // actions.
434
+ bool disabled = 7;
435
+
436
+ // Queries to run before `query`. This can be useful for granting
437
+ // permissions.
438
+ repeated string pre_operations = 8;
439
+
440
+ // Queries to run after `query`.
441
+ repeated string post_operations = 9;
442
+
443
+ // If true, prevents the dataset from being rebuilt from scratch.
444
+ bool protected = 10;
445
+
446
+ // If set, unique key represents a set of names of columns that will act as
447
+ // the unique key. To enforce this, when updating the incremental
448
+ // table, sqlanvil merges rows with `uniqueKey` instead of appending them.
449
+ repeated string unique_key = 11;
450
+
451
+ // Description of the incremental table.
452
+ string description = 12;
453
+
454
+ // Descriptions of columns within the table.
455
+ repeated ColumnDescriptor columns = 13;
456
+
457
+ // The key by which to partition the table. Typically the name of a
458
+ // timestamp or the date column. See
459
+ // https://cloud.google.com/dataform/docs/partitions-clusters.
460
+ string partition_by = 14;
461
+
462
+ // The number of days for which BigQuery stores data in each partition.
463
+ // The setting applies to all partitions in a table, but is calculated
464
+ // independently for each partition based on the partition time.
465
+ int32 partition_expiration_days = 15;
466
+
467
+ // Declares whether the partitioned table requires a WHERE clause
468
+ // predicate filter that filters the partitioning column.
469
+ bool require_partition_filter = 16;
470
+
471
+ // SQL-based filter for when incremental updates are applied.
472
+ string update_partition_filter = 17;
473
+
474
+ // The keys by which to cluster partitions. See
475
+ // https://cloud.google.com/dataform/docs/partitions-clusters.
476
+ repeated string cluster_by = 18;
477
+
478
+ // Key-value pairs for BigQuery labels.
479
+ map<string, string> labels = 19;
480
+
481
+ // Key-value pairs of additional options to pass to the BigQuery API. Some
482
+ // options, for example, partitionExpirationDays, have dedicated
483
+ // type/validity checked fields. For such options, use the dedicated fields.
484
+ map<string, string> additional_options = 20;
485
+
486
+ // When set to true, assertions dependent upon any dependency will
487
+ // be added as dependencies of this action.
488
+ bool depend_on_dependency_assertions = 21;
489
+
490
+ // Assertions to be run on the dataset.
491
+ // If configured, relevant assertions will automatically be created and run
492
+ // as a dependency of this dataset.
493
+ TableAssertionsConfig assertions = 22;
494
+
495
+ // If true, this indicates that the action only depends on data from
496
+ // explicitly-declared dependencies. Otherwise if false, it indicates that
497
+ // the action depends on data from a source which has not been declared as
498
+ // a dependency.
499
+ bool hermetic = 23;
500
+
501
+ // Defines the action behavior if the selected columns in the query
502
+ // don't match the columns in the target table.
503
+ OnSchemaChange on_schema_change = 24;
504
+
505
+ // Configuration options for an Iceberg table.
506
+ IcebergTableConfig iceberg = 25;
507
+
508
+ // Metadata for this incremental table.
509
+ Metadata metadata = 26;
510
+
511
+ // Optional. The BigQuery reservation to use for execution.
512
+ // If unset, the value from workflow_settings.yaml is used. If neither is set, default BigQuery behavior applies.
513
+ // sqlanvil CLI only (GCP sqlanvil support pending).
514
+ string reservation = 27;
515
+
516
+ // Postgres and Supabase specific options.
517
+ PostgresOptions postgres = 28;
518
+ SupabaseOptions supabase = 29;
519
+ }
520
+
521
+ message AssertionConfig {
522
+ // The name of the assertion.
523
+ string name = 1;
524
+
525
+ // The dataset (schema) of the assertion.
526
+ string dataset = 2;
527
+
528
+ // The Google Cloud project (database) of the assertion.
529
+ string project = 3;
530
+
531
+ // Targets of actions that this action is dependent on.
532
+ repeated Target dependency_targets = 4;
533
+
534
+ // Path to the source file that the contents of the action is loaded from.
535
+ string filename = 5;
536
+
537
+ // A list of user-defined tags with which the action should be labeled.
538
+ repeated string tags = 6;
539
+
540
+ // If set to true, this action will not be executed. However, the action can
541
+ // still be depended upon. Useful for temporarily turning off broken
542
+ // actions.
543
+ bool disabled = 7;
544
+
545
+ // Description of the assertion.
546
+ string description = 8;
547
+
548
+ // If true, this indicates that the action only depends on data from
549
+ // explicitly-declared dependencies. Otherwise if false, it indicates that
550
+ // the action depends on data from a source which has not been declared as
551
+ // a dependency.
552
+ bool hermetic = 9;
553
+
554
+ // If true, assertions dependent upon any of the dependencies are added as
555
+ // dependencies as well.
556
+ bool depend_on_dependency_assertions = 10;
557
+
558
+ // Optional. The BigQuery reservation to use for execution.
559
+ // If unset, the value from workflow_settings.yaml is used. If neither is set, default BigQuery behavior applies.
560
+ // sqlanvil CLI only (GCP sqlanvil support pending).
561
+ string reservation = 11;
562
+ }
563
+
564
+ message OperationConfig {
565
+ // The name of the operation.
566
+ string name = 1;
567
+
568
+ // The dataset (schema) of the operation.
569
+ string dataset = 2;
570
+
571
+ // The Google Cloud project (database) of the operation.
572
+ string project = 3;
573
+
574
+ // Targets of actions that this action is dependent on.
575
+ repeated Target dependency_targets = 4;
576
+
577
+ // Path to the source file that the contents of the action is loaded from.
578
+ string filename = 5;
579
+
580
+ // A list of user-defined tags with which the action should be labeled.
581
+ repeated string tags = 6;
582
+
583
+ // If set to true, this action will not be executed. However, the action can
584
+ // still be depended upon. Useful for temporarily turning off broken
585
+ // actions.
586
+ bool disabled = 7;
587
+
588
+ // Declares that this action creates a dataset which should be
589
+ // referenceable as a dependency target, for example by using the `ref`
590
+ // function.
591
+ bool has_output = 8;
592
+
593
+ // Description of the operation.
594
+ string description = 9;
595
+
596
+ // Descriptions of columns within the operation. Can only be set if
597
+ // hasOutput is true.
598
+ repeated ColumnDescriptor columns = 10;
599
+
600
+ // When set to true, assertions dependent upon any dependency will
601
+ // be added as dependencies of this action.
602
+ bool depend_on_dependency_assertions = 11;
603
+
604
+ // If true, this indicates that the action only depends on data from
605
+ // explicitly-declared dependencies. Otherwise if false, it indicates that
606
+ // the action depends on data from a source which has not been declared as
607
+ // a dependency.
608
+ bool hermetic = 12;
609
+
610
+ // Optional. The BigQuery reservation to use for execution.
611
+ // If unset, the value from workflow_settings.yaml is used. If neither is set, default BigQuery behavior applies.
612
+ // sqlanvil CLI only (GCP sqlanvil support pending).
613
+ string reservation = 13;
614
+ }
615
+
616
+ message DeclarationConfig {
617
+ // The name of the declaration.
618
+ string name = 1;
619
+
620
+ // The dataset (schema) of the declaration.
621
+ string dataset = 2;
622
+
623
+ // The Google Cloud project (database) of the declaration.
624
+ string project = 3;
625
+
626
+ // Description of the declaration.
627
+ string description = 4;
628
+
629
+ // Descriptions of columns within the declaration.
630
+ repeated ColumnDescriptor columns = 5;
631
+
632
+ // Path to the source file that the contents of the action is loaded from.
633
+ string filename = 6;
634
+
635
+ // A list of user-defined tags with which the action should be labeled.
636
+ repeated string tags = 7;
637
+ }
638
+
639
+ message NotebookConfig {
640
+ // The name of the notebook.
641
+ string name = 1;
642
+
643
+ // The Google Cloud location of the notebook.
644
+ string location = 2;
645
+
646
+ // The Google Cloud project (database) of the notebook.
647
+ string project = 3;
648
+
649
+ // Targets of actions that this action is dependent on.
650
+ repeated Target dependency_targets = 4;
651
+
652
+ // Path to the source file that the contents of the action is loaded from.
653
+ string filename = 5;
654
+
655
+ // A list of user-defined tags with which the action should be labeled.
656
+ repeated string tags = 6;
657
+
658
+ // If set to true, this action will not be executed. However, the action can
659
+ // still be depended upon. Useful for temporarily turning off broken
660
+ // actions.
661
+ bool disabled = 7;
662
+
663
+ // Description of the notebook.
664
+ string description = 8;
665
+
666
+ // When set to true, assertions dependent upon any dependency will
667
+ // be added as dependencies of this action.
668
+ bool depend_on_dependency_assertions = 9;
669
+
670
+ // A notebook runtime field definition could be added here, to allow atomic
671
+ // runtime settings of notebooks.
672
+ }
673
+
674
+ message DataPreparationConfig {
675
+ // The name of the data preparation.
676
+ string name = 1;
677
+
678
+ // The dataset (schema) of the destination table.
679
+ string dataset = 13;
680
+
681
+ // The Google Cloud project (database) of the destination table.
682
+ string project = 14;
683
+
684
+ // Targets of actions that this action is dependent on.
685
+ repeated Target dependency_targets = 2;
686
+
687
+ // Path to the source file that the contents of the action is loaded from.
688
+ string filename = 3;
689
+
690
+ // A list of user-defined tags with which the action should be labeled.
691
+ repeated string tags = 4;
692
+
693
+ // If set to true, this action will not be executed. However, the action can
694
+ // still be depended upon. Useful for temporarily turning off broken
695
+ // actions.
696
+ bool disabled = 7;
697
+
698
+ // Description of the data preparation.
699
+ string description = 8;
700
+
701
+ ErrorTableConfig error_table = 11;
702
+
703
+ LoadModeConfig load_mode = 12;
704
+
705
+ message ErrorTableConfig {
706
+ // The name of the error table.
707
+ string name = 1;
708
+
709
+ // The dataset (schema) of the error table.
710
+ string dataset = 2;
711
+
712
+ // The Google Cloud project (database) of the error table.
713
+ string project = 3;
714
+
715
+ int32 retention_days = 4;
716
+ }
717
+ }
718
+
719
+ oneof action {
720
+ TableConfig table = 1;
721
+ ViewConfig view = 2;
722
+ IncrementalTableConfig incremental_table = 3;
723
+ AssertionConfig assertion = 4;
724
+ OperationConfig operation = 5;
725
+ DeclarationConfig declaration = 6;
726
+ NotebookConfig notebook = 7;
727
+ DataPreparationConfig data_preparation = 8;
728
+ RlsPolicyConfig rls_policy = 9;
729
+ RealtimePublicationConfig realtime_publication = 10;
730
+ ForeignWrapperConfig foreign_wrapper = 11;
731
+ VectorIndexConfig vector_index = 12;
732
+ }
733
+
734
+ message RlsPolicyConfig {
735
+ string name = 1;
736
+ string table = 2;
737
+ string command = 3;
738
+ repeated string roles = 4;
739
+ string using = 5;
740
+ string with_check = 6;
741
+ string filename = 7;
742
+ repeated Target dependency_targets = 8;
743
+ }
744
+
745
+ message RealtimePublicationConfig {
746
+ string name = 1;
747
+ string table = 2;
748
+ repeated string events = 3;
749
+ string filename = 4;
750
+ repeated Target dependency_targets = 5;
751
+ }
752
+
753
+ message ForeignWrapperConfig {
754
+ string name = 1;
755
+ string wrapper = 2;
756
+ string server = 3;
757
+ map<string, string> options = 4;
758
+ string filename = 5;
759
+ repeated Target dependency_targets = 6;
760
+ }
761
+
762
+ message VectorIndexConfig {
763
+ string name = 1;
764
+ string table = 2;
765
+ string column = 3;
766
+ uint32 dimensions = 4;
767
+ string index_type = 5;
768
+ map<string, string> params = 6;
769
+ string filename = 7;
770
+ repeated Target dependency_targets = 8;
771
+ }
772
+
773
+ message LoadModeConfig {
773
+ LoadMode mode = 1;
774
+ // The column to compare on. Required when mode is MAXIMUM or UNIQUE.
775
+ string incremental_column = 2;
776
+ // The unique key column(s) to deduplicate on. Required when mode is MERGE.
777
+ repeated string unique_key = 3;
778
+ }
780
+
781
+ enum LoadMode {
781
+ // Replace the existing table (default).
782
+ REPLACE_TABLE = 0;
783
+ // Insert into the destination table.
784
+ APPEND = 1;
785
+ // Insert only records where the specified column value exceeds the existing
786
+ // maximum value in the destination table.
787
+ MAXIMUM = 2;
788
+ // Insert only records where the specified column value is not already
789
+ // present in the destination column values.
790
+ UNIQUE = 3;
791
+ // Merge records into the destination table, deduplicating using one or more unique keys.
792
+ MERGE = 4;
793
+ }
795
+ }
796
+
797
+ message RepositorySnapshotDestinationConfig {
798
+ // Storage URI to upload the repository snapshot to.
799
+ string repository_snapshot_uri = 1;
800
+ }
801
+
802
+ message NotebookRuntimeOptionsConfig {
803
+ oneof output_sink {
804
+ // Storage bucket to output notebooks to after their execution.
805
+ string output_bucket = 1;
806
+ }
807
+
808
+ // Colab runtime template (https://cloud.google.com/colab/docs/runtimes), from
809
+ // which a runtime is created for notebook executions.
810
+ string runtime_template_name = 2;
811
+
812
+ // The destination for the snapshot of repository files to be available for
813
+ // read-only access inside a notebook runtime.
814
+ oneof repository_snapshot_storage {
815
+ // Storage URI to upload the snapshot to.
816
+ // For empty URI it defaults to the provided output_bucket.
817
+ RepositorySnapshotDestinationConfig repository_snapshot_destination = 3;
818
+ }
819
+
820
+ }
821
+
822
+ // =============================================================================
823
+ // Postgres-first-class adapter — additions per
824
+ // https://github.com/sqlanvil/docs/blob/main/postgres_first_class_design.md. Phase 3c.
825
+ //
826
+ // These messages are declared here for wiring in subsequent phases:
827
+ // - PostgresOptions / SupabaseOptions → ActionConfig table-level blocks
828
+ // - PostgresConnection / SupabaseConnection / BigQueryConnection
829
+ // → WorkflowSettings.warehouse
830
+ // - WarehouseConfig → discriminated union over the
831
+ // connection variants
832
+ //
833
+ // Adding the messages alone does not change behavior — wiring happens in
834
+ // Phase 4 (CLI) and the parallel rewrite of ActionConfig's table/view/
835
+ // incremental_table sub-messages.
836
+ // =============================================================================
837
+
838
// PostgresOptions holds Postgres-native, table-level options: the idiomatic
// Postgres counterpart of the BigQueryOptions-style fields in TableConfig.
//
// It is a peer of the existing `bigquery: {...}` shape on action configs:
//   publish("daily_orders", { postgres: { tablespace: "fast_ssd", ... } })
message PostgresOptions {
  // Physical storage placement (CREATE TABLE ... TABLESPACE <name>).
  string tablespace = 1;

  // Storage parameter: percentage of each page to fill on insert. PostgreSQL
  // accepts values from 10 to 100 for tables.
  // NOTE(review): 0 is the proto3 default and cannot be told apart from
  // "unset"; presumably it means "use the server default" - confirm.
  uint32 fillfactor = 2;

  // CREATE UNLOGGED TABLE: faster writes, but contents are lost on crash. For
  // staging/temp tables where durability isn't required.
  bool unlogged = 3;

  // Native Postgres declarative partitioning.
  message Partition {
    // Partitioning strategy. RANGE is the zero value, so it is what an unset
    // `kind` reads as.
    enum Kind {
      RANGE = 0;
      LIST = 1;
      HASH = 2;
    }
    // Strategy used to partition the table.
    Kind kind = 1;
    // Names of the columns making up the partition key.
    repeated string columns = 2;

    // A child partition. `values` is the raw FOR VALUES clause body matching
    // the kind, e.g. "FROM ('2024-01-01') TO ('2025-01-01')" (range),
    // "IN ('us', 'ca')" (list), or "WITH (MODULUS 4, REMAINDER 0)" (hash).
    message Bound {
      // Name of the child partition.
      string name = 1;
      // Raw FOR VALUES clause body for this child partition.
      string values = 2;
    }
    // Child partitions to create.
    repeated Bound partitions = 3;

    // Also create a catch-all DEFAULT partition so rows outside every bound
    // still insert (recommended for declarative full-refresh loads).
    bool include_default = 4;
  }
  // Partitioning configuration for the table.
  Partition partition = 4;

  // An index to create alongside the table.
  message Index {
    // Name of the index.
    string name = 1;
    // Indexed (key) columns.
    repeated string columns = 2;

    // Index access method. BTREE is the zero value, so it is what an unset
    // `method` reads as.
    enum Method {
      BTREE = 0;
      HASH = 1;
      GIN = 2;
      GIST = 3;
      BRIN = 4;
    }
    // Access method used to build the index.
    Method method = 3;

    // Partial index predicate (WHERE <expr>).
    string where = 4;

    // Whether the index is declared unique.
    bool unique = 5;

    // INCLUDE non-key columns for covering indexes.
    repeated string include = 6;

    // Operator class applied to each indexed column, e.g. "gin_trgm_ops"
    // (pg_trgm), "jsonb_path_ops", or "vector_l2_ops" (pgvector). Required for
    // gin/gist indexes on types without a default opclass.
    // NOTE(review): pgvector operator classes belong to the `ivfflat` and
    // `hnsw` access methods, which Method does not list - confirm how
    // "vector_l2_ops" is expected to be used here.
    string opclass = 7;
  }
  // Indexes to create alongside the table.
  repeated Index indexes = 5;

  // Materialized view: create WITH NO DATA (empty until first refresh). The
  // default (false) is WITH DATA. The field is named for the non-default case
  // so that proto3's false default yields the sensible WITH DATA.
  bool no_data = 6;

  // Materialized view refresh on re-run: "on_dependency_change" refreshes an
  // existing matview in place (REFRESH MATERIALIZED VIEW) instead of dropping
  // and recreating it. The default (unset) drops and recreates on each run,
  // which is safe and also picks up definition changes, which REFRESH does
  // not.
  string refresh_policy = 7;
}
919
+
920
// SupabaseOptions holds Supabase-specific platform features layered on top of
// standard Postgres. It is a peer of `postgres: {...}` for projects targeting
// `warehouse: { kind: supabase }`.
message SupabaseOptions {
  // Standard Postgres options also apply. Set them via `postgres:` directly
  // or nest them under `supabase.postgres:`; either is accepted.
  PostgresOptions postgres = 1;

  // ALTER PUBLICATION supabase_realtime ADD TABLE <this>.
  // Implicitly sets REPLICA IDENTITY appropriately.
  bool publish_to_realtime = 2;

  // ALTER TABLE <this> ENABLE ROW LEVEL SECURITY.
  // This only enables RLS; the policies themselves are declared via the
  // `rlsPolicy` action type (see Phase 5).
  bool enable_rls = 3;

  // OWNER TO <role>. Typically "postgres" or "service_role".
  string owner_role = 4;

  // pgvector convenience config describing a vector index on one column; more
  // ergonomic for RAG pipelines than spelling the index out by hand.
  // pgvector indexes use their own `ivfflat` and `hnsw` access methods (they
  // are not GIN/GiST operator classes), and PostgresOptions.Index.Method does
  // not list either of them.
  message VectorConfig {
    // Name of the vector column to index.
    string column = 1;
    // Number of dimensions of the vector column.
    uint32 dimensions = 2;

    // pgvector index type. IVFFLAT is the zero value, so it is what an unset
    // `index_type` reads as.
    enum IndexType {
      IVFFLAT = 0;
      HNSW = 1;
    }
    // Which pgvector index type to build.
    IndexType index_type = 3;

    // Index parameters keyed by name, with string values.
    // ivfflat: { lists }, hnsw: { m, ef_construction }.
    map<string, string> params = 4;
  }
  // Vector index configurations for this table.
  repeated VectorConfig vectors = 5;
}
958
+
959
// BigQueryConnection carries the connection params used when
// warehouse.kind = "bigquery". It is the warehouse-namespaced counterpart of
// the legacy flat WorkflowSettings fields default_project, default_location
// and default_dataset.
message BigQueryConnection {
  // Google Cloud project (database).
  string project = 1;

  // BigQuery location, for example "US", "EU" or "europe-west4".
  string location = 2;

  // Dataset (schema) used by default.
  string default_dataset = 3;
}
972
+
973
// PostgresConnection carries libpq-style connection params used when
// warehouse.kind = "postgres": the standard Postgres host, port, database and
// user, plus password, SSL mode and default schema.
message PostgresConnection {
  // Server host name.
  string host = 1;
  // Server port.
  uint32 port = 2;
  // Database to connect to.
  string database = 3;
  // User to connect as.
  string user = 4;
  // Password for `user`.
  string password = 5;

  // SSL mode, one of "disable", "allow", "prefer", "require", "verify-ca" or
  // "verify-full".
  // See https://www.postgresql.org/docs/current/libpq-ssl.html.
  string ssl_mode = 6;

  // Schema used by default.
  string default_schema = 7;
}
988
+
989
// SupabaseConnection carries the connection params used when
// warehouse.kind = "supabase". A Supabase project exposes its Postgres
// connection either through project_ref + service_role_key, or through a
// direct connection string that bypasses PostgREST.
message SupabaseConnection {
  // Project ref from the Supabase dashboard: the host part of the project URL
  // that precedes .supabase.co.
  string project_ref = 1;

  // Project service-role JWT. NEVER commit the literal value; use ${ENV_VAR}
  // interpolation in workflow_settings.yaml instead.
  string service_role_key = 2;

  // Schema used by default.
  string default_schema = 3;

  // Optional override: a direct Postgres URL that bypasses the PostgREST
  // proxy, e.g.
  // "postgresql://postgres:${PASSWORD}@db.<project_ref>.supabase.co:5432/postgres".
  // When set, it takes precedence over project_ref + service_role_key for the
  // direct DB connection; service_role_key is still used for RLS bypass.
  string connection_string = 4;
}
1008
+
1009
// WarehouseConfig is a discriminated union over the connection variants. The
// `kind:` YAML tag selects which arm of the `oneof` is unmarshalled.
// NOTE(review): this message declares no `kind` field, so the tag is
// presumably interpreted by the YAML loader rather than by protobuf - confirm.
//
// Example YAML:
//   warehouse:
//     kind: postgres
//     host: db.example.com
//     port: 5432
//     database: analytics
//     user: sqlanvil_writer
//     password: ${PG_PASSWORD}
//     ssl_mode: require
//     default_schema: public
message WarehouseConfig {
  // Exactly one connection variant may be set.
  oneof connection {
    // Connection params for a BigQuery warehouse.
    BigQueryConnection bigquery = 1;
    // Connection params for a Postgres warehouse.
    PostgresConnection postgres = 2;
    // Connection params for a Supabase warehouse.
    SupabaseConnection supabase = 3;
  }
}