dasl-client 1.0.23__py3-none-any.whl → 1.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dasl-client might be problematic. See the release advisory for more details.

test/test_marshaling.py DELETED
@@ -1,921 +0,0 @@
1
- from dasl_client import *
2
-
3
- from .constants import *
4
-
5
-
6
- def test_workspace_config_marshal_unmarshal():
7
- workspace_config = WorkspaceConfig(
8
- metadata=Metadata(
9
- name="config",
10
- workspace=workspace,
11
- comment="a random comment",
12
- annotations={"key1": "value1", "key2": "value2"},
13
- created_timestamp=datetime(2023, 1, 1, 12, 30),
14
- created_by="creator_user",
15
- modified_timestamp=datetime(2023, 1, 2, 13, 45),
16
- last_successful_run_timestamp=datetime(2023, 1, 3, 14, 0),
17
- modified_by="modifier_user",
18
- version=1,
19
- deleted=False,
20
- resource_status="none",
21
- ui_status="ok",
22
- client_of_origin=get_client_identifier(),
23
- ),
24
- system_tables_config=SystemTablesConfig(
25
- catalog_name="catalog_random", var_schema="schema_random"
26
- ),
27
- default_sql_warehouse="warehouse_random",
28
- detection_rule_metadata=DetectionRuleMetadata(
29
- detection_categories=["category1", "category2"]
30
- ),
31
- notable_export=ExportConfig(
32
- destination="webhook",
33
- export_open_only=True,
34
- webhook_config=ExportConfig.WebhookConfig(
35
- destination=ExportConfig.WebhookDestination(
36
- value="wh_value1", scope="wh_scope1", key="wh_key1"
37
- )
38
- ),
39
- slack_config=ExportConfig.SlackConfig(
40
- token=ExportConfig.WebhookDestination(
41
- value="slack_token1", scope="slack_scope1", key="slack_key1"
42
- ),
43
- channel="#channel1",
44
- message="slack message 1",
45
- ),
46
- ),
47
- operational_alert_export=ExportConfig(
48
- destination="slack",
49
- export_open_only=False,
50
- webhook_config=ExportConfig.WebhookConfig(
51
- destination=ExportConfig.WebhookDestination(
52
- value="wh_value2", scope="wh_scope2", key="wh_key2"
53
- )
54
- ),
55
- slack_config=ExportConfig.SlackConfig(
56
- token=ExportConfig.WebhookDestination(
57
- value="slack_token2", scope="slack_scope2", key="slack_key2"
58
- ),
59
- channel="#channel2",
60
- message="slack message 2",
61
- ),
62
- ),
63
- observables=WorkspaceConfigObservables(
64
- kinds=[
65
- WorkspaceConfigObservables.ObservablesKinds(
66
- name="kind1", sql_type="type1"
67
- ),
68
- WorkspaceConfigObservables.ObservablesKinds(
69
- name="kind2", sql_type="type2"
70
- ),
71
- ],
72
- relationships=["rel1", "rel2"],
73
- ),
74
- dasl_storage_path="/random/storage/path",
75
- default_custom_notebook_location="/tmp/notebooks",
76
- datasources=DatasourcesConfig(
77
- catalog_name="test_catalog",
78
- bronze_schema="bronze",
79
- silver_schema="silver",
80
- gold_schema="gold",
81
- checkpoint_location="/tmp/checkpoints",
82
- ),
83
- rules=RulesConfig(
84
- checkpoint_location="/tmp/checkpoints",
85
- ),
86
- default_config=DefaultConfig(
87
- datasources=DefaultConfig.Config(
88
- notebook_location="notebook_ds",
89
- bronze_schema="bronze_ds",
90
- silver_schema="silver_ds",
91
- gold_schema="gold_ds",
92
- catalog_name="catalog_ds",
93
- default_max_resources_per_job=5,
94
- checkpoint_location="checkpoint_ds",
95
- compute_group_overrides={
96
- "override1": DefaultConfig.Config.ComputeGroupOverrides(
97
- max_resources_per_job=2
98
- ),
99
- "override2": DefaultConfig.Config.ComputeGroupOverrides(
100
- max_resources_per_job=3
101
- ),
102
- },
103
- ),
104
- transforms=DefaultConfig.Config(
105
- notebook_location="notebook_trans",
106
- bronze_schema="bronze_trans",
107
- silver_schema="silver_trans",
108
- gold_schema="gold_trans",
109
- catalog_name="catalog_trans",
110
- default_max_resources_per_job=6,
111
- checkpoint_location="checkpoint_trans",
112
- compute_group_overrides={
113
- "override3": DefaultConfig.Config.ComputeGroupOverrides(
114
- max_resources_per_job=4
115
- ),
116
- "override4": DefaultConfig.Config.ComputeGroupOverrides(
117
- max_resources_per_job=5
118
- ),
119
- },
120
- ),
121
- rules=DefaultConfig.Config(
122
- notebook_location="notebook_rules",
123
- bronze_schema="bronze_rules",
124
- silver_schema="silver_rules",
125
- gold_schema="gold_rules",
126
- catalog_name="catalog_rules",
127
- default_max_resources_per_job=7,
128
- checkpoint_location="checkpoint_rules",
129
- compute_group_overrides={
130
- "override5": DefaultConfig.Config.ComputeGroupOverrides(
131
- max_resources_per_job=6
132
- ),
133
- "override6": DefaultConfig.Config.ComputeGroupOverrides(
134
- max_resources_per_job=7
135
- ),
136
- },
137
- ),
138
- var_global=DefaultConfig.Config(
139
- notebook_location="notebook_var",
140
- bronze_schema="bronze_var",
141
- silver_schema="silver_var",
142
- gold_schema="gold_var",
143
- catalog_name="catalog_var",
144
- default_max_resources_per_job=8,
145
- checkpoint_location="checkpoint_var",
146
- compute_group_overrides={
147
- "override7": DefaultConfig.Config.ComputeGroupOverrides(
148
- max_resources_per_job=8
149
- ),
150
- "override8": DefaultConfig.Config.ComputeGroupOverrides(
151
- max_resources_per_job=9
152
- ),
153
- },
154
- ),
155
- ),
156
- managed_retention=[
157
- ManagedRetention(
158
- catalog="catalog_ret1",
159
- var_schema="schema_ret1",
160
- column="col_ret1",
161
- duration="1d",
162
- overrides=[
163
- ManagedRetention.Overrides(
164
- table="table1", column="colA", duration="1h"
165
- ),
166
- ManagedRetention.Overrides(
167
- table="table2", column="colB", duration="2h"
168
- ),
169
- ],
170
- ),
171
- ManagedRetention(
172
- catalog="catalog_ret2",
173
- var_schema="schema_ret2",
174
- column="col_ret2",
175
- duration="2d",
176
- overrides=[
177
- ManagedRetention.Overrides(
178
- table="table3", column="colC", duration="3h"
179
- ),
180
- ManagedRetention.Overrides(
181
- table="table4", column="colD", duration="4h"
182
- ),
183
- ],
184
- ),
185
- ],
186
- status=ResourceStatus(
187
- job_id=123,
188
- job_name="job_random",
189
- enabled=True,
190
- notebook_path="/path/to/notebook",
191
- created_at=datetime(2023, 1, 4, 15, 0),
192
- job_status="scheduled",
193
- events=[
194
- ResourceStatus.StatusEvent(
195
- action="create",
196
- message="job started",
197
- recorded_at=datetime(2023, 1, 4, 15, 0),
198
- ),
199
- ResourceStatus.StatusEvent(
200
- action="update",
201
- message="job finished",
202
- recorded_at=datetime(2023, 1, 4, 16, 0),
203
- ),
204
- ],
205
- ),
206
- )
207
-
208
- assert workspace_config == WorkspaceConfig.from_api_obj(
209
- workspace_config.to_api_obj()
210
- )
211
-
212
-
213
def test_data_source_marshal_unmarshal():
    """Round-trip a fully populated DataSource through the API marshaling layer.

    Exercises every nested spec (schedule, autoloader, bronze/silver/gold
    specs with preset overrides, field specs with joins/asserts, field utils,
    and resource status), then asserts ``from_api_obj(to_api_obj())``
    reproduces an equal object.
    """
    data_source = DataSource(
        metadata=Metadata(
            name="data_source_name",
            workspace="workspace1",
            comment="A sample data source",
            annotations={"env": "prod", "version": "v1"},
            created_timestamp=datetime(2023, 4, 1, 10, 0),
            created_by="creator_ds",
            modified_timestamp=datetime(2023, 4, 2, 11, 30),
            last_successful_run_timestamp=datetime(2023, 4, 3, 12, 45),
            modified_by="modifier_ds",
            version=2,
            deleted=False,
            resource_status="deletionPending",
            ui_status="ok",
            client_of_origin=get_client_identifier(),
        ),
        source="s3://data-bucket/source",
        source_type="custom",
        schedule=Schedule(
            at_least_every="1h",
            exactly="1h",
            continuous=True,
            compute_group="compute_group1",
            enabled=True,
        ),
        custom=DataSource.CustomNotebook(
            notebook="print('Hello from custom notebook')"
        ),
        use_preset="preset_alpha",
        autoloader=DataSource.Autoloader(
            format="json",
            location="s3://data-bucket/autoloader",
            schema_file="schema_autoloader.json",
            cloud_files=DataSource.Autoloader.CloudFiles(
                schema_hints_file="hints.txt", schema_hints="hint1, hint2"
            ),
        ),
        bronze=BronzeSpec(
            clustering=BronzeSpec.Clustering(
                column_names=["cluster_col1", "cluster_col2"], time_column="timestamp"
            ),
            bronze_table="bronze_table_1",
            skip_bronze_loading=False,
        ),
        silver=SilverSpec(
            bronze_tables=[
                SilverSpec.BronzeTable(
                    name="silver_bronze_table_1",
                    streaming=True,
                    watermark=SilverSpec.BronzeTable.Watermark(
                        event_time_column="event_time",
                        delay_threshold="10m",
                        drop_duplicates=["dup1", "dup2"],
                    ),
                    alias="silver_alias",
                    join_type="inner",
                    join_expr="a.id = b.id",
                ),
            ],
            pre_transform=SilverSpec.PreTransform(
                use_preset="silver_pre",
                skip_pre_transform=False,
                custom=SilverSpec.PreTransform.Custom(
                    function="pre_transform_func",
                    options={"option1": "value1", "option2": "value2"},
                ),
                filter="col > 0",
                post_filter="col < 100",
                preset_overrides=SilverSpec.PreTransform.PresetOverrides(
                    omit_fields=["omit_field1", "omit_field2"]
                ),
                add_fields=[
                    FieldSpec(
                        name="pre_field1",
                        comment="Pre transform field 1",
                        # `var_assert`/`var_from` presumably avoid clashing with
                        # Python keywords `assert`/`from` -- TODO confirm
                        var_assert=[
                            FieldSpec.Assert(expr="x > 0", message="x must be positive")
                        ],
                        var_from="source_x",
                        alias="alias_x",
                        expr="x + 1",
                        literal="10",
                        join=FieldSpec.Join(
                            with_table="join_table_1",
                            with_csv=FieldSpec.Join.WithCSV(path="/path/to/join1.csv"),
                            lhs="x",
                            rhs="y",
                            select="x, y",
                        ),
                    ),
                    FieldSpec(
                        name="pre_field2",
                        comment="Pre transform field 2",
                        var_assert=[
                            FieldSpec.Assert(expr="y != 0", message="y must be nonzero")
                        ],
                        var_from="source_y",
                        alias="alias_y",
                        expr="y * 2",
                        literal="20",
                        join=FieldSpec.Join(
                            with_table="join_table_2",
                            with_csv=FieldSpec.Join.WithCSV(path="/path/to/join2.csv"),
                            lhs="y",
                            rhs="z",
                            select="y, z",
                        ),
                    ),
                ],
            ),
            transform=SilverSpec.Transform(
                skip_silver_transform=False,
                preset_overrides=SilverSpec.Transform.PresetOverrides(
                    modify_tables=[
                        SilverSpec.Transform.PresetOverrides.ModifyTables(
                            name="modify_table1",
                            custom=SilverSpec.Transform.PresetOverrides.Custom(
                                function="modify_func1",
                                options={"mod_opt1": "val1", "mod_opt2": "val2"},
                            ),
                            omit_fields=["mod_omit1", "mod_omit2"],
                            override_liquid_columns=["liq1", "liq2"],
                            add_fields=[
                                FieldSpec(
                                    name="mod_field1",
                                    comment="Modify field 1",
                                    var_assert=[
                                        FieldSpec.Assert(
                                            expr="a < b",
                                            message="a should be less than b",
                                        )
                                    ],
                                    var_from="mod_source1",
                                    alias="mod_alias1",
                                    expr="a - b",
                                    literal="5",
                                    join=FieldSpec.Join(
                                        with_table="mod_join_table",
                                        with_csv=FieldSpec.Join.WithCSV(
                                            path="/path/to/mod_join.csv"
                                        ),
                                        lhs="a",
                                        rhs="b",
                                        select="a, b",
                                    ),
                                )
                            ],
                            filter="mod_filter > 0",
                            post_filter="mod_post_filter < 100",
                            utils=FieldUtils(
                                unreferenced_columns=FieldUtils.UnreferencedColumns(
                                    preserve=False,
                                    embed_column="mod_embed",
                                    omit_columns=["mod_omit_col1", "mod_omit_col2"],
                                    duplicate_prefix="mod_dup_",
                                ),
                                json_extract=[
                                    FieldUtils.JsonExtract(
                                        source="mod_json_source",
                                        omit_fields=[
                                            "mod_json_omit1",
                                            "mod_json_omit2",
                                        ],
                                        duplicate_prefix="mod_json_dup_",
                                        embed_column="mod_json_embed",
                                    )
                                ],
                            ),
                        )
                    ],
                    omit_tables=["omit_table1", "omit_table2"],
                    add_tables=[
                        SilverSpec.Transform.PresetOverrides.AddTables(
                            custom=SilverSpec.Transform.PresetOverrides.Custom(
                                function="add_func1",
                                options={"add_opt1": "val1", "add_opt2": "val2"},
                            ),
                            name="add_table1",
                            filter="add_filter_condition",
                            post_filter="add_post_filter_condition",
                            override_liquid_columns=["add_liq1", "add_liq2"],
                            fields=[
                                FieldSpec(
                                    name="add_field1",
                                    comment="Add table field 1",
                                    var_assert=[
                                        FieldSpec.Assert(
                                            expr="c == 1", message="c must equal 1"
                                        )
                                    ],
                                    var_from="add_source1",
                                    alias="add_alias1",
                                    expr="c + 10",
                                    literal="15",
                                    join=FieldSpec.Join(
                                        with_table="add_join_table",
                                        with_csv=FieldSpec.Join.WithCSV(
                                            path="/path/to/add_join.csv"
                                        ),
                                        lhs="c",
                                        rhs="d",
                                        select="c, d",
                                    ),
                                )
                            ],
                            utils=FieldUtils(
                                unreferenced_columns=FieldUtils.UnreferencedColumns(
                                    preserve=True,
                                    embed_column="add_embed",
                                    omit_columns=["add_omit1", "add_omit2"],
                                    duplicate_prefix="add_dup_",
                                ),
                                json_extract=[
                                    FieldUtils.JsonExtract(
                                        source="add_json_source",
                                        omit_fields=[
                                            "add_json_omit1",
                                            "add_json_omit2",
                                        ],
                                        duplicate_prefix="add_json_dup_",
                                        embed_column="add_json_embed",
                                    )
                                ],
                            ),
                        )
                    ],
                ),
            ),
        ),
        gold=GoldSpec(
            omit_tables=["gold_omit1", "gold_omit2"],
            modify_tables=[
                GoldSpec.ModifyTables(
                    name="gold_modify_table1",
                    source_table="gold_source_table1",
                    custom=GoldSpec.ModifyTables.Custom(
                        function="gold_modify_func",
                        options={"gold_opt1": "val1", "gold_opt2": "val2"},
                    ),
                    omit_fields=["gold_field_omit1", "gold_field_omit2"],
                    add_fields=[
                        FieldSpec(
                            name="gold_field1",
                            comment="Gold modify field 1",
                            var_assert=[
                                FieldSpec.Assert(
                                    expr="z != 0", message="z must not be zero"
                                )
                            ],
                            var_from="gold_source",
                            alias="gold_alias",
                            expr="z / 2",
                            literal="3.14",
                            join=FieldSpec.Join(
                                with_table="gold_join_table",
                                with_csv=FieldSpec.Join.WithCSV(
                                    path="/path/to/gold_join.csv"
                                ),
                                lhs="z",
                                rhs="w",
                                select="z, w",
                            ),
                        )
                    ],
                    filter="gold_filter_condition",
                    post_filter="gold_post_filter_condition",
                )
            ],
            add_tables=[
                GoldSpec.AddTables(
                    name="gold_add_table1",
                    source_table="gold_add_source_table1",
                    custom=GoldSpec.AddTables.Custom(
                        function="gold_add_func",
                        options={"gold_add_opt1": "val1", "gold_add_opt2": "val2"},
                    ),
                    filter="gold_add_filter_condition",
                    post_filter="gold_add_post_filter_condition",
                    fields=[
                        FieldSpec(
                            name="gold_add_field1",
                            comment="Gold add field 1",
                            var_assert=[
                                FieldSpec.Assert(
                                    expr="a > 0", message="a must be positive"
                                )
                            ],
                            var_from="gold_add_source",
                            alias="gold_add_alias",
                            expr="a + 10",
                            literal="20",
                            join=FieldSpec.Join(
                                with_table="gold_add_join_table",
                                with_csv=FieldSpec.Join.WithCSV(
                                    path="/path/to/gold_add_join.csv"
                                ),
                                lhs="a",
                                rhs="b",
                                select="a, b",
                            ),
                        )
                    ],
                ),
            ],
        ),
        status=ResourceStatus(
            job_id=789,
            job_name="data_source_job",
            enabled=True,
            notebook_path="/path/to/datasource/notebook",
            created_at=datetime(2023, 5, 1, 8, 0),
            job_status="unscheduled",
            events=[
                ResourceStatus.StatusEvent(
                    action="create",
                    message="Data source job started",
                    recorded_at=datetime(2023, 5, 1, 8, 0),
                ),
                ResourceStatus.StatusEvent(
                    action="update",
                    message="Data source job finished",
                    recorded_at=datetime(2023, 5, 1, 8, 30),
                ),
            ],
        ),
    )

    # Marshal to the API object form and back; equality proves lossless
    # round-tripping of every nested field.
    assert data_source == DataSource.from_api_obj(data_source.to_api_obj())
543
-
544
-
545
def test_rule_marshal_unmarshal():
    """Round-trip a fully populated Rule through the API marshaling layer.

    Covers rule metadata (MITRE mapping, response playbooks), schedule,
    stream/batch inputs with watermarked joined tables, observables with
    risk scores, output, collate settings, and resource status.
    """
    rule = Rule(
        metadata=Metadata(
            name="rule_meta_name",
            workspace="example_workspace",
            comment="This is a sample rule metadata comment.",
            annotations={"env": "prod", "source": "system"},
            created_timestamp=datetime(2023, 1, 1, 9, 0),
            created_by="rule_creator",
            modified_timestamp=datetime(2023, 1, 2, 10, 0),
            last_successful_run_timestamp=datetime(2023, 1, 3, 11, 0),
            modified_by="rule_modifier",
            version=1,
            deleted=False,
            resource_status="none",
            ui_status="ok",
            client_of_origin=get_client_identifier(),
        ),
        rule_metadata=Rule.RuleMetadata(
            version=1.0,
            category="Security",
            severity="High",
            fidelity="Investigative",
            mitre=[
                Rule.RuleMetadata.Mitre(
                    taxonomy="MITRE ATT&CK",
                    tactic="Initial Access",
                    technique_id="T1190",
                    technique="Exploit Public-Facing Application",
                    sub_technique_id="T1190.001",
                    sub_technique="Example Sub-technique",
                )
            ],
            objective="Detect unauthorized access attempts",
            response=Rule.RuleMetadata.Response(
                guidelines="Follow the incident response plan immediately.",
                playbooks=[
                    Rule.RuleMetadata.Response.Playbook(
                        notebook="incident_response.ipynb",
                        options={"notify": "email", "severity": "high"},
                    )
                ],
            ),
        ),
        schedule=Schedule(
            at_least_every="15m",
            exactly="15m",
            continuous=True,
            compute_group="rule_compute_group",
            enabled=True,
        ),
        input=Rule.Input(
            stream=Rule.Input.Stream(
                tables=[
                    Rule.Input.Stream.Table(
                        name="access_logs",
                        watermark=Rule.Input.Stream.Table.Watermark(
                            event_time_column="timestamp",
                            delay_threshold="5m",
                            drop_duplicates=["ip", "user_id"],
                        ),
                        alias="logs",
                        join_type="inner",
                        join_expr="access_logs.user_id = user_info.id",
                    ),
                    Rule.Input.Stream.Table(
                        name="user_info",
                        watermark=Rule.Input.Stream.Table.Watermark(
                            event_time_column="event_time",
                            delay_threshold="10m",
                            drop_duplicates=["id"],
                        ),
                        alias="users",
                        join_type="left",
                        join_expr="user_info.id = access_logs.user_id",
                    ),
                ],
                filter="status = 'active'",
                sql="SELECT * FROM streaming_source",
                custom=Rule.Input.CustomStream(
                    notebook="stream_custom.ipynb",
                    options={"filter": "recent", "limit": "1000"},
                ),
            ),
            batch=Rule.Input.Batch(
                sql="SELECT * FROM historical_source",
                custom=Rule.Input.CustomBatch(
                    notebook="batch_custom.ipynb",
                    options={"start_date": "2022-01-01", "end_date": "2022-12-31"},
                ),
            ),
        ),
        observables=[
            Rule.Observable(
                kind="ip",
                value="192.168.0.1",
                relationship="suspicious",
                risk=Rule.Observable.Risk(
                    impact="High",
                    confidence="Medium",
                ),
            ),
            Rule.Observable(
                kind="domain",
                value="malicious.com",
                relationship="malicious",
                risk=Rule.Observable.Risk(
                    impact="Critical",
                    confidence="High",
                ),
            ),
        ],
        output=Rule.Output(
            summary="Unauthorized access detected from multiple sources.",
            context={"alert": "Multiple failed logins", "severity": "high"},
        ),
        collate=Rule.Collate(
            collate_on=["ip", "user_id"],
            within="1h",
            action="append",
        ),
        status=ResourceStatus(
            job_id=101,
            job_name="rule_evaluation_job",
            enabled=True,
            notebook_path="/rules/evaluate_rule.ipynb",
            created_at=datetime(2023, 1, 5, 12, 0),
            job_status="scheduled",
            events=[
                ResourceStatus.StatusEvent(
                    action="create",
                    message="Rule evaluation started",
                    recorded_at=datetime(2023, 1, 5, 12, 0),
                ),
                ResourceStatus.StatusEvent(
                    action="update",
                    message="Rule evaluation finished",
                    recorded_at=datetime(2023, 1, 5, 12, 15),
                ),
            ],
        ),
    )

    # Marshal to the API object form and back; equality proves lossless
    # round-tripping of every nested field.
    assert rule == Rule.from_api_obj(rule.to_api_obj())
689
-
690
-
691
def test_transform_request_marshal_unmarshal():
    """Round-trip a fully populated TransformRequest through the API layer.

    Builds a request with both inline tabular input and autoloader input,
    plus two transforms (Gold and SilverTransform) carrying field specs,
    preset overrides, and field utils, then asserts
    ``from_api_obj(to_api_obj())`` reproduces an equal object.
    """
    request = TransformRequest(
        input=TransformRequest.Input(
            columns=[
                Dbui.TableColumnDetails(
                    name="col1",
                    type_name="int",
                    type_detail="integer",
                    position=1,
                    nullable=False,
                ),
                Dbui.TableColumnDetails(
                    name="col2",
                    type_name="varchar",
                    type_detail="string",
                    position=2,
                    nullable=True,
                ),
            ],
            # Cell values are strings regardless of declared column type.
            data=[{"col1": "1", "col2": "a"}, {"col1": "2", "col2": "b"}],
        ),
        autoloader_input=TransformRequest.Autoloader(
            format="csv",
            location="s3://bucket/data",
            schema_file="schema.json",
            cloud_files=TransformRequest.Autoloader.CloudFiles(
                schema_hints_file="hints_file.csv", schema_hints="hint1, hint2"
            ),
            row_count=1,
            row_offset=5,
        ),
        use_preset="preset_value",
        transforms=[
            TransformRequest.Transform(
                transform_type="Gold",
                use_preset_table="table_name",
                filter="col > 0",
                post_filter="col < 100",
                preset_overrides=TransformRequest.Transform.PresetOverrides(
                    omit_fields=["field1", "field2"]
                ),
                add_fields=[
                    FieldSpec(
                        name="field1",
                        comment="comment1",
                        var_assert=[
                            FieldSpec.Assert(expr="1=1", message="assertion passed"),
                            FieldSpec.Assert(expr="2=2", message="assertion passed 2"),
                        ],
                        var_from="source_field1",
                        alias="alias1",
                        expr="expr1",
                        literal="literal1",
                        join=FieldSpec.Join(
                            with_table="table_join1",
                            with_csv=FieldSpec.Join.WithCSV(path="csv_path1"),
                            lhs="left1",
                            rhs="right1",
                            select="select_expr1",
                        ),
                    ),
                    FieldSpec(
                        name="field2",
                        comment="comment2",
                        var_assert=[
                            FieldSpec.Assert(expr="a=b", message="assertion ok"),
                            FieldSpec.Assert(expr="c=d", message="assertion ok 2"),
                        ],
                        var_from="source_field2",
                        alias="alias2",
                        expr="expr2",
                        literal="literal2",
                        join=FieldSpec.Join(
                            with_table="table_join2",
                            with_csv=FieldSpec.Join.WithCSV(path="csv_path2"),
                            lhs="left2",
                            rhs="right2",
                            select="select_expr2",
                        ),
                    ),
                ],
                utils=FieldUtils(
                    unreferenced_columns=FieldUtils.UnreferencedColumns(
                        preserve=True,
                        embed_column="all_data",
                        omit_columns=["omit1", "omit2"],
                        duplicate_prefix="dup_",
                    ),
                    json_extract=[
                        FieldUtils.JsonExtract(
                            source="json_column1",
                            omit_fields=["omitA", "omitB"],
                            duplicate_prefix="dup1",
                            embed_column="embed1",
                        ),
                        FieldUtils.JsonExtract(
                            source="json_column2",
                            omit_fields=["omitC", "omitD"],
                            duplicate_prefix="dup2",
                            embed_column="embed2",
                        ),
                    ],
                ),
            ),
            TransformRequest.Transform(
                transform_type="SilverTransform",
                use_preset_table="table_b",
                filter="col >= 10",
                post_filter="col <= 50",
                preset_overrides=TransformRequest.Transform.PresetOverrides(
                    omit_fields=["fieldX", "fieldY"]
                ),
                add_fields=[
                    FieldSpec(
                        name="field3",
                        comment="comment3",
                        var_assert=[
                            FieldSpec.Assert(expr="assert_expr3a", message="message3a"),
                            FieldSpec.Assert(expr="assert_expr3b", message="message3b"),
                        ],
                        var_from="source_field3",
                        alias="alias3",
                        expr="expr3",
                        literal="literal3",
                        join=FieldSpec.Join(
                            with_table="table_join3",
                            with_csv=FieldSpec.Join.WithCSV(path="csv_path3"),
                            lhs="left3",
                            rhs="right3",
                            select="select_expr3",
                        ),
                    ),
                    FieldSpec(
                        name="field4",
                        comment="comment4",
                        var_assert=[
                            FieldSpec.Assert(expr="assert_expr4a", message="message4a"),
                            FieldSpec.Assert(expr="assert_expr4b", message="message4b"),
                        ],
                        var_from="source_field4",
                        alias="alias4",
                        expr="expr4",
                        literal="literal4",
                        join=FieldSpec.Join(
                            with_table="table_join4",
                            with_csv=FieldSpec.Join.WithCSV(path="csv_path4"),
                            lhs="left4",
                            rhs="right4",
                            select="select_expr4",
                        ),
                    ),
                ],
                utils=FieldUtils(
                    unreferenced_columns=FieldUtils.UnreferencedColumns(
                        preserve=False,
                        embed_column="extra_data",
                        omit_columns=["omitX", "omitY", "omitZ"],
                        duplicate_prefix="dupB",
                    ),
                    json_extract=[
                        FieldUtils.JsonExtract(
                            source="json_column3",
                            omit_fields=["omitE", "omitF"],
                            duplicate_prefix="dup3",
                            embed_column="embed3",
                        ),
                        FieldUtils.JsonExtract(
                            source="json_column4",
                            omit_fields=["omitG", "omitH"],
                            duplicate_prefix="dup4",
                            embed_column="embed4",
                        ),
                    ],
                ),
            ),
        ],
    )

    # Marshal to the API object form and back; equality proves lossless
    # round-tripping of every nested field.
    assert request == TransformRequest.from_api_obj(request.to_api_obj())
870
-
871
-
872
def test_transform_response_marshal_unmarshal():
    """Round-trip a TransformResponse with two stages through the API layer.

    Each stage carries column details and row data; equality after
    ``from_api_obj(to_api_obj())`` proves lossless marshaling.
    """
    response = TransformResponse(
        stages=[
            TransformResponse.Stages(
                transform_type="Gold",
                columns=[
                    Dbui.TableColumnDetails(
                        name="id",
                        type_name="int",
                        type_detail="integer",
                        position=1,
                        nullable=False,
                    ),
                    Dbui.TableColumnDetails(
                        name="name",
                        type_name="varchar",
                        type_detail="text",
                        position=2,
                        nullable=True,
                    ),
                ],
                # Cell values are strings regardless of declared column type.
                data=[{"id": "1", "name": "Alice"}, {"id": "2", "name": "Bob"}],
            ),
            TransformResponse.Stages(
                transform_type="SilverPreTransform",
                columns=[
                    Dbui.TableColumnDetails(
                        name="price",
                        type_name="float",
                        type_detail="decimal",
                        position=3,
                        nullable=False,
                    ),
                    Dbui.TableColumnDetails(
                        name="quantity",
                        type_name="int",
                        type_detail="integer",
                        position=4,
                        nullable=True,
                    ),
                ],
                data=[
                    {"price": "9.99", "quantity": "5"},
                    {"price": "19.99", "quantity": "10"},
                ],
            ),
        ]
    )

    # Marshal to the API object form and back; equality proves lossless
    # round-tripping of every nested field.
    assert response == TransformResponse.from_api_obj(response.to_api_obj())