acryl-datahub 0.15.0.2rc8__py3-none-any.whl → 0.15.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.2rc8.dist-info → acryl_datahub-0.15.0.3.dist-info}/METADATA +2505 -2505
- {acryl_datahub-0.15.0.2rc8.dist-info → acryl_datahub-0.15.0.3.dist-info}/RECORD +11 -11
- datahub/__init__.py +1 -1
- datahub/cli/delete_cli.py +16 -2
- datahub/ingestion/source/fivetran/config.py +4 -0
- datahub/ingestion/source/fivetran/fivetran.py +15 -5
- datahub/ingestion/source/gcs/gcs_source.py +2 -1
- datahub/ingestion/source/redshift/query.py +77 -47
- {acryl_datahub-0.15.0.2rc8.dist-info → acryl_datahub-0.15.0.3.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.2rc8.dist-info → acryl_datahub-0.15.0.3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.2rc8.dist-info → acryl_datahub-0.15.0.3.dist-info}/top_level.txt +0 -0
{acryl_datahub-0.15.0.2rc8.dist-info → acryl_datahub-0.15.0.3.dist-info}/RECORD CHANGED

@@ -1,4 +1,4 @@
-datahub/__init__.py,sha256=
+datahub/__init__.py,sha256=W_09oIthIpoet0P4t-RgCWaJ-k83wzO6HCCmtceQw44,573
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
 datahub/entrypoints.py,sha256=IMtLWvGuiqoUSnNaCaFjhd86NHwuXSWXp2kUL-xDkk0,7950
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -61,7 +61,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/cli/check_cli.py,sha256=9dXNyzZayHeoFjwFjLkMVyx6DiCZfeESyI-sYtGA6bE,12850
 datahub/cli/cli_utils.py,sha256=onbG7z9hIm0zCAm0a2ulTOsHC_NVkdIsbg__EMj02DQ,13540
 datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
-datahub/cli/delete_cli.py,sha256=
+datahub/cli/delete_cli.py,sha256=oQ4Yy6hxZHcl67MYJiQumLs_8QmFEj7SPZFzxFXvDk8,23481
 datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
 datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,36493
 datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
@@ -296,9 +296,9 @@ datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
 datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
 datahub/ingestion/source/dynamodb/dynamodb.py,sha256=wcEQSfQak45yPNZN7pCUEQFmjyWCpqRk1WjJJz9E2Go,22395
 datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/fivetran/config.py,sha256=
+datahub/ingestion/source/fivetran/config.py,sha256=BP3KRfAQ6H5qyEeJNu9vNfZNwLoyj4Tl2kXiLVR5DNM,9027
 datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
-datahub/ingestion/source/fivetran/fivetran.py,sha256=
+datahub/ingestion/source/fivetran/fivetran.py,sha256=CVJhW7_os5BTRlzaUX2KOK6CkAVJ0mWQtgTnE6F3fhE,13760
 datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP_CyAT5Cian2N4a-lb8x1NKHk,12776
 datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
 datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -307,7 +307,7 @@ datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=86Tm3NNWMf0xM4TklNIEeN
 datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=VbZ-Xzryl5TMRapu7nlxlsXS8T8lFZcHK9AJnEadJ8Q,11111
 datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=_oTXN0fzB4kYyFclah9X_1ds32bLayQyyWgoPeHQMw4,12923
 datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/gcs/gcs_source.py,sha256=
+datahub/ingestion/source/gcs/gcs_source.py,sha256=5EZkrDqjRNQz_aUL1MLp0PTFm0Ztubmk0NYJGZTRLjU,6276
 datahub/ingestion/source/gcs/gcs_utils.py,sha256=_78KM863XXgkVLmZLtYGF5PJNnZas1go-XRtOq-79lo,1047
 datahub/ingestion/source/git/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/git/git_import.py,sha256=5CT6vMDb0MDctCtShnxb3JVihULtvkYGr9judHJFsOk,4143
@@ -392,7 +392,7 @@ datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX
 datahub/ingestion/source/redshift/lineage.py,sha256=bUy0uJowrqSc33Z50fIxFlJkyhe-OPM_qgPh-smSTgM,43983
 datahub/ingestion/source/redshift/lineage_v2.py,sha256=OcVW_27sSaZOYZPTd2j-LS9SzFQ1kXz6cMzM2ZDWhJQ,16751
 datahub/ingestion/source/redshift/profile.py,sha256=T4H79ycq2tPobLM1tTLRtu581Qa8LlKxEok49m0AirU,4294
-datahub/ingestion/source/redshift/query.py,sha256=
+datahub/ingestion/source/redshift/query.py,sha256=X0KlDPzM68j0SYKXhq50DkLbFUIbGuPmGCYYmr8E0v0,44353
 datahub/ingestion/source/redshift/redshift.py,sha256=x9dKocJdGPaNs2fRdaddaBtZNxmTJFwYDhXY5nl_5zM,44444
 datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
 datahub/ingestion/source/redshift/redshift_schema.py,sha256=9IYeUsnISenq3eVB3k-s7zK8nInWDAYViFnDrNjtkb0,19149
@@ -990,8 +990,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-0.15.0.
-acryl_datahub-0.15.0.
-acryl_datahub-0.15.0.
-acryl_datahub-0.15.0.
-acryl_datahub-0.15.0.
+acryl_datahub-0.15.0.3.dist-info/METADATA,sha256=jiS4oA2DAbgkw-RvujSYKPpN8mEjXv5qmPywbUU7h9M,173241
+acryl_datahub-0.15.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+acryl_datahub-0.15.0.3.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
+acryl_datahub-0.15.0.3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-0.15.0.3.dist-info/RECORD,,

datahub/__init__.py CHANGED

datahub/cli/delete_cli.py CHANGED
@@ -265,6 +265,11 @@ def undo_by_filter(
     type=str,
     help="Urn of the entity to delete, for single entity deletion",
 )
+@click.option(
+    "--urn-file",
+    required=False,
+    help="Path of file with urns (one per line) to be deleted",
+)
 @click.option(
     "-a",
     "--aspect",
@@ -353,6 +358,7 @@ def undo_by_filter(
 @telemetry.with_telemetry()
 def by_filter(
     urn: Optional[str],
+    urn_file: Optional[str],
     aspect: Optional[str],
     force: bool,
     soft: bool,
@@ -373,6 +379,7 @@ def by_filter(
     # Validate the cli arguments.
     _validate_user_urn_and_filters(
         urn=urn,
+        urn_file=urn_file,
         entity_type=entity_type,
         platform=platform,
         env=env,
@@ -429,6 +436,12 @@ def by_filter(
                 batch_size=batch_size,
             )
         )
+    elif urn_file:
+        with open(urn_file, "r") as r:
+            urns = []
+            for line in r.readlines():
+                urn = line.strip().strip('"')
+                urns.append(urn)
     else:
         urns = list(
             graph.get_urns_by_filter(
@@ -537,6 +550,7 @@ def _delete_urns_parallel(
 
 def _validate_user_urn_and_filters(
     urn: Optional[str],
+    urn_file: Optional[str],
     entity_type: Optional[str],
     platform: Optional[str],
     env: Optional[str],
@@ -549,9 +563,9 @@ def _validate_user_urn_and_filters(
         raise click.UsageError(
             "You cannot provide both an urn and a filter rule (entity-type / platform / env / query)."
         )
-    elif not urn and not (entity_type or platform or env or query):
+    elif not urn and not urn_file and not (entity_type or platform or env or query):
         raise click.UsageError(
-            "You must provide either an urn or at least one filter (entity-type / platform / env / query) in order to delete entities."
+            "You must provide either an urn or urn_file or at least one filter (entity-type / platform / env / query) in order to delete entities."
         )
     elif query:
         logger.warning(
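The new --urn-file branch reads one URN per line and normalizes each line by stripping surrounding whitespace and double quotes, so a file exported from a quoted CSV column works unmodified. A minimal sketch of that parsing behavior (the file contents are hypothetical):

```python
# Hypothetical contents of an urns.txt passed via --urn-file.
lines = [
    'urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)\n',
    '"urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl2,PROD)"\n',
]

# Same normalization as the branch above: strip whitespace, then quotes.
urns = [line.strip().strip('"') for line in lines]
assert urns[1] == "urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl2,PROD)"
```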
datahub/ingestion/source/fivetran/config.py CHANGED

@@ -167,6 +167,10 @@ class PlatformDetail(ConfigModel):
         description="The database that all assets produced by this connector belong to. "
         "For destinations, this defaults to the fivetran log config's database.",
     )
+    include_schema_in_urn: bool = pydantic.Field(
+        default=True,
+        description="Include schema in the dataset URN. In some cases, the schema is not relevant to the dataset URN and Fivetran sets it to the source and destination table names in the connector.",
+    )
 
 
 class FivetranSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin):
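PlatformDetail is the per-connector override block in a Fivetran recipe, so the new flag can be toggled independently for each source and destination. A minimal construction sketch (field values are hypothetical, and it assumes PlatformDetail's other fields keep their defaults):

```python
from datahub.ingestion.source.fivetran.config import PlatformDetail

# Hypothetical override: emit URNs as "<database>.<table>" instead of
# "<database>.<schema>.<table>" for one connector's source side.
detail = PlatformDetail(
    platform="postgres",
    database="analytics",
    include_schema_in_urn=False,
)
assert detail.include_schema_in_urn is False
```

Defaulting to True keeps URNs from existing recipes unchanged.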
datahub/ingestion/source/fivetran/fivetran.py CHANGED

@@ -119,21 +119,31 @@ class FivetranSource(StatefulIngestionSourceBase):
         )
 
         for lineage in connector.lineage:
+            source_table = (
+                lineage.source_table
+                if source_details.include_schema_in_urn
+                else lineage.source_table.split(".", 1)[1]
+            )
             input_dataset_urn = DatasetUrn.create_from_ids(
                 platform_id=source_details.platform,
                 table_name=(
-                    f"{source_details.database.lower()}.{
+                    f"{source_details.database.lower()}.{source_table}"
                     if source_details.database
-                    else
+                    else source_table
                 ),
                 env=source_details.env,
                 platform_instance=source_details.platform_instance,
             )
             input_dataset_urn_list.append(input_dataset_urn)
 
+            destination_table = (
+                lineage.destination_table
+                if destination_details.include_schema_in_urn
+                else lineage.destination_table.split(".", 1)[1]
+            )
             output_dataset_urn = DatasetUrn.create_from_ids(
                 platform_id=destination_details.platform,
-                table_name=f"{destination_details.database.lower()}.{
+                table_name=f"{destination_details.database.lower()}.{destination_table}",
                 env=destination_details.env,
                 platform_instance=destination_details.platform_instance,
             )
@@ -176,12 +186,12 @@ class FivetranSource(StatefulIngestionSourceBase):
             **{
                 f"source.{k}": str(v)
                 for k, v in source_details.dict().items()
-                if v is not None
+                if v is not None and not isinstance(v, bool)
             },
             **{
                 f"destination.{k}": str(v)
                 for k, v in destination_details.dict().items()
-                if v is not None
+                if v is not None and not isinstance(v, bool)
             },
         )
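Fivetran reports lineage tables as "<schema>.<table>", so disabling include_schema_in_urn simply drops everything up to the first dot; the second hunk then keeps boolean fields such as this flag out of the emitted job properties. A sketch of the naming effect, with hypothetical values:

```python
# A Fivetran lineage entry arrives as "<schema>.<table>" (values hypothetical).
source_table = "public.orders"
database = "analytics"

for include_schema_in_urn in (True, False):
    # Same derivation as the hunk above: full name, or schema prefix dropped.
    table = source_table if include_schema_in_urn else source_table.split(".", 1)[1]
    print(f"{database.lower()}.{table}")
# -> analytics.public.orders
# -> analytics.orders
```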
datahub/ingestion/source/gcs/gcs_source.py CHANGED

@@ -88,6 +88,7 @@ class GCSSource(StatefulIngestionSourceBase):
         super().__init__(config, ctx)
         self.config = config
         self.report = GCSSourceReport()
+        self.platform: str = PLATFORM_GCS
         self.s3_source = self.create_equivalent_s3_source(ctx)
 
     @classmethod
@@ -135,7 +136,7 @@ class GCSSource(StatefulIngestionSourceBase):
 
     def create_equivalent_s3_source(self, ctx: PipelineContext) -> S3Source:
         config = self.create_equivalent_s3_config()
-        return self.s3_source_overrides(S3Source(config, ctx))
+        return self.s3_source_overrides(S3Source(config, PipelineContext(ctx.run_id)))
 
     def s3_source_overrides(self, source: S3Source) -> S3Source:
         source.source_config.platform = PLATFORM_GCS
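The embedded S3Source now receives a fresh PipelineContext carrying only the run_id rather than the GCS source's own context; a plausible reading is that this isolates the helper source from outer pipeline state such as the graph connection, though the diff itself does not state the motivation. A sketch of the changed construction (import path as in the DataHub codebase; the run id is hypothetical):

```python
from datahub.ingestion.api.common import PipelineContext

ctx = PipelineContext(run_id="gcs-run-1")

# As in the diff: same run_id, but a separate, bare context for the inner source.
inner_ctx = PipelineContext(ctx.run_id)
assert inner_ctx.run_id == ctx.run_id and inner_ctx is not ctx
```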
datahub/ingestion/source/redshift/query.py CHANGED

@@ -797,61 +797,91 @@ class RedshiftServerlessQuery(RedshiftCommonQuery):
         db_name: str, start_time: datetime, end_time: datetime
     ) -> str:
         return """
-            SELECT
-                distinct cluster,
-                target_schema,
-                target_table,
-                username,
-                source_schema,
-                source_table,
-                query_text AS ddl,
-                start_time AS timestamp
-            FROM
-            (
-                SELECT
-                    sti.schema AS target_schema,
-                    sti.table AS target_table,
-                    sti.database AS cluster,
-                    qi.table_id AS target_table_id,
-                    qi.query_id AS query_id,
-                    qi.start_time AS start_time
-                FROM
-                    SYS_QUERY_DETAIL qi
-                JOIN
-                    SVV_TABLE_INFO sti on sti.table_id = qi.table_id
-                WHERE
-                    start_time >= '{start_time}' and
-                    start_time < '{end_time}' and
-                    cluster = '{db_name}' and
-                    step_name = 'insert'
-            ) AS target_tables
-            JOIN
-            (
+            WITH queries AS (
                 SELECT
-                    sti.
-                    sti.
-
-                    qs.
-
-
+                    sti.database as cluster,
+                    sti.schema AS "schema",
+                    sti.table AS "table",
+                    qs.table_id AS table_id,
+                    qs.query_id as query_id,
+                    qs.step_name as step_name,
+                    sui.user_name as username,
+                    source,
+                    MIN(qs.start_time) as "timestamp" -- multiple duplicate records with start_time increasing slightly by miliseconds
                 FROM
                     SYS_QUERY_DETAIL qs
                 JOIN
                     SVV_TABLE_INFO sti ON sti.table_id = qs.table_id
                 LEFT JOIN
-                    SYS_QUERY_TEXT qt ON qt.query_id = qs.query_id
-                LEFT JOIN
                     SVV_USER_INFO sui ON qs.user_id = sui.user_id
                 WHERE
-
-                    qs.
-
-
-
-
-
-
-
+                    cluster = '{db_name}' AND
+                    qs.user_id <> 1 AND -- this is user 'rdsdb'
+                    qs.start_time >= '{start_time}' AND
+                    qs.start_time < '{end_time}'
+                GROUP BY cluster, "schema", "table", qs.table_id, query_id, step_name, username, source -- to be sure we are not making duplicates ourselves the list of group by must match whatever we use in "group by" and "where" of subsequent queries ("cluster" is already set to single value in this query)
+            ),
+            unique_query_text AS (
+                SELECT
+                    query_id,
+                    sequence,
+                    text
+                FROM (
+                    SELECT
+                        query_id,
+                        "sequence",
+                        text,
+                        ROW_NUMBER() OVER (
+                            PARTITION BY query_id, sequence
+                        ) as rn
+                    FROM SYS_QUERY_TEXT
+                )
+                WHERE rn = 1
+            ),
+            scan_queries AS (
+                SELECT
+                    "schema" as source_schema,
+                    "table" as source_table,
+                    table_id as source_table_id,
+                    queries.query_id as query_id,
+                    username,
+                    LISTAGG(qt."text") WITHIN GROUP (ORDER BY sequence) AS query_text
+                FROM
+                    "queries" LEFT JOIN
+                    unique_query_text qt ON qt.query_id = queries.query_id
+                WHERE
+                    source = 'Redshift(local)' AND
+                    step_name = 'scan' AND
+                    qt.sequence < 16 -- truncating query to not exceed Redshift limit on LISTAGG function (each sequence has at most 4k characters, limit is 64k, divided by 4k gives 16, starts count from 0)
+                GROUP BY source_schema, source_table, source_table_id, queries.query_id, username
+            ),
+            insert_queries AS (
+                SELECT
+                    "schema" as target_schema,
+                    "table" as target_table,
+                    table_id as target_table_id,
+                    query_id,
+                    cluster,
+                    min("timestamp") as "timestamp"
+                FROM
+                    queries
+                WHERE
+                    step_name = 'insert'
+                GROUP BY cluster, target_schema, target_table, target_table_id, query_id
+            )
+            SELECT
+                cluster,
+                target_schema,
+                target_table,
+                username,
+                source_schema,
+                source_table,
+                query_text AS ddl,
+                "timestamp"
+            FROM scan_queries
+            JOIN insert_queries on insert_queries.query_id = scan_queries.query_id
+            WHERE source_table_id <> target_table_id
+            ORDER BY cluster, target_schema, target_table, "timestamp" ASC;
         """.format(
             # We need the original database name for filtering
             db_name=db_name,
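The rewrite replaces the old pair of joined subqueries with CTEs that deduplicate SYS_QUERY_DETAIL rows, pick one copy of each SYS_QUERY_TEXT chunk, and reassemble query text with LISTAGG. The "qt.sequence < 16" guard follows from the limits named in the query's own comment (each text chunk holds at most 4k characters, LISTAGG output is capped at 64k); a quick arithmetic check:

```python
# Figures taken from the query's comment; treated here as given.
LISTAGG_LIMIT = 64 * 1024  # max size LISTAGG may return
CHUNK_SIZE = 4 * 1024      # max characters per SYS_QUERY_TEXT row
print(LISTAGG_LIMIT // CHUNK_SIZE)  # -> 16; sequence starts at 0, hence "< 16"
```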
{acryl_datahub-0.15.0.2rc8.dist-info → acryl_datahub-0.15.0.3.dist-info}/WHEEL: file without changes
{acryl_datahub-0.15.0.2rc8.dist-info → acryl_datahub-0.15.0.3.dist-info}/entry_points.txt: file without changes
{acryl_datahub-0.15.0.2rc8.dist-info → acryl_datahub-0.15.0.3.dist-info}/top_level.txt: file without changes