fal 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of fal might be problematic.

Files changed (93)
  1. _fal_testing/utils.py +2 -2
  2. dbt/adapters/fal/__init__.py +21 -0
  3. dbt/adapters/fal/__version__.py +1 -0
  4. dbt/adapters/fal/connections.py +18 -0
  5. dbt/adapters/fal/impl.py +93 -0
  6. dbt/adapters/fal/load_db_profile.py +80 -0
  7. dbt/adapters/fal/wrappers.py +113 -0
  8. dbt/adapters/fal_experimental/__init__.py +11 -0
  9. dbt/adapters/fal_experimental/__version__.py +1 -0
  10. dbt/adapters/fal_experimental/adapter.py +149 -0
  11. dbt/adapters/fal_experimental/adapter_support.py +234 -0
  12. dbt/adapters/fal_experimental/connections.py +72 -0
  13. dbt/adapters/fal_experimental/impl.py +240 -0
  14. dbt/adapters/fal_experimental/support/athena.py +92 -0
  15. dbt/adapters/fal_experimental/support/bigquery.py +74 -0
  16. dbt/adapters/fal_experimental/support/duckdb.py +28 -0
  17. dbt/adapters/fal_experimental/support/postgres.py +88 -0
  18. dbt/adapters/fal_experimental/support/redshift.py +56 -0
  19. dbt/adapters/fal_experimental/support/snowflake.py +76 -0
  20. dbt/adapters/fal_experimental/support/trino.py +26 -0
  21. dbt/adapters/fal_experimental/telemetry/__init__.py +1 -0
  22. dbt/adapters/fal_experimental/telemetry/telemetry.py +411 -0
  23. dbt/adapters/fal_experimental/teleport.py +192 -0
  24. dbt/adapters/fal_experimental/teleport_adapter_support.py +23 -0
  25. dbt/adapters/fal_experimental/teleport_support/duckdb.py +122 -0
  26. dbt/adapters/fal_experimental/teleport_support/snowflake.py +72 -0
  27. dbt/adapters/fal_experimental/utils/__init__.py +50 -0
  28. dbt/adapters/fal_experimental/utils/environments.py +302 -0
  29. dbt/fal/adapters/python/__init__.py +3 -0
  30. dbt/fal/adapters/python/connections.py +319 -0
  31. dbt/fal/adapters/python/impl.py +291 -0
  32. dbt/fal/adapters/teleport/__init__.py +3 -0
  33. dbt/fal/adapters/teleport/impl.py +103 -0
  34. dbt/fal/adapters/teleport/info.py +73 -0
  35. dbt/include/fal/__init__.py +3 -0
  36. dbt/include/fal/dbt_project.yml +5 -0
  37. dbt/include/fal/macros/materializations/table.sql +46 -0
  38. dbt/include/fal/macros/teleport_duckdb.sql +8 -0
  39. dbt/include/fal/macros/teleport_snowflake.sql +31 -0
  40. dbt/include/fal_experimental/__init__.py +3 -0
  41. dbt/include/fal_experimental/dbt_project.yml +5 -0
  42. dbt/include/fal_experimental/macros/materializations/table.sql +36 -0
  43. fal/__init__.py +61 -11
  44. fal/dbt/__init__.py +11 -0
  45. fal/dbt/cli/__init__.py +1 -0
  46. fal/{cli → dbt/cli}/args.py +7 -2
  47. fal/{cli → dbt/cli}/cli.py +18 -3
  48. fal/{cli → dbt/cli}/dbt_runner.py +1 -1
  49. fal/{cli → dbt/cli}/fal_runner.py +6 -6
  50. fal/{cli → dbt/cli}/flow_runner.py +9 -9
  51. fal/{cli → dbt/cli}/model_generator/model_generator.py +5 -5
  52. fal/{cli → dbt/cli}/selectors.py +2 -2
  53. fal/{fal_script.py → dbt/fal_script.py} +4 -4
  54. {faldbt → fal/dbt/integration}/lib.py +2 -2
  55. {faldbt → fal/dbt/integration}/magics.py +2 -2
  56. {faldbt → fal/dbt/integration}/parse.py +7 -7
  57. {faldbt → fal/dbt/integration}/project.py +7 -7
  58. fal/dbt/integration/utils/yaml_helper.py +80 -0
  59. fal/dbt/new/project.py +43 -0
  60. fal/{node_graph.py → dbt/node_graph.py} +2 -2
  61. fal/{packages → dbt/packages}/dependency_analysis.py +32 -38
  62. fal/{packages → dbt/packages}/environments/__init__.py +3 -3
  63. fal/{packages → dbt/packages}/environments/base.py +2 -2
  64. fal/{packages → dbt/packages}/environments/conda.py +3 -3
  65. fal/{packages → dbt/packages}/environments/virtual_env.py +3 -3
  66. fal/{packages → dbt/packages}/isolated_runner.py +5 -5
  67. fal/{planner → dbt/planner}/executor.py +4 -4
  68. fal/{planner → dbt/planner}/plan.py +3 -3
  69. fal/{planner → dbt/planner}/schedule.py +5 -5
  70. fal/{planner → dbt/planner}/tasks.py +5 -5
  71. fal/{telemetry → dbt/telemetry}/telemetry.py +4 -4
  72. fal/{typing.py → dbt/typing.py} +2 -2
  73. fal/{utils.py → dbt/utils.py} +2 -2
  74. {fal-0.9.2.dist-info → fal-0.9.4.dist-info}/METADATA +98 -117
  75. fal-0.9.4.dist-info/RECORD +91 -0
  76. fal-0.9.4.dist-info/entry_points.txt +4 -0
  77. fal/cli/__init__.py +0 -1
  78. fal-0.9.2.dist-info/RECORD +0 -47
  79. fal-0.9.2.dist-info/entry_points.txt +0 -3
  80. {faldbt → dbt/adapters/fal_experimental}/utils/yaml_helper.py +0 -0
  81. /fal/{cli → dbt/cli}/model_generator/__init__.py +0 -0
  82. /fal/{cli → dbt/cli}/model_generator/module_check.py +0 -0
  83. /fal/{feature_store → dbt/feature_store}/__init__.py +0 -0
  84. /fal/{feature_store → dbt/feature_store}/feature.py +0 -0
  85. /fal/{packages → dbt/integration}/__init__.py +0 -0
  86. {faldbt → fal/dbt/integration}/logger.py +0 -0
  87. /fal/{planner → dbt/integration/utils}/__init__.py +0 -0
  88. {faldbt → fal/dbt/integration}/version.py +0 -0
  89. /fal/{telemetry → dbt/packages}/__init__.py +0 -0
  90. /fal/{packages → dbt/packages}/bridge.py +0 -0
  91. {faldbt → fal/dbt/planner}/__init__.py +0 -0
  92. {faldbt/utils → fal/dbt/telemetry}/__init__.py +0 -0
  93. {fal-0.9.2.dist-info → fal-0.9.4.dist-info}/WHEEL +0 -0
dbt/adapters/fal_experimental/support/bigquery.py
@@ -0,0 +1,74 @@
+ from dbt.adapters.base import BaseAdapter, BaseRelation
+ from dbt.adapters.base.connections import AdapterResponse
+ from dbt.adapters.fal_experimental.adapter_support import new_connection
+ import pandas as pd
+
+ # [bigquery] extras dependencies
+ import google.cloud.bigquery as bigquery
+ from google.cloud.bigquery.job import WriteDisposition
+
+ from dbt.adapters.bigquery import BigQueryAdapter, BigQueryConnectionManager
+
+ def read_relation_as_df(adapter: BaseAdapter, relation: BaseRelation) -> pd.DataFrame:
+     sql = f"SELECT * FROM {relation}"
+
+     assert adapter.type() == "bigquery"
+
+     with new_connection(adapter, "fal-bigquery:read_relation_as_df") as conn:
+
+         connection_manager: BaseConnectionManager = adapter.connections  # type: ignore
+         client: bigquery.Client = connection_manager.get_thread_connection().handle  # type: ignore
+
+         job = client.query(sql)
+         df = job.to_dataframe()
+
+         return df
+
+
+ def write_df_to_relation(
+     adapter: BigQueryAdapter,
+     data: pd.DataFrame,
+     relation: BaseRelation,
+ ) -> AdapterResponse:
+
+     assert adapter.type() == "bigquery"
+
+     project: str = relation.database  # type: ignore
+     dataset: str = relation.schema  # type: ignore
+     table: str = relation.identifier  # type: ignore
+
+     with new_connection(adapter, "fal-bigquery:write_df_to_relation") as conn:
+         connection_manager: BigQueryConnectionManager = adapter.connections
+         client: bigquery.Client = conn.handle
+
+         table_ref = bigquery.TableReference(
+             bigquery.DatasetReference(project, dataset), table
+         )
+
+         job_config = bigquery.LoadJobConfig(
+             # Specify a (partial) schema. All columns are always written to the
+             # table. The schema is used to assist in data type definitions.
+             schema=[
+                 # TODO: offer as a dbt.config parameter?
+                 # bigquery.SchemaField.from_api_repr(field)
+                 # for field in (fields_schema or [])
+             ],
+             source_format="PARQUET",
+             write_disposition=WriteDisposition.WRITE_TRUNCATE,
+         )
+
+         with connection_manager.exception_handler("START JOB"):
+             job = client.load_table_from_dataframe(
+                 data, table_ref, job_config=job_config
+             )
+
+         timeout = connection_manager.get_job_execution_timeout_seconds(conn) or 300
+
+         with connection_manager.exception_handler("LOAD TABLE"):
+             adapter.poll_until_job_completes(job, timeout)
+
+         query_table = client.get_table(job.destination)
+         num_rows = query_table.num_rows
+
+         # TODO: better AdapterResponse
+         return AdapterResponse("OK", rows_affected=num_rows)
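For orientation, the core load pattern used by write_df_to_relation can be reproduced with the plain google-cloud-bigquery client. This is a minimal sketch, not part of the diff, assuming application-default credentials and an existing dataset; the project, dataset, and table names are illustrative placeholders.

import pandas as pd
import google.cloud.bigquery as bigquery
from google.cloud.bigquery.job import WriteDisposition

client = bigquery.Client()  # picks up application-default credentials
df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})

table_ref = bigquery.TableReference(
    bigquery.DatasetReference("my-project", "my_dataset"), "my_table"
)
job_config = bigquery.LoadJobConfig(
    source_format="PARQUET",                            # the DataFrame is serialized as Parquet
    write_disposition=WriteDisposition.WRITE_TRUNCATE,  # replace the table contents
)
client.load_table_from_dataframe(df, table_ref, job_config=job_config).result()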
dbt/adapters/fal_experimental/support/duckdb.py
@@ -0,0 +1,28 @@
+ from dbt.adapters.base import BaseAdapter, BaseRelation
+ from dbt.adapters.base.connections import AdapterResponse
+ from dbt.adapters.fal_experimental.adapter_support import new_connection
+ import pandas as pd
+ from dbt.adapters.sql import SQLAdapter
+ import duckdb
+
+
+ def read_relation_as_df(adapter: BaseAdapter, relation: BaseRelation) -> pd.DataFrame:
+     db_path = adapter.config.credentials.path
+
+     con = duckdb.connect(database=db_path)
+     df = con.execute(f"SELECT * FROM {relation.schema}.{relation.identifier}").fetchdf()
+     return df
+
+
+ def write_df_to_relation(
+     adapter: SQLAdapter,
+     data: pd.DataFrame,
+     relation: BaseRelation,
+ ) -> AdapterResponse:
+
+     db_path = adapter.config.credentials.path
+     con = duckdb.connect(database=db_path)
+     rows_affected = con.execute(
+         f"CREATE OR REPLACE TABLE {relation.schema}.{relation.identifier} AS SELECT * FROM data;"
+     ).fetchall()[0][0]
+     return AdapterResponse("OK", rows_affected=rows_affected)
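The write path above leans on DuckDB's ability to reference a local pandas DataFrame by name inside SQL (a replacement scan). A standalone sketch of that pattern, with an illustrative file path, schema, and table name:

import duckdb
import pandas as pd

data = pd.DataFrame({"id": [1, 2, 3]})

con = duckdb.connect(database="example.duckdb")
con.execute("CREATE SCHEMA IF NOT EXISTS analytics")
# "data" in the SQL below resolves to the local DataFrame via DuckDB's replacement scan
con.execute("CREATE OR REPLACE TABLE analytics.my_table AS SELECT * FROM data")
round_trip = con.execute("SELECT * FROM analytics.my_table").fetchdf()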
dbt/adapters/fal_experimental/support/postgres.py
@@ -0,0 +1,88 @@
+ import csv
+ from io import StringIO
+
+ import pandas as pd
+ import sqlalchemy
+
+ from dbt.adapters.base import BaseRelation
+ from dbt.adapters.base.connections import AdapterResponse
+ from dbt.adapters.fal_experimental.adapter_support import drop_relation_if_it_exists, new_connection
+ from dbt.adapters.postgres import PostgresAdapter
+
+
+ def read_relation_as_df(
+     adapter: PostgresAdapter, relation: BaseRelation
+ ) -> pd.DataFrame:
+     assert adapter.type() == "postgres"
+
+     with new_connection(adapter, "fal-postgres:read_relation_as_df") as connection:
+         # If the given adapter supports the DBAPI (PEP 249), we can
+         # use its connection directly for the engine.
+         alchemy_engine = sqlalchemy.create_engine(
+             "postgresql+psycopg2://",
+             creator=lambda *args, **kwargs: connection.handle,
+         )
+
+         return pd.read_sql_table(
+             con=alchemy_engine,
+             table_name=relation.identifier,
+             schema=relation.schema,
+         )
+
+
+ def write_df_to_relation(
+     adapter: PostgresAdapter,
+     data: pd.DataFrame,
+     relation: BaseRelation,
+     *,
+     if_exists: str = "replace",
+ ) -> AdapterResponse:
+     assert adapter.type() == "postgres"
+
+     with new_connection(adapter, "fal-postgres:write_df_to_relation") as connection:
+         # TODO: this should probably live in the materialization macro.
+         temp_relation = relation.replace_path(
+             identifier=f"__dbt_fal_temp_{relation.identifier}"
+         )
+         drop_relation_if_it_exists(adapter, temp_relation)
+
+         alchemy_engine = sqlalchemy.create_engine(
+             "postgresql+psycopg2://",
+             creator=lambda *args, **kwargs: connection.handle,
+         )
+
+         # TODO: probably worth handling errors here and returning
+         # a proper adapter response.
+         rows_affected = data.to_sql(
+             con=alchemy_engine,
+             name=temp_relation.identifier,
+             schema=temp_relation.schema,
+             if_exists=if_exists,
+             index=False,
+             method=_psql_insert_copy,
+         )
+         adapter.cache.add(temp_relation)
+         drop_relation_if_it_exists(adapter, relation)
+         adapter.rename_relation(temp_relation, relation)
+         adapter.commit_if_has_connection()
+
+         return AdapterResponse("OK", rows_affected=rows_affected)
+
+
+ def _psql_insert_copy(table, conn, keys, data_iter):
+     """Alternative to_sql method for PostgreSQL.
+
+     Adapted from https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#io-sql-method
+     """
+     dbapi_conn = conn.connection
+     with dbapi_conn.cursor() as cur:
+         s_buf = StringIO()
+         writer = csv.writer(s_buf)
+         writer.writerows(data_iter)
+         s_buf.seek(0)
+
+         columns = ", ".join((f'"{k}"' for k in keys))
+         table_name = f"{table.schema}.{table.name}" if table.schema else table.name
+
+         sql = f"COPY {table_name} ({columns}) FROM STDIN WITH CSV"
+         cur.copy_expert(sql=sql, file=s_buf)
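The _psql_insert_copy helper is a drop-in method for pandas.DataFrame.to_sql that streams rows through Postgres COPY instead of issuing many INSERTs. A minimal sketch of calling it directly, assuming a reachable Postgres database; the connection URL, schema, and table name are illustrative:

import pandas as pd
import sqlalchemy

engine = sqlalchemy.create_engine("postgresql+psycopg2://user:secret@localhost:5432/mydb")
df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})

df.to_sql(
    "my_table",
    con=engine,
    schema="public",
    if_exists="replace",       # same default the module uses for the temp relation
    index=False,
    method=_psql_insert_copy,  # the COPY-based helper defined in the diff above
)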
dbt/adapters/fal_experimental/support/redshift.py
@@ -0,0 +1,56 @@
+ import csv
+ from io import StringIO
+
+ import pandas as pd
+ import sqlalchemy
+ import awswrangler as wr
+
+ from dbt.adapters.base import BaseRelation
+ from dbt.adapters.base.connections import AdapterResponse
+
+ from dbt.adapters.fal_experimental.adapter_support import new_connection, drop_relation_if_it_exists
+
+ from dbt.adapters.redshift import RedshiftAdapter
+
+
+ def read_relation_as_df(
+     adapter: RedshiftAdapter, relation: BaseRelation
+ ) -> pd.DataFrame:
+     sql = f"SELECT * FROM {relation}"
+
+     assert adapter.type() == "redshift"
+
+     with new_connection(adapter, "fal-redshift:read_relation_as_df") as conn:
+         df = wr.redshift.read_sql_query(sql, con=conn.handle)
+         return df
+
+
+ def write_df_to_relation(
+     adapter: RedshiftAdapter,
+     data: pd.DataFrame,
+     relation: BaseRelation,
+ ) -> AdapterResponse:
+
+     assert adapter.type() == "redshift"
+
+     with new_connection(adapter, "fal-redshift:write_df_to_relation") as connection:
+         # TODO: this should probably live in the materialization macro.
+         temp_relation = relation.replace_path(
+             identifier=f"__dbt_fal_temp_{relation.identifier}"
+         )
+         drop_relation_if_it_exists(adapter, temp_relation)
+
+         wr.redshift.to_sql(
+             data,
+             connection.handle,
+             table=temp_relation.identifier,
+             schema=temp_relation.schema,
+             index=False,
+         )
+
+         adapter.cache.add(temp_relation)
+         drop_relation_if_it_exists(adapter, relation)
+         adapter.rename_relation(temp_relation, relation)
+         adapter.commit_if_has_connection()
+
+         return AdapterResponse("OK")
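Both Redshift helpers delegate the actual I/O to awswrangler. A rough standalone sketch of the same calls outside dbt, assuming a redshift_connector connection obtained through an illustrative AWS Glue connection name; the schema and table names are placeholders:

import awswrangler as wr
import pandas as pd

con = wr.redshift.connect(connection="my-glue-connection")  # returns a redshift_connector connection
df = pd.DataFrame({"id": [1, 2]})

wr.redshift.to_sql(df, con, table="my_table", schema="public", index=False)
round_trip = wr.redshift.read_sql_query("SELECT * FROM public.my_table", con=con)
con.close()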
dbt/adapters/fal_experimental/support/snowflake.py
@@ -0,0 +1,76 @@
+ import pandas as pd
+
+ from dbt.adapters.base import BaseRelation
+ from dbt.adapters.base.connections import AdapterResponse
+
+ from dbt.adapters.fal_experimental.adapter_support import new_connection
+
+ # [snowflake] extras dependencies
+ import snowflake.connector as snowflake
+
+ from dbt.adapters.snowflake import SnowflakeAdapter, SnowflakeConnectionManager
+
+
+ def read_relation_as_df(
+     adapter: SnowflakeAdapter, relation: BaseRelation
+ ) -> pd.DataFrame:
+     sql = f"SELECT * FROM {relation}"
+
+     assert adapter.type() == "snowflake"
+
+     with new_connection(adapter, "fal-snowflake:read_relation_as_df") as conn:
+         handle: snowflake.SnowflakeConnection = conn.handle
+         cur = handle.cursor()
+
+         cur.execute(sql)
+         df: pd.DataFrame = cur.fetch_pandas_all()
+
+         # HACK: manually parse ARRAY and VARIANT since they are returned as strings right now
+         # Related issue: https://github.com/snowflakedb/snowflake-connector-python/issues/544
+         for desc in cur.description:
+             # 5=VARIANT, 10=ARRAY -- https://docs.snowflake.com/en/user-guide/python-connector-api.html#type-codes
+             if desc.type_code in [5, 10]:
+                 import json
+
+                 df[desc.name] = df[desc.name].map(lambda v: json.loads(v))
+
+         return df
+
+
+ def write_df_to_relation(
+     adapter: SnowflakeAdapter,
+     data: pd.DataFrame,
+     relation: BaseRelation,
+ ) -> AdapterResponse:
+     import snowflake.connector.pandas_tools as snowflake_pandas
+
+     assert adapter.type() == "snowflake"
+
+     database: str = relation.database  # type: ignore
+     schema: str = relation.schema  # type: ignore
+     table: str = relation.identifier  # type: ignore
+
+     with new_connection(adapter, "fal-snowflake:write_df_to_relation") as conn:
+         connection_manager: SnowflakeConnectionManager = adapter.connections  # type: ignore
+         handle: snowflake.SnowflakeConnection = conn.handle
+
+         with connection_manager.exception_handler("LOAD TABLE"):
+             success, _, num_rows, output = snowflake_pandas.write_pandas(
+                 handle,
+                 data,
+                 table_name=table,
+                 database=database,
+                 schema=schema,
+                 overwrite=True,  # TODO: This helps when table schema changes, but it is not atomic
+                 auto_create_table=True,
+                 quote_identifiers=False,
+             )
+             if not success:
+                 # In case the failure does not raise by itself
+                 # I have not been able to reproduce such a case
+                 from dbt.exceptions import DbtDatabaseError
+
+                 raise DbtDatabaseError(output)
+
+         # TODO: better AdapterResponse
+         return AdapterResponse(str(output[0][1]), rows_affected=num_rows)
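For reference, the underlying snowflake-connector calls (write_pandas for the upload, fetch_pandas_all for the read) can be exercised on their own. A minimal sketch with illustrative credentials and object names; in the adapter the connection comes from dbt's profile instead:

import pandas as pd
import snowflake.connector as snowflake
import snowflake.connector.pandas_tools as snowflake_pandas

conn = snowflake.connector.connect(
    account="my_account", user="my_user", password="my_password", warehouse="my_wh"
)
df = pd.DataFrame({"ID": [1, 2], "NAME": ["a", "b"]})

# returns (success, number_of_chunks, number_of_rows, copy output)
success, _, num_rows, _ = snowflake_pandas.write_pandas(
    conn,
    df,
    table_name="MY_TABLE",
    database="MY_DB",
    schema="PUBLIC",
    overwrite=True,
    auto_create_table=True,
    quote_identifiers=False,
)

cur = conn.cursor()
cur.execute("SELECT * FROM MY_DB.PUBLIC.MY_TABLE")
round_trip = cur.fetch_pandas_all()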
dbt/adapters/fal_experimental/support/trino.py
@@ -0,0 +1,26 @@
+ from typing import Any, Dict
+ from dbt.adapters.base import BaseAdapter, Credentials
+ from trino.sqlalchemy import URL
+ from dbt.adapters.trino.connections import TrinoCredentials
+ import sqlalchemy
+
+ def create_engine(adapter: BaseAdapter) -> Any:
+     creds = adapter.config.credentials
+
+     connect_args = _build_connect_args(creds)
+
+     url = URL(
+         host=creds.host,
+         port=creds.port,
+         catalog=creds.database,
+         user=creds.user
+     )
+     return sqlalchemy.create_engine(url, connect_args=connect_args)
+
+ def _build_connect_args(credentials: TrinoCredentials) -> Dict[str, Any]:
+     # See:
+     # https://github.com/starburstdata/dbt-trino/blob/master/dbt/adapters/trino/connections.py
+     return {
+         "auth": credentials.trino_auth(),
+         "http_scheme": credentials.http_scheme.value
+     }
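create_engine above simply assembles a SQLAlchemy URL with the trino dialect and forwards dbt's Trino credentials as connect_args. A minimal standalone sketch of the same idea, with an illustrative host, catalog, and user and no authentication:

import sqlalchemy
from trino.sqlalchemy import URL

url = URL(host="trino.example.com", port=8080, catalog="hive", user="fal")
engine = sqlalchemy.create_engine(url)

with engine.connect() as conn:
    rows = conn.execute(sqlalchemy.text("SELECT 1")).fetchall()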
dbt/adapters/fal_experimental/telemetry/__init__.py
@@ -0,0 +1 @@
+ from .telemetry import *