sagemaker-studio-dataengineering-sessions 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. sagemaker_studio_dataengineering_sessions-1.0.2/.gitignore +18 -0
  2. sagemaker_studio_dataengineering_sessions-1.0.2/PKG-INFO +114 -0
  3. sagemaker_studio_dataengineering_sessions-1.0.2/README.md +98 -0
  4. sagemaker_studio_dataengineering_sessions-1.0.2/pyproject.toml +136 -0
  5. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/__init__.py +0 -0
  6. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/py.typed +0 -0
  7. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/__init__.py +7 -0
  8. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/base_session_manager.py +162 -0
  9. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/boto3_models/datazone/2018-05-10/service-2.json +31733 -0
  10. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/boto3_models/glue/2017-03-31/service-2.json +18867 -0
  11. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/__init__.py +0 -0
  12. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/constants.py +137 -0
  13. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/datazone_gateway.py +95 -0
  14. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/debugging_utils.py +24 -0
  15. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/exceptions.py +53 -0
  16. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/glue_gateway.py +52 -0
  17. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/ipython_display.py +31 -0
  18. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/logger_utils.py +28 -0
  19. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/metadata_utils.py +13 -0
  20. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/sagemaker_connection_display.py +44 -0
  21. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_base_session_manager/common/sagemaker_toolkit_utils.py +150 -0
  22. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/__init__.py +12 -0
  23. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/dataframe_wrapper.py +30 -0
  24. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/sagemaker_connection_magic.py +1208 -0
  25. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/sagemaker_display_magic/__init__.py +0 -0
  26. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/sagemaker_display_magic/display_compute.py +476 -0
  27. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/sagemaker_display_magic/display_function.py +92 -0
  28. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/sagemaker_display_magic/display_render.py +912 -0
  29. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/sagemaker_display_magic/run_statement.py +45 -0
  30. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/sagemaker_display_magic/style_constants.py +371 -0
  31. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/sagemaker_display_magic/utils.py +15 -0
  32. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/utils/aws_profile_helper.py +77 -0
  33. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/utils/cell_transformer.py +155 -0
  34. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/utils/constants.py +39 -0
  35. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_connection_magic/utils/credential_process_script.py +35 -0
  36. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/__init__.py +7 -0
  37. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/athena/__init__.py +0 -0
  38. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/athena/athena_config.py +6 -0
  39. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/athena/athena_connection.py +6 -0
  40. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/athena/athena_session.py +30 -0
  41. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/athena/connection_transformer.py +27 -0
  42. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/db_connection_pool.py +12 -0
  43. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/redshift/__init__.py +0 -0
  44. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/redshift/connection_transformer.py +86 -0
  45. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/redshift/redshift_config.py +5 -0
  46. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/redshift/redshift_connection.py +16 -0
  47. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/redshift/redshift_session.py +72 -0
  48. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/sagemaker_database_session_manager.py +133 -0
  49. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_database_session_manager/utils/common_utils.py +13 -0
  50. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/__init__.py +7 -0
  51. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/__init__.py +1 -0
  52. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_ec2/__init__.py +0 -0
  53. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_ec2/connection_transformer.py +67 -0
  54. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_ec2/custom_authenticator.py +40 -0
  55. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_ec2/emr_on_ec2_connection.py +19 -0
  56. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_ec2/emr_on_ec2_gateway.py +25 -0
  57. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_ec2/emr_on_ec2_session.py +190 -0
  58. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_ec2/governance_type.py +7 -0
  59. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_serverless/__init__.py +0 -0
  60. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_serverless/connection_tranformer.py +37 -0
  61. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_serverless/custom_authenticator.py +143 -0
  62. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_serverless/emr_on_serverless_connection.py +8 -0
  63. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_serverless/emr_on_serverless_session.py +158 -0
  64. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/emr_on_serverless/emr_serverless_gateway.py +36 -0
  65. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/livy_session.py +298 -0
  66. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/emr_session_manager/spark_magic.py +11 -0
  67. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/glue_session_manager/__init__.py +1 -0
  68. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/glue_session_manager/connection_transformer.py +114 -0
  69. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/glue_session_manager/glue_connection.py +19 -0
  70. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/glue_session_manager/glue_kernel_utils/GlueSessionsConstants.py +56 -0
  71. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/glue_session_manager/glue_kernel_utils/KernelGateway.py +342 -0
  72. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/glue_session_manager/glue_session.py +773 -0
  73. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/glue_session_manager/glue_session_configs/config.py +21 -0
  74. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/spark_commands/send_to_spark_command.py +76 -0
  75. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/spark_session_manager/__init__.py +0 -0
  76. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/spark_session_manager/spark_monitor_widget_utils.py +65 -0
  77. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/spark_session_manager/spark_session.py +124 -0
  78. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/utils/common_utils.py +18 -0
  79. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/utils/lib_utils.py +100 -0
  80. sagemaker_studio_dataengineering_sessions-1.0.2/sagemaker_studio_dataengineering_sessions/sagemaker_spark_session_manager/utils/metadata_utils.py +19 -0
@@ -0,0 +1,18 @@
1
+ # Peru artifact directories
2
+ /build
3
+ /private
4
+ /.hatch
5
+ /external-distribution
6
+
7
+ # Python
8
+ *.egg-info/
9
+ __pycache__/
10
+ *__pycache__/
11
+ .coverage
12
+ .ruff_cache
13
+ .pytest_cache
14
+ .mypy_cache
15
+ .venv
16
+ /htmlcov
17
+
18
+ .idea/
@@ -0,0 +1,114 @@
1
+ Metadata-Version: 2.4
2
+ Name: sagemaker-studio-dataengineering-sessions
3
+ Version: 1.0.2
4
+ Requires-Python: >=3.11
5
+ Requires-Dist: amazon-sagemaker-sql-execution<1,>=0.0.7
6
+ Requires-Dist: boto3
7
+ Requires-Dist: botocore
8
+ Requires-Dist: ipython
9
+ Requires-Dist: pandas
10
+ Requires-Dist: panel
11
+ Requires-Dist: pytest
12
+ Requires-Dist: pytest-cov
13
+ Requires-Dist: sparkmagic
14
+ Requires-Dist: sqlparse>=0.4.0
15
+ Description-Content-Type: text/markdown
16
+
17
+ # SageMakerStudioDataEngineeringSessions
18
+
19
+ SageMaker Unified Studio Data Engineering Sessions
20
+
21
+ This package depends on the SageMaker Unified Studio environment. If you are using SageMaker Unified Studio, see the [AWS Doc](https://docs.aws.amazon.com/sagemaker-unified-studio/latest/userguide/what-is-sagemaker-unified-studio.html) for guidance.
22
+
23
+ This package contains functionality to support SageMaker Unified Studio connecting to various AWS Compute including EMR/EMR Serverless/Glue/Redshift etc.
24
+
25
+ It is utilizing [ipython magics](https://ipython.readthedocs.io/en/stable/interactive/magics.html) and [AWS DataZone Connections](https://docs.aws.amazon.com/datazone/latest/APIReference/API_ListConnections.html) to achieve the following features.
26
+
27
+ ## Features
28
+
29
+ - Connect to remote compute
30
+ - Execute Spark code in remote compute in Python/Scala
31
+ - Execute SQL queries in remote compute
32
+ - Send local variables to remote compute
33
+
34
+
35
+ ## How to setup
36
+
37
+ If you are using SageMaker Unified Studio, you can skip this part; SageMaker Unified Studio has already set up the package.
38
+
39
+ This package contains various Jupyter Magics to achieve its functionality.
40
+
41
+ To load these magics, make sure you have iPython config file generated. If not, you could run `ipython profile create`, then a file with path `~/.ipython/profile_default/ipython_config.py` should be generated
42
+
43
+ Then you will need to add the following line in the end of that config file
44
+
45
+ ```
46
+ c.InteractiveShellApp.extensions.extend(['sagemaker_studio_dataengineering_sessions.sagemaker_connection_magic'])
47
+ ```
48
+
49
+ Once that is finished, you could restart the ipython kernel and run `%help` to see a list of supported magics
50
+
51
+ ## Examples
52
+
53
+
54
+ To connect to remote compute, a DataZone Connection is required, you could create it via [CreateConnection API](https://docs.aws.amazon.com/datazone/latest/APIReference/API_CreateConnection.html), Let's say there's an existing connection called project.spark.
55
+
56
+ ### Supported Connection Type:
57
+
58
+ - IAM
59
+ - SPARK
60
+ - REDSHIFT
61
+ - ATHENA
62
+
63
+ ### Connect to remote compute and Execute Spark Code in Python
64
+ The following example will connect to AWS Glue Interactive session and run the spark code in Glue.
65
+
66
+ ```
67
+ %%pyspark project.spark
68
+
69
+ import sys
70
+ import boto3
71
+ from awsglue.utils import getResolvedOptions
72
+ from pyspark.context import SparkContext
73
+ from pyspark.sql import SparkSession
74
+ from pyspark.sql.functions import col
75
+
76
+ args = getResolvedOptions(sys.argv, ["redshift_url", "redshift_iam_role", "redshift_tempdir","redshift_jdbc_iam_url"])
77
+ print(f"{args}")
78
+
79
+ sc = SparkContext.getOrCreate()
80
+ spark = SparkSession(sc)
81
+
82
+ df = spark.read.csv(f"s3://sagemaker-example-files-prod-{boto3.session.Session().region_name}/datasets/tabular/dirty-titanic/", header=True)
83
+ df.show(5, truncate=False)
84
+ df.printSchema()
85
+
86
+ df.createOrReplaceTempView("df_sql_tempview")
87
+ ```
88
+
89
+ ### Execute Spark Code in Scala
90
+ The following example will connect to AWS Glue Interactive session and run the spark code in Scala.
91
+
92
+ ```
93
+ %%scalaspark project.spark
94
+ val dfScala = spark.sql("SELECT count(0) FROM df_sql_tempview")
95
+ dfScala.show()
96
+ ```
97
+
98
+ ### Execute SQL query in remote compute
99
+ The following example will connect to Amazon Redshift and run the SQL query in the remote compute.
100
+
101
+ ```
102
+ %%sql project.redshift
103
+ select current_user()
104
+ ```
105
+
106
+ ### Some other helpful magics
107
+
108
+ ```
109
+ %help - list available magics and related information
110
+
111
+ %send_to_remote - send local variable to remote compute
112
+
113
+ %%configure - configure spark application config in remote compute
114
+ ```
@@ -0,0 +1,98 @@
1
+ # SageMakerStudioDataEngineeringSessions
2
+
3
+ SageMaker Unified Studio Data Engineering Sessions
4
+
5
+ This package depends on the SageMaker Unified Studio environment. If you are using SageMaker Unified Studio, see the [AWS Doc](https://docs.aws.amazon.com/sagemaker-unified-studio/latest/userguide/what-is-sagemaker-unified-studio.html) for guidance.
6
+
7
+ This package contains functionality to support SageMaker Unified Studio connecting to various AWS Compute including EMR/EMR Serverless/Glue/Redshift etc.
8
+
9
+ It is utilizing [ipython magics](https://ipython.readthedocs.io/en/stable/interactive/magics.html) and [AWS DataZone Connections](https://docs.aws.amazon.com/datazone/latest/APIReference/API_ListConnections.html) to achieve the following features.
10
+
11
+ ## Features
12
+
13
+ - Connect to remote compute
14
+ - Execute Spark code in remote compute in Python/Scala
15
+ - Execute SQL queries in remote compute
16
+ - Send local variables to remote compute
17
+
18
+
19
+ ## How to setup
20
+
21
+ If you are using SageMaker Unified Studio, you can skip this part; SageMaker Unified Studio has already set up the package.
22
+
23
+ This package contains various Jupyter Magics to achieve its functionality.
24
+
25
+ To load these magics, make sure you have iPython config file generated. If not, you could run `ipython profile create`, then a file with path `~/.ipython/profile_default/ipython_config.py` should be generated
26
+
27
+ Then you will need to add the following line in the end of that config file
28
+
29
+ ```
30
+ c.InteractiveShellApp.extensions.extend(['sagemaker_studio_dataengineering_sessions.sagemaker_connection_magic'])
31
+ ```
32
+
33
+ Once that is finished, you could restart the ipython kernel and run `%help` to see a list of supported magics
34
+
35
+ ## Examples
36
+
37
+
38
+ To connect to remote compute, a DataZone Connection is required, you could create it via [CreateConnection API](https://docs.aws.amazon.com/datazone/latest/APIReference/API_CreateConnection.html), Let's say there's an existing connection called project.spark.
39
+
40
+ ### Supported Connection Type:
41
+
42
+ - IAM
43
+ - SPARK
44
+ - REDSHIFT
45
+ - ATHENA
46
+
47
+ ### Connect to remote compute and Execute Spark Code in Python
48
+ The following example will connect to AWS Glue Interactive session and run the spark code in Glue.
49
+
50
+ ```
51
+ %%pyspark project.spark
52
+
53
+ import sys
54
+ import boto3
55
+ from awsglue.utils import getResolvedOptions
56
+ from pyspark.context import SparkContext
57
+ from pyspark.sql import SparkSession
58
+ from pyspark.sql.functions import col
59
+
60
+ args = getResolvedOptions(sys.argv, ["redshift_url", "redshift_iam_role", "redshift_tempdir","redshift_jdbc_iam_url"])
61
+ print(f"{args}")
62
+
63
+ sc = SparkContext.getOrCreate()
64
+ spark = SparkSession(sc)
65
+
66
+ df = spark.read.csv(f"s3://sagemaker-example-files-prod-{boto3.session.Session().region_name}/datasets/tabular/dirty-titanic/", header=True)
67
+ df.show(5, truncate=False)
68
+ df.printSchema()
69
+
70
+ df.createOrReplaceTempView("df_sql_tempview")
71
+ ```
72
+
73
+ ### Execute Spark Code in Scala
74
+ The following example will connect to AWS Glue Interactive session and run the spark code in Scala.
75
+
76
+ ```
77
+ %%scalaspark project.spark
78
+ val dfScala = spark.sql("SELECT count(0) FROM df_sql_tempview")
79
+ dfScala.show()
80
+ ```
81
+
82
+ ### Execute SQL query in remote compute
83
+ The following example will connect to Amazon Redshift and run the SQL query in the remote compute.
84
+
85
+ ```
86
+ %%sql project.redshift
87
+ select current_user()
88
+ ```
89
+
90
+ ### Some other helpful magics
91
+
92
+ ```
93
+ %help - list available magics and related information
94
+
95
+ %send_to_remote - send local variable to remote compute
96
+
97
+ %%configure - configure spark application config in remote compute
98
+ ```
@@ -0,0 +1,136 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "sagemaker-studio-dataengineering-sessions"
7
+ version = "1.0.2"
8
+ readme = "README.md"
9
+ dependencies = [
10
+ # Put your dependencies here!
11
+ "boto3",
12
+ "pytest",
13
+ "ipython",
14
+ # database session manager
15
+ "amazon_sagemaker_sql_execution>=0.0.7,<1",
16
+ "pandas",
17
+ "sqlparse>=0.4.0",
18
+ # spark session manager
19
+ "sparkmagic",
20
+ "botocore",
21
+ # connection magic
22
+ "panel",
23
+ "pytest",
24
+ "pytest-cov"
25
+ ]
26
+ requires-python = ">=3.11"
27
+
28
+ [tool.hatch.envs.default]
29
+ # This controls what version of Python you want to be the default
30
+ # when running any scripts or tools to do things like debug test failures
31
+ # or do general development. Its lockfile is ./requirements.txt
32
+ python = "3.11"
33
+ dependencies = [
34
+ "pytest",
35
+ # "mypy",
36
+ ]
37
+
38
+ [tool.pytest.ini_options]
39
+ addopts = [
40
+ "--durations=5",
41
+ "--color=yes",
42
+ ]
43
+ testpaths = [ "tests" ]
44
+
45
+ [tool.coverage.run]
46
+ source_pkgs = ["sagemaker_studio_dataengineering_sessions"]
47
+ branch = true
48
+ parallel = true
49
+
50
+ [tool.coverage.paths]
51
+ "sagemaker_studio_dataengineering_sessions" = ["src/sagemaker_studio_dataengineering_sessions", "**/site-packages/sagemaker_studio_dataengineering_sessions"]
52
+
53
+ [tool.coverage.report]
54
+ fail_under = 60
55
+ exclude_lines = [
56
+ "no cov",
57
+ "if __name__ == .__main__.:",
58
+ "if TYPE_CHECKING:",
59
+ ]
60
+ show_missing = true
61
+
62
+ [tool.coverage.xml]
63
+ output = "private/brazil-documentation/coverage/coverage.xml"
64
+
65
+ [tool.coverage.html]
66
+ directory = "private/brazil-documentation/coverage/"
67
+
68
+ [tool.ruff.lint]
69
+ isort.known-first-party = ["sagemaker_studio_dataengineering_sessions"]
70
+ exclude = [ "./build", ".hatch", "private" ]
71
+
72
+ [tool.ruff.lint.per-file-ignores]
73
+ # Tests can use magic values, assertions, and relative imports
74
+ "tests/**/*" = ["PLR2004", "S101", "TID252"]
75
+
76
+ [tool.hatch.build]
77
+ directory = "./external-distribution"
78
+ packages = ["src/sagemaker_studio_dataengineering_sessions"]
79
+
80
+ [tool.hatch.build.targets.sdist]
81
+ ignore-vcs = true
82
+
83
+ [tool.hatch.build.targets.wheel]
84
+ ignore-vcs = true
85
+
86
+ [tool.hatch.build.hooks.custom]
87
+ path = "scripts/build_hooks.py"
88
+
89
+ [tool.hatch.env]
90
+ requires = [ "hatch-pip-compile" ]
91
+
92
+ [tool.hatch.envs.default.scripts]
93
+ # These are scripts you can run using `brazil-build run <script-name>`
94
+ #typing = [
95
+ # "mkdir -p .mypy_cache",
96
+ # "mypy --install-types --non-interactive src/sagemaker_studio_dataengineering_sessions tests"
97
+ #]
98
+
99
+ # This command is for updating all your lock files across all environments
100
+ update = [ "hatch-pip-compile --upgrade --all" ]
101
+
102
+ release = [
103
+ # "typing",
104
+ # "hatch test --all --cover",
105
+ "pytest tests --cov"
106
+ ]
107
+
108
+ [[tool.hatch.envs.hatch-test.matrix]]
109
+ # This defines multiple variables you can generate combinations
110
+ # to test underneath different environments. A separate environment and
111
+ # lock file will be created for every combination located in `./requirements/`
112
+ python = ["3.11", "3.12"]
113
+
114
+ ## This environment is used solely to generate a lock file on hatch,
115
+ # and hatch-pip-compile that can be automatically updated
116
+ [tool.hatch.envs.build-tools]
117
+ # This version states what version your build tools build with. To change it,
118
+ # you will need to:
119
+ # * Remove the `requirements/requirements-build-tools.txt` file
120
+ # * Run `brazil-build run update` to generate a new lock file for the environment
121
+ python = "3.11"
122
+ detached = true
123
+ skip-install = true
124
+ dependencies = [
125
+ "hatch",
126
+ "hatch-pip-compile",
127
+ ]
128
+
129
+ # PeruHatch repository and package locking plugin
130
+ [tool.hatch.env.collectors.custom]
131
+ path = ".hatch/hatch_plugin.py"
132
+
133
+ # This is necessary to use 'uv' as the resolver if this is the top-level package
134
+ # in a monorepo (which is usually the case). Remove this if copying the
135
+ # package into a monorepo
136
+ [tool.uv.workspace]
@@ -0,0 +1,7 @@
1
+ import logging
2
+
3
+ from sagemaker_studio_dataengineering_sessions.sagemaker_base_session_manager.common.logger_utils import setup_logger
4
+
5
+ logging.basicConfig(level=logging.INFO)
6
+ logger = logging.getLogger(__name__)
7
+ setup_logger(logger, "SageMakerBaseSessionManager", "connection_magic")
@@ -0,0 +1,162 @@
1
+ import abc
2
+ import logging
3
+
4
+ from sagemaker_studio_dataengineering_sessions.sagemaker_base_session_manager.common.constants import Language
5
+ from sagemaker_studio_dataengineering_sessions.sagemaker_base_session_manager.common.sagemaker_connection_display import SageMakerConnectionDisplay
6
+
7
+
8
+ class BaseSessionManager(metaclass=abc.ABCMeta):
9
+
10
+ def __init__(self) -> None:
11
+ # The limit for the number of rows that can be returned in a SQL query. Defaults to 10000.
12
+ self.sql_result_row_limit = 10_000
13
+
14
+ @abc.abstractmethod
15
+ def create_session(self):
16
+ """
17
+ Create a new session
18
+ """
19
+ raise NotImplementedError('Must define create_session to use this BaseSessionManager')
20
+
21
+ @abc.abstractmethod
22
+ def run_statement(self, cell="", language: Language = None):
23
+ """
24
+ Run a statement against the session
25
+ :param cell: the code to run
26
+ """
27
+ raise NotImplementedError('Must define run_statement to use this BaseSessionManager')
28
+
29
+ @abc.abstractmethod
30
+ def stop_session(self):
31
+ """
32
+ Stop the session
33
+ """
34
+ raise NotImplementedError('Must define stop_session to use this BaseSessionManager')
35
+
36
+ @abc.abstractmethod
37
+ def is_session_connectable(self) -> bool:
38
+ """
39
+ Check if the session is connectable and in a valid status for running statement
40
+ :return: true if the session is in a valid status, false otherwise
41
+ """
42
+ raise NotImplementedError('Must define is_session_connectable to use this BaseSessionManager')
43
+
44
+ @abc.abstractmethod
45
+ def _configure_core(self, cell: str):
46
+ """
47
+ To configure current compute to be connected and start a new session with applied configuration
48
+ :param cell: the content to be applied for the session
49
+ """
50
+
51
+ def configure(self, cell: str, force: bool = False):
52
+ """
53
+ To configure current compute to be connected and start a new session with applied configuration
54
+ :param cell: the content to be applied for the session
55
+ :param force: a boolean to check if a user wants to force start a new session to apply configuration
56
+ """
57
+ if self.is_session_connectable():
58
+ if not force:
59
+ SageMakerConnectionDisplay.send_error(
60
+ "A session has already been started. If you intend to recreate the "
61
+ "session with new configurations, please include the -f or --force argument.")
62
+ else:
63
+ self.stop_session()
64
+ self._configure_core(cell)
65
+ self.create_session()
66
+ else:
67
+ self._configure_core(cell)
68
+ return
69
+
70
+ def send_to_remote(self, local_var: str, remote_var: str, language=Language.python):
71
+ # Not an abstract method because by default a session manager does not support this
72
+ # only Spark session manager supports this for now.
73
+ """
74
+ Send a local variable in kernel's userspace to remote compute.
75
+ e.g: for an EMR cluster, send a local variable to spark
76
+ :param local_var: local variable name
77
+ :param remote_var: remote variable name
78
+ """
79
+ raise NotImplementedError('Send_to_remote is not supported')
80
+
81
+ def get_info(self):
82
+ """
83
+ Get information about the connected compute session
84
+ """
85
+ raise NotImplementedError('get_info is not supported for current session')
86
+
87
+ def get_session_id(self):
88
+ """
89
+ Get the session ID of the connected compute session
90
+ """
91
+ raise NotImplementedError('get_session_id is not supported for current session')
92
+
93
+ def get_status(self):
94
+ """
95
+ Get the status of the connected compute session
96
+ """
97
+ raise NotImplementedError('get_status is not supported for current session')
98
+
99
+ def add_tags(self, tags: str):
100
+ """
101
+ Add tags to the connected compute session resources
102
+ """
103
+ raise NotImplementedError('add_tags is not supported for current session')
104
+
105
+ def get_logs(self):
106
+ """
107
+ Gets the current session's Livy logs
108
+ """
109
+ raise NotImplementedError('get_logs is not supported for current session')
110
+
111
+ def matplot(self, line: str):
112
+ """
113
+ Using matplotlib to plot the current session's plot'
114
+ """
115
+ raise NotImplementedError('matplot is not supported for current session')
116
+
117
+ def set_session_id_prefix(self, prefix: str, force: bool = False):
118
+ """
119
+ Sets the session ID prefix of the session to be created
120
+ """
121
+ raise NotImplementedError('set_session_id_prefix is not supported for current session')
122
+
123
+ def set_number_of_workers(self, number: str, force: bool = False):
124
+ """
125
+ Sets the number of workers of the session to be created
126
+ """
127
+ raise NotImplementedError('set_number_of_workers is not supported for current session')
128
+
129
+ def set_worker_type(self, type: str, force: bool = False):
130
+ """
131
+ Sets the worker type of the session to be created
132
+ """
133
+ raise NotImplementedError('set_worker_type is not supported for current session')
134
+
135
+ def set_session_type(self, session_type: str, force: bool = False):
136
+ """
137
+ Sets the session type of the session to be created. Acceptable session_type values are: streaming and etl.
138
+ """
139
+ raise NotImplementedError('set_session_type is not supported for current session')
140
+
141
+ def set_glue_version(self, glue_version: str, force: bool = False):
142
+ """
143
+ Sets the glue version of the session to be created
144
+ """
145
+ raise NotImplementedError('set_glue_version is not supported for current session')
146
+
147
+ def set_idle_timeout(self, idle_timeout: str, force: bool = False):
148
+ """
149
+ Sets the idle timeout value of the session to be created
150
+ """
151
+ raise NotImplementedError('set_idle_timeout is not supported for current session')
152
+
153
+ def spark_conf(self, spark_conf: str, force: bool = False):
154
+ """
155
+ Sets the spark configuration value of the session to be created
156
+ """
157
+ raise NotImplementedError('spark_conf is not supported for current session')
158
+
159
+ def get_logger(self):
160
+ if not hasattr(self, '_logger'):
161
+ self._logger = logging.getLogger(__name__)
162
+ return self._logger