dsl-spa 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsl_spa-0.0.1/LICENSE +21 -0
- dsl_spa-0.0.1/PKG-INFO +45 -0
- dsl_spa-0.0.1/README.md +30 -0
- dsl_spa-0.0.1/pyproject.toml +23 -0
- dsl_spa-0.0.1/setup.cfg +4 -0
- dsl_spa-0.0.1/src/dsl_spa/__init__.py +0 -0
- dsl_spa-0.0.1/src/dsl_spa/pipeline/__init__.py +0 -0
- dsl_spa-0.0.1/src/dsl_spa/pipeline/connector.py +142 -0
- dsl_spa-0.0.1/src/dsl_spa/pipeline/pipeline.py +1003 -0
- dsl_spa-0.0.1/src/dsl_spa/pipeline/pipeline_functions.py +283 -0
- dsl_spa-0.0.1/src/dsl_spa/utils/__init__.py +0 -0
- dsl_spa-0.0.1/src/dsl_spa/utils/schema.py +429 -0
- dsl_spa-0.0.1/src/dsl_spa.egg-info/PKG-INFO +45 -0
- dsl_spa-0.0.1/src/dsl_spa.egg-info/SOURCES.txt +14 -0
- dsl_spa-0.0.1/src/dsl_spa.egg-info/dependency_links.txt +1 -0
- dsl_spa-0.0.1/src/dsl_spa.egg-info/top_level.txt +1 -0
dsl_spa-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 superwise-brian
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dsl_spa-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dsl-spa
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A tool for building Structured Agentic Pipelines
|
|
5
|
+
Author-email: superwise-brian <brian.mcclannahan@superwise.ai>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/superwise-ai/dsl-spa
|
|
8
|
+
Project-URL: Issues, https://github.com/superwise-ai/dsl-spa/issues
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.9
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
|
|
16
|
+
# Domain Specific Language - Structured Pipeline Agent
|
|
17
|
+
|
|
18
|
+
DSL-SPA is an open-source Python library for connecting LLM Agents to ETL and other common tasks. With this tool, you can connect LLM Agents to complex Data Pipelines.
|
|
19
|
+
|
|
20
|
+
Key Features of DSL-SPA
|
|
21
|
+
|
|
22
|
+
- Building SQL Queries - construct sql queries from fields extracted by Agents
|
|
23
|
+
|
|
24
|
+
- Applying Data Transformations - selectively apply data transformations based on Agent instructions
|
|
25
|
+
|
|
26
|
+
- Generating Unstructured Text Summaries - generate summaries of query results and data transformations
|
|
27
|
+
|
|
28
|
+
- Generating Visualizations - generate vega-lite visualizations of query results and data transformations
|
|
29
|
+
|
|
30
|
+
## Getting Started
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
Get started with dsl-spa by installing the Python library via pip
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
pip install dsl-spa
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
A standard pipeline can be imported with
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
from dsl_spa.pipeline import StandardPipeline
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
For more details on constructing a pipeline, reference [Building a Pipeline](https://github.com/superwise-ai/dsl-spa/blob/main/docs/Creating_a_Pipeline_Schema.md).
|
dsl_spa-0.0.1/README.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Domain Specific Language - Structured Pipeline Agent
|
|
2
|
+
|
|
3
|
+
DSL-SPA is an open-source Python library for connecting LLM Agents to ETL and other common tasks. With this tool, you can connect LLM Agents to complex Data Pipelines.
|
|
4
|
+
|
|
5
|
+
Key Features of DSL-SPA
|
|
6
|
+
|
|
7
|
+
- Building SQL Queries - construct sql queries from fields extracted by Agents
|
|
8
|
+
|
|
9
|
+
- Applying Data Transformations - selectively apply data transformations based on Agent instructions
|
|
10
|
+
|
|
11
|
+
- Generating Unstructured Text Summaries - generate summaries of query results and data transformations
|
|
12
|
+
|
|
13
|
+
- Generating Visualizations - generate vega-lite visualizations of query results and data transformations
|
|
14
|
+
|
|
15
|
+
## Getting Started
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
Get started with dsl-spa by installing the Python library via pip
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
pip install dsl-spa
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
A standard pipeline can be imported with
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
from dsl_spa.pipeline import StandardPipeline
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
For more details on constructing a pipeline, reference [Building a Pipeline](https://github.com/superwise-ai/dsl-spa/blob/main/docs/Creating_a_Pipeline_Schema.md).
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "dsl-spa"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
authors = [
|
|
5
|
+
{ name="superwise-brian", email="brian.mcclannahan@superwise.ai" },
|
|
6
|
+
]
|
|
7
|
+
description = "A tool for building Structured Agentic Pipelines"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Programming Language :: Python :: 3",
|
|
12
|
+
"Operating System :: OS Independent",
|
|
13
|
+
]
|
|
14
|
+
license = "MIT"
|
|
15
|
+
license-files = ["LICEN[CS]E*"]
|
|
16
|
+
|
|
17
|
+
[project.urls]
|
|
18
|
+
Homepage = "https://github.com/superwise-ai/dsl-spa"
|
|
19
|
+
Issues = "https://github.com/superwise-ai/dsl-spa/issues"
|
|
20
|
+
|
|
21
|
+
[build-system]
|
|
22
|
+
requires = ["setuptools >= 77.0.3","pandas >= 2.0.0","altair >= 5.3.0","SQLAlchemy >= 2.0.0"]  # NOTE(review): pandas, altair and SQLAlchemy look like runtime dependencies — they likely belong in [project] dependencies, not [build-system] requires; confirm with the maintainers
|
|
23
|
+
build-backend = "setuptools.build_meta"
|
dsl_spa-0.0.1/setup.cfg
ADDED
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import sqlalchemy as sa
|
|
3
|
+
from sqlalchemy.engine import URL
|
|
4
|
+
from sqlalchemy.engine.url import make_url
|
|
5
|
+
import os
|
|
6
|
+
import json
|
|
7
|
+
|
|
8
|
+
class Connector:
    """Abstract base class for connectors.

    Every concrete connector should inherit from this class (or one of
    its subclasses) and override :meth:`connect`.
    """

    def connect(self):
        """Establish the external connection.

        Raises:
            NotImplementedError: always; subclasses must override this method.
        """
        raise NotImplementedError("This method must be implemented in a subclass")
|
17
|
+
class DatabaseConnector(Connector):
    """Connector specialised for databases.

    Adds a :meth:`query` method on top of the base connector contract.
    Subclasses are expected to populate ``self.engine`` in ``connect``.
    """

    def __init__(self):
        # No engine exists until a subclass's connect() creates one.
        self.engine = None

    def query(self, query_string: str) -> pd.DataFrame:
        """Run ``query_string`` using ``self.engine``.

        Args:
            query_string (str): Query string to query

        Returns:
            pd.DataFrame: Pandas Dataframe of query data

        Raises:
            NotImplementedError: always; subclasses must override this method.
        """
        raise NotImplementedError("This method must be implemented in a subclass")
34
|
+
class MSSQLConnector(DatabaseConnector):
    """Microsoft SQL Server connector (via the ``mssql+pyodbc`` dialect)."""

    def __init__(self, uid: str, password: str, host: str, database: str, driver: str):
        """Store the credentials used to build the connection URL.

        Args:
            uid (str): Username
            password (str): Password
            host (str): Database Host Server
            database (str): Database Name
            driver (str): Name of ODBC Driver
        """
        super().__init__()
        self.uid = uid
        self.password = password
        self.host = host
        self.database = database
        self.driver = driver

    def connect(self):
        """Build the ``mssql+pyodbc`` URL and create the SQLAlchemy engine."""
        url = URL.create(
            "mssql+pyodbc",
            username=self.uid,
            password=self.password,
            host=self.host,
            database=self.database,
            query={"driver": self.driver},
        )
        self.engine = sa.create_engine(url)

    def query(self, sql_query: str):
        """Execute ``sql_query`` against ``self.engine``.

        Args:
            sql_query (str): SQL text to execute.

        Returns:
            pd.DataFrame: Pandas Dataframe of query data
        """
        return pd.read_sql_query(sql_query, self.engine)
|
70
|
+
class BigQueryConnector(DatabaseConnector):
    """Google BigQuery database connector."""

    def __init__(self, url: str, account_type: str, project_id: str, location: str = None):
        """Record the GCP connection settings.

        Args:
            url (str): GCP connection URL
            account_type (str): GCP account type
            project_id (str): GCP Project ID
            location (str, optional): GCP Server location. Defaults to None.
        """
        super().__init__()
        self.url = url
        self.location = location
        self.account_type = account_type
        self.project_id = project_id

    def connect(self):
        """Create the SQLAlchemy engine for BigQuery.

        When the connection URL carries a ``credentials_base64`` query
        parameter, a JSON credentials blob (account type + project id) is
        forwarded as ``credentials_info``; otherwise it stays ``None``.
        """
        info = None
        if "credentials_base64" in make_url(self.url).query:
            info = json.dumps({
                "type": self.account_type,
                "project_id": self.project_id,
            })

        # Only pass `location` through when one was supplied.
        kwargs = {"credentials_info": info}
        if self.location is not None:
            kwargs["location"] = self.location
        self.engine = sa.create_engine(self.url, **kwargs)

    def query(self, sql_query: str):
        """Run ``sql_query`` using ``self.engine``.

        Args:
            sql_query (str): SQL text to execute.

        Returns:
            pd.DataFrame: Pandas Dataframe of query data
        """
        return pd.read_sql_query(sql_query, self.engine)
|
116
|
+
class LocalCSVConnector(Connector):
    """Connector that serves DataFrames from CSV files in a local folder."""

    def __init__(self, folder: str):
        """Creates a local CSV connector given the folder location of the CSVs.

        Args:
            folder (str): Location of CSV directory
        """
        self.folder = folder
        self.df = None  # placeholder; not used by query() in this class

    def connect(self):
        """No-op: the CSV files are local, so there is nothing to connect to."""
        pass

    def query(self, csv_name: str):
        """Load the named CSV file from the configured folder.

        Args:
            csv_name (str): File name of the CSV inside ``self.folder``.

        Returns:
            pd.DataFrame: Pandas Dataframe of the CSV contents.
        """
        return pd.read_csv(os.path.join(self.folder, csv_name))