datalathe 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datalathe-1.0.0/.github/workflows/publish.yml +32 -0
- datalathe-1.0.0/.gitignore +7 -0
- datalathe-1.0.0/PKG-INFO +12 -0
- datalathe-1.0.0/README.md +221 -0
- datalathe-1.0.0/pyproject.toml +22 -0
- datalathe-1.0.0/src/datalathe/__init__.py +60 -0
- datalathe-1.0.0/src/datalathe/client.py +343 -0
- datalathe-1.0.0/src/datalathe/commands/__init__.py +11 -0
- datalathe-1.0.0/src/datalathe/commands/command.py +13 -0
- datalathe-1.0.0/src/datalathe/commands/create_chip.py +48 -0
- datalathe-1.0.0/src/datalathe/commands/extract_tables.py +32 -0
- datalathe-1.0.0/src/datalathe/commands/generate_report.py +73 -0
- datalathe-1.0.0/src/datalathe/errors.py +15 -0
- datalathe-1.0.0/src/datalathe/results/__init__.py +3 -0
- datalathe-1.0.0/src/datalathe/results/result_set.py +184 -0
- datalathe-1.0.0/src/datalathe/types.py +221 -0
- datalathe-1.0.0/tests/__init__.py +0 -0
- datalathe-1.0.0/tests/test_result_set.py +121 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
test:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.12"
|
|
18
|
+
- run: pip install -e ".[dev]"
|
|
19
|
+
- run: pytest
|
|
20
|
+
|
|
21
|
+
publish:
|
|
22
|
+
needs: test
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
environment: pypi
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v4
|
|
27
|
+
- uses: actions/setup-python@v5
|
|
28
|
+
with:
|
|
29
|
+
python-version: "3.12"
|
|
30
|
+
- run: pip install build
|
|
31
|
+
- run: python -m build
|
|
32
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
datalathe-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datalathe
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Python client for the Datalathe API
|
|
5
|
+
Author: DataLathe
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: client,data,datalathe,profiling
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Requires-Dist: requests>=2.28.0
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
12
|
+
Requires-Dist: responses>=0.23.0; extra == 'dev'
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# datalathe-client-python
|
|
2
|
+
|
|
3
|
+
Python client library for the [Datalathe](https://datalathe.com) API.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install datalathe
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Or install from source:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
git clone https://github.com/DataLathe/datalathe-client-python.git
|
|
15
|
+
cd datalathe-client-python
|
|
16
|
+
pip install .
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Quick Start
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
from datalathe import DatalatheClient, DatalatheResultSet
|
|
23
|
+
|
|
24
|
+
client = DatalatheClient("http://localhost:3000")
|
|
25
|
+
|
|
26
|
+
# Create a chip from a MySQL source
|
|
27
|
+
chip_id = client.create_chip("my_database", "SELECT * FROM users", "users")
|
|
28
|
+
|
|
29
|
+
# Query the chip
|
|
30
|
+
report = client.generate_report([chip_id], ["SELECT count(*) as total FROM users"])
|
|
31
|
+
|
|
32
|
+
# Iterate over results
|
|
33
|
+
rs = DatalatheResultSet(report.results[0])
|
|
34
|
+
for row in rs:
|
|
35
|
+
print(row) # {"total": 42}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Creating Chips
|
|
39
|
+
|
|
40
|
+
### From MySQL
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
chip_id = client.create_chip("my_database", "SELECT * FROM orders", "orders")
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### From a file
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
chip_id = client.create_chip_from_file("/data/sales.csv", "sales")
|
|
50
|
+
chip_id = client.create_chip_from_file("/data/events.parquet", "events")
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### From existing chips
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
chip_id = client.create_chip_from_chip(
|
|
57
|
+
source_chip_ids=["chip-abc", "chip-def"],
|
|
58
|
+
query="SELECT a.*, b.total FROM chip_abc a JOIN chip_def b ON a.id = b.id",
|
|
59
|
+
table_name="joined",
|
|
60
|
+
)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### With partitions
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from datalathe import Partition
|
|
67
|
+
|
|
68
|
+
chip_id = client.create_chip(
|
|
69
|
+
"my_database",
|
|
70
|
+
"SELECT * FROM orders WHERE region = ?",
|
|
71
|
+
"orders",
|
|
72
|
+
partition=Partition(
|
|
73
|
+
partition_by="region",
|
|
74
|
+
partition_values=["US", "EU", "APAC"],
|
|
75
|
+
),
|
|
76
|
+
)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### With S3 storage
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from datalathe import S3StorageConfig
|
|
83
|
+
|
|
84
|
+
chip_id = client.create_chip(
|
|
85
|
+
"my_database",
|
|
86
|
+
"SELECT * FROM orders",
|
|
87
|
+
"orders",
|
|
88
|
+
storage_config=S3StorageConfig(bucket="my-bucket", key_prefix="chips/", ttl_days=30),
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Batch creation
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from datalathe import SourceRequest, SourceType
|
|
96
|
+
|
|
97
|
+
chip_ids = client.create_chips(
|
|
98
|
+
sources=[
|
|
99
|
+
SourceRequest(database_name="db", query="SELECT * FROM users", table_name="users"),
|
|
100
|
+
SourceRequest(database_name="db", query="SELECT * FROM orders", table_name="orders"),
|
|
101
|
+
],
|
|
102
|
+
source_type=SourceType.MYSQL,
|
|
103
|
+
tags={"env": "production", "team": "analytics"},
|
|
104
|
+
)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Querying
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
report = client.generate_report(
|
|
111
|
+
chip_ids=["chip-abc"],
|
|
112
|
+
queries=[
|
|
113
|
+
"SELECT count(*) as total FROM users",
|
|
114
|
+
"SELECT status, count(*) as cnt FROM users GROUP BY status",
|
|
115
|
+
],
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
# Access results by query index
|
|
119
|
+
for idx, entry in report.results.items():
|
|
120
|
+
print(f"Query {idx}: {entry.result}")
|
|
121
|
+
|
|
122
|
+
# Timing info
|
|
123
|
+
if report.timing:
|
|
124
|
+
print(f"Total: {report.timing.total_ms}ms")
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Working with Results
|
|
128
|
+
|
|
129
|
+
`DatalatheResultSet` provides a cursor-based API for navigating query results.
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
rs = DatalatheResultSet(report.results[0])
|
|
133
|
+
|
|
134
|
+
# Cursor-based iteration
|
|
135
|
+
while rs.next():
|
|
136
|
+
name = rs.get_string("name")
|
|
137
|
+
age = rs.get_int("age")
|
|
138
|
+
score = rs.get_float("score")
|
|
139
|
+
active = rs.get_boolean("active")
|
|
140
|
+
print(f"{name}, {age}, {score}, {active}")
|
|
141
|
+
|
|
142
|
+
# Or iterate directly
|
|
143
|
+
for row in rs:
|
|
144
|
+
print(row)
|
|
145
|
+
|
|
146
|
+
# Convert to list of dicts
|
|
147
|
+
rows = rs.to_list()
|
|
148
|
+
|
|
149
|
+
# Column metadata
|
|
150
|
+
print(rs.get_column_count())
|
|
151
|
+
print(rs.get_column_name(1))
|
|
152
|
+
print(rs.get_column_type(1))
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Chip Management
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
# List all chips
|
|
159
|
+
response = client.list_chips()
|
|
160
|
+
for chip in response.chips:
|
|
161
|
+
print(f"{chip.chip_id}: {chip.table_name}")
|
|
162
|
+
|
|
163
|
+
# Search chips
|
|
164
|
+
response = client.search_chips(table_name="users")
|
|
165
|
+
response = client.search_chips(tag="env:production")
|
|
166
|
+
|
|
167
|
+
# Tag a chip
|
|
168
|
+
client.add_chip_tags("chip-abc", {"env": "staging", "owner": "data-team"})
|
|
169
|
+
client.delete_chip_tag("chip-abc", "owner")
|
|
170
|
+
|
|
171
|
+
# Delete a chip
|
|
172
|
+
client.delete_chip("chip-abc")
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## SQL Analysis
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
# Extract table names from a query
|
|
179
|
+
tables = client.extract_tables("SELECT u.name, o.total FROM users u JOIN orders o ON u.id = o.user_id")
|
|
180
|
+
# ["users", "orders"]
|
|
181
|
+
|
|
182
|
+
# Extract tables and transform MySQL syntax to DuckDB
|
|
183
|
+
result = client.extract_tables_with_transform(
|
|
184
|
+
"SELECT DATE_FORMAT(created_at, '%Y-%m') FROM users",
|
|
185
|
+
transform=True,
|
|
186
|
+
)
|
|
187
|
+
print(result["tables"])
|
|
188
|
+
print(result["transformed_query"])
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Client Configuration
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
client = DatalatheClient(
|
|
195
|
+
base_url="http://localhost:3000",
|
|
196
|
+
headers={"Authorization": "Bearer token"},
|
|
197
|
+
timeout=60.0, # seconds (default: 30)
|
|
198
|
+
)
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## Error Handling
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
from datalathe import DatalatheApiError, DatalatheStageError
|
|
205
|
+
|
|
206
|
+
try:
|
|
207
|
+
chip_id = client.create_chip("bad_db", "SELECT 1", "test")
|
|
208
|
+
except DatalatheStageError as e:
|
|
209
|
+
print(f"Staging failed: {e}")
|
|
210
|
+
except DatalatheApiError as e:
|
|
211
|
+
print(f"API error {e.status_code}: {e.response_body}")
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Requirements
|
|
215
|
+
|
|
216
|
+
- Python 3.10+
|
|
217
|
+
- `requests` >= 2.28
|
|
218
|
+
|
|
219
|
+
## License
|
|
220
|
+
|
|
221
|
+
MIT
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "datalathe"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Python client for the Datalathe API"
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
authors = [{ name = "DataLathe" }]
|
|
12
|
+
keywords = ["datalathe", "data", "profiling", "client"]
|
|
13
|
+
dependencies = ["requests>=2.28.0"]
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
dev = ["pytest>=7.0.0", "responses>=0.23.0"]
|
|
17
|
+
|
|
18
|
+
[tool.hatch.build.targets.wheel]
|
|
19
|
+
packages = ["src/datalathe"]
|
|
20
|
+
|
|
21
|
+
[tool.pytest.ini_options]
|
|
22
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from datalathe.client import DatalatheClient, GenerateReportResult
|
|
2
|
+
from datalathe.types import (
|
|
3
|
+
SourceType,
|
|
4
|
+
ReportType,
|
|
5
|
+
SchemaField,
|
|
6
|
+
Partition,
|
|
7
|
+
SourceRequest,
|
|
8
|
+
S3StorageConfig,
|
|
9
|
+
StageDataRequest,
|
|
10
|
+
StageDataResponse,
|
|
11
|
+
QueryRequest,
|
|
12
|
+
ReportRequest,
|
|
13
|
+
ReportResultEntry,
|
|
14
|
+
ReportTiming,
|
|
15
|
+
ReportResponse,
|
|
16
|
+
DuckDBDatabase,
|
|
17
|
+
DatabaseTable,
|
|
18
|
+
Chip,
|
|
19
|
+
ChipMetadata,
|
|
20
|
+
ChipTag,
|
|
21
|
+
ChipsResponse,
|
|
22
|
+
)
|
|
23
|
+
from datalathe.errors import DatalatheError, DatalatheApiError, DatalatheStageError
|
|
24
|
+
from datalathe.commands.command import DatalatheCommand
|
|
25
|
+
from datalathe.commands.create_chip import CreateChipCommand
|
|
26
|
+
from datalathe.commands.generate_report import GenerateReportCommand
|
|
27
|
+
from datalathe.commands.extract_tables import ExtractTablesCommand
|
|
28
|
+
from datalathe.results.result_set import DatalatheResultSet
|
|
29
|
+
|
|
30
|
+
# Public API of the ``datalathe`` package. Mirrors the names imported above;
# keep this list in sync when adding or removing re-exports.
__all__ = [
    "DatalatheClient",
    "GenerateReportResult",
    "SourceType",
    "ReportType",
    "SchemaField",
    "Partition",
    "SourceRequest",
    "S3StorageConfig",
    "StageDataRequest",
    "StageDataResponse",
    "QueryRequest",
    "ReportRequest",
    "ReportResultEntry",
    "ReportTiming",
    "ReportResponse",
    "DuckDBDatabase",
    "DatabaseTable",
    "Chip",
    "ChipMetadata",
    "ChipTag",
    "ChipsResponse",
    "DatalatheError",
    "DatalatheApiError",
    "DatalatheStageError",
    "DatalatheCommand",
    "CreateChipCommand",
    "GenerateReportCommand",
    "ExtractTablesCommand",
    "DatalatheResultSet",
]
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any
|
|
5
|
+
from urllib.parse import quote, urlencode
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
from datalathe.commands.command import DatalatheCommand
|
|
10
|
+
from datalathe.commands.create_chip import CreateChipCommand
|
|
11
|
+
from datalathe.commands.extract_tables import ExtractTablesCommand
|
|
12
|
+
from datalathe.commands.generate_report import GenerateReportCommand
|
|
13
|
+
from datalathe.errors import DatalatheApiError, DatalatheStageError
|
|
14
|
+
from datalathe.types import (
|
|
15
|
+
Chip,
|
|
16
|
+
ChipMetadata,
|
|
17
|
+
ChipTag,
|
|
18
|
+
ChipsResponse,
|
|
19
|
+
DatabaseTable,
|
|
20
|
+
DuckDBDatabase,
|
|
21
|
+
Partition,
|
|
22
|
+
ReportResultEntry,
|
|
23
|
+
ReportTiming,
|
|
24
|
+
S3StorageConfig,
|
|
25
|
+
SchemaField,
|
|
26
|
+
SourceRequest,
|
|
27
|
+
SourceType,
|
|
28
|
+
_from_dict,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class GenerateReportResult:
    """Parsed outcome of :meth:`DatalatheClient.generate_report`."""

    # Report entries keyed by the integer index of the query in the request
    # (server returns string keys; the client converts them to int).
    results: dict[int, ReportResultEntry]
    # Server-reported timing breakdown, or None when the response omits it.
    timing: ReportTiming | None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class DatalatheClient:
    """HTTP client for the Datalathe API.

    Thin wrapper around a persistent :class:`requests.Session`. All requests
    go to ``base_url`` with the configured default ``headers`` and ``timeout``.

    The client owns a network resource (the session); call :meth:`close` when
    done, or use the client as a context manager::

        with DatalatheClient("http://localhost:3000") as client:
            ...
    """

    def __init__(
        self,
        base_url: str,
        headers: dict[str, str] | None = None,
        timeout: float = 30.0,
    ):
        """Create a client.

        Args:
            base_url: Server root, e.g. ``"http://localhost:3000"``. A
                trailing slash is stripped so paths can be joined safely.
            headers: Default headers applied to every request (e.g. auth).
            timeout: Per-request timeout in seconds.
        """
        self._base_url = base_url.rstrip("/")
        self._headers = headers or {}
        self._timeout = timeout
        self._session = requests.Session()
        self._session.headers.update(self._headers)

    def close(self) -> None:
        """Release the underlying HTTP session and its pooled connections."""
        self._session.close()

    def __enter__(self) -> "DatalatheClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    # --- Chip creation ---

    def create_chip(
        self,
        source_name: str,
        query: str,
        table_name: str,
        partition: Partition | None = None,
        chip_name: str | None = None,
        column_replace: dict[str, str] | None = None,
        storage_config: S3StorageConfig | None = None,
    ) -> str:
        """Create a single chip from a MySQL source and return its chip id.

        Raises:
            DatalatheStageError: if the server reports a staging error.
            DatalatheApiError: on a non-2xx HTTP response.
        """
        chips = self.create_chips(
            sources=[SourceRequest(
                database_name=source_name,
                table_name=table_name,
                query=query,
                partition=partition,
                column_replace=column_replace,
            )],
            source_type=SourceType.MYSQL,
            chip_name=chip_name,
            storage_config=storage_config,
        )
        return chips[0]

    def create_chip_from_file(
        self,
        file_path: str,
        table_name: str | None = None,
        partition: Partition | None = None,
        chip_name: str | None = None,
        column_replace: dict[str, str] | None = None,
        storage_config: S3StorageConfig | None = None,
    ) -> str:
        """Create a chip from a file on the server (e.g. CSV / Parquet).

        ``database_name``/``query`` are sent empty — the FILE source type
        reads from ``file_path`` instead.
        """
        chips = self.create_chips(
            sources=[SourceRequest(
                database_name="",
                query="",
                file_path=file_path,
                table_name=table_name,
                partition=partition,
                column_replace=column_replace,
            )],
            source_type=SourceType.FILE,
            chip_name=chip_name,
            storage_config=storage_config,
        )
        return chips[0]

    def create_chip_from_chip(
        self,
        source_chip_ids: list[str],
        query: str | None = None,
        table_name: str | None = None,
        chip_name: str | None = None,
        storage_config: S3StorageConfig | None = None,
    ) -> str:
        """Create a chip derived from one or more existing (cached) chips."""
        chips = self.create_chips(
            sources=[SourceRequest(
                database_name="",
                query=query or "",
                source_chip_ids=source_chip_ids,
                table_name=table_name,
            )],
            source_type=SourceType.CACHE,
            chip_name=chip_name,
            storage_config=storage_config,
        )
        return chips[0]

    def create_chips(
        self,
        sources: list[SourceRequest],
        chip_id: str | None = None,
        source_type: SourceType = SourceType.MYSQL,
        chip_name: str | None = None,
        storage_config: S3StorageConfig | None = None,
        tags: dict[str, str] | None = None,
    ) -> list[str]:
        """Create one chip per source request; returns the chip ids in order.

        Note: sources are staged sequentially, one HTTP round-trip each.
        A failure aborts the batch; earlier chips remain created.

        Raises:
            DatalatheStageError: if the server reports a staging error.
            DatalatheApiError: on a non-2xx HTTP response.
        """
        chip_ids: list[str] = []
        for source in sources:
            command = CreateChipCommand(
                source_type=source_type,
                source=source,
                chip_id=chip_id,
                chip_name=chip_name,
                storage_config=storage_config,
                tags=tags,
            )
            response = self.send_command(command)
            if response.error:
                raise DatalatheStageError(f"Failed to stage data: {response.error}")
            chip_ids.append(response.chip_id)
        return chip_ids

    # --- Query / Report ---

    def generate_report(
        self,
        chip_ids: list[str],
        queries: list[str],
        source_type: SourceType = SourceType.LOCAL,
        transform_query: bool | None = None,
        return_transformed_query: bool | None = None,
    ) -> GenerateReportResult:
        """Run ``queries`` against ``chip_ids`` and return the parsed report.

        The server keys results by the query's position in ``queries``; keys
        arrive as strings and are converted to ints here.
        """
        command = GenerateReportCommand(
            chip_ids=chip_ids,
            source_type=source_type,
            queries=queries,
            transform_query=transform_query,
            return_transformed_query=return_transformed_query,
        )
        response = self.send_command(command)
        results: dict[int, ReportResultEntry] = {}
        if response.result:
            for key, entry in response.result.items():
                results[int(key)] = entry
        return GenerateReportResult(results=results, timing=response.timing)

    # --- Database inspection ---

    def get_databases(self) -> list[DuckDBDatabase]:
        """List the databases visible to the staging layer."""
        data = self._get("/lathe/stage/databases")
        return [_from_dict(DuckDBDatabase, d) for d in data]

    def get_database_schema(self, database_name: str) -> list[DatabaseTable]:
        """Return the table schema of one database."""
        data = self._get(f"/lathe/stage/schema/{quote(database_name, safe='')}")
        return [_from_dict(DatabaseTable, d) for d in data]

    # --- Chip metadata & tagging ---

    def list_chips(self) -> ChipsResponse:
        """Return all chips with their metadata (and tags when present)."""
        return self._parse_chips_response(self._get("/lathe/chips"))

    def search_chips(
        self,
        table_name: str | None = None,
        partition_value: str | None = None,
        tag: str | None = None,
    ) -> ChipsResponse:
        """Search chips by table name, partition value and/or ``key:value`` tag.

        Omitted filters are not sent; with no filters this behaves like a
        search with no constraints.
        """
        params: dict[str, str] = {}
        if table_name is not None:
            params["table_name"] = table_name
        if partition_value is not None:
            params["partition_value"] = partition_value
        if tag is not None:
            params["tag"] = tag
        qs = urlencode(params)
        path = f"/lathe/chips/search?{qs}" if qs else "/lathe/chips/search"
        return self._parse_chips_response(self._get(path))

    def add_chip_tags(self, chip_id: str, tags: dict[str, str]) -> None:
        """Attach (or overwrite) key/value tags on a chip."""
        self._post(f"/lathe/chips/{quote(chip_id, safe='')}/tags", {"tags": tags})

    def delete_chip_tag(self, chip_id: str, key: str) -> None:
        """Remove a single tag key from a chip."""
        self._delete(f"/lathe/chips/{quote(chip_id, safe='')}/tags/{quote(key, safe='')}")

    def delete_chip(self, chip_id: str) -> None:
        """Delete a chip."""
        self._delete(f"/lathe/chips/{quote(chip_id, safe='')}")

    # --- Query analysis ---

    def extract_tables(self, query: str) -> list[str]:
        """Return the table names referenced by ``query``."""
        resp = self.extract_tables_with_transform(query)
        return resp["tables"]

    def extract_tables_with_transform(
        self,
        query: str,
        transform: bool | None = None,
    ) -> dict[str, Any]:
        """Extract table names, optionally transforming MySQL SQL to DuckDB.

        Returns a dict with keys ``"tables"`` and ``"transformed_query"``.

        Raises:
            DatalatheApiError: if the server reports an extraction error
                (surfaced with status code 400).
        """
        command = ExtractTablesCommand(query, transform)
        response = self.send_command(command)
        if response.error:
            raise DatalatheApiError(
                f"Failed to extract tables: {response.error}",
                400,
                response.error,
            )
        return {"tables": response.tables, "transformed_query": response.transformed_query}

    # --- Raw / generic ---

    def stage_data(self, request: dict[str, Any]) -> Any:
        """POST a raw stage-data request body; returns the raw JSON response."""
        return self._post("/lathe/stage/data", request)

    def post_report(self, request: dict[str, Any]) -> Any:
        """POST a raw report request body; returns the raw JSON response."""
        return self._post("/lathe/report", request)

    def send_command(self, command: DatalatheCommand) -> Any:
        """Execute a command object against its endpoint and parse the reply.

        Raises:
            DatalatheApiError: on a non-2xx HTTP response.
        """
        url = self._base_url + command.endpoint
        resp = self._session.post(
            url,
            json=command.request,
            headers={"Content-Type": "application/json"},
            timeout=self._timeout,
        )
        self._raise_for_error(resp, "Failed to execute command")
        return command.parse_response(resp.json())

    # --- Profiler methods ---

    def get_profiler_tables(self) -> list[dict[str, Any]]:
        return self._get("/lathe/profiler/tables")

    def start_profiler(self, skip_files: bool) -> Any:
        # The server encodes the flag in the path as "true"/"false".
        return self._get(f"/lathe/profiler/start/{str(skip_files).lower()}")

    def get_table_description(self, table_id: str) -> list[Any]:
        return self._get(f"/lathe/profiler/table/{quote(table_id, safe='')}/describe")

    def get_table_data(self, table_id: str) -> list[Any]:
        return self._get(f"/lathe/profiler/table/{quote(table_id, safe='')}")

    def get_table_source_files(self, table_id: str) -> list[Any]:
        return self._get(f"/lathe/profiler/table/{quote(table_id, safe='')}/source_file")

    def get_table_summary(self, table_id: str) -> Any:
        return self._get(f"/lathe/profiler/table/{quote(table_id, safe='')}/summary")

    def get_profiler_config(self) -> dict[str, Any]:
        return self._get("/lathe/profiler/config")

    def update_profiler_config(self, config: dict[str, Any]) -> Any:
        return self._post("/lathe/profiler/config/update", config)

    def get_schema_mappings(self) -> list[dict[str, Any]]:
        return self._get("/lathe/profiler/schema/mappings")

    def get_profiler_schema(self, request: dict[str, Any]) -> Any:
        return self._post("/lathe/profiler/schema", request)

    # --- Source / Job methods ---

    def get_source_file(self, file_id: str) -> dict[str, Any]:
        return self._get(f"/lathe/source/file/{quote(file_id, safe='')}")

    def get_all_jobs(self) -> dict[str, Any]:
        return self._get("/lathe/jobs/all")

    # --- Private HTTP methods ---

    @staticmethod
    def _raise_for_error(resp: "requests.Response", context: str) -> None:
        # Single place for the non-2xx -> DatalatheApiError translation that
        # was previously duplicated in every HTTP helper. Message format is
        # preserved exactly: "<context>: <status> <body>".
        if not resp.ok:
            raise DatalatheApiError(
                f"{context}: {resp.status_code} {resp.text}",
                resp.status_code,
                resp.text,
            )

    def _get(self, path: str) -> Any:
        url = self._base_url + path
        resp = self._session.get(url, timeout=self._timeout)
        self._raise_for_error(resp, f"GET {path} failed")
        return resp.json()

    def _post(self, path: str, body: Any) -> Any:
        url = self._base_url + path
        resp = self._session.post(
            url,
            json=body,
            headers={"Content-Type": "application/json"},
            timeout=self._timeout,
        )
        self._raise_for_error(resp, f"POST {path} failed")
        return resp.json()

    def _delete(self, path: str) -> None:
        url = self._base_url + path
        resp = self._session.delete(url, timeout=self._timeout)
        self._raise_for_error(resp, f"DELETE {path} failed")

    @staticmethod
    def _parse_chips_response(data: dict[str, Any]) -> ChipsResponse:
        # "chips" and "metadata" default to empty lists; "tags" is optional
        # and kept as None when absent so callers can distinguish "no tags
        # returned" from "empty tag list".
        chips = [_from_dict(Chip, c) for c in data.get("chips", [])]
        metadata = [_from_dict(ChipMetadata, m) for m in data.get("metadata", [])]
        tags = None
        if data.get("tags") is not None:
            tags = [_from_dict(ChipTag, t) for t in data["tags"]]
        return ChipsResponse(chips=chips, metadata=metadata, tags=tags)