laketower 0.1.0__tar.gz → 0.3.0__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {laketower-0.1.0 → laketower-0.3.0}/.github/workflows/ci-cd.yml +43 -5
  2. laketower-0.3.0/CHANGELOG.md +53 -0
  3. laketower-0.1.0/README.md → laketower-0.3.0/PKG-INFO +74 -4
  4. laketower-0.1.0/PKG-INFO → laketower-0.3.0/README.md +36 -27
  5. laketower-0.3.0/laketower/__about__.py +1 -0
  6. {laketower-0.1.0 → laketower-0.3.0}/laketower/cli.py +45 -179
  7. laketower-0.3.0/laketower/config.py +45 -0
  8. laketower-0.3.0/laketower/tables.py +134 -0
  9. laketower-0.3.0/laketower/templates/_base.html +72 -0
  10. laketower-0.3.0/laketower/templates/index.html +4 -0
  11. laketower-0.3.0/laketower/templates/tables/_macros.html +13 -0
  12. laketower-0.3.0/laketower/templates/tables/history.html +42 -0
  13. laketower-0.3.0/laketower/templates/tables/index.html +84 -0
  14. laketower-0.3.0/laketower/templates/tables/query.html +47 -0
  15. laketower-0.3.0/laketower/templates/tables/view.html +96 -0
  16. laketower-0.3.0/laketower/web.py +167 -0
  17. laketower-0.3.0/pyproject.toml +70 -0
  18. laketower-0.3.0/renovate.json +11 -0
  19. laketower-0.3.0/tests/__init__.py +0 -0
  20. laketower-0.3.0/tests/conftest.py +53 -0
  21. {laketower-0.1.0 → laketower-0.3.0}/tests/test_cli.py +46 -71
  22. laketower-0.3.0/tests/test_web.py +308 -0
  23. {laketower-0.1.0 → laketower-0.3.0}/uv.lock +295 -275
  24. laketower-0.1.0/CHANGELOG.md +0 -24
  25. laketower-0.1.0/laketower/__about__.py +0 -1
  26. laketower-0.1.0/pyproject.toml +0 -52
  27. {laketower-0.1.0 → laketower-0.3.0}/.gitignore +0 -0
  28. {laketower-0.1.0 → laketower-0.3.0}/.python-version +0 -0
  29. {laketower-0.1.0 → laketower-0.3.0}/LICENSE.md +0 -0
  30. {laketower-0.1.0 → laketower-0.3.0}/demo/generate.py +0 -0
  31. {laketower-0.1.0 → laketower-0.3.0}/demo/laketower.yml +0 -0
  32. {laketower-0.1.0 → laketower-0.3.0}/demo/sample_table/_delta_log/00000000000000000000.json +0 -0
  33. {laketower-0.1.0 → laketower-0.3.0}/demo/sample_table/_delta_log/00000000000000000001.json +0 -0
  34. {laketower-0.1.0 → laketower-0.3.0}/demo/sample_table/_delta_log/00000000000000000002.json +0 -0
  35. {laketower-0.1.0 → laketower-0.3.0}/demo/sample_table/_delta_log/00000000000000000003.json +0 -0
  36. {laketower-0.1.0 → laketower-0.3.0}/demo/sample_table/part-00001-1a31a393-6db6-4d1a-bf4e-81ea061ff8cd-c000.snappy.parquet +0 -0
  37. {laketower-0.1.0 → laketower-0.3.0}/demo/sample_table/part-00001-5af77102-9207-4c89-aaf6-37e1f815ec26-c000.snappy.parquet +0 -0
  38. {laketower-0.1.0 → laketower-0.3.0}/demo/sample_table/part-00001-b11bab55-43d0-4d05-ae88-5b9481ae57db-c000.snappy.parquet +0 -0
  39. {laketower-0.1.0 → laketower-0.3.0}/demo/weather/_delta_log/00000000000000000000.json +0 -0
  40. {laketower-0.1.0 → laketower-0.3.0}/demo/weather/_delta_log/00000000000000000001.json +0 -0
  41. {laketower-0.1.0 → laketower-0.3.0}/demo/weather/_delta_log/00000000000000000002.json +0 -0
  42. {laketower-0.1.0 → laketower-0.3.0}/demo/weather/part-00001-2323b963-be56-44e0-8c10-e237e7e6d4b9-c000.snappy.parquet +0 -0
  43. {laketower-0.1.0 → laketower-0.3.0}/demo/weather/part-00001-6360cbf8-f8a9-475f-8729-6f20b4ca64a9-c000.snappy.parquet +0 -0
  44. {laketower-0.1.0 → laketower-0.3.0}/laketower/__init__.py +0 -0
  45. {laketower-0.1.0 → laketower-0.3.0}/laketower/__main__.py +0 -0
  46. /laketower-0.1.0/tests/__init__.py → /laketower-0.3.0/laketower/static/.gitkeep +0 -0
  47. {laketower-0.1.0 → laketower-0.3.0}/tasks.py +0 -0
@@ -14,7 +14,7 @@ jobs:
14
14
  runs-on: ubuntu-latest
15
15
  strategy:
16
16
  matrix:
17
- python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
17
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
18
18
  steps:
19
19
  - uses: actions/checkout@v4
20
20
  - name: Install uv
@@ -151,10 +151,10 @@ jobs:
151
151
  with:
152
152
  repository-url: https://test.pypi.org/legacy/
153
153
  skip-existing: true
154
- - name: Install uv
155
- uses: astral-sh/setup-uv@v5
156
- - name: Validate package is available with uvx
157
- run: uvx --index https://test.pypi.org/simple/ --index-strategy unsafe-best-match laketower --version
154
+ # - name: Install uv
155
+ # uses: astral-sh/setup-uv@v5
156
+ # - name: Validate package is available with uvx
157
+ # run: uvx --index https://test.pypi.org/simple/ --index-strategy unsafe-best-match laketower --version
158
158
 
159
159
  pypi-publish:
160
160
  name: Upload release to PyPI
@@ -178,3 +178,41 @@ jobs:
178
178
  uses: astral-sh/setup-uv@v5
179
179
  - name: Validate package is available with uvx
180
180
  run: uvx laketower --version
181
+
182
+ release-publish:
183
+ name: Publish release as GitHub Release
184
+ runs-on: ubuntu-latest
185
+ needs: build
186
+ if: ${{ contains(github.ref, 'tags') }}
187
+ env:
188
+ TAG_NAME: "${{ github.ref_name }}"
189
+ RELEASE_NOTES_MD_FILE: "release_notes.md"
190
+ permissions:
191
+ contents: write
192
+ steps:
193
+ - uses: actions/checkout@v4
194
+ - name: Download package build artifacts
195
+ uses: actions/download-artifact@v4
196
+ with:
197
+ name: build
198
+ path: dist/
199
+ - name: Install uv
200
+ uses: astral-sh/setup-uv@v5
201
+ with:
202
+ python-version: '3.13'
203
+ enable-cache: true
204
+ cache-dependency-glob: "uv.lock"
205
+ - name: Extract changelog notes
206
+ run: |
207
+ uvx keepachangelog show ${TAG_NAME} >> "${RELEASE_NOTES_MD_FILE}"
208
+ cat "${RELEASE_NOTES_MD_FILE}" >> "${GITHUB_STEP_SUMMARY}"
209
+ - name: Create GitHub Release from changelog
210
+ env:
211
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
212
+ run: |
213
+ gh release create "${TAG_NAME}" \
214
+ --repo "${GITHUB_REPOSITORY}" \
215
+ --title "${TAG_NAME}" \
216
+ --notes-file "${RELEASE_NOTES_MD_FILE}" \
217
+ --verify-tag \
218
+ dist/*
@@ -0,0 +1,53 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.3.0] - 2025-02-27
11
+ Minor release with fixes and dropped Python 3.9 support.
12
+
13
+ ### BREAKING CHANGES
14
+ - deps: drop support for python 3.9
15
+
16
+ ### Fixed
17
+ - web: handle invalid tables sql query
18
+ - web: truncate long table names in sidebar
19
+
20
+ ## [0.2.0] - 2025-02-25
21
+ Introducing the Laketower web application!
22
+
23
+ ### Added
24
+ - `web` module
25
+ - List all registered tables
26
+ - Display table overview (metadata and schema)
27
+ - Display table history
28
+ - View a given table with simple query builder
29
+ - Query all registered tables with DuckDB SQL dialect
30
+ - CLI: add `tables view --version` argument to time-travel table version
31
+
32
+ ### Fixed
33
+ - Delta tables metadata compatibility when name and/or description is missing
34
+ - Delta tables history compatibility when created with Spark
35
+ - CLI: show default argument values in help
36
+
37
+ ## [0.1.0] - 2025-02-15
38
+ Initial release of `laketower`.
39
+
40
+ ### Added
41
+ - `cli` module
42
+ - Validate YAML configuration
43
+ - List all registered tables
44
+ - Display a given table metadata
45
+ - Display a given table schema
46
+ - Display a given table history
47
+ - View a given table with simple query builder
48
+ - Query all registered tables with DuckDB SQL dialect
49
+
50
+ [Unreleased]: https://github.com/datalpia/laketower/compare/0.3.0...HEAD
51
+ [0.3.0]: https://github.com/datalpia/laketower/compare/0.2.0...0.3.0
52
+ [0.2.0]: https://github.com/datalpia/laketower/compare/0.1.0...0.2.0
53
+ [0.1.0]: https://github.com/datalpia/laketower/releases/tag/0.1.0
@@ -1,10 +1,49 @@
1
+ Metadata-Version: 2.4
2
+ Name: laketower
3
+ Version: 0.3.0
4
+ Summary: Oversee your lakehouse
5
+ Project-URL: Repository, https://github.com/datalpia/laketower
6
+ Project-URL: Issues, https://github.com/datalpia/laketower/issues
7
+ Project-URL: Changelog, https://github.com/datalpia/laketower/blob/master/CHANGELOG.md
8
+ Author-email: Romain Clement <git@romain-clement.net>
9
+ License: AGPL-3.0-or-later
10
+ License-File: LICENSE.md
11
+ Keywords: data,delta-lake,lakehouse,sql
12
+ Classifier: Development Status :: 2 - Pre-Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: End Users/Desktop
15
+ Classifier: Intended Audience :: Information Technology
16
+ Classifier: Intended Audience :: Other Audience
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Database
22
+ Classifier: Topic :: Software Development
23
+ Classifier: Topic :: Utilities
24
+ Requires-Python: <3.14,>=3.10
25
+ Requires-Dist: deltalake
26
+ Requires-Dist: duckdb
27
+ Requires-Dist: fastapi
28
+ Requires-Dist: jinja2>=3
29
+ Requires-Dist: pandas
30
+ Requires-Dist: pyarrow!=19.0.0
31
+ Requires-Dist: pydantic-settings>=2
32
+ Requires-Dist: pydantic>=2
33
+ Requires-Dist: pyyaml
34
+ Requires-Dist: rich
35
+ Requires-Dist: sqlglot
36
+ Requires-Dist: uvicorn
37
+ Description-Content-Type: text/markdown
38
+
1
39
  # Laketower
2
40
 
3
41
  > Oversee your lakehouse
4
42
 
5
43
  [![PyPI](https://img.shields.io/pypi/v/laketower.svg)](https://pypi.org/project/laketower/)
44
+ [![Python Versions](https://img.shields.io/pypi/pyversions/laketower?logo=python&logoColor=white)](https://pypi.org/project/laketower/)
6
45
  [![CI/CD](https://github.com/datalpia/laketower/actions/workflows/ci-cd.yml/badge.svg)](https://github.com/datalpia/laketower/actions/workflows/ci-cd.yml)
7
- [![License](https://img.shields.io/github/license/datalpia/laketower)](https://github.com/datalpia/laketower/blob/main/LICENSE)
46
+ [![License](https://img.shields.io/github/license/datalpia/laketower)](https://github.com/datalpia/laketower/blob/main/LICENSE.md)
8
47
 
9
48
  Utility application to explore and manage tables in your data lakehouse, especially tailored for data pipelines local development.
10
49
 
@@ -17,6 +56,7 @@ Utility application to explore and manage tables in your data lakehouse, especia
17
56
  - View table content with a simple query builder
18
57
  - Query all registered tables with DuckDB SQL dialect
19
58
  - Static and versionable YAML configuration
59
+ - Web application
20
60
  - CLI application
21
61
 
22
62
  ## Installation
@@ -67,21 +107,30 @@ tables:
67
107
  format: delta
68
108
  ```
69
109
 
110
+ ### Web Application
111
+
112
+ The easiest way to get started is to launch the Laketower web application:
113
+
114
+ ```bash
115
+ $ laketower -c demo/laketower.yml web
116
+ ```
117
+
70
118
  ### CLI
71
119
 
72
120
  Laketower provides a CLI interface:
73
121
 
74
122
  ```bash
75
123
  $ laketower --help
76
- usage: laketower [-h] [--version] [--config CONFIG] {config,tables} ...
124
+ usage: laketower [-h] [--version] [--config CONFIG] {web,config,tables} ...
77
125
 
78
126
  options:
79
127
  -h, --help show this help message and exit
80
128
  --version show program's version number and exit
81
- --config, -c CONFIG Path to the Laketower YAML configuration file
129
+ --config, -c CONFIG Path to the Laketower YAML configuration file (default: laketower.yml)
82
130
 
83
131
  commands:
84
- {config,tables}
132
+ {web,config,tables}
133
+ web Launch the web application
85
134
  config Work with configuration
86
135
  tables Work with tables
87
136
  ```
@@ -205,6 +254,7 @@ Optional arguments:
205
254
  - `--sort-asc <col>`: sort by a column name in ascending order
206
255
  - `--sort-desc <col>`: sort by a column name in descending order
207
256
  - `--limit <num>` (default 10): limit the number of rows
257
+ - `--version`: time-travel to table revision number
208
258
 
209
259
  ```bash
210
260
  $ laketower -c demo/laketower.yml tables view weather
@@ -239,6 +289,26 @@ $ laketower -c demo/laketower.yml tables view weather --cols time city temperatu
239
289
  └───────────────────────────┴──────────┴───────────────────┘
240
290
  ```
241
291
 
292
+ ```bash
293
+ $ laketower -c demo/laketower.yml tables view weather --version 1
294
+
295
+ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓
296
+ ┃ time ┃ city ┃ temperature_2m ┃ relative_humidity_2m ┃ wind_speed_10m ┃
297
+ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩
298
+ │ 2025-01-26 01:00:00+01:00 │ Grenoble │ 7.0 │ 87.0 │ 8.899999618530273 │
299
+ │ 2025-01-26 02:00:00+01:00 │ Grenoble │ 6.099999904632568 │ 87.0 │ 6.199999809265137 │
300
+ │ 2025-01-26 03:00:00+01:00 │ Grenoble │ 6.0 │ 86.0 │ 2.700000047683716 │
301
+ │ 2025-01-26 04:00:00+01:00 │ Grenoble │ 6.099999904632568 │ 82.0 │ 3.0999999046325684 │
302
+ │ 2025-01-26 05:00:00+01:00 │ Grenoble │ 5.5 │ 87.0 │ 3.299999952316284 │
303
+ │ 2025-01-26 06:00:00+01:00 │ Grenoble │ 5.199999809265137 │ 91.0 │ 2.200000047683716 │
304
+ │ 2025-01-26 07:00:00+01:00 │ Grenoble │ 4.800000190734863 │ 86.0 │ 3.0 │
305
+ │ 2025-01-26 08:00:00+01:00 │ Grenoble │ 4.900000095367432 │ 83.0 │ 1.100000023841858 │
306
+ │ 2025-01-26 09:00:00+01:00 │ Grenoble │ 4.0 │ 92.0 │ 3.0999999046325684 │
307
+ │ 2025-01-26 10:00:00+01:00 │ Grenoble │ 5.0 │ 86.0 │ 6.400000095367432 │
308
+ └───────────────────────────┴──────────┴───────────────────┴──────────────────────┴────────────────────┘
309
+ ```
310
+
311
+
242
312
  #### Query all registered tables
243
313
 
244
314
  Query any registered tables using DuckDB SQL dialect!
@@ -1,33 +1,11 @@
1
- Metadata-Version: 2.4
2
- Name: laketower
3
- Version: 0.1.0
4
- Summary: Oversee your lakehouse
5
- Author-email: Romain Clement <git@romain-clement.net>
6
- License: AGPL-3.0-or-later
7
- License-File: LICENSE.md
8
- Classifier: Development Status :: 2 - Pre-Alpha
9
- Classifier: Intended Audience :: Developers
10
- Classifier: Intended Audience :: Information Technology
11
- Classifier: Topic :: Software Development
12
- Classifier: Topic :: Utilities
13
- Requires-Python: <3.14,>=3.9
14
- Requires-Dist: deltalake
15
- Requires-Dist: duckdb
16
- Requires-Dist: pandas
17
- Requires-Dist: pyarrow<19
18
- Requires-Dist: pydantic
19
- Requires-Dist: pyyaml
20
- Requires-Dist: rich
21
- Requires-Dist: sqlglot
22
- Description-Content-Type: text/markdown
23
-
24
1
  # Laketower
25
2
 
26
3
  > Oversee your lakehouse
27
4
 
28
5
  [![PyPI](https://img.shields.io/pypi/v/laketower.svg)](https://pypi.org/project/laketower/)
6
+ [![Python Versions](https://img.shields.io/pypi/pyversions/laketower?logo=python&logoColor=white)](https://pypi.org/project/laketower/)
29
7
  [![CI/CD](https://github.com/datalpia/laketower/actions/workflows/ci-cd.yml/badge.svg)](https://github.com/datalpia/laketower/actions/workflows/ci-cd.yml)
30
- [![License](https://img.shields.io/github/license/datalpia/laketower)](https://github.com/datalpia/laketower/blob/main/LICENSE)
8
+ [![License](https://img.shields.io/github/license/datalpia/laketower)](https://github.com/datalpia/laketower/blob/main/LICENSE.md)
31
9
 
32
10
  Utility application to explore and manage tables in your data lakehouse, especially tailored for data pipelines local development.
33
11
 
@@ -40,6 +18,7 @@ Utility application to explore and manage tables in your data lakehouse, especia
40
18
  - View table content with a simple query builder
41
19
  - Query all registered tables with DuckDB SQL dialect
42
20
  - Static and versionable YAML configuration
21
+ - Web application
43
22
  - CLI application
44
23
 
45
24
  ## Installation
@@ -90,21 +69,30 @@ tables:
90
69
  format: delta
91
70
  ```
92
71
 
72
+ ### Web Application
73
+
74
+ The easiest way to get started is to launch the Laketower web application:
75
+
76
+ ```bash
77
+ $ laketower -c demo/laketower.yml web
78
+ ```
79
+
93
80
  ### CLI
94
81
 
95
82
  Laketower provides a CLI interface:
96
83
 
97
84
  ```bash
98
85
  $ laketower --help
99
- usage: laketower [-h] [--version] [--config CONFIG] {config,tables} ...
86
+ usage: laketower [-h] [--version] [--config CONFIG] {web,config,tables} ...
100
87
 
101
88
  options:
102
89
  -h, --help show this help message and exit
103
90
  --version show program's version number and exit
104
- --config, -c CONFIG Path to the Laketower YAML configuration file
91
+ --config, -c CONFIG Path to the Laketower YAML configuration file (default: laketower.yml)
105
92
 
106
93
  commands:
107
- {config,tables}
94
+ {web,config,tables}
95
+ web Launch the web application
108
96
  config Work with configuration
109
97
  tables Work with tables
110
98
  ```
@@ -228,6 +216,7 @@ Optional arguments:
228
216
  - `--sort-asc <col>`: sort by a column name in ascending order
229
217
  - `--sort-desc <col>`: sort by a column name in descending order
230
218
  - `--limit <num>` (default 10): limit the number of rows
219
+ - `--version`: time-travel to table revision number
231
220
 
232
221
  ```bash
233
222
  $ laketower -c demo/laketower.yml tables view weather
@@ -262,6 +251,26 @@ $ laketower -c demo/laketower.yml tables view weather --cols time city temperatu
262
251
  └───────────────────────────┴──────────┴───────────────────┘
263
252
  ```
264
253
 
254
+ ```bash
255
+ $ laketower -c demo/laketower.yml tables view weather --version 1
256
+
257
+ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓
258
+ ┃ time ┃ city ┃ temperature_2m ┃ relative_humidity_2m ┃ wind_speed_10m ┃
259
+ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩
260
+ │ 2025-01-26 01:00:00+01:00 │ Grenoble │ 7.0 │ 87.0 │ 8.899999618530273 │
261
+ │ 2025-01-26 02:00:00+01:00 │ Grenoble │ 6.099999904632568 │ 87.0 │ 6.199999809265137 │
262
+ │ 2025-01-26 03:00:00+01:00 │ Grenoble │ 6.0 │ 86.0 │ 2.700000047683716 │
263
+ │ 2025-01-26 04:00:00+01:00 │ Grenoble │ 6.099999904632568 │ 82.0 │ 3.0999999046325684 │
264
+ │ 2025-01-26 05:00:00+01:00 │ Grenoble │ 5.5 │ 87.0 │ 3.299999952316284 │
265
+ │ 2025-01-26 06:00:00+01:00 │ Grenoble │ 5.199999809265137 │ 91.0 │ 2.200000047683716 │
266
+ │ 2025-01-26 07:00:00+01:00 │ Grenoble │ 4.800000190734863 │ 86.0 │ 3.0 │
267
+ │ 2025-01-26 08:00:00+01:00 │ Grenoble │ 4.900000095367432 │ 83.0 │ 1.100000023841858 │
268
+ │ 2025-01-26 09:00:00+01:00 │ Grenoble │ 4.0 │ 92.0 │ 3.0999999046325684 │
269
+ │ 2025-01-26 10:00:00+01:00 │ Grenoble │ 5.0 │ 86.0 │ 6.400000095367432 │
270
+ └───────────────────────────┴──────────┴───────────────────┴──────────────────────┴────────────────────┘
271
+ ```
272
+
273
+
265
274
  #### Query all registered tables
266
275
 
267
276
  Query any registered tables using DuckDB SQL dialect!
@@ -0,0 +1 @@
1
+ __version__ = "0.3.0"
@@ -1,178 +1,22 @@
1
- from __future__ import annotations
2
-
3
1
  import argparse
4
- import enum
5
- from datetime import datetime, timezone
2
+ import os
6
3
  from pathlib import Path
7
- from typing import Any
8
4
 
9
- import deltalake
10
- import duckdb
11
- import pandas as pd
12
- import pyarrow as pa
13
- import pydantic
5
+ import rich.jupyter
14
6
  import rich.panel
15
7
  import rich.table
16
8
  import rich.text
17
9
  import rich.tree
18
- import sqlglot
19
- import sqlglot.dialects
20
- import sqlglot.dialects.duckdb
21
- import sqlglot.generator
22
- import yaml
10
+ import uvicorn
23
11
 
24
12
  from laketower.__about__ import __version__
13
+ from laketower.config import load_yaml_config
14
+ from laketower.tables import execute_query, generate_table_query, load_table
25
15
 
26
16
 
27
- class TableFormats(str, enum.Enum):
28
- delta = "delta"
29
-
30
-
31
- class ConfigTable(pydantic.BaseModel):
32
- name: str
33
- uri: str
34
- table_format: TableFormats = pydantic.Field(alias="format")
35
-
36
- @pydantic.model_validator(mode="after")
37
- def check_table(self) -> "ConfigTable":
38
- def check_delta_table(table_uri: str) -> None:
39
- if not deltalake.DeltaTable.is_deltatable(table_uri):
40
- raise ValueError(f"{table_uri} is not a valid Delta table")
41
-
42
- format_check = {TableFormats.delta: check_delta_table}
43
- format_check[self.table_format](self.uri)
44
-
45
- return self
46
-
47
-
48
- class ConfigQuery(pydantic.BaseModel):
49
- name: str
50
- sql: str
51
-
52
-
53
- class ConfigDashboard(pydantic.BaseModel):
54
- name: str
55
-
56
-
57
- class Config(pydantic.BaseModel):
58
- tables: list[ConfigTable] = []
59
-
60
-
61
- def load_yaml_config(config_path: Path) -> Config:
62
- config_dict = yaml.safe_load(config_path.read_text())
63
- return Config.model_validate(config_dict)
64
-
65
-
66
- class TableMetadata(pydantic.BaseModel):
67
- table_format: TableFormats
68
- name: str
69
- description: str
70
- uri: str
71
- id: str
72
- version: int
73
- created_at: datetime
74
- partitions: list[str]
75
- configuration: dict[str, str]
76
-
77
-
78
- class TableRevision(pydantic.BaseModel):
79
- version: int
80
- timestamp: datetime
81
- client_version: str
82
- operation: str
83
- operation_parameters: dict[str, Any]
84
- operation_metrics: dict[str, Any]
85
-
86
-
87
- class TableHistory(pydantic.BaseModel):
88
- revisions: list[TableRevision]
89
-
90
-
91
- def load_table_metadata(table_config: ConfigTable) -> TableMetadata:
92
- def load_delta_table_metadata(table_config: ConfigTable) -> TableMetadata:
93
- delta_table = deltalake.DeltaTable(table_config.uri)
94
- metadata = delta_table.metadata()
95
- return TableMetadata(
96
- table_format=table_config.table_format,
97
- name=metadata.name,
98
- description=metadata.description,
99
- uri=delta_table.table_uri,
100
- id=str(metadata.id),
101
- version=delta_table.version(),
102
- created_at=datetime.fromtimestamp(
103
- metadata.created_time / 1000, tz=timezone.utc
104
- ),
105
- partitions=metadata.partition_columns,
106
- configuration=metadata.configuration,
107
- )
108
-
109
- format_handler = {TableFormats.delta: load_delta_table_metadata}
110
- return format_handler[table_config.table_format](table_config)
111
-
112
-
113
- def load_table_schema(table_config: ConfigTable) -> pa.Schema:
114
- def load_delta_table_schema(table_config: ConfigTable) -> pa.Schema:
115
- delta_table = deltalake.DeltaTable(table_config.uri)
116
- return delta_table.schema().to_pyarrow()
117
-
118
- format_handler = {TableFormats.delta: load_delta_table_schema}
119
- return format_handler[table_config.table_format](table_config)
120
-
121
-
122
- def load_table_history(table_config: ConfigTable) -> TableHistory:
123
- def load_delta_table_history(table_config: ConfigTable) -> TableHistory:
124
- delta_table = deltalake.DeltaTable(table_config.uri)
125
- delta_history = delta_table.history()
126
- revisions = [
127
- TableRevision(
128
- version=event["version"],
129
- timestamp=datetime.fromtimestamp(
130
- event["timestamp"] / 1000, tz=timezone.utc
131
- ),
132
- client_version=event["clientVersion"],
133
- operation=event["operation"],
134
- operation_parameters=event["operationParameters"],
135
- operation_metrics=event.get("operationMetrics") or {},
136
- )
137
- for event in delta_history
138
- ]
139
- return TableHistory(revisions=revisions)
140
-
141
- format_handler = {TableFormats.delta: load_delta_table_history}
142
- return format_handler[table_config.table_format](table_config)
143
-
144
-
145
- def load_table_dataset(table_config: ConfigTable) -> pa.dataset.Dataset:
146
- def load_delta_table_metadata(table_config: ConfigTable) -> pa.dataset.Dataset:
147
- delta_table = deltalake.DeltaTable(table_config.uri)
148
- return delta_table.to_pyarrow_dataset()
149
-
150
- format_handler = {TableFormats.delta: load_delta_table_metadata}
151
- return format_handler[table_config.table_format](table_config)
152
-
153
-
154
- def execute_query_table(table_config: ConfigTable, sql_query: str) -> pd.DataFrame:
155
- table_dataset = load_table_dataset(table_config)
156
- table_name = table_config.name
157
- view_name = f"{table_name}_view"
158
- conn = duckdb.connect()
159
- conn.register(view_name, table_dataset)
160
- conn.execute(f"create table {table_name} as select * from {view_name}") # nosec B608
161
- return conn.execute(sql_query).df()
162
-
163
-
164
- def execute_query(tables_config: list[ConfigTable], sql_query: str) -> pd.DataFrame:
165
- try:
166
- conn = duckdb.connect()
167
- for table_config in tables_config:
168
- table_dataset = load_table_dataset(table_config)
169
- table_name = table_config.name
170
- view_name = f"{table_name}_view"
171
- conn.register(view_name, table_dataset)
172
- conn.execute(f"create table {table_name} as select * from {view_name}") # nosec B608
173
- return conn.execute(sql_query).df()
174
- except duckdb.Error as e:
175
- raise ValueError(str(e)) from e
17
+ def run_web(config_path: Path, reload: bool) -> None: # pragma: no cover
18
+ os.environ["LAKETOWER_CONFIG_PATH"] = str(config_path.absolute())
19
+ uvicorn.run("laketower.web:create_app", factory=True, reload=reload)
176
20
 
177
21
 
178
22
  def validate_config(config_path: Path) -> None:
@@ -200,7 +44,8 @@ def list_tables(config_path: Path) -> None:
200
44
  def table_metadata(config_path: Path, table_name: str) -> None:
201
45
  config = load_yaml_config(config_path)
202
46
  table_config = next(filter(lambda x: x.name == table_name, config.tables))
203
- metadata = load_table_metadata(table_config)
47
+ table = load_table(table_config)
48
+ metadata = table.metadata()
204
49
 
205
50
  tree = rich.tree.Tree(table_name)
206
51
  tree.add(f"name: {metadata.name}")
@@ -219,7 +64,8 @@ def table_metadata(config_path: Path, table_name: str) -> None:
219
64
  def table_schema(config_path: Path, table_name: str) -> None:
220
65
  config = load_yaml_config(config_path)
221
66
  table_config = next(filter(lambda x: x.name == table_name, config.tables))
222
- schema = load_table_schema(table_config)
67
+ table = load_table(table_config)
68
+ schema = table.schema()
223
69
 
224
70
  tree = rich.tree.Tree(table_name)
225
71
  for field in schema:
@@ -232,7 +78,8 @@ def table_schema(config_path: Path, table_name: str) -> None:
232
78
  def table_history(config_path: Path, table_name: str) -> None:
233
79
  config = load_yaml_config(config_path)
234
80
  table_config = next(filter(lambda x: x.name == table_name, config.tables))
235
- history = load_table_history(table_config)
81
+ table = load_table(table_config)
82
+ history = table.history()
236
83
 
237
84
  tree = rich.tree.Tree(table_name)
238
85
  for rev in history.revisions:
@@ -257,22 +104,21 @@ def view_table(
257
104
  cols: list[str] | None = None,
258
105
  sort_asc: str | None = None,
259
106
  sort_desc: str | None = None,
107
+ version: int | None = None,
260
108
  ) -> None:
261
109
  config = load_yaml_config(config_path)
262
110
  table_config = next(filter(lambda x: x.name == table_name, config.tables))
111
+ table = load_table(table_config)
112
+ table_dataset = table.dataset(version=version)
113
+ sql_query = generate_table_query(
114
+ table_name, limit=limit, cols=cols, sort_asc=sort_asc, sort_desc=sort_desc
115
+ )
116
+ results = execute_query({table_name: table_dataset}, sql_query)
263
117
 
264
- query_expr = sqlglot.select(*(cols or ["*"])).from_(table_name).limit(limit or 10)
265
- if sort_asc:
266
- query_expr = query_expr.order_by(f"{sort_asc} asc")
267
- elif sort_desc:
268
- query_expr = query_expr.order_by(f"{sort_desc} desc")
269
- sql_query = sqlglot.Generator(dialect=sqlglot.dialects.DuckDB).generate(query_expr)
270
-
271
- results = execute_query_table(table_config, sql_query)
272
118
  out = rich.table.Table()
273
119
  for column in results.columns:
274
120
  out.add_column(column)
275
- for value_list in results.values.tolist():
121
+ for value_list in results.to_numpy().tolist():
276
122
  row = [str(x) for x in value_list]
277
123
  out.add_row(*row)
278
124
 
@@ -282,10 +128,14 @@ def view_table(
282
128
 
283
129
  def query_table(config_path: Path, sql_query: str) -> None:
284
130
  config = load_yaml_config(config_path)
131
+ tables_dataset = {
132
+ table_config.name: load_table(table_config).dataset()
133
+ for table_config in config.tables
134
+ }
285
135
 
286
136
  out: rich.jupyter.JupyterMixin
287
137
  try:
288
- results = execute_query(config.tables, sql_query)
138
+ results = execute_query(tables_dataset, sql_query)
289
139
  out = rich.table.Table()
290
140
  for column in results.columns:
291
141
  out.add_column(column)
@@ -300,7 +150,9 @@ def query_table(config_path: Path, sql_query: str) -> None:
300
150
 
301
151
 
302
152
  def cli() -> None:
303
- parser = argparse.ArgumentParser("laketower")
153
+ parser = argparse.ArgumentParser(
154
+ "laketower", formatter_class=argparse.ArgumentDefaultsHelpFormatter
155
+ )
304
156
  parser.add_argument("--version", action="version", version=__version__)
305
157
  parser.add_argument(
306
158
  "--config",
@@ -311,6 +163,17 @@ def cli() -> None:
311
163
  )
312
164
  subparsers = parser.add_subparsers(title="commands", required=True)
313
165
 
166
+ parser_web = subparsers.add_parser(
167
+ "web", help="Launch the web application", add_help=True
168
+ )
169
+ parser_web.add_argument(
170
+ "--reload",
171
+ help="Reload the web server on changes",
172
+ action="store_true",
173
+ required=False,
174
+ )
175
+ parser_web.set_defaults(func=lambda x: run_web(x.config, x.reload))
176
+
314
177
  parser_config = subparsers.add_parser(
315
178
  "config", help="Work with configuration", add_help=True
316
179
  )
@@ -364,9 +227,12 @@ def cli() -> None:
364
227
  parser_tables_view_sort_group.add_argument(
365
228
  "--sort-desc", help="Sort by given column in descending order"
366
229
  )
230
+ parser_tables_view.add_argument(
231
+ "--version", type=int, help="Time-travel to table revision number"
232
+ )
367
233
  parser_tables_view.set_defaults(
368
234
  func=lambda x: view_table(
369
- x.config, x.table, x.limit, x.cols, x.sort_asc, x.sort_desc
235
+ x.config, x.table, x.limit, x.cols, x.sort_asc, x.sort_desc, x.version
370
236
  )
371
237
  )
372
238