laktory 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ name: release
2
+
3
+ on:
4
+ pull_request:
5
+ types:
6
+ - closed
7
+ branches:
8
+ - main
9
+
10
+ jobs:
11
+ release:
12
+ if: ${{ github.event.pull_request.merged == true && contains(github.event.pull_request.labels.*.name, 'release') }}
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - name: Checkout
16
+ uses: actions/checkout@v3
17
+
18
+ - uses: actions/setup-python@v4
19
+ with:
20
+ python-version: "3.10"
21
+
22
+ - name: Install flit
23
+ run: pip install flit
24
+
25
+ - name: Build and publish laktory to pypi
26
+ run: make publish
27
+ env:
28
+ FLIT_INDEX_URL: ${{ vars.FLIT_INDEX_URL }}
29
+ FLIT_USERNAME: ${{ vars.FLIT_USERNAME }}
30
+ FLIT_PASSWORD: ${{ secrets.FLIT_PASSWORD }}
@@ -0,0 +1,40 @@
1
+ name: test
2
+
3
+ on:
4
+ pull_request:
5
+ types:
6
+ - opened
7
+ - synchronize
8
+ push:
9
+ branches:
10
+ - main
11
+
12
+ jobs:
13
+ tests:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ pyVersion:
19
+ - '3.9'
20
+ - '3.10'
21
+ - '3.11'
22
+ steps:
23
+ - name: Checkout
24
+ uses: actions/checkout@v3
25
+
26
+ # - name: Unshallow
27
+ # run: git fetch --prune --unshallow
28
+
29
+ - uses: actions/setup-python@v4
30
+ with:
31
+ python-version: ${{ matrix.pyVersion }}
32
+
33
+ - name: Install flit
34
+ run: pip install flit
35
+
36
+ - name: Run tests
37
+ run: make dev install test
38
+
39
+ # - name: Publish test coverage
40
+ # uses: codecov/codecov-action@v1
@@ -0,0 +1,45 @@
1
+ # --------------------------------------------------------------------------- #
2
+ # Compiled files #
3
+ # --------------------------------------------------------------------------- #
4
+
5
+ # Compiled files
6
+ **pytest_cache**
7
+ **/**.pyc
8
+
9
+ # --------------------------------------------------------------------------- #
10
+ # Environment and configurations #
11
+ # --------------------------------------------------------------------------- #
12
+
13
+ # PyCharm configuration
14
+ **/.idea/**
15
+
16
+ # Local settings and environment variables
17
+ #settings_local/*.*
18
+
19
+ # --------------------------------------------------------------------------- #
20
+ # Installation #
21
+ # --------------------------------------------------------------------------- #
22
+
23
+ #**/*.egg-info/**
24
+ dist
25
+ #**/dist/**
26
+
27
+ # --------------------------------------------------------------------------- #
28
+ # Testing and coverage #
29
+ # --------------------------------------------------------------------------- #
30
+
31
+ # Coverage
32
+ .coverage
33
+ coverage.xml
34
+ htmlcov
35
+
36
+ # Test results
37
+ junit
38
+
39
+ # Tox files - Global
40
+ #.tox_pip_cache_sdist
41
+ #.tox_pip_cache_whl
42
+
43
+ # Tox files - Package-specific
44
+ #**/.tox/**
45
+
@@ -0,0 +1,5 @@
1
+ # Release History
2
+
3
+ ## [0.0.1] - 2023-07-13
4
+ **Added**
5
+ - Initial PyPI release.
laktory-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 opencubes
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
laktory-0.0.1/Makefile ADDED
@@ -0,0 +1,17 @@
1
+ install:
2
+ flit install
3
+
4
+ dev:
5
+ flit install -s
6
+
7
+ test:
8
+ pytest --junitxml=junit/test-results.xml --cov=laktory --cov-report=xml --cov-report=html tests
9
+
10
+ coverage:
11
+ open htmlcov/index.html
12
+
13
+ build:
14
+ flit build
15
+
16
+ publish:
17
+ flit publish
laktory-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.1
2
+ Name: laktory
3
+ Version: 0.0.1
4
+ Summary: A DataOps framework for building a lakehouse
5
+ Keywords: one,two
6
+ Author-email: Olivier Soucy <osoucy.transactions@gmail.com>
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Dist: pydantic>=2
13
+ Requires-Dist: pyyaml
14
+ Requires-Dist: pyspark
15
+ Requires-Dist: black ; extra == "dev"
16
+ Requires-Dist: flit ; extra == "dev"
17
+ Requires-Dist: pytest ; extra == "test"
18
+ Requires-Dist: pytest-cov ; extra == "test"
19
+ Project-URL: Bug Tracker, https://github.com/opencubes-ai/laktory/issues
20
+ Project-URL: Homepage, https://github.com/opencubes-ai/laktory
21
+ Provides-Extra: dev
22
+ Provides-Extra: test
23
+
24
+ # laktory
25
+ A DataOps framework for building a lakehouse.
26
+
@@ -0,0 +1,2 @@
1
+ # laktory
2
+ A DataOps framework for building a lakehouse.
@@ -0,0 +1,3 @@
1
+ from ._version import VERSION
2
+
3
+ __version__ = VERSION
@@ -0,0 +1 @@
1
+ VERSION = "0.0.1"
@@ -0,0 +1,17 @@
1
+ import inspect
2
+ from pyspark.sql import types
3
+
4
+
5
# Registry of Spark SQL type instances, keyed by the string each instance
# reports through `simpleString()`.
SUPPORTED_TYPES = {}
for k, o in vars(types).items():
    # Candidates are the classes in `pyspark.sql.types` whose name contains
    # "Type"; everything else in the module is ignored.
    if not (inspect.isclass(o) and "Type" in k):
        continue

    # Only classes that can be instantiated without arguments are kept; a
    # constructor that rejects a bare call raises TypeError and is skipped.
    try:
        typ = o()
    except TypeError:
        continue

    if hasattr(typ, "simpleString"):
        SUPPORTED_TYPES[typ.simpleString()] = typ
@@ -0,0 +1,4 @@
1
+ from .catalog import Catalog
2
+ from .column import Column
3
+ from .database import Database
4
+ from .table import Table
@@ -0,0 +1,17 @@
1
+ import yaml
2
+ import json
3
+ from pydantic import BaseModel as _BaseModel
4
+
5
+
6
class BaseModel(_BaseModel):
    """Project-wide pydantic base class with YAML and JSON loaders."""

    @classmethod
    def model_validate_yaml(cls, fp):
        """Parse YAML from `fp` and validate the result as this model.

        `fp` is passed straight to `yaml.safe_load`; the parsed data is then
        handed to `model_validate`.
        """
        return cls.model_validate(yaml.safe_load(fp))

    @classmethod
    def model_validate_json_file(cls, fp):
        """Parse JSON from the file object `fp` and validate it as this model.

        `fp` is read with `json.load`; the parsed data is then handed to
        `model_validate`.
        """
        return cls.model_validate(json.load(fp))
@@ -0,0 +1,11 @@
1
+ from pydantic import computed_field
2
+
3
+ from laktory.models.base import BaseModel
4
+ from laktory.models.database import Database
5
+
6
+
7
class Catalog(BaseModel):
    """A named catalog and the databases declared under it."""

    # Only `name` is required; every other field has a default.
    name: str
    # NOTE(review): annotated as `str` but defaults to None -- an explicit
    # `comment=None` would not match the annotation; consider Optional[str].
    comment: str = None
    databases: list[Database] = []
    is_unity: bool = True
@@ -0,0 +1,54 @@
1
+ from typing import Literal
2
+ from pydantic import computed_field
3
+
4
+ from laktory.contants import SUPPORTED_TYPES
5
+ from laktory.models.base import BaseModel
6
+
7
+
8
class Column(BaseModel):
    """Description of a single table column.

    `parent_id` is a dot-separated path. The computed fields below read its
    trailing segments as ``<catalog>.<schema>.<table>`` and return None for
    any segment that is not present.
    """

    name: str
    # Must be one of the keys of SUPPORTED_TYPES.
    type: Literal[tuple(SUPPORTED_TYPES)] = "string"
    comment: str = None
    unit: str = None
    pii: bool = None
    func_name: str = None
    input_cols: list[str] = []
    func_kwargs: dict = {}
    parent_id: str = None

    def _parent_segment(self, position):
        """Return the `position`-th segment of `parent_id` counted from the end.

        None is returned when `parent_id` is unset or has fewer than
        `position` dot-separated segments.
        """
        if self.parent_id is None:
            return None
        segments = self.parent_id.split(".")
        if len(segments) < position:
            return None
        return segments[-position]

    # NOTE(review): the computed fields are annotated `-> str` although each
    # of them can return None.

    @computed_field
    @property
    def table_name(self) -> str:
        """Last segment of `parent_id`."""
        return self._parent_segment(1)

    @computed_field
    @property
    def schema_name(self) -> str:
        """Second-to-last segment of `parent_id`."""
        return self._parent_segment(2)

    @computed_field
    @property
    def database_name(self) -> str:
        """Alias of `schema_name`."""
        return self.schema_name

    @computed_field
    @property
    def catalog_name(self) -> str:
        """Third-to-last segment of `parent_id`."""
        return self._parent_segment(3)


if __name__ == "__main__":
    # Manual smoke check: build a column and print it.
    speed = Column(
        name="airspeed",
        type="double",
        unit="kt",
        parent_id="lakehouse.flights.f012",
    )
    print(speed)
@@ -0,0 +1,18 @@
1
+ from pydantic import computed_field
2
+
3
+ from laktory.models.base import BaseModel
4
+ from laktory.models.table import Table
5
+
6
+
7
class Database(BaseModel):
    """A named database and the tables declared under it.

    `parent_id` is a dot-separated path whose last segment is reported as
    the catalog name.
    """

    name: str
    # NOTE(review): annotated as `str` but defaults to None; consider
    # Optional[str] (same for `parent_id`).
    comment: str = None
    tables: list[Table] = []
    parent_id: str = None

    @computed_field
    @property
    def catalog_name(self) -> str:
        """Last segment of `parent_id`, or None when `parent_id` is unset.

        NOTE(review): annotated `-> str` although None can be returned.
        """
        if self.parent_id is None:
            return None
        # `str.split`/`rsplit` always yield at least one element, so no
        # length check is needed before taking the last segment.
        return self.parent_id.rsplit(".", 1)[-1]
@@ -0,0 +1,30 @@
1
+ from typing import Literal
2
+
3
+ from pydantic import Field
4
+ from pydantic import ConfigDict
5
+
6
+ from laktory.contants import SUPPORTED_TYPES
7
+ from laktory.models.base import BaseModel
8
+
9
+
10
class Column(BaseModel):
    """Column description that rejects unknown fields."""

    # With extra="forbid", fields not declared below are rejected at
    # validation time.
    model_config = ConfigDict(extra="forbid")

    name: str
    # Must be one of the keys of SUPPORTED_TYPES.
    type: Literal[tuple(SUPPORTED_TYPES)] = "string"
    comment: str = None
    unit: str = None
    pii: bool = None
    udf_name: str = None
    input_cols: list[str] = []
    udf_kwargs: dict = {}
    # Required field (no default) even though it is declared after the
    # defaulted fields.
    test: int = Field(...)


if __name__ == "__main__":
    # Manual smoke check: construction must succeed with the required field.
    speed = Column(name="airspeed", type="double", unit="kt", test=2)
@@ -0,0 +1,58 @@
1
+ from typing import Literal
2
+
3
+ from pydantic import computed_field
4
+
5
+ from laktory.models.base import BaseModel
6
+ from laktory.models.column import Column
7
+
8
+
9
class Table(BaseModel):
    """A named table, its columns and its position in the lakehouse.

    `parent_id` is a dot-separated path whose trailing segments are read as
    ``<catalog>.<database>``.
    """

    name: str
    columns: list[Column] = []
    # NOTE(review): the `str` fields below default to None although their
    # annotation does not admit it; consider Optional[str].
    primary_key: str = None
    comment: str = None
    parent_id: str = None

    # Lakehouse attributes.
    # TODO: event_name, pipeline_name, joins and expectations are not
    # modelled yet.
    zone: Literal["BRONZE", "SILVER", "SILVER_STAR", "GOLD"] = None

    # NOTE(review): the computed fields are annotated `-> str` although each
    # of them can return None.

    @computed_field
    @property
    def database_name(self) -> str:
        """Last segment of `parent_id`, or None when `parent_id` is unset."""
        if self.parent_id is None:
            return None
        # `str.split`/`rsplit` always yield at least one element, so no
        # length check is needed before taking the last segment.
        return self.parent_id.rsplit(".", 1)[-1]

    @computed_field
    @property
    def schema_name(self) -> str:
        """Alias of `database_name`."""
        return self.database_name

    @computed_field
    @property
    def catalog_name(self) -> str:
        """Second-to-last segment of `parent_id`.

        None is returned when `parent_id` is unset or has a single segment.
        """
        if self.parent_id is None:
            return None
        segments = self.parent_id.split(".")
        return segments[-2] if len(segments) >= 2 else None


if __name__ == "__main__":
    # Manual smoke check: build a two-column table and print it.
    table = Table(
        name="f1549",
        columns=[
            {"name": "airspeed", "type": "double"},
            {"name": "altitude", "type": "double"},
        ],
    )
    print(table)
@@ -0,0 +1,39 @@
1
+ [build-system]
2
+ requires = ["flit_core >=3.2,<4"]
3
+ build-backend = "flit_core.buildapi"
4
+
5
+ [project]
6
+ name = "laktory"
7
+ authors = [
8
+ {name = "Olivier Soucy", email = "osoucy.transactions@gmail.com"},
9
+ ]
10
+ description = "A DataOps framework for building a lakehouse"
11
+ readme = "README.md"
12
+ requires-python = ">=3.9"
13
+ keywords = ["one", "two"]
14
+ license = {text = "MIT"}
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ ]
20
+ dynamic = ["version"]
21
+ dependencies = [
22
+ "pydantic>=2",
23
+ "pyyaml",
24
+ "pyspark",
25
+ ]
26
+ [project.optional-dependencies]
27
+ dev = [
28
+ "black",
29
+ "flit",
30
+ ]
31
+ test = [
32
+ "pytest",
33
+ "pytest-cov",
34
+ # "tox"
35
+ ]
36
+
37
+ [project.urls]
38
+ "Homepage" = "https://github.com/opencubes-ai/laktory"
39
+ "Bug Tracker" = "https://github.com/opencubes-ai/laktory/issues"
File without changes
@@ -0,0 +1,6 @@
1
+ {
2
+ "name": "airspeed",
3
+ "type": "double",
4
+ "unit": "kt",
5
+ "parent_id": "lakehouse.flights.f1549"
6
+ }
@@ -0,0 +1,4 @@
1
+ name: "airspeed"
2
+ type: "double"
3
+ unit: "kt"
4
+ parent_id: "lakehouse.flights.f1549"
@@ -0,0 +1,13 @@
1
+ from laktory.models import Catalog
2
+
3
+
4
def test_model():
    """A catalog built from a name alone keeps that name."""
    catalog = Catalog(name="lakehouse")
    assert catalog.name == "lakehouse"


if __name__ == "__main__":
    test_model()
@@ -0,0 +1,42 @@
1
+ import os
2
+
3
+ from laktory.models import Column
4
+
5
+ AIRSPEED = {
6
+ "name": "airspeed",
7
+ "type": "double",
8
+ "unit": "kt",
9
+ "parent_id": "lakehouse.flights.f1549",
10
+ }
11
+
12
+ root_dir = os.path.dirname(__file__)
13
+
14
+
15
def test_model():
    """Keyword construction and `model_validate` yield equal columns."""
    from_kwargs = Column(**AIRSPEED)
    validated = Column.model_validate(AIRSPEED)

    assert validated.type == "double"

    # parent_id "lakehouse.flights.f1549" is read as catalog.schema.table.
    assert validated.catalog_name == "lakehouse"
    assert validated.schema_name == "flights"
    assert validated.table_name == "f1549"

    # Declared and computed fields are both exposed by the model.
    assert "func_name" in validated.model_fields
    assert "table_name" in validated.model_computed_fields

    assert from_kwargs == validated
25
+
26
+
27
def test_read():
    """Columns loaded from the YAML and JSON fixtures equal the in-memory one."""
    expected = Column(**AIRSPEED)

    with open(f"{root_dir}/airspeed.yaml", "r") as fp:
        from_yaml = Column.model_validate_yaml(fp)

    with open(f"{root_dir}/airspeed.json", "r") as fp:
        from_json = Column.model_validate_json_file(fp)

    assert from_yaml == expected
    assert from_json == expected


if __name__ == "__main__":
    test_model()
    test_read()
@@ -0,0 +1,14 @@
1
+ from pyspark.sql.types import DoubleType
2
+ from pyspark.sql.types import StringType
3
+ from laktory.contants import SUPPORTED_TYPES
4
+
5
+
6
def test_constants():
    """SUPPORTED_TYPES is keyed by Spark simple-string names."""
    expected = {
        "double": DoubleType(),
        "string": StringType(),
    }
    for key, spark_type in expected.items():
        assert SUPPORTED_TYPES[key] == spark_type

    # The Python type name is not a valid key.
    assert "str" not in SUPPORTED_TYPES


if __name__ == "__main__":
    test_constants()
@@ -0,0 +1,50 @@
1
+ from laktory.models import Table
2
+ from laktory.models import Database
3
+ from laktory.models import Column
4
+
5
+
6
def test_model():
    """Column dicts nested in a Database's tables are coerced to Column."""
    # Both tables share the same column specification.
    column_specs = [
        {"name": "airspeed", "type": "double"},
        {"name": "altitude", "type": "double"},
    ]
    db = Database(
        name="flights",
        tables=[
            Table(
                name=table_name,
                columns=column_specs,
                zone="SILVER",
                parent_id="lakehouse.flights",
            )
            for table_name in ("f1549", "f0002")
        ],
    )

    first_column = db.tables[0].columns[0]
    assert first_column.name == "airspeed"
    # isinstance is the idiomatic type check (replaces `type(x) == Column`).
    assert isinstance(first_column, Column)


if __name__ == "__main__":
    test_model()
@@ -0,0 +1,39 @@
1
+ import pytest
2
+ from pydantic import ValidationError
3
+
4
+ from laktory.models import Column
5
+ from laktory.models import Table
6
+
7
+
8
def test_model():
    """Table coerces column dicts, derives names from parent_id, checks zone."""
    column_specs = [
        {"name": "airspeed", "type": "double"},
        {"name": "altitude", "type": "double"},
    ]
    table = Table(
        name="f1549",
        columns=column_specs,
        zone="SILVER",
        parent_id="lakehouse.flights",
    )

    assert table.columns == [
        Column(name="airspeed", type="double"),
        Column(name="altitude", type="double"),
    ]
    # parent_id "lakehouse.flights" is read as catalog.schema.
    assert table.catalog_name == "lakehouse"
    assert table.schema_name == "flights"
    assert table.zone == "SILVER"

    # A zone outside the allowed literals is rejected.
    with pytest.raises(ValidationError):
        Table(name="f0001", zone="ROUGE")


if __name__ == "__main__":
    test_model()