dftly 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dftly-0.0.1/.editorconfig +13 -0
- dftly-0.0.1/.github/actions/setup/action.yaml +28 -0
- dftly-0.0.1/.github/workflows/code-quality-main.yaml +23 -0
- dftly-0.0.1/.github/workflows/code-quality-pr.yaml +34 -0
- dftly-0.0.1/.github/workflows/python-build.yaml +96 -0
- dftly-0.0.1/.github/workflows/tests.yaml +47 -0
- dftly-0.0.1/.gitignore +160 -0
- dftly-0.0.1/.pre-commit-config.yaml +79 -0
- dftly-0.0.1/AGENTS.md +147 -0
- dftly-0.0.1/CONTRIBUTING.md +93 -0
- dftly-0.0.1/LICENSE +21 -0
- dftly-0.0.1/PKG-INFO +807 -0
- dftly-0.0.1/README.md +787 -0
- dftly-0.0.1/pyproject.toml +36 -0
- dftly-0.0.1/setup.cfg +4 -0
- dftly-0.0.1/src/dftly/__init__.py +13 -0
- dftly-0.0.1/src/dftly/grammar.lark +88 -0
- dftly-0.0.1/src/dftly/nodes.py +147 -0
- dftly-0.0.1/src/dftly/parser.py +588 -0
- dftly-0.0.1/src/dftly/polars.py +237 -0
- dftly-0.0.1/src/dftly.egg-info/PKG-INFO +807 -0
- dftly-0.0.1/src/dftly.egg-info/SOURCES.txt +27 -0
- dftly-0.0.1/src/dftly.egg-info/dependency_links.txt +1 -0
- dftly-0.0.1/src/dftly.egg-info/requires.txt +6 -0
- dftly-0.0.1/src/dftly.egg-info/top_level.txt +1 -0
- dftly-0.0.1/tests/test_integration_polars.py +77 -0
- dftly-0.0.1/tests/test_parser.py +537 -0
- dftly-0.0.1/tests/test_polars_engine.py +411 -0
- dftly-0.0.1/uv.lock +385 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
name: Setup Package
|
|
2
|
+
description: This composite action sets up the package for other workflows.
|
|
3
|
+
inputs:
|
|
4
|
+
python-version:
|
|
5
|
+
description: The Python version to use for the setup.
|
|
6
|
+
required: true
|
|
7
|
+
default: "3.12"
|
|
8
|
+
group:
|
|
9
|
+
description: The dependency group to install (benchmarks, dev, docs)
|
|
10
|
+
required: false
|
|
11
|
+
default: "dev"
|
|
12
|
+
runs:
|
|
13
|
+
using: "composite"
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: "${{ inputs.python-version }}"
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v6
|
|
21
|
+
with:
|
|
22
|
+
enable-cache: true
|
|
23
|
+
cache-suffix: "py${{ inputs.python-version }}"
|
|
24
|
+
|
|
25
|
+
- name: Install packages
|
|
26
|
+
shell: bash
|
|
27
|
+
run: |
|
|
28
|
+
uv sync --locked --group ${{ inputs.group }} --extra polars
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Same as `code-quality-pr.yaml` but triggered on commit to main branch
|
|
2
|
+
# and runs on all files (instead of only the changed ones)
|
|
3
|
+
|
|
4
|
+
name: Code Quality Main
|
|
5
|
+
|
|
6
|
+
on:
|
|
7
|
+
push:
|
|
8
|
+
branches: [main]
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
code-quality:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Setup package
|
|
18
|
+
uses: ./.github/actions/setup
|
|
19
|
+
with:
|
|
20
|
+
python-version: "3.12"
|
|
21
|
+
|
|
22
|
+
- name: Run pre-commits
|
|
23
|
+
run: uv run pre-commit run --all-files --show-diff-on-failure
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# This workflow finds which files were changed, prints them,
|
|
2
|
+
# and runs `pre-commit` on those files.
|
|
3
|
+
|
|
4
|
+
# Inspired by the sktime library:
|
|
5
|
+
# https://github.com/alan-turing-institute/sktime/blob/main/.github/workflows/test.yml
|
|
6
|
+
|
|
7
|
+
name: Code Quality PR
|
|
8
|
+
|
|
9
|
+
on:
|
|
10
|
+
pull_request:
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
code-quality:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Setup package
|
|
20
|
+
uses: ./.github/actions/setup
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.12"
|
|
23
|
+
|
|
24
|
+
- name: Find modified files
|
|
25
|
+
id: file_changes
|
|
26
|
+
uses: tj-actions/changed-files@v46.0.5
|
|
27
|
+
|
|
28
|
+
- name: List all changed files
|
|
29
|
+
run: echo '${{ steps.file_changes.outputs.all_changed_files }}'
|
|
30
|
+
|
|
31
|
+
- name: Run pre-commits
|
|
32
|
+
run: >
|
|
33
|
+
uv run pre-commit run --show-diff-on-failure
|
|
34
|
+
--files ${{ steps.file_changes.outputs.all_changed_files}}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
|
|
2
|
+
|
|
3
|
+
on: push
|
|
4
|
+
|
|
5
|
+
jobs:
|
|
6
|
+
build:
|
|
7
|
+
name: Build distribution 📦
|
|
8
|
+
runs-on: ubuntu-latest
|
|
9
|
+
|
|
10
|
+
steps:
|
|
11
|
+
- uses: actions/checkout@v4
|
|
12
|
+
|
|
13
|
+
- name: Set up Python 3.12
|
|
14
|
+
uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: "3.12"
|
|
17
|
+
|
|
18
|
+
- name: Install uv
|
|
19
|
+
uses: astral-sh/setup-uv@v6
|
|
20
|
+
with:
|
|
21
|
+
enable-cache: true
|
|
22
|
+
|
|
23
|
+
- name: Build
|
|
24
|
+
run: uv build
|
|
25
|
+
|
|
26
|
+
- name: Store the distribution packages
|
|
27
|
+
uses: actions/upload-artifact@v4
|
|
28
|
+
with:
|
|
29
|
+
name: python-package-distributions
|
|
30
|
+
path: dist/
|
|
31
|
+
|
|
32
|
+
publish-to-pypi:
|
|
33
|
+
name: Publish Python 🐍 distribution 📦 to PyPI
|
|
34
|
+
if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
|
|
35
|
+
needs:
|
|
36
|
+
- build
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
|
+
environment:
|
|
39
|
+
name: pypi
|
|
40
|
+
url: https://pypi.org/p/dftly # PyPI project page linked from the `pypi` deployment environment
|
|
41
|
+
permissions:
|
|
42
|
+
id-token: write # IMPORTANT: mandatory for trusted publishing
|
|
43
|
+
|
|
44
|
+
steps:
|
|
45
|
+
- name: Download all the dists
|
|
46
|
+
uses: actions/download-artifact@v4
|
|
47
|
+
with:
|
|
48
|
+
name: python-package-distributions
|
|
49
|
+
path: dist/
|
|
50
|
+
|
|
51
|
+
- name: Publish distribution 📦 to PyPI
|
|
52
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
53
|
+
|
|
54
|
+
github-release:
|
|
55
|
+
name: >-
|
|
56
|
+
Sign the Python 🐍 distribution 📦 with Sigstore
|
|
57
|
+
and upload them to GitHub Release
|
|
58
|
+
needs:
|
|
59
|
+
- publish-to-pypi
|
|
60
|
+
runs-on: ubuntu-latest
|
|
61
|
+
|
|
62
|
+
permissions:
|
|
63
|
+
contents: write # IMPORTANT: mandatory for making GitHub Releases
|
|
64
|
+
id-token: write # IMPORTANT: mandatory for sigstore
|
|
65
|
+
|
|
66
|
+
steps:
|
|
67
|
+
- name: Download all the dists
|
|
68
|
+
uses: actions/download-artifact@v4
|
|
69
|
+
with:
|
|
70
|
+
name: python-package-distributions
|
|
71
|
+
path: dist/
|
|
72
|
+
|
|
73
|
+
- name: Sign the dists with Sigstore
|
|
74
|
+
uses: sigstore/gh-action-sigstore-python@v3.0.0
|
|
75
|
+
with:
|
|
76
|
+
inputs: >-
|
|
77
|
+
./dist/*.tar.gz
|
|
78
|
+
./dist/*.whl
|
|
79
|
+
- name: Create GitHub Release
|
|
80
|
+
env:
|
|
81
|
+
GITHUB_TOKEN: ${{ github.token }}
|
|
82
|
+
run: >-
|
|
83
|
+
gh release create
|
|
84
|
+
'${{ github.ref_name }}'
|
|
85
|
+
--repo '${{ github.repository }}'
|
|
86
|
+
--notes ""
|
|
87
|
+
- name: Upload artifact signatures to GitHub Release
|
|
88
|
+
env:
|
|
89
|
+
GITHUB_TOKEN: ${{ github.token }}
|
|
90
|
+
# Upload to GitHub Release using the `gh` CLI.
|
|
91
|
+
# `dist/` contains the built packages, and the
|
|
92
|
+
# sigstore-produced signatures and certificates.
|
|
93
|
+
run: >-
|
|
94
|
+
gh release upload
|
|
95
|
+
'${{ github.ref_name }}' dist/**
|
|
96
|
+
--repo '${{ github.repository }}'
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
run_tests_ubuntu:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
|
|
15
|
+
timeout-minutes: 30
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Setup package
|
|
21
|
+
uses: ./.github/actions/setup
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.12"
|
|
24
|
+
|
|
25
|
+
- name: Run tests
|
|
26
|
+
run: >
|
|
27
|
+
uv run pytest -v
|
|
28
|
+
--ignore=docs
|
|
29
|
+
--cov=src
|
|
30
|
+
--cov-report=xml:coverage.xml
|
|
31
|
+
--cov-report=term
|
|
32
|
+
--junitxml=junit.xml
|
|
33
|
+
|
|
34
|
+
- name: Upload coverage to Codecov
|
|
35
|
+
uses: codecov/codecov-action@v4.0.1
|
|
36
|
+
with:
|
|
37
|
+
token: ${{ secrets.CODECOV_TOKEN }}
|
|
38
|
+
files: coverage.xml
|
|
39
|
+
fail_ci_if_error: true
|
|
40
|
+
verbose: true
|
|
41
|
+
|
|
42
|
+
- name: Upload test results to Codecov
|
|
43
|
+
if: ${{ !cancelled() }}
|
|
44
|
+
uses: codecov/test-results-action@v1
|
|
45
|
+
with:
|
|
46
|
+
token: ${{ secrets.CODECOV_TOKEN }}
|
|
47
|
+
files: junit.xml
|
dftly-0.0.1/.gitignore
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py,cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# poetry
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
102
|
+
#poetry.lock
|
|
103
|
+
|
|
104
|
+
# pdm
|
|
105
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
106
|
+
#pdm.lock
|
|
107
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
108
|
+
# in version control.
|
|
109
|
+
# https://pdm.fming.dev/#use-with-ide
|
|
110
|
+
.pdm.toml
|
|
111
|
+
|
|
112
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
113
|
+
__pypackages__/
|
|
114
|
+
|
|
115
|
+
# Celery stuff
|
|
116
|
+
celerybeat-schedule
|
|
117
|
+
celerybeat.pid
|
|
118
|
+
|
|
119
|
+
# SageMath parsed files
|
|
120
|
+
*.sage.py
|
|
121
|
+
|
|
122
|
+
# Environments
|
|
123
|
+
.env
|
|
124
|
+
.venv
|
|
125
|
+
env/
|
|
126
|
+
venv/
|
|
127
|
+
ENV/
|
|
128
|
+
env.bak/
|
|
129
|
+
venv.bak/
|
|
130
|
+
|
|
131
|
+
# Spyder project settings
|
|
132
|
+
.spyderproject
|
|
133
|
+
.spyproject
|
|
134
|
+
|
|
135
|
+
# Rope project settings
|
|
136
|
+
.ropeproject
|
|
137
|
+
|
|
138
|
+
# mkdocs documentation
|
|
139
|
+
/site
|
|
140
|
+
|
|
141
|
+
# mypy
|
|
142
|
+
.mypy_cache/
|
|
143
|
+
.dmypy.json
|
|
144
|
+
dmypy.json
|
|
145
|
+
|
|
146
|
+
# Pyre type checker
|
|
147
|
+
.pyre/
|
|
148
|
+
|
|
149
|
+
# pytype static type analyzer
|
|
150
|
+
.pytype/
|
|
151
|
+
|
|
152
|
+
# Cython debug symbols
|
|
153
|
+
cython_debug/
|
|
154
|
+
|
|
155
|
+
# PyCharm
|
|
156
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
157
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
158
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
159
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
160
|
+
#.idea/
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
default_language_version:
|
|
2
|
+
python: python3.12
|
|
3
|
+
|
|
4
|
+
exclude: "docs/index.md|example/MEDS_output"
|
|
5
|
+
|
|
6
|
+
repos:
|
|
7
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
8
|
+
rev: v5.0.0
|
|
9
|
+
hooks:
|
|
10
|
+
# list of supported hooks: https://pre-commit.com/hooks.html
|
|
11
|
+
- id: trailing-whitespace
|
|
12
|
+
- id: end-of-file-fixer
|
|
13
|
+
- id: check-docstring-first
|
|
14
|
+
- id: check-yaml
|
|
15
|
+
- id: debug-statements
|
|
16
|
+
- id: detect-private-key
|
|
17
|
+
- id: check-executables-have-shebangs
|
|
18
|
+
- id: check-toml
|
|
19
|
+
- id: check-case-conflict
|
|
20
|
+
- id: check-added-large-files
|
|
21
|
+
args: [--maxkb, "800"]
|
|
22
|
+
|
|
23
|
+
# python code formatting, linting, and import sorting using ruff
|
|
24
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
25
|
+
rev: v0.12.2
|
|
26
|
+
hooks:
|
|
27
|
+
# Run the formatter
|
|
28
|
+
- id: ruff-format
|
|
29
|
+
# Run the linter
|
|
30
|
+
- id: ruff
|
|
31
|
+
args: ["--fix", "--exit-non-zero-on-fix"]
|
|
32
|
+
|
|
33
|
+
# python docstring formatting
|
|
34
|
+
- repo: https://github.com/myint/docformatter
|
|
35
|
+
rev: v1.7.7
|
|
36
|
+
hooks:
|
|
37
|
+
- id: docformatter
|
|
38
|
+
args: [--in-place, --wrap-summaries=110, --wrap-descriptions=110]
|
|
39
|
+
|
|
40
|
+
# yaml formatting
|
|
41
|
+
- repo: https://github.com/pre-commit/mirrors-prettier
|
|
42
|
+
rev: v4.0.0-alpha.8
|
|
43
|
+
hooks:
|
|
44
|
+
- id: prettier
|
|
45
|
+
types: [yaml]
|
|
46
|
+
exclude: "environment.yaml"
|
|
47
|
+
|
|
48
|
+
# shell scripts linter
|
|
49
|
+
- repo: https://github.com/shellcheck-py/shellcheck-py
|
|
50
|
+
rev: v0.10.0.1
|
|
51
|
+
hooks:
|
|
52
|
+
- id: shellcheck
|
|
53
|
+
|
|
54
|
+
# md formatting
|
|
55
|
+
- repo: https://github.com/executablebooks/mdformat
|
|
56
|
+
rev: 0.7.22
|
|
57
|
+
hooks:
|
|
58
|
+
- id: mdformat
|
|
59
|
+
args: ["--number"]
|
|
60
|
+
additional_dependencies:
|
|
61
|
+
- mdformat-ruff
|
|
62
|
+
- mdformat-gfm
|
|
63
|
+
- mdformat-gfm-alerts
|
|
64
|
+
- mdformat-tables
|
|
65
|
+
- mdformat_frontmatter
|
|
66
|
+
- mdformat-black
|
|
67
|
+
- mdformat-config
|
|
68
|
+
- mdformat-shfmt
|
|
69
|
+
- mdformat-mkdocs
|
|
70
|
+
- mdformat-toc
|
|
71
|
+
|
|
72
|
+
# word spelling linter
|
|
73
|
+
- repo: https://github.com/codespell-project/codespell
|
|
74
|
+
rev: v2.4.1
|
|
75
|
+
hooks:
|
|
76
|
+
- id: codespell
|
|
77
|
+
args:
|
|
78
|
+
- --skip=*.ipynb,*.bib,*.svg,pyproject.toml
|
|
79
|
+
- --ignore-words-list=ehr,crate
|
dftly-0.0.1/AGENTS.md
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to AI coding agents such as WARP (warp.dev) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
dftly (pronounced "deftly") is a DataFrame Transformation Language parser that provides a YAML-friendly DSL for expressing simple dataframe operations. The library parses YAML configurations into a fully-resolved intermediate representation that can be translated to different execution engines (currently supports Polars).
|
|
8
|
+
|
|
9
|
+
## Development Commands
|
|
10
|
+
|
|
11
|
+
### Installation & Setup
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Development installation with all dependencies
|
|
15
|
+
uv sync --group dev --extra polars
|
|
16
|
+
|
|
17
|
+
# Enable pre-commit hooks
|
|
18
|
+
pre-commit install
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Testing
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Run all tests
|
|
25
|
+
pytest
|
|
26
|
+
|
|
27
|
+
# Run tests with coverage
|
|
28
|
+
pytest --cov=dftly
|
|
29
|
+
|
|
30
|
+
# Run specific test files
|
|
31
|
+
pytest tests/test_parser.py
|
|
32
|
+
pytest tests/test_polars_engine.py
|
|
33
|
+
pytest tests/test_integration_polars.py
|
|
34
|
+
|
|
35
|
+
# Run doctests in README
|
|
36
|
+
pytest --doctest-glob=README.md
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Code Quality
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Run all pre-commit hooks
|
|
43
|
+
pre-commit run --all-files
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Architecture
|
|
47
|
+
|
|
48
|
+
### Core Components
|
|
49
|
+
|
|
50
|
+
1. **Parser (`src/dftly/parser.py`)**
|
|
51
|
+
|
|
52
|
+
- Main entry point via `from_yaml()` function
|
|
53
|
+
- Handles string parsing using Lark grammar
|
|
54
|
+
- Transforms simplified YAML syntax to fully-resolved AST nodes
|
|
55
|
+
|
|
56
|
+
2. **AST Nodes (`src/dftly/nodes.py`)**
|
|
57
|
+
|
|
58
|
+
- `Literal`: Simple values (numbers, strings, booleans)
|
|
59
|
+
- `Column`: References to dataframe columns with optional type info
|
|
60
|
+
- `Expression`: Complex operations with type and arguments
|
|
61
|
+
|
|
62
|
+
3. **Grammar (`src/dftly/grammar.lark`)**
|
|
63
|
+
|
|
64
|
+
- Lark-based parser grammar for string expressions
|
|
65
|
+
- Supports operator precedence, function calls, and complex expressions
|
|
66
|
+
- Handles mathematical, boolean, and string operations
|
|
67
|
+
|
|
68
|
+
4. **Execution Engine (`src/dftly/polars.py`)**
|
|
69
|
+
|
|
70
|
+
- Translates AST nodes to Polars expressions
|
|
71
|
+
- Maps dftly operations to corresponding Polars operations
|
|
72
|
+
- Handles type conversions and complex operations
|
|
73
|
+
|
|
74
|
+
### Two-Stage Parsing Process
|
|
75
|
+
|
|
76
|
+
1. **Simplified Form → Fully Resolved Form**
|
|
77
|
+
|
|
78
|
+
- YAML/dictionary input is parsed into unambiguous AST nodes
|
|
79
|
+
- String expressions are parsed using the Lark grammar
|
|
80
|
+
- Context-aware parsing based on input schema
|
|
81
|
+
|
|
82
|
+
2. **Fully Resolved Form → Execution Engine**
|
|
83
|
+
|
|
84
|
+
- AST nodes are translated to execution-specific expressions
|
|
85
|
+
- Currently supports Polars via `to_polars()` function
|
|
86
|
+
- Extensible design for additional engines
|
|
87
|
+
|
|
88
|
+
### Expression Types Supported
|
|
89
|
+
|
|
90
|
+
The library supports a comprehensive set of operations:
|
|
91
|
+
|
|
92
|
+
- Arithmetic: `ADD`, `SUBTRACT`
|
|
93
|
+
- Boolean: `AND`, `OR`, `NOT`
|
|
94
|
+
- Conditional: `CONDITIONAL` (ternary if-else)
|
|
95
|
+
- Type operations: `TYPE_CAST`, `COALESCE`
|
|
96
|
+
- String operations: `STRING_INTERPOLATE`, `REGEX`
|
|
97
|
+
- Temporal: `RESOLVE_TIMESTAMP`, `PARSE_WITH_FORMAT_STRING`
|
|
98
|
+
- Membership: `VALUE_IN_LITERAL_SET`, `VALUE_IN_RANGE`
|
|
99
|
+
- Utility: `HASH_TO_INT`
|
|
100
|
+
|
|
101
|
+
### Key Design Principles
|
|
102
|
+
|
|
103
|
+
1. **Human-Readable Input**: YAML-friendly syntax for non-technical users
|
|
104
|
+
2. **Fully-Resolved Intermediate Form**: Unambiguous representation for reliable execution
|
|
105
|
+
3. **Engine Independence**: Core parsing separate from execution engines
|
|
106
|
+
4. **Limited Scope**: Focuses on row-wise transformations, not table-level operations
|
|
107
|
+
|
|
108
|
+
## Testing Strategy
|
|
109
|
+
|
|
110
|
+
- **Unit Tests**: Individual parser components and node types
|
|
111
|
+
- **Integration Tests**: End-to-end parsing and execution with Polars
|
|
112
|
+
- **Doctest**: Examples in README.md are automatically tested
|
|
113
|
+
- **Type Safety**: All code uses type hints and is validated
|
|
114
|
+
|
|
115
|
+
## Important Files
|
|
116
|
+
|
|
117
|
+
- `src/dftly/__init__.py`: Public API exports
|
|
118
|
+
- `src/dftly/parser.py`: Core parsing logic with `DftlyTransformer` class
|
|
119
|
+
- `src/dftly/nodes.py`: AST node definitions with validation
|
|
120
|
+
- `src/dftly/grammar.lark`: Lark grammar for string expression parsing
|
|
121
|
+
- `src/dftly/polars.py`: Polars execution engine implementation
|
|
122
|
+
- `pyproject.toml`: Project configuration with dependencies and build settings
|
|
123
|
+
- `.pre-commit-config.yaml`: Code quality automation
|
|
124
|
+
|
|
125
|
+
## Common Development Patterns
|
|
126
|
+
|
|
127
|
+
### Adding New Expression Types
|
|
128
|
+
|
|
129
|
+
1. Add expression name to `_EXPR_TYPES` set in `parser.py`
|
|
130
|
+
2. Implement parsing logic in `Parser._parse_mapping()`
|
|
131
|
+
3. Add execution logic in `polars.py` `_expr_to_polars()`
|
|
132
|
+
4. Add comprehensive tests covering parsing and execution
|
|
133
|
+
5. Update documentation and examples
|
|
134
|
+
|
|
135
|
+
### Extending Grammar
|
|
136
|
+
|
|
137
|
+
1. Modify `grammar.lark` with new syntax rules
|
|
138
|
+
2. Update `DftlyTransformer` class in `parser.py`
|
|
139
|
+
3. Add corresponding expression type handling
|
|
140
|
+
4. Test string parsing alongside dictionary forms
|
|
141
|
+
|
|
142
|
+
### Adding New Execution Engines
|
|
143
|
+
|
|
144
|
+
1. Create new module (e.g., `src/dftly/pandas.py`)
|
|
145
|
+
2. Implement `to_[engine]()` function similar to `to_polars()`
|
|
146
|
+
3. Map each expression type to engine-specific operations
|
|
147
|
+
4. Add comprehensive integration tests
|