dftly 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ root = true
2
+
3
+ [*]
4
+ charset = utf-8
5
+ end_of_line = lf
6
+ indent_size = 4
7
+ indent_style = space
8
+ insert_final_newline = true
9
+ max_line_length = 110
10
+ tab_width = 4
11
+
12
+ [{*.yaml,*.yml}]
13
+ indent_size = 2
@@ -0,0 +1,33 @@
1
+ name: Setup Package
2
+ description: This composite action sets up the package for other workflows.
3
+ inputs:
4
+   python-version:
5
+     description: The Python version to use for the setup.
6
+     # Optional because a default is supplied below.
7
+     required: false
8
+     default: "3.12"
9
+   group:
10
+     description: The dependency group to install (benchmarks, dev, docs)
11
+     required: false
12
+     default: "dev"
13
+ runs:
14
+   using: "composite"
15
+   steps:
16
+     - uses: actions/setup-python@v5
17
+       with:
18
+         python-version: "${{ inputs.python-version }}"
19
+
20
+     - name: Install uv
21
+       uses: astral-sh/setup-uv@v6
22
+       with:
23
+         enable-cache: true
24
+         cache-suffix: "py${{ inputs.python-version }}"
25
+
26
+     - name: Install packages
27
+       shell: bash
28
+       # The input is passed through the environment and quoted so that its value
29
+       # is always treated as a single argument rather than as shell syntax.
30
+       env:
31
+         DEPENDENCY_GROUP: ${{ inputs.group }}
32
+       run: |
33
+         uv sync --locked --group "$DEPENDENCY_GROUP" --extra polars
@@ -0,0 +1,23 @@
1
+ # Same as `code-quality-pr.yaml` but triggered on commit to main branch
2
+ # and runs on all files (instead of only the changed ones)
3
+
4
+ name: Code Quality Main
5
+
6
+ on:
7
+ push:
8
+ branches: [main]
9
+
10
+ jobs:
11
+ code-quality:
12
+ runs-on: ubuntu-latest
13
+
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - name: Setup package
18
+ uses: ./.github/actions/setup
19
+ with:
20
+ python-version: "3.12"
21
+
22
+ - name: Run pre-commits
23
+ run: uv run pre-commit run --all-files --show-diff-on-failure
@@ -0,0 +1,40 @@
1
+ # This workflow finds which files were changed, prints them,
2
+ # and runs `pre-commit` on those files.
3
+
4
+ # Inspired by the sktime library:
5
+ # https://github.com/alan-turing-institute/sktime/blob/main/.github/workflows/test.yml
6
+
7
+ name: Code Quality PR
8
+
9
+ on:
10
+   pull_request:
11
+
12
+ jobs:
13
+   code-quality:
14
+     runs-on: ubuntu-latest
15
+
16
+     steps:
17
+       - uses: actions/checkout@v4
18
+
19
+       - name: Setup package
20
+         uses: ./.github/actions/setup
21
+         with:
22
+           python-version: "3.12"
23
+
24
+       - name: Find modified files
25
+         id: file_changes
26
+         uses: tj-actions/changed-files@v46.0.5
27
+
28
+       # File names in a pull request are attacker-controlled, so they are passed
29
+       # through an environment variable instead of being expanded into the script.
30
+       - name: List all changed files
31
+         env:
32
+           ALL_CHANGED_FILES: ${{ steps.file_changes.outputs.all_changed_files }}
33
+         run: echo "$ALL_CHANGED_FILES"
34
+
35
+       - name: Run pre-commits
36
+         env:
37
+           ALL_CHANGED_FILES: ${{ steps.file_changes.outputs.all_changed_files }}
38
+         run: >
39
+           uv run pre-commit run --show-diff-on-failure
40
+           --files $ALL_CHANGED_FILES
@@ -0,0 +1,96 @@
1
+ name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
2
+
3
+ on: push
4
+
5
+ jobs:
6
+ build:
7
+ name: Build distribution 📦
8
+ runs-on: ubuntu-latest
9
+
10
+ steps:
11
+ - uses: actions/checkout@v4
12
+
13
+ - name: Set up Python 3.12
14
+ uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.12"
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v6
20
+ with:
21
+ enable-cache: true
22
+
23
+ - name: Build
24
+ run: uv build
25
+
26
+ - name: Store the distribution packages
27
+ uses: actions/upload-artifact@v4
28
+ with:
29
+ name: python-package-distributions
30
+ path: dist/
31
+
32
+ publish-to-pypi:
33
+ name: Publish Python 🐍 distribution 📦 to PyPI
34
+ if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
35
+ needs:
36
+ - build
37
+ runs-on: ubuntu-latest
38
+ environment:
39
+ name: pypi
40
+ url: https://pypi.org/p/dftly
41
+ permissions:
42
+ id-token: write # IMPORTANT: mandatory for trusted publishing
43
+
44
+ steps:
45
+ - name: Download all the dists
46
+ uses: actions/download-artifact@v4
47
+ with:
48
+ name: python-package-distributions
49
+ path: dist/
50
+
51
+ - name: Publish distribution 📦 to PyPI
52
+ uses: pypa/gh-action-pypi-publish@release/v1
53
+
54
+ github-release:
55
+ name: >-
56
+ Sign the Python 🐍 distribution 📦 with Sigstore
57
+ and upload them to GitHub Release
58
+ needs:
59
+ - publish-to-pypi
60
+ runs-on: ubuntu-latest
61
+
62
+ permissions:
63
+ contents: write # IMPORTANT: mandatory for making GitHub Releases
64
+ id-token: write # IMPORTANT: mandatory for sigstore
65
+
66
+ steps:
67
+ - name: Download all the dists
68
+ uses: actions/download-artifact@v4
69
+ with:
70
+ name: python-package-distributions
71
+ path: dist/
72
+
73
+ - name: Sign the dists with Sigstore
74
+ uses: sigstore/gh-action-sigstore-python@v3.0.0
75
+ with:
76
+ inputs: >-
77
+ ./dist/*.tar.gz
78
+ ./dist/*.whl
79
+ - name: Create GitHub Release
80
+ env:
81
+ GITHUB_TOKEN: ${{ github.token }}
82
+ run: >-
83
+ gh release create
84
+ '${{ github.ref_name }}'
85
+ --repo '${{ github.repository }}'
86
+ --notes ""
87
+ - name: Upload artifact signatures to GitHub Release
88
+ env:
89
+ GITHUB_TOKEN: ${{ github.token }}
90
+ # Upload to GitHub Release using the `gh` CLI.
91
+ # `dist/` contains the built packages, and the
92
+ # sigstore-produced signatures and certificates.
93
+ run: >-
94
+ gh release upload
95
+ '${{ github.ref_name }}' dist/**
96
+ --repo '${{ github.repository }}'
@@ -0,0 +1,47 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ jobs:
9
+ run_tests_ubuntu:
10
+ runs-on: ubuntu-latest
11
+
12
+ strategy:
13
+ fail-fast: false
14
+
15
+ timeout-minutes: 30
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Setup package
21
+ uses: ./.github/actions/setup
22
+ with:
23
+ python-version: "3.12"
24
+
25
+ - name: Run tests
26
+ run: >
27
+ uv run pytest -v
28
+ --ignore=docs
29
+ --cov=src
30
+ --cov-report=xml:coverage.xml
31
+ --cov-report=term
32
+ --junitxml=junit.xml
33
+
34
+ - name: Upload coverage to Codecov
35
+ uses: codecov/codecov-action@v4.0.1
36
+ with:
37
+ token: ${{ secrets.CODECOV_TOKEN }}
38
+ files: coverage.xml
39
+ fail_ci_if_error: true
40
+ verbose: true
41
+
42
+ - name: Upload test results to Codecov
43
+ if: ${{ !cancelled() }}
44
+ uses: codecov/test-results-action@v1
45
+ with:
46
+ token: ${{ secrets.CODECOV_TOKEN }}
47
+ files: junit.xml
dftly-0.0.1/.gitignore ADDED
@@ -0,0 +1,160 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
@@ -0,0 +1,79 @@
1
+ default_language_version:
2
+ python: python3.12
3
+
4
+ exclude: "docs/index.md|example/MEDS_output"
5
+
6
+ repos:
7
+ - repo: https://github.com/pre-commit/pre-commit-hooks
8
+ rev: v5.0.0
9
+ hooks:
10
+ # list of supported hooks: https://pre-commit.com/hooks.html
11
+ - id: trailing-whitespace
12
+ - id: end-of-file-fixer
13
+ - id: check-docstring-first
14
+ - id: check-yaml
15
+ - id: debug-statements
16
+ - id: detect-private-key
17
+ - id: check-executables-have-shebangs
18
+ - id: check-toml
19
+ - id: check-case-conflict
20
+ - id: check-added-large-files
21
+ args: [--maxkb, "800"]
22
+
23
+ # python code formatting, linting, and import sorting using ruff
24
+ - repo: https://github.com/astral-sh/ruff-pre-commit
25
+ rev: v0.12.2
26
+ hooks:
27
+ # Run the formatter
28
+ - id: ruff-format
29
+ # Run the linter
30
+ - id: ruff
31
+ args: ["--fix", "--exit-non-zero-on-fix"]
32
+
33
+ # python docstring formatting
34
+ - repo: https://github.com/myint/docformatter
35
+ rev: v1.7.7
36
+ hooks:
37
+ - id: docformatter
38
+ args: [--in-place, --wrap-summaries=110, --wrap-descriptions=110]
39
+
40
+ # yaml formatting
41
+ - repo: https://github.com/pre-commit/mirrors-prettier
42
+ rev: v4.0.0-alpha.8
43
+ hooks:
44
+ - id: prettier
45
+ types: [yaml]
46
+ exclude: "environment.yaml"
47
+
48
+ # shell scripts linter
49
+ - repo: https://github.com/shellcheck-py/shellcheck-py
50
+ rev: v0.10.0.1
51
+ hooks:
52
+ - id: shellcheck
53
+
54
+ # md formatting
55
+ - repo: https://github.com/executablebooks/mdformat
56
+ rev: 0.7.22
57
+ hooks:
58
+ - id: mdformat
59
+ args: ["--number"]
60
+ additional_dependencies:
61
+ - mdformat-ruff
62
+ - mdformat-gfm
63
+ - mdformat-gfm-alerts
64
+ - mdformat-tables
65
+ - mdformat_frontmatter
66
+ - mdformat-black
67
+ - mdformat-config
68
+ - mdformat-shfmt
69
+ - mdformat-mkdocs
70
+ - mdformat-toc
71
+
72
+ # word spelling linter
73
+ - repo: https://github.com/codespell-project/codespell
74
+ rev: v2.4.1
75
+ hooks:
76
+ - id: codespell
77
+ args:
78
+ - --skip=*.ipynb,*.bib,*.svg,pyproject.toml
79
+ - --ignore-words-list=ehr,crate
dftly-0.0.1/AGENTS.md ADDED
@@ -0,0 +1,147 @@
1
+ # AGENTS.md
2
+
3
+ This file provides guidance to AI coding agents (such as WARP, warp.dev) when working with code in this repository.
4
+
5
+ ## Overview
6
+
7
+ dftly (pronounced "deftly") is a DataFrame Transformation Language parser that provides a YAML-friendly DSL for expressing simple dataframe operations. The library parses YAML configurations into a fully-resolved intermediate representation that can be translated to different execution engines (currently supports Polars).
8
+
9
+ ## Development Commands
10
+
11
+ ### Installation & Setup
12
+
13
+ ```bash
14
+ # Development installation with all dependencies
15
+ pip install -e ".[dev,tests,polars]"
16
+
17
+ # Enable pre-commit hooks
18
+ pre-commit install
19
+ ```
20
+
21
+ ### Testing
22
+
23
+ ```bash
24
+ # Run all tests
25
+ pytest
26
+
27
+ # Run tests with coverage
28
+ pytest --cov=dftly
29
+
30
+ # Run specific test files
31
+ pytest tests/test_parser.py
32
+ pytest tests/test_polars_engine.py
33
+ pytest tests/test_integration_polars.py
34
+
35
+ # Run doctests in README
36
+ pytest --doctest-glob=README.md
37
+ ```
38
+
39
+ ### Code Quality
40
+
41
+ ```bash
42
+ # Run all pre-commit hooks
43
+ pre-commit run --all-files
44
+ ```
45
+
46
+ ## Architecture
47
+
48
+ ### Core Components
49
+
50
+ 1. **Parser (`src/dftly/parser.py`)**
51
+
52
+ - Main entry point via `from_yaml()` function
53
+ - Handles string parsing using Lark grammar
54
+ - Transforms simplified YAML syntax to fully-resolved AST nodes
55
+
56
+ 2. **AST Nodes (`src/dftly/nodes.py`)**
57
+
58
+ - `Literal`: Simple values (numbers, strings, booleans)
59
+ - `Column`: References to dataframe columns with optional type info
60
+ - `Expression`: Complex operations with type and arguments
61
+
62
+ 3. **Grammar (`src/dftly/grammar.lark`)**
63
+
64
+ - Lark-based parser grammar for string expressions
65
+ - Supports operator precedence, function calls, and complex expressions
66
+ - Handles mathematical, boolean, and string operations
67
+
68
+ 4. **Execution Engine (`src/dftly/polars.py`)**
69
+
70
+ - Translates AST nodes to Polars expressions
71
+ - Maps dftly operations to corresponding Polars operations
72
+ - Handles type conversions and complex operations
73
+
74
+ ### Two-Stage Parsing Process
75
+
76
+ 1. **Simplified Form → Fully Resolved Form**
77
+
78
+ - YAML/dictionary input is parsed into unambiguous AST nodes
79
+ - String expressions are parsed using the Lark grammar
80
+ - Context-aware parsing based on input schema
81
+
82
+ 2. **Fully Resolved Form → Execution Engine**
83
+
84
+ - AST nodes are translated to execution-specific expressions
85
+ - Currently supports Polars via `to_polars()` function
86
+ - Extensible design for additional engines
87
+
88
+ ### Expression Types Supported
89
+
90
+ The library supports a comprehensive set of operations:
91
+
92
+ - Arithmetic: `ADD`, `SUBTRACT`
93
+ - Boolean: `AND`, `OR`, `NOT`
94
+ - Conditional: `CONDITIONAL` (ternary if-else)
95
+ - Type operations: `TYPE_CAST`, `COALESCE`
96
+ - String operations: `STRING_INTERPOLATE`, `REGEX`
97
+ - Temporal: `RESOLVE_TIMESTAMP`, `PARSE_WITH_FORMAT_STRING`
98
+ - Membership: `VALUE_IN_LITERAL_SET`, `VALUE_IN_RANGE`
99
+ - Utility: `HASH_TO_INT`
100
+
101
+ ### Key Design Principles
102
+
103
+ 1. **Human-Readable Input**: YAML-friendly syntax for non-technical users
104
+ 2. **Fully-Resolved Intermediate Form**: Unambiguous representation for reliable execution
105
+ 3. **Engine Independence**: Core parsing separate from execution engines
106
+ 4. **Limited Scope**: Focuses on row-wise transformations, not table-level operations
107
+
108
+ ## Testing Strategy
109
+
110
+ - **Unit Tests**: Individual parser components and node types
111
+ - **Integration Tests**: End-to-end parsing and execution with Polars
112
+ - **Doctest**: Examples in README.md are automatically tested
113
+ - **Type Safety**: All code uses type hints and is validated
114
+
115
+ ## Important Files
116
+
117
+ - `src/dftly/__init__.py`: Public API exports
118
+ - `src/dftly/parser.py`: Core parsing logic with `DftlyTransformer` class
119
+ - `src/dftly/nodes.py`: AST node definitions with validation
120
+ - `src/dftly/grammar.lark`: Lark grammar for string expression parsing
121
+ - `src/dftly/polars.py`: Polars execution engine implementation
122
+ - `pyproject.toml`: Project configuration with dependencies and build settings
123
+ - `.pre-commit-config.yaml`: Code quality automation
124
+
125
+ ## Common Development Patterns
126
+
127
+ ### Adding New Expression Types
128
+
129
+ 1. Add expression name to `_EXPR_TYPES` set in `parser.py`
130
+ 2. Implement parsing logic in `Parser._parse_mapping()`
131
+ 3. Add execution logic in `polars.py` `_expr_to_polars()`
132
+ 4. Add comprehensive tests covering parsing and execution
133
+ 5. Update documentation and examples
134
+
135
+ ### Extending Grammar
136
+
137
+ 1. Modify `grammar.lark` with new syntax rules
138
+ 2. Update `DftlyTransformer` class in `parser.py`
139
+ 3. Add corresponding expression type handling
140
+ 4. Test string parsing alongside dictionary forms
141
+
142
+ ### Adding New Execution Engines
143
+
144
+ 1. Create new module (e.g., `src/dftly/pandas.py`)
145
+ 2. Implement `to_[engine]()` function similar to `to_polars()`
146
+ 3. Map each expression type to engine-specific operations
147
+ 4. Add comprehensive integration tests