crump 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. crump-0.1.0/.devcontainer/devcontainer.json +45 -0
  2. crump-0.1.0/.github/dependabot.yml +15 -0
  3. crump-0.1.0/.github/workflows/ci.yml +169 -0
  4. crump-0.1.0/.github/workflows/docs.yml +61 -0
  5. crump-0.1.0/.gitignore +207 -0
  6. crump-0.1.0/.python-version +1 -0
  7. crump-0.1.0/.vscode/settings.json +7 -0
  8. crump-0.1.0/CLAUDE.md +142 -0
  9. crump-0.1.0/CONTRIBUTING.md +185 -0
  10. crump-0.1.0/LICENSE +21 -0
  11. crump-0.1.0/PKG-INFO +191 -0
  12. crump-0.1.0/README.md +157 -0
  13. crump-0.1.0/build.sh +30 -0
  14. crump-0.1.0/docs/api-reference.md +585 -0
  15. crump-0.1.0/docs/cli-reference.md +584 -0
  16. crump-0.1.0/docs/configuration.md +686 -0
  17. crump-0.1.0/docs/contributing.md +496 -0
  18. crump-0.1.0/docs/development.md +287 -0
  19. crump-0.1.0/docs/features.md +700 -0
  20. crump-0.1.0/docs/index.md +83 -0
  21. crump-0.1.0/docs/installation.md +121 -0
  22. crump-0.1.0/docs/quick-start.md +192 -0
  23. crump-0.1.0/generate-docs.sh +115 -0
  24. crump-0.1.0/mkdocs.yml +76 -0
  25. crump-0.1.0/pyproject.toml +120 -0
  26. crump-0.1.0/src/crump/__init__.py +59 -0
  27. crump-0.1.0/src/crump/cdf_extractor.py +676 -0
  28. crump-0.1.0/src/crump/cdf_reader.py +240 -0
  29. crump-0.1.0/src/crump/cli.py +28 -0
  30. crump-0.1.0/src/crump/cli_extract.py +460 -0
  31. crump-0.1.0/src/crump/cli_inspect.py +421 -0
  32. crump-0.1.0/src/crump/cli_prepare.py +572 -0
  33. crump-0.1.0/src/crump/cli_sync.py +333 -0
  34. crump-0.1.0/src/crump/config.py +960 -0
  35. crump-0.1.0/src/crump/console_utils.py +31 -0
  36. crump-0.1.0/src/crump/csv_file.py +140 -0
  37. crump-0.1.0/src/crump/database.py +1422 -0
  38. crump-0.1.0/src/crump/file_types.py +102 -0
  39. crump-0.1.0/src/crump/history.py +170 -0
  40. crump-0.1.0/src/crump/parquet_file.py +233 -0
  41. crump-0.1.0/src/crump/tabular_file.py +220 -0
  42. crump-0.1.0/src/crump/type_detection.py +204 -0
  43. crump-0.1.0/tests/__init__.py +1 -0
  44. crump-0.1.0/tests/conftest.py +174 -0
  45. crump-0.1.0/tests/custom_functions.py +71 -0
  46. crump-0.1.0/tests/data/imap_mag_l1c_norm-magi_20251010_v001.cdf +0 -0
  47. crump-0.1.0/tests/data/imap_mag_l1c_norm-magi_20251010_v001.cdf.txt +272174 -0
  48. crump-0.1.0/tests/data/solo_L2_mag-rtn-normal-1-minute-internal_20241225_V00.cdf +0 -0
  49. crump-0.1.0/tests/data/solo_L2_mag-rtn-normal-1-minute-internal_20241225_V00.cdf.txt +8980 -0
  50. crump-0.1.0/tests/db_test_utils.py +123 -0
  51. crump-0.1.0/tests/test_auto_detect.py +118 -0
  52. crump-0.1.0/tests/test_cdf_e2e.py +158 -0
  53. crump-0.1.0/tests/test_cdf_extract.py +737 -0
  54. crump-0.1.0/tests/test_cli.py +665 -0
  55. crump-0.1.0/tests/test_config.py +1439 -0
  56. crump-0.1.0/tests/test_database_integration.py +1750 -0
  57. crump-0.1.0/tests/test_docs_examples.py +342 -0
  58. crump-0.1.0/tests/test_dry_run.py +558 -0
  59. crump-0.1.0/tests/test_helpers.py +68 -0
  60. crump-0.1.0/tests/test_history.py +444 -0
  61. crump-0.1.0/tests/test_inspect.py +325 -0
  62. crump-0.1.0/tests/test_parquet_integration.py +181 -0
  63. crump-0.1.0/tests/test_prepare.py +710 -0
  64. crump-0.1.0/tests/test_tabular_file.py +297 -0
  65. crump-0.1.0/tests/test_type_detection.py +172 -0
  66. crump-0.1.0/uv.lock +1249 -0
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "crump Development",
3
+ "image": "mcr.microsoft.com/devcontainers/python:3.14",
4
+
5
+ "features": {
6
+ "ghcr.io/devcontainers/features/docker-outside-of-docker:1": {
7
+ "moby": false
8
+ }
9
+ },
10
+
11
+ "customizations": {
12
+ "vscode": {
13
+ "settings": {
14
+ "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
15
+ "python.terminal.activateEnvironment": true,
16
+ "python.testing.pytestEnabled": true,
17
+ "python.testing.unittestEnabled": false,
18
+ "python.linting.enabled": true,
19
+ "python.linting.ruffEnabled": true,
20
+ "python.formatting.provider": "none",
21
+ "[python]": {
22
+ "editor.formatOnSave": true,
23
+ "editor.codeActionsOnSave": {
24
+ "source.organizeImports": "explicit"
25
+ },
26
+ "editor.defaultFormatter": "charliermarsh.ruff"
27
+ }
28
+ },
29
+ "extensions": [
30
+ "ms-python.python",
31
+ "ms-python.vscode-pylance",
32
+ "charliermarsh.ruff",
33
+ "ms-azuretools.vscode-docker"
34
+ ]
35
+ }
36
+ },
37
+
38
+ "postCreateCommand": "curl -LsSf https://astral.sh/uv/install.sh | sh; uv sync --all-extras",
39
+
40
+ "remoteUser": "vscode",
41
+
42
+ "mounts": [
43
+ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind"
44
+ ]
45
+ }
@@ -0,0 +1,15 @@
1
+ version: 2
2
+ updates:
3
+ # Maintain dependencies for GitHub Actions
4
+ - package-ecosystem: "github-actions"
5
+ directory: "/"
6
+ schedule:
7
+ interval: "weekly"
8
+ open-pull-requests-limit: 10
9
+
10
+ # Maintain dependencies for pip
11
+ - package-ecosystem: "pip"
12
+ directory: "/"
13
+ schedule:
14
+ interval: "weekly"
15
+ open-pull-requests-limit: 10
@@ -0,0 +1,169 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ tags:
8
+ - '*'
9
+ pull_request:
10
+ workflow_dispatch:
11
+
12
+ jobs:
13
+ lint:
14
+ name: Lint
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v5
18
+
19
+ - name: Set up Python 3.14
20
+ uses: actions/setup-python@v6
21
+ with:
22
+ python-version: "3.14"
23
+
24
+ - name: Install uv
25
+ uses: astral-sh/setup-uv@v7
26
+
27
+ - name: Install dependencies
28
+ run: uv sync --all-extras
29
+
30
+ - name: Run ruff format check
31
+ run: uv run ruff format --check .
32
+
33
+ - name: Run ruff lint
34
+ run: uv run ruff check .
35
+
36
+ - name: Run mypy type checking
37
+ run: uv run mypy src/crump --install-types --non-interactive || true
38
+ continue-on-error: true
39
+
40
+ test:
41
+ name: Test on ${{ matrix.os }} / Python ${{ matrix.python-version }}
42
+ runs-on: ${{ matrix.os }}
43
+ strategy:
44
+ fail-fast: false
45
+ matrix:
46
+ include:
47
+ # Test all Python versions on Ubuntu
48
+ - os: ubuntu-latest
49
+ python-version: "3.11"
50
+ - os: ubuntu-latest
51
+ python-version: "3.12"
52
+ - os: ubuntu-latest
53
+ python-version: "3.13"
54
+ - os: ubuntu-latest
55
+ python-version: "3.14"
56
+ # Test latest Python on other OSes
57
+ - os: windows-latest
58
+ python-version: "3.14"
59
+ - os: macos-latest
60
+ python-version: "3.14"
61
+
62
+ steps:
63
+ - uses: actions/checkout@v5
64
+
65
+ - name: Set up Python ${{ matrix.python-version }}
66
+ uses: actions/setup-python@v6
67
+ with:
68
+ python-version: ${{ matrix.python-version }}
69
+
70
+ - name: Install uv
71
+ uses: astral-sh/setup-uv@v7
72
+
73
+ - name: Install dependencies
74
+ run: uv sync --all-extras
75
+
76
+ - name: Run tests with coverage
77
+ run: uv run pytest
78
+
79
+ - name: Upload coverage to Codecov
80
+ if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.14'
81
+ uses: codecov/codecov-action@v5
82
+ with:
83
+ file: ./coverage.xml
84
+ fail_ci_if_error: false
85
+ token: ${{ secrets.CODECOV_TOKEN }}
86
+
87
+ build:
88
+ name: Build package
89
+ runs-on: ubuntu-latest
90
+ needs: [lint, test]
91
+ steps:
92
+ - uses: actions/checkout@v5
93
+
94
+ - name: Set up Python 3.14
95
+ uses: actions/setup-python@v6
96
+ with:
97
+ python-version: "3.14"
98
+
99
+ - name: Install uv
100
+ uses: astral-sh/setup-uv@v7
101
+
102
+ - name: Install build dependencies
103
+ run: uv pip install --system build hatchling
104
+
105
+ - name: Build package
106
+ run: python -m build
107
+
108
+ - name: Upload artifacts
109
+ uses: actions/upload-artifact@v5
110
+ with:
111
+ name: dist
112
+ path: dist/
113
+
114
+ docs:
115
+ name: Deploy Documentation
116
+ runs-on: ubuntu-latest
117
+ if: github.ref == 'refs/heads/main'
118
+ permissions:
119
+ contents: write
120
+ steps:
121
+ - uses: actions/checkout@v5
122
+
123
+ - name: Set up Python 3.14
124
+ uses: actions/setup-python@v6
125
+ with:
126
+ python-version: "3.14"
127
+
128
+ - name: Install uv
129
+ uses: astral-sh/setup-uv@v7
130
+
131
+ - name: Install dependencies
132
+ run: uv sync --all-extras
133
+
134
+ - name: Build and deploy docs
135
+ run: uv run mkdocs gh-deploy --force
136
+
137
+ publish-and-release:
138
+ name: Publish to PyPI and Create GitHub Release
139
+ environment: pypi
140
+ runs-on: ubuntu-latest
141
+ needs: [lint, test, build]
142
+ if: startsWith(github.ref, 'refs/tags/')
143
+ permissions:
144
+ id-token: write # Required for trusted publishing
145
+ contents: write # Required for creating releases
146
+ steps:
147
+ - name: Checkout code
148
+ uses: actions/checkout@v5
149
+
150
+ - name: Download artifacts
151
+ uses: actions/download-artifact@v6
152
+ with:
153
+ name: dist
154
+ path: dist/
155
+
156
+ - name: Publish to PyPI
157
+ uses: pypa/gh-action-pypi-publish@release/v1
158
+ continue-on-error: true
159
+ with:
160
+ password: ${{ secrets.PYPI_API_TOKEN }}
161
+
162
+ - name: Create GitHub Release
163
+ uses: softprops/action-gh-release@v2
164
+ continue-on-error: true
165
+ with:
166
+ files: dist/*
167
+ generate_release_notes: true
168
+ draft: false
169
+ prerelease: false
@@ -0,0 +1,61 @@
1
+ name: Deploy Documentation
2
+
3
+ on:
4
+ # Run on pushes to main branch
5
+ push:
6
+ branches:
7
+ - main
8
+ paths:
9
+ - 'docs/**'
10
+ - 'mkdocs.yml'
11
+ - '.github/workflows/docs.yml'
12
+
13
+ # Allow manual trigger
14
+ workflow_dispatch:
15
+
16
+ # Sets permissions for GitHub Pages deployment
17
+ permissions:
18
+ contents: read
19
+ pages: write
20
+ id-token: write
21
+
22
+ # Allow only one concurrent deployment
23
+ concurrency:
24
+ group: "pages"
25
+ cancel-in-progress: false
26
+
27
+ jobs:
28
+ build:
29
+ runs-on: ubuntu-latest
30
+ steps:
31
+ - name: Checkout repository
32
+ uses: actions/checkout@v5
33
+
34
+ - name: Setup Python
35
+ uses: actions/setup-python@v6
36
+ with:
37
+ python-version: '3.11'
38
+
39
+ - name: Install dependencies
40
+ run: |
41
+ python -m pip install --upgrade pip
42
+ pip install mkdocs mkdocs-material mkdocs-autorefs
43
+
44
+ - name: Build documentation
45
+ run: mkdocs build --clean
46
+
47
+ - name: Upload artifact
48
+ uses: actions/upload-pages-artifact@v4
49
+ with:
50
+ path: ./site
51
+
52
+ deploy:
53
+ environment:
54
+ name: github-pages
55
+ url: ${{ steps.deployment.outputs.page_url }}
56
+ runs-on: ubuntu-latest
57
+ needs: build
58
+ steps:
59
+ - name: Deploy to GitHub Pages
60
+ id: deployment
61
+ uses: actions/deploy-pages@v4
crump-0.1.0/.gitignore ADDED
@@ -0,0 +1,207 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
@@ -0,0 +1 @@
1
+ 3.14
@@ -0,0 +1,7 @@
1
+ {
2
+ "python.testing.pytestArgs": [
3
+ "tests"
4
+ ],
5
+ "python.testing.unittestEnabled": false,
6
+ "python.testing.pytestEnabled": true
7
+ }
crump-0.1.0/CLAUDE.md ADDED
@@ -0,0 +1,142 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Development Commands
6
+
7
+ ### Dependencies and Environment
8
+ ```bash
9
+ # Install dependencies with development extras
10
+ uv sync --all-extras
11
+
12
+ # OR using pip (but do not use pip unless you have to - prefer uv for dependency management)
13
+ pip install -e ".[dev]"
14
+ ```
15
+
16
+ ### Testing
17
+
18
+ Always write unit tests for new features and bug fixes. Ensure all tests pass before committing code.
19
+ Also add integration tests that exercise crump at the CLI level using `cli_runner`; see tests/test_cli.py for examples.
20
+ Tests should be passing before you commit code.
21
+
22
+ ```bash
23
+
24
+ # Start with the unit tests and the SQLite-only tests: they are fast to run and need no Docker
25
+ uv run pytest tests -k "not [postgres]" -v
26
+
27
+ # Then run the postgres tests (these are slower) - you should use the version of postgres that is already installed locally
28
+ pytest tests -k "[postgres]" -v
29
+
30
+ # Or run all tests
31
+ uv run pytest -v
32
+ ```
33
+
34
+ Other useful testing commands:
35
+
36
+ ```bash
37
+
38
+ # Run all tests with coverage
39
+ uv run pytest --cov=src --cov-report=term-missing
40
+
41
+ # Database integration tests against SQLite only: very fast, and no Docker required
42
+ uv run pytest tests -k "database and [sqlite]" -v
43
+
44
+ # Integration tests only (requires Docker)
45
+ uv run pytest tests/test_database_integration.py -v
46
+
47
+ # Run specific test
48
+ uv run pytest tests/test_config.py::TestCrumpConfig::test_load_from_yaml -v
49
+ ```
50
+
51
+ ### Code Quality - Linting and Type Checking
52
+
53
+ Always check code quality before committing code. Use the commands below to format, lint, and type check your code.
54
+
55
+ ```bash
56
+ # Format code
57
+ uv run ruff format .
58
+
59
+ # Check and fix linting
60
+ uv run ruff check --fix .
61
+
62
+ # Type checking
63
+ uv run mypy src
64
+ ```
65
+
66
+ ### Documentation
67
+
68
+ Documentation lives in markdown files in the docs/ folder. All new features should be documented, and the documentation should include code and CLI examples. These code and CLI examples are run automatically by the test suite to ensure they stay valid.
69
+
70
+ ```bash
71
+ # Generate and serve documentation locally
72
+ ./generate-docs.sh build
73
+
74
+ # OR manually
75
+ uv run mkdocs serve
76
+ ```
77
+
78
+ ## Project Architecture
79
+
80
+ ### Core Components
81
+
82
+ **CLI Interface** (`cli.py`, `cli_*.py`):
83
+ - Main entry point with Click-based commands
84
+ - Commands: `sync`, `prepare`, `inspect`, `extract`
85
+ - Each command has dedicated module (e.g., `cli_sync.py`)
86
+ - `extract` command supports both raw CDF dump and config-based extraction with column mapping
87
+
88
+ **Configuration System** (`config.py`):
89
+ - YAML-based job configuration with `CrumpConfig` and `CrumpJob` classes
90
+ - Column mappings between CSV and database
91
+ - Filename extraction patterns for metadata (dates, versions, etc.)
92
+ - Compound primary key support via `id_mapping`
93
+
94
+ **Database Operations** (`database.py`):
95
+ - PostgreSQL sync with `sync_csv_to_postgres()`
96
+ - Dry-run mode for previewing changes
97
+ - Automatic table creation and schema updates
98
+ - Stale record cleanup based on filename-extracted values
99
+
100
+ **Type Detection** (`type_detection.py`):
101
+ - Automatic CSV analysis for data types and nullable columns
102
+ - Primary key suggestion based on column characteristics
103
+
104
+ **CDF Support** (`cdf_*.py`):
105
+ - Reading and extracting CDF (Common Data Format) science files
106
+ - Conversion to CSV for database sync
107
+ - Config-based extraction with column mapping and transformations
108
+ - Two extraction modes: raw dump or config-based with same transformations as sync
109
+
110
+ ### Key Features
111
+
112
+ See README.md and docs/index.md for detailed feature list.
113
+
114
+ ### Configuration Structure
115
+
116
+ ```yaml
117
+ jobs:
118
+ job_name:
119
+ target_table: "table_name"
120
+ id_mapping: # Compound primary key
121
+ csv_col1: db_col1
122
+ csv_col2: db_col2
123
+ filename_to_column: # Extract from filename
124
+ template: "data_[date]_[version].csv"
125
+ columns:
126
+ date:
127
+ db_column: sync_date
128
+ type: date
129
+ use_to_delete_old_rows: true
130
+ columns: # Column mappings
131
+ csv_col: db_col
132
+ ```
133
+
134
+ ### Dependencies
135
+
136
+ - **Click**: CLI framework
137
+ - **Rich**: Terminal output formatting
138
+ - **PyYAML**: Configuration parsing
139
+ - **psycopg**: PostgreSQL adapter
140
+ - **cdflib**: CDF file reading
141
+ - **testcontainers**: Integration testing with real databases
142
+ - **pyarrow**: Parquet file support