dataprof 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataprof-0.3.0/.github/ISSUE_TEMPLATE/bug_report.yml +101 -0
- dataprof-0.3.0/.github/ISSUE_TEMPLATE/config.yml +8 -0
- dataprof-0.3.0/.github/ISSUE_TEMPLATE/feature_request.yml +91 -0
- dataprof-0.3.0/.github/dependabot.yml +22 -0
- dataprof-0.3.0/.github/pull_request_template.md +68 -0
- dataprof-0.3.0/.github/workflows/ci.yml +55 -0
- dataprof-0.3.0/.github/workflows/python.yml +133 -0
- dataprof-0.3.0/.gitignore +29 -0
- dataprof-0.3.0/.markdownlint.yaml +34 -0
- dataprof-0.3.0/.pre-commit-config.yaml +50 -0
- dataprof-0.3.0/CHANGELOG.md +226 -0
- dataprof-0.3.0/CONTRIBUTING.md +120 -0
- dataprof-0.3.0/Cargo.lock +1177 -0
- dataprof-0.3.0/Cargo.toml +97 -0
- dataprof-0.3.0/LICENSE +674 -0
- dataprof-0.3.0/MANIFEST.in +10 -0
- dataprof-0.3.0/PKG-INFO +388 -0
- dataprof-0.3.0/PYTHON.md +286 -0
- dataprof-0.3.0/README.md +356 -0
- dataprof-0.3.0/SECURITY.md +64 -0
- dataprof-0.3.0/assets/animations/HTML.gif +0 -0
- dataprof-0.3.0/docs/archive/DataProfiler v0.3.md +494 -0
- dataprof-0.3.0/docs/archive/roadmap.md +2048 -0
- dataprof-0.3.0/docs/archive/v03_verification_report.md +268 -0
- dataprof-0.3.0/docs/project/CLAUDE.md +125 -0
- dataprof-0.3.0/examples/customer_data_clean.csv +6 -0
- dataprof-0.3.0/examples/edge_cases_nightmare.csv +34 -0
- dataprof-0.3.0/examples/large_mixed_data.csv +31 -0
- dataprof-0.3.0/examples/logs.jsonl +5 -0
- dataprof-0.3.0/examples/performance_stress_test.csv +31 -0
- dataprof-0.3.0/examples/sales_data_problematic.csv +11 -0
- dataprof-0.3.0/examples/sample_data.csv +9 -0
- dataprof-0.3.0/examples/sensor_data_outliers.csv +10 -0
- dataprof-0.3.0/examples/test_cli.html +498 -0
- dataprof-0.3.0/examples/test_data.json +82 -0
- dataprof-0.3.0/examples/test_logs.jsonl +10 -0
- dataprof-0.3.0/examples/users.json +42 -0
- dataprof-0.3.0/examples/users_report.html +514 -0
- dataprof-0.3.0/justfile +164 -0
- dataprof-0.3.0/pyproject.toml +42 -0
- dataprof-0.3.0/python/dataprof/__init__.py +5 -0
- dataprof-0.3.0/python/examples/example.py +186 -0
- dataprof-0.3.0/python/setup.py +8 -0
- dataprof-0.3.0/python/tests/test_bindings.py +161 -0
- dataprof-0.3.0/requirements-build.txt +3 -0
- dataprof-0.3.0/scripts/setup-dev.ps1 +98 -0
- dataprof-0.3.0/scripts/setup-dev.sh +96 -0
- dataprof-0.3.0/setup.py +55 -0
- dataprof-0.3.0/src/acceleration/mod.rs +1 -0
- dataprof-0.3.0/src/acceleration/simd.rs +328 -0
- dataprof-0.3.0/src/api/mod.rs +3 -0
- dataprof-0.3.0/src/api/simple.rs +167 -0
- dataprof-0.3.0/src/core/batch.rs +435 -0
- dataprof-0.3.0/src/core/errors.rs +313 -0
- dataprof-0.3.0/src/core/mod.rs +16 -0
- dataprof-0.3.0/src/core/patterns.rs +4 -0
- dataprof-0.3.0/src/core/robust_csv.rs +493 -0
- dataprof-0.3.0/src/core/sampling/chunk_size.rs +52 -0
- dataprof-0.3.0/src/core/sampling/mod.rs +15 -0
- dataprof-0.3.0/src/core/sampling/reservoir.rs +415 -0
- dataprof-0.3.0/src/core/sampling/strategies.rs +440 -0
- dataprof-0.3.0/src/core/stats.rs +4 -0
- dataprof-0.3.0/src/core/streaming_stats.rs +351 -0
- dataprof-0.3.0/src/engines/columnar/arrow_profiler.rs +465 -0
- dataprof-0.3.0/src/engines/columnar/mod.rs +4 -0
- dataprof-0.3.0/src/engines/columnar/simple_columnar.rs +276 -0
- dataprof-0.3.0/src/engines/local.rs +4 -0
- dataprof-0.3.0/src/engines/mod.rs +7 -0
- dataprof-0.3.0/src/engines/streaming/memmap.rs +203 -0
- dataprof-0.3.0/src/engines/streaming/memory_efficient.rs +461 -0
- dataprof-0.3.0/src/engines/streaming/mod.rs +11 -0
- dataprof-0.3.0/src/engines/streaming/profiler.rs +224 -0
- dataprof-0.3.0/src/engines/streaming/progress.rs +115 -0
- dataprof-0.3.0/src/engines/streaming/true_streaming.rs +370 -0
- dataprof-0.3.0/src/lib.rs +583 -0
- dataprof-0.3.0/src/main.rs +608 -0
- dataprof-0.3.0/src/output/html.rs +695 -0
- dataprof-0.3.0/src/output/mod.rs +1 -0
- dataprof-0.3.0/src/python.rs +366 -0
- dataprof-0.3.0/src/types.rs +164 -0
- dataprof-0.3.0/src/utils/mod.rs +2 -0
- dataprof-0.3.0/src/utils/quality.rs +172 -0
- dataprof-0.3.0/src/utils/sampler.rs +125 -0
- dataprof-0.3.0/tests/data/customer_data_clean.csv +6 -0
- dataprof-0.3.0/tests/data/edge_cases_nightmare.csv +34 -0
- dataprof-0.3.0/tests/data/large_mixed_data.csv +31 -0
- dataprof-0.3.0/tests/data/performance_stress_test.csv +31 -0
- dataprof-0.3.0/tests/data/sales_data_problematic.csv +11 -0
- dataprof-0.3.0/tests/data/sample_data.csv +9 -0
- dataprof-0.3.0/tests/data/sensor_data_outliers.csv +10 -0
- dataprof-0.3.0/tests/integration_tests.rs +570 -0
- dataprof-0.3.0/tests/v03_comprehensive.rs +288 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
name: Bug Report
|
|
2
|
+
description: Report a bug or issue with DataProfiler
|
|
3
|
+
title: "[Bug]: "
|
|
4
|
+
labels: ["bug"]
|
|
5
|
+
assignees: []
|
|
6
|
+
body:
|
|
7
|
+
- type: markdown
|
|
8
|
+
attributes:
|
|
9
|
+
value: |
|
|
10
|
+
Thanks for taking the time to report a bug! Please fill out the information below to help us understand and fix the issue.
|
|
11
|
+
|
|
12
|
+
- type: textarea
|
|
13
|
+
id: description
|
|
14
|
+
attributes:
|
|
15
|
+
label: Bug Description
|
|
16
|
+
description: A clear and concise description of what the bug is.
|
|
17
|
+
placeholder: Describe what happened and what you expected to happen...
|
|
18
|
+
validations:
|
|
19
|
+
required: true
|
|
20
|
+
|
|
21
|
+
- type: textarea
|
|
22
|
+
id: reproduce
|
|
23
|
+
attributes:
|
|
24
|
+
label: Steps to Reproduce
|
|
25
|
+
description: Steps to reproduce the behavior
|
|
26
|
+
placeholder: |
|
|
27
|
+
1. Run command '...'
|
|
28
|
+
2. With file '...'
|
|
29
|
+
3. See error
|
|
30
|
+
value: |
|
|
31
|
+
1.
|
|
32
|
+
2.
|
|
33
|
+
3.
|
|
34
|
+
validations:
|
|
35
|
+
required: true
|
|
36
|
+
|
|
37
|
+
- type: textarea
|
|
38
|
+
id: expected
|
|
39
|
+
attributes:
|
|
40
|
+
label: Expected Behavior
|
|
41
|
+
description: What you expected to happen
|
|
42
|
+
placeholder: Describe the expected behavior...
|
|
43
|
+
validations:
|
|
44
|
+
required: true
|
|
45
|
+
|
|
46
|
+
- type: textarea
|
|
47
|
+
id: actual
|
|
48
|
+
attributes:
|
|
49
|
+
label: Actual Behavior
|
|
50
|
+
description: What actually happened (include error messages if any)
|
|
51
|
+
placeholder: Describe what actually happened...
|
|
52
|
+
validations:
|
|
53
|
+
required: true
|
|
54
|
+
|
|
55
|
+
- type: textarea
|
|
56
|
+
id: sample-data
|
|
57
|
+
attributes:
|
|
58
|
+
label: Sample Data
|
|
59
|
+
description: If applicable, provide a small sample of the data that causes the issue (anonymized if needed)
|
|
60
|
+
placeholder: |
|
|
61
|
+
CSV/JSON sample or file structure that reproduces the issue
|
|
62
|
+
render: text
|
|
63
|
+
|
|
64
|
+
- type: dropdown
|
|
65
|
+
id: os
|
|
66
|
+
attributes:
|
|
67
|
+
label: Operating System
|
|
68
|
+
description: What operating system are you using?
|
|
69
|
+
options:
|
|
70
|
+
- Windows
|
|
71
|
+
- macOS
|
|
72
|
+
- Linux (Ubuntu)
|
|
73
|
+
- Linux (Other)
|
|
74
|
+
- Other
|
|
75
|
+
validations:
|
|
76
|
+
required: true
|
|
77
|
+
|
|
78
|
+
- type: input
|
|
79
|
+
id: rust-version
|
|
80
|
+
attributes:
|
|
81
|
+
label: Rust Version
|
|
82
|
+
description: Output of `rustc --version`
|
|
83
|
+
placeholder: rustc 1.70.0 (90c541806 2023-05-31)
|
|
84
|
+
validations:
|
|
85
|
+
required: true
|
|
86
|
+
|
|
87
|
+
- type: input
|
|
88
|
+
id: dataprof-version
|
|
89
|
+
attributes:
|
|
90
|
+
label: DataProfiler Version
|
|
91
|
+
description: Version or commit hash of DataProfiler you're using
|
|
92
|
+
placeholder: v0.1.0 or commit hash
|
|
93
|
+
validations:
|
|
94
|
+
required: true
|
|
95
|
+
|
|
96
|
+
- type: textarea
|
|
97
|
+
id: additional-context
|
|
98
|
+
attributes:
|
|
99
|
+
label: Additional Context
|
|
100
|
+
description: Add any other context about the problem here, including file sizes, data types, etc.
|
|
101
|
+
placeholder: Any additional information that might be helpful...
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
blank_issues_enabled: false
|
|
2
|
+
contact_links:
|
|
3
|
+
- name: Questions & Support
|
|
4
|
+
url: https://github.com/AndreaBozzo/dataprof/discussions
|
|
5
|
+
about: Ask questions and discuss DataProfiler usage
|
|
6
|
+
- name: Security Issues
|
|
7
|
+
url: https://github.com/AndreaBozzo/dataprof/security/advisories/new
|
|
8
|
+
about: Report security vulnerabilities privately
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
name: Feature Request
|
|
2
|
+
description: Suggest a new feature or enhancement for DataProfiler
|
|
3
|
+
title: "[Feature]: "
|
|
4
|
+
labels: ["enhancement"]
|
|
5
|
+
assignees: []
|
|
6
|
+
body:
|
|
7
|
+
- type: markdown
|
|
8
|
+
attributes:
|
|
9
|
+
value: |
|
|
10
|
+
Thanks for suggesting a new feature! Please provide as much detail as possible.
|
|
11
|
+
|
|
12
|
+
- type: textarea
|
|
13
|
+
id: problem
|
|
14
|
+
attributes:
|
|
15
|
+
label: Problem Description
|
|
16
|
+
description: Is your feature request related to a problem? Please describe.
|
|
17
|
+
placeholder: I'm frustrated when... / It would be helpful if...
|
|
18
|
+
validations:
|
|
19
|
+
required: true
|
|
20
|
+
|
|
21
|
+
- type: textarea
|
|
22
|
+
id: solution
|
|
23
|
+
attributes:
|
|
24
|
+
label: Proposed Solution
|
|
25
|
+
description: Describe the solution you'd like to see
|
|
26
|
+
placeholder: I would like to see... / The feature should...
|
|
27
|
+
validations:
|
|
28
|
+
required: true
|
|
29
|
+
|
|
30
|
+
- type: textarea
|
|
31
|
+
id: alternatives
|
|
32
|
+
attributes:
|
|
33
|
+
label: Alternative Solutions
|
|
34
|
+
description: Describe any alternative solutions or features you've considered
|
|
35
|
+
placeholder: Alternative approaches could be...
|
|
36
|
+
|
|
37
|
+
- type: dropdown
|
|
38
|
+
id: category
|
|
39
|
+
attributes:
|
|
40
|
+
label: Feature Category
|
|
41
|
+
description: What category does this feature belong to?
|
|
42
|
+
options:
|
|
43
|
+
- Data Analysis (new statistics, metrics)
|
|
44
|
+
- Quality Detection (new issue types)
|
|
45
|
+
- File Format Support (new formats)
|
|
46
|
+
- Output/Reporting (new export formats)
|
|
47
|
+
- Performance Optimization
|
|
48
|
+
- CLI Interface (new commands, options)
|
|
49
|
+
- Library API (new functions)
|
|
50
|
+
- Other
|
|
51
|
+
validations:
|
|
52
|
+
required: true
|
|
53
|
+
|
|
54
|
+
- type: dropdown
|
|
55
|
+
id: priority
|
|
56
|
+
attributes:
|
|
57
|
+
label: Priority
|
|
58
|
+
description: How important is this feature to you?
|
|
59
|
+
options:
|
|
60
|
+
- Low - Nice to have
|
|
61
|
+
- Medium - Would be helpful
|
|
62
|
+
- High - Important for my use case
|
|
63
|
+
- Critical - Blocking my workflow
|
|
64
|
+
validations:
|
|
65
|
+
required: true
|
|
66
|
+
|
|
67
|
+
- type: textarea
|
|
68
|
+
id: use-case
|
|
69
|
+
attributes:
|
|
70
|
+
label: Use Case
|
|
71
|
+
description: Describe your specific use case for this feature
|
|
72
|
+
placeholder: I work with... and need to... because...
|
|
73
|
+
validations:
|
|
74
|
+
required: true
|
|
75
|
+
|
|
76
|
+
- type: textarea
|
|
77
|
+
id: example
|
|
78
|
+
attributes:
|
|
79
|
+
label: Example
|
|
80
|
+
description: If applicable, provide an example of how the feature would work
|
|
81
|
+
placeholder: |
|
|
82
|
+
Command: dataprof --new-feature data.csv
|
|
83
|
+
Output: ...
|
|
84
|
+
render: shell
|
|
85
|
+
|
|
86
|
+
- type: textarea
|
|
87
|
+
id: additional-context
|
|
88
|
+
attributes:
|
|
89
|
+
label: Additional Context
|
|
90
|
+
description: Add any other context, screenshots, or examples about the feature request
|
|
91
|
+
placeholder: Any additional information...
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
# Cargo dependencies
|
|
4
|
+
- package-ecosystem: "cargo"
|
|
5
|
+
directory: "/"
|
|
6
|
+
schedule:
|
|
7
|
+
interval: "weekly"
|
|
8
|
+
day: "monday"
|
|
9
|
+
time: "06:00"
|
|
10
|
+
open-pull-requests-limit: 5
|
|
11
|
+
reviewers:
|
|
12
|
+
- "AndreaBozzo"
|
|
13
|
+
commit-message:
|
|
14
|
+
prefix: "deps"
|
|
15
|
+
include: "scope"
|
|
16
|
+
|
|
17
|
+
# GitHub Actions
|
|
18
|
+
- package-ecosystem: "github-actions"
|
|
19
|
+
directory: "/"
|
|
20
|
+
schedule:
|
|
21
|
+
interval: "monthly"
|
|
22
|
+
open-pull-requests-limit: 3
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Pull Request
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
Brief description of the changes in this PR.
|
|
6
|
+
|
|
7
|
+
## Type of Change
|
|
8
|
+
|
|
9
|
+
- [ ] Bug fix (non-breaking change that fixes an issue)
|
|
10
|
+
- [ ] New feature (non-breaking change that adds functionality)
|
|
11
|
+
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
|
|
12
|
+
- [ ] Documentation update
|
|
13
|
+
- [ ] Performance improvement
|
|
14
|
+
- [ ] Code refactoring (no functional changes)
|
|
15
|
+
|
|
16
|
+
## Changes Made
|
|
17
|
+
|
|
18
|
+
- List key changes here
|
|
19
|
+
- Use bullet points for clarity
|
|
20
|
+
- Include any new dependencies added
|
|
21
|
+
|
|
22
|
+
## Testing
|
|
23
|
+
|
|
24
|
+
- [ ] I have tested the changes locally
|
|
25
|
+
- [ ] I have added tests for new functionality
|
|
26
|
+
- [ ] All existing tests pass
|
|
27
|
+
- [ ] I have tested with sample data files
|
|
28
|
+
|
|
29
|
+
### Test Commands Run
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# List the commands you used to test
|
|
33
|
+
cargo test
|
|
34
|
+
cargo build --release
|
|
35
|
+
# Any specific test scenarios
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Documentation
|
|
39
|
+
|
|
40
|
+
- [ ] I have updated the README if needed
|
|
41
|
+
- [ ] I have updated inline documentation/comments
|
|
42
|
+
- [ ] I have updated CONTRIBUTING.md if needed
|
|
43
|
+
|
|
44
|
+
## Performance Impact
|
|
45
|
+
|
|
46
|
+
- [ ] No performance impact
|
|
47
|
+
- [ ] Performance improved
|
|
48
|
+
- [ ] Performance may be affected (explain below)
|
|
49
|
+
|
|
50
|
+
If performance is affected, explain:
|
|
51
|
+
|
|
52
|
+
## Breaking Changes
|
|
53
|
+
|
|
54
|
+
If this is a breaking change, describe what breaks and the migration path:
|
|
55
|
+
|
|
56
|
+
## Additional Notes
|
|
57
|
+
|
|
58
|
+
Any additional information, screenshots, or context for reviewers:
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Checklist
|
|
63
|
+
|
|
64
|
+
- [ ] My code follows the project's style guidelines
|
|
65
|
+
- [ ] I have performed a self-review of my code
|
|
66
|
+
- [ ] My changes generate no new warnings
|
|
67
|
+
- [ ] I have made corresponding changes to the documentation
|
|
68
|
+
- [ ] Any dependent changes have been merged and published
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main, master ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ main, master ]
|
|
8
|
+
|
|
9
|
+
env:
|
|
10
|
+
CARGO_TERM_COLOR: always
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
name: Test
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v5
|
|
19
|
+
|
|
20
|
+
- name: Install Rust
|
|
21
|
+
uses: dtolnay/rust-toolchain@stable
|
|
22
|
+
|
|
23
|
+
- name: Cache dependencies
|
|
24
|
+
uses: actions/cache@v4
|
|
25
|
+
with:
|
|
26
|
+
path: |
|
|
27
|
+
~/.cargo/registry
|
|
28
|
+
~/.cargo/git
|
|
29
|
+
target
|
|
30
|
+
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
|
|
31
|
+
|
|
32
|
+
- name: Run tests
|
|
33
|
+
run: cargo test --verbose
|
|
34
|
+
|
|
35
|
+
- name: Check formatting
|
|
36
|
+
run: cargo fmt --all -- --check
|
|
37
|
+
|
|
38
|
+
- name: Run clippy
|
|
39
|
+
run: cargo clippy --all-targets --all-features -- -D warnings
|
|
40
|
+
|
|
41
|
+
build:
|
|
42
|
+
name: Build
|
|
43
|
+
runs-on: ${{ matrix.os }}
|
|
44
|
+
strategy:
|
|
45
|
+
matrix:
|
|
46
|
+
os: [ubuntu-latest, windows-latest, macos-latest]
|
|
47
|
+
|
|
48
|
+
steps:
|
|
49
|
+
- uses: actions/checkout@v5
|
|
50
|
+
|
|
51
|
+
- name: Install Rust
|
|
52
|
+
uses: dtolnay/rust-toolchain@stable
|
|
53
|
+
|
|
54
|
+
- name: Build
|
|
55
|
+
run: cargo build --release --verbose
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
name: Python Bindings
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ master ]
|
|
6
|
+
tags: [ 'v*' ]
|
|
7
|
+
pull_request:
|
|
8
|
+
branches: [ master ]
|
|
9
|
+
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
linux:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
strategy:
|
|
17
|
+
matrix:
|
|
18
|
+
target: [x86_64, aarch64, armv7, s390x, ppc64le]
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
- uses: actions/setup-python@v4
|
|
22
|
+
with:
|
|
23
|
+
python-version: '3.10'
|
|
24
|
+
- name: Build wheels
|
|
25
|
+
uses: PyO3/maturin-action@v1
|
|
26
|
+
with:
|
|
27
|
+
target: ${{ matrix.target }}
|
|
28
|
+
args: --release --out dist --find-interpreter --features python
|
|
29
|
+
sccache: 'true'
|
|
30
|
+
manylinux: auto
|
|
31
|
+
- name: Upload wheels
|
|
32
|
+
uses: actions/upload-artifact@v4
|
|
33
|
+
with:
|
|
34
|
+
name: wheels-linux-${{ matrix.target }}
|
|
35
|
+
path: dist
|
|
36
|
+
|
|
37
|
+
windows:
|
|
38
|
+
runs-on: windows-latest
|
|
39
|
+
strategy:
|
|
40
|
+
matrix:
|
|
41
|
+
target: [x64, x86]
|
|
42
|
+
steps:
|
|
43
|
+
- uses: actions/checkout@v4
|
|
44
|
+
- uses: actions/setup-python@v4
|
|
45
|
+
with:
|
|
46
|
+
python-version: '3.10'
|
|
47
|
+
architecture: ${{ matrix.target }}
|
|
48
|
+
- name: Build wheels
|
|
49
|
+
uses: PyO3/maturin-action@v1
|
|
50
|
+
with:
|
|
51
|
+
target: ${{ matrix.target }}
|
|
52
|
+
args: --release --out dist --find-interpreter --features python
|
|
53
|
+
sccache: 'true'
|
|
54
|
+
- name: Upload wheels
|
|
55
|
+
uses: actions/upload-artifact@v4
|
|
56
|
+
with:
|
|
57
|
+
name: wheels-windows-${{ matrix.target }}
|
|
58
|
+
path: dist
|
|
59
|
+
|
|
60
|
+
macos:
|
|
61
|
+
runs-on: macos-latest
|
|
62
|
+
strategy:
|
|
63
|
+
matrix:
|
|
64
|
+
target: [x86_64, aarch64]
|
|
65
|
+
steps:
|
|
66
|
+
- uses: actions/checkout@v4
|
|
67
|
+
- uses: actions/setup-python@v4
|
|
68
|
+
with:
|
|
69
|
+
python-version: '3.10'
|
|
70
|
+
- name: Build wheels
|
|
71
|
+
uses: PyO3/maturin-action@v1
|
|
72
|
+
with:
|
|
73
|
+
target: ${{ matrix.target }}
|
|
74
|
+
args: --release --out dist --find-interpreter --features python
|
|
75
|
+
sccache: 'true'
|
|
76
|
+
- name: Upload wheels
|
|
77
|
+
uses: actions/upload-artifact@v4
|
|
78
|
+
with:
|
|
79
|
+
name: wheels-macos-${{ matrix.target }}
|
|
80
|
+
path: dist
|
|
81
|
+
|
|
82
|
+
sdist:
|
|
83
|
+
runs-on: ubuntu-latest
|
|
84
|
+
steps:
|
|
85
|
+
- uses: actions/checkout@v4
|
|
86
|
+
- name: Build sdist
|
|
87
|
+
uses: PyO3/maturin-action@v1
|
|
88
|
+
with:
|
|
89
|
+
command: sdist
|
|
90
|
+
args: --out dist
|
|
91
|
+
- name: Upload sdist
|
|
92
|
+
uses: actions/upload-artifact@v4
|
|
93
|
+
with:
|
|
94
|
+
name: wheels-sdist
|
|
95
|
+
path: dist
|
|
96
|
+
|
|
97
|
+
release:
|
|
98
|
+
name: Release
|
|
99
|
+
runs-on: ubuntu-latest
|
|
100
|
+
if: startsWith(github.ref, 'refs/tags/')
|
|
101
|
+
needs: [linux, windows, macos, sdist]
|
|
102
|
+
permissions:
|
|
103
|
+
id-token: write # For trusted publishing
|
|
104
|
+
steps:
|
|
105
|
+
- uses: actions/download-artifact@v4
|
|
106
|
+
with:
|
|
107
|
+
pattern: wheels-*
|
|
108
|
+
merge-multiple: true
|
|
109
|
+
path: dist
|
|
110
|
+
- name: Publish to PyPI
|
|
111
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
112
|
+
with:
|
|
113
|
+
packages-dir: dist/
|
|
114
|
+
|
|
115
|
+
test:
|
|
116
|
+
runs-on: ubuntu-latest
|
|
117
|
+
needs: [linux]
|
|
118
|
+
steps:
|
|
119
|
+
- uses: actions/checkout@v4
|
|
120
|
+
- uses: actions/setup-python@v4
|
|
121
|
+
with:
|
|
122
|
+
python-version: '3.10'
|
|
123
|
+
- uses: actions/download-artifact@v4
|
|
124
|
+
with:
|
|
125
|
+
name: wheels-linux-x86_64
|
|
126
|
+
path: dist
|
|
127
|
+
- name: Install wheel
|
|
128
|
+
run: |
|
|
129
|
+
pip install --find-links dist dataprof
|
|
130
|
+
pip install pytest
|
|
131
|
+
- name: Test Python bindings
|
|
132
|
+
run: |
|
|
133
|
+
python python/tests/test_bindings.py
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Build artifacts
|
|
2
|
+
/target
|
|
3
|
+
**/*.rs.bk
|
|
4
|
+
|
|
5
|
+
# IDE files
|
|
6
|
+
.vscode/
|
|
7
|
+
.idea/
|
|
8
|
+
*.swp
|
|
9
|
+
*.swo
|
|
10
|
+
|
|
11
|
+
# OS files
|
|
12
|
+
.DS_Store
|
|
13
|
+
Thumbs.db
|
|
14
|
+
|
|
15
|
+
# Test files
|
|
16
|
+
test.csv
|
|
17
|
+
*.tmp
|
|
18
|
+
test_*.rs
|
|
19
|
+
*.pdb
|
|
20
|
+
*.exe
|
|
21
|
+
*.dll
|
|
22
|
+
example_usage/
|
|
23
|
+
|
|
24
|
+
# Logs
|
|
25
|
+
*.log
|
|
26
|
+
|
|
27
|
+
# Claude files
|
|
28
|
+
CLAUDE.md
|
|
29
|
+
.claude/
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Markdownlint configuration for DataProfiler v0.3.0
|
|
2
|
+
|
|
3
|
+
# Extend default configuration
|
|
4
|
+
extends: default
|
|
5
|
+
|
|
6
|
+
# Rule customizations
|
|
7
|
+
rules:
|
|
8
|
+
# Allow HTML in markdown (for badges, enhanced formatting)
|
|
9
|
+
MD033: false
|
|
10
|
+
|
|
11
|
+
# Allow long lines (for URLs, code examples)
|
|
12
|
+
MD013:
|
|
13
|
+
line_length: 200
|
|
14
|
+
|
|
15
|
+
# Allow multiple consecutive blank lines
|
|
16
|
+
MD012: false
|
|
17
|
+
|
|
18
|
+
# Allow multiple headings with same content
|
|
19
|
+
MD024: false
|
|
20
|
+
|
|
21
|
+
# Allow trailing punctuation in headings
|
|
22
|
+
MD026: false
|
|
23
|
+
|
|
24
|
+
# Allow bare URLs
|
|
25
|
+
MD034: false
|
|
26
|
+
|
|
27
|
+
# Allow emphasis as heading (for bold descriptions)
|
|
28
|
+
MD036: false
|
|
29
|
+
|
|
30
|
+
# Allow missing language on fenced code blocks
|
|
31
|
+
MD040: false
|
|
32
|
+
|
|
33
|
+
# Allow multiple top-level headings
|
|
34
|
+
MD025: false
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
# Rust formatting and linting
|
|
3
|
+
- repo: local
|
|
4
|
+
hooks:
|
|
5
|
+
- id: cargo-fmt
|
|
6
|
+
name: cargo fmt
|
|
7
|
+
entry: cargo fmt --all --
|
|
8
|
+
language: system
|
|
9
|
+
types: [rust]
|
|
10
|
+
pass_filenames: false
|
|
11
|
+
|
|
12
|
+
- id: cargo-clippy
|
|
13
|
+
name: cargo clippy
|
|
14
|
+
entry: cargo clippy --all-targets --all-features -- -D warnings
|
|
15
|
+
language: system
|
|
16
|
+
types: [rust]
|
|
17
|
+
pass_filenames: false
|
|
18
|
+
|
|
19
|
+
- id: cargo-test
|
|
20
|
+
name: cargo test
|
|
21
|
+
entry: cargo test --lib
|
|
22
|
+
language: system
|
|
23
|
+
types: [rust]
|
|
24
|
+
pass_filenames: false
|
|
25
|
+
|
|
26
|
+
# General file formatting
|
|
27
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
28
|
+
rev: v4.4.0
|
|
29
|
+
hooks:
|
|
30
|
+
- id: trailing-whitespace
|
|
31
|
+
- id: end-of-file-fixer
|
|
32
|
+
- id: check-yaml
|
|
33
|
+
- id: check-toml
|
|
34
|
+
- id: check-json
|
|
35
|
+
- id: check-merge-conflict
|
|
36
|
+
- id: check-case-conflict
|
|
37
|
+
- id: mixed-line-ending
|
|
38
|
+
|
|
39
|
+
# Markdown formatting (disabled for now due to complex legacy docs)
|
|
40
|
+
# - repo: https://github.com/igorshubovych/markdownlint-cli
|
|
41
|
+
# rev: v0.37.0
|
|
42
|
+
# hooks:
|
|
43
|
+
# - id: markdownlint
|
|
44
|
+
# args: ['--fix']
|
|
45
|
+
# files: '\.md$'
|
|
46
|
+
|
|
47
|
+
# Configuration
|
|
48
|
+
default_language_version:
|
|
49
|
+
python: python3
|
|
50
|
+
fail_fast: false
|