dr-frames 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dr_frames-0.1.0/.gitignore +54 -0
- dr_frames-0.1.0/LICENSE +21 -0
- dr_frames-0.1.0/PKG-INFO +207 -0
- dr_frames-0.1.0/README.md +179 -0
- dr_frames-0.1.0/pyproject.toml +77 -0
- dr_frames-0.1.0/src/dr_frames/__init__.py +161 -0
- dr_frames-0.1.0/src/dr_frames/aggregation.py +138 -0
- dr_frames-0.1.0/src/dr_frames/cells.py +262 -0
- dr_frames-0.1.0/src/dr_frames/columns.py +108 -0
- dr_frames-0.1.0/src/dr_frames/filtering.py +96 -0
- dr_frames-0.1.0/src/dr_frames/formatting.py +257 -0
- dr_frames-0.1.0/src/dr_frames/parsing.py +69 -0
- dr_frames-0.1.0/src/dr_frames/profiling.py +246 -0
- dr_frames-0.1.0/src/dr_frames/py.typed +0 -0
- dr_frames-0.1.0/src/dr_frames/schema.py +244 -0
- dr_frames-0.1.0/src/dr_frames/types.py +61 -0
- dr_frames-0.1.0/tests/__init__.py +0 -0
- dr_frames-0.1.0/tests/conftest.py +41 -0
- dr_frames-0.1.0/tests/test_aggregation.py +110 -0
- dr_frames-0.1.0/tests/test_cells.py +168 -0
- dr_frames-0.1.0/tests/test_columns.py +107 -0
- dr_frames-0.1.0/tests/test_filtering.py +120 -0
- dr_frames-0.1.0/tests/test_formatting.py +74 -0
- dr_frames-0.1.0/tests/test_parsing.py +90 -0
- dr_frames-0.1.0/tests/test_profiling.py +131 -0
- dr_frames-0.1.0/tests/test_schema.py +133 -0
- dr_frames-0.1.0/tests/test_types.py +63 -0
- dr_frames-0.1.0/uv.lock +617 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
|
|
23
|
+
# Virtual environments
|
|
24
|
+
.venv/
|
|
25
|
+
venv/
|
|
26
|
+
ENV/
|
|
27
|
+
|
|
28
|
+
# IDE
|
|
29
|
+
.idea/
|
|
30
|
+
.vscode/
|
|
31
|
+
*.swp
|
|
32
|
+
*.swo
|
|
33
|
+
*~
|
|
34
|
+
|
|
35
|
+
# Testing
|
|
36
|
+
.pytest_cache/
|
|
37
|
+
.coverage
|
|
38
|
+
htmlcov/
|
|
39
|
+
.tox/
|
|
40
|
+
.nox/
|
|
41
|
+
|
|
42
|
+
# Ruff
|
|
43
|
+
.ruff_cache/
|
|
44
|
+
|
|
45
|
+
# Build
|
|
46
|
+
*.manifest
|
|
47
|
+
*.spec
|
|
48
|
+
|
|
49
|
+
# Jupyter
|
|
50
|
+
.ipynb_checkpoints/
|
|
51
|
+
|
|
52
|
+
# OS
|
|
53
|
+
.DS_Store
|
|
54
|
+
Thumbs.db
|
dr_frames-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Danielle Rothermel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dr_frames-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dr-frames
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pandas/DataFrame utilities for data manipulation, filtering, aggregation, and schema management
|
|
5
|
+
Project-URL: Homepage, https://github.com/drothermel/dr_frames
|
|
6
|
+
Project-URL: Repository, https://github.com/drothermel/dr_frames
|
|
7
|
+
Author-email: Danielle Rothermel <danielle.rothermel@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: aggregation,data-manipulation,dataframe,filtering,pandas,schema
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Requires-Dist: pandas>=2.0.0
|
|
22
|
+
Requires-Dist: pydantic>=2.0.0
|
|
23
|
+
Provides-Extra: formatting
|
|
24
|
+
Requires-Dist: pyyaml>=6.0.0; extra == 'formatting'
|
|
25
|
+
Requires-Dist: rich>=13.0.0; extra == 'formatting'
|
|
26
|
+
Requires-Dist: tabulate>=0.9.0; extra == 'formatting'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# dr_frames
|
|
30
|
+
|
|
31
|
+
Pandas/DataFrame utilities for data manipulation, filtering, aggregation, and schema management.
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install dr-frames
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
For table formatting features (console, Markdown, LaTeX), install the `formatting` extra:
|
|
40
|
+
```bash
|
|
41
|
+
pip install dr-frames[formatting]
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import pandas as pd
|
|
48
|
+
from dr_frames import (
|
|
49
|
+
coerce_numeric_cols,
|
|
50
|
+
filter_to_range,
|
|
51
|
+
move_cols_to_beginning,
|
|
52
|
+
select_subset,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
df = pd.DataFrame({
|
|
56
|
+
"name": ["alice", "bob", "charlie"],
|
|
57
|
+
"value": ["1.0", "2.0", "3.0"],
|
|
58
|
+
"category": ["x", "y", "x"],
|
|
59
|
+
})
|
|
60
|
+
|
|
61
|
+
result = (
|
|
62
|
+
df.pipe(coerce_numeric_cols, ["value"])
|
|
63
|
+
.pipe(select_subset, {"category": "x"})
|
|
64
|
+
.pipe(filter_to_range, "value", 0.5, 2.5)
|
|
65
|
+
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Module Overview
|
|
69
|
+
|
|
70
|
+
| Module | Purpose | Key Functions |
|
|
71
|
+
|--------|---------|---------------|
|
|
72
|
+
| **columns** | Column selection & reordering | `move_cols_to_beginning`, `get_cols_by_prefix`, `strip_col_prefixes` |
|
|
73
|
+
| **filtering** | Row filtering | `select_subset`, `filter_to_range`, `make_filter_fxn` |
|
|
74
|
+
| **cells** | Cell-level operations | `ensure_column`, `map_column_with_fallback`, `force_set_cell` |
|
|
75
|
+
| **types** | Type coercion | `coerce_numeric_cols`, `coerce_string_cols` |
|
|
76
|
+
| **aggregation** | GroupBy & reduction | `aggregate_over_seeds`, `apply_aggregations`, `unique_non_null` |
|
|
77
|
+
| **parsing** | String list parsing | `parse_first_element`, `sum_list_elements`, `is_homogeneous` |
|
|
78
|
+
| **schema** | Data field metadata | `DataField`, `ComputedField`, `DataFormat` |
|
|
79
|
+
| **profiling** | Column auto-tagging | `DFColInfo`, `ColInfo`, `looks_like_json` |
|
|
80
|
+
| **formatting** | Table output | `format_table`, `format_coverage_table` |
|
|
81
|
+
|
|
82
|
+
## Documentation
|
|
83
|
+
|
|
84
|
+
- [Full API Reference](docs/api.md)
|
|
85
|
+
- Module guides: [columns](docs/columns.md) | [filtering](docs/filtering.md) | [cells](docs/cells.md) | [types](docs/types.md) | [aggregation](docs/aggregation.md) | [parsing](docs/parsing.md) | [schema](docs/schema.md) | [profiling](docs/profiling.md) | [formatting](docs/formatting.md)
|
|
86
|
+
- [Recipes & Patterns](docs/recipes.md)
|
|
87
|
+
|
|
88
|
+
### Auto-generated API Docs
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# Serve interactive docs locally
|
|
92
|
+
uv run pdoc dr_frames
|
|
93
|
+
|
|
94
|
+
# Generate static HTML
|
|
95
|
+
uv run pdoc dr_frames -o docs/api_html
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Quick Reference
|
|
99
|
+
|
|
100
|
+
### Column Operations
|
|
101
|
+
```python
|
|
102
|
+
from dr_frames import (
|
|
103
|
+
contained_cols, # cols that exist in df
|
|
104
|
+
remaining_cols, # cols NOT in a list
|
|
105
|
+
get_cols_by_prefix, # cols starting with prefix
|
|
106
|
+
get_cols_by_contains, # cols containing substring
|
|
107
|
+
move_cols_to_beginning, # reorder cols
|
|
108
|
+
move_cols_with_prefix_to_end,
|
|
109
|
+
strip_col_prefixes, # rename by removing prefix
|
|
110
|
+
drop_all_null_cols, # remove empty columns
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Filtering
|
|
115
|
+
```python
|
|
116
|
+
from dr_frames import (
|
|
117
|
+
select_subset, # filter by exact column values
|
|
118
|
+
apply_filters_to_df, # filter by value lists
|
|
119
|
+
filter_to_value, # single value filter
|
|
120
|
+
filter_to_values, # multi-value filter
|
|
121
|
+
filter_to_range, # numeric range filter
|
|
122
|
+
filter_to_best_metric, # keep best per group
|
|
123
|
+
make_filter_fxn, # compose filters
|
|
124
|
+
)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Cell Operations
|
|
128
|
+
```python
|
|
129
|
+
from dr_frames import (
|
|
130
|
+
ensure_column, # add column if missing
|
|
131
|
+
fill_missing_values, # fillna with defaults dict
|
|
132
|
+
rename_columns, # safe rename (skips missing)
|
|
133
|
+
map_column_with_fallback, # map values, keep unmapped
|
|
134
|
+
apply_column_converters, # apply functions to columns
|
|
135
|
+
maybe_update_cell, # update if currently null
|
|
136
|
+
force_set_cell, # always update
|
|
137
|
+
masked_getter, # get value where mask is true
|
|
138
|
+
masked_setter, # set value where mask is true
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Type Coercion
|
|
143
|
+
```python
|
|
144
|
+
from dr_frames import (
|
|
145
|
+
coerce_numeric_cols, # convert to float/int
|
|
146
|
+
coerce_string_cols, # convert to string dtype
|
|
147
|
+
is_string_series, # check if series is strings
|
|
148
|
+
)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### Aggregation
|
|
152
|
+
```python
|
|
153
|
+
from dr_frames import (
|
|
154
|
+
aggregate_over_seeds, # mean/std/count by config
|
|
155
|
+
apply_aggregations, # flexible groupby
|
|
156
|
+
unique_non_null, # unique values excluding null
|
|
157
|
+
unique_by_col, # unique values in column
|
|
158
|
+
get_constant_cols, # cols with single value
|
|
159
|
+
fillna_with_defaults, # fill nulls from dict
|
|
160
|
+
maybe_pipe, # conditional pipe
|
|
161
|
+
)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Parsing
|
|
165
|
+
```python
|
|
166
|
+
from dr_frames import (
|
|
167
|
+
parse_list_string, # "[1,2,3]" -> [1,2,3]
|
|
168
|
+
parse_first_element, # "[1,2,3]" -> 1.0
|
|
169
|
+
sum_list_elements, # "[1,2,3]" -> 6.0
|
|
170
|
+
is_homogeneous, # "[1,1,1]" -> True
|
|
171
|
+
)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Schema
|
|
175
|
+
```python
|
|
176
|
+
from dr_frames import (
|
|
177
|
+
DataField, # field with metadata
|
|
178
|
+
ComputedField, # derived field
|
|
179
|
+
MetricDataField, # metric with group info
|
|
180
|
+
DataFormat, # container for fields
|
|
181
|
+
)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Profiling
|
|
185
|
+
```python
|
|
186
|
+
from dr_frames import (
|
|
187
|
+
DFColInfo, # catalog of column info
|
|
188
|
+
ColInfo, # single column metadata
|
|
189
|
+
looks_like_json, # detect JSON strings
|
|
190
|
+
looks_like_path, # detect file paths
|
|
191
|
+
infer_series_base_tag_type, # infer dtype tags
|
|
192
|
+
)
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
### Formatting (requires `[formatting]` extra)
|
|
196
|
+
```python
|
|
197
|
+
from dr_frames import (
|
|
198
|
+
format_table, # render table in multiple formats
|
|
199
|
+
format_coverage_table, # show column coverage stats
|
|
200
|
+
FORMATTER_TYPES, # available formatters
|
|
201
|
+
OUTPUT_FORMATS, # available output formats
|
|
202
|
+
)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## License
|
|
206
|
+
|
|
207
|
+
MIT
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# dr_frames
|
|
2
|
+
|
|
3
|
+
Pandas/DataFrame utilities for data manipulation, filtering, aggregation, and schema management.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install dr-frames
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
For table formatting features (console, Markdown, LaTeX), install the `formatting` extra:
|
|
12
|
+
```bash
|
|
13
|
+
pip install dr-frames[formatting]
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Quick Start
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
import pandas as pd
|
|
20
|
+
from dr_frames import (
|
|
21
|
+
coerce_numeric_cols,
|
|
22
|
+
filter_to_range,
|
|
23
|
+
move_cols_to_beginning,
|
|
24
|
+
select_subset,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
df = pd.DataFrame({
|
|
28
|
+
"name": ["alice", "bob", "charlie"],
|
|
29
|
+
"value": ["1.0", "2.0", "3.0"],
|
|
30
|
+
"category": ["x", "y", "x"],
|
|
31
|
+
})
|
|
32
|
+
|
|
33
|
+
result = (
|
|
34
|
+
df.pipe(coerce_numeric_cols, ["value"])
|
|
35
|
+
.pipe(select_subset, {"category": "x"})
|
|
36
|
+
.pipe(filter_to_range, "value", 0.5, 2.5)
|
|
37
|
+
)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Module Overview
|
|
41
|
+
|
|
42
|
+
| Module | Purpose | Key Functions |
|
|
43
|
+
|--------|---------|---------------|
|
|
44
|
+
| **columns** | Column selection & reordering | `move_cols_to_beginning`, `get_cols_by_prefix`, `strip_col_prefixes` |
|
|
45
|
+
| **filtering** | Row filtering | `select_subset`, `filter_to_range`, `make_filter_fxn` |
|
|
46
|
+
| **cells** | Cell-level operations | `ensure_column`, `map_column_with_fallback`, `force_set_cell` |
|
|
47
|
+
| **types** | Type coercion | `coerce_numeric_cols`, `coerce_string_cols` |
|
|
48
|
+
| **aggregation** | GroupBy & reduction | `aggregate_over_seeds`, `apply_aggregations`, `unique_non_null` |
|
|
49
|
+
| **parsing** | String list parsing | `parse_first_element`, `sum_list_elements`, `is_homogeneous` |
|
|
50
|
+
| **schema** | Data field metadata | `DataField`, `ComputedField`, `DataFormat` |
|
|
51
|
+
| **profiling** | Column auto-tagging | `DFColInfo`, `ColInfo`, `looks_like_json` |
|
|
52
|
+
| **formatting** | Table output | `format_table`, `format_coverage_table` |
|
|
53
|
+
|
|
54
|
+
## Documentation
|
|
55
|
+
|
|
56
|
+
- [Full API Reference](docs/api.md)
|
|
57
|
+
- Module guides: [columns](docs/columns.md) | [filtering](docs/filtering.md) | [cells](docs/cells.md) | [types](docs/types.md) | [aggregation](docs/aggregation.md) | [parsing](docs/parsing.md) | [schema](docs/schema.md) | [profiling](docs/profiling.md) | [formatting](docs/formatting.md)
|
|
58
|
+
- [Recipes & Patterns](docs/recipes.md)
|
|
59
|
+
|
|
60
|
+
### Auto-generated API Docs
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# Serve interactive docs locally
|
|
64
|
+
uv run pdoc dr_frames
|
|
65
|
+
|
|
66
|
+
# Generate static HTML
|
|
67
|
+
uv run pdoc dr_frames -o docs/api_html
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Quick Reference
|
|
71
|
+
|
|
72
|
+
### Column Operations
|
|
73
|
+
```python
|
|
74
|
+
from dr_frames import (
|
|
75
|
+
contained_cols, # cols that exist in df
|
|
76
|
+
remaining_cols, # cols NOT in a list
|
|
77
|
+
get_cols_by_prefix, # cols starting with prefix
|
|
78
|
+
get_cols_by_contains, # cols containing substring
|
|
79
|
+
move_cols_to_beginning, # reorder cols
|
|
80
|
+
move_cols_with_prefix_to_end,
|
|
81
|
+
strip_col_prefixes, # rename by removing prefix
|
|
82
|
+
drop_all_null_cols, # remove empty columns
|
|
83
|
+
)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Filtering
|
|
87
|
+
```python
|
|
88
|
+
from dr_frames import (
|
|
89
|
+
select_subset, # filter by exact column values
|
|
90
|
+
apply_filters_to_df, # filter by value lists
|
|
91
|
+
filter_to_value, # single value filter
|
|
92
|
+
filter_to_values, # multi-value filter
|
|
93
|
+
filter_to_range, # numeric range filter
|
|
94
|
+
filter_to_best_metric, # keep best per group
|
|
95
|
+
make_filter_fxn, # compose filters
|
|
96
|
+
)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Cell Operations
|
|
100
|
+
```python
|
|
101
|
+
from dr_frames import (
|
|
102
|
+
ensure_column, # add column if missing
|
|
103
|
+
fill_missing_values, # fillna with defaults dict
|
|
104
|
+
rename_columns, # safe rename (skips missing)
|
|
105
|
+
map_column_with_fallback, # map values, keep unmapped
|
|
106
|
+
apply_column_converters, # apply functions to columns
|
|
107
|
+
maybe_update_cell, # update if currently null
|
|
108
|
+
force_set_cell, # always update
|
|
109
|
+
masked_getter, # get value where mask is true
|
|
110
|
+
masked_setter, # set value where mask is true
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Type Coercion
|
|
115
|
+
```python
|
|
116
|
+
from dr_frames import (
|
|
117
|
+
coerce_numeric_cols, # convert to float/int
|
|
118
|
+
coerce_string_cols, # convert to string dtype
|
|
119
|
+
is_string_series, # check if series is strings
|
|
120
|
+
)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Aggregation
|
|
124
|
+
```python
|
|
125
|
+
from dr_frames import (
|
|
126
|
+
aggregate_over_seeds, # mean/std/count by config
|
|
127
|
+
apply_aggregations, # flexible groupby
|
|
128
|
+
unique_non_null, # unique values excluding null
|
|
129
|
+
unique_by_col, # unique values in column
|
|
130
|
+
get_constant_cols, # cols with single value
|
|
131
|
+
fillna_with_defaults, # fill nulls from dict
|
|
132
|
+
maybe_pipe, # conditional pipe
|
|
133
|
+
)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Parsing
|
|
137
|
+
```python
|
|
138
|
+
from dr_frames import (
|
|
139
|
+
parse_list_string, # "[1,2,3]" -> [1,2,3]
|
|
140
|
+
parse_first_element, # "[1,2,3]" -> 1.0
|
|
141
|
+
sum_list_elements, # "[1,2,3]" -> 6.0
|
|
142
|
+
is_homogeneous, # "[1,1,1]" -> True
|
|
143
|
+
)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Schema
|
|
147
|
+
```python
|
|
148
|
+
from dr_frames import (
|
|
149
|
+
DataField, # field with metadata
|
|
150
|
+
ComputedField, # derived field
|
|
151
|
+
MetricDataField, # metric with group info
|
|
152
|
+
DataFormat, # container for fields
|
|
153
|
+
)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Profiling
|
|
157
|
+
```python
|
|
158
|
+
from dr_frames import (
|
|
159
|
+
DFColInfo, # catalog of column info
|
|
160
|
+
ColInfo, # single column metadata
|
|
161
|
+
looks_like_json, # detect JSON strings
|
|
162
|
+
looks_like_path, # detect file paths
|
|
163
|
+
infer_series_base_tag_type, # infer dtype tags
|
|
164
|
+
)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Formatting (requires `[formatting]` extra)
|
|
168
|
+
```python
|
|
169
|
+
from dr_frames import (
|
|
170
|
+
format_table, # render table in multiple formats
|
|
171
|
+
format_coverage_table, # show column coverage stats
|
|
172
|
+
FORMATTER_TYPES, # available formatters
|
|
173
|
+
OUTPUT_FORMATS, # available output formats
|
|
174
|
+
)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## License
|
|
178
|
+
|
|
179
|
+
MIT
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "dr-frames"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Pandas/DataFrame utilities for data manipulation, filtering, aggregation, and schema management"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Danielle Rothermel", email = "danielle.rothermel@gmail.com" }
|
|
9
|
+
]
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
keywords = ["pandas", "dataframe", "data-manipulation", "filtering", "aggregation", "schema"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"Intended Audience :: Science/Research",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
20
|
+
"Topic :: Scientific/Engineering",
|
|
21
|
+
"Typing :: Typed",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"pandas>=2.0.0",
|
|
25
|
+
"pydantic>=2.0.0",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
formatting = [
|
|
30
|
+
"rich>=13.0.0",
|
|
31
|
+
"tabulate>=0.9.0",
|
|
32
|
+
"pyyaml>=6.0.0",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/drothermel/dr_frames"
|
|
37
|
+
Repository = "https://github.com/drothermel/dr_frames"
|
|
38
|
+
|
|
39
|
+
[build-system]
|
|
40
|
+
requires = ["hatchling"]
|
|
41
|
+
build-backend = "hatchling.build"
|
|
42
|
+
|
|
43
|
+
[tool.hatch.metadata]
|
|
44
|
+
allow-direct-references = true
|
|
45
|
+
|
|
46
|
+
[tool.hatch.build]
|
|
47
|
+
exclude = [
|
|
48
|
+
"docs",
|
|
49
|
+
"docs/**",
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
[tool.hatch.build.targets.wheel]
|
|
53
|
+
packages = ["src/dr_frames"]
|
|
54
|
+
|
|
55
|
+
[dependency-groups]
|
|
56
|
+
dev = [
|
|
57
|
+
"pdoc>=15.0.0",
|
|
58
|
+
"pytest>=8.4.1",
|
|
59
|
+
"pyyaml>=6.0.0",
|
|
60
|
+
"rich>=13.0.0",
|
|
61
|
+
"ruff>=0.9.0",
|
|
62
|
+
"tabulate>=0.9.0",
|
|
63
|
+
"ty>=0.0.14",
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
[tool.ruff]
|
|
67
|
+
include = [
|
|
68
|
+
"src/**/*.py",
|
|
69
|
+
"tests/**/*.py",
|
|
70
|
+
]
|
|
71
|
+
line-length = 88
|
|
72
|
+
cache-dir = ".ruff_cache"
|
|
73
|
+
exclude = ["outputs/*", "notebooks/*", "plots/*", "data/*", "docs/*"]
|
|
74
|
+
|
|
75
|
+
[tool.ruff.format]
|
|
76
|
+
indent-style = "space"
|
|
77
|
+
quote-style = "double"
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .aggregation import (
|
|
4
|
+
aggregate_over_seeds,
|
|
5
|
+
apply_aggregations,
|
|
6
|
+
fillna_with_defaults,
|
|
7
|
+
get_constant_cols,
|
|
8
|
+
maybe_pipe,
|
|
9
|
+
unique_by_col,
|
|
10
|
+
unique_by_cols,
|
|
11
|
+
unique_non_null,
|
|
12
|
+
)
|
|
13
|
+
from .cells import (
|
|
14
|
+
apply_column_converters,
|
|
15
|
+
apply_if_column,
|
|
16
|
+
ensure_column,
|
|
17
|
+
fill_missing_values,
|
|
18
|
+
force_set_cell,
|
|
19
|
+
group_col_by_prefix,
|
|
20
|
+
map_column_with_fallback,
|
|
21
|
+
masked_getter,
|
|
22
|
+
masked_setter,
|
|
23
|
+
maybe_update_cell,
|
|
24
|
+
rename_columns,
|
|
25
|
+
require_row_index,
|
|
26
|
+
)
|
|
27
|
+
from .columns import (
|
|
28
|
+
apply_skip,
|
|
29
|
+
contained_cols,
|
|
30
|
+
drop_all_null_cols,
|
|
31
|
+
get_cols_by_contains,
|
|
32
|
+
get_cols_by_prefix,
|
|
33
|
+
move_cols_to_beginning,
|
|
34
|
+
move_cols_with_prefix_to_end,
|
|
35
|
+
move_numeric_cols_to_end,
|
|
36
|
+
remaining_cols,
|
|
37
|
+
strip_col_prefixes,
|
|
38
|
+
strip_col_prefixes_batch,
|
|
39
|
+
)
|
|
40
|
+
from .filtering import (
|
|
41
|
+
apply_filters_to_df,
|
|
42
|
+
filter_to_best_metric,
|
|
43
|
+
filter_to_range,
|
|
44
|
+
filter_to_value,
|
|
45
|
+
filter_to_values,
|
|
46
|
+
make_filter_fxn,
|
|
47
|
+
select_subset,
|
|
48
|
+
)
|
|
49
|
+
from .parsing import (
|
|
50
|
+
is_homogeneous,
|
|
51
|
+
parse_first_element,
|
|
52
|
+
parse_list_string,
|
|
53
|
+
sum_list_elements,
|
|
54
|
+
)
|
|
55
|
+
from .profiling import (
|
|
56
|
+
ColInfo,
|
|
57
|
+
DFColInfo,
|
|
58
|
+
infer_col_name_contains_tags,
|
|
59
|
+
infer_col_name_prefix_tags,
|
|
60
|
+
infer_col_name_suffix_tags,
|
|
61
|
+
infer_series_base_tag_type,
|
|
62
|
+
infer_tags_from_series_sample,
|
|
63
|
+
looks_like_json,
|
|
64
|
+
looks_like_path,
|
|
65
|
+
)
|
|
66
|
+
from .schema import (
|
|
67
|
+
ComputedField,
|
|
68
|
+
DataField,
|
|
69
|
+
DataFormat,
|
|
70
|
+
MetricDataField,
|
|
71
|
+
)
|
|
72
|
+
from .types import (
|
|
73
|
+
coerce_numeric_cols,
|
|
74
|
+
coerce_string_cols,
|
|
75
|
+
is_string_series,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
from .formatting import ( # noqa: F401
|
|
80
|
+
FORMATTER_TYPES,
|
|
81
|
+
OUTPUT_FORMATS,
|
|
82
|
+
format_coverage_table,
|
|
83
|
+
format_table,
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
_HAS_FORMATTING = True
|
|
87
|
+
except ImportError:
|
|
88
|
+
_HAS_FORMATTING = False
|
|
89
|
+
|
|
90
|
+
__all__ = [
|
|
91
|
+
"aggregate_over_seeds",
|
|
92
|
+
"apply_aggregations",
|
|
93
|
+
"apply_column_converters",
|
|
94
|
+
"apply_filters_to_df",
|
|
95
|
+
"apply_if_column",
|
|
96
|
+
"apply_skip",
|
|
97
|
+
"coerce_numeric_cols",
|
|
98
|
+
"coerce_string_cols",
|
|
99
|
+
"ColInfo",
|
|
100
|
+
"ComputedField",
|
|
101
|
+
"contained_cols",
|
|
102
|
+
"DataField",
|
|
103
|
+
"DataFormat",
|
|
104
|
+
"DFColInfo",
|
|
105
|
+
"drop_all_null_cols",
|
|
106
|
+
"ensure_column",
|
|
107
|
+
"fill_missing_values",
|
|
108
|
+
"fillna_with_defaults",
|
|
109
|
+
"filter_to_best_metric",
|
|
110
|
+
"filter_to_range",
|
|
111
|
+
"filter_to_value",
|
|
112
|
+
"filter_to_values",
|
|
113
|
+
"force_set_cell",
|
|
114
|
+
"get_cols_by_contains",
|
|
115
|
+
"get_cols_by_prefix",
|
|
116
|
+
"get_constant_cols",
|
|
117
|
+
"group_col_by_prefix",
|
|
118
|
+
"infer_col_name_contains_tags",
|
|
119
|
+
"infer_col_name_prefix_tags",
|
|
120
|
+
"infer_col_name_suffix_tags",
|
|
121
|
+
"infer_series_base_tag_type",
|
|
122
|
+
"infer_tags_from_series_sample",
|
|
123
|
+
"is_homogeneous",
|
|
124
|
+
"is_string_series",
|
|
125
|
+
"looks_like_json",
|
|
126
|
+
"looks_like_path",
|
|
127
|
+
"make_filter_fxn",
|
|
128
|
+
"map_column_with_fallback",
|
|
129
|
+
"masked_getter",
|
|
130
|
+
"masked_setter",
|
|
131
|
+
"maybe_pipe",
|
|
132
|
+
"maybe_update_cell",
|
|
133
|
+
"MetricDataField",
|
|
134
|
+
"move_cols_to_beginning",
|
|
135
|
+
"move_cols_with_prefix_to_end",
|
|
136
|
+
"move_numeric_cols_to_end",
|
|
137
|
+
"parse_first_element",
|
|
138
|
+
"parse_list_string",
|
|
139
|
+
"remaining_cols",
|
|
140
|
+
"rename_columns",
|
|
141
|
+
"require_row_index",
|
|
142
|
+
"select_subset",
|
|
143
|
+
"strip_col_prefixes",
|
|
144
|
+
"strip_col_prefixes_batch",
|
|
145
|
+
"sum_list_elements",
|
|
146
|
+
"unique_by_col",
|
|
147
|
+
"unique_by_cols",
|
|
148
|
+
"unique_non_null",
|
|
149
|
+
]
|
|
150
|
+
|
|
151
|
+
if _HAS_FORMATTING:
|
|
152
|
+
__all__.extend(
|
|
153
|
+
[
|
|
154
|
+
"format_table",
|
|
155
|
+
"format_coverage_table",
|
|
156
|
+
"FORMATTER_TYPES",
|
|
157
|
+
"OUTPUT_FORMATS",
|
|
158
|
+
]
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
__version__ = "0.1.0"
|