additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +58 -14
- additory/common/__init__.py +31 -147
- additory/common/column_selector.py +255 -0
- additory/common/distributions.py +286 -613
- additory/common/extractors.py +313 -0
- additory/common/knn_imputation.py +332 -0
- additory/common/result.py +380 -0
- additory/common/strategy_parser.py +243 -0
- additory/common/unit_conversions.py +338 -0
- additory/common/validation.py +283 -103
- additory/core/__init__.py +34 -22
- additory/core/backend.py +258 -0
- additory/core/config.py +177 -305
- additory/core/logging.py +230 -24
- additory/core/memory_manager.py +157 -495
- additory/expressions/__init__.py +2 -23
- additory/expressions/compiler.py +457 -0
- additory/expressions/engine.py +264 -487
- additory/expressions/integrity.py +179 -0
- additory/expressions/loader.py +263 -0
- additory/expressions/parser.py +363 -167
- additory/expressions/resolver.py +274 -0
- additory/functions/__init__.py +1 -0
- additory/functions/analyze/__init__.py +144 -0
- additory/functions/analyze/cardinality.py +58 -0
- additory/functions/analyze/correlations.py +66 -0
- additory/functions/analyze/distributions.py +53 -0
- additory/functions/analyze/duplicates.py +49 -0
- additory/functions/analyze/features.py +61 -0
- additory/functions/analyze/imputation.py +66 -0
- additory/functions/analyze/outliers.py +65 -0
- additory/functions/analyze/patterns.py +65 -0
- additory/functions/analyze/presets.py +72 -0
- additory/functions/analyze/quality.py +59 -0
- additory/functions/analyze/timeseries.py +53 -0
- additory/functions/analyze/types.py +45 -0
- additory/functions/expressions/__init__.py +161 -0
- additory/functions/snapshot/__init__.py +82 -0
- additory/functions/snapshot/filter.py +119 -0
- additory/functions/synthetic/__init__.py +113 -0
- additory/functions/synthetic/mode_detector.py +47 -0
- additory/functions/synthetic/strategies/__init__.py +1 -0
- additory/functions/synthetic/strategies/advanced.py +35 -0
- additory/functions/synthetic/strategies/augmentative.py +160 -0
- additory/functions/synthetic/strategies/generative.py +168 -0
- additory/functions/synthetic/strategies/presets.py +116 -0
- additory/functions/to/__init__.py +188 -0
- additory/functions/to/lookup.py +351 -0
- additory/functions/to/merge.py +189 -0
- additory/functions/to/sort.py +91 -0
- additory/functions/to/summarize.py +170 -0
- additory/functions/transform/__init__.py +140 -0
- additory/functions/transform/datetime.py +79 -0
- additory/functions/transform/extract.py +85 -0
- additory/functions/transform/harmonize.py +105 -0
- additory/functions/transform/knn.py +62 -0
- additory/functions/transform/onehotencoding.py +68 -0
- additory/functions/transform/transpose.py +42 -0
- additory-0.1.1a1.dist-info/METADATA +83 -0
- additory-0.1.1a1.dist-info/RECORD +62 -0
- additory/analysis/__init__.py +0 -48
- additory/analysis/cardinality.py +0 -126
- additory/analysis/correlations.py +0 -124
- additory/analysis/distributions.py +0 -376
- additory/analysis/quality.py +0 -158
- additory/analysis/scan.py +0 -400
- additory/common/backend.py +0 -371
- additory/common/column_utils.py +0 -191
- additory/common/exceptions.py +0 -62
- additory/common/lists.py +0 -229
- additory/common/patterns.py +0 -240
- additory/common/resolver.py +0 -567
- additory/common/sample_data.py +0 -182
- additory/core/ast_builder.py +0 -165
- additory/core/backends/__init__.py +0 -23
- additory/core/backends/arrow_bridge.py +0 -483
- additory/core/backends/cudf_bridge.py +0 -355
- additory/core/column_positioning.py +0 -358
- additory/core/compiler_polars.py +0 -166
- additory/core/enhanced_cache_manager.py +0 -1119
- additory/core/enhanced_matchers.py +0 -473
- additory/core/enhanced_version_manager.py +0 -325
- additory/core/executor.py +0 -59
- additory/core/integrity_manager.py +0 -477
- additory/core/loader.py +0 -190
- additory/core/namespace_manager.py +0 -657
- additory/core/parser.py +0 -176
- additory/core/polars_expression_engine.py +0 -601
- additory/core/registry.py +0 -176
- additory/core/sample_data_manager.py +0 -492
- additory/core/user_namespace.py +0 -751
- additory/core/validator.py +0 -27
- additory/dynamic_api.py +0 -304
- additory/expressions/proxy.py +0 -549
- additory/expressions/registry.py +0 -313
- additory/expressions/samples.py +0 -492
- additory/synthetic/__init__.py +0 -13
- additory/synthetic/column_name_resolver.py +0 -149
- additory/synthetic/distributions.py +0 -22
- additory/synthetic/forecast.py +0 -1132
- additory/synthetic/linked_list_parser.py +0 -415
- additory/synthetic/namespace_lookup.py +0 -129
- additory/synthetic/smote.py +0 -320
- additory/synthetic/strategies.py +0 -850
- additory/synthetic/synthesizer.py +0 -713
- additory/utilities/__init__.py +0 -53
- additory/utilities/encoding.py +0 -600
- additory/utilities/games.py +0 -300
- additory/utilities/keys.py +0 -8
- additory/utilities/lookup.py +0 -103
- additory/utilities/matchers.py +0 -216
- additory/utilities/resolvers.py +0 -286
- additory/utilities/settings.py +0 -167
- additory/utilities/units.py +0 -749
- additory/utilities/validators.py +0 -153
- additory-0.1.0a3.dist-info/METADATA +0 -288
- additory-0.1.0a3.dist-info/RECORD +0 -71
- additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
- {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
- {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
additory/utilities/validators.py
DELETED
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
# additory/utilities/validators.py
|
|
2
|
-
# Input validation utilities - now uses common module
|
|
3
|
-
|
|
4
|
-
"""
|
|
5
|
-
Validation Utilities Module
|
|
6
|
-
|
|
7
|
-
This module provides validation functions for utilities.
|
|
8
|
-
Core validation is now in additory.common for consistency.
|
|
9
|
-
|
|
10
|
-
This module adds utility-specific validations on top of common validations.
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
import pandas as pd
|
|
14
|
-
from typing import Any, List, Union, Optional
|
|
15
|
-
|
|
16
|
-
# Import from common module for consistency
|
|
17
|
-
from additory.common import (
|
|
18
|
-
validate_dataframe,
|
|
19
|
-
validate_columns_exist,
|
|
20
|
-
validate_positive_number,
|
|
21
|
-
validate_non_negative_number,
|
|
22
|
-
validate_parameter_choice,
|
|
23
|
-
is_dataframe,
|
|
24
|
-
ValidationError
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
# Re-export common validations for backward compatibility
|
|
28
|
-
__all__ = [
|
|
29
|
-
'validate_dataframe',
|
|
30
|
-
'validate_columns_exist',
|
|
31
|
-
'validate_positive_number',
|
|
32
|
-
'validate_non_negative_number',
|
|
33
|
-
'validate_parameter_choice',
|
|
34
|
-
'is_dataframe',
|
|
35
|
-
'validate_numeric_column',
|
|
36
|
-
'validate_string_column',
|
|
37
|
-
'validate_file_path',
|
|
38
|
-
'validate_directory_path',
|
|
39
|
-
'validate_column_name'
|
|
40
|
-
]
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def validate_numeric_column(df: Any, column: str) -> None:
|
|
44
|
-
"""
|
|
45
|
-
Validate that column contains numeric data
|
|
46
|
-
|
|
47
|
-
Args:
|
|
48
|
-
df: Dataframe to check
|
|
49
|
-
column: Column name to validate
|
|
50
|
-
|
|
51
|
-
Raises:
|
|
52
|
-
ValidationError: If column is not numeric
|
|
53
|
-
"""
|
|
54
|
-
# Check if column exists first
|
|
55
|
-
validate_columns_exist(df, column)
|
|
56
|
-
|
|
57
|
-
# For pandas, check dtype
|
|
58
|
-
if hasattr(df, 'dtypes'):
|
|
59
|
-
dtype = df[column].dtype
|
|
60
|
-
if not pd.api.types.is_numeric_dtype(dtype):
|
|
61
|
-
raise ValidationError(f"Column '{column}' must be numeric, got {dtype}")
|
|
62
|
-
|
|
63
|
-
# For other backends, try to detect non-numeric values
|
|
64
|
-
# This is a basic check - more sophisticated validation could be added
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def validate_string_column(df: Any, column: str) -> None:
|
|
68
|
-
"""
|
|
69
|
-
Validate that column contains string/text data
|
|
70
|
-
|
|
71
|
-
Args:
|
|
72
|
-
df: Dataframe to check
|
|
73
|
-
column: Column name to validate
|
|
74
|
-
|
|
75
|
-
Raises:
|
|
76
|
-
ValidationError: If column is not string-like
|
|
77
|
-
"""
|
|
78
|
-
# Check if column exists first
|
|
79
|
-
validate_columns_exist(df, column)
|
|
80
|
-
|
|
81
|
-
# For pandas, check dtype
|
|
82
|
-
if hasattr(df, 'dtypes'):
|
|
83
|
-
dtype = df[column].dtype
|
|
84
|
-
if not (dtype == 'object' or pd.api.types.is_string_dtype(dtype)):
|
|
85
|
-
raise ValidationError(f"Column '{column}' must be string/text, got {dtype}")
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def validate_file_path(path: str, must_exist: bool = True) -> None:
|
|
91
|
-
"""
|
|
92
|
-
Validate file path
|
|
93
|
-
|
|
94
|
-
Args:
|
|
95
|
-
path: File path to validate
|
|
96
|
-
must_exist: Whether file must exist
|
|
97
|
-
|
|
98
|
-
Raises:
|
|
99
|
-
ValidationError: If path is invalid
|
|
100
|
-
"""
|
|
101
|
-
import os
|
|
102
|
-
|
|
103
|
-
if not isinstance(path, str):
|
|
104
|
-
raise ValidationError(f"Path must be a string, got {type(path)}")
|
|
105
|
-
|
|
106
|
-
if must_exist and not os.path.exists(path):
|
|
107
|
-
raise ValidationError(f"Path does not exist: {path}")
|
|
108
|
-
|
|
109
|
-
if must_exist and not os.path.isfile(path):
|
|
110
|
-
raise ValidationError(f"Path is not a file: {path}")
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def validate_directory_path(path: str, must_exist: bool = True) -> None:
|
|
114
|
-
"""
|
|
115
|
-
Validate directory path
|
|
116
|
-
|
|
117
|
-
Args:
|
|
118
|
-
path: Directory path to validate
|
|
119
|
-
must_exist: Whether directory must exist
|
|
120
|
-
|
|
121
|
-
Raises:
|
|
122
|
-
ValidationError: If path is invalid
|
|
123
|
-
"""
|
|
124
|
-
import os
|
|
125
|
-
|
|
126
|
-
if not isinstance(path, str):
|
|
127
|
-
raise ValidationError(f"Path must be a string, got {type(path)}")
|
|
128
|
-
|
|
129
|
-
if must_exist and not os.path.exists(path):
|
|
130
|
-
raise ValidationError(f"Directory does not exist: {path}")
|
|
131
|
-
|
|
132
|
-
if must_exist and not os.path.isdir(path):
|
|
133
|
-
raise ValidationError(f"Path is not a directory: {path}")
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
def validate_column_name(name: str) -> None:
|
|
137
|
-
"""
|
|
138
|
-
Validate column name format
|
|
139
|
-
|
|
140
|
-
Args:
|
|
141
|
-
name: Column name to validate
|
|
142
|
-
|
|
143
|
-
Raises:
|
|
144
|
-
ValidationError: If name is invalid
|
|
145
|
-
"""
|
|
146
|
-
if not isinstance(name, str):
|
|
147
|
-
raise ValidationError(f"Column name must be a string, got {type(name)}")
|
|
148
|
-
|
|
149
|
-
if not name.strip():
|
|
150
|
-
raise ValidationError("Column name cannot be empty")
|
|
151
|
-
|
|
152
|
-
# Additional validation could be added here
|
|
153
|
-
# e.g., check for special characters, reserved words, etc.
|
|
@@ -1,288 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: additory
|
|
3
|
-
Version: 0.1.0a3
|
|
4
|
-
Summary: A semantic, extensible dataframe transformation engine with expressions, lookup, and synthetic data generation support.
|
|
5
|
-
Author: Krishnamoorthy Sankaran
|
|
6
|
-
License: MIT
|
|
7
|
-
Project-URL: homepage, https://github.com/sekarkrishna/additory
|
|
8
|
-
Project-URL: documentation, https://github.com/sekarkrishna/additory/tree/main/documentation/V0.1.0
|
|
9
|
-
Project-URL: source, https://github.com/sekarkrishna/additory
|
|
10
|
-
Project-URL: issues, https://github.com/sekarkrishna/additory/issues
|
|
11
|
-
Requires-Python: >=3.9
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
License-File: LICENSE
|
|
14
|
-
Requires-Dist: pandas>=1.5
|
|
15
|
-
Requires-Dist: polars>=0.20
|
|
16
|
-
Requires-Dist: pyarrow>=10.0
|
|
17
|
-
Requires-Dist: pyyaml>=6.0
|
|
18
|
-
Requires-Dist: requests>=2.31
|
|
19
|
-
Requires-Dist: toml>=0.10
|
|
20
|
-
Requires-Dist: scipy>=1.9
|
|
21
|
-
Requires-Dist: numpy>=1.21
|
|
22
|
-
Requires-Dist: packaging>=21.0
|
|
23
|
-
Requires-Dist: psutil>=5.8
|
|
24
|
-
Provides-Extra: gpu
|
|
25
|
-
Requires-Dist: cudf>=24.0; extra == "gpu"
|
|
26
|
-
Provides-Extra: dev
|
|
27
|
-
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
28
|
-
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
29
|
-
Requires-Dist: pytest-xdist>=3.0; extra == "dev"
|
|
30
|
-
Requires-Dist: hypothesis>=6.0; extra == "dev"
|
|
31
|
-
Requires-Dist: black>=23.0; extra == "dev"
|
|
32
|
-
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
33
|
-
Requires-Dist: coverage>=7.0; extra == "dev"
|
|
34
|
-
Dynamic: license-file
|
|
35
|
-
|
|
36
|
-
# Additory
|
|
37
|
-
|
|
38
|
-
**A semantic, extensible dataframe transformation engine with expressions, lookup, and augmentation support.**
|
|
39
|
-
|
|
40
|
-
[](https://www.python.org/downloads/)
|
|
41
|
-
[](https://opensource.org/licenses/MIT)
|
|
42
|
-
[](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/)
|
|
43
|
-
|
|
44
|
-
**Author:** Krishnamoorthy Sankaran
|
|
45
|
-
|
|
46
|
-
## 🛠️ Requirements
|
|
47
|
-
|
|
48
|
-
- **Python**: 3.9+
|
|
49
|
-
- **Core dependencies**: pandas, polars, numpy, scipy
|
|
50
|
-
- **Optional**: cuDF (for GPU support)
|
|
51
|
-
|
|
52
|
-
## 📦 Installation
|
|
53
|
-
|
|
54
|
-
```bash
|
|
55
|
-
pip install additory==0.1.0a2
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
**Optional GPU support:**
|
|
59
|
-
```bash
|
|
60
|
-
pip install additory[gpu]==0.1.0a2 # Includes cuDF for GPU acceleration
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
**Development installation:**
|
|
64
|
-
```bash
|
|
65
|
-
pip install additory[dev]==0.1.0a2 # Includes testing and development tools
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
## 🎯 Core Functions
|
|
69
|
-
|
|
70
|
-
| Function | Purpose | Example |
|
|
71
|
-
|----------|---------|---------|
|
|
72
|
-
| `add.to()` | Lookup/join operations | `add.to(df1, from_df=df2, bring='col', against='key')` |
|
|
73
|
-
| `add.augment()` | Generate additional data | `add.augment(df, n_rows=1000)` |
|
|
74
|
-
| `add.scan()` | Data profiling & analysis | `add.scan(df, preset="full")` |
|
|
75
|
-
|
|
76
|
-
## 🧬 Available Expressions
|
|
77
|
-
|
|
78
|
-
Additory includes 12 built-in health and fitness expressions:
|
|
79
|
-
|
|
80
|
-
- **`add.bmi()`** - Body Mass Index
|
|
81
|
-
- **`add.bsa()`** - Body Surface Area
|
|
82
|
-
- **`add.bmr()`** - Basal Metabolic Rate
|
|
83
|
-
- **`add.waist_hip_ratio()`** - Waist-to-Hip Ratio
|
|
84
|
-
- **`add.body_fat_percentage()`** - Body Fat Percentage
|
|
85
|
-
- **`add.ideal_body_weight()`** - Ideal Body Weight
|
|
86
|
-
- **`add.blood_pressure_category()`** - BP Classification
|
|
87
|
-
- **`add.cholesterol_ratio()`** - Cholesterol Ratio
|
|
88
|
-
- **`add.age_category()`** - Age Classification
|
|
89
|
-
- **`add.fitness_score()`** - Overall Fitness Score
|
|
90
|
-
|
|
91
|
-
```python
|
|
92
|
-
# Health calculations
|
|
93
|
-
patients = pd.DataFrame({
|
|
94
|
-
'weight_kg': [70, 80, 65], # Weight in kilograms
|
|
95
|
-
'height_m': [1.75, 1.80, 1.60], # Height in meters
|
|
96
|
-
'age': [25, 35, 45],
|
|
97
|
-
'gender': ['M', 'F', 'M']
|
|
98
|
-
})
|
|
99
|
-
|
|
100
|
-
patients_bmi = add.bmi(patients)
|
|
101
|
-
patients_bsa = add.bsa(patients)
|
|
102
|
-
fitness_scores = add.fitness_score(patients)
|
|
103
|
-
|
|
104
|
-
# Chain multiple expressions
|
|
105
|
-
result = add.fitness_score(add.bmr(add.bmi(patients)))
|
|
106
|
-
```
|
|
107
|
-
|
|
108
|
-
## 🔧 DataFrame Support
|
|
109
|
-
|
|
110
|
-
Additory works seamlessly with multiple DataFrame libraries:
|
|
111
|
-
|
|
112
|
-
- **pandas** - Full support
|
|
113
|
-
- **polars** - Full support
|
|
114
|
-
- **cuDF** - GPU acceleration support
|
|
115
|
-
|
|
116
|
-
```python
|
|
117
|
-
import polars as pl
|
|
118
|
-
import additory as add
|
|
119
|
-
|
|
120
|
-
# Works with polars
|
|
121
|
-
df_polars = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
|
122
|
-
result = add.augment(df_polars, n_rows=100)
|
|
123
|
-
|
|
124
|
-
# Automatic type detection and conversion
|
|
125
|
-
```
|
|
126
|
-
|
|
127
|
-
## ✨ Key Features
|
|
128
|
-
|
|
129
|
-
### 🔧 Utilities
|
|
130
|
-
|
|
131
|
-
**add.to() - Data Lookup & Joins**
|
|
132
|
-
Simplified syntax for bringing columns from one dataframe to another.
|
|
133
|
-
|
|
134
|
-
```python
|
|
135
|
-
# Simple lookup
|
|
136
|
-
orders_with_prices = add.to(
|
|
137
|
-
orders,
|
|
138
|
-
from_df=products,
|
|
139
|
-
bring='price',
|
|
140
|
-
against='product_id'
|
|
141
|
-
)
|
|
142
|
-
|
|
143
|
-
# Multiple columns and keys
|
|
144
|
-
enriched = add.to(
|
|
145
|
-
orders,
|
|
146
|
-
from_df=products,
|
|
147
|
-
bring=['price', 'category'],
|
|
148
|
-
against=['product_id', 'region']
|
|
149
|
-
)
|
|
150
|
-
```
|
|
151
|
-
|
|
152
|
-
**add.onehotencoding() - Categorical Encoding**
|
|
153
|
-
Convert categorical columns to one-hot encoded format.
|
|
154
|
-
|
|
155
|
-
```python
|
|
156
|
-
# One-hot encoding (single column)
|
|
157
|
-
encoded = add.onehotencoding(df, 'category')
|
|
158
|
-
```
|
|
159
|
-
|
|
160
|
-
**add.harmonize_units() - Unit Standardization**
|
|
161
|
-
Standardize units across your dataset.
|
|
162
|
-
|
|
163
|
-
```python
|
|
164
|
-
# Unit harmonization
|
|
165
|
-
standardized = add.harmonize_units(
|
|
166
|
-
df,
|
|
167
|
-
value_column='temperature',
|
|
168
|
-
unit_column='unit',
|
|
169
|
-
target_unit='C'
|
|
170
|
-
)
|
|
171
|
-
```
|
|
172
|
-
|
|
173
|
-
### 🧮 Expressions
|
|
174
|
-
|
|
175
|
-
Pre-built calculations for health, fitness, and common metrics. Simple examples:
|
|
176
|
-
|
|
177
|
-
```python
|
|
178
|
-
# Create patient data with correct column names
|
|
179
|
-
patients = pd.DataFrame({
|
|
180
|
-
'weight_kg': [70, 80, 65], # Weight in kilograms
|
|
181
|
-
'height_m': [1.75, 1.80, 1.60], # Height in meters
|
|
182
|
-
'age': [25, 35, 45],
|
|
183
|
-
'gender': ['M', 'F', 'M']
|
|
184
|
-
})
|
|
185
|
-
|
|
186
|
-
# Calculate BMI
|
|
187
|
-
patients_with_bmi = add.bmi(patients)
|
|
188
|
-
|
|
189
|
-
# Calculate Body Surface Area
|
|
190
|
-
patients_with_bsa = add.bsa(patients)
|
|
191
|
-
|
|
192
|
-
# Chain multiple expressions
|
|
193
|
-
result = add.fitness_score(add.bmr(add.bmi(patients)))
|
|
194
|
-
```
|
|
195
|
-
|
|
196
|
-
### 🔄 Augment Data Generation
|
|
197
|
-
|
|
198
|
-
**Augment** generates additional data similar to your existing dataset using inline strategies.
|
|
199
|
-
|
|
200
|
-
```python
|
|
201
|
-
# Augment existing data (learns from patterns)
|
|
202
|
-
more_customers = add.augment(customers, n_rows=1000)
|
|
203
|
-
|
|
204
|
-
# Create data from scratch with strategies
|
|
205
|
-
new_data = add.augment("@new", n_rows=500, strategy={
|
|
206
|
-
'id': 'increment:start=1',
|
|
207
|
-
'name': 'choice:[John,Jane,Bob]',
|
|
208
|
-
'age': 'range:18-65'
|
|
209
|
-
})
|
|
210
|
-
```
|
|
211
|
-
|
|
212
|
-
## 🧪 Examples
|
|
213
|
-
|
|
214
|
-
### E-commerce Data Pipeline
|
|
215
|
-
```python
|
|
216
|
-
import pandas as pd
|
|
217
|
-
import additory as add
|
|
218
|
-
|
|
219
|
-
# Start with small customer sample
|
|
220
|
-
customers = pd.DataFrame({
|
|
221
|
-
'customer_id': [1, 2, 3],
|
|
222
|
-
'age': [25, 35, 45],
|
|
223
|
-
'region': ['North', 'South', 'East']
|
|
224
|
-
})
|
|
225
|
-
|
|
226
|
-
# Generate more customers
|
|
227
|
-
customers = add.augment(customers, n_rows=10000)
|
|
228
|
-
|
|
229
|
-
# Add customer tiers
|
|
230
|
-
tiers = pd.DataFrame({
|
|
231
|
-
'customer_id': range(1, 4), # Match original IDs
|
|
232
|
-
'tier': ['Gold', 'Silver', 'Bronze']
|
|
233
|
-
})
|
|
234
|
-
|
|
235
|
-
# Use pipeline approach
|
|
236
|
-
result = (customers
|
|
237
|
-
.pipe(add.to, from_df=tiers, bring='tier', against='customer_id')
|
|
238
|
-
.pipe(add.scan, preset="quick"))
|
|
239
|
-
|
|
240
|
-
print(result.summary())
|
|
241
|
-
```
|
|
242
|
-
|
|
243
|
-
### Healthcare Data Analysis
|
|
244
|
-
```python
|
|
245
|
-
# Create patient data from scratch
|
|
246
|
-
strategy = {
|
|
247
|
-
'patient_id': 'increment:start=1',
|
|
248
|
-
'age': 'range:18-80',
|
|
249
|
-
'weight_kg': 'range:50-120', # Weight in kg
|
|
250
|
-
'height_cm': 'range:150-200' # Height in cm
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
patients = add.augment("@new", n_rows=1000, strategy=strategy)
|
|
254
|
-
|
|
255
|
-
# Convert height to meters for expressions
|
|
256
|
-
patients['height_m'] = patients['height_cm'] / 100
|
|
257
|
-
|
|
258
|
-
# Calculate health metrics using pipeline
|
|
259
|
-
result = (patients
|
|
260
|
-
.pipe(add.bmi)
|
|
261
|
-
.pipe(add.scan, preset="correlations"))
|
|
262
|
-
|
|
263
|
-
print(result.correlations)
|
|
264
|
-
```
|
|
265
|
-
|
|
266
|
-
## 📚 Documentation
|
|
267
|
-
|
|
268
|
-
- **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/)** - Detailed guides for each function
|
|
269
|
-
- **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/expressions.html)** - Complete expressions reference
|
|
270
|
-
|
|
271
|
-
## 📄 License
|
|
272
|
-
|
|
273
|
-
MIT License - see [LICENSE](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/LICENSE) file for details.
|
|
274
|
-
|
|
275
|
-
## 📞 Support
|
|
276
|
-
|
|
277
|
-
- **Issues**: [GitHub Issues](https://github.com/sekarkrishna/additory/issues)
|
|
278
|
-
- **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0)
|
|
279
|
-
|
|
280
|
-
## 🗺️ v0.1.1 (February 2025)
|
|
281
|
-
- Enhanced documentation and tutorials
|
|
282
|
-
- Performance optimizations
|
|
283
|
-
- Additional expressions
|
|
284
|
-
- Advanced synthetic data patterns
|
|
285
|
-
|
|
286
|
-
---
|
|
287
|
-
|
|
288
|
-
**Made with ❤️ for data scientists, analysts, and developers who love working with data.**
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
additory/__init__.py,sha256=Q0vhhQxwcI-Wl-8TERWqYaq4_8gJXqUBUGSywlhdD3E,397
|
|
2
|
-
additory/dynamic_api.py,sha256=q38rax223ZX5lRNRPs0I7WXd1YZwBJEF2nm1IG-mzFI,11843
|
|
3
|
-
additory/analysis/__init__.py,sha256=F_yhD_hcIWbwO1wrRe8Js1RI-vkozaKyWNIIEb-jSMk,986
|
|
4
|
-
additory/analysis/cardinality.py,sha256=y4ttjk3VFNm3mEfNZaTegVQxH7btnmXgnDUSkctNuTo,2976
|
|
5
|
-
additory/analysis/correlations.py,sha256=n0vIPW9lTTSPsPlr40YOIohTX3mUgGmSLdlBrkJZa1c,3909
|
|
6
|
-
additory/analysis/distributions.py,sha256=nkYme_gkzdJ1FWlLJIV31CvoDc3GOOu8KlmeJ2jgkds,10777
|
|
7
|
-
additory/analysis/quality.py,sha256=chow5ZcPaV7czKrycP-35pmBZJYCpLsy20rQ5U4qgCw,4221
|
|
8
|
-
additory/analysis/scan.py,sha256=a6rP1VnwMEhapwIwynzYyX2IqfPcWMOJ4Ff2wRDzh40,14200
|
|
9
|
-
additory/common/__init__.py,sha256=yxwq52uABz5dpC2N7F2wPyvxcsczAxdWNkAKvjwRgys,3551
|
|
10
|
-
additory/common/backend.py,sha256=_VK2RGA4Ee_pUvuubBfdNYK3YU0zrEcYqNk-YkiIG4Y,10571
|
|
11
|
-
additory/common/column_utils.py,sha256=w2zqmiogavgzD2K9-yeH4GLg0f6xLQkID-9QP_8JLxk,5333
|
|
12
|
-
additory/common/distributions.py,sha256=5GZ-1V-E5-T0i1wXuD8v2N3lVbnZAjF18exLG_PE_Qk,21974
|
|
13
|
-
additory/common/exceptions.py,sha256=S6l0Au0_y2HybPnB2c3SUl9M--HgqLLpj6TJasN5mlc,1207
|
|
14
|
-
additory/common/lists.py,sha256=ItbUpm4YFTqBR5RfryeoIiyQdiRVvKHOnu9ZFshPjcw,6443
|
|
15
|
-
additory/common/patterns.py,sha256=_Z6AVItn72nNYMJX-r93Z7SSZmVngQ7pzqbvhKgmBUQ,6892
|
|
16
|
-
additory/common/resolver.py,sha256=9sfuWbHlvYvJlRx18W6BZU-3sgdo7qNRXB0WuzeZjUc,20574
|
|
17
|
-
additory/common/sample_data.py,sha256=Zqjz4Ed-nUptOBw48A0L4BKwo3HEBN9hCnwpnQnM_1w,5838
|
|
18
|
-
additory/common/validation.py,sha256=P3OQa6ae1NST_UcEW_MgWzSUKUPQpTwoZ5h4OnPGu5E,5930
|
|
19
|
-
additory/core/__init__.py,sha256=dhEBneupBndNBlsQI8niFZgQjUJDLORzRcFtvXGXg-E,630
|
|
20
|
-
additory/core/ast_builder.py,sha256=cW65w-utVGjUJos1ffmfEPgPbxVwN6WU-vcDKrBPy8o,5303
|
|
21
|
-
additory/core/column_positioning.py,sha256=1frwieAvdHXvlZzlUhL1BXP1P_iOZ7yzCNDlvw4L9kI,13241
|
|
22
|
-
additory/core/compiler_polars.py,sha256=wN_785yk7N3tYGPCP2IsOpCeWxqJNOMq35TX-xoSCS8,5161
|
|
23
|
-
additory/core/config.py,sha256=DIGsBfs9sVPGKMZNDtulPrXCUO8dcywc9Zp_R2pIIew,9578
|
|
24
|
-
additory/core/enhanced_cache_manager.py,sha256=7hpoMucAWkP_-sUzst_JigPKK04S6TsYLpI_m-s9FrY,47230
|
|
25
|
-
additory/core/enhanced_matchers.py,sha256=lZO-PPfiAiriX4SjTenaulWqijogq9NnhUATHfwMqak,20353
|
|
26
|
-
additory/core/enhanced_version_manager.py,sha256=wIk5pg0Pn5KahgsGMYtmHxxxX3sltnwHqJ_QT7mosNw,12957
|
|
27
|
-
additory/core/executor.py,sha256=rgFqJ6ZPmW4IhcitebRWkXDyKIO3UZZ_5ZQZWCzsK40,1977
|
|
28
|
-
additory/core/integrity_manager.py,sha256=nRUOjGFcQuIccNxSWzKES5mVYo8izp7By6vsgq70Ziw,17338
|
|
29
|
-
additory/core/loader.py,sha256=kWIfSFpk7FkcQ3Is0YqwEkP6LdyyDDTneKWEKMcvZUY,5903
|
|
30
|
-
additory/core/logging.py,sha256=5wv_J4I0eaedqcXwrkD7T5zqOfNMCXCrMCOZY__PeCw,695
|
|
31
|
-
additory/core/memory_manager.py,sha256=b1H1juAg2CXioSI4N65XldPdKxHTXRI3MSTSAtKV3S0,20178
|
|
32
|
-
additory/core/namespace_manager.py,sha256=RWbMZBcoXvpdcz5km2CJlXcrDwWE4DES-lGET4r98Pk,23325
|
|
33
|
-
additory/core/parser.py,sha256=yVh87CiE4fmrg4jFisNMKTHlz4OpAMNVFF4thq6I0JE,4748
|
|
34
|
-
additory/core/polars_expression_engine.py,sha256=O4s-ZtHgP2SQd_LsdGgCPVOACJgJsL7W48wj6CbutFw,23158
|
|
35
|
-
additory/core/registry.py,sha256=_K9DY4lprBUbMJl5D18D9lmOjwhDawxXpV4_hdSBy7o,5621
|
|
36
|
-
additory/core/sample_data_manager.py,sha256=urBT2T5NZZM0KXriuW5xfCwC1SA3WHwraVMtz5qyw7Y,19800
|
|
37
|
-
additory/core/user_namespace.py,sha256=qgPhuHuhiePa9Qr2CtBCuflpUfxD8wTakWFcp5Ve2xU,22522
|
|
38
|
-
additory/core/validator.py,sha256=em71_1TAdk44B2yyNwzmxkh4pMpqAq1JN_oHoDH7fCk,588
|
|
39
|
-
additory/core/backends/__init__.py,sha256=Qp70UI5UEGBjr8TDcD3ZQKjtL91JPUEv6wXWLNqk6XI,587
|
|
40
|
-
additory/core/backends/arrow_bridge.py,sha256=2BCWkoCxmI5jr9ZgmJTB-hpWaMj2B9S6BTtvFuLrm_o,16876
|
|
41
|
-
additory/core/backends/cudf_bridge.py,sha256=TWthiZIZFUPdrXRjml6y63SOLkqgVL7dZ0eqaidx2LY,12821
|
|
42
|
-
additory/expressions/__init__.py,sha256=FYZjHA7zJie1HRAQjMo6MdQxwYW2owrHulKXjfBFg4A,781
|
|
43
|
-
additory/expressions/engine.py,sha256=yOzZDNKjltP-HLVKBL4BXke63ALqgRFXgHK4YeeXLQ8,21138
|
|
44
|
-
additory/expressions/parser.py,sha256=yVh87CiE4fmrg4jFisNMKTHlz4OpAMNVFF4thq6I0JE,4748
|
|
45
|
-
additory/expressions/proxy.py,sha256=kohaZTtU5f_r3O_WidnNKXzN3IAmAnt6M0L5F3mpb7I,22044
|
|
46
|
-
additory/expressions/registry.py,sha256=R0nj3-qRx5Q9OxsevIeWGOfIvBcltYNmnUf8QCu06s8,11015
|
|
47
|
-
additory/expressions/samples.py,sha256=urBT2T5NZZM0KXriuW5xfCwC1SA3WHwraVMtz5qyw7Y,19800
|
|
48
|
-
additory/synthetic/__init__.py,sha256=Zw0GqXXh5v6_1S6SxPcEYL7CzNmaRuVk1aC3qBOQ2RE,342
|
|
49
|
-
additory/synthetic/column_name_resolver.py,sha256=-kh6bxitaSUwk28TZ5yPzbLUe6nxU7oYsazKEwumtRA,4913
|
|
50
|
-
additory/synthetic/distributions.py,sha256=jrwDGVy_Vcm5XXoGKy-V0LrpnxdGM5p84GklKq-0b_A,705
|
|
51
|
-
additory/synthetic/forecast.py,sha256=F2XoKEDFDJ47W6bSzy2jXYWU3PN5X0l16YvtfxXc4GU,34820
|
|
52
|
-
additory/synthetic/linked_list_parser.py,sha256=YysP1ODyABJzUe82QLEfbuxGknTCyWb81tf8Pueg-oE,13002
|
|
53
|
-
additory/synthetic/namespace_lookup.py,sha256=4ILe1MWubGvRsF_xbQLybBbr3hG0iMTseypigB_66TI,4096
|
|
54
|
-
additory/synthetic/smote.py,sha256=ub8pTA5Ez3WjXP15GtyUqCRztiPr7XfHbNGTucUFErA,9092
|
|
55
|
-
additory/synthetic/strategies.py,sha256=k0gc5ic6LfJ2gjp8UiEP9txAqeIcen911jaAXhSkgGs,26269
|
|
56
|
-
additory/synthetic/synthesizer.py,sha256=9YHXyA9wfUyMZLse7nBMJ1hQ0F9SJmF4j01y4Oyebzg,26405
|
|
57
|
-
additory/utilities/__init__.py,sha256=I28c5ZqqZ2VsMIG40fUBJhnc930cFXHJX22xQWARXq8,1679
|
|
58
|
-
additory/utilities/encoding.py,sha256=DhTaTeUlJOSixQ3-hgUwSy1jMJAYadV2bQHuONVzzEY,20995
|
|
59
|
-
additory/utilities/games.py,sha256=nEPGUup0RHgzVKdgg8sRMraDxACaIVDFmMuBBjGDrVI,8364
|
|
60
|
-
additory/utilities/keys.py,sha256=CAbMN8VowLHwjWAMvRHTvqwJ44TxulKrYvK9UBzAwEw,197
|
|
61
|
-
additory/utilities/lookup.py,sha256=itE_ntAFHae92tQS43P165-87PHF5hTaH1Qy7ENoYak,3045
|
|
62
|
-
additory/utilities/matchers.py,sha256=x8Nve49_TzJ7jneueiXvnzMgI3ov3o0dlU_h4xFr8Qw,6120
|
|
63
|
-
additory/utilities/resolvers.py,sha256=ykMfce2f9in9wqHgmljCFIil8xGcalT0FBwFIwHOlnk,7127
|
|
64
|
-
additory/utilities/settings.py,sha256=5XB2S3L7Ht486LZMDacYTuyB_ta7sVohUFEKzMo1nDU,4698
|
|
65
|
-
additory/utilities/units.py,sha256=75VFSLCVhX3dcFokh-jbZepDRaFRuO2QpGZNQbG8fag,30526
|
|
66
|
-
additory/utilities/validators.py,sha256=K1ZYsPL3W7XkIUECVWov4HZxTlzqs9Rbc61Vidh2F8o,4213
|
|
67
|
-
additory-0.1.0a3.dist-info/licenses/LICENSE,sha256=ztobegtjJRyvQntGjQ1w80MGuTOeMmWkh5Be-pFyq3I,1079
|
|
68
|
-
additory-0.1.0a3.dist-info/METADATA,sha256=4mlzxPW1q3nCdKAj27Fr3KVv4FTZhbrVtYOYRAcMdsw,8094
|
|
69
|
-
additory-0.1.0a3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
70
|
-
additory-0.1.0a3.dist-info/top_level.txt,sha256=4zphwXiI6HEl40fdjMXoUp9JNIqQ-tgYWeo3zqKqvEk,9
|
|
71
|
-
additory-0.1.0a3.dist-info/RECORD,,
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 Krishnamoorthy Sankaran
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
File without changes
|
|
File without changes
|