additory 0.1.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. additory-0.1.0a1/LICENSE +21 -0
  2. additory-0.1.0a1/PKG-INFO +293 -0
  3. additory-0.1.0a1/README.md +261 -0
  4. additory-0.1.0a1/additory/__init__.py +15 -0
  5. additory-0.1.0a1/additory/analysis/__init__.py +48 -0
  6. additory-0.1.0a1/additory/analysis/cardinality.py +126 -0
  7. additory-0.1.0a1/additory/analysis/correlations.py +124 -0
  8. additory-0.1.0a1/additory/analysis/distributions.py +376 -0
  9. additory-0.1.0a1/additory/analysis/quality.py +158 -0
  10. additory-0.1.0a1/additory/analysis/scan.py +400 -0
  11. additory-0.1.0a1/additory/augment/__init__.py +24 -0
  12. additory-0.1.0a1/additory/augment/augmentor.py +653 -0
  13. additory-0.1.0a1/additory/augment/builtin_lists.py +430 -0
  14. additory-0.1.0a1/additory/augment/distributions.py +22 -0
  15. additory-0.1.0a1/additory/augment/forecast.py +1132 -0
  16. additory-0.1.0a1/additory/augment/list_registry.py +177 -0
  17. additory-0.1.0a1/additory/augment/smote.py +320 -0
  18. additory-0.1.0a1/additory/augment/strategies.py +883 -0
  19. additory-0.1.0a1/additory/common/__init__.py +157 -0
  20. additory-0.1.0a1/additory/common/backend.py +355 -0
  21. additory-0.1.0a1/additory/common/column_utils.py +191 -0
  22. additory-0.1.0a1/additory/common/distributions.py +737 -0
  23. additory-0.1.0a1/additory/common/exceptions.py +62 -0
  24. additory-0.1.0a1/additory/common/lists.py +229 -0
  25. additory-0.1.0a1/additory/common/patterns.py +240 -0
  26. additory-0.1.0a1/additory/common/resolver.py +567 -0
  27. additory-0.1.0a1/additory/common/sample_data.py +182 -0
  28. additory-0.1.0a1/additory/common/validation.py +197 -0
  29. additory-0.1.0a1/additory/core/__init__.py +27 -0
  30. additory-0.1.0a1/additory/core/ast_builder.py +165 -0
  31. additory-0.1.0a1/additory/core/backends/__init__.py +23 -0
  32. additory-0.1.0a1/additory/core/backends/arrow_bridge.py +476 -0
  33. additory-0.1.0a1/additory/core/backends/cudf_bridge.py +355 -0
  34. additory-0.1.0a1/additory/core/column_positioning.py +358 -0
  35. additory-0.1.0a1/additory/core/compiler_polars.py +166 -0
  36. additory-0.1.0a1/additory/core/config.py +342 -0
  37. additory-0.1.0a1/additory/core/enhanced_cache_manager.py +1119 -0
  38. additory-0.1.0a1/additory/core/enhanced_matchers.py +473 -0
  39. additory-0.1.0a1/additory/core/enhanced_version_manager.py +325 -0
  40. additory-0.1.0a1/additory/core/executor.py +59 -0
  41. additory-0.1.0a1/additory/core/integrity_manager.py +477 -0
  42. additory-0.1.0a1/additory/core/loader.py +190 -0
  43. additory-0.1.0a1/additory/core/logging.py +24 -0
  44. additory-0.1.0a1/additory/core/memory_manager.py +547 -0
  45. additory-0.1.0a1/additory/core/namespace_manager.py +657 -0
  46. additory-0.1.0a1/additory/core/parser.py +176 -0
  47. additory-0.1.0a1/additory/core/polars_expression_engine.py +551 -0
  48. additory-0.1.0a1/additory/core/registry.py +176 -0
  49. additory-0.1.0a1/additory/core/sample_data_manager.py +492 -0
  50. additory-0.1.0a1/additory/core/user_namespace.py +751 -0
  51. additory-0.1.0a1/additory/core/validator.py +27 -0
  52. additory-0.1.0a1/additory/dynamic_api.py +308 -0
  53. additory-0.1.0a1/additory/expressions/__init__.py +26 -0
  54. additory-0.1.0a1/additory/expressions/engine.py +551 -0
  55. additory-0.1.0a1/additory/expressions/parser.py +176 -0
  56. additory-0.1.0a1/additory/expressions/proxy.py +546 -0
  57. additory-0.1.0a1/additory/expressions/registry.py +313 -0
  58. additory-0.1.0a1/additory/expressions/samples.py +492 -0
  59. additory-0.1.0a1/additory/synthetic/__init__.py +101 -0
  60. additory-0.1.0a1/additory/synthetic/api.py +220 -0
  61. additory-0.1.0a1/additory/synthetic/common_integration.py +314 -0
  62. additory-0.1.0a1/additory/synthetic/config.py +262 -0
  63. additory-0.1.0a1/additory/synthetic/engines.py +529 -0
  64. additory-0.1.0a1/additory/synthetic/exceptions.py +180 -0
  65. additory-0.1.0a1/additory/synthetic/file_managers.py +518 -0
  66. additory-0.1.0a1/additory/synthetic/generator.py +702 -0
  67. additory-0.1.0a1/additory/synthetic/generator_parser.py +68 -0
  68. additory-0.1.0a1/additory/synthetic/integration.py +319 -0
  69. additory-0.1.0a1/additory/synthetic/models.py +241 -0
  70. additory-0.1.0a1/additory/synthetic/pattern_resolver.py +573 -0
  71. additory-0.1.0a1/additory/synthetic/performance.py +469 -0
  72. additory-0.1.0a1/additory/synthetic/polars_integration.py +464 -0
  73. additory-0.1.0a1/additory/synthetic/proxy.py +60 -0
  74. additory-0.1.0a1/additory/synthetic/schema_parser.py +685 -0
  75. additory-0.1.0a1/additory/synthetic/validator.py +553 -0
  76. additory-0.1.0a1/additory/utilities/__init__.py +53 -0
  77. additory-0.1.0a1/additory/utilities/encoding.py +600 -0
  78. additory-0.1.0a1/additory/utilities/games.py +300 -0
  79. additory-0.1.0a1/additory/utilities/keys.py +8 -0
  80. additory-0.1.0a1/additory/utilities/lookup.py +103 -0
  81. additory-0.1.0a1/additory/utilities/matchers.py +216 -0
  82. additory-0.1.0a1/additory/utilities/resolvers.py +286 -0
  83. additory-0.1.0a1/additory/utilities/settings.py +167 -0
  84. additory-0.1.0a1/additory/utilities/units.py +746 -0
  85. additory-0.1.0a1/additory/utilities/validators.py +153 -0
  86. additory-0.1.0a1/additory.egg-info/SOURCES.txt +121 -0
  87. additory-0.1.0a1/documentation/V0.1.0/add_augment_function.html +603 -0
  88. additory-0.1.0a1/documentation/V0.1.0/add_harmonize_units_function.html +564 -0
  89. additory-0.1.0a1/documentation/V0.1.0/add_onehotencoding_function.html +530 -0
  90. additory-0.1.0a1/documentation/V0.1.0/add_scan_function.html +701 -0
  91. additory-0.1.0a1/documentation/V0.1.0/add_synth_function.html +664 -0
  92. additory-0.1.0a1/documentation/V0.1.0/add_to_function.html +707 -0
  93. additory-0.1.0a1/documentation/V0.1.0/expressions.html +621 -0
  94. additory-0.1.0a1/pyproject.toml +45 -0
  95. additory-0.1.0a1/reference/expressions_definitions/age_category_0.1.add +66 -0
  96. additory-0.1.0a1/reference/expressions_definitions/blood_pressure_category_0.1.add +79 -0
  97. additory-0.1.0a1/reference/expressions_definitions/bmi2_0.1.add +41 -0
  98. additory-0.1.0a1/reference/expressions_definitions/bmi3_0.1.add +26 -0
  99. additory-0.1.0a1/reference/expressions_definitions/bmi_0.1.add +77 -0
  100. additory-0.1.0a1/reference/expressions_definitions/bmr_0.1.add +109 -0
  101. additory-0.1.0a1/reference/expressions_definitions/body_fat_percentage_0.1.add +124 -0
  102. additory-0.1.0a1/reference/expressions_definitions/bsa_0.1.add +79 -0
  103. additory-0.1.0a1/reference/expressions_definitions/cholesterol_ratio_0.1.add +79 -0
  104. additory-0.1.0a1/reference/expressions_definitions/fitness_score_0.1.add +110 -0
  105. additory-0.1.0a1/reference/expressions_definitions/ideal_body_weight_0.1.add +79 -0
  106. additory-0.1.0a1/reference/expressions_definitions/manifest.json +35 -0
  107. additory-0.1.0a1/reference/expressions_definitions/waist_hip_ratio_0.1.add +79 -0
  108. additory-0.1.0a1/reference/schema_definitions/ca.list +41 -0
  109. additory-0.1.0a1/reference/schema_definitions/ca.properties +14 -0
  110. additory-0.1.0a1/reference/schema_definitions/eu.list +41 -0
  111. additory-0.1.0a1/reference/schema_definitions/eu.properties +13 -0
  112. additory-0.1.0a1/reference/schema_definitions/finance.list +31 -0
  113. additory-0.1.0a1/reference/schema_definitions/finance.properties +18 -0
  114. additory-0.1.0a1/reference/schema_definitions/global.list +57 -0
  115. additory-0.1.0a1/reference/schema_definitions/global.properties +11 -0
  116. additory-0.1.0a1/reference/schema_definitions/healthcare.list +28 -0
  117. additory-0.1.0a1/reference/schema_definitions/us.list +41 -0
  118. additory-0.1.0a1/reference/schema_definitions/us.properties +14 -0
  119. additory-0.1.0a1/setup.cfg +4 -0
  120. additory-0.1.0a1/user_expressions/bmi1_0.1.add +41 -0
  121. additory-0.1.0a1/user_expressions/bmi2_0.1.add +41 -0
  122. additory-0.1.0a1/user_expressions/bmi3_0.1.add +26 -0
  123. additory-0.1.0a1/user_expressions/bmi_0.1.add +26 -0
  124. additory-0.1.0a1/user_expressions/manifest.json +22 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Krishnamoorthy Sankaran
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,293 @@
1
+ Metadata-Version: 2.4
2
+ Name: additory
3
+ Version: 0.1.0a1
4
+ Summary: A semantic, extensible dataframe transformation engine with expressions, lookup, synthetic data, and sample-data support.
5
+ Author: Krishnamoorthy Sankaran
6
+ License: MIT
7
+ Project-URL: homepage, https://github.com/sekarkrishna/additory
8
+ Project-URL: documentation, https://github.com/sekarkrishna/additory/tree/main/documentation/V0.1.0
9
+ Project-URL: source, https://github.com/sekarkrishna/additory
10
+ Project-URL: issues, https://github.com/sekarkrishna/additory/issues
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: pandas>=1.5
15
+ Requires-Dist: polars>=0.20
16
+ Requires-Dist: pyyaml>=6.0
17
+ Requires-Dist: requests>=2.31
18
+ Requires-Dist: toml>=0.10
19
+ Requires-Dist: scipy>=1.9
20
+ Requires-Dist: numpy>=1.21
21
+ Provides-Extra: gpu
22
+ Requires-Dist: cudf>=24.0; extra == "gpu"
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=7.0; extra == "dev"
25
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
26
+ Requires-Dist: pytest-xdist>=3.0; extra == "dev"
27
+ Requires-Dist: hypothesis>=6.0; extra == "dev"
28
+ Requires-Dist: black>=23.0; extra == "dev"
29
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
30
+ Requires-Dist: coverage>=7.0; extra == "dev"
31
+ Dynamic: license-file
32
+
33
+ # Additory
34
+
35
+ **A semantic, extensible dataframe transformation engine with expressions, lookup, synthetic data, and sample-data support.**
36
+
37
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
38
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
39
+ [![Version](https://img.shields.io/badge/version-0.1.0a1-orange.svg)](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/)
40
+
41
+ **Author:** Krishnamoorthy Sankaran
42
+
43
+ ## 🛠️ Requirements
44
+
45
+ - **Python**: 3.9+
46
+ - **Core dependencies**: pandas, polars, numpy, scipy, pyyaml, requests, toml
47
+ - **Optional**: cuDF (for GPU support)
48
+
49
+ ## 📦 Installation
50
+
51
+ ```bash
52
+ pip install additory==0.1.0a1
53
+ ```
54
+
55
+ **Optional GPU support:**
56
+ ```bash
57
+ pip install "additory[gpu]==0.1.0a1" # Includes cuDF for GPU acceleration
58
+ ```
59
+
60
+ **Development installation:**
61
+ ```bash
62
+ pip install "additory[dev]==0.1.0a1" # Includes testing and development tools
63
+ ```
64
+
65
+ ## 🎯 Core Functions
66
+
67
+ | Function | Purpose | Example |
68
+ |----------|---------|---------|
69
+ | `add.to()` | Lookup/join operations | `add.to(df1, from_df=df2, bring='col', against='key')` |
70
+ | `add.augment()` | Generate additional data | `add.augment(df, n_rows=1000)` |
71
+ | `add.synth()` | Synthetic data from schemas | `add.synth("schema.toml", rows=5000)` |
72
+ | `add.scan()` | Data profiling & analysis | `add.scan(df, preset="full")` |
73
+
74
+ ## 🧬 Available Expressions
75
+
76
+ Additory includes 12 built-in health and fitness expressions, including:
77
+
78
+ - **`add.bmi()`** - Body Mass Index
79
+ - **`add.bsa()`** - Body Surface Area
80
+ - **`add.bmr()`** - Basal Metabolic Rate
81
+ - **`add.waist_hip_ratio()`** - Waist-to-Hip Ratio
82
+ - **`add.body_fat_percentage()`** - Body Fat Percentage
83
+ - **`add.ideal_body_weight()`** - Ideal Body Weight
84
+ - **`add.blood_pressure_category()`** - BP Classification
85
+ - **`add.cholesterol_ratio()`** - Cholesterol Ratio
86
+ - **`add.age_category()`** - Age Classification
87
+ - **`add.fitness_score()`** - Overall Fitness Score
88
+
89
+ ```python
90
+ # Health calculations
91
+ patients = pd.DataFrame({
92
+ 'weight_kg': [70, 80, 65], # Weight in kilograms
93
+ 'height_m': [1.75, 1.80, 1.60], # Height in meters
94
+ 'age': [25, 35, 45],
95
+ 'gender': ['M', 'F', 'M']
96
+ })
97
+
98
+ patients_bmi = add.bmi(patients)
99
+ patients_bsa = add.bsa(patients)
100
+ fitness_scores = add.fitness_score(patients)
101
+
102
+ # Chain multiple expressions
103
+ result = add.fitness_score(add.bmr(add.bmi(patients)))
104
+ ```
105
+
106
+ ## 🔧 DataFrame Support
107
+
108
+ Additory works seamlessly with multiple DataFrame libraries:
109
+
110
+ - **pandas** - Full support
111
+ - **polars** - Full support
112
+ - **cuDF** - GPU acceleration support
113
+
114
+ ```python
115
+ import polars as pl
116
+ import additory as add
117
+
118
+ # Works with polars
119
+ df_polars = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
120
+ result = add.augment(df_polars, n_rows=100)
121
+
122
+ # Automatic type detection and conversion
123
+ ```
124
+
125
+ ## ✨ Key Features
126
+
127
+ ### 🔧 Utilities
128
+
129
+ **add.to() - Data Lookup & Joins**
130
+ Simplified syntax for bringing columns from one dataframe to another.
131
+
132
+ ```python
133
+ # Simple lookup
134
+ orders_with_prices = add.to(
135
+ orders,
136
+ from_df=products,
137
+ bring='price',
138
+ against='product_id'
139
+ )
140
+
141
+ # Multiple columns and keys
142
+ enriched = add.to(
143
+ orders,
144
+ from_df=products,
145
+ bring=['price', 'category'],
146
+ against=['product_id', 'region']
147
+ )
148
+ ```
149
+
150
+ **add.onehotencoding() - Categorical Encoding**
151
+ Convert categorical columns to one-hot encoded format.
152
+
153
+ ```python
154
+ # One-hot encoding (single column)
155
+ encoded = add.onehotencoding(df, 'category')
156
+ ```
157
+
158
+ **add.harmonize_units() - Unit Standardization**
159
+ Standardize units across your dataset.
160
+
161
+ ```python
162
+ # Unit harmonization
163
+ standardized = add.harmonize_units(
164
+ df,
165
+ value_column='temperature',
166
+ unit_column='unit',
167
+ target_unit='C'
168
+ )
169
+ ```
170
+
171
+ ### 🧮 Expressions
172
+
173
+ Pre-built calculations for health, fitness, and common metrics. Simple examples:
174
+
175
+ ```python
176
+ # Create patient data with correct column names
177
+ patients = pd.DataFrame({
178
+ 'weight_kg': [70, 80, 65], # Weight in kilograms
179
+ 'height_m': [1.75, 1.80, 1.60], # Height in meters
180
+ 'age': [25, 35, 45],
181
+ 'gender': ['M', 'F', 'M']
182
+ })
183
+
184
+ # Calculate BMI
185
+ patients_with_bmi = add.bmi(patients)
186
+
187
+ # Calculate Body Surface Area
188
+ patients_with_bsa = add.bsa(patients)
189
+
190
+ # Chain multiple expressions
191
+ result = add.fitness_score(add.bmr(add.bmi(patients)))
192
+ ```
193
+
194
+ ### 🔄 Augment and Synthetic Data
195
+
196
+ **Augment** generates more data similar to your existing dataset, while **Synthetic** creates entirely new datasets from schema definitions.
197
+
198
+ **Key Differences:**
199
+ - **Augment**: Learns patterns from existing data to create similar rows
200
+ - **Synthetic**: Uses predefined schemas to generate structured data
201
+
202
+ ```python
203
+ # Augment existing data (learns from patterns)
204
+ more_customers = add.augment(customers, n_rows=1000)
205
+
206
+ # Create data from scratch with strategies
207
+ new_data = add.augment("@new", n_rows=500, strategy={
208
+ 'id': 'increment:start=1',
209
+ 'name': 'choice:[John,Jane,Bob]',
210
+ 'age': 'range:18-65'
211
+ })
212
+
213
+ # Generate from schema file (structured approach)
214
+ customers = add.synth("customer_schema.toml", rows=10000)
215
+ ```
216
+
217
+ ## 🧪 Examples
218
+
219
+ ### E-commerce Data Pipeline
220
+ ```python
221
+ import pandas as pd
222
+ import additory as add
223
+
224
+ # Start with small customer sample
225
+ customers = pd.DataFrame({
226
+ 'customer_id': [1, 2, 3],
227
+ 'age': [25, 35, 45],
228
+ 'region': ['North', 'South', 'East']
229
+ })
230
+
231
+ # Generate more customers
232
+ customers = add.augment(customers, n_rows=10000)
233
+
234
+ # Add customer tiers
235
+ tiers = pd.DataFrame({
236
+ 'customer_id': range(1, 4), # Match original IDs
237
+ 'tier': ['Gold', 'Silver', 'Bronze']
238
+ })
239
+
240
+ # Use pipeline approach
241
+ result = (customers
242
+ .pipe(add.to, from_df=tiers, bring='tier', against='customer_id')
243
+ .pipe(add.scan, preset="quick"))
244
+
245
+ print(result.summary())
246
+ ```
247
+
248
+ ### Healthcare Data Analysis
249
+ ```python
250
+ # Create patient data from scratch
251
+ strategy = {
252
+ 'patient_id': 'increment:start=1',
253
+ 'age': 'range:18-80',
254
+ 'weight_kg': 'range:50-120', # Weight in kg
255
+ 'height_cm': 'range:150-200' # Height in cm
256
+ }
257
+
258
+ patients = add.augment("@new", n_rows=1000, strategy=strategy)
259
+
260
+ # Convert height to meters for expressions
261
+ patients['height_m'] = patients['height_cm'] / 100
262
+
263
+ # Calculate health metrics using pipeline
264
+ result = (patients
265
+ .pipe(add.bmi)
266
+ .pipe(add.scan, preset="correlations"))
267
+
268
+ print(result.correlations)
269
+ ```
270
+
271
+ ## 📚 Documentation
272
+
273
+ - **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/)** - Detailed guides for each function
274
+ - **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/expressions.html)** - Complete expressions reference
275
+
276
+ ## 📄 License
277
+
278
+ MIT License - see [LICENSE](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/LICENSE) file for details.
279
+
280
+ ## 📞 Support
281
+
282
+ - **Issues**: [GitHub Issues](https://github.com/sekarkrishna/additory/issues)
283
+ - **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0)
284
+
285
+ ## 🗺️ Roadmap: v0.1.1 (February 2025)
286
+ - Enhanced documentation and tutorials
287
+ - Performance optimizations
288
+ - Additional expressions
289
+ - Advanced synthetic data patterns
290
+
291
+ ---
292
+
293
+ **Made with ❤️ for data scientists, analysts, and developers who love working with data.**
@@ -0,0 +1,261 @@
1
+ # Additory
2
+
3
+ **A semantic, extensible dataframe transformation engine with expressions, lookup, synthetic data, and sample-data support.**
4
+
5
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
+ [![Version](https://img.shields.io/badge/version-0.1.0a1-orange.svg)](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/)
8
+
9
+ **Author:** Krishnamoorthy Sankaran
10
+
11
+ ## 🛠️ Requirements
12
+
13
+ - **Python**: 3.9+
14
+ - **Core dependencies**: pandas, polars, numpy, scipy, pyyaml, requests, toml
15
+ - **Optional**: cuDF (for GPU support)
16
+
17
+ ## 📦 Installation
18
+
19
+ ```bash
20
+ pip install additory==0.1.0a1
21
+ ```
22
+
23
+ **Optional GPU support:**
24
+ ```bash
25
+ pip install "additory[gpu]==0.1.0a1" # Includes cuDF for GPU acceleration
26
+ ```
27
+
28
+ **Development installation:**
29
+ ```bash
30
+ pip install "additory[dev]==0.1.0a1" # Includes testing and development tools
31
+ ```
32
+
33
+ ## 🎯 Core Functions
34
+
35
+ | Function | Purpose | Example |
36
+ |----------|---------|---------|
37
+ | `add.to()` | Lookup/join operations | `add.to(df1, from_df=df2, bring='col', against='key')` |
38
+ | `add.augment()` | Generate additional data | `add.augment(df, n_rows=1000)` |
39
+ | `add.synth()` | Synthetic data from schemas | `add.synth("schema.toml", rows=5000)` |
40
+ | `add.scan()` | Data profiling & analysis | `add.scan(df, preset="full")` |
41
+
42
+ ## 🧬 Available Expressions
43
+
44
+ Additory includes 12 built-in health and fitness expressions, including:
45
+
46
+ - **`add.bmi()`** - Body Mass Index
47
+ - **`add.bsa()`** - Body Surface Area
48
+ - **`add.bmr()`** - Basal Metabolic Rate
49
+ - **`add.waist_hip_ratio()`** - Waist-to-Hip Ratio
50
+ - **`add.body_fat_percentage()`** - Body Fat Percentage
51
+ - **`add.ideal_body_weight()`** - Ideal Body Weight
52
+ - **`add.blood_pressure_category()`** - BP Classification
53
+ - **`add.cholesterol_ratio()`** - Cholesterol Ratio
54
+ - **`add.age_category()`** - Age Classification
55
+ - **`add.fitness_score()`** - Overall Fitness Score
56
+
57
+ ```python
58
+ # Health calculations
59
+ patients = pd.DataFrame({
60
+ 'weight_kg': [70, 80, 65], # Weight in kilograms
61
+ 'height_m': [1.75, 1.80, 1.60], # Height in meters
62
+ 'age': [25, 35, 45],
63
+ 'gender': ['M', 'F', 'M']
64
+ })
65
+
66
+ patients_bmi = add.bmi(patients)
67
+ patients_bsa = add.bsa(patients)
68
+ fitness_scores = add.fitness_score(patients)
69
+
70
+ # Chain multiple expressions
71
+ result = add.fitness_score(add.bmr(add.bmi(patients)))
72
+ ```
73
+
74
+ ## 🔧 DataFrame Support
75
+
76
+ Additory works seamlessly with multiple DataFrame libraries:
77
+
78
+ - **pandas** - Full support
79
+ - **polars** - Full support
80
+ - **cuDF** - GPU acceleration support
81
+
82
+ ```python
83
+ import polars as pl
84
+ import additory as add
85
+
86
+ # Works with polars
87
+ df_polars = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
88
+ result = add.augment(df_polars, n_rows=100)
89
+
90
+ # Automatic type detection and conversion
91
+ ```
92
+
93
+ ## ✨ Key Features
94
+
95
+ ### 🔧 Utilities
96
+
97
+ **add.to() - Data Lookup & Joins**
98
+ Simplified syntax for bringing columns from one dataframe to another.
99
+
100
+ ```python
101
+ # Simple lookup
102
+ orders_with_prices = add.to(
103
+ orders,
104
+ from_df=products,
105
+ bring='price',
106
+ against='product_id'
107
+ )
108
+
109
+ # Multiple columns and keys
110
+ enriched = add.to(
111
+ orders,
112
+ from_df=products,
113
+ bring=['price', 'category'],
114
+ against=['product_id', 'region']
115
+ )
116
+ ```
117
+
118
+ **add.onehotencoding() - Categorical Encoding**
119
+ Convert categorical columns to one-hot encoded format.
120
+
121
+ ```python
122
+ # One-hot encoding (single column)
123
+ encoded = add.onehotencoding(df, 'category')
124
+ ```
125
+
126
+ **add.harmonize_units() - Unit Standardization**
127
+ Standardize units across your dataset.
128
+
129
+ ```python
130
+ # Unit harmonization
131
+ standardized = add.harmonize_units(
132
+ df,
133
+ value_column='temperature',
134
+ unit_column='unit',
135
+ target_unit='C'
136
+ )
137
+ ```
138
+
139
+ ### 🧮 Expressions
140
+
141
+ Pre-built calculations for health, fitness, and common metrics. Simple examples:
142
+
143
+ ```python
144
+ # Create patient data with correct column names
145
+ patients = pd.DataFrame({
146
+ 'weight_kg': [70, 80, 65], # Weight in kilograms
147
+ 'height_m': [1.75, 1.80, 1.60], # Height in meters
148
+ 'age': [25, 35, 45],
149
+ 'gender': ['M', 'F', 'M']
150
+ })
151
+
152
+ # Calculate BMI
153
+ patients_with_bmi = add.bmi(patients)
154
+
155
+ # Calculate Body Surface Area
156
+ patients_with_bsa = add.bsa(patients)
157
+
158
+ # Chain multiple expressions
159
+ result = add.fitness_score(add.bmr(add.bmi(patients)))
160
+ ```
161
+
162
+ ### 🔄 Augment and Synthetic Data
163
+
164
+ **Augment** generates more data similar to your existing dataset, while **Synthetic** creates entirely new datasets from schema definitions.
165
+
166
+ **Key Differences:**
167
+ - **Augment**: Learns patterns from existing data to create similar rows
168
+ - **Synthetic**: Uses predefined schemas to generate structured data
169
+
170
+ ```python
171
+ # Augment existing data (learns from patterns)
172
+ more_customers = add.augment(customers, n_rows=1000)
173
+
174
+ # Create data from scratch with strategies
175
+ new_data = add.augment("@new", n_rows=500, strategy={
176
+ 'id': 'increment:start=1',
177
+ 'name': 'choice:[John,Jane,Bob]',
178
+ 'age': 'range:18-65'
179
+ })
180
+
181
+ # Generate from schema file (structured approach)
182
+ customers = add.synth("customer_schema.toml", rows=10000)
183
+ ```
184
+
185
+ ## 🧪 Examples
186
+
187
+ ### E-commerce Data Pipeline
188
+ ```python
189
+ import pandas as pd
190
+ import additory as add
191
+
192
+ # Start with small customer sample
193
+ customers = pd.DataFrame({
194
+ 'customer_id': [1, 2, 3],
195
+ 'age': [25, 35, 45],
196
+ 'region': ['North', 'South', 'East']
197
+ })
198
+
199
+ # Generate more customers
200
+ customers = add.augment(customers, n_rows=10000)
201
+
202
+ # Add customer tiers
203
+ tiers = pd.DataFrame({
204
+ 'customer_id': range(1, 4), # Match original IDs
205
+ 'tier': ['Gold', 'Silver', 'Bronze']
206
+ })
207
+
208
+ # Use pipeline approach
209
+ result = (customers
210
+ .pipe(add.to, from_df=tiers, bring='tier', against='customer_id')
211
+ .pipe(add.scan, preset="quick"))
212
+
213
+ print(result.summary())
214
+ ```
215
+
216
+ ### Healthcare Data Analysis
217
+ ```python
218
+ # Create patient data from scratch
219
+ strategy = {
220
+ 'patient_id': 'increment:start=1',
221
+ 'age': 'range:18-80',
222
+ 'weight_kg': 'range:50-120', # Weight in kg
223
+ 'height_cm': 'range:150-200' # Height in cm
224
+ }
225
+
226
+ patients = add.augment("@new", n_rows=1000, strategy=strategy)
227
+
228
+ # Convert height to meters for expressions
229
+ patients['height_m'] = patients['height_cm'] / 100
230
+
231
+ # Calculate health metrics using pipeline
232
+ result = (patients
233
+ .pipe(add.bmi)
234
+ .pipe(add.scan, preset="correlations"))
235
+
236
+ print(result.correlations)
237
+ ```
238
+
239
+ ## 📚 Documentation
240
+
241
+ - **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/)** - Detailed guides for each function
242
+ - **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/expressions.html)** - Complete expressions reference
243
+
244
+ ## 📄 License
245
+
246
+ MIT License - see [LICENSE](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/LICENSE) file for details.
247
+
248
+ ## 📞 Support
249
+
250
+ - **Issues**: [GitHub Issues](https://github.com/sekarkrishna/additory/issues)
251
+ - **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0)
252
+
253
+ ## 🗺️ Roadmap: v0.1.1 (February 2025)
254
+ - Enhanced documentation and tutorials
255
+ - Performance optimizations
256
+ - Additional expressions
257
+ - Advanced synthetic data patterns
258
+
259
+ ---
260
+
261
+ **Made with ❤️ for data scientists, analysts, and developers who love working with data.**
@@ -0,0 +1,15 @@
1
+ # additory/__init__.py
2
+
3
+ from .dynamic_api import add as _api_instance
4
+
5
+ # Expose the API instance normally
6
+ add = _api_instance
7
+
8
+ # Module-level __getattr__ to forward dynamic attributes
9
+ def __getattr__(name):
10
+ # Delegate all unknown attributes to the API instance
11
+ return getattr(_api_instance, name)
12
+
13
+ __all__ = [
14
+ "add",
15
+ ]
@@ -0,0 +1,48 @@
1
+ """
2
+ Analysis Module for Data Profiling
3
+
4
+ Provides comprehensive data analysis capabilities:
5
+ - Distribution detection and fitting
6
+ - Correlation analysis
7
+ - Cardinality analysis
8
+ - Data quality metrics
9
+ - Data profiling and scanning
10
+ """
11
+
12
+ from additory.analysis.distributions import (
13
+ detect_distributions,
14
+ fit_distribution,
15
+ DistributionFit
16
+ )
17
+ from additory.analysis.correlations import (
18
+ calculate_correlations,
19
+ CorrelationResult
20
+ )
21
+ from additory.analysis.cardinality import (
22
+ analyze_cardinality,
23
+ CardinalityInfo
24
+ )
25
+ from additory.analysis.quality import (
26
+ analyze_quality,
27
+ QualityMetrics
28
+ )
29
+ from additory.analysis.scan import (
30
+ scan,
31
+ ScanResult,
32
+ ColumnInfo
33
+ )
34
+
35
+ __all__ = [
36
+ 'detect_distributions',
37
+ 'fit_distribution',
38
+ 'DistributionFit',
39
+ 'calculate_correlations',
40
+ 'CorrelationResult',
41
+ 'analyze_cardinality',
42
+ 'CardinalityInfo',
43
+ 'analyze_quality',
44
+ 'QualityMetrics',
45
+ 'scan',
46
+ 'ScanResult',
47
+ 'ColumnInfo',
48
+ ]