additory 0.1.0a3__tar.gz → 0.1.0a4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {additory-0.1.0a3 → additory-0.1.0a4}/PKG-INFO +42 -19
- {additory-0.1.0a3 → additory-0.1.0a4}/README.md +41 -18
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/__init__.py +1 -1
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/config.py +3 -3
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/registry.py +4 -3
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/dynamic_api.py +55 -7
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/registry.py +3 -3
- additory-0.1.0a4/additory/synthetic/deduce.py +259 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/strategies.py +76 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory.egg-info/SOURCES.txt +2 -0
- additory-0.1.0a4/documentation/V0.1.0/add_deduce_function.html +759 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/pyproject.toml +1 -1
- {additory-0.1.0a3 → additory-0.1.0a4}/LICENSE +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/__init__.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/cardinality.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/correlations.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/distributions.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/quality.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/scan.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/__init__.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/backend.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/column_utils.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/distributions.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/exceptions.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/lists.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/patterns.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/resolver.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/sample_data.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/validation.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/__init__.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/ast_builder.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/backends/__init__.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/backends/arrow_bridge.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/backends/cudf_bridge.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/column_positioning.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/compiler_polars.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/enhanced_cache_manager.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/enhanced_matchers.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/enhanced_version_manager.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/executor.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/integrity_manager.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/loader.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/logging.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/memory_manager.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/namespace_manager.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/parser.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/polars_expression_engine.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/sample_data_manager.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/user_namespace.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/validator.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/__init__.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/engine.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/parser.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/proxy.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/samples.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/__init__.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/column_name_resolver.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/distributions.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/forecast.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/linked_list_parser.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/namespace_lookup.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/smote.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/synthesizer.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/__init__.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/encoding.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/games.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/keys.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/lookup.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/matchers.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/resolvers.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/settings.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/units.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/validators.py +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/add_harmonize_units_function.html +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/add_onehotencoding_function.html +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/add_scan_function.html +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/add_synthetic_function.html +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/add_to_function.html +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/expressions.html +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/age_category_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/blood_pressure_category_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/bmi2_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/bmi3_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/bmi_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/bmr_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/body_fat_percentage_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/bsa_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/cholesterol_ratio_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/fitness_score_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/ideal_body_weight_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/manifest.json +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/waist_hip_ratio_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/setup.cfg +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/user_expressions/bmi1_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/user_expressions/bmi2_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/user_expressions/bmi3_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/user_expressions/bmi_0.1.add +0 -0
- {additory-0.1.0a3 → additory-0.1.0a4}/user_expressions/manifest.json +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: additory
|
|
3
|
-
Version: 0.1.0a3
|
|
3
|
+
Version: 0.1.0a4
|
|
4
4
|
Summary: A semantic, extensible dataframe transformation engine with expressions, lookup, and synthetic data generation support.
|
|
5
5
|
Author: Krishnamoorthy Sankaran
|
|
6
6
|
License: MIT
|
|
@@ -39,7 +39,7 @@ Dynamic: license-file
|
|
|
39
39
|
|
|
40
40
|
[](https://www.python.org/downloads/)
|
|
41
41
|
[](https://opensource.org/licenses/MIT)
|
|
42
|
-
[](https://github.com/sekarkrishna/additory)
|
|
43
43
|
|
|
44
44
|
**Author:** Krishnamoorthy Sankaran
|
|
45
45
|
|
|
@@ -52,17 +52,17 @@ Dynamic: license-file
|
|
|
52
52
|
## 📦 Installation
|
|
53
53
|
|
|
54
54
|
```bash
|
|
55
|
-
pip install additory==0.1.
|
|
55
|
+
pip install additory==0.1.0a4
|
|
56
56
|
```
|
|
57
57
|
|
|
58
58
|
**Optional GPU support:**
|
|
59
59
|
```bash
|
|
60
|
-
pip install additory[gpu]==0.1.
|
|
60
|
+
pip install additory[gpu]==0.1.0a4 # Includes cuDF for GPU acceleration
|
|
61
61
|
```
|
|
62
62
|
|
|
63
63
|
**Development installation:**
|
|
64
64
|
```bash
|
|
65
|
-
pip install additory[dev]==0.1.
|
|
65
|
+
pip install additory[dev]==0.1.0a4 # Includes testing and development tools
|
|
66
66
|
```
|
|
67
67
|
|
|
68
68
|
## 🎯 Core Functions
|
|
@@ -70,7 +70,8 @@ pip install additory[dev]==0.1.0a2 # Includes testing and development tools
|
|
|
70
70
|
| Function | Purpose | Example |
|
|
71
71
|
|----------|---------|---------|
|
|
72
72
|
| `add.to()` | Lookup/join operations | `add.to(df1, from_df=df2, bring='col', against='key')` |
|
|
73
|
-
| `add.
|
|
73
|
+
| `add.synthetic()` | Generate additional data | `add.synthetic(df, n_rows=1000)` |
|
|
74
|
+
| `add.deduce()` | Text-based label deduction | `add.deduce(df, from_column='text', to_column='label')` |
|
|
74
75
|
| `add.scan()` | Data profiling & analysis | `add.scan(df, preset="full")` |
|
|
75
76
|
|
|
76
77
|
## 🧬 Available Expressions
|
|
@@ -119,7 +120,7 @@ import additory as add
|
|
|
119
120
|
|
|
120
121
|
# Works with polars
|
|
121
122
|
df_polars = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
|
122
|
-
result = add.
|
|
123
|
+
result = add.synthetic(df_polars, n_rows=100)
|
|
123
124
|
|
|
124
125
|
# Automatic type detection and conversion
|
|
125
126
|
```
|
|
@@ -193,22 +194,44 @@ patients_with_bsa = add.bsa(patients)
|
|
|
193
194
|
result = add.fitness_score(add.bmr(add.bmi(patients)))
|
|
194
195
|
```
|
|
195
196
|
|
|
196
|
-
### 🔄
|
|
197
|
+
### 🔄 Synthetic Data Generation
|
|
197
198
|
|
|
198
|
-
**
|
|
199
|
+
**Synthetic** generates additional data similar to your existing dataset using inline strategies.
|
|
199
200
|
|
|
200
201
|
```python
|
|
201
|
-
#
|
|
202
|
-
more_customers = add.
|
|
202
|
+
# Extend existing data (learns from patterns)
|
|
203
|
+
more_customers = add.synthetic(customers, n_rows=1000)
|
|
203
204
|
|
|
204
205
|
# Create data from scratch with strategies
|
|
205
|
-
new_data = add.
|
|
206
|
+
new_data = add.synthetic("@new", n_rows=500, strategy={
|
|
206
207
|
'id': 'increment:start=1',
|
|
207
208
|
'name': 'choice:[John,Jane,Bob]',
|
|
208
209
|
'age': 'range:18-65'
|
|
209
210
|
})
|
|
210
211
|
```
|
|
211
212
|
|
|
213
|
+
### 🤖 Text-Based Label Deduction
|
|
214
|
+
|
|
215
|
+
**Deduce** automatically fills in missing labels by learning from your existing labeled examples. Pure Python, no LLMs, offline-first.
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
# Deduce missing labels from text
|
|
219
|
+
tickets = pd.DataFrame({
|
|
220
|
+
"ticket_text": ["Cannot log in", "Billing question", "App crashes", "Need invoice"],
|
|
221
|
+
"category": ["Technical", "Billing", None, None]
|
|
222
|
+
})
|
|
223
|
+
|
|
224
|
+
# Automatically fill in missing categories
|
|
225
|
+
result = add.deduce(tickets, from_column="ticket_text", to_column="category")
|
|
226
|
+
|
|
227
|
+
# Use multiple columns for better accuracy
|
|
228
|
+
result = add.deduce(
|
|
229
|
+
df,
|
|
230
|
+
from_column=["title", "description"],
|
|
231
|
+
to_column="category"
|
|
232
|
+
)
|
|
233
|
+
```
|
|
234
|
+
|
|
212
235
|
## 🧪 Examples
|
|
213
236
|
|
|
214
237
|
### E-commerce Data Pipeline
|
|
@@ -224,7 +247,7 @@ customers = pd.DataFrame({
|
|
|
224
247
|
})
|
|
225
248
|
|
|
226
249
|
# Generate more customers
|
|
227
|
-
customers = add.
|
|
250
|
+
customers = add.synthetic(customers, n_rows=10000)
|
|
228
251
|
|
|
229
252
|
# Add customer tiers
|
|
230
253
|
tiers = pd.DataFrame({
|
|
@@ -250,7 +273,7 @@ strategy = {
|
|
|
250
273
|
'height_cm': 'range:150-200' # Height in cm
|
|
251
274
|
}
|
|
252
275
|
|
|
253
|
-
patients = add.
|
|
276
|
+
patients = add.synthetic("@new", n_rows=1000, strategy=strategy)
|
|
254
277
|
|
|
255
278
|
# Convert height to meters for expressions
|
|
256
279
|
patients['height_m'] = patients['height_cm'] / 100
|
|
@@ -265,19 +288,19 @@ print(result.correlations)
|
|
|
265
288
|
|
|
266
289
|
## 📚 Documentation
|
|
267
290
|
|
|
268
|
-
- **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/
|
|
269
|
-
- **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/
|
|
291
|
+
- **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/documentation/)** - Detailed guides for each function
|
|
292
|
+
- **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/documentation/)** - Complete expressions reference
|
|
270
293
|
|
|
271
294
|
## 📄 License
|
|
272
295
|
|
|
273
|
-
MIT License - see [LICENSE](
|
|
296
|
+
MIT License - see [LICENSE](LICENSE) file for details.
|
|
274
297
|
|
|
275
298
|
## 📞 Support
|
|
276
299
|
|
|
277
300
|
- **Issues**: [GitHub Issues](https://github.com/sekarkrishna/additory/issues)
|
|
278
|
-
- **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/
|
|
301
|
+
- **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/documentation/)
|
|
279
302
|
|
|
280
|
-
## 🗺️ v0.1.1 (
|
|
303
|
+
## 🗺️ v0.1.1 (January 2026)
|
|
281
304
|
- Enhanced documentation and tutorials
|
|
282
305
|
- Performance optimizations
|
|
283
306
|
- Additional expressions
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
[](https://www.python.org/downloads/)
|
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
|
7
|
-
[](https://github.com/sekarkrishna/additory)
|
|
8
8
|
|
|
9
9
|
**Author:** Krishnamoorthy Sankaran
|
|
10
10
|
|
|
@@ -17,17 +17,17 @@
|
|
|
17
17
|
## 📦 Installation
|
|
18
18
|
|
|
19
19
|
```bash
|
|
20
|
-
pip install additory==0.1.
|
|
20
|
+
pip install additory==0.1.0a4
|
|
21
21
|
```
|
|
22
22
|
|
|
23
23
|
**Optional GPU support:**
|
|
24
24
|
```bash
|
|
25
|
-
pip install additory[gpu]==0.1.
|
|
25
|
+
pip install additory[gpu]==0.1.0a4 # Includes cuDF for GPU acceleration
|
|
26
26
|
```
|
|
27
27
|
|
|
28
28
|
**Development installation:**
|
|
29
29
|
```bash
|
|
30
|
-
pip install additory[dev]==0.1.
|
|
30
|
+
pip install additory[dev]==0.1.0a4 # Includes testing and development tools
|
|
31
31
|
```
|
|
32
32
|
|
|
33
33
|
## 🎯 Core Functions
|
|
@@ -35,7 +35,8 @@ pip install additory[dev]==0.1.0a2 # Includes testing and development tools
|
|
|
35
35
|
| Function | Purpose | Example |
|
|
36
36
|
|----------|---------|---------|
|
|
37
37
|
| `add.to()` | Lookup/join operations | `add.to(df1, from_df=df2, bring='col', against='key')` |
|
|
38
|
-
| `add.
|
|
38
|
+
| `add.synthetic()` | Generate additional data | `add.synthetic(df, n_rows=1000)` |
|
|
39
|
+
| `add.deduce()` | Text-based label deduction | `add.deduce(df, from_column='text', to_column='label')` |
|
|
39
40
|
| `add.scan()` | Data profiling & analysis | `add.scan(df, preset="full")` |
|
|
40
41
|
|
|
41
42
|
## 🧬 Available Expressions
|
|
@@ -84,7 +85,7 @@ import additory as add
|
|
|
84
85
|
|
|
85
86
|
# Works with polars
|
|
86
87
|
df_polars = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
|
87
|
-
result = add.
|
|
88
|
+
result = add.synthetic(df_polars, n_rows=100)
|
|
88
89
|
|
|
89
90
|
# Automatic type detection and conversion
|
|
90
91
|
```
|
|
@@ -158,22 +159,44 @@ patients_with_bsa = add.bsa(patients)
|
|
|
158
159
|
result = add.fitness_score(add.bmr(add.bmi(patients)))
|
|
159
160
|
```
|
|
160
161
|
|
|
161
|
-
### 🔄
|
|
162
|
+
### 🔄 Synthetic Data Generation
|
|
162
163
|
|
|
163
|
-
**
|
|
164
|
+
**Synthetic** generates additional data similar to your existing dataset using inline strategies.
|
|
164
165
|
|
|
165
166
|
```python
|
|
166
|
-
#
|
|
167
|
-
more_customers = add.
|
|
167
|
+
# Extend existing data (learns from patterns)
|
|
168
|
+
more_customers = add.synthetic(customers, n_rows=1000)
|
|
168
169
|
|
|
169
170
|
# Create data from scratch with strategies
|
|
170
|
-
new_data = add.
|
|
171
|
+
new_data = add.synthetic("@new", n_rows=500, strategy={
|
|
171
172
|
'id': 'increment:start=1',
|
|
172
173
|
'name': 'choice:[John,Jane,Bob]',
|
|
173
174
|
'age': 'range:18-65'
|
|
174
175
|
})
|
|
175
176
|
```
|
|
176
177
|
|
|
178
|
+
### 🤖 Text-Based Label Deduction
|
|
179
|
+
|
|
180
|
+
**Deduce** automatically fills in missing labels by learning from your existing labeled examples. Pure Python, no LLMs, offline-first.
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
# Deduce missing labels from text
|
|
184
|
+
tickets = pd.DataFrame({
|
|
185
|
+
"ticket_text": ["Cannot log in", "Billing question", "App crashes", "Need invoice"],
|
|
186
|
+
"category": ["Technical", "Billing", None, None]
|
|
187
|
+
})
|
|
188
|
+
|
|
189
|
+
# Automatically fill in missing categories
|
|
190
|
+
result = add.deduce(tickets, from_column="ticket_text", to_column="category")
|
|
191
|
+
|
|
192
|
+
# Use multiple columns for better accuracy
|
|
193
|
+
result = add.deduce(
|
|
194
|
+
df,
|
|
195
|
+
from_column=["title", "description"],
|
|
196
|
+
to_column="category"
|
|
197
|
+
)
|
|
198
|
+
```
|
|
199
|
+
|
|
177
200
|
## 🧪 Examples
|
|
178
201
|
|
|
179
202
|
### E-commerce Data Pipeline
|
|
@@ -189,7 +212,7 @@ customers = pd.DataFrame({
|
|
|
189
212
|
})
|
|
190
213
|
|
|
191
214
|
# Generate more customers
|
|
192
|
-
customers = add.
|
|
215
|
+
customers = add.synthetic(customers, n_rows=10000)
|
|
193
216
|
|
|
194
217
|
# Add customer tiers
|
|
195
218
|
tiers = pd.DataFrame({
|
|
@@ -215,7 +238,7 @@ strategy = {
|
|
|
215
238
|
'height_cm': 'range:150-200' # Height in cm
|
|
216
239
|
}
|
|
217
240
|
|
|
218
|
-
patients = add.
|
|
241
|
+
patients = add.synthetic("@new", n_rows=1000, strategy=strategy)
|
|
219
242
|
|
|
220
243
|
# Convert height to meters for expressions
|
|
221
244
|
patients['height_m'] = patients['height_cm'] / 100
|
|
@@ -230,19 +253,19 @@ print(result.correlations)
|
|
|
230
253
|
|
|
231
254
|
## 📚 Documentation
|
|
232
255
|
|
|
233
|
-
- **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/
|
|
234
|
-
- **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/
|
|
256
|
+
- **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/documentation/)** - Detailed guides for each function
|
|
257
|
+
- **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/documentation/)** - Complete expressions reference
|
|
235
258
|
|
|
236
259
|
## 📄 License
|
|
237
260
|
|
|
238
|
-
MIT License - see [LICENSE](
|
|
261
|
+
MIT License - see [LICENSE](LICENSE) file for details.
|
|
239
262
|
|
|
240
263
|
## 📞 Support
|
|
241
264
|
|
|
242
265
|
- **Issues**: [GitHub Issues](https://github.com/sekarkrishna/additory/issues)
|
|
243
|
-
- **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/
|
|
266
|
+
- **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/documentation/)
|
|
244
267
|
|
|
245
|
-
## 🗺️ v0.1.1 (
|
|
268
|
+
## 🗺️ v0.1.1 (January 2026)
|
|
246
269
|
- Enhanced documentation and tutorials
|
|
247
270
|
- Performance optimizations
|
|
248
271
|
- Additional expressions
|
|
@@ -329,14 +329,14 @@ def set_custom_formula_path(path):
|
|
|
329
329
|
|
|
330
330
|
# backend preference setting
|
|
331
331
|
|
|
332
|
-
_backend_preference: str
|
|
332
|
+
_backend_preference: Optional[str] = None # "cpu", "gpu", or None
|
|
333
333
|
|
|
334
|
-
def set_backend_preference(mode: str
|
|
334
|
+
def set_backend_preference(mode: Optional[str]):
|
|
335
335
|
global _backend_preference
|
|
336
336
|
if mode not in (None, "cpu", "gpu"):
|
|
337
337
|
raise ValueError("backend must be 'cpu', 'gpu', or None")
|
|
338
338
|
_backend_preference = mode
|
|
339
339
|
|
|
340
|
-
def get_backend_preference() -> str
|
|
340
|
+
def get_backend_preference() -> Optional[str]:
|
|
341
341
|
return _backend_preference
|
|
342
342
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
# Versioned registry for additory
|
|
3
3
|
|
|
4
4
|
from dataclasses import dataclass
|
|
5
|
+
from typing import Optional
|
|
5
6
|
import os
|
|
6
7
|
import json
|
|
7
8
|
|
|
@@ -26,9 +27,9 @@ class ResolvedFormula:
|
|
|
26
27
|
source: str
|
|
27
28
|
version: str
|
|
28
29
|
mode: str = "local"
|
|
29
|
-
ast: dict
|
|
30
|
-
sample_clean: dict
|
|
31
|
-
sample_unclean: dict
|
|
30
|
+
ast: Optional[dict] = None
|
|
31
|
+
sample_clean: Optional[dict] = None
|
|
32
|
+
sample_unclean: Optional[dict] = None
|
|
32
33
|
|
|
33
34
|
|
|
34
35
|
# ------------------------------------------------------------
|
|
@@ -30,8 +30,15 @@ class AdditoryAPI(SimpleNamespace):
|
|
|
30
30
|
self.my = ExpressionProxy(namespace="user")
|
|
31
31
|
self._builtin_proxy = ExpressionProxy(namespace="builtin")
|
|
32
32
|
|
|
33
|
-
# Explicitly set
|
|
33
|
+
# Explicitly set methods to prevent namespace conflicts
|
|
34
34
|
self.synthetic = self._synthetic_method
|
|
35
|
+
self.deduce = self._deduce_method
|
|
36
|
+
self.to = self._to_method
|
|
37
|
+
self.onehotencoding = self._onehotencoding_method
|
|
38
|
+
self.harmonize_units = self._harmonize_units_method
|
|
39
|
+
self.scan = self._scan_method
|
|
40
|
+
self.games = self._games_method
|
|
41
|
+
self.play = self._play_method
|
|
35
42
|
|
|
36
43
|
def __getattr__(self, name):
|
|
37
44
|
"""
|
|
@@ -118,7 +125,7 @@ class AdditoryAPI(SimpleNamespace):
|
|
|
118
125
|
additory.synthetic = self._synthetic_method
|
|
119
126
|
raise
|
|
120
127
|
|
|
121
|
-
def
|
|
128
|
+
def _to_method(self, target_df, from_df=None, bring=None, against=None, **kwargs):
|
|
122
129
|
"""
|
|
123
130
|
Add columns from reference dataframe to target dataframe.
|
|
124
131
|
|
|
@@ -139,7 +146,7 @@ class AdditoryAPI(SimpleNamespace):
|
|
|
139
146
|
from additory.utilities.lookup import to
|
|
140
147
|
return to(target_df, from_df, bring=bring, against=against, **kwargs)
|
|
141
148
|
|
|
142
|
-
def
|
|
149
|
+
def _onehotencoding_method(self, df, columns=None, **kwargs):
|
|
143
150
|
"""
|
|
144
151
|
One-hot encode categorical columns.
|
|
145
152
|
|
|
@@ -154,7 +161,7 @@ class AdditoryAPI(SimpleNamespace):
|
|
|
154
161
|
from additory.utilities.encoding import onehotencoding
|
|
155
162
|
return onehotencoding(df, column=columns, **kwargs)
|
|
156
163
|
|
|
157
|
-
def
|
|
164
|
+
def _harmonize_units_method(self, df, value_column, unit_column, target_unit=None, position="end", **kwargs):
|
|
158
165
|
"""
|
|
159
166
|
Harmonize units in a dataframe.
|
|
160
167
|
|
|
@@ -176,7 +183,7 @@ class AdditoryAPI(SimpleNamespace):
|
|
|
176
183
|
from additory.utilities.units import harmonize_units
|
|
177
184
|
return harmonize_units(df, value_column, unit_column, target_unit, position, **kwargs)
|
|
178
185
|
|
|
179
|
-
def
|
|
186
|
+
def _scan_method(
|
|
180
187
|
self,
|
|
181
188
|
df: Union[pl.DataFrame, pd.DataFrame, Any],
|
|
182
189
|
preset: Optional[str] = None,
|
|
@@ -259,7 +266,48 @@ class AdditoryAPI(SimpleNamespace):
|
|
|
259
266
|
verbose=verbose
|
|
260
267
|
)
|
|
261
268
|
|
|
262
|
-
def
|
|
269
|
+
def _deduce_method(
|
|
270
|
+
self,
|
|
271
|
+
df: Union[pd.DataFrame, pl.DataFrame, Any],
|
|
272
|
+
from_column: Union[str, List[str]],
|
|
273
|
+
to_column: str
|
|
274
|
+
) -> Union[pd.DataFrame, pl.DataFrame, Any]:
|
|
275
|
+
"""
|
|
276
|
+
Deduce missing labels based on text similarity to labeled examples.
|
|
277
|
+
|
|
278
|
+
Uses cosine similarity on TF-IDF vectors. Pure Python, no LLMs, offline-first.
|
|
279
|
+
Requires at least 3 labeled examples to work.
|
|
280
|
+
|
|
281
|
+
When multiple source columns are provided, they are concatenated with
|
|
282
|
+
spaces before computing similarity.
|
|
283
|
+
|
|
284
|
+
Args:
|
|
285
|
+
df: DataFrame with some labeled and some unlabeled rows
|
|
286
|
+
from_column: Text column(s) to analyze
|
|
287
|
+
- str: Single column (e.g., "comment")
|
|
288
|
+
- List[str]: Multiple columns (e.g., ["comment", "notes"])
|
|
289
|
+
to_column: Label column to fill (e.g., "status")
|
|
290
|
+
|
|
291
|
+
Returns:
|
|
292
|
+
DataFrame with deduced labels filled in
|
|
293
|
+
|
|
294
|
+
Examples:
|
|
295
|
+
# Single column
|
|
296
|
+
>>> result = add.deduce(df, from_column="comment", to_column="status")
|
|
297
|
+
|
|
298
|
+
# Multiple columns (better accuracy)
|
|
299
|
+
>>> result = add.deduce(
|
|
300
|
+
... df,
|
|
301
|
+
... from_column=["comment", "notes", "description"],
|
|
302
|
+
... to_column="status"
|
|
303
|
+
... )
|
|
304
|
+
|
|
305
|
+
Privacy: Your data never leaves your machine. No external connections.
|
|
306
|
+
"""
|
|
307
|
+
from additory.synthetic.deduce import deduce as deduce_impl
|
|
308
|
+
return deduce_impl(df, from_column, to_column)
|
|
309
|
+
|
|
310
|
+
def _games_method(self):
|
|
263
311
|
"""
|
|
264
312
|
List available games! 🎮
|
|
265
313
|
|
|
@@ -275,7 +323,7 @@ class AdditoryAPI(SimpleNamespace):
|
|
|
275
323
|
"""
|
|
276
324
|
return ['tictactoe', 'sudoku']
|
|
277
325
|
|
|
278
|
-
def
|
|
326
|
+
def _play_method(self, game: str = "tictactoe"):
|
|
279
327
|
"""
|
|
280
328
|
Play a game! 🎮
|
|
281
329
|
|
|
@@ -28,9 +28,9 @@ class ResolvedFormula:
|
|
|
28
28
|
version: str
|
|
29
29
|
mode: str = "local"
|
|
30
30
|
namespace: str = "builtin" # NEW: "builtin" or "user"
|
|
31
|
-
ast: dict
|
|
32
|
-
sample_clean: dict
|
|
33
|
-
sample_unclean: dict
|
|
31
|
+
ast: Optional[dict] = None
|
|
32
|
+
sample_clean: Optional[dict] = None
|
|
33
|
+
sample_unclean: Optional[dict] = None
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
# ------------------------------------------------------------
|