additory 0.1.0a3__tar.gz → 0.1.0a4__tar.gz

This diff compares the contents of two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (98) hide show
  1. {additory-0.1.0a3 → additory-0.1.0a4}/PKG-INFO +42 -19
  2. {additory-0.1.0a3 → additory-0.1.0a4}/README.md +41 -18
  3. {additory-0.1.0a3 → additory-0.1.0a4}/additory/__init__.py +1 -1
  4. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/config.py +3 -3
  5. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/registry.py +4 -3
  6. {additory-0.1.0a3 → additory-0.1.0a4}/additory/dynamic_api.py +55 -7
  7. {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/registry.py +3 -3
  8. additory-0.1.0a4/additory/synthetic/deduce.py +259 -0
  9. {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/strategies.py +76 -0
  10. {additory-0.1.0a3 → additory-0.1.0a4}/additory.egg-info/SOURCES.txt +2 -0
  11. additory-0.1.0a4/documentation/V0.1.0/add_deduce_function.html +759 -0
  12. {additory-0.1.0a3 → additory-0.1.0a4}/pyproject.toml +1 -1
  13. {additory-0.1.0a3 → additory-0.1.0a4}/LICENSE +0 -0
  14. {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/__init__.py +0 -0
  15. {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/cardinality.py +0 -0
  16. {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/correlations.py +0 -0
  17. {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/distributions.py +0 -0
  18. {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/quality.py +0 -0
  19. {additory-0.1.0a3 → additory-0.1.0a4}/additory/analysis/scan.py +0 -0
  20. {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/__init__.py +0 -0
  21. {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/backend.py +0 -0
  22. {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/column_utils.py +0 -0
  23. {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/distributions.py +0 -0
  24. {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/exceptions.py +0 -0
  25. {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/lists.py +0 -0
  26. {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/patterns.py +0 -0
  27. {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/resolver.py +0 -0
  28. {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/sample_data.py +0 -0
  29. {additory-0.1.0a3 → additory-0.1.0a4}/additory/common/validation.py +0 -0
  30. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/__init__.py +0 -0
  31. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/ast_builder.py +0 -0
  32. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/backends/__init__.py +0 -0
  33. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/backends/arrow_bridge.py +0 -0
  34. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/backends/cudf_bridge.py +0 -0
  35. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/column_positioning.py +0 -0
  36. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/compiler_polars.py +0 -0
  37. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/enhanced_cache_manager.py +0 -0
  38. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/enhanced_matchers.py +0 -0
  39. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/enhanced_version_manager.py +0 -0
  40. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/executor.py +0 -0
  41. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/integrity_manager.py +0 -0
  42. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/loader.py +0 -0
  43. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/logging.py +0 -0
  44. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/memory_manager.py +0 -0
  45. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/namespace_manager.py +0 -0
  46. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/parser.py +0 -0
  47. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/polars_expression_engine.py +0 -0
  48. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/sample_data_manager.py +0 -0
  49. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/user_namespace.py +0 -0
  50. {additory-0.1.0a3 → additory-0.1.0a4}/additory/core/validator.py +0 -0
  51. {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/__init__.py +0 -0
  52. {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/engine.py +0 -0
  53. {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/parser.py +0 -0
  54. {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/proxy.py +0 -0
  55. {additory-0.1.0a3 → additory-0.1.0a4}/additory/expressions/samples.py +0 -0
  56. {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/__init__.py +0 -0
  57. {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/column_name_resolver.py +0 -0
  58. {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/distributions.py +0 -0
  59. {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/forecast.py +0 -0
  60. {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/linked_list_parser.py +0 -0
  61. {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/namespace_lookup.py +0 -0
  62. {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/smote.py +0 -0
  63. {additory-0.1.0a3 → additory-0.1.0a4}/additory/synthetic/synthesizer.py +0 -0
  64. {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/__init__.py +0 -0
  65. {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/encoding.py +0 -0
  66. {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/games.py +0 -0
  67. {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/keys.py +0 -0
  68. {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/lookup.py +0 -0
  69. {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/matchers.py +0 -0
  70. {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/resolvers.py +0 -0
  71. {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/settings.py +0 -0
  72. {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/units.py +0 -0
  73. {additory-0.1.0a3 → additory-0.1.0a4}/additory/utilities/validators.py +0 -0
  74. {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/add_harmonize_units_function.html +0 -0
  75. {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/add_onehotencoding_function.html +0 -0
  76. {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/add_scan_function.html +0 -0
  77. {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/add_synthetic_function.html +0 -0
  78. {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/add_to_function.html +0 -0
  79. {additory-0.1.0a3 → additory-0.1.0a4}/documentation/V0.1.0/expressions.html +0 -0
  80. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/age_category_0.1.add +0 -0
  81. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/blood_pressure_category_0.1.add +0 -0
  82. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/bmi2_0.1.add +0 -0
  83. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/bmi3_0.1.add +0 -0
  84. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/bmi_0.1.add +0 -0
  85. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/bmr_0.1.add +0 -0
  86. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/body_fat_percentage_0.1.add +0 -0
  87. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/bsa_0.1.add +0 -0
  88. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/cholesterol_ratio_0.1.add +0 -0
  89. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/fitness_score_0.1.add +0 -0
  90. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/ideal_body_weight_0.1.add +0 -0
  91. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/manifest.json +0 -0
  92. {additory-0.1.0a3 → additory-0.1.0a4}/reference/expressions_definitions/waist_hip_ratio_0.1.add +0 -0
  93. {additory-0.1.0a3 → additory-0.1.0a4}/setup.cfg +0 -0
  94. {additory-0.1.0a3 → additory-0.1.0a4}/user_expressions/bmi1_0.1.add +0 -0
  95. {additory-0.1.0a3 → additory-0.1.0a4}/user_expressions/bmi2_0.1.add +0 -0
  96. {additory-0.1.0a3 → additory-0.1.0a4}/user_expressions/bmi3_0.1.add +0 -0
  97. {additory-0.1.0a3 → additory-0.1.0a4}/user_expressions/bmi_0.1.add +0 -0
  98. {additory-0.1.0a3 → additory-0.1.0a4}/user_expressions/manifest.json +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: additory
3
- Version: 0.1.0a3
3
+ Version: 0.1.0a4
4
4
  Summary: A semantic, extensible dataframe transformation engine with expressions, lookup, and synthetic data generation support.
5
5
  Author: Krishnamoorthy Sankaran
6
6
  License: MIT
@@ -39,7 +39,7 @@ Dynamic: license-file
39
39
 
40
40
  [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
41
41
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
42
- [![Version](https://img.shields.io/badge/version-0.1.0a2-orange.svg)](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/)
42
+ [![Version](https://img.shields.io/badge/version-0.1.0a4-orange.svg)](https://github.com/sekarkrishna/additory)
43
43
 
44
44
  **Author:** Krishnamoorthy Sankaran
45
45
 
@@ -52,17 +52,17 @@ Dynamic: license-file
52
52
  ## 📦 Installation
53
53
 
54
54
  ```bash
55
- pip install additory==0.1.0a2
55
+ pip install additory==0.1.0a4
56
56
  ```
57
57
 
58
58
  **Optional GPU support:**
59
59
  ```bash
60
- pip install additory[gpu]==0.1.0a2 # Includes cuDF for GPU acceleration
60
+ pip install additory[gpu]==0.1.0a4 # Includes cuDF for GPU acceleration
61
61
  ```
62
62
 
63
63
  **Development installation:**
64
64
  ```bash
65
- pip install additory[dev]==0.1.0a2 # Includes testing and development tools
65
+ pip install additory[dev]==0.1.0a4 # Includes testing and development tools
66
66
  ```
67
67
 
68
68
  ## 🎯 Core Functions
@@ -70,7 +70,8 @@ pip install additory[dev]==0.1.0a2 # Includes testing and development tools
70
70
  | Function | Purpose | Example |
71
71
  |----------|---------|---------|
72
72
  | `add.to()` | Lookup/join operations | `add.to(df1, from_df=df2, bring='col', against='key')` |
73
- | `add.augment()` | Generate additional data | `add.augment(df, n_rows=1000)` |
73
+ | `add.synthetic()` | Generate additional data | `add.synthetic(df, n_rows=1000)` |
74
+ | `add.deduce()` | Text-based label deduction | `add.deduce(df, from_column='text', to_column='label')` |
74
75
  | `add.scan()` | Data profiling & analysis | `add.scan(df, preset="full")` |
75
76
 
76
77
  ## 🧬 Available Expressions
@@ -119,7 +120,7 @@ import additory as add
119
120
 
120
121
  # Works with polars
121
122
  df_polars = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
122
- result = add.augment(df_polars, n_rows=100)
123
+ result = add.synthetic(df_polars, n_rows=100)
123
124
 
124
125
  # Automatic type detection and conversion
125
126
  ```
@@ -193,22 +194,44 @@ patients_with_bsa = add.bsa(patients)
193
194
  result = add.fitness_score(add.bmr(add.bmi(patients)))
194
195
  ```
195
196
 
196
- ### 🔄 Augment Data Generation
197
+ ### 🔄 Synthetic Data Generation
197
198
 
198
- **Augment** generates additional data similar to your existing dataset using inline strategies.
199
+ **Synthetic** generates additional data similar to your existing dataset using inline strategies.
199
200
 
200
201
  ```python
201
- # Augment existing data (learns from patterns)
202
- more_customers = add.augment(customers, n_rows=1000)
202
+ # Extend existing data (learns from patterns)
203
+ more_customers = add.synthetic(customers, n_rows=1000)
203
204
 
204
205
  # Create data from scratch with strategies
205
- new_data = add.augment("@new", n_rows=500, strategy={
206
+ new_data = add.synthetic("@new", n_rows=500, strategy={
206
207
  'id': 'increment:start=1',
207
208
  'name': 'choice:[John,Jane,Bob]',
208
209
  'age': 'range:18-65'
209
210
  })
210
211
  ```
211
212
 
213
+ ### 🤖 Text-Based Label Deduction
214
+
215
+ **Deduce** automatically fills in missing labels by learning from your existing labeled examples. Pure Python, no LLMs, offline-first.
216
+
217
+ ```python
218
+ # Deduce missing labels from text
219
+ tickets = pd.DataFrame({
220
+ "ticket_text": ["Cannot log in", "Billing question", "Password reset fails", "App crashes", "Need invoice"],
221
+ "category": ["Technical", "Billing", "Technical", None, None]
222
+ })
223
+
224
+ # Automatically fill in missing categories
225
+ result = add.deduce(tickets, from_column="ticket_text", to_column="category")
226
+
227
+ # Use multiple columns for better accuracy
228
+ result = add.deduce(
229
+ df,
230
+ from_column=["title", "description"],
231
+ to_column="category"
232
+ )
233
+ ```
234
+
212
235
  ## 🧪 Examples
213
236
 
214
237
  ### E-commerce Data Pipeline
@@ -224,7 +247,7 @@ customers = pd.DataFrame({
224
247
  })
225
248
 
226
249
  # Generate more customers
227
- customers = add.augment(customers, n_rows=10000)
250
+ customers = add.synthetic(customers, n_rows=10000)
228
251
 
229
252
  # Add customer tiers
230
253
  tiers = pd.DataFrame({
@@ -250,7 +273,7 @@ strategy = {
250
273
  'height_cm': 'range:150-200' # Height in cm
251
274
  }
252
275
 
253
- patients = add.augment("@new", n_rows=1000, strategy=strategy)
276
+ patients = add.synthetic("@new", n_rows=1000, strategy=strategy)
254
277
 
255
278
  # Convert height to meters for expressions
256
279
  patients['height_m'] = patients['height_cm'] / 100
@@ -265,19 +288,19 @@ print(result.correlations)
265
288
 
266
289
  ## 📚 Documentation
267
290
 
268
- - **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/)** - Detailed guides for each function
269
- - **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/expressions.html)** - Complete expressions reference
291
+ - **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/documentation/)** - Detailed guides for each function
292
+ - **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/documentation/)** - Complete expressions reference
270
293
 
271
294
  ## 📄 License
272
295
 
273
- MIT License - see [LICENSE](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/LICENSE) file for details.
296
+ MIT License - see [LICENSE](LICENSE) file for details.
274
297
 
275
298
  ## 📞 Support
276
299
 
277
300
  - **Issues**: [GitHub Issues](https://github.com/sekarkrishna/additory/issues)
278
- - **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0)
301
+ - **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/documentation/)
279
302
 
280
- ## 🗺️ v0.1.1 (February 2025)
303
+ ## 🗺️ v0.1.1 (January 2026)
281
304
  - Enhanced documentation and tutorials
282
305
  - Performance optimizations
283
306
  - Additional expressions
@@ -4,7 +4,7 @@
4
4
 
5
5
  [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
- [![Version](https://img.shields.io/badge/version-0.1.0a2-orange.svg)](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/)
7
+ [![Version](https://img.shields.io/badge/version-0.1.0a4-orange.svg)](https://github.com/sekarkrishna/additory)
8
8
 
9
9
  **Author:** Krishnamoorthy Sankaran
10
10
 
@@ -17,17 +17,17 @@
17
17
  ## 📦 Installation
18
18
 
19
19
  ```bash
20
- pip install additory==0.1.0a2
20
+ pip install additory==0.1.0a4
21
21
  ```
22
22
 
23
23
  **Optional GPU support:**
24
24
  ```bash
25
- pip install additory[gpu]==0.1.0a2 # Includes cuDF for GPU acceleration
25
+ pip install additory[gpu]==0.1.0a4 # Includes cuDF for GPU acceleration
26
26
  ```
27
27
 
28
28
  **Development installation:**
29
29
  ```bash
30
- pip install additory[dev]==0.1.0a2 # Includes testing and development tools
30
+ pip install additory[dev]==0.1.0a4 # Includes testing and development tools
31
31
  ```
32
32
 
33
33
  ## 🎯 Core Functions
@@ -35,7 +35,8 @@ pip install additory[dev]==0.1.0a2 # Includes testing and development tools
35
35
  | Function | Purpose | Example |
36
36
  |----------|---------|---------|
37
37
  | `add.to()` | Lookup/join operations | `add.to(df1, from_df=df2, bring='col', against='key')` |
38
- | `add.augment()` | Generate additional data | `add.augment(df, n_rows=1000)` |
38
+ | `add.synthetic()` | Generate additional data | `add.synthetic(df, n_rows=1000)` |
39
+ | `add.deduce()` | Text-based label deduction | `add.deduce(df, from_column='text', to_column='label')` |
39
40
  | `add.scan()` | Data profiling & analysis | `add.scan(df, preset="full")` |
40
41
 
41
42
  ## 🧬 Available Expressions
@@ -84,7 +85,7 @@ import additory as add
84
85
 
85
86
  # Works with polars
86
87
  df_polars = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
87
- result = add.augment(df_polars, n_rows=100)
88
+ result = add.synthetic(df_polars, n_rows=100)
88
89
 
89
90
  # Automatic type detection and conversion
90
91
  ```
@@ -158,22 +159,44 @@ patients_with_bsa = add.bsa(patients)
158
159
  result = add.fitness_score(add.bmr(add.bmi(patients)))
159
160
  ```
160
161
 
161
- ### 🔄 Augment Data Generation
162
+ ### 🔄 Synthetic Data Generation
162
163
 
163
- **Augment** generates additional data similar to your existing dataset using inline strategies.
164
+ **Synthetic** generates additional data similar to your existing dataset using inline strategies.
164
165
 
165
166
  ```python
166
- # Augment existing data (learns from patterns)
167
- more_customers = add.augment(customers, n_rows=1000)
167
+ # Extend existing data (learns from patterns)
168
+ more_customers = add.synthetic(customers, n_rows=1000)
168
169
 
169
170
  # Create data from scratch with strategies
170
- new_data = add.augment("@new", n_rows=500, strategy={
171
+ new_data = add.synthetic("@new", n_rows=500, strategy={
171
172
  'id': 'increment:start=1',
172
173
  'name': 'choice:[John,Jane,Bob]',
173
174
  'age': 'range:18-65'
174
175
  })
175
176
  ```
176
177
 
178
+ ### 🤖 Text-Based Label Deduction
179
+
180
+ **Deduce** automatically fills in missing labels by learning from your existing labeled examples. Pure Python, no LLMs, offline-first.
181
+
182
+ ```python
183
+ # Deduce missing labels from text
184
+ tickets = pd.DataFrame({
185
+ "ticket_text": ["Cannot log in", "Billing question", "Password reset fails", "App crashes", "Need invoice"],
186
+ "category": ["Technical", "Billing", "Technical", None, None]
187
+ })
188
+
189
+ # Automatically fill in missing categories
190
+ result = add.deduce(tickets, from_column="ticket_text", to_column="category")
191
+
192
+ # Use multiple columns for better accuracy
193
+ result = add.deduce(
194
+ df,
195
+ from_column=["title", "description"],
196
+ to_column="category"
197
+ )
198
+ ```
199
+
177
200
  ## 🧪 Examples
178
201
 
179
202
  ### E-commerce Data Pipeline
@@ -189,7 +212,7 @@ customers = pd.DataFrame({
189
212
  })
190
213
 
191
214
  # Generate more customers
192
- customers = add.augment(customers, n_rows=10000)
215
+ customers = add.synthetic(customers, n_rows=10000)
193
216
 
194
217
  # Add customer tiers
195
218
  tiers = pd.DataFrame({
@@ -215,7 +238,7 @@ strategy = {
215
238
  'height_cm': 'range:150-200' # Height in cm
216
239
  }
217
240
 
218
- patients = add.augment("@new", n_rows=1000, strategy=strategy)
241
+ patients = add.synthetic("@new", n_rows=1000, strategy=strategy)
219
242
 
220
243
  # Convert height to meters for expressions
221
244
  patients['height_m'] = patients['height_cm'] / 100
@@ -230,19 +253,19 @@ print(result.correlations)
230
253
 
231
254
  ## 📚 Documentation
232
255
 
233
- - **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/)** - Detailed guides for each function
234
- - **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/expressions.html)** - Complete expressions reference
256
+ - **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/documentation/)** - Detailed guides for each function
257
+ - **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/documentation/)** - Complete expressions reference
235
258
 
236
259
  ## 📄 License
237
260
 
238
- MIT License - see [LICENSE](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/LICENSE) file for details.
261
+ MIT License - see [LICENSE](LICENSE) file for details.
239
262
 
240
263
  ## 📞 Support
241
264
 
242
265
  - **Issues**: [GitHub Issues](https://github.com/sekarkrishna/additory/issues)
243
- - **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0)
266
+ - **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/documentation/)
244
267
 
245
- ## 🗺️ v0.1.1 (February 2025)
268
+ ## 🗺️ v0.1.1 (January 2026)
246
269
  - Enhanced documentation and tutorials
247
270
  - Performance optimizations
248
271
  - Additional expressions
@@ -3,7 +3,7 @@
3
3
  from .dynamic_api import add as _api_instance
4
4
 
5
5
  # Version information
6
- __version__ = "0.1.0a3"
6
+ __version__ = "0.1.0a4"
7
7
 
8
8
  # Expose the API instance normally
9
9
  add = _api_instance
@@ -329,14 +329,14 @@ def set_custom_formula_path(path):
329
329
 
330
330
  # backend preference setting
331
331
 
332
- _backend_preference: str | None = None # "cpu", "gpu", or None
332
+ _backend_preference: Optional[str] = None # "cpu", "gpu", or None
333
333
 
334
- def set_backend_preference(mode: str | None):
334
+ def set_backend_preference(mode: Optional[str]):
335
335
  global _backend_preference
336
336
  if mode not in (None, "cpu", "gpu"):
337
337
  raise ValueError("backend must be 'cpu', 'gpu', or None")
338
338
  _backend_preference = mode
339
339
 
340
- def get_backend_preference() -> str | None:
340
+ def get_backend_preference() -> Optional[str]:
341
341
  return _backend_preference
342
342
 
@@ -2,6 +2,7 @@
2
2
  # Versioned registry for additory
3
3
 
4
4
  from dataclasses import dataclass
5
+ from typing import Optional
5
6
  import os
6
7
  import json
7
8
 
@@ -26,9 +27,9 @@ class ResolvedFormula:
26
27
  source: str
27
28
  version: str
28
29
  mode: str = "local"
29
- ast: dict | None = None
30
- sample_clean: dict | None = None
31
- sample_unclean: dict | None = None
30
+ ast: Optional[dict] = None
31
+ sample_clean: Optional[dict] = None
32
+ sample_unclean: Optional[dict] = None
32
33
 
33
34
 
34
35
  # ------------------------------------------------------------
@@ -30,8 +30,15 @@ class AdditoryAPI(SimpleNamespace):
30
30
  self.my = ExpressionProxy(namespace="user")
31
31
  self._builtin_proxy = ExpressionProxy(namespace="builtin")
32
32
 
33
- # Explicitly set the synthetic method to prevent namespace conflicts
33
+ # Explicitly set methods to prevent namespace conflicts
34
34
  self.synthetic = self._synthetic_method
35
+ self.deduce = self._deduce_method
36
+ self.to = self._to_method
37
+ self.onehotencoding = self._onehotencoding_method
38
+ self.harmonize_units = self._harmonize_units_method
39
+ self.scan = self._scan_method
40
+ self.games = self._games_method
41
+ self.play = self._play_method
35
42
 
36
43
  def __getattr__(self, name):
37
44
  """
@@ -118,7 +125,7 @@ class AdditoryAPI(SimpleNamespace):
118
125
  additory.synthetic = self._synthetic_method
119
126
  raise
120
127
 
121
- def to(self, target_df, from_df=None, bring=None, against=None, **kwargs):
128
+ def _to_method(self, target_df, from_df=None, bring=None, against=None, **kwargs):
122
129
  """
123
130
  Add columns from reference dataframe to target dataframe.
124
131
 
@@ -139,7 +146,7 @@ class AdditoryAPI(SimpleNamespace):
139
146
  from additory.utilities.lookup import to
140
147
  return to(target_df, from_df, bring=bring, against=against, **kwargs)
141
148
 
142
- def onehotencoding(self, df, columns=None, **kwargs):
149
+ def _onehotencoding_method(self, df, columns=None, **kwargs):
143
150
  """
144
151
  One-hot encode categorical columns.
145
152
 
@@ -154,7 +161,7 @@ class AdditoryAPI(SimpleNamespace):
154
161
  from additory.utilities.encoding import onehotencoding
155
162
  return onehotencoding(df, column=columns, **kwargs)
156
163
 
157
- def harmonize_units(self, df, value_column, unit_column, target_unit=None, position="end", **kwargs):
164
+ def _harmonize_units_method(self, df, value_column, unit_column, target_unit=None, position="end", **kwargs):
158
165
  """
159
166
  Harmonize units in a dataframe.
160
167
 
@@ -176,7 +183,7 @@ class AdditoryAPI(SimpleNamespace):
176
183
  from additory.utilities.units import harmonize_units
177
184
  return harmonize_units(df, value_column, unit_column, target_unit, position, **kwargs)
178
185
 
179
- def scan(
186
+ def _scan_method(
180
187
  self,
181
188
  df: Union[pl.DataFrame, pd.DataFrame, Any],
182
189
  preset: Optional[str] = None,
@@ -259,7 +266,48 @@ class AdditoryAPI(SimpleNamespace):
259
266
  verbose=verbose
260
267
  )
261
268
 
262
- def games(self):
269
+ def _deduce_method(
270
+ self,
271
+ df: Union[pd.DataFrame, pl.DataFrame, Any],
272
+ from_column: Union[str, List[str]],
273
+ to_column: str
274
+ ) -> Union[pd.DataFrame, pl.DataFrame, Any]:
275
+ """
276
+ Deduce missing labels based on text similarity to labeled examples.
277
+
278
+ Uses cosine similarity on TF-IDF vectors. Pure Python, no LLMs, offline-first.
279
+ Requires at least 3 labeled examples to work.
280
+
281
+ When multiple source columns are provided, they are concatenated with
282
+ spaces before computing similarity.
283
+
284
+ Args:
285
+ df: DataFrame with some labeled and some unlabeled rows
286
+ from_column: Text column(s) to analyze
287
+ - str: Single column (e.g., "comment")
288
+ - List[str]: Multiple columns (e.g., ["comment", "notes"])
289
+ to_column: Label column to fill (e.g., "status")
290
+
291
+ Returns:
292
+ DataFrame with deduced labels filled in
293
+
294
+ Examples:
295
+ # Single column
296
+ >>> result = add.deduce(df, from_column="comment", to_column="status")
297
+
298
+ # Multiple columns (better accuracy)
299
+ >>> result = add.deduce(
300
+ ... df,
301
+ ... from_column=["comment", "notes", "description"],
302
+ ... to_column="status"
303
+ ... )
304
+
305
+ Privacy: Your data never leaves your machine. No external connections.
306
+ """
307
+ from additory.synthetic.deduce import deduce as deduce_impl
308
+ return deduce_impl(df, from_column, to_column)
309
+
310
+ def _games_method(self):
263
311
  """
264
312
  List available games! 🎮
265
313
 
@@ -275,7 +323,7 @@ class AdditoryAPI(SimpleNamespace):
275
323
  """
276
324
  return ['tictactoe', 'sudoku']
277
325
 
278
- def play(self, game: str = "tictactoe"):
326
+ def _play_method(self, game: str = "tictactoe"):
279
327
  """
280
328
  Play a game! 🎮
281
329
 
@@ -28,9 +28,9 @@ class ResolvedFormula:
28
28
  version: str
29
29
  mode: str = "local"
30
30
  namespace: str = "builtin" # NEW: "builtin" or "user"
31
- ast: dict | None = None
32
- sample_clean: dict | None = None
33
- sample_unclean: dict | None = None
31
+ ast: Optional[dict] = None
32
+ sample_clean: Optional[dict] = None
33
+ sample_unclean: Optional[dict] = None
34
34
 
35
35
 
36
36
  # ------------------------------------------------------------