additory-0.1.0a3-py3-none-any.whl → additory-0.1.0a4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
additory/__init__.py CHANGED
@@ -3,7 +3,7 @@
  from .dynamic_api import add as _api_instance
 
  # Version information
- __version__ = "0.1.0a3"
+ __version__ = "0.1.0a4"
 
  # Expose the API instance normally
  add = _api_instance
additory/core/config.py CHANGED
@@ -329,14 +329,14 @@ def set_custom_formula_path(path):
 
  # backend preference setting
 
- _backend_preference: str | None = None # "cpu", "gpu", or None
+ _backend_preference: Optional[str] = None # "cpu", "gpu", or None
 
- def set_backend_preference(mode: str | None):
+ def set_backend_preference(mode: Optional[str]):
      global _backend_preference
      if mode not in (None, "cpu", "gpu"):
          raise ValueError("backend must be 'cpu', 'gpu', or None")
      _backend_preference = mode
 
- def get_backend_preference() -> str | None:
+ def get_backend_preference() -> Optional[str]:
      return _backend_preference
 
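The move from `str | None` to `Optional[str]` is a Python 3.9 compatibility fix: module- and class-level annotations are evaluated at import time, and the `X | Y` union syntax only exists on Python 3.10+, while the package advertises Python 3.9+ support. A minimal usage sketch for the setters above, assuming they are imported directly from `additory.core.config` (the intended public entry point for this setting may differ):

```python
# Illustrative sketch; assumes direct import from additory.core.config.
from additory.core.config import set_backend_preference, get_backend_preference

set_backend_preference("gpu")            # request the GPU (cuDF) backend
assert get_backend_preference() == "gpu"

set_backend_preference(None)             # clear the explicit preference

try:
    set_backend_preference("tpu")        # anything else is rejected
except ValueError as err:
    print(err)                           # backend must be 'cpu', 'gpu', or None
```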
additory/core/registry.py CHANGED
@@ -2,6 +2,7 @@
  # Versioned registry for additory
 
  from dataclasses import dataclass
+ from typing import Optional
  import os
  import json
 
@@ -26,9 +27,9 @@ class ResolvedFormula:
      source: str
      version: str
      mode: str = "local"
-     ast: dict | None = None
-     sample_clean: dict | None = None
-     sample_unclean: dict | None = None
+     ast: Optional[dict] = None
+     sample_clean: Optional[dict] = None
+     sample_unclean: Optional[dict] = None
 
 
  # ------------------------------------------------------------
additory/dynamic_api.py CHANGED
@@ -30,8 +30,15 @@ class AdditoryAPI(SimpleNamespace):
          self.my = ExpressionProxy(namespace="user")
          self._builtin_proxy = ExpressionProxy(namespace="builtin")
 
-         # Explicitly set the synthetic method to prevent namespace conflicts
+         # Explicitly set methods to prevent namespace conflicts
          self.synthetic = self._synthetic_method
+         self.deduce = self._deduce_method
+         self.to = self._to_method
+         self.onehotencoding = self._onehotencoding_method
+         self.harmonize_units = self._harmonize_units_method
+         self.scan = self._scan_method
+         self.games = self._games_method
+         self.play = self._play_method
 
      def __getattr__(self, name):
          """
@@ -118,7 +125,7 @@ class AdditoryAPI(SimpleNamespace):
              additory.synthetic = self._synthetic_method
              raise
 
-     def to(self, target_df, from_df=None, bring=None, against=None, **kwargs):
+     def _to_method(self, target_df, from_df=None, bring=None, against=None, **kwargs):
          """
          Add columns from reference dataframe to target dataframe.
 
@@ -139,7 +146,7 @@ class AdditoryAPI(SimpleNamespace):
          from additory.utilities.lookup import to
          return to(target_df, from_df, bring=bring, against=against, **kwargs)
 
-     def onehotencoding(self, df, columns=None, **kwargs):
+     def _onehotencoding_method(self, df, columns=None, **kwargs):
          """
          One-hot encode categorical columns.
 
@@ -154,7 +161,7 @@ class AdditoryAPI(SimpleNamespace):
          from additory.utilities.encoding import onehotencoding
          return onehotencoding(df, column=columns, **kwargs)
 
-     def harmonize_units(self, df, value_column, unit_column, target_unit=None, position="end", **kwargs):
+     def _harmonize_units_method(self, df, value_column, unit_column, target_unit=None, position="end", **kwargs):
          """
          Harmonize units in a dataframe.
 
@@ -176,7 +183,7 @@ class AdditoryAPI(SimpleNamespace):
          from additory.utilities.units import harmonize_units
          return harmonize_units(df, value_column, unit_column, target_unit, position, **kwargs)
 
-     def scan(
+     def _scan_method(
          self,
          df: Union[pl.DataFrame, pd.DataFrame, Any],
          preset: Optional[str] = None,
@@ -259,7 +266,48 @@ class AdditoryAPI(SimpleNamespace):
              verbose=verbose
          )
 
-     def games(self):
+     def _deduce_method(
+         self,
+         df: Union[pd.DataFrame, pl.DataFrame, Any],
+         from_column: Union[str, List[str]],
+         to_column: str
+     ) -> Union[pd.DataFrame, pl.DataFrame, Any]:
+         """
+         Deduce missing labels based on text similarity to labeled examples.
+
+         Uses cosine similarity on TF-IDF vectors. Pure Python, no LLMs, offline-first.
+         Requires at least 3 labeled examples to work.
+
+         When multiple source columns are provided, they are concatenated with
+         spaces before computing similarity.
+
+         Args:
+             df: DataFrame with some labeled and some unlabeled rows
+             from_column: Text column(s) to analyze
+                 - str: Single column (e.g., "comment")
+                 - List[str]: Multiple columns (e.g., ["comment", "notes"])
+             to_column: Label column to fill (e.g., "status")
+
+         Returns:
+             DataFrame with deduced labels filled in
+
+         Examples:
+             # Single column
+             >>> result = add.deduce(df, from_column="comment", to_column="status")
+
+             # Multiple columns (better accuracy)
+             >>> result = add.deduce(
+             ...     df,
+             ...     from_column=["comment", "notes", "description"],
+             ...     to_column="status"
+             ... )
+
+         Privacy: Your data never leaves your machine. No external connections.
+         """
+         from additory.synthetic.deduce import deduce as deduce_impl
+         return deduce_impl(df, from_column, to_column)
+
+     def _games_method(self):
          """
          List available games! 🎮
 
@@ -275,7 +323,7 @@ class AdditoryAPI(SimpleNamespace):
          """
          return ['tictactoe', 'sudoku']
 
-     def play(self, game: str = "tictactoe"):
+     def _play_method(self, game: str = "tictactoe"):
          """
          Play a game! 🎮
 
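The new explicit bindings in `__init__` exist because Python only calls `__getattr__` when normal attribute lookup fails; assigning each public method as an instance attribute therefore keeps names like `to`, `scan`, and `deduce` out of the dynamic expression lookup. A toy sketch of the mechanism (not the real `AdditoryAPI`, just an illustration of the pattern):

```python
from types import SimpleNamespace

class ToyAPI(SimpleNamespace):
    """Illustration of the binding pattern used by AdditoryAPI."""

    def __init__(self):
        super().__init__()
        # Instance attributes are found before __getattr__ is ever consulted,
        # so 'scan' always resolves to the real method.
        self.scan = self._scan_method

    def __getattr__(self, name):
        # Reached only for names that were NOT explicitly bound above.
        return f"<dynamic expression '{name}'>"

    def _scan_method(self, data):
        return f"scanning {data!r}"

api = ToyAPI()
print(api.scan([1, 2, 3]))  # calls the bound method
print(api.bmi)              # falls through to __getattr__
```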
additory/expressions/registry.py CHANGED
@@ -28,9 +28,9 @@ class ResolvedFormula:
      version: str
      mode: str = "local"
      namespace: str = "builtin" # NEW: "builtin" or "user"
-     ast: dict | None = None
-     sample_clean: dict | None = None
-     sample_unclean: dict | None = None
+     ast: Optional[dict] = None
+     sample_clean: Optional[dict] = None
+     sample_unclean: Optional[dict] = None
 
 
  # ------------------------------------------------------------
additory/synthetic/deduce.py ADDED
@@ -0,0 +1,259 @@
+ #!/usr/bin/env python3
+ """
+ Text-based label deduction for additory.
+
+ Uses TF-IDF + cosine similarity to deduce labels from text.
+ Pure Python, no LLMs, offline-first.
+ """
+
+ import math
+ import re
+ from collections import Counter
+ from typing import Union, List, Optional
+ import pandas as pd
+ import polars as pl
+
+
+ def tokenize(text: str) -> List[str]:
+     """
+     Tokenize text into words.
+
+     Args:
+         text: Input text
+
+     Returns:
+         List of lowercase tokens
+     """
+     if text is None or not isinstance(text, str):
+         return []
+
+     text = text.lower()
+     text = re.sub(r"[^a-z0-9\s]", " ", text)
+     return [w for w in text.split() if w]
+
+
+ def vectorize(tokens: List[str]) -> Counter:
+     """
+     Convert tokens to TF vector (term frequency).
+
+     Args:
+         tokens: List of tokens
+
+     Returns:
+         Counter with term frequencies
+     """
+     return Counter(tokens)
+
+
+ def cosine_similarity(v1: Counter, v2: Counter) -> float:
+     """
+     Compute cosine similarity between two vectors.
+
+     Args:
+         v1: First vector (Counter)
+         v2: Second vector (Counter)
+
+     Returns:
+         Similarity score (0-1)
+     """
+     # Dot product
+     dot = sum(v1[t] * v2[t] for t in v1 if t in v2)
+
+     # Magnitudes
+     mag1 = math.sqrt(sum(v * v for v in v1.values()))
+     mag2 = math.sqrt(sum(v * v for v in v2.values()))
+
+     if mag1 == 0 or mag2 == 0:
+         return 0.0
+
+     return dot / (mag1 * mag2)
+
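A small worked example of the three helpers above, as defined in this file (the vectors are plain term-frequency counts, so two texts only score above zero when they share tokens). The sample strings are invented for illustration:

```python
from additory.synthetic.deduce import tokenize, vectorize, cosine_similarity

a = vectorize(tokenize("Cannot log in to my account"))
b = vectorize(tokenize("Login page rejects my account password"))
c = vectorize(tokenize("Please resend the invoice"))

print(cosine_similarity(a, b))  # ~0.33: shares "my" and "account"
print(cosine_similarity(a, c))  # 0.0: no tokens in common
```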
+ def _deduce_polars(
+     df: pl.DataFrame,
+     from_column: Union[str, List[str]],
+     to_column: str,
+     min_examples: int = 3
+ ) -> pl.DataFrame:
+     """
+     Deduce missing labels using text similarity (Polars-native).
+
+     Args:
+         df: Polars DataFrame
+         from_column: Text column(s) to analyze
+         to_column: Label column to fill
+         min_examples: Minimum labeled examples required
+
+     Returns:
+         DataFrame with deduced labels
+
+     Raises:
+         ValueError: If insufficient labeled examples
+     """
+     # Normalize from_column to list
+     if isinstance(from_column, str):
+         source_cols = [from_column]
+     else:
+         source_cols = from_column
+
+     # Validate columns exist
+     for col in source_cols:
+         if col not in df.columns:
+             raise ValueError(f"Column '{col}' not found in DataFrame")
+
+     if to_column not in df.columns:
+         raise ValueError(f"Column '{to_column}' not found in DataFrame")
+
+     # Create combined text column if multiple sources
+     if len(source_cols) == 1:
+         text_col = source_cols[0]
+         df_work = df.clone()
+     else:
+         # Concatenate multiple columns with spaces
+         df_work = df.with_columns([
+             pl.concat_str(
+                 [pl.col(c).fill_null("") for c in source_cols],
+                 separator=" "
+             ).alias("__deduce_text__")
+         ])
+         text_col = "__deduce_text__"
+
+     # Split into labeled and unlabeled
+     labeled_df = df_work.filter(pl.col(to_column).is_not_null())
+     unlabeled_df = df_work.filter(pl.col(to_column).is_null())
+
+     # Check if we have enough labeled examples
+     n_labeled = len(labeled_df)
+     if n_labeled == 0:
+         raise ValueError(
+             f"⚠️ Cannot deduce labels: No labeled examples found in '{to_column}' column.\n"
+             f"Please manually label at least {min_examples} examples per category, then run again.\n\n"
+             f"Note: additory uses pure Python text similarity (no LLMs, no external calls).\n"
+             f"Your data never leaves your machine."
+         )
+
+     if n_labeled < min_examples:
+         print(
+             f"⚠️ Only {n_labeled} labeled examples found. "
+             f"For better accuracy, label at least {min_examples} examples.\n"
+             f"Proceeding with available data..."
+         )
+
+     # If no unlabeled rows, return original
+     if len(unlabeled_df) == 0:
+         if len(source_cols) > 1:
+             # Remove temporary column
+             return df_work.drop("__deduce_text__")
+         return df_work
+
+     # Precompute vectors for labeled rows
+     labeled_vectors = []
+     for row in labeled_df.iter_rows(named=True):
+         text = row[text_col]
+         label = row[to_column]
+         tokens = tokenize(text)
+         vec = vectorize(tokens)
+         labeled_vectors.append((vec, label))
+
+     # Deduce labels for unlabeled rows
+     deduced_labels = []
+     for row in unlabeled_df.iter_rows(named=True):
+         text = row[text_col]
+         tokens = tokenize(text)
+         vec = vectorize(tokens)
+
+         # Find most similar labeled example
+         best_label = None
+         best_score = -1.0
+
+         for labeled_vec, label in labeled_vectors:
+             score = cosine_similarity(vec, labeled_vec)
+             if score > best_score:
+                 best_score = score
+                 best_label = label
+
+         deduced_labels.append(best_label)
+
+     # Create deduced labels series
+     deduced_series = pl.Series(to_column, deduced_labels)
+
+     # Update unlabeled rows with deduced labels
+     unlabeled_df = unlabeled_df.with_columns([deduced_series])
+
+     # Combine labeled and unlabeled back together
+     result_df = pl.concat([labeled_df, unlabeled_df])
+
+     # Remove temporary column if created
+     if len(source_cols) > 1:
+         result_df = result_df.drop("__deduce_text__")
+
+     # Print success message
+     n_deduced = len(deduced_labels)
+     print(f"✓ Deduced {n_deduced} label{'s' if n_deduced != 1 else ''} from {n_labeled} examples (offline, no LLMs)")
+
+     return result_df
+
+
+ def deduce(
+     df: Union[pd.DataFrame, pl.DataFrame],
+     from_column: Union[str, List[str]],
+     to_column: str
+ ) -> Union[pd.DataFrame, pl.DataFrame]:
+     """
+     Deduce missing labels based on text similarity to labeled examples.
+
+     Uses cosine similarity on TF-IDF vectors. Pure Python, no LLMs, offline-first.
+     Requires at least 3 labeled examples to work.
+
+     When multiple source columns are provided, they are concatenated with
+     spaces before computing similarity.
+
+     Args:
+         df: DataFrame with some labeled and some unlabeled rows
+         from_column: Text column(s) to analyze
+             - str: Single column (e.g., "comment")
+             - List[str]: Multiple columns (e.g., ["comment", "notes"])
+         to_column: Label column to fill (e.g., "status")
+
+     Returns:
+         DataFrame with deduced labels filled in
+
+     Examples:
+         # Single column
+         >>> result = add.deduce(df, from_column="comment", to_column="status")
+
+         # Multiple columns (better accuracy)
+         >>> result = add.deduce(
+         ...     df,
+         ...     from_column=["comment", "notes", "description"],
+         ...     to_column="status"
+         ... )
+
+     Privacy: Your data never leaves your machine. No external connections.
+     """
+     # Detect input backend
+     if isinstance(df, pd.DataFrame):
+         backend = "pandas"
+         # Convert to Polars
+         df_polars = pl.from_pandas(df)
+     elif isinstance(df, pl.DataFrame):
+         backend = "polars"
+         df_polars = df
+     else:
+         # Try arrow bridge (for cudf, etc.)
+         try:
+             df_polars = pl.from_arrow(df)
+             backend = "arrow"
+         except Exception:
+             raise TypeError(f"Unsupported DataFrame type: {type(df)}")
+
+     # Process in Polars
+     result_polars = _deduce_polars(df_polars, from_column, to_column)
+
+     # Convert back to original format
+     if backend == "pandas":
+         return result_polars.to_pandas()
+     elif backend == "polars":
+         return result_polars
+     else: # arrow
+         return result_polars.to_arrow()
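A short end-to-end sketch of the wrapper's backend round-trip (pandas in, Polars internally, pandas out). The ticket data is invented for illustration; with only two labeled rows the function prints the "label at least 3 examples" warning but still proceeds, and the result places the already-labeled rows first, followed by the newly deduced ones:

```python
import pandas as pd
from additory.synthetic.deduce import deduce

tickets = pd.DataFrame({
    "ticket_text": [
        "cannot log in to my account",
        "invoice amount looks wrong",
        "login page keeps rejecting my account password",
        "please resend last month's invoice",
    ],
    "category": ["Technical", "Billing", None, None],
})

result = deduce(tickets, from_column="ticket_text", to_column="category")

print(type(result).__name__)        # DataFrame (same backend as the input)
print(result["category"].tolist())  # ['Technical', 'Billing', 'Technical', 'Billing']
```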
additory/synthetic/strategies.py CHANGED
@@ -848,3 +848,79 @@ def apply_smote_strategy(
          )
      except Exception as e:
          raise ValidationError(f"SMOTE strategy failed: {e}")
+
+
+ def parse_deduce_strategy(strategy_spec: str) -> Tuple[str, List[str]]:
+     """
+     Parse deduce strategy specification.
+
+     Args:
+         strategy_spec: Strategy string like:
+             - "deduce:comment"
+             - "deduce:[comment, notes]"
+
+     Returns:
+         Tuple of (strategy_type, source_columns)
+         - strategy_type: "deduce"
+         - source_columns: List of source column names
+
+     Raises:
+         ValidationError: If strategy format is invalid
+
+     Examples:
+         >>> parse_deduce_strategy("deduce:comment")
+         ("deduce", ["comment"])
+
+         >>> parse_deduce_strategy("deduce:[comment, notes]")
+         ("deduce", ["comment", "notes"])
+     """
+     if not strategy_spec.startswith("deduce:"):
+         raise ValidationError(
+             f"Invalid deduce strategy: {strategy_spec}. "
+             "Must start with 'deduce:'"
+         )
+
+     # Extract source specification after "deduce:"
+     source_spec = strategy_spec[7:].strip() # Remove "deduce:" prefix
+
+     if not source_spec:
+         raise ValidationError(
+             f"Deduce strategy requires source column(s): {strategy_spec}. "
+             "Format: 'deduce:column' or 'deduce:[col1, col2]'"
+         )
+
+     # Check if it's multiple columns: deduce:[col1, col2]
+     if source_spec.startswith("[") and source_spec.endswith("]"):
+         # Multiple columns
+         columns_str = source_spec[1:-1] # Remove brackets
+
+         if not columns_str.strip():
+             raise ValidationError(
+                 f"Deduce column list cannot be empty: {strategy_spec}"
+             )
+
+         # Split by comma and strip whitespace
+         columns = [c.strip() for c in columns_str.split(",")]
+
+         if len(columns) == 0:
+             raise ValidationError(
+                 f"Deduce strategy must specify at least one column: {strategy_spec}"
+             )
+
+         return "deduce", columns
+     else:
+         # Single column
+         return "deduce", [source_spec]
+
+
+ def is_deduce_strategy(strategy_spec: str) -> bool:
+     """
+     Check if a strategy specification is a deduce strategy.
+
+     Args:
+         strategy_spec: Strategy string
+
+     Returns:
+         True if it's a deduce strategy, False otherwise
+     """
+     return isinstance(strategy_spec, str) and strategy_spec.startswith("deduce:")
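A brief sketch of how these two helpers fit together when a synthetic-data strategy string uses the new deduce form (import path follows the file this hunk belongs to; the strategy strings are illustrative):

```python
from additory.synthetic.strategies import is_deduce_strategy, parse_deduce_strategy

for spec in ("deduce:comment", "deduce:[comment, notes]", "range:18-65"):
    if is_deduce_strategy(spec):
        strategy_type, columns = parse_deduce_strategy(spec)
        print(spec, "->", strategy_type, columns)
    else:
        print(spec, "-> not a deduce strategy")

# deduce:comment -> deduce ['comment']
# deduce:[comment, notes] -> deduce ['comment', 'notes']
# range:18-65 -> not a deduce strategy
```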
additory-0.1.0a3.dist-info/METADATA → additory-0.1.0a4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: additory
- Version: 0.1.0a3
+ Version: 0.1.0a4
  Summary: A semantic, extensible dataframe transformation engine with expressions, lookup, and synthetic data generation support.
  Author: Krishnamoorthy Sankaran
  License: MIT
@@ -39,7 +39,7 @@ Dynamic: license-file
 
  [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
- [![Version](https://img.shields.io/badge/version-0.1.0a2-orange.svg)](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/)
+ [![Version](https://img.shields.io/badge/version-0.1.0a4-orange.svg)](https://github.com/sekarkrishna/additory)
 
  **Author:** Krishnamoorthy Sankaran
 
@@ -52,17 +52,17 @@ Dynamic: license-file
  ## 📦 Installation
 
  ```bash
- pip install additory==0.1.0a2
+ pip install additory==0.1.0a4
  ```
 
  **Optional GPU support:**
  ```bash
- pip install additory[gpu]==0.1.0a2 # Includes cuDF for GPU acceleration
+ pip install additory[gpu]==0.1.0a4 # Includes cuDF for GPU acceleration
  ```
 
  **Development installation:**
  ```bash
- pip install additory[dev]==0.1.0a2 # Includes testing and development tools
+ pip install additory[dev]==0.1.0a4 # Includes testing and development tools
  ```
 
  ## 🎯 Core Functions
@@ -70,7 +70,8 @@ pip install additory[dev]==0.1.0a2 # Includes testing and development tools
  | Function | Purpose | Example |
  |----------|---------|---------|
  | `add.to()` | Lookup/join operations | `add.to(df1, from_df=df2, bring='col', against='key')` |
- | `add.augment()` | Generate additional data | `add.augment(df, n_rows=1000)` |
+ | `add.synthetic()` | Generate additional data | `add.synthetic(df, n_rows=1000)` |
+ | `add.deduce()` | Text-based label deduction | `add.deduce(df, from_column='text', to_column='label')` |
  | `add.scan()` | Data profiling & analysis | `add.scan(df, preset="full")` |
 
  ## 🧬 Available Expressions
@@ -119,7 +120,7 @@ import additory as add
 
  # Works with polars
  df_polars = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
- result = add.augment(df_polars, n_rows=100)
+ result = add.synthetic(df_polars, n_rows=100)
 
  # Automatic type detection and conversion
  ```
@@ -193,22 +194,44 @@ patients_with_bsa = add.bsa(patients)
  result = add.fitness_score(add.bmr(add.bmi(patients)))
  ```
 
- ### 🔄 Augment Data Generation
+ ### 🔄 Synthetic Data Generation
 
- **Augment** generates additional data similar to your existing dataset using inline strategies.
+ **Synthetic** generates additional data similar to your existing dataset using inline strategies.
 
  ```python
- # Augment existing data (learns from patterns)
- more_customers = add.augment(customers, n_rows=1000)
+ # Extend existing data (learns from patterns)
+ more_customers = add.synthetic(customers, n_rows=1000)
 
  # Create data from scratch with strategies
- new_data = add.augment("@new", n_rows=500, strategy={
+ new_data = add.synthetic("@new", n_rows=500, strategy={
      'id': 'increment:start=1',
      'name': 'choice:[John,Jane,Bob]',
      'age': 'range:18-65'
  })
  ```
 
+ ### 🤖 Text-Based Label Deduction
+
+ **Deduce** automatically fills in missing labels by learning from your existing labeled examples. Pure Python, no LLMs, offline-first.
+
+ ```python
+ # Deduce missing labels from text
+ tickets = pd.DataFrame({
+     "ticket_text": ["Cannot log in", "Billing question", "App crashes", "Need invoice"],
+     "category": ["Technical", "Billing", None, None]
+ })
+
+ # Automatically fill in missing categories
+ result = add.deduce(tickets, from_column="ticket_text", to_column="category")
+
+ # Use multiple columns for better accuracy
+ result = add.deduce(
+     df,
+     from_column=["title", "description"],
+     to_column="category"
+ )
+ ```
+
  ## 🧪 Examples
 
  ### E-commerce Data Pipeline
@@ -224,7 +247,7 @@ customers = pd.DataFrame({
  })
 
  # Generate more customers
- customers = add.augment(customers, n_rows=10000)
+ customers = add.synthetic(customers, n_rows=10000)
 
  # Add customer tiers
  tiers = pd.DataFrame({
@@ -250,7 +273,7 @@ strategy = {
      'height_cm': 'range:150-200' # Height in cm
  }
 
- patients = add.augment("@new", n_rows=1000, strategy=strategy)
+ patients = add.synthetic("@new", n_rows=1000, strategy=strategy)
 
  # Convert height to meters for expressions
  patients['height_m'] = patients['height_cm'] / 100
@@ -265,19 +288,19 @@ print(result.correlations)
 
  ## 📚 Documentation
 
- - **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/)** - Detailed guides for each function
- - **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0/expressions.html)** - Complete expressions reference
+ - **[Function Documentation](https://github.com/sekarkrishna/additory/tree/main/documentation/)** - Detailed guides for each function
+ - **[Expressions Guide](https://github.com/sekarkrishna/additory/tree/main/documentation/)** - Complete expressions reference
 
  ## 📄 License
 
- MIT License - see [LICENSE](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/LICENSE) file for details.
+ MIT License - see [LICENSE](LICENSE) file for details.
 
  ## 📞 Support
 
  - **Issues**: [GitHub Issues](https://github.com/sekarkrishna/additory/issues)
- - **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/V0.1.0a1/documentation/V0.1.0)
+ - **Documentation**: [Full Documentation](https://github.com/sekarkrishna/additory/tree/main/documentation/)
 
- ## 🗺️ v0.1.1 (February 2025)
+ ## 🗺️ v0.1.1 (January 2026)
  - Enhanced documentation and tutorials
  - Performance optimizations
  - Additional expressions
additory-0.1.0a3.dist-info/RECORD → additory-0.1.0a4.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
- additory/__init__.py,sha256=Q0vhhQxwcI-Wl-8TERWqYaq4_8gJXqUBUGSywlhdD3E,397
- additory/dynamic_api.py,sha256=q38rax223ZX5lRNRPs0I7WXd1YZwBJEF2nm1IG-mzFI,11843
+ additory/__init__.py,sha256=6LWidwOfdQInwiTCaS9OlJG97WDnXCEMncZF9f9Uheo,397
+ additory/dynamic_api.py,sha256=XcmXMS-k2u5RoH5uM6BoQfTLGgF4UcZvXs9f9-BlOM4,13814
  additory/analysis/__init__.py,sha256=F_yhD_hcIWbwO1wrRe8Js1RI-vkozaKyWNIIEb-jSMk,986
  additory/analysis/cardinality.py,sha256=y4ttjk3VFNm3mEfNZaTegVQxH7btnmXgnDUSkctNuTo,2976
  additory/analysis/correlations.py,sha256=n0vIPW9lTTSPsPlr40YOIohTX3mUgGmSLdlBrkJZa1c,3909
@@ -20,7 +20,7 @@ additory/core/__init__.py,sha256=dhEBneupBndNBlsQI8niFZgQjUJDLORzRcFtvXGXg-E,630
  additory/core/ast_builder.py,sha256=cW65w-utVGjUJos1ffmfEPgPbxVwN6WU-vcDKrBPy8o,5303
  additory/core/column_positioning.py,sha256=1frwieAvdHXvlZzlUhL1BXP1P_iOZ7yzCNDlvw4L9kI,13241
  additory/core/compiler_polars.py,sha256=wN_785yk7N3tYGPCP2IsOpCeWxqJNOMq35TX-xoSCS8,5161
- additory/core/config.py,sha256=DIGsBfs9sVPGKMZNDtulPrXCUO8dcywc9Zp_R2pIIew,9578
+ additory/core/config.py,sha256=3qqM_JIahzf4ZscjU0OzlAAYQ7kEdfjG7ztKk8993nQ,9587
  additory/core/enhanced_cache_manager.py,sha256=7hpoMucAWkP_-sUzst_JigPKK04S6TsYLpI_m-s9FrY,47230
  additory/core/enhanced_matchers.py,sha256=lZO-PPfiAiriX4SjTenaulWqijogq9NnhUATHfwMqak,20353
  additory/core/enhanced_version_manager.py,sha256=wIk5pg0Pn5KahgsGMYtmHxxxX3sltnwHqJ_QT7mosNw,12957
@@ -32,7 +32,7 @@ additory/core/memory_manager.py,sha256=b1H1juAg2CXioSI4N65XldPdKxHTXRI3MSTSAtKV3
  additory/core/namespace_manager.py,sha256=RWbMZBcoXvpdcz5km2CJlXcrDwWE4DES-lGET4r98Pk,23325
  additory/core/parser.py,sha256=yVh87CiE4fmrg4jFisNMKTHlz4OpAMNVFF4thq6I0JE,4748
  additory/core/polars_expression_engine.py,sha256=O4s-ZtHgP2SQd_LsdGgCPVOACJgJsL7W48wj6CbutFw,23158
- additory/core/registry.py,sha256=_K9DY4lprBUbMJl5D18D9lmOjwhDawxXpV4_hdSBy7o,5621
+ additory/core/registry.py,sha256=bhm__bPuLvGvjbEUm8xbb0lUXQ1ZMd1Pcn2oUsIg4Rc,5658
  additory/core/sample_data_manager.py,sha256=urBT2T5NZZM0KXriuW5xfCwC1SA3WHwraVMtz5qyw7Y,19800
  additory/core/user_namespace.py,sha256=qgPhuHuhiePa9Qr2CtBCuflpUfxD8wTakWFcp5Ve2xU,22522
  additory/core/validator.py,sha256=em71_1TAdk44B2yyNwzmxkh4pMpqAq1JN_oHoDH7fCk,588
@@ -43,16 +43,17 @@ additory/expressions/__init__.py,sha256=FYZjHA7zJie1HRAQjMo6MdQxwYW2owrHulKXjfBF
  additory/expressions/engine.py,sha256=yOzZDNKjltP-HLVKBL4BXke63ALqgRFXgHK4YeeXLQ8,21138
  additory/expressions/parser.py,sha256=yVh87CiE4fmrg4jFisNMKTHlz4OpAMNVFF4thq6I0JE,4748
  additory/expressions/proxy.py,sha256=kohaZTtU5f_r3O_WidnNKXzN3IAmAnt6M0L5F3mpb7I,22044
- additory/expressions/registry.py,sha256=R0nj3-qRx5Q9OxsevIeWGOfIvBcltYNmnUf8QCu06s8,11015
+ additory/expressions/registry.py,sha256=DPkjg8YfQxjz0Tf6nif_e_8uTNAMfHd7LmUNL2GqZ-4,11024
  additory/expressions/samples.py,sha256=urBT2T5NZZM0KXriuW5xfCwC1SA3WHwraVMtz5qyw7Y,19800
  additory/synthetic/__init__.py,sha256=Zw0GqXXh5v6_1S6SxPcEYL7CzNmaRuVk1aC3qBOQ2RE,342
  additory/synthetic/column_name_resolver.py,sha256=-kh6bxitaSUwk28TZ5yPzbLUe6nxU7oYsazKEwumtRA,4913
+ additory/synthetic/deduce.py,sha256=CjEw-mCHGO1GjQtb0i-YX8QlmHiJORwMqjnfp8Oxm68,7736
  additory/synthetic/distributions.py,sha256=jrwDGVy_Vcm5XXoGKy-V0LrpnxdGM5p84GklKq-0b_A,705
  additory/synthetic/forecast.py,sha256=F2XoKEDFDJ47W6bSzy2jXYWU3PN5X0l16YvtfxXc4GU,34820
  additory/synthetic/linked_list_parser.py,sha256=YysP1ODyABJzUe82QLEfbuxGknTCyWb81tf8Pueg-oE,13002
  additory/synthetic/namespace_lookup.py,sha256=4ILe1MWubGvRsF_xbQLybBbr3hG0iMTseypigB_66TI,4096
  additory/synthetic/smote.py,sha256=ub8pTA5Ez3WjXP15GtyUqCRztiPr7XfHbNGTucUFErA,9092
- additory/synthetic/strategies.py,sha256=k0gc5ic6LfJ2gjp8UiEP9txAqeIcen911jaAXhSkgGs,26269
+ additory/synthetic/strategies.py,sha256=2Cn6wy-tRTj9CuBkhYizB6oQGev4EzxrprbQlKyxce8,28620
  additory/synthetic/synthesizer.py,sha256=9YHXyA9wfUyMZLse7nBMJ1hQ0F9SJmF4j01y4Oyebzg,26405
  additory/utilities/__init__.py,sha256=I28c5ZqqZ2VsMIG40fUBJhnc930cFXHJX22xQWARXq8,1679
  additory/utilities/encoding.py,sha256=DhTaTeUlJOSixQ3-hgUwSy1jMJAYadV2bQHuONVzzEY,20995
@@ -64,8 +65,8 @@ additory/utilities/resolvers.py,sha256=ykMfce2f9in9wqHgmljCFIil8xGcalT0FBwFIwHOl
  additory/utilities/settings.py,sha256=5XB2S3L7Ht486LZMDacYTuyB_ta7sVohUFEKzMo1nDU,4698
  additory/utilities/units.py,sha256=75VFSLCVhX3dcFokh-jbZepDRaFRuO2QpGZNQbG8fag,30526
  additory/utilities/validators.py,sha256=K1ZYsPL3W7XkIUECVWov4HZxTlzqs9Rbc61Vidh2F8o,4213
- additory-0.1.0a3.dist-info/licenses/LICENSE,sha256=ztobegtjJRyvQntGjQ1w80MGuTOeMmWkh5Be-pFyq3I,1079
- additory-0.1.0a3.dist-info/METADATA,sha256=4mlzxPW1q3nCdKAj27Fr3KVv4FTZhbrVtYOYRAcMdsw,8094
- additory-0.1.0a3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- additory-0.1.0a3.dist-info/top_level.txt,sha256=4zphwXiI6HEl40fdjMXoUp9JNIqQ-tgYWeo3zqKqvEk,9
- additory-0.1.0a3.dist-info/RECORD,,
+ additory-0.1.0a4.dist-info/licenses/LICENSE,sha256=ztobegtjJRyvQntGjQ1w80MGuTOeMmWkh5Be-pFyq3I,1079
+ additory-0.1.0a4.dist-info/METADATA,sha256=uYviUo6_AjdbVtV91hFZL7fyEwh4Pd_HsDqjgzxqDts,8729
+ additory-0.1.0a4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ additory-0.1.0a4.dist-info/top_level.txt,sha256=4zphwXiI6HEl40fdjMXoUp9JNIqQ-tgYWeo3zqKqvEk,9
+ additory-0.1.0a4.dist-info/RECORD,,