additory 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. additory/__init__.py +15 -0
  2. additory/analysis/__init__.py +48 -0
  3. additory/analysis/cardinality.py +126 -0
  4. additory/analysis/correlations.py +124 -0
  5. additory/analysis/distributions.py +376 -0
  6. additory/analysis/quality.py +158 -0
  7. additory/analysis/scan.py +400 -0
  8. additory/augment/__init__.py +24 -0
  9. additory/augment/augmentor.py +653 -0
  10. additory/augment/builtin_lists.py +430 -0
  11. additory/augment/distributions.py +22 -0
  12. additory/augment/forecast.py +1132 -0
  13. additory/augment/list_registry.py +177 -0
  14. additory/augment/smote.py +320 -0
  15. additory/augment/strategies.py +883 -0
  16. additory/common/__init__.py +157 -0
  17. additory/common/backend.py +355 -0
  18. additory/common/column_utils.py +191 -0
  19. additory/common/distributions.py +737 -0
  20. additory/common/exceptions.py +62 -0
  21. additory/common/lists.py +229 -0
  22. additory/common/patterns.py +240 -0
  23. additory/common/resolver.py +567 -0
  24. additory/common/sample_data.py +182 -0
  25. additory/common/validation.py +197 -0
  26. additory/core/__init__.py +27 -0
  27. additory/core/ast_builder.py +165 -0
  28. additory/core/backends/__init__.py +23 -0
  29. additory/core/backends/arrow_bridge.py +476 -0
  30. additory/core/backends/cudf_bridge.py +355 -0
  31. additory/core/column_positioning.py +358 -0
  32. additory/core/compiler_polars.py +166 -0
  33. additory/core/config.py +342 -0
  34. additory/core/enhanced_cache_manager.py +1119 -0
  35. additory/core/enhanced_matchers.py +473 -0
  36. additory/core/enhanced_version_manager.py +325 -0
  37. additory/core/executor.py +59 -0
  38. additory/core/integrity_manager.py +477 -0
  39. additory/core/loader.py +190 -0
  40. additory/core/logging.py +24 -0
  41. additory/core/memory_manager.py +547 -0
  42. additory/core/namespace_manager.py +657 -0
  43. additory/core/parser.py +176 -0
  44. additory/core/polars_expression_engine.py +551 -0
  45. additory/core/registry.py +176 -0
  46. additory/core/sample_data_manager.py +492 -0
  47. additory/core/user_namespace.py +751 -0
  48. additory/core/validator.py +27 -0
  49. additory/dynamic_api.py +308 -0
  50. additory/expressions/__init__.py +26 -0
  51. additory/expressions/engine.py +551 -0
  52. additory/expressions/parser.py +176 -0
  53. additory/expressions/proxy.py +546 -0
  54. additory/expressions/registry.py +313 -0
  55. additory/expressions/samples.py +492 -0
  56. additory/synthetic/__init__.py +101 -0
  57. additory/synthetic/api.py +220 -0
  58. additory/synthetic/common_integration.py +314 -0
  59. additory/synthetic/config.py +262 -0
  60. additory/synthetic/engines.py +529 -0
  61. additory/synthetic/exceptions.py +180 -0
  62. additory/synthetic/file_managers.py +518 -0
  63. additory/synthetic/generator.py +702 -0
  64. additory/synthetic/generator_parser.py +68 -0
  65. additory/synthetic/integration.py +319 -0
  66. additory/synthetic/models.py +241 -0
  67. additory/synthetic/pattern_resolver.py +573 -0
  68. additory/synthetic/performance.py +469 -0
  69. additory/synthetic/polars_integration.py +464 -0
  70. additory/synthetic/proxy.py +60 -0
  71. additory/synthetic/schema_parser.py +685 -0
  72. additory/synthetic/validator.py +553 -0
  73. additory/utilities/__init__.py +53 -0
  74. additory/utilities/encoding.py +600 -0
  75. additory/utilities/games.py +300 -0
  76. additory/utilities/keys.py +8 -0
  77. additory/utilities/lookup.py +103 -0
  78. additory/utilities/matchers.py +216 -0
  79. additory/utilities/resolvers.py +286 -0
  80. additory/utilities/settings.py +167 -0
  81. additory/utilities/units.py +746 -0
  82. additory/utilities/validators.py +153 -0
  83. additory-0.1.0a1.dist-info/METADATA +293 -0
  84. additory-0.1.0a1.dist-info/RECORD +87 -0
  85. additory-0.1.0a1.dist-info/WHEEL +5 -0
  86. additory-0.1.0a1.dist-info/licenses/LICENSE +21 -0
  87. additory-0.1.0a1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,176 @@
1
+ # parser.py
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict, Optional, List, Any
5
+
6
+ import yaml
7
+
8
+ from .logging import log_info, log_warning
9
+ from .ast_builder import build_ast_from_expression # <-- NEW: your AST builder
10
+
11
+
12
+ # ------------------------------------------------------------
13
+ # Parsed Expression Structure
14
+ # ------------------------------------------------------------
15
+
16
@dataclass
class ParsedExpression:
    """Container for the result of parsing one expression file.

    The first four fields are always supplied by the parsers in this
    module; the remaining ones default to ``None`` when not available.
    """

    # Expression name; the parsers use "unknown" when none is declared.
    name: str
    # Key/value metadata (the ``formula`` mapping for YAML-style input,
    # the ``key: value`` header lines for the legacy format).
    metadata: Dict[str, Any]
    # Normalized expression source text ("" when nothing was found).
    expression: str
    # The original, unmodified file contents.
    raw_text: str

    # AST produced from ``expression``; left as None when it could not
    # be built.
    ast: Optional[Dict[str, Any]] = None
    # Optional sample data taken from the YAML ``sample.clean`` and
    # ``sample.unclean`` entries.
    sample_clean: Optional[Dict[str, List[Any]]] = None
    sample_unclean: Optional[Dict[str, List[Any]]] = None
25
+
26
+
27
+ # ------------------------------------------------------------
28
+ # Public API
29
+ # ------------------------------------------------------------
30
+
31
def parse_expression(text: str) -> ParsedExpression:
    """
    Parse the contents of a .add expression file.

    Two input formats are recognised:
    1. YAML-style (new)
    2. Legacy metadata + expression block (old)

    Blank input yields a placeholder result named "unknown" with an empty
    expression. Otherwise the parsed result also gets an AST attached;
    if AST construction raises, a warning is logged and ``ast`` is None.
    """
    if not text.strip():
        log_warning("[parser] Empty expression file")
        return ParsedExpression(
            name="unknown",
            metadata={},
            expression="",
            raw_text=text,
            ast=None,
        )

    # Pick the parser that matches the detected format.
    parse = _parse_yaml_style if _looks_like_yaml(text) else _parse_legacy_style
    result = parse(text)

    # AST building is best-effort: a failure must not lose the parse result.
    try:
        tree = build_ast_from_expression(result.expression)
    except Exception as exc:
        log_warning(f"[parser] Failed to build AST: {exc}")
        tree = None

    result.ast = tree
    return result
65
+
66
+
67
+ # ------------------------------------------------------------
68
+ # YAML-STYLE PARSER
69
+ # ------------------------------------------------------------
70
+
71
+ def _looks_like_yaml(text: str) -> bool:
72
+ lowered = text.lower()
73
+ return ("formula:" in lowered) or ("sample:" in lowered)
74
+
75
+
76
def _parse_yaml_style(text: str) -> ParsedExpression:
    """Parse YAML-style expression text into a ParsedExpression.

    The document is expected to be a mapping with a ``formula`` mapping
    (holding ``name`` and ``expression``) and an optional ``sample``
    mapping (holding ``clean`` / ``unclean``).

    Falls back to the legacy parser when the text is not valid YAML or
    when the top-level YAML value is not a mapping. A ``formula`` or
    ``sample`` entry that is present but not a mapping (for example an
    empty ``sample:`` key, which loads as None) is treated as empty
    instead of raising AttributeError.
    """
    try:
        parsed = yaml.safe_load(text)
    except Exception as e:
        log_warning(f"[parser] YAML parse failed, falling back to legacy: {e}")
        return _parse_legacy_style(text)

    # safe_load returns whatever the top-level node is (None, str, list, ...);
    # only a mapping can carry the 'formula' / 'sample' sections.
    if not isinstance(parsed, dict):
        log_warning("[parser] YAML document is not a mapping, falling back to legacy")
        return _parse_legacy_style(text)

    # dict.get(key, {}) only applies the default when the key is absent,
    # so a key with a null or scalar value must be checked explicitly.
    formula = parsed.get("formula")
    if not isinstance(formula, dict):
        if formula is not None:
            log_warning("[parser] YAML 'formula' is not a mapping; ignoring it")
        formula = {}

    sample = parsed.get("sample")
    if not isinstance(sample, dict):
        sample = {}

    expression_block = formula.get("expression")
    if not expression_block:
        log_warning("[parser] YAML file missing 'formula.expression' block")
        expression_block = ""

    name = formula.get("name", "unknown")

    return ParsedExpression(
        name=name,
        metadata=formula,
        expression=_normalize_expression(expression_block),
        raw_text=text,
        sample_clean=sample.get("clean"),
        sample_unclean=sample.get("unclean"),
    )
101
+
102
+
103
+ def _normalize_expression(expr):
104
+ if isinstance(expr, list):
105
+ return "\n".join(expr)
106
+ return str(expr).strip()
107
+
108
+
109
+ # ------------------------------------------------------------
110
+ # LEGACY PARSER
111
+ # ------------------------------------------------------------
112
+
113
def _parse_legacy_style(text: str) -> ParsedExpression:
    """Parse the legacy format: metadata lines followed by an expression block.

    The text is first reduced to its non-blank, non-comment lines; the
    metadata and the expression are then both extracted from those lines.
    The name comes from the ``name`` metadata entry, or "unknown".
    """
    cleaned_lines = _preprocess(text)
    header = _parse_metadata(cleaned_lines)
    body = _parse_expression_block(cleaned_lines)

    return ParsedExpression(
        name=header.get("name", "unknown"),
        metadata=header,
        expression=body,
        raw_text=text,
    )
126
+
127
+
128
+ # ------------------------------------------------------------
129
+ # Internal Helpers
130
+ # ------------------------------------------------------------
131
+
132
+ def _preprocess(text: str) -> List[str]:
133
+ cleaned = []
134
+ for line in text.splitlines():
135
+ stripped = line.strip()
136
+ if not stripped:
137
+ continue
138
+ if stripped.startswith("#"):
139
+ continue
140
+ cleaned.append(stripped)
141
+ return cleaned
142
+
143
+
144
+ def _parse_metadata(lines: List[str]) -> Dict[str, str]:
145
+ metadata = {}
146
+
147
+ for line in lines:
148
+ if line.lower().startswith("expression:"):
149
+ break
150
+
151
+ if ":" not in line:
152
+ log_warning(f"[parser] Invalid metadata line: {line}")
153
+ continue
154
+
155
+ key, value = line.split(":", 1)
156
+ metadata[key.strip()] = value.strip()
157
+
158
+ return metadata
159
+
160
+
161
+ def _parse_expression_block(lines: List[str]) -> str:
162
+ expr_lines = []
163
+ in_expr = False
164
+
165
+ for line in lines:
166
+ if line.lower().startswith("expression:"):
167
+ in_expr = True
168
+ continue
169
+
170
+ if in_expr:
171
+ expr_lines.append(line)
172
+
173
+ if not expr_lines:
174
+ log_warning("[parser] No expression block found")
175
+
176
+ return "\n".join(expr_lines).strip()