additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +58 -14
- additory/common/__init__.py +31 -147
- additory/common/column_selector.py +255 -0
- additory/common/distributions.py +286 -613
- additory/common/extractors.py +313 -0
- additory/common/knn_imputation.py +332 -0
- additory/common/result.py +380 -0
- additory/common/strategy_parser.py +243 -0
- additory/common/unit_conversions.py +338 -0
- additory/common/validation.py +283 -103
- additory/core/__init__.py +34 -22
- additory/core/backend.py +258 -0
- additory/core/config.py +177 -305
- additory/core/logging.py +230 -24
- additory/core/memory_manager.py +157 -495
- additory/expressions/__init__.py +2 -23
- additory/expressions/compiler.py +457 -0
- additory/expressions/engine.py +264 -487
- additory/expressions/integrity.py +179 -0
- additory/expressions/loader.py +263 -0
- additory/expressions/parser.py +363 -167
- additory/expressions/resolver.py +274 -0
- additory/functions/__init__.py +1 -0
- additory/functions/analyze/__init__.py +144 -0
- additory/functions/analyze/cardinality.py +58 -0
- additory/functions/analyze/correlations.py +66 -0
- additory/functions/analyze/distributions.py +53 -0
- additory/functions/analyze/duplicates.py +49 -0
- additory/functions/analyze/features.py +61 -0
- additory/functions/analyze/imputation.py +66 -0
- additory/functions/analyze/outliers.py +65 -0
- additory/functions/analyze/patterns.py +65 -0
- additory/functions/analyze/presets.py +72 -0
- additory/functions/analyze/quality.py +59 -0
- additory/functions/analyze/timeseries.py +53 -0
- additory/functions/analyze/types.py +45 -0
- additory/functions/expressions/__init__.py +161 -0
- additory/functions/snapshot/__init__.py +82 -0
- additory/functions/snapshot/filter.py +119 -0
- additory/functions/synthetic/__init__.py +113 -0
- additory/functions/synthetic/mode_detector.py +47 -0
- additory/functions/synthetic/strategies/__init__.py +1 -0
- additory/functions/synthetic/strategies/advanced.py +35 -0
- additory/functions/synthetic/strategies/augmentative.py +160 -0
- additory/functions/synthetic/strategies/generative.py +168 -0
- additory/functions/synthetic/strategies/presets.py +116 -0
- additory/functions/to/__init__.py +188 -0
- additory/functions/to/lookup.py +351 -0
- additory/functions/to/merge.py +189 -0
- additory/functions/to/sort.py +91 -0
- additory/functions/to/summarize.py +170 -0
- additory/functions/transform/__init__.py +140 -0
- additory/functions/transform/datetime.py +79 -0
- additory/functions/transform/extract.py +85 -0
- additory/functions/transform/harmonize.py +105 -0
- additory/functions/transform/knn.py +62 -0
- additory/functions/transform/onehotencoding.py +68 -0
- additory/functions/transform/transpose.py +42 -0
- additory-0.1.1a1.dist-info/METADATA +83 -0
- additory-0.1.1a1.dist-info/RECORD +62 -0
- additory/analysis/__init__.py +0 -48
- additory/analysis/cardinality.py +0 -126
- additory/analysis/correlations.py +0 -124
- additory/analysis/distributions.py +0 -376
- additory/analysis/quality.py +0 -158
- additory/analysis/scan.py +0 -400
- additory/common/backend.py +0 -371
- additory/common/column_utils.py +0 -191
- additory/common/exceptions.py +0 -62
- additory/common/lists.py +0 -229
- additory/common/patterns.py +0 -240
- additory/common/resolver.py +0 -567
- additory/common/sample_data.py +0 -182
- additory/core/ast_builder.py +0 -165
- additory/core/backends/__init__.py +0 -23
- additory/core/backends/arrow_bridge.py +0 -483
- additory/core/backends/cudf_bridge.py +0 -355
- additory/core/column_positioning.py +0 -358
- additory/core/compiler_polars.py +0 -166
- additory/core/enhanced_cache_manager.py +0 -1119
- additory/core/enhanced_matchers.py +0 -473
- additory/core/enhanced_version_manager.py +0 -325
- additory/core/executor.py +0 -59
- additory/core/integrity_manager.py +0 -477
- additory/core/loader.py +0 -190
- additory/core/namespace_manager.py +0 -657
- additory/core/parser.py +0 -176
- additory/core/polars_expression_engine.py +0 -601
- additory/core/registry.py +0 -176
- additory/core/sample_data_manager.py +0 -492
- additory/core/user_namespace.py +0 -751
- additory/core/validator.py +0 -27
- additory/dynamic_api.py +0 -304
- additory/expressions/proxy.py +0 -549
- additory/expressions/registry.py +0 -313
- additory/expressions/samples.py +0 -492
- additory/synthetic/__init__.py +0 -13
- additory/synthetic/column_name_resolver.py +0 -149
- additory/synthetic/distributions.py +0 -22
- additory/synthetic/forecast.py +0 -1132
- additory/synthetic/linked_list_parser.py +0 -415
- additory/synthetic/namespace_lookup.py +0 -129
- additory/synthetic/smote.py +0 -320
- additory/synthetic/strategies.py +0 -850
- additory/synthetic/synthesizer.py +0 -713
- additory/utilities/__init__.py +0 -53
- additory/utilities/encoding.py +0 -600
- additory/utilities/games.py +0 -300
- additory/utilities/keys.py +0 -8
- additory/utilities/lookup.py +0 -103
- additory/utilities/matchers.py +0 -216
- additory/utilities/resolvers.py +0 -286
- additory/utilities/settings.py +0 -167
- additory/utilities/units.py +0 -749
- additory/utilities/validators.py +0 -153
- additory-0.1.0a3.dist-info/METADATA +0 -288
- additory-0.1.0a3.dist-info/RECORD +0 -71
- additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
- {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
- {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
additory/common/lists.py
DELETED
|
@@ -1,229 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
List File Management
|
|
3
|
-
|
|
4
|
-
Handles loading and parsing of .list files containing static value lists.
|
|
5
|
-
|
|
6
|
-
File Format (.list):
|
|
7
|
-
[lists]
|
|
8
|
-
first_names = Arjun, Vikram, Samuel, James, Mary
|
|
9
|
-
last_names = Sharma, Kumar, Smith, Johnson
|
|
10
|
-
|
|
11
|
-
[relationships]
|
|
12
|
-
first_names[0] = last_names[0, 1]
|
|
13
|
-
|
|
14
|
-
Usage:
|
|
15
|
-
from additory.common.lists import load_list_file, get_list_values
|
|
16
|
-
|
|
17
|
-
lists = load_list_file("reference/schema_definitions/global.list")
|
|
18
|
-
first_names = get_list_values("first_names", lists)
|
|
19
|
-
"""
|
|
20
|
-
|
|
21
|
-
from typing import Dict, List, Optional
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
import re
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class ListFileError(Exception):
    """Error raised when a .list file cannot be loaded or parsed."""
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def parse_list_file(content: str) -> Dict[str, List[str]]:
    """
    Parse .list file content into dictionary of lists.

    Format:
        [lists]
        list_name = value1, value2, value3

        [relationships]
        list1[0] = list2[1, 2]

    A ``#`` starts a comment only when it is the first character of the
    line or is preceded by whitespace, so values such as ``C#`` are kept
    intact. Only entries inside the ``[lists]`` section are parsed; the
    ``[relationships]`` section, unknown sections, and lines that appear
    before any section header are ignored. When a list name is defined
    more than once, the last definition wins.

    Args:
        content: File content as string

    Returns:
        Dictionary mapping list names to value lists

    Raises:
        ListFileError: If a line in the [lists] section has no '=',
            has an invalid list name, or defines no values

    Example:
        >>> content = '''
        ... [lists]
        ... names = Alice, Bob, Charlie   # trailing comment
        ... languages = C#, F#, Java
        ... '''
        >>> lists = parse_list_file(content)
        >>> lists['names']
        ['Alice', 'Bob', 'Charlie']
        >>> lists['languages']
        ['C#', 'F#', 'Java']
    """
    # Compiled once instead of on every line of the file.
    comment_re = re.compile(r'(?:^|\s)#.*')
    name_re = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')

    lists: Dict[str, List[str]] = {}
    current_section = None

    for line_num, raw_line in enumerate(content.split('\n'), 1):
        # Drop the comment (if any) and surrounding whitespace.
        line = comment_re.sub('', raw_line).strip()
        if not line:
            continue

        # Section header, e.g. "[lists]".
        if line.startswith('[') and line.endswith(']'):
            current_section = line[1:-1].strip()
            continue

        # Everything outside [lists] is skipped: [relationships] is
        # reserved for Phase II and unknown sections are tolerated for
        # future extensions.
        if current_section != 'lists':
            continue

        name, separator, values_str = line.partition('=')
        if not separator:
            raise ListFileError(
                f"Line {line_num}: Invalid format. Expected 'name = value1, value2, ...'"
            )
        name = name.strip()

        if not name_re.match(name):
            raise ListFileError(
                f"Line {line_num}: Invalid list name '{name}'. "
                f"Must start with letter/underscore and contain only alphanumeric/underscore."
            )

        # Split on commas, trim each item, and discard empty items.
        values = [item for item in (v.strip() for v in values_str.split(',')) if item]
        if not values:
            raise ListFileError(
                f"Line {line_num}: List '{name}' has no values"
            )

        lists[name] = values

    return lists
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def load_list_file(filepath: str) -> Dict[str, List[str]]:
    """
    Load and parse a .list file.

    Args:
        filepath: Path to .list file

    Returns:
        Dictionary mapping list names to value lists

    Raises:
        ListFileError: If the path does not exist, does not end in
            ``.list``, cannot be read as UTF-8 text, or fails to parse.
            Errors wrapped from a lower-level exception keep that
            exception as ``__cause__``.

    Example:
        >>> lists = load_list_file("reference/schema_definitions/global.list")
        >>> lists['first_names']
        ['Arjun', 'Vikram', 'Samuel', ...]
    """
    path = Path(filepath)

    if not path.exists():
        raise ListFileError(f"List file not found: {filepath}")

    if path.suffix != '.list':
        raise ListFileError(f"File must have .list extension: {filepath}")

    # Reading and parsing are kept apart so that an I/O problem is not
    # reported as a parse failure.
    try:
        content = path.read_text(encoding='utf-8')
    except (UnicodeDecodeError, OSError) as e:
        raise ListFileError(f"Failed to read file {filepath}: {e}") from e

    try:
        return parse_list_file(content)
    except ListFileError:
        # Already carries a line-specific message; pass it through as is.
        raise
    except Exception as e:
        raise ListFileError(f"Failed to parse {filepath}: {e}") from e
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def get_list_values(list_name: str, lists: Dict[str, List[str]]) -> Optional[List[str]]:
    """
    Look up the values stored under one list name.

    Args:
        list_name: Name of the list
        lists: Dictionary of lists (from load_list_file or parse_list_file)

    Returns:
        The value list stored under list_name, or None when the name is
        not present

    Example:
        >>> get_list_values("names", {"names": ["Alice", "Bob"]})
        ['Alice', 'Bob']
        >>> get_list_values("missing", {"names": ["Alice", "Bob"]}) is None
        True
    """
    return lists[list_name] if list_name in lists else None
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
def list_all_lists(lists: Dict[str, List[str]]) -> List[str]:
    """
    Return the names of every list in the dictionary.

    Args:
        lists: Dictionary of lists

    Returns:
        A new list holding the dictionary's keys in iteration order

    Example:
        >>> list_all_lists({"first_names": ["Arjun"], "banks": ["HDFC"]})
        ['first_names', 'banks']
    """
    return [name for name in lists]
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
def validate_list_file(filepath: str) -> tuple[bool, List[str]]:
    """
    Check a .list file and report the problems found instead of raising.

    Args:
        filepath: Path to .list file

    Returns:
        Tuple of (is_valid, error_messages); is_valid is True exactly
        when error_messages is empty

    Example:
        >>> is_valid, errors = validate_list_file("global.list")
        >>> if not is_valid:
        ...     for error in errors:
        ...         print(error)
    """
    try:
        loaded = load_list_file(filepath)

        # A file that defines no lists at all is reported as a problem.
        problems = [] if loaded else ["File contains no lists"]

        # Repeated names need no check here: the dictionary holds one
        # entry per name. Lists with no values are reported individually.
        problems.extend(
            f"List '{name}' is empty"
            for name, values in loaded.items()
            if not values
        )

        return (not problems, problems)

    except ListFileError as exc:
        return (False, [str(exc)])
    except Exception as exc:
        return (False, [f"Unexpected error: {exc}"])
|
additory/common/patterns.py
DELETED
|
@@ -1,240 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Pattern File Management
|
|
3
|
-
|
|
4
|
-
Handles loading and parsing of .properties files containing regex patterns.
|
|
5
|
-
|
|
6
|
-
File Format (.properties):
|
|
7
|
-
# Email patterns
|
|
8
|
-
email_generic = [A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\\\.[A-Z|a-z]{2,}
|
|
9
|
-
email_us = [A-Za-z0-9._%+-]+@(gmail|yahoo|outlook)\\.com
|
|
10
|
-
|
|
11
|
-
# Phone patterns
|
|
12
|
-
phone_us = \\+1-\\d{3}-\\d{3}-\\d{4}
|
|
13
|
-
phone_in = \\+91-\\d{10}
|
|
14
|
-
|
|
15
|
-
Usage:
|
|
16
|
-
from additory.common.patterns import load_properties_file, get_pattern
|
|
17
|
-
|
|
18
|
-
patterns = load_properties_file("reference/schema_definitions/global.properties")
|
|
19
|
-
email_pattern = get_pattern("email_generic", patterns)
|
|
20
|
-
"""
|
|
21
|
-
|
|
22
|
-
from typing import Dict, Optional, List
|
|
23
|
-
from pathlib import Path
|
|
24
|
-
import re
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class PatternFileError(Exception):
    """Error raised when a .properties pattern file cannot be loaded or parsed."""
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def parse_properties_file(content: str) -> Dict[str, str]:
    r"""
    Parse .properties file content into dictionary of patterns.

    Format:
        # Comment
        pattern_name = regex_pattern
        another_pattern = another_regex   # inline comment

    Comment handling is designed not to damage regexes that contain
    ``#``:

    * a line whose first non-blank character is ``#`` is a comment;
    * inside a value, ``#`` starts an inline comment only when it is
      preceded by whitespace, so ``#[0-9a-f]{6}`` and ``[a-z]+#\d+`` are
      kept whole. A pattern that needs a literal ``#`` after whitespace
      cannot be expressed and is cut at that point.

    Args:
        content: File content as string

    Returns:
        Dictionary mapping pattern names to regex patterns

    Raises:
        PatternFileError: If a line has no '=', has an invalid pattern
            name, has an empty pattern, or repeats an earlier name

    Example:
        >>> patterns = parse_properties_file(r'''
        ... # Colours and phones
        ... hex_color = #[0-9a-fA-F]{6}
        ... phone = \+1-\d{3}-\d{3}-\d{4}   # US format
        ... ''')
        >>> patterns['hex_color']
        '#[0-9a-fA-F]{6}'
        >>> print(patterns['phone'])
        \+1-\d{3}-\d{3}-\d{4}
    """
    # Compiled once instead of on every line of the file.
    inline_comment_re = re.compile(r'\s+#.*')
    name_re = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')

    patterns: Dict[str, str] = {}

    for line_num, raw_line in enumerate(content.split('\n'), 1):
        line = raw_line.strip()

        # Skip blank lines and whole-line comments.
        if not line or line.startswith('#'):
            continue

        name, separator, pattern = line.partition('=')
        if not separator:
            raise PatternFileError(
                f"Line {line_num}: Invalid format. Expected 'name = pattern'"
            )
        name = name.strip()

        # Strip first so that a value beginning with '#' is not mistaken
        # for an inline comment, then remove a whitespace-preceded one.
        pattern = inline_comment_re.sub('', pattern.strip(), count=1).strip()

        if not name_re.match(name):
            raise PatternFileError(
                f"Line {line_num}: Invalid pattern name '{name}'. "
                f"Must start with letter/underscore and contain only alphanumeric/underscore."
            )

        if not pattern:
            raise PatternFileError(
                f"Line {line_num}: Pattern '{name}' has no value"
            )

        if name in patterns:
            raise PatternFileError(
                f"Line {line_num}: Duplicate pattern name '{name}'"
            )

        patterns[name] = pattern

    return patterns
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
def load_properties_file(filepath: str) -> Dict[str, str]:
    """
    Load and parse a .properties file.

    Args:
        filepath: Path to .properties file

    Returns:
        Dictionary mapping pattern names to regex patterns

    Raises:
        PatternFileError: If the path does not exist, does not end in
            ``.properties``, cannot be read as UTF-8 text, or fails to
            parse. Errors wrapped from a lower-level exception keep that
            exception as ``__cause__``.

    Example:
        >>> patterns = load_properties_file("reference/schema_definitions/global.properties")
        >>> sorted(patterns)[:2]
        ['email_generic', 'email_us']
    """
    path = Path(filepath)

    if not path.exists():
        raise PatternFileError(f"Pattern file not found: {filepath}")

    if path.suffix != '.properties':
        raise PatternFileError(f"File must have .properties extension: {filepath}")

    # Reading and parsing are kept apart so that an I/O problem is not
    # reported as a parse failure.
    try:
        content = path.read_text(encoding='utf-8')
    except (UnicodeDecodeError, OSError) as e:
        raise PatternFileError(f"Failed to read file {filepath}: {e}") from e

    try:
        return parse_properties_file(content)
    except PatternFileError:
        # Already carries a line-specific message; pass it through as is.
        raise
    except Exception as e:
        raise PatternFileError(f"Failed to parse {filepath}: {e}") from e
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def get_pattern(pattern_name: str, patterns: Dict[str, str]) -> Optional[str]:
    r"""
    Look up the regex stored under one pattern name.

    Args:
        pattern_name: Name of the pattern
        patterns: Dictionary of patterns (from load_properties_file or parse_properties_file)

    Returns:
        The regex string stored under pattern_name, or None when the
        name is not present

    Example:
        >>> get_pattern("phone_in", {"phone_in": r"\+91-\d{10}"})
        '\\+91-\\d{10}'
        >>> get_pattern("missing", {}) is None
        True
    """
    return patterns[pattern_name] if pattern_name in patterns else None
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
def list_all_patterns(patterns: Dict[str, str]) -> List[str]:
    """
    Return the names of every pattern in the dictionary.

    Args:
        patterns: Dictionary of patterns

    Returns:
        A new list holding the dictionary's keys in iteration order

    Example:
        >>> list_all_patterns({"email_us": "x", "phone_us": "y"})
        ['email_us', 'phone_us']
    """
    return [name for name in patterns]
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
def validate_properties_file(filepath: str) -> tuple[bool, List[str]]:
    """
    Check a .properties file and report the problems found instead of raising.

    Args:
        filepath: Path to .properties file

    Returns:
        Tuple of (is_valid, error_messages); is_valid is True exactly
        when error_messages is empty

    Example:
        >>> is_valid, errors = validate_properties_file("global.properties")
        >>> if not is_valid:
        ...     for error in errors:
        ...         print(error)
    """
    try:
        loaded = load_properties_file(filepath)

        # A file that defines no patterns at all is reported as a problem.
        problems = [] if loaded else ["File contains no patterns"]

        # Patterns with an empty value are reported individually.
        problems.extend(
            f"Pattern '{name}' is empty"
            for name, pattern in loaded.items()
            if not pattern
        )

        return (not problems, problems)

    except PatternFileError as exc:
        return (False, [str(exc)])
    except Exception as exc:
        return (False, [f"Unexpected error: {exc}"])
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
def is_regex_pattern(value: str) -> bool:
    r"""
    Report whether a string contains any regex special character.

    The characters treated as special are: \ [ ] ( ) { } + * ? ^ $ | .

    Args:
        value: String to check

    Returns:
        True when at least one of the characters above occurs in value,
        False otherwise (including for the empty string)

    Example:
        >>> is_regex_pattern(r"CUST\d{8}")
        True
        >>> is_regex_pattern("first_names")
        False
        >>> is_regex_pattern("[A-Z]+")
        True
    """
    # Same character set the search is defined over, spelled out literally.
    special_chars = frozenset('\\[](){}+*?^$|.')
    return any(char in special_chars for char in value)
|