additory 0.1.0a2__py3-none-any.whl → 0.1.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. additory/__init__.py +4 -0
  2. additory/common/__init__.py +2 -2
  3. additory/common/backend.py +20 -4
  4. additory/common/distributions.py +1 -1
  5. additory/common/sample_data.py +19 -19
  6. additory/core/backends/arrow_bridge.py +7 -0
  7. additory/core/polars_expression_engine.py +66 -16
  8. additory/dynamic_api.py +42 -46
  9. additory/expressions/proxy.py +4 -1
  10. additory/synthetic/__init__.py +7 -95
  11. additory/synthetic/column_name_resolver.py +149 -0
  12. additory/{augment → synthetic}/distributions.py +2 -2
  13. additory/{augment → synthetic}/forecast.py +1 -1
  14. additory/synthetic/linked_list_parser.py +415 -0
  15. additory/synthetic/namespace_lookup.py +129 -0
  16. additory/{augment → synthetic}/smote.py +1 -1
  17. additory/{augment → synthetic}/strategies.py +11 -44
  18. additory/{augment/augmentor.py → synthetic/synthesizer.py} +75 -15
  19. additory/utilities/units.py +4 -1
  20. {additory-0.1.0a2.dist-info → additory-0.1.0a3.dist-info}/METADATA +10 -17
  21. {additory-0.1.0a2.dist-info → additory-0.1.0a3.dist-info}/RECORD +24 -40
  22. {additory-0.1.0a2.dist-info → additory-0.1.0a3.dist-info}/WHEEL +1 -1
  23. additory/augment/__init__.py +0 -24
  24. additory/augment/builtin_lists.py +0 -430
  25. additory/augment/list_registry.py +0 -177
  26. additory/synthetic/api.py +0 -220
  27. additory/synthetic/common_integration.py +0 -314
  28. additory/synthetic/config.py +0 -262
  29. additory/synthetic/engines.py +0 -529
  30. additory/synthetic/exceptions.py +0 -180
  31. additory/synthetic/file_managers.py +0 -518
  32. additory/synthetic/generator.py +0 -702
  33. additory/synthetic/generator_parser.py +0 -68
  34. additory/synthetic/integration.py +0 -319
  35. additory/synthetic/models.py +0 -241
  36. additory/synthetic/pattern_resolver.py +0 -573
  37. additory/synthetic/performance.py +0 -469
  38. additory/synthetic/polars_integration.py +0 -464
  39. additory/synthetic/proxy.py +0 -60
  40. additory/synthetic/schema_parser.py +0 -685
  41. additory/synthetic/validator.py +0 -553
  42. {additory-0.1.0a2.dist-info → additory-0.1.0a3.dist-info}/licenses/LICENSE +0 -0
  43. {additory-0.1.0a2.dist-info → additory-0.1.0a3.dist-info}/top_level.txt +0 -0
additory/augment/list_registry.py DELETED
@@ -1,177 +0,0 @@
- """
- List Registry Manager for Data Augmentation
-
- Manages user-registered lists and provides access to built-in lists.
-
- List Resolution Order:
- 1. User-registered lists (highest priority)
- 2. Built-in lists
- 3. Error if not found
- """
-
- from typing import List, Optional, Dict, Any
-
- from additory.common.exceptions import ValidationError
- from additory.augment.builtin_lists import BUILTIN_LISTS, list_builtin_names
-
-
- # Global registry for user-registered lists
- _USER_LISTS: Dict[str, List[Any]] = {}
-
-
- def register_list(name: str, values: List[Any]) -> None:
-     """
-     Register a custom list for use in augmentation strategies.
-
-     User-registered lists take priority over built-in lists with the same name.
-
-     Args:
-         name: List name (e.g., "my_custom_list")
-         values: List of values
-
-     Raises:
-         ValidationError: If parameters are invalid
-
-     Examples:
-         >>> add.register_list("custom_statuses", ["New", "Processing", "Done"])
-         >>> add.register_list("banks", ["My Bank", "Your Bank"])  # Overrides built-in
-     """
-     if not isinstance(name, str) or not name.strip():
-         raise ValidationError("List name must be a non-empty string")
-
-     if not isinstance(values, list):
-         raise ValidationError("Values must be a list")
-
-     if len(values) == 0:
-         raise ValidationError("List must contain at least one value")
-
-     # Store in user registry
-     _USER_LISTS[name] = values
-
-
- def get_list(name: str) -> List[Any]:
-     """
-     Get a list by name (user-registered or built-in).
-
-     Resolution order:
-     1. User-registered lists
-     2. Built-in lists
-     3. Raise error if not found
-
-     Args:
-         name: List name
-
-     Returns:
-         List of values
-
-     Raises:
-         ValidationError: If list not found
-     """
-     # Check user-registered lists first
-     if name in _USER_LISTS:
-         return _USER_LISTS[name]
-
-     # Check built-in lists
-     if name in BUILTIN_LISTS:
-         return BUILTIN_LISTS[name]
-
-     # Not found
-     raise ValidationError(
-         f"List '{name}' not found. "
-         f"Use add.list_available() to see available lists or "
-         f"add.register_list('{name}', [...]) to create it."
-     )
-
-
- def list_exists(name: str) -> bool:
-     """
-     Check if a list exists (user-registered or built-in).
-
-     Args:
-         name: List name
-
-     Returns:
-         True if list exists, False otherwise
-     """
-     return name in _USER_LISTS or name in BUILTIN_LISTS
-
-
- def list_available() -> Dict[str, int]:
-     """
-     Get all available lists with their sizes.
-
-     Returns:
-         Dictionary mapping list names to their sizes
-         Format: {"list_name": count, ...}
-
-     Examples:
-         >>> lists = add.list_available()
-         >>> print(lists)
-         {'first_names': 200, 'banks': 120, 'my_custom_list': 5, ...}
-     """
-     result = {}
-
-     # Add built-in lists
-     for name, values in BUILTIN_LISTS.items():
-         result[name] = len(values)
-
-     # Add user-registered lists (may override built-in counts)
-     for name, values in _USER_LISTS.items():
-         result[name] = len(values)
-
-     return result
-
-
- def list_show(name: str) -> List[Any]:
-     """
-     Show the contents of a list.
-
-     Args:
-         name: List name
-
-     Returns:
-         List of values
-
-     Raises:
-         ValidationError: If list not found
-
-     Examples:
-         >>> add.list_show("statuses")
-         ['Active', 'Inactive', 'Pending', 'Completed', ...]
-     """
-     return get_list(name)
-
-
- def list_remove(name: str) -> bool:
-     """
-     Remove a user-registered list.
-
-     Note: Cannot remove built-in lists. If a user-registered list
-     overrides a built-in list, removing it will restore the built-in.
-
-     Args:
-         name: List name
-
-     Returns:
-         True if list was removed, False if not found
-
-     Examples:
-         >>> add.register_list("temp_list", ["a", "b"])
-         >>> add.list_remove("temp_list")
-         True
-         >>> add.list_remove("temp_list")
-         False
-     """
-     if name in _USER_LISTS:
-         del _USER_LISTS[name]
-         return True
-     return False
-
-
- def clear_user_lists() -> None:
-     """
-     Clear all user-registered lists.
-
-     Built-in lists are not affected.
-     """
-     _USER_LISTS.clear()
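
For reference, the registry workflow documented in the docstrings above looked roughly like the following sketch. It assumes the 0.1.0a2-era package exposed these helpers through an `add` namespace, as the doctest examples suggest; none of these functions ship in 0.1.0a3.

# Sketch only: based on the docstrings of the removed additory/augment/list_registry.py.
# The `add` alias for the additory namespace is an assumption taken from the doctests.
import additory as add

add.register_list("custom_statuses", ["New", "Processing", "Done"])  # user-registered list
assert add.list_exists("custom_statuses")       # resolved from the user registry first
print(add.list_available())                     # {"custom_statuses": 3, ...built-ins...}
print(add.list_show("custom_statuses"))         # ['New', 'Processing', 'Done']
add.list_remove("custom_statuses")              # True; a second call returns False
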
additory/synthetic/api.py DELETED
@@ -1,220 +0,0 @@
- """
- Main API interface for synthetic data generation.
-
- Provides the primary user-facing functions for generating synthetic data
- with support for different output engines and configuration management.
- """
-
- from typing import Union, Optional, Type
- import pandas as pd
- import polars as pl
- import os
- from pathlib import Path
-
- from .config import SyntheticConfig
- from .exceptions import SyntheticDataError, ValidationError
- from .integration import SyntheticDataIntegrator
- from .generator import GenerationConfig
- from .engines import DistributionEngineFactory, DistributionEngine
-
-
- # Global configuration instance
- config = SyntheticConfig()
-
-
- def synth(schema_path: str, rows: int = 1000,
-           engine: Optional[str] = None) -> Union[pd.DataFrame, pl.DataFrame]:
-     """
-     Generate synthetic data from a schema file.
-
-     Args:
-         schema_path: Path to the .toml schema file
-         rows: Number of rows to generate (default: 1000)
-         engine: Output engine ("pandas" or "polars"). If None, uses default from config
-
-     Returns:
-         Generated DataFrame in the specified format
-
-     Raises:
-         SyntheticDataError: If generation fails
-         ValidationError: If schema validation fails
-         FileNotFoundError: If schema file doesn't exist
-
-     Examples:
-         >>> df = synth("customer.toml", rows=5000)  # pandas DataFrame
-         >>> df = synth("customer.toml", rows=5000, engine="polars")  # polars DataFrame
-     """
-     # Validate inputs
-     if rows <= 0:
-         raise ValueError("Number of rows must be positive")
-
-     # Determine output engine
-     output_engine = engine if engine is not None else config.get_default_engine()
-     if output_engine not in ["pandas", "polars"]:
-         raise ValueError(f"Unsupported engine: {output_engine}. Must be 'pandas' or 'polars'")
-
-     # Resolve schema path if it's relative to the configured base path
-     resolved_schema_path = _resolve_schema_path(schema_path)
-
-     # Create generation configuration
-     generation_config = GenerationConfig(
-         batch_size=config.get_default_batch_size(),
-         seed=None,  # Use random seed by default
-         validate_patterns=config.is_validation_enabled()
-     )
-
-     # Create integrator and generate data
-     integrator = SyntheticDataIntegrator(generation_config)
-
-     try:
-         result = integrator.generate_from_schema_file(
-             schema_path=resolved_schema_path,
-             target_rows=rows,
-             output_engine=output_engine
-         )
-         return result.dataframe
-
-     except ValidationError:
-         # Re-raise validation errors as-is
-         raise
-     except Exception as e:
-         # Wrap other exceptions in SyntheticDataError
-         raise SyntheticDataError(f"Failed to generate synthetic data: {e}") from e
-
-
- def register_distribution_engine(engine_class: Type[DistributionEngine]) -> None:
-     """
-     Register a custom distribution engine for use in synthetic data generation.
-
-     Custom engines allow you to implement your own distribution strategies beyond
-     the built-in ones (equal, custom, categorical, high_cardinality, numeric_range, skewed).
-
-     Args:
-         engine_class: A class that inherits from DistributionEngine and implements
-             the required methods (supports_strategy, validate_config, apply_distribution)
-
-     Raises:
-         ValidationError: If the engine class is invalid or already registered
-
-     Examples:
-         >>> from additory.synthetic.engines import DistributionEngine, DistributionConfig
-         >>> from additory.synthetic.models import DistributionType, ValidationResult
-         >>> import polars as pl
-         >>>
-         >>> class GaussianDistributionEngine(DistributionEngine):
-         ...     def supports_strategy(self, strategy_type):
-         ...         return strategy_type == DistributionType.CUSTOM and strategy_type.value == "gaussian"
-         ...
-         ...     def validate_config(self, config: DistributionConfig) -> ValidationResult:
-         ...         result = self._validate_base_requirements(config)
-         ...         # Add custom validation
-         ...         return result
-         ...
-         ...     def apply_distribution(self, config: DistributionConfig) -> pl.Series:
-         ...         # Implement gaussian distribution logic
-         ...         pass
-         >>>
-         >>> register_distribution_engine(GaussianDistributionEngine)
-     """
-     DistributionEngineFactory.register_custom_engine(engine_class)
-
-
- def unregister_distribution_engine(engine_class: Type[DistributionEngine]) -> None:
-     """
-     Unregister a previously registered custom distribution engine.
-
-     Args:
-         engine_class: The engine class to unregister
-
-     Raises:
-         ValidationError: If the engine is not registered
-
-     Examples:
-         >>> unregister_distribution_engine(GaussianDistributionEngine)
-     """
-     DistributionEngineFactory.unregister_custom_engine(engine_class)
-
-
- def list_custom_distribution_engines() -> list:
-     """
-     Get a list of all registered custom distribution engines.
-
-     Returns:
-         List of custom engine classes
-
-     Examples:
-         >>> engines = list_custom_distribution_engines()
-         >>> for engine in engines:
-         ...     print(engine.__name__)
-     """
-     return DistributionEngineFactory.list_custom_engines()
-
-
- def _resolve_schema_path(schema_path: str) -> str:
-     """
-     Resolve schema path, checking both absolute and relative to config base path.
-
-     Args:
-         schema_path: Input schema path
-
-     Returns:
-         Resolved absolute path to schema file
-
-     Raises:
-         FileNotFoundError: If schema file cannot be found
-     """
-     # If it's an absolute path or exists as-is, use it directly
-     if os.path.isabs(schema_path) or os.path.exists(schema_path):
-         if os.path.exists(schema_path):
-             return schema_path
-         else:
-             raise FileNotFoundError(f"Schema file not found: {schema_path}")
-
-     # Try resolving relative to the configured base path
-     resolved_path = config.resolve_schema_path(schema_path)
-     if resolved_path.exists():
-         return str(resolved_path)
-
-     # Try current working directory
-     cwd_path = Path.cwd() / schema_path
-     if cwd_path.exists():
-         return str(cwd_path)
-
-     # File not found in any location
-     raise FileNotFoundError(
-         f"Schema file not found: {schema_path}. "
-         f"Searched in: current directory, {config.get_schema_base_path()}"
-     )
-
-
- def augment(df: Union[pd.DataFrame, pl.DataFrame],
-             schema_path: str, **kwargs) -> Union[pd.DataFrame, pl.DataFrame]:
-     """
-     Augment existing DataFrame with synthetic columns.
-
-     This function will be implemented in future phases to support data augmentation
-     for class balancing and other use cases.
-
-     Args:
-         df: Input DataFrame to augment
-         schema_path: Path to the .toml schema file
-         **kwargs: Additional augmentation parameters
-
-     Returns:
-         Augmented DataFrame in the same format as input
-
-     Raises:
-         NotImplementedError: This feature is planned for future implementation
-     """
-     raise NotImplementedError("augment() function is planned for future implementation")
-
-
- # Export the config object and plugin functions for user access
- __all__ = [
-     'synth',
-     'augment',
-     'config',
-     'register_distribution_engine',
-     'unregister_distribution_engine',
-     'list_custom_distribution_engines'
- ]
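
The removed additory/synthetic/api.py above centred on the schema-driven synth() entry point. A minimal usage sketch, taken from its own docstring examples ("customer.toml" is a placeholder schema file, and this import path no longer exists in 0.1.0a3):

# Sketch only: mirrors the docstring examples of the removed additory/synthetic/api.py.
# "customer.toml" is a hypothetical schema file; the module is gone in 0.1.0a3.
from additory.synthetic.api import synth

df_pandas = synth("customer.toml", rows=5000)                   # default engine (pandas)
df_polars = synth("customer.toml", rows=5000, engine="polars")  # explicit polars output
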
additory/synthetic/common_integration.py DELETED
@@ -1,314 +0,0 @@
- """
- Common Module Integration for Synthetic Data Generation
-
- Provides integration between the common module (lists, patterns, resolver)
- and the synthetic data generation system.
-
- This module:
- - Wraps common/resolver.py for synthetic-specific needs
- - Handles .list and .properties file loading
- - Implements prefer_mode logic
- - Provides pattern type detection
- - Maintains backward compatibility
- """
-
- from typing import Union, List, Optional, Dict, Any
- from pathlib import Path
- import logging
-
- from additory.common.resolver import (
-     PatternResolver,
-     resolve_pattern,
-     PreferMode,
-     PatternResolutionResult,
- )
- from additory.common.lists import load_list_file, ListFileError
- from additory.common.patterns import load_properties_file, is_regex_pattern, PatternFileError
-
- from .exceptions import PatternResolutionError, ValidationError
-
-
- logger = logging.getLogger(__name__)
-
-
- class SyntheticPatternLoader:
-     """
-     Pattern loader for synthetic data generation.
-
-     Integrates with common module to provide:
-     - .list file loading
-     - .properties file loading
-     - Fallback resolution
-     - Pattern type detection
-     - Prefer mode support
-     """
-
-     def __init__(self, base_path: str = "reference/schema_definitions"):
-         """
-         Initialize the pattern loader.
-
-         Args:
-             base_path: Base directory for pattern files
-         """
-         self.base_path = Path(base_path)
-         self.resolver = PatternResolver(base_path=str(self.base_path))
-
-         # Cache for loaded files
-         self._list_cache: Dict[str, Dict[str, List[str]]] = {}
-         self._properties_cache: Dict[str, Dict[str, str]] = {}
-
-     def load_pattern(
-         self,
-         pattern_value: Union[str, List[str]],
-         imports: List[str],
-         prefer_mode: str = "default"
-     ) -> tuple[Union[List[str], str], str]:
-         """
-         Load a pattern with fallback resolution.
-
-         Args:
-             pattern_value: Pattern value from TOML (string reference, array list, or inline regex)
-             imports: List of imports from TOML (e.g., ["global", "finance"])
-             prefer_mode: Resolution preference ("default", "list_only", "regex_only")
-
-         Returns:
-             Tuple of (resolved_value, pattern_type)
-             - resolved_value: List of values (for lists) or regex string (for regex)
-             - pattern_type: "list" or "regex"
-
-         Raises:
-             PatternResolutionError: If pattern cannot be resolved
-
-         Example:
-             >>> loader = SyntheticPatternLoader()
-             >>>
-             >>> # Reference (resolved via fallback)
-             >>> value, type = loader.load_pattern("first_names", ["global"], "default")
-             >>> # value = ['Arjun', 'Vikram', ...], type = 'list'
-             >>>
-             >>> # Inline list
-             >>> value, type = loader.load_pattern(["Active", "Inactive"], ["global"], "default")
-             >>> # value = ['Active', 'Inactive'], type = 'list'
-             >>>
-             >>> # Inline regex
-             >>> value, type = loader.load_pattern("CUST\\d{8}", ["global"], "default")
-             >>> # value = 'CUST\\d{8}', type = 'regex'
-         """
-         # Detect pattern type
-         pattern_type = self._detect_pattern_type(pattern_value)
-
-         if pattern_type == "inline_list":
-             # Inline list (array)
-             logger.info(f"Using inline list with {len(pattern_value)} values")
-             return (pattern_value, "list")
-
-         elif pattern_type == "inline_regex":
-             # Inline regex (string with special chars)
-             logger.info(f"Using inline regex: {pattern_value}")
-             return (pattern_value, "regex")
-
-         elif pattern_type == "reference":
-             # Reference (resolve via fallback)
-             logger.info(f"Resolving reference: {pattern_value}")
-             return self._resolve_reference(pattern_value, imports, prefer_mode)
-
-         else:
-             raise PatternResolutionError(
-                 f"Unknown pattern type for value: {pattern_value}"
-             )
-
-     def _detect_pattern_type(self, pattern_value: Union[str, List[str]]) -> str:
-         """
-         Detect pattern type from value.
-
-         Args:
-             pattern_value: Pattern value from TOML
-
-         Returns:
-             Pattern type: "inline_list", "inline_regex", or "reference"
-         """
-         if isinstance(pattern_value, list):
-             # Array = inline list
-             return "inline_list"
-
-         elif isinstance(pattern_value, str):
-             # String: check if it's regex or reference
-             if is_regex_pattern(pattern_value):
-                 # Has special regex chars = inline regex
-                 return "inline_regex"
-             else:
-                 # Simple string = reference
-                 return "reference"
-
-         else:
-             raise ValidationError(
-                 f"Invalid pattern value type: {type(pattern_value)}. "
-                 f"Expected string or list."
-             )
-
-     def _resolve_reference(
-         self,
-         pattern_name: str,
-         imports: List[str],
-         prefer_mode: str
-     ) -> tuple[Union[List[str], str], str]:
-         """
-         Resolve a pattern reference using fallback resolution.
-
-         Args:
-             pattern_name: Name of pattern to resolve
-             imports: List of imports
-             prefer_mode: Resolution preference
-
-         Returns:
-             Tuple of (resolved_value, pattern_type)
-
-         Raises:
-             PatternResolutionError: If pattern cannot be resolved
-         """
-         # Convert prefer_mode string to enum
-         try:
-             mode = PreferMode(prefer_mode)
-         except ValueError:
-             logger.warning(f"Invalid prefer_mode '{prefer_mode}', using DEFAULT")
-             mode = PreferMode.DEFAULT
-
-         # Resolve using common module
-         result = self.resolver.resolve(pattern_name, imports, mode)
-
-         if not result.found:
-             raise PatternResolutionError(
-                 f"Pattern '{pattern_name}' not found. {result.error_message}",
-                 pattern_name,
-                 imports
-             )
-
-         # Log resolution
-         logger.info(
-             f"Resolved '{pattern_name}' from {result.source} "
-             f"(type: {result.pattern_type}, fallback: {result.fallback_used})"
-         )
-
-         return (result.value, result.pattern_type)
-
-     def validate_imports(self, imports: List[str]) -> tuple[bool, List[str]]:
-         """
-         Validate that import files exist.
-
-         Args:
-             imports: List of import names (e.g., ["global", "finance"])
-
-         Returns:
-             Tuple of (is_valid, error_messages)
-         """
-         errors = []
-
-         for import_name in imports:
-             # Check for .list file
-             list_file = self.base_path / f"{import_name}.list"
-             properties_file = self.base_path / f"{import_name}.properties"
-
-             # At least one should exist
-             if not list_file.exists() and not properties_file.exists():
-                 errors.append(
-                     f"Import '{import_name}' not found. "
-                     f"Neither {import_name}.list nor {import_name}.properties exists."
-                 )
-
-         return (len(errors) == 0, errors)
-
-     def get_available_patterns(self, imports: List[str]) -> Dict[str, str]:
-         """
-         Get all available patterns from imports.
-
-         Args:
-             imports: List of import names
-
-         Returns:
-             Dictionary mapping pattern names to sources
-         """
-         available = {}
-
-         for import_name in imports:
-             # Load .list file if exists
-             list_file = self.base_path / f"{import_name}.list"
-             if list_file.exists():
-                 try:
-                     lists = load_list_file(str(list_file))
-                     for list_name in lists.keys():
-                         available[list_name] = f"{import_name}.list"
-                 except ListFileError as e:
-                     logger.warning(f"Failed to load {list_file}: {e}")
-
-             # Load .properties file if exists
-             properties_file = self.base_path / f"{import_name}.properties"
-             if properties_file.exists():
-                 try:
-                     patterns = load_properties_file(str(properties_file))
-                     for pattern_name in patterns.keys():
-                         # Don't overwrite if already in list
-                         if pattern_name not in available:
-                             available[pattern_name] = f"{import_name}.properties"
-                 except PatternFileError as e:
-                     logger.warning(f"Failed to load {properties_file}: {e}")
-
-         return available
-
-     def clear_cache(self):
-         """Clear cached files (useful for testing)."""
-         self._list_cache.clear()
-         self._properties_cache.clear()
-         self.resolver.clear_cache()
-
-
- def detect_pattern_type_from_toml(pattern_value: Any) -> str:
-     """
-     Detect pattern type from TOML value.
-
-     This is a convenience function for use in schema parsing.
-
-     Args:
-         pattern_value: Value from TOML file
-
-     Returns:
-         Pattern type: "inline_list", "inline_regex", or "reference"
-
-     Example:
-         >>> detect_pattern_type_from_toml(["Active", "Inactive"])
-         'inline_list'
-         >>> detect_pattern_type_from_toml("CUST\\d{8}")
-         'inline_regex'
-         >>> detect_pattern_type_from_toml("first_names")
-         'reference'
-     """
-     if isinstance(pattern_value, list):
-         return "inline_list"
-     elif isinstance(pattern_value, str):
-         if is_regex_pattern(pattern_value):
-             return "inline_regex"
-         else:
-             return "reference"
-     else:
-         return "unknown"
-
-
- def convert_prefer_mode(mode_str: str) -> PreferMode:
-     """
-     Convert prefer_mode string to enum.
-
-     Args:
-         mode_str: Mode string ("default", "list_only", "regex_only")
-
-     Returns:
-         PreferMode enum value
-
-     Raises:
-         ValueError: If mode string is invalid
-     """
-     try:
-         return PreferMode(mode_str)
-     except ValueError:
-         raise ValueError(
-             f"Invalid prefer_mode '{mode_str}'. "
-             f"Valid values: default, list_only, regex_only"
-         )
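
The deleted SyntheticPatternLoader above resolved a TOML pattern value to either a list or a regex, depending on whether the value was an array, a regex-looking string, or a plain reference. A usage sketch drawn from its own docstring examples (pattern names and imports are illustrative; the class is absent from 0.1.0a3):

# Sketch only: based on the docstring examples of the removed
# additory/synthetic/common_integration.py; not part of 0.1.0a3.
from additory.synthetic.common_integration import SyntheticPatternLoader

loader = SyntheticPatternLoader(base_path="reference/schema_definitions")
values, kind = loader.load_pattern("first_names", ["global"], "default")           # reference -> "list"
values, kind = loader.load_pattern(["Active", "Inactive"], ["global"], "default")  # inline list -> "list"
pattern, kind = loader.load_pattern("CUST\\d{8}", ["global"], "default")           # inline regex -> "regex"
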