additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -177
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -352
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/deduce.py +0 -259
  100. additory/synthetic/distributions.py +0 -22
  101. additory/synthetic/forecast.py +0 -1132
  102. additory/synthetic/linked_list_parser.py +0 -415
  103. additory/synthetic/namespace_lookup.py +0 -129
  104. additory/synthetic/smote.py +0 -320
  105. additory/synthetic/strategies.py +0 -926
  106. additory/synthetic/synthesizer.py +0 -713
  107. additory/utilities/__init__.py +0 -53
  108. additory/utilities/encoding.py +0 -600
  109. additory/utilities/games.py +0 -300
  110. additory/utilities/keys.py +0 -8
  111. additory/utilities/lookup.py +0 -103
  112. additory/utilities/matchers.py +0 -216
  113. additory/utilities/resolvers.py +0 -286
  114. additory/utilities/settings.py +0 -167
  115. additory/utilities/units.py +0 -749
  116. additory/utilities/validators.py +0 -153
  117. additory-0.1.0a4.dist-info/METADATA +0 -311
  118. additory-0.1.0a4.dist-info/RECORD +0 -72
  119. additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
  120. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  121. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -1,473 +0,0 @@
1
- # additory/core/enhanced_matchers.py
2
-
3
- """
4
- Enhanced Match Parameters System for Intelligent String Matching
5
-
6
- This module provides the enhanced matching system for add.to() with case-insensitive
7
- defaults and comprehensive matching strategies. It builds upon the existing matchers
8
- but provides a more user-friendly interface with intelligent defaults.
9
-
10
- New Match Parameter Design:
11
- - "exact": Case-insensitive exact match (DEFAULT) - maps to "iexact"
12
- - "exact_case": Case-sensitive exact match - maps to "exact"
13
- - "contains": Case-insensitive substring matching - maps to "icontains"
14
- - "contains_case": Case-sensitive substring matching - maps to "contains"
15
- - "startswith": Case-insensitive prefix matching - maps to "ibeginswith"
16
- - "startswith_case": Case-sensitive prefix matching - maps to "beginswith"
17
- - "endswith": Case-insensitive suffix matching - maps to "iendswith"
18
- - "endswith_case": Case-sensitive suffix matching - maps to "endswith"
19
- - "regex": Regular expression matching (case-sensitive by default)
20
- - "range": Numeric range matching
21
- - "fuzzy": Fuzzy string matching with configurable threshold
22
-
23
- Design Philosophy:
24
- - Case-insensitive by default for real-world messy data
25
- - Explicit "_case" suffix when case sensitivity is needed
26
- - Backward compatibility with existing matchers
27
- - Enhanced fuzzy matching with configurable parameters
28
- - Comprehensive validation and helpful error messages
29
- """
30
-
31
- import logging
32
- import re
33
- from typing import List, Dict, Any, Tuple, Optional, Union
34
- from dataclasses import dataclass
35
-
36
- # Import existing matchers
37
- from ..ops.matchers import (
38
- match_exact, match_iexact, match_contains, match_icontains,
39
- match_beginswith, match_ibeginswith, match_endswith, match_iendswith,
40
- match_regex, match_numeric_range, match_fuzzy,
41
- _safe_contains, _safe_startswith, _safe_endswith, _calculate_similarity
42
- )
43
-
44
- logger = logging.getLogger(__name__)
45
-
46
-
47
@dataclass
class MatchConfig:
    """Configuration for enhanced matching operations.

    One instance describes a single named match strategy.
    """
    # Name of the strategy this configuration belongs to (e.g. "exact").
    strategy: str
    # True when the strategy distinguishes upper/lower case.
    case_sensitive: bool
    # Minimum similarity score for a fuzzy match to be accepted.
    fuzzy_threshold: float = 0.8
    # Flags intended for regular-expression strategies (0 means no flags).
    regex_flags: int = 0
    # Tolerance intended for numeric strategies.
    numeric_tolerance: float = 0.0
    # Human-readable summary of the strategy.
    description: str = ""
    # Optional usage examples; None (not an empty list) when none are given,
    # so the annotation must be Optional rather than a bare List[str].
    examples: Optional[List[str]] = None
57
-
58
-
59
class EnhancedMatcherSystem:
    """
    Enhanced matching system with case-insensitive defaults and comprehensive strategies.

    The instance keeps a table of per-strategy configurations and a dictionary
    of usage counters. Matchers handed out by get_matcher_function() update
    those counters each time they are invoked.
    """

    def __init__(self):
        self._match_configs = self._initialize_match_configs()
        self._match_stats = self._new_stats()

    @staticmethod
    def _new_stats() -> Dict[str, int]:
        """Return a fresh statistics dictionary with every counter at zero."""
        return {
            'total_matches': 0,
            'exact_matches': 0,
            'case_insensitive_matches': 0,
            'case_sensitive_matches': 0,
            'fuzzy_matches': 0,
            'regex_matches': 0,
            'contains_matches': 0,
            'prefix_suffix_matches': 0,
            'range_matches': 0
        }

    def get_matcher_function(self, match_strategy: str) -> callable:
        """
        Get the appropriate matcher function for the given strategy

        Args:
            match_strategy: Enhanced match strategy name

        Returns:
            Callable matcher function taking (key, lookup), wrapped so that
            each invocation updates the statistics counters

        Raises:
            ValueError: If the strategy is not configured or has no
                matcher implementation
        """

        # Validate strategy
        if match_strategy not in self._match_configs:
            available = list(self._match_configs.keys())
            raise ValueError(f"Unknown match strategy: '{match_strategy}'. Available: {available}")

        config = self._match_configs[match_strategy]

        if match_strategy == "fuzzy":
            # Built on demand so non-fuzzy lookups do not create a closure
            # they never use.
            matcher_func = self._create_fuzzy_matcher(config.fuzzy_threshold)
        else:
            # Map enhanced strategies to existing matcher functions
            strategy_mapping = {
                "exact": match_iexact,               # Case-insensitive by default
                "exact_case": match_exact,           # Case-sensitive when explicit
                "contains": match_icontains,         # Case-insensitive by default
                "contains_case": match_contains,     # Case-sensitive when explicit
                "startswith": match_ibeginswith,     # Case-insensitive by default
                "startswith_case": match_beginswith, # Case-sensitive when explicit
                "endswith": match_iendswith,         # Case-insensitive by default
                "endswith_case": match_endswith,     # Case-sensitive when explicit
                "regex": match_regex,                # Case-sensitive (standard regex behavior)
                "range": match_numeric_range,        # Numeric range matching
            }
            matcher_func = strategy_mapping.get(match_strategy)

        # Reached only when a strategy is configured but has no implementation.
        if matcher_func is None:
            raise ValueError(f"No matcher implementation for strategy: '{match_strategy}'")

        # Wrap the matcher to collect statistics
        return self._wrap_matcher_with_stats(matcher_func, match_strategy)

    def _create_fuzzy_matcher(self, threshold: float = 0.8) -> callable:
        """Create a fuzzy matcher with configurable threshold

        Args:
            threshold: Minimum similarity (as returned by _calculate_similarity)
                a lookup key must reach to be considered a match

        Returns:
            Function (key, lookup) -> list of rows from every lookup entry
            whose key meets the threshold
        """

        def fuzzy_matcher_with_threshold(key, lookup):
            """Fuzzy matching with configurable threshold"""
            matches = []

            for k, rows in lookup.items():
                similarity = _calculate_similarity(key, k)
                if similarity >= threshold:
                    matches.extend(rows)

            return matches

        return fuzzy_matcher_with_threshold

    def _wrap_matcher_with_stats(self, matcher_func: callable, strategy: str) -> callable:
        """Wrap matcher function to collect statistics

        Args:
            matcher_func: Underlying matcher taking (key, lookup)
            strategy: Strategy name used to pick which counters to bump

        Returns:
            Function with the same (key, lookup) signature that returns the
            underlying matcher's result unchanged
        """

        def wrapped_matcher(key, lookup):
            """Wrapped matcher that collects statistics"""
            matches = matcher_func(key, lookup)

            # Update statistics. self._match_stats is looked up on every call
            # (not captured once) because reset_stats() rebinds the attribute.
            # 'total_matches' counts invocations, not the number of rows found.
            self._match_stats['total_matches'] += 1

            if strategy in ['exact', 'contains', 'startswith', 'endswith']:
                self._match_stats['case_insensitive_matches'] += 1
            elif strategy in ['exact_case', 'contains_case', 'startswith_case', 'endswith_case']:
                self._match_stats['case_sensitive_matches'] += 1
            elif strategy == 'fuzzy':
                self._match_stats['fuzzy_matches'] += 1
            elif strategy == 'regex':
                self._match_stats['regex_matches'] += 1
            elif strategy == 'range':
                self._match_stats['range_matches'] += 1

            # Second, independent bucketing by the kind of comparison.
            if strategy in ['exact', 'exact_case']:
                self._match_stats['exact_matches'] += 1
            elif strategy in ['contains', 'contains_case']:
                self._match_stats['contains_matches'] += 1
            elif strategy in ['startswith', 'startswith_case', 'endswith', 'endswith_case']:
                self._match_stats['prefix_suffix_matches'] += 1

            return matches

        return wrapped_matcher

    def validate_match_strategy(self, strategy: str) -> Dict[str, Any]:
        """
        Validate match strategy and provide helpful information

        Args:
            strategy: Strategy name to check. Non-string values are reported
                as invalid instead of raising.

        Returns:
            Dict with validation results, description, and examples. For an
            invalid strategy, 'suggestions' lists the valid names and
            'similar_strategies' holds up to three configured names that
            contain, or are contained in, the given name (case-insensitive).
        """

        # Guard the membership test so unhashable or non-string input is
        # simply invalid rather than an exception.
        is_text = isinstance(strategy, str)
        is_valid = is_text and strategy in self._match_configs

        result = {
            'valid': is_valid,
            'strategy': strategy,
            'config': None,
            'suggestions': [],
            'similar_strategies': []
        }

        if is_valid:
            config = self._match_configs[strategy]
            result['config'] = {
                'description': config.description,
                'case_sensitive': config.case_sensitive,
                'examples': config.examples or [],
                'fuzzy_threshold': config.fuzzy_threshold if strategy == 'fuzzy' else None
            }
        else:
            # Provide suggestions for invalid strategies
            result['suggestions'] = [
                "Valid match strategies:",
                " Case-insensitive (default):",
                " - 'exact': Exact match, ignoring case",
                " - 'contains': Substring match, ignoring case",
                " - 'startswith': Prefix match, ignoring case",
                " - 'endswith': Suffix match, ignoring case",
                " Case-sensitive (explicit):",
                " - 'exact_case': Exact match, case-sensitive",
                " - 'contains_case': Substring match, case-sensitive",
                " - 'startswith_case': Prefix match, case-sensitive",
                " - 'endswith_case': Suffix match, case-sensitive",
                " Advanced:",
                " - 'fuzzy': Fuzzy string matching",
                " - 'regex': Regular expression matching",
                " - 'range': Numeric range matching"
            ]

            # Find similar strategies. An empty needle is skipped because the
            # empty string is a substring of every name and would make every
            # strategy look "similar".
            needle = strategy.lower() if is_text else ""
            if needle:
                result['similar_strategies'] = [
                    s for s in self._match_configs
                    if needle in s.lower() or s.lower() in needle
                ][:3]  # Top 3 similar

        return result

    def get_match_examples(self, strategy: str) -> List[Dict[str, Any]]:
        """Get practical examples for a match strategy

        Returns:
            List of example dicts with 'target', 'reference', 'matches' and
            'reason' keys; an empty list for any strategy without an entry
            in the table below.
        """

        examples = {
            "exact": [
                {"target": "Apple", "reference": "APPLE", "matches": True, "reason": "Case-insensitive exact match"},
                {"target": "Apple", "reference": "Orange", "matches": False, "reason": "Different values"},
                {"target": "Apple Inc", "reference": "apple inc", "matches": True, "reason": "Case-insensitive exact match"}
            ],
            "exact_case": [
                {"target": "Apple", "reference": "APPLE", "matches": False, "reason": "Case-sensitive, different case"},
                {"target": "Apple", "reference": "Apple", "matches": True, "reason": "Exact case match"},
                {"target": "Apple", "reference": "apple", "matches": False, "reason": "Case-sensitive, different case"}
            ],
            "contains": [
                {"target": "laptop", "reference": "Gaming Laptop Pro", "matches": True, "reason": "Contains 'laptop' (case-insensitive)"},
                {"target": "MOUSE", "reference": "wireless mouse", "matches": True, "reason": "Contains 'mouse' (case-insensitive)"},
                {"target": "keyboard", "reference": "Monitor", "matches": False, "reason": "Does not contain 'keyboard'"}
            ],
            "contains_case": [
                {"target": "Laptop", "reference": "Gaming Laptop Pro", "matches": True, "reason": "Contains 'Laptop' (exact case)"},
                {"target": "laptop", "reference": "Gaming Laptop Pro", "matches": False, "reason": "Case-sensitive, different case"},
                {"target": "MOUSE", "reference": "wireless mouse", "matches": False, "reason": "Case-sensitive, different case"}
            ],
            "startswith": [
                {"target": "tech", "reference": "TechCorp Inc", "matches": True, "reason": "Starts with 'tech' (case-insensitive)"},
                {"target": "PROD", "reference": "product-001", "matches": True, "reason": "Starts with 'prod' (case-insensitive)"},
                {"target": "sales", "reference": "Marketing Dept", "matches": False, "reason": "Does not start with 'sales'"}
            ],
            "fuzzy": [
                {"target": "John Smith", "reference": "Jon Smith", "matches": True, "reason": "High similarity (typo tolerance)"},
                {"target": "TechCorp", "reference": "Tech Corp", "matches": True, "reason": "High similarity (spacing difference)"},
                {"target": "Apple", "reference": "Orange", "matches": False, "reason": "Low similarity, different words"}
            ],
            "regex": [
                {"target": r"P\d{3}", "reference": "P001", "matches": True, "reason": "Matches pattern P + 3 digits"},
                {"target": r"^[A-Z]{2}\d{4}$", "reference": "AB1234", "matches": True, "reason": "Matches 2 letters + 4 digits"},
                {"target": r"\d+", "reference": "Product123", "matches": True, "reason": "Contains digits"}
            ]
        }

        return examples.get(strategy, [])

    def _initialize_match_configs(self) -> "Dict[str, MatchConfig]":
        """Initialize match strategy configurations

        Returns:
            Mapping from strategy name to its MatchConfig, one entry per
            supported strategy.
        """

        return {
            "exact": MatchConfig(
                strategy="exact",
                case_sensitive=False,
                description="Case-insensitive exact match (default)",
                examples=["'Apple' matches 'APPLE', 'apple', 'Apple'"]
            ),
            "exact_case": MatchConfig(
                strategy="exact_case",
                case_sensitive=True,
                description="Case-sensitive exact match",
                examples=["'Apple' matches only 'Apple', not 'APPLE' or 'apple'"]
            ),
            "contains": MatchConfig(
                strategy="contains",
                case_sensitive=False,
                description="Case-insensitive substring matching",
                examples=["'laptop' matches 'Gaming Laptop Pro', 'LAPTOP-001'"]
            ),
            "contains_case": MatchConfig(
                strategy="contains_case",
                case_sensitive=True,
                description="Case-sensitive substring matching",
                examples=["'Laptop' matches 'Gaming Laptop Pro' but not 'gaming laptop pro'"]
            ),
            "startswith": MatchConfig(
                strategy="startswith",
                case_sensitive=False,
                description="Case-insensitive prefix matching",
                examples=["'tech' matches 'TechCorp', 'TECHNOLOGY', 'tech-support'"]
            ),
            "startswith_case": MatchConfig(
                strategy="startswith_case",
                case_sensitive=True,
                description="Case-sensitive prefix matching",
                examples=["'Tech' matches 'TechCorp' but not 'technology'"]
            ),
            "endswith": MatchConfig(
                strategy="endswith",
                case_sensitive=False,
                description="Case-insensitive suffix matching",
                examples=["'corp' matches 'TechCorp', 'RETAILCORP', 'my-corp'"]
            ),
            "endswith_case": MatchConfig(
                strategy="endswith_case",
                case_sensitive=True,
                description="Case-sensitive suffix matching",
                examples=["'Corp' matches 'TechCorp' but not 'techcorp'"]
            ),
            "fuzzy": MatchConfig(
                strategy="fuzzy",
                case_sensitive=False,
                fuzzy_threshold=0.8,
                description="Fuzzy string matching with similarity threshold",
                examples=["'John Smith' matches 'Jon Smith', 'John Smyth' (typo tolerance)"]
            ),
            "regex": MatchConfig(
                strategy="regex",
                case_sensitive=True,
                description="Regular expression pattern matching",
                examples=["r'P\\d{3}' matches 'P001', 'P123' (product codes)"]
            ),
            "range": MatchConfig(
                strategy="range",
                case_sensitive=False,
                description="Numeric range matching",
                examples=["(10, 50) matches values between 10 and 50 inclusive"]
            )
        }

    def get_strategy_recommendations(self, use_case: str) -> List[str]:
        """Get strategy recommendations based on use case

        Args:
            use_case: Use-case label; compared case-insensitively

        Returns:
            Ordered list of recommended strategy names, or the generic
            ["exact", "contains", "fuzzy"] for an unrecognised use case
        """

        recommendations = {
            "messy_data": ["exact", "fuzzy", "contains"],
            "clean_data": ["exact_case", "exact", "contains_case"],
            "product_codes": ["exact", "regex", "startswith"],
            "names": ["fuzzy", "exact", "contains"],
            "categories": ["exact", "contains", "startswith"],
            "ids": ["exact_case", "exact", "regex"],
            "text_search": ["contains", "fuzzy", "startswith"],
            "strict_matching": ["exact_case", "regex"],
            "flexible_matching": ["fuzzy", "contains", "exact"]
        }

        return recommendations.get(use_case.lower(), ["exact", "contains", "fuzzy"])

    def get_stats(self) -> Dict[str, Any]:
        """Get matching statistics (a copy; mutating it does not affect the counters)"""
        return self._match_stats.copy()

    def reset_stats(self):
        """Reset matching statistics"""
        self._match_stats = self._new_stats()
369
-
370
-
371
# Global enhanced matcher system.
# Single module-level instance, created at import time; its statistics
# counters are therefore shared by every caller that goes through it.
_enhanced_matcher = EnhancedMatcherSystem()
373
-
374
-
375
- # Convenience functions
376
def get_enhanced_matcher(strategy: str) -> callable:
    """Return the matcher for *strategy* from the module-level matcher system.

    Delegates to ``_enhanced_matcher.get_matcher_function``.
    """
    matcher = _enhanced_matcher.get_matcher_function(strategy)
    return matcher
379
-
380
-
381
def validate_enhanced_match_strategy(strategy: str) -> Dict[str, Any]:
    """Validate *strategy* using the module-level matcher system.

    Delegates to ``_enhanced_matcher.validate_match_strategy`` and returns
    its result dictionary unchanged.
    """
    report = _enhanced_matcher.validate_match_strategy(strategy)
    return report
384
-
385
-
386
def get_enhanced_match_examples(strategy: str) -> List[Dict[str, Any]]:
    """Return the worked examples for *strategy* from the module-level matcher system.

    Delegates to ``_enhanced_matcher.get_match_examples``.
    """
    examples = _enhanced_matcher.get_match_examples(strategy)
    return examples
389
-
390
-
391
def get_strategy_recommendations(use_case: str) -> List[str]:
    """Return recommended strategy names for *use_case*.

    Delegates to ``_enhanced_matcher.get_strategy_recommendations``.
    """
    recommended = _enhanced_matcher.get_strategy_recommendations(use_case)
    return recommended
394
-
395
-
396
def get_enhanced_match_stats() -> Dict[str, Any]:
    """Return the statistics reported by the module-level matcher system.

    Delegates to ``_enhanced_matcher.get_stats``.
    """
    snapshot = _enhanced_matcher.get_stats()
    return snapshot
399
-
400
-
401
- # Enhanced matcher mapping for backward compatibility
402
# Enhanced strategy name -> legacy matcher name.
# Keyword order below is the mapping's insertion order.
ENHANCED_MATCHERS = dict(
    # Case-insensitive defaults (new behavior): plain names resolve to the
    # existing "i"-prefixed matchers.
    exact="iexact",
    contains="icontains",
    startswith="ibeginswith",
    endswith="iendswith",
    # Case-sensitive explicit (when needed): "_case" names resolve to the
    # existing case-sensitive matchers.
    exact_case="exact",
    contains_case="contains",
    startswith_case="beginswith",
    endswith_case="endswith",
    # Advanced matching (unchanged): names map to themselves.
    fuzzy="fuzzy",
    regex="regex",
    range="range",
)
420
-
421
-
422
def map_enhanced_to_legacy_strategy(enhanced_strategy: str) -> str:
    """Translate an enhanced strategy name into its legacy matcher name.

    Names that are not keys of ``ENHANCED_MATCHERS`` are returned unchanged.
    """
    if enhanced_strategy in ENHANCED_MATCHERS:
        return ENHANCED_MATCHERS[enhanced_strategy]
    return enhanced_strategy
425
-
426
-
427
- # Demonstration and validation
428
def demonstrate_enhanced_matching():
    """Demonstrate enhanced matching capabilities

    Runs a fixed set of (strategy, target, candidates) cases through the
    module-level matcher, prints a per-candidate hit/miss marker, and then
    prints the accumulated matching statistics. Returns nothing.
    """

    print("Enhanced Match Parameters Demonstration")
    print("=" * 50)

    # Test data
    test_cases = [
        ("exact", "Apple", ["APPLE", "apple", "Apple Inc"]),
        ("exact_case", "Apple", ["APPLE", "apple", "Apple"]),
        ("contains", "laptop", ["Gaming Laptop", "LAPTOP-001", "My Laptop"]),
        ("startswith", "tech", ["TechCorp", "TECHNOLOGY", "tech-support"]),
        ("fuzzy", "John Smith", ["Jon Smith", "John Smyth", "Jane Doe"])
    ]

    for strategy, target, candidates in test_cases:
        print(f"\nStrategy: {strategy}")
        print(f"Target: '{target}'")
        print("Candidates:")

        # Get matcher function
        try:
            matcher = get_enhanced_matcher(strategy)

            # Create simple lookup structure: one-element tuple key per
            # candidate, mapped to a single-item list holding its index.
            lookup = {(candidate,): [i] for i, candidate in enumerate(candidates)}

            # Test matching
            matches = matcher((target,), lookup)

            for i, candidate in enumerate(candidates):
                match_status = "✅" if i in matches else "❌"
                print(f" {match_status} '{candidate}'")

        except Exception as e:
            # Deliberately broad: this is a demo, so one failing strategy is
            # reported and the remaining cases still run.
            print(f" Error: {e}")

    # Show statistics
    stats = get_enhanced_match_stats()
    # Plain literal: there is nothing to interpolate here.
    print("\nMatching Statistics:")
    for key, value in stats.items():
        print(f" {key}: {value}")
470
-
471
-
472
# Run the demonstration only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    demonstrate_enhanced_matching()