additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121):
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -177
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -352
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/deduce.py +0 -259
  100. additory/synthetic/distributions.py +0 -22
  101. additory/synthetic/forecast.py +0 -1132
  102. additory/synthetic/linked_list_parser.py +0 -415
  103. additory/synthetic/namespace_lookup.py +0 -129
  104. additory/synthetic/smote.py +0 -320
  105. additory/synthetic/strategies.py +0 -926
  106. additory/synthetic/synthesizer.py +0 -713
  107. additory/utilities/__init__.py +0 -53
  108. additory/utilities/encoding.py +0 -600
  109. additory/utilities/games.py +0 -300
  110. additory/utilities/keys.py +0 -8
  111. additory/utilities/lookup.py +0 -103
  112. additory/utilities/matchers.py +0 -216
  113. additory/utilities/resolvers.py +0 -286
  114. additory/utilities/settings.py +0 -167
  115. additory/utilities/units.py +0 -749
  116. additory/utilities/validators.py +0 -153
  117. additory-0.1.0a4.dist-info/METADATA +0 -311
  118. additory-0.1.0a4.dist-info/RECORD +0 -72
  119. additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
  120. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  121. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -1,300 +0,0 @@
1
- """
2
- Easter Egg Games Module
3
-
4
- Hidden games for the curious. Not documented in main API docs.
5
- Reinforces row-column thinking - critical for DataFrame operations.
6
-
7
- Inspired by: Chrome dinosaur game, Python's antigravity, apt-get moo
8
- """
9
-
10
- import random
11
-
12
-
13
def print_board(board):
    """Write a 3x3 tic-tac-toe grid to stdout, padded by blank lines."""
    divider = "---+---+---"
    print("\n")
    for idx in range(3):
        # A divider goes between rows, so emit it before every row but the first.
        if idx:
            print(divider)
        print(" " + " | ".join(board[idx]))
    print("\n")
22
-
23
-
24
def check_winner(board, player):
    """Return True if `player` holds a complete row, column, or diagonal."""
    rows = [[(r, c) for c in range(3)] for r in range(3)]
    cols = [[(r, c) for r in range(3)] for c in range(3)]
    diagonals = [
        [(i, i) for i in range(3)],      # Diagonal \
        [(i, 2 - i) for i in range(3)],  # Diagonal /
    ]
    for line in rows + cols + diagonals:
        if all(board[r][c] == player for r, c in line):
            return True
    return False
37
-
38
-
39
def is_full(board):
    """Return True when none of the nine cells still holds a blank (" ")."""
    for r in range(3):
        for c in range(3):
            if board[r][c] == " ":
                return False
    return True
42
-
43
-
44
def get_empty_cells(board):
    """Return the (row, col) pairs of all blank cells in row-major order."""
    empties = []
    for r in range(3):
        for c in range(3):
            if board[r][c] == " ":
                empties.append((r, c))
    return empties
47
-
48
-
49
def computer_move(board):
    """
    Place the computer's "O" on the board in place.

    Strategy, in priority order:
    1. Take a cell that wins the game for "O".
    2. Take a cell that would otherwise let "X" win.
    3. Take a random empty cell.

    Args:
        board: 3x3 list of lists holding "X", "O" or " "; mutated in place.

    Returns:
        None. If the board has no empty cell, nothing is changed.
    """
    empty_cells = get_empty_cells(board)
    if not empty_cells:
        # random.choice raises IndexError on an empty sequence; a full
        # board simply has no move to make.
        return

    # Probe "O" first (win), then "X" (block). Each probe is undone before
    # deciding, so the board only ever gains a single "O".
    for mark in ("O", "X"):
        for r, c in empty_cells:
            board[r][c] = mark
            completes_line = check_winner(board, mark)
            board[r][c] = " "
            if completes_line:
                board[r][c] = "O"
                return

    # No immediate win or threat: pick any free cell.
    r, c = random.choice(empty_cells)
    board[r][c] = "O"
69
-
70
-
71
def tictactoe():
    """
    Play an interactive game of Tic-Tac-Toe against the computer.

    The human plays "X" and moves first; the computer plays "O".
    Moves are typed as "row col" with 1-based indices (e.g., "2 3"),
    just like DataFrame indexing: df.iloc[row, col].

    The game ends on a win, on a draw, or when the player presses Ctrl+C
    or stdin reaches end-of-file.
    """
    board = [[" " for _ in range(3)] for _ in range(3)]

    print("=" * 50)
    print("Welcome to Tic Tac Toe!")
    print("=" * 50)
    print("You are X. Computer is O.")
    print("Enter moves as: row col (e.g., '2 3' for row 2, column 3)")
    print("Rows and columns are numbered 1-3")
    print("Think of it like DataFrame indexing: df.iloc[row, col]")
    print("=" * 50)

    print_board(board)

    try:
        while True:
            # USER MOVE
            move = input("Your move (row col): ")
            try:
                # Only the parse can raise ValueError (non-numeric text or
                # the wrong number of values), so only the parse is guarded.
                r, c = map(int, move.split())
            except ValueError:
                print("Invalid input. Enter row and column like: 2 3")
                continue

            r -= 1  # Convert to 0-indexed
            c -= 1

            if r not in range(3) or c not in range(3):
                print("Invalid position. Choose row/col between 1 and 3.")
                continue

            if board[r][c] != " ":
                print("That spot is already taken. Try again.")
                continue

            board[r][c] = "X"
            print_board(board)

            if check_winner(board, "X"):
                print("🎉 You win!")
                break

            if is_full(board):
                print("It's a draw!")
                break

            # COMPUTER MOVE
            print("Computer is thinking...")
            computer_move(board)
            print_board(board)

            if check_winner(board, "O"):
                print("💻 Computer wins!")
                break

            if is_full(board):
                print("It's a draw!")
                break
    except (KeyboardInterrupt, EOFError):
        # EOFError is raised by input() when stdin is closed or piped dry;
        # treat it like Ctrl+C instead of crashing with a traceback.
        print("\n\nGame interrupted. Thanks for playing!")
136
-
137
-
138
# A simple valid completed Sudoku board; each string spells out one row.
BASE_BOARD = [
    [int(digit) for digit in row_digits]
    for row_digits in (
        "534678912",
        "672195348",
        "198342567",
        "859761423",
        "426853791",
        "713924856",
        "961537284",
        "287419635",
        "345286179",
    )
]
150
-
151
-
152
def remove_numbers(board, holes=40):
    """
    Blank out cells of a completed Sudoku board to create a puzzle.

    Args:
        board: Grid as a list of row lists; 0 marks an empty cell.
            The input is not modified.
        holes: Number of filled cells to blank (default: 40). Values larger
            than the number of filled cells blank every cell; values
            below zero blank none.

    Returns:
        A new grid (list of lists) with the chosen cells set to 0.
    """
    puzzle = [row[:] for row in board]

    # Choose among the cells that are actually filled. Retrying random
    # coordinates until enough holes exist never terminates when `holes`
    # exceeds the number of filled cells.
    filled = [
        (r, c)
        for r, row in enumerate(puzzle)
        for c, value in enumerate(row)
        if value != 0
    ]
    count = max(0, min(holes, len(filled)))

    for r, c in random.sample(filled, count):
        puzzle[r][c] = 0
    return puzzle
163
-
164
-
165
def print_sudoku_board(board):
    """Write a Sudoku grid to stdout, showing 0 as "." and ruling off 3x3 boxes."""
    print("\nSudoku Board:")
    for row_idx, row in enumerate(board):
        # Horizontal rule before rows 3, 6, ... (never before the first row).
        if row_idx and row_idx % 3 == 0:
            print("------+-------+------")
        pieces = []
        for col_idx, val in enumerate(row):
            # Vertical rule before columns 3, 6, ...
            if col_idx and col_idx % 3 == 0:
                pieces.append("| ")
            pieces.append(str(val) if val != 0 else ".")
            pieces.append(" ")
        print("".join(pieces))
    print()
178
-
179
-
180
def is_valid_sudoku(board, r, c, num):
    """Return True if `num` does not already appear in row r, column c, or their 3x3 box."""
    # Row
    if num in board[r]:
        return False

    # Column
    if any(board[i][c] == num for i in range(9)):
        return False

    # 3x3 box: snap (r, c) back to the box's top-left corner.
    top = r - r % 3
    left = c - c % 3
    return not any(
        board[i][j] == num
        for i in range(top, top + 3)
        for j in range(left, left + 3)
    )
200
-
201
-
202
def is_solved(board):
    """Return True when no cell is 0; the digits themselves are not re-validated here."""
    for row in board:
        for cell in row:
            if cell == 0:
                return False
    return True
205
-
206
-
207
def sudoku():
    """
    Play an interactive game of Sudoku.

    A puzzle is created by blanking 45 cells of BASE_BOARD. Moves are typed
    as "row col number" with 1-based indices (e.g., "3 4 9"), just like
    DataFrame assignment: df.iloc[row, col] = value

    The game ends when no empty cell remains, when the player types
    'abort' or 'exit', presses Ctrl+C, or stdin reaches end-of-file.
    """
    print("=" * 50)
    print("Welcome to Sudoku!")
    print("=" * 50)
    print("Enter moves as: row col number (e.g., '3 4 9')")
    print("Rows and columns are numbered 1-9")
    print("Think of it like DataFrame assignment: df.iloc[row, col] = value")
    print("Type 'abort' or 'exit' to quit.")
    print("=" * 50)

    # `puzzle` keeps the given cells so they can be protected from edits;
    # `board` is the working copy the player fills in.
    puzzle = remove_numbers(BASE_BOARD, holes=45)
    board = [row[:] for row in puzzle]

    print_sudoku_board(board)

    try:
        while True:
            # The prompt is inside the interrupt handler on purpose: Ctrl+C
            # and EOF are raised by input() itself.
            move = input("Your move: ").strip().lower()

            if move in ("abort", "exit"):
                print("Game ended by user.")
                break

            try:
                r, c, num = map(int, move.split())
            except ValueError:
                print("Invalid input. Use: row col number (e.g., 2 5 7)")
                continue

            r -= 1  # Convert to 0-indexed
            c -= 1

            if not (0 <= r < 9 and 0 <= c < 9):
                print("Invalid position. Row/col must be 1–9.")
                continue

            if not (1 <= num <= 9):
                print("Number must be between 1 and 9.")
                continue

            if puzzle[r][c] != 0:
                print("This cell is fixed and cannot be changed.")
                continue

            if not is_valid_sudoku(board, r, c, num):
                print("Invalid move. Violates Sudoku rules.")
                continue

            board[r][c] = num
            print_sudoku_board(board)

            if is_solved(board):
                print("🎉 Congratulations! You solved the Sudoku.")
                break
    except (KeyboardInterrupt, EOFError):
        print("\n\nGame interrupted. Thanks for playing!")
269
-
270
-
271
def play(game="tictactoe"):
    """
    Launch one of the built-in terminal games. 🎮

    Recognized names (case-insensitive, surrounding whitespace ignored):
    - 'tictactoe' or 'ttt': Tic-Tac-Toe
    - 'sudoku': Sudoku

    Any other name prints a short usage hint instead of starting a game.

    Args:
        game: Name of the game to play (default: 'tictactoe')

    Example:
        >>> import additory
        >>> additory.add.play('tictactoe')
        >>> additory.add.play('sudoku')
    """
    game = game.lower().strip()

    if game == 'sudoku':
        sudoku()
        return

    if game in ('tictactoe', 'ttt'):
        tictactoe()
        return

    usage = (
        f"Unknown game: {game}",
        "Available games: 'tictactoe' (or 'ttt'), 'sudoku'",
        "\nExample:",
        " add.play('tictactoe')",
        " add.play('sudoku')",
    )
    for line in usage:
        print(line)
@@ -1,8 +0,0 @@
1
- # additory/ops/keys.py
2
-
3
def build_keys(df, columns):
    """
    Return one tuple per row, combining the values of `columns` in order.

    Each column is read as df[col] and the columns are zipped together,
    so the result is a list of composite keys.
    """
    selected = [df[name] for name in columns]
    return [*zip(*selected)]
@@ -1,103 +0,0 @@
1
- # additory/utilities/lookup.py
2
- # Consolidated lookup functionality (add.to)
3
-
4
- """
5
- Lookup Utilities Module
6
-
7
- This module provides the add.to() functionality for adding columns from reference dataframes.
8
- """
9
-
10
- import pandas as pd
11
- import polars as pl
12
- from typing import Union, List, Optional, Any
13
-
14
-
15
def to(target_df: Union[pd.DataFrame, pl.DataFrame],
       from_df: Optional[Union[pd.DataFrame, pl.DataFrame]] = None,
       bring: Optional[Union[str, List[str]]] = None,
       against: Optional[Union[str, List[str]]] = None,
       **kwargs) -> Union[pd.DataFrame, pl.DataFrame]:
    """
    Add columns from a reference dataframe to a target dataframe.

    The lookup is a left merge of `target_df` with the `against` + `bring`
    columns of `from_df`, so every target row is kept.

    Args:
        target_df: Target dataframe to add columns to
        from_df: Reference dataframe to get columns from
        bring: Column name or sequence of names to bring from `from_df`
        against: Column name or sequence of names to match on
        **kwargs: Accepted for call compatibility; currently unused

    Returns:
        Target dataframe with the new columns added. A polars target is
        returned as a polars DataFrame; anything else as a pandas DataFrame.

    Raises:
        ValueError: If `from_df`, `bring` or `against` is not provided.

    Example:
        result = add.to(orders_df, from_df=products_df, bring='price', against='product_id')
    """
    if from_df is None:
        raise ValueError("from_df parameter is required")

    if bring is None:
        raise ValueError("bring parameter is required")

    if against is None:
        raise ValueError("against parameter is required")

    # Normalize to fresh lists: a single name becomes a one-item list, and
    # tuples or other sequences no longer break the concatenation below.
    bring = [bring] if isinstance(bring, str) else list(bring)
    against = [against] if isinstance(against, str) else list(against)

    # A name listed in both `against` and `bring` must be selected only
    # once; dict.fromkeys drops repeats while preserving order.
    selected = list(dict.fromkeys(against + bring))

    # Non-pandas inputs exposing to_pandas() (e.g. polars) are converted;
    # pandas frames have no such method and are used as they are.
    target_pd = target_df.to_pandas() if hasattr(target_df, 'to_pandas') else target_df
    from_pd = from_df.to_pandas() if hasattr(from_df, 'to_pandas') else from_df

    result_pd = target_pd.merge(
        from_pd[selected],
        on=against,
        how='left'
    )

    # Convert back to original format if needed
    if isinstance(target_df, pl.DataFrame):
        return pl.from_pandas(result_pd)

    return result_pd
86
-
87
-
88
def fuzzy_lookup(target_df, lookup_df, **kwargs):
    """Reserved entry point for fuzzy lookup; always raises NotImplementedError."""
    message = "Fuzzy lookup not yet implemented in restructured version"
    raise NotImplementedError(message)
91
-
92
-
93
def aggregate_lookup(target_df, lookup_df, **kwargs):
    """Reserved entry point for aggregate lookup; always raises NotImplementedError."""
    message = "Aggregate lookup not yet implemented in restructured version"
    raise NotImplementedError(message)
96
-
97
-
98
- # Re-export the main functions
99
- __all__ = [
100
- 'to',
101
- 'fuzzy_lookup',
102
- 'aggregate_lookup'
103
- ]
@@ -1,216 +0,0 @@
1
- # additory/ops/matchers.py
2
-
3
- import re
4
- from typing import List, Dict, Any, Tuple
5
-
6
def match_exact(key, lookup):
    """Return the rows stored under exactly `key` (case sensitive), or an empty list."""
    no_rows = []
    return lookup.get(key, no_rows)
9
-
10
-
11
def match_iexact(key, lookup):
    """Return rows of every lookup key equal to `key` when compared case-insensitively."""
    def _lowered(parts):
        # None stays None so it only ever equals another None.
        return tuple(part if part is None else str(part).lower() for part in parts)

    wanted = _lowered(key)
    found = []
    for candidate, rows in lookup.items():
        if wanted == _lowered(candidate):
            found.extend(rows)
    return found
21
-
22
-
23
def match_contains(key, lookup):
    """
    Return rows of lookup keys whose parts contain the matching `key` parts.

    Comparison is case sensitive and positional. Pairs in which either
    side is None are skipped rather than treated as a mismatch.
    """
    hits = []
    for candidate, rows in lookup.items():
        for needle, haystack in zip(key, candidate):
            if needle is None or haystack is None:
                continue
            if str(needle) not in str(haystack):
                break
        else:
            # No compared pair failed.
            hits.extend(rows)
    return hits
30
-
31
-
32
def match_icontains(key, lookup):
    """
    Return rows of lookup keys whose parts contain the matching `key` parts,
    ignoring case.

    Pairs in which either side is None are skipped rather than treated as
    a mismatch.
    """
    hits = []
    for candidate, rows in lookup.items():
        for needle, haystack in zip(key, candidate):
            if needle is None or haystack is None:
                continue
            if str(needle).lower() not in str(haystack).lower():
                break
        else:
            # No compared pair failed.
            hits.extend(rows)
    return hits
39
-
40
-
41
def match_beginswith(key, lookup):
    """
    Return rows of lookup keys whose parts start with the matching `key` parts.

    Comparison is case sensitive and positional. Pairs in which either
    side is None are skipped rather than treated as a mismatch.
    """
    hits = []
    for candidate, rows in lookup.items():
        for prefix, text in zip(key, candidate):
            if prefix is None or text is None:
                continue
            if not str(text).startswith(str(prefix)):
                break
        else:
            # No compared pair failed.
            hits.extend(rows)
    return hits
48
-
49
-
50
def match_ibeginswith(key, lookup):
    """
    Return rows of lookup keys whose parts start with the matching `key` parts,
    ignoring case.

    Pairs in which either side is None are skipped rather than treated as
    a mismatch.
    """
    hits = []
    for candidate, rows in lookup.items():
        for prefix, text in zip(key, candidate):
            if prefix is None or text is None:
                continue
            if not str(text).lower().startswith(str(prefix).lower()):
                break
        else:
            # No compared pair failed.
            hits.extend(rows)
    return hits
57
-
58
-
59
def match_endswith(key, lookup):
    """
    Return rows of lookup keys whose parts end with the matching `key` parts.

    Comparison is case sensitive and positional. Pairs in which either
    side is None are skipped rather than treated as a mismatch.
    """
    hits = []
    for candidate, rows in lookup.items():
        for suffix, text in zip(key, candidate):
            if suffix is None or text is None:
                continue
            if not str(text).endswith(str(suffix)):
                break
        else:
            # No compared pair failed.
            hits.extend(rows)
    return hits
66
-
67
-
68
def match_iendswith(key, lookup):
    """
    Return rows of lookup keys whose parts end with the matching `key` parts,
    ignoring case.

    Pairs in which either side is None are skipped rather than treated as
    a mismatch.
    """
    hits = []
    for candidate, rows in lookup.items():
        for suffix, text in zip(key, candidate):
            if suffix is None or text is None:
                continue
            if not str(text).lower().endswith(str(suffix).lower()):
                break
        else:
            # No compared pair failed.
            hits.extend(rows)
    return hits
75
-
76
-
77
def match_regex(key, lookup):
    """
    Return rows of lookup keys whose parts are found by the regex in the
    matching `key` part (re.search semantics).

    An invalid pattern yields no matches. A None on either side of a
    compared pair counts as a mismatch.
    """
    try:
        compiled = [None if part is None else re.compile(str(part)) for part in key]
    except re.error:
        return []  # Invalid regex returns no matches

    hits = []
    for candidate, rows in lookup.items():
        try:
            for pattern, value in zip(compiled, candidate):
                if not pattern or value is None or not pattern.search(str(value)):
                    break
            else:
                hits.extend(rows)
        except (TypeError, AttributeError):
            continue
    return hits
93
-
94
-
95
def match_numeric_range(key, lookup):
    """
    Return rows of single-value lookup keys that fall inside a numeric range.

    Key format: (min_val, max_val), both bounds inclusive. A key that is
    not a pair of numbers, or a lookup value that is not numeric, simply
    produces no match.
    """
    if len(key) != 2:
        return []

    try:
        low, high = float(key[0]), float(key[1])
    except (ValueError, TypeError):
        return []

    hits = []
    for candidate, rows in lookup.items():
        # Only single-value lookup keys can be compared against the range.
        if len(candidate) != 1:
            continue
        try:
            if low <= float(candidate[0]) <= high:
                hits.extend(rows)
        except (ValueError, TypeError):
            continue
    return hits
118
-
119
-
120
def match_fuzzy(key, lookup):
    """
    Return rows of lookup keys that are approximately equal to `key`.

    A lookup key matches when _calculate_similarity(key, lookup_key)
    reaches the fixed threshold of 0.8.
    """
    min_similarity = 0.8  # Similarity threshold
    hits = []
    for candidate, rows in lookup.items():
        score = _calculate_similarity(key, candidate)
        if not (score >= min_similarity):
            continue
        hits.extend(rows)
    return hits
134
-
135
-
136
- # Helper functions
137
- def _safe_contains(needle, haystack):
138
- """Safe substring check"""
139
- try:
140
- return needle in haystack
141
- except TypeError:
142
- return False
143
-
144
-
145
- def _safe_startswith(text, prefix):
146
- """Safe prefix check"""
147
- try:
148
- return text.startswith(prefix)
149
- except (TypeError, AttributeError):
150
- return False
151
-
152
-
153
- def _safe_endswith(text, suffix):
154
- """Safe suffix check"""
155
- try:
156
- return text.endswith(suffix)
157
- except (TypeError, AttributeError):
158
- return False
159
-
160
-
161
- def _calculate_similarity(key1, key2):
162
- """
163
- Simple similarity calculation for fuzzy matching.
164
- Uses Jaccard similarity on character sets.
165
- """
166
- if len(key1) != len(key2):
167
- return 0.0
168
-
169
- total_similarity = 0.0
170
- for a, b in zip(key1, key2):
171
- if a is None or b is None:
172
- if a == b: # Both None
173
- total_similarity += 1.0
174
- else:
175
- total_similarity += 0.0
176
- else:
177
- str_a, str_b = str(a).lower(), str(b).lower()
178
- if str_a == str_b:
179
- total_similarity += 1.0
180
- else:
181
- # Character-level Jaccard similarity
182
- set_a, set_b = set(str_a), set(str_b)
183
- if len(set_a) == 0 and len(set_b) == 0:
184
- total_similarity += 1.0
185
- else:
186
- intersection = len(set_a & set_b)
187
- union = len(set_a | set_b)
188
- total_similarity += intersection / union if union > 0 else 0.0
189
-
190
- return total_similarity / len(key1)
191
-
192
-
193
# Registry mapping each matcher name to the function that implements it.
MATCHERS = dict(
    # Exact matching
    exact=match_exact,
    iexact=match_iexact,
    # Substring matching
    contains=match_contains,
    icontains=match_icontains,
    # Prefix/suffix matching
    beginswith=match_beginswith,
    ibeginswith=match_ibeginswith,
    endswith=match_endswith,
    iendswith=match_iendswith,
    # Pattern matching
    regex=match_regex,
    # Numeric matching
    range=match_numeric_range,
    # Fuzzy matching
    fuzzy=match_fuzzy,
)