additory 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +15 -0
- additory/analysis/__init__.py +48 -0
- additory/analysis/cardinality.py +126 -0
- additory/analysis/correlations.py +124 -0
- additory/analysis/distributions.py +376 -0
- additory/analysis/quality.py +158 -0
- additory/analysis/scan.py +400 -0
- additory/augment/__init__.py +24 -0
- additory/augment/augmentor.py +653 -0
- additory/augment/builtin_lists.py +430 -0
- additory/augment/distributions.py +22 -0
- additory/augment/forecast.py +1132 -0
- additory/augment/list_registry.py +177 -0
- additory/augment/smote.py +320 -0
- additory/augment/strategies.py +883 -0
- additory/common/__init__.py +157 -0
- additory/common/backend.py +355 -0
- additory/common/column_utils.py +191 -0
- additory/common/distributions.py +737 -0
- additory/common/exceptions.py +62 -0
- additory/common/lists.py +229 -0
- additory/common/patterns.py +240 -0
- additory/common/resolver.py +567 -0
- additory/common/sample_data.py +182 -0
- additory/common/validation.py +197 -0
- additory/core/__init__.py +27 -0
- additory/core/ast_builder.py +165 -0
- additory/core/backends/__init__.py +23 -0
- additory/core/backends/arrow_bridge.py +476 -0
- additory/core/backends/cudf_bridge.py +355 -0
- additory/core/column_positioning.py +358 -0
- additory/core/compiler_polars.py +166 -0
- additory/core/config.py +342 -0
- additory/core/enhanced_cache_manager.py +1119 -0
- additory/core/enhanced_matchers.py +473 -0
- additory/core/enhanced_version_manager.py +325 -0
- additory/core/executor.py +59 -0
- additory/core/integrity_manager.py +477 -0
- additory/core/loader.py +190 -0
- additory/core/logging.py +24 -0
- additory/core/memory_manager.py +547 -0
- additory/core/namespace_manager.py +657 -0
- additory/core/parser.py +176 -0
- additory/core/polars_expression_engine.py +551 -0
- additory/core/registry.py +176 -0
- additory/core/sample_data_manager.py +492 -0
- additory/core/user_namespace.py +751 -0
- additory/core/validator.py +27 -0
- additory/dynamic_api.py +308 -0
- additory/expressions/__init__.py +26 -0
- additory/expressions/engine.py +551 -0
- additory/expressions/parser.py +176 -0
- additory/expressions/proxy.py +546 -0
- additory/expressions/registry.py +313 -0
- additory/expressions/samples.py +492 -0
- additory/synthetic/__init__.py +101 -0
- additory/synthetic/api.py +220 -0
- additory/synthetic/common_integration.py +314 -0
- additory/synthetic/config.py +262 -0
- additory/synthetic/engines.py +529 -0
- additory/synthetic/exceptions.py +180 -0
- additory/synthetic/file_managers.py +518 -0
- additory/synthetic/generator.py +702 -0
- additory/synthetic/generator_parser.py +68 -0
- additory/synthetic/integration.py +319 -0
- additory/synthetic/models.py +241 -0
- additory/synthetic/pattern_resolver.py +573 -0
- additory/synthetic/performance.py +469 -0
- additory/synthetic/polars_integration.py +464 -0
- additory/synthetic/proxy.py +60 -0
- additory/synthetic/schema_parser.py +685 -0
- additory/synthetic/validator.py +553 -0
- additory/utilities/__init__.py +53 -0
- additory/utilities/encoding.py +600 -0
- additory/utilities/games.py +300 -0
- additory/utilities/keys.py +8 -0
- additory/utilities/lookup.py +103 -0
- additory/utilities/matchers.py +216 -0
- additory/utilities/resolvers.py +286 -0
- additory/utilities/settings.py +167 -0
- additory/utilities/units.py +746 -0
- additory/utilities/validators.py +153 -0
- additory-0.1.0a1.dist-info/METADATA +293 -0
- additory-0.1.0a1.dist-info/RECORD +87 -0
- additory-0.1.0a1.dist-info/WHEEL +5 -0
- additory-0.1.0a1.dist-info/licenses/LICENSE +21 -0
- additory-0.1.0a1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Easter Egg Games Module
|
|
3
|
+
|
|
4
|
+
Hidden games for the curious. Not documented in main API docs.
|
|
5
|
+
Reinforces row-column thinking - critical for DataFrame operations.
|
|
6
|
+
|
|
7
|
+
Inspired by: Chrome dinosaur game, Python's antigravity, apt-get moo
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import random
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def print_board(board):
    """Render the first three rows of ``board`` as a tic-tac-toe grid on stdout."""
    last_row = 2
    print("\n")
    for idx in range(last_row + 1):
        print(" " + " | ".join(board[idx]))
        # A horizontal rule separates rows, but is not drawn after the last one.
        if idx != last_row:
            print("---+---+---")
    print("\n")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def check_winner(board, player):
    """Return True when ``player`` occupies a full row, column or diagonal."""
    size = 3
    lines = [[(r, c) for c in range(size)] for r in range(size)]    # rows
    lines += [[(r, c) for r in range(size)] for c in range(size)]   # columns
    lines.append([(i, i) for i in range(size)])                     # diagonal \
    lines.append([(i, size - 1 - i) for i in range(size)])          # diagonal /

    for line in lines:
        if all(board[r][c] == player for r, c in line):
            return True
    return False
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def is_full(board):
    """Return True when none of the 3x3 cells still holds a blank (" ")."""
    for r in range(3):
        for c in range(3):
            if board[r][c] != " ":
                continue
            return False
    return True
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_empty_cells(board):
    """Return the ``(row, col)`` pairs of all blank cells, in row-major order."""
    blanks = []
    for r in range(3):
        for c in range(3):
            if board[r][c] == " ":
                blanks.append((r, c))
    return blanks
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def computer_move(board):
    """
    Place one "O" on ``board`` in place (AI move for tic-tac-toe).

    Strategy, in priority order:
      1. take the first free cell that wins the game for "O";
      2. otherwise take the first free cell that would win the game for "X"
         (i.e. block the user);
      3. otherwise take a random free cell.

    Args:
        board: 3x3 list of lists holding "X", "O" or " "; mutated in place.

    Returns:
        None. A board with no free cell is left untouched.
    """
    empty = get_empty_cells(board)
    if not empty:
        # random.choice() raises IndexError on an empty sequence; a full
        # board simply has no move to make.
        return

    # Probe every free cell for the computer first (win), then for the user
    # (block). The probe mark is always removed before deciding.
    for mark in ("O", "X"):
        for r, c in empty:
            board[r][c] = mark
            wins = check_winner(board, mark)
            board[r][c] = " "
            if wins:
                board[r][c] = "O"
                return

    r, c = random.choice(empty)
    board[r][c] = "O"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def tictactoe():
    """
    Play Tic-Tac-Toe against the computer.

    Reinforces row-column thinking - enter moves as "row col" (e.g., "2 3")
    Just like DataFrame indexing: df.iloc[row, col]

    The user plays "X" and moves first; the computer plays "O". The game
    runs on stdin/stdout until someone wins, the board is full, or the
    input stream is interrupted (Ctrl-C) or closed (EOF).
    """
    board = [[" " for _ in range(3)] for _ in range(3)]
    banner = "=" * 50

    print(banner)
    print("Welcome to Tic Tac Toe!")
    print(banner)
    print("You are X. Computer is O.")
    print("Enter moves as: row col (e.g., '2 3' for row 2, column 3)")
    print("Rows and columns are numbered 1-3")
    print("Think of it like DataFrame indexing: df.iloc[row, col]")
    print(banner)

    print_board(board)

    try:
        while True:
            # USER MOVE
            move = input("Your move (row col): ")

            # Only the parsing can legitimately fail with ValueError; keeping
            # the try this narrow stops unrelated ValueErrors raised later in
            # the turn from being reported as bad input.
            try:
                r, c = map(int, move.split())
            except ValueError:
                print("Invalid input. Enter row and column like: 2 3")
                continue

            r -= 1  # Convert to 0-indexed
            c -= 1

            if r not in range(3) or c not in range(3):
                print("Invalid position. Choose row/col between 1 and 3.")
                continue

            if board[r][c] != " ":
                print("That spot is already taken. Try again.")
                continue

            board[r][c] = "X"
            print_board(board)

            if check_winner(board, "X"):
                print("🎉 You win!")
                break

            if is_full(board):
                print("It's a draw!")
                break

            # COMPUTER MOVE
            print("Computer is thinking...")
            computer_move(board)
            print_board(board)

            if check_winner(board, "O"):
                print("💻 Computer wins!")
                break

            if is_full(board):
                print("It's a draw!")
                break
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or a closed stdin ends the game without a traceback.
        print("\n\nGame interrupted. Thanks for playing!")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# Completed 9x9 Sudoku grid used as the solution from which puzzles are derived.
BASE_BOARD = [
    [5, 3, 4, 6, 7, 8, 9, 1, 2],
    [6, 7, 2, 1, 9, 5, 3, 4, 8],
    [1, 9, 8, 3, 4, 2, 5, 6, 7],
    [8, 5, 9, 7, 6, 1, 4, 2, 3],
    [4, 2, 6, 8, 5, 3, 7, 9, 1],
    [7, 1, 3, 9, 2, 4, 8, 5, 6],
    [9, 6, 1, 5, 3, 7, 2, 8, 4],
    [2, 8, 7, 4, 1, 9, 6, 3, 5],
    [3, 4, 5, 2, 8, 6, 1, 7, 9],
]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def remove_numbers(board, holes=40):
    """
    Remove numbers from a completed Sudoku board to create a puzzle.

    Args:
        board: Grid (list of row lists) where 0 marks an empty cell.
            The input is not modified.
        holes: Number of filled cells to blank out. Values larger than the
            number of filled cells blank the whole grid; values <= 0
            remove nothing.

    Returns:
        A new grid (copied rows) with ``holes`` randomly chosen filled
        cells set to 0.
    """
    puzzle = [row[:] for row in board]

    # Pick distinct filled cells up front instead of retrying random
    # coordinates: this always terminates, even when ``holes`` exceeds the
    # number of cells that can still be cleared.
    filled = [
        (r, c)
        for r, row in enumerate(puzzle)
        for c, value in enumerate(row)
        if value != 0
    ]
    count = max(0, min(holes, len(filled)))

    for r, c in random.sample(filled, count):
        puzzle[r][c] = 0
    return puzzle
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def print_sudoku_board(board):
    """Write ``board`` to stdout with box separators; zeros are shown as "."."""
    print("\nSudoku Board:")
    for row_idx, row in enumerate(board):
        # Horizontal rule before every third row (but not before the first).
        if row_idx != 0 and row_idx % 3 == 0:
            print("------+-------+------")

        pieces = []
        for col_idx, val in enumerate(row):
            # Vertical bar before every third column (but not before the first).
            if col_idx != 0 and col_idx % 3 == 0:
                pieces.append("| ")
            pieces.append((str(val) if val != 0 else ".") + " ")
        print("".join(pieces))
    print()
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def is_valid_sudoku(board, r, c, num):
    """
    Check if placing ``num`` at ``(r, c)`` is valid.

    The value currently stored at ``(r, c)`` is ignored, because the
    placement would overwrite it; re-entering the number a cell already
    holds is therefore not reported as a conflict.

    Args:
        board: 9x9 grid (list of row lists); 0 marks an empty cell.
        r: 0-based row index of the target cell.
        c: 0-based column index of the target cell.
        num: Number to place.

    Returns:
        True when no other cell in the same row, column or 3x3 box already
        holds ``num``; False otherwise.
    """
    # Check row (skipping the target cell itself)
    if any(value == num for j, value in enumerate(board[r]) if j != c):
        return False

    # Check column (skipping the target cell itself)
    if any(board[i][c] == num for i in range(9) if i != r):
        return False

    # Check 3x3 box: (br, bc) is the top-left corner of the box holding (r, c)
    br = (r // 3) * 3
    bc = (c // 3) * 3
    for i in range(br, br + 3):
        for j in range(bc, bc + 3):
            if (i, j) != (r, c) and board[i][j] == num:
                return False

    return True
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def is_solved(board):
    """Return True when no cell of ``board`` is 0 (i.e. every cell is filled)."""
    for row in board:
        for cell in row:
            if cell != 0:
                continue
            return False
    return True
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def sudoku():
    """
    Play Sudoku!

    Reinforces row-column thinking - enter moves as "row col number" (e.g., "3 4 9")
    Just like DataFrame operations: df.iloc[row, col] = value

    A puzzle is derived from ``BASE_BOARD`` by blanking 45 cells. The game
    runs on stdin/stdout until every cell is filled, the user types
    'abort' or 'exit', or the input stream is interrupted (Ctrl-C) or
    closed (EOF).
    """
    banner = "=" * 50

    print(banner)
    print("Welcome to Sudoku!")
    print(banner)
    print("Enter moves as: row col number (e.g., '3 4 9')")
    print("Rows and columns are numbered 1-9")
    print("Think of it like DataFrame assignment: df.iloc[row, col] = value")
    print("Type 'abort' or 'exit' to quit.")
    print(banner)

    # ``puzzle`` keeps the original clues so they can be protected from
    # edits; ``board`` is the working copy the player fills in.
    puzzle = remove_numbers(BASE_BOARD, holes=45)
    board = [row[:] for row in puzzle]

    print_sudoku_board(board)

    try:
        while True:
            # input() is inside the outer try so that Ctrl-C / EOF at the
            # prompt ends the game cleanly instead of raising a traceback.
            move = input("Your move: ").strip().lower()

            if move in ("abort", "exit"):
                print("Game ended by user.")
                break

            try:
                r, c, num = map(int, move.split())
            except ValueError:
                print("Invalid input. Use: row col number (e.g., 2 5 7)")
                continue

            r -= 1  # Convert to 0-indexed
            c -= 1

            if not (0 <= r < 9 and 0 <= c < 9):
                print("Invalid position. Row/col must be 1–9.")
                continue

            if not (1 <= num <= 9):
                print("Number must be between 1 and 9.")
                continue

            if puzzle[r][c] != 0:
                print("This cell is fixed and cannot be changed.")
                continue

            if not is_valid_sudoku(board, r, c, num):
                print("Invalid move. Violates Sudoku rules.")
                continue

            board[r][c] = num
            print_sudoku_board(board)

            if is_solved(board):
                print("🎉 Congratulations! You solved the Sudoku.")
                break
    except (KeyboardInterrupt, EOFError):
        print("\n\nGame interrupted. Thanks for playing!")
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def play(game="tictactoe"):
    """
    Launch one of the hidden games. 🎮

    Recognised names (case-insensitive, surrounding whitespace ignored):
    - 'tictactoe' or 'ttt': Tic-Tac-Toe
    - 'sudoku': Sudoku

    Any other name prints a short usage message instead of starting a game.

    Args:
        game: Name of the game to play (default: 'tictactoe')

    Example:
        >>> import additory
        >>> additory.add.play('tictactoe')
        >>> additory.add.play('sudoku')
    """
    game = game.lower().strip()

    if game == 'sudoku':
        sudoku()
        return

    if game in ('tictactoe', 'ttt'):
        tictactoe()
        return

    usage = (
        f"Unknown game: {game}",
        "Available games: 'tictactoe' (or 'ttt'), 'sudoku'",
        "\nExample:",
        " add.play('tictactoe')",
        " add.play('sudoku')",
    )
    for line in usage:
        print(line)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# additory/utilities/lookup.py
|
|
2
|
+
# Consolidated lookup functionality (add.to)
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Lookup Utilities Module
|
|
6
|
+
|
|
7
|
+
This module provides the add.to() functionality for adding columns from reference dataframes.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import polars as pl
|
|
12
|
+
from typing import Union, List, Optional, Any
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def to(target_df: Union[pd.DataFrame, pl.DataFrame],
       from_df: Optional[Union[pd.DataFrame, pl.DataFrame]] = None,
       bring: Optional[Union[str, List[str]]] = None,
       against: Optional[Union[str, List[str]]] = None,
       **kwargs) -> Union[pd.DataFrame, pl.DataFrame]:
    """
    Add columns from reference dataframe to target dataframe

    The lookup is a pandas left merge of ``target_df`` with the ``against``
    and ``bring`` columns of ``from_df``. Inputs exposing ``to_pandas()``
    (e.g. polars) are converted to pandas first; when ``target_df`` is a
    polars DataFrame the result is converted back to polars.

    Args:
        target_df: Target dataframe to add columns to
        from_df: Reference dataframe to get columns from
        bring: Column(s) to bring from reference dataframe
        against: Column(s) to match on
        **kwargs: Additional parameters (accepted but not used by this
            implementation)

    Returns:
        Target dataframe with new columns added

    Raises:
        ValueError: If ``from_df``, ``bring`` or ``against`` is missing.

    Example:
        result = add.to(orders_df, from_df=products_df, bring='price', against='product_id')
    """
    if from_df is None:
        raise ValueError("from_df parameter is required")

    if bring is None:
        raise ValueError("bring parameter is required")

    if against is None:
        raise ValueError("against parameter is required")

    # Normalise to lists so a single name, a list or a tuple all work.
    bring = [bring] if isinstance(bring, str) else list(bring)
    against = [against] if isinstance(against, str) else list(against)

    # Order-preserving de-duplication: a column listed in both ``against``
    # and ``bring`` (or listed twice) must be selected only once, otherwise
    # the reference frame ends up with duplicate labels and the merge fails.
    against = list(dict.fromkeys(against))
    selected = list(dict.fromkeys(against + bring))

    # pandas frames have no ``to_pandas`` and are used as-is; other backends
    # are converted to pandas for the merge.
    target_pd = target_df.to_pandas() if hasattr(target_df, 'to_pandas') else target_df
    from_pd = from_df.to_pandas() if hasattr(from_df, 'to_pandas') else from_df

    result_pd = target_pd.merge(
        from_pd[selected],
        on=against,
        how='left'
    )

    # Convert back to original format if needed
    if isinstance(target_df, pl.DataFrame):
        return pl.from_pandas(result_pd)

    return result_pd
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def fuzzy_lookup(target_df, lookup_df, **kwargs):
    """Stub for fuzzy lookup; always raises NotImplementedError."""
    message = "Fuzzy lookup not yet implemented in restructured version"
    raise NotImplementedError(message)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def aggregate_lookup(target_df, lookup_df, **kwargs):
    """Stub for aggregate lookup; always raises NotImplementedError."""
    message = "Aggregate lookup not yet implemented in restructured version"
    raise NotImplementedError(message)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# Public API of this module
__all__ = ["to", "fuzzy_lookup", "aggregate_lookup"]
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# additory/utilities/matchers.py
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import List, Dict, Any, Tuple
|
|
5
|
+
|
|
6
|
+
def match_exact(key, lookup):
    """Return the rows stored under exactly ``key`` (case sensitive), else an empty list."""
    no_rows = []
    return lookup.get(key, no_rows)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def match_iexact(key, lookup):
    """Collect rows of every lookup key equal to ``key`` when compared case-insensitively.

    Each component is compared as ``str(value).lower()``; None components
    are kept as None and therefore only equal another None.
    """
    def _fold(parts):
        return tuple(part if part is None else str(part).lower() for part in parts)

    wanted = _fold(key)
    return [
        row
        for candidate, rows in lookup.items()
        if _fold(candidate) == wanted
        for row in rows
    ]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def match_contains(key, lookup):
    """Collect rows whose lookup key contains ``key`` component-wise (case sensitive).

    Components are compared as strings: each key component must be a
    substring of the lookup component at the same position.
    """
    found = []
    for candidate, rows in lookup.items():
        hit = True
        for wanted, actual in zip(key, candidate):
            # NOTE(review): pairs with a None on either side are skipped, so a
            # key made only of None matches every row - confirm this is intended.
            if wanted is None or actual is None:
                continue
            if str(wanted) not in str(actual):
                hit = False
                break
        if hit:
            found.extend(rows)
    return found
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def match_icontains(key, lookup):
    """Collect rows whose lookup key contains ``key`` component-wise, ignoring case.

    Components are compared as lower-cased strings: each key component must
    be a substring of the lookup component at the same position.
    """
    def _hit(candidate):
        # NOTE(review): pairs with a None on either side are skipped, so a key
        # made only of None matches every row - confirm this is intended.
        pairs = ((a, b) for a, b in zip(key, candidate) if a is not None and b is not None)
        return all(str(a).lower() in str(b).lower() for a, b in pairs)

    return [row for candidate, rows in lookup.items() if _hit(candidate) for row in rows]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def match_beginswith(key, lookup):
    """Collect rows whose lookup key starts with ``key`` component-wise (case sensitive).

    Components are compared as strings: the lookup component must start
    with the key component at the same position.
    """
    found = []
    for candidate, rows in lookup.items():
        # NOTE(review): pairs with a None on either side are skipped, so a key
        # made only of None matches every row - confirm this is intended.
        checks = [
            str(actual).startswith(str(prefix))
            for prefix, actual in zip(key, candidate)
            if prefix is not None and actual is not None
        ]
        if False not in checks:
            found.extend(rows)
    return found
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def match_ibeginswith(key, lookup):
    """Collect rows whose lookup key starts with ``key`` component-wise, ignoring case.

    Components are compared as lower-cased strings: the lookup component
    must start with the key component at the same position.
    """
    def _hit(candidate):
        for prefix, actual in zip(key, candidate):
            # NOTE(review): pairs with a None on either side are skipped, so a
            # key made only of None matches every row - confirm this is intended.
            if prefix is None or actual is None:
                continue
            if not str(actual).lower().startswith(str(prefix).lower()):
                return False
        return True

    found = []
    for candidate, rows in lookup.items():
        if _hit(candidate):
            found.extend(rows)
    return found
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def match_endswith(key, lookup):
    """Collect rows whose lookup key ends with ``key`` component-wise (case sensitive).

    Components are compared as strings: the lookup component must end with
    the key component at the same position.
    """
    found = []
    for candidate, rows in lookup.items():
        hit = True
        for suffix, actual in zip(key, candidate):
            # NOTE(review): pairs with a None on either side are skipped, so a
            # key made only of None matches every row - confirm this is intended.
            if suffix is None or actual is None:
                continue
            if not str(actual).endswith(str(suffix)):
                hit = False
                break
        if hit:
            found.extend(rows)
    return found
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def match_iendswith(key, lookup):
    """Collect rows whose lookup key ends with ``key`` component-wise, ignoring case.

    Components are compared as lower-cased strings: the lookup component
    must end with the key component at the same position.
    """
    def _hit(candidate):
        # NOTE(review): pairs with a None on either side are skipped, so a key
        # made only of None matches every row - confirm this is intended.
        pairs = ((a, b) for a, b in zip(key, candidate) if a is not None and b is not None)
        return all(str(b).lower().endswith(str(a).lower()) for a, b in pairs)

    return [row for candidate, rows in lookup.items() if _hit(candidate) for row in rows]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def match_regex(key, lookup):
    """Collect rows whose lookup key matches the regular expressions in ``key``.

    Each non-None key component is compiled as a pattern and searched
    (``re.search`` semantics) in the string form of the lookup component at
    the same position. A None on either side counts as a failed match. If
    any pattern is not a valid regular expression, no rows are returned.
    """
    compiled = []
    try:
        for part in key:
            compiled.append(None if part is None else re.compile(str(part)))
    except re.error:
        return []  # Invalid regex returns no matches

    found = []
    for candidate, rows in lookup.items():
        try:
            hit = True
            for regex, value in zip(compiled, candidate):
                if not regex or value is None or not regex.search(str(value)):
                    hit = False
                    break
            if hit:
                found.extend(rows)
        except (TypeError, AttributeError):
            continue
    return found
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def match_numeric_range(key, lookup):
    """
    Collect rows whose single-value lookup key lies inside a numeric range.

    ``key`` must be a ``(min_val, max_val)`` pair; both ends are converted
    with ``float`` and are inclusive. Lookup keys that do not hold exactly
    one component, or whose component cannot be converted to float, are
    ignored. A malformed ``key`` yields an empty list.
    """
    if len(key) != 2:
        return []

    try:
        low = float(key[0])
        high = float(key[1])
    except (ValueError, TypeError):
        return []

    hits = []
    for candidate, rows in lookup.items():
        if len(candidate) != 1:
            continue
        try:
            if low <= float(candidate[0]) <= high:
                hits.extend(rows)
        except (ValueError, TypeError):
            continue
    return hits
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def match_fuzzy(key, lookup):
    """
    Collect rows of every lookup key that is similar enough to ``key``.

    Similarity is delegated to ``_calculate_similarity``; keys scoring at
    least 0.8 are considered matches. This is the simple V1 approach
    (semantic matching is planned for V2).
    """
    threshold = 0.8  # Similarity threshold
    return [
        row
        for candidate, rows in lookup.items()
        if _calculate_similarity(key, candidate) >= threshold
        for row in rows
    ]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# Helper functions
|
|
137
|
+
def _safe_contains(needle, haystack):
    """Return ``needle in haystack``, or False when that test raises TypeError."""
    try:
        found = needle in haystack
    except TypeError:
        found = False
    return found
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _safe_startswith(text, prefix):
    """Return ``text.startswith(prefix)``, or False on TypeError/AttributeError."""
    try:
        result = text.startswith(prefix)
    except (TypeError, AttributeError):
        result = False
    return result
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _safe_endswith(text, suffix):
    """Return ``text.endswith(suffix)``, or False on TypeError/AttributeError."""
    try:
        result = text.endswith(suffix)
    except (TypeError, AttributeError):
        result = False
    return result
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _calculate_similarity(key1, key2):
    """
    Simple similarity calculation for fuzzy matching.

    Scores each pair of components and returns the mean score:
      - both None                     -> 1.0
      - exactly one None              -> 0.0
      - equal when lower-cased        -> 1.0
      - otherwise                     -> Jaccard similarity of the two
                                         lower-cased character sets

    Args:
        key1: Sequence of key components.
        key2: Sequence of key components.

    Returns:
        A float in [0.0, 1.0]. Keys of different length score 0.0; two
        empty keys are identical and score 1.0.
    """
    if len(key1) != len(key2):
        return 0.0
    if len(key1) == 0:
        # Nothing to average; avoids dividing by zero below.
        return 1.0

    total_similarity = 0.0
    for a, b in zip(key1, key2):
        if a is None or b is None:
            # Identity test rather than ``==`` so the comparison cannot be
            # hijacked by the non-None operand's ``__eq__``.
            if a is None and b is None:
                total_similarity += 1.0
            continue

        str_a, str_b = str(a).lower(), str(b).lower()
        if str_a == str_b:
            total_similarity += 1.0
            continue

        # Character-level Jaccard similarity. The strings differ, so at
        # least one is non-empty and the union is never empty.
        set_a, set_b = set(str_a), set(str_b)
        total_similarity += len(set_a & set_b) / len(set_a | set_b)

    return total_similarity / len(key1)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# Registry mapping each matcher name to the function implementing it.
MATCHERS = {
    "exact": match_exact,                # exact, case sensitive
    "iexact": match_iexact,              # exact, case-insensitive
    "contains": match_contains,          # substring, case sensitive
    "icontains": match_icontains,        # substring, case-insensitive
    "beginswith": match_beginswith,      # prefix, case sensitive
    "ibeginswith": match_ibeginswith,    # prefix, case-insensitive
    "endswith": match_endswith,          # suffix, case sensitive
    "iendswith": match_iendswith,        # suffix, case-insensitive
    "regex": match_regex,                # regular-expression pattern
    "range": match_numeric_range,        # numeric (min, max) range
    "fuzzy": match_fuzzy,                # similarity-based
}
|