bookalimo 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,8 @@ import re
7
7
  import unicodedata
8
8
  from functools import lru_cache
9
9
  from importlib.resources import files
10
- from typing import Any, Optional, cast
10
+ from types import MappingProxyType
11
+ from typing import Any, Dict, List, Optional, Tuple, cast
11
12
 
12
13
  import numpy as np
13
14
  from numpy.typing import NDArray
@@ -30,10 +31,6 @@ AIRPORTY_TYPES = {
30
31
  "heliport",
31
32
  }
32
33
 
33
- # Small bonus when a candidate airport’s IATA/ICAO matches codes hinted by Places
34
- CODE_BONUS_QUERY = 15.0 # user typed a code (strong)
35
- CODE_BONUS_PLACES = 8.0 # code inferred from Places strings (softer)
36
-
37
34
 
38
35
  # ---------- Helpers ----------
39
36
  def _norm(s: Optional[str]) -> str:
@@ -62,26 +59,6 @@ def _haversine_km_scalar_to_many(
62
59
  return cast(NDArray[np.float64], 6371.0088 * c) # mean Earth radius (km)
63
60
 
64
61
 
65
- def _looks_like_code(q: str) -> tuple[Optional[str], Optional[str]]:
66
- q = q.strip().upper()
67
- if re.fullmatch(r"[A-Z0-9]{3}", q):
68
- return (q, None) # likely IATA
69
- if re.fullmatch(r"[A-Z0-9]{4}", q):
70
- return (None, q) # likely ICAO
71
- return (None, None)
72
-
73
-
74
- def _extract_codes_from_text(s: str) -> tuple[set[str], set[str]]:
75
- """
76
- Pull 3- or 4-char uppercase tokens that *could* be codes.
77
- We'll only use these with a small bonus and only if the place looks airport-ish.
78
- """
79
- tokens = set(re.findall(r"\b[A-Z0-9]{3,4}\b", s.upper()))
80
- iata = {t for t in tokens if re.fullmatch(r"[A-Z]{3}", t)}
81
- icao = {t for t in tokens if re.fullmatch(r"[A-Z0-9]{4}", t)}
82
- return iata, icao
83
-
84
-
85
62
  def _place_points(places: list[GooglePlace]) -> list[tuple[float, float]]:
86
63
  """
87
64
  Extract (lat, lon) from Places responses. Prefers 'location', then viewport center,
@@ -108,18 +85,13 @@ def _place_points(places: list[GooglePlace]) -> list[tuple[float, float]]:
108
85
  return pts
109
86
 
110
87
 
111
- def _place_hints_and_codes(
112
- places: list[GooglePlace],
113
- ) -> tuple[list[str], set[str], set[str]]:
88
+ def _place_hints(places: list[GooglePlace]) -> list[str]:
114
89
  """
115
- Collect a few high-utility strings from Places to augment text matching,
116
- plus soft code candidates (IATA/ICAO) extracted from those strings.
117
- We prioritize places whose types include airport-ish categories.
90
+ Collect high-utility strings from Places to augment text matching.
91
+ Prioritizes places whose types include airport-ish categories.
118
92
  """
119
93
  hints_prioritized: list[str] = []
120
94
  hints_general: list[str] = []
121
- iata_cand: set[str] = set()
122
- icao_cand: set[str] = set()
123
95
 
124
96
  for p in places or []:
125
97
  types = set(getattr(p, "types", []) or [])
@@ -151,13 +123,6 @@ def _place_hints_and_codes(
151
123
  if not candidates:
152
124
  continue
153
125
 
154
- # Extract soft code candidates from the most descriptive strings
155
- for s in candidates[:2]:
156
- i3, i4 = _extract_codes_from_text(s)
157
- if airporty:
158
- iata_cand |= i3
159
- icao_cand |= i4
160
-
161
126
  # Prioritize hints if the place is airport-ish
162
127
  (hints_prioritized if airporty else hints_general).extend(candidates[:2])
163
128
 
@@ -176,8 +141,7 @@ def _place_hints_and_codes(
176
141
  break
177
142
  return out
178
143
 
179
- hints = dedup_cap(hints_prioritized, cap=3) + dedup_cap(hints_general, cap=2)
180
- return hints, iata_cand, icao_cand
144
+ return dedup_cap(hints_prioritized, cap=3) + dedup_cap(hints_general, cap=2)
181
145
 
182
146
 
183
147
  def _parse_coord(s: Optional[str]) -> float:
@@ -193,18 +157,44 @@ def _parse_coord(s: Optional[str]) -> float:
193
157
  return float("nan")
194
158
 
195
159
 
160
def _frozen_np_float(arr_like: List[float]) -> NDArray[np.float64]:
    """
    Build a read-only float64 array from a list of floats.

    The values are copied into a new array, so locking the result does not
    affect the list that was passed in.
    """
    frozen = np.array(arr_like, dtype=np.float64)
    # Same effect as setflags(write=False): later in-place writes raise.
    frozen.flags.writeable = False
    return frozen
165
+
166
+
167
def _frozen_np_bool(arr_like: List[bool]) -> NDArray[np.bool_]:
    """
    Build a read-only boolean array from a list of flags.

    The values are copied into a new array, so locking the result does not
    affect the list that was passed in.
    """
    mask = np.array(arr_like, dtype=bool)
    # Same effect as setflags(write=False): later in-place writes raise.
    mask.flags.writeable = False
    return cast(NDArray[np.bool_], mask)
172
+
173
+
174
+ # ---------- Data loading with immutable return + dual indexes ----------
196
175
  @lru_cache(maxsize=1)
197
- def _load_data() -> dict[str, Any]:
176
+ def _load_data() -> MappingProxyType[str, Any]:
198
177
  """
199
178
  Loads and caches airport rows and vectorized fields.
200
179
  Expects CSV columns: icao,iata,name,city,subd,country,elevation,lat,lon,tz,lid
180
+
181
+ Returns an immutable mapping with:
182
+ - rows: tuple[dict[str, Any]] (each row dict should be treated as read-only)
183
+ - lat_rad, lon_rad: np.ndarray (float64, write-protected)
184
+ - keys: tuple[str] (normalized text used for fuzzy matching)
185
+ - codes: tuple[tuple[str, str]] (iata, icao)
186
+ - has_coords: np.ndarray (bool, write-protected)
187
+ - idx_iata: Mapping[str, int] (UPPERCASE IATA -> row index)
188
+ - idx_icao: Mapping[str, int] (UPPERCASE ICAO -> row index)
201
189
  """
202
- rows: list[dict[str, Any]] = []
203
- lat_rad: list[float] = []
204
- lon_rad: list[float] = []
205
- keys: list[str] = [] # normalized text used for fuzzy matching
206
- codes: list[tuple[str, str]] = [] # (iata, icao)
207
- has_coords: list[bool] = []
190
+ rows_mut: List[Dict[str, Any]] = []
191
+ lat_rad_mut: List[float] = []
192
+ lon_rad_mut: List[float] = []
193
+ keys_mut: List[str] = []
194
+ codes_mut: List[Tuple[str, str]] = []
195
+ has_coords_mut: List[bool] = []
196
+ idx_iata_mut: Dict[str, int] = {}
197
+ idx_icao_mut: Dict[str, int] = {}
208
198
 
209
199
  with open(CSV_PATH, newline="", encoding="utf-8") as f:
210
200
  reader = csv.DictReader(f)
@@ -214,15 +204,12 @@ def _load_data() -> dict[str, Any]:
214
204
  iata = (r.get("iata") or "").strip() or None
215
205
  icao = (r.get("icao") or "").strip() or None
216
206
 
217
- # Robust coords: keep NaN if missing/invalid
218
- lat_s = cast(Optional[str], r.get("lat"))
219
- lon_s = cast(Optional[str], r.get("lon"))
220
- lat = _parse_coord(lat_s)
221
- lon = _parse_coord(lon_s)
222
-
207
+ lat = _parse_coord(cast(Optional[str], r.get("lat")))
208
+ lon = _parse_coord(cast(Optional[str], r.get("lon")))
223
209
  valid = not (math.isnan(lat) or math.isnan(lon))
224
210
 
225
- rows.append(
211
+ idx = len(rows_mut)
212
+ rows_mut.append(
226
213
  {
227
214
  "name": name,
228
215
  "city": city,
@@ -232,24 +219,110 @@ def _load_data() -> dict[str, Any]:
232
219
  "lon": lon,
233
220
  }
234
221
  )
235
- lat_rad.append(math.radians(lat) if valid else float("nan"))
236
- lon_rad.append(math.radians(lon) if valid else float("nan"))
237
- has_coords.append(valid)
222
+
223
+ # radians() propagates NaN; no conditional needed
224
+ lat_rad_mut.append(math.radians(lat))
225
+ lon_rad_mut.append(math.radians(lon))
226
+ has_coords_mut.append(valid)
238
227
 
239
228
  code_bits = (
240
229
  " ".join([c for c in (iata, icao) if c]) if (iata or icao) else ""
241
230
  )
242
- keys.append(_norm(f"{name} {city} {code_bits}"))
243
- codes.append((iata or "", icao or ""))
231
+ keys_mut.append(_norm(f"{name} {city} {code_bits}"))
232
+ codes_mut.append((iata or "", icao or ""))
233
+
234
+ # Build dual indexes (first occurrence wins)
235
+ if iata:
236
+ iu = iata.upper()
237
+ if iu not in idx_iata_mut:
238
+ idx_iata_mut[iu] = idx
239
+ if icao:
240
+ iu = icao.upper()
241
+ if iu not in idx_icao_mut:
242
+ idx_icao_mut[iu] = idx
243
+
244
+ # Freeze everything
245
+ rows = tuple(rows_mut)
246
+ lat_rad = _frozen_np_float(lat_rad_mut)
247
+ lon_rad = _frozen_np_float(lon_rad_mut)
248
+ keys = tuple(keys_mut)
249
+ codes = tuple(codes_mut)
250
+ has_coords = _frozen_np_bool(has_coords_mut)
251
+ idx_iata = MappingProxyType(dict(idx_iata_mut)) # proxy ensures read-only
252
+ idx_icao = MappingProxyType(dict(idx_icao_mut))
253
+
254
+ # Return a read-only top-level mapping
255
+ return MappingProxyType(
256
+ {
257
+ "rows": rows,
258
+ "lat_rad": lat_rad,
259
+ "lon_rad": lon_rad,
260
+ "keys": keys,
261
+ "codes": codes,
262
+ "has_coords": has_coords,
263
+ "idx_iata": idx_iata,
264
+ "idx_icao": idx_icao,
265
+ }
266
+ )
267
+
268
+
269
+ # ---------- Convenience lookups (O(1) via dual indexes) ----------
270
def get_row_by_iata(code: str) -> Optional[dict[str, Any]]:
    """
    Return the airport row for an IATA code, or None if not found.

    Args:
        code: IATA code. Matching is case-insensitive and ignores surrounding
            whitespace, mirroring how the index keys are built (stripped and
            upper-cased at load time).

    Returns:
        A shallow copy of the matching row, or None when ``code`` is empty,
        blank, or unknown.
    """
    key = code.strip().upper() if code else ""
    if not key:
        return None
    data = _load_data()
    idx = data["idx_iata"].get(key)
    if idx is None:
        return None
    # _load_data() is cached and its row dicts are plain mutable dicts, so
    # return a copy: a caller editing the result must not alter the shared
    # cache that every later lookup reads.
    return dict(data["rows"][idx])
277
+
278
+
279
def get_row_by_icao(code: str) -> Optional[dict[str, Any]]:
    """
    Return the airport row for an ICAO code, or None if not found.

    Args:
        code: ICAO code. Matching is case-insensitive and ignores surrounding
            whitespace, mirroring how the index keys are built (stripped and
            upper-cased at load time).

    Returns:
        A shallow copy of the matching row, or None when ``code`` is empty,
        blank, or unknown.
    """
    key = code.strip().upper() if code else ""
    if not key:
        return None
    data = _load_data()
    idx = data["idx_icao"].get(key)
    if idx is None:
        return None
    # _load_data() is cached and its row dicts are plain mutable dicts, so
    # return a copy: a caller editing the result must not alter the shared
    # cache that every later lookup reads.
    return dict(data["rows"][idx])
286
+
287
+
288
def _try_direct_code_lookup(query: str) -> Optional[ResolvedAirport]:
    """
    Resolve ``query`` as an exact IATA or ICAO code.

    The query is stripped and upper-cased. A 3-character result is looked up
    as an IATA code and a 4-character result as an ICAO code; any other
    length is not treated as a code.

    Returns:
        A ResolvedAirport with confidence 0.95 when the code is found,
        otherwise None.
    """
    code = query.strip().upper() if query else ""

    # Pick the index by length; anything else cannot be a code.
    if len(code) == 3:
        row = get_row_by_iata(code)
    elif len(code) == 4:
        row = get_row_by_icao(code)
    else:
        return None

    if not row:
        return None

    return ResolvedAirport(
        name=row["name"],
        city=row["city"],
        iata_code=row["iata"],
        icao_code=row["icao"],
        confidence=0.95,  # High confidence for exact code matches
    )
253
326
 
254
327
 
255
328
  # ---------- Main ----------
@@ -274,6 +347,11 @@ def resolve_airport(
274
347
  The list of resolved airports ordered by confidence.
275
348
  """
276
349
 
350
+ # First, try direct IATA/ICAO code lookup for exact matches
351
+ direct_match = _try_direct_code_lookup(query)
352
+ if direct_match is not None:
353
+ return [direct_match]
354
+
277
355
  data = _load_data()
278
356
  rows: list[dict[str, Any]] = data["rows"]
279
357
  n = len(rows)
@@ -298,61 +376,12 @@ def resolve_airport(
298
376
  prox = 100.0 * np.exp(-min_dist / float(DIST_KM_SCALE))
299
377
 
300
378
  # ---- Text score: best across augmented queries ----
301
- hints, iata_from_places, icao_from_places = _place_hints_and_codes(places_response)
379
+ hints = _place_hints(places_response)
302
380
  q_variants = [_norm(query)] + [_norm(f"{query} {h}") for h in hints]
303
381
  # Single cdist call over up to 1+5 variants keeps things fast
304
382
  scores_matrix = process.cdist(q_variants, data["keys"], scorer=fuzz.token_set_ratio)
305
383
  text_scores = np.array(scores_matrix.max(axis=0), dtype=float)
306
384
 
307
- # ---- Code bonuses ----
308
- # 1) If the *user* typed a code, stronger bonus
309
- iata_q, icao_q = _looks_like_code(query)
310
- if iata_q or icao_q:
311
- if iata_q:
312
- text_scores += (
313
- np.fromiter(
314
- ((1.0 if iata_q == iata else 0.0) for iata, _ in data["codes"]),
315
- float,
316
- count=n,
317
- )
318
- * CODE_BONUS_QUERY
319
- )
320
- if icao_q:
321
- text_scores += (
322
- np.fromiter(
323
- ((1.0 if icao_q == icao else 0.0) for _, icao in data["codes"]),
324
- float,
325
- count=n,
326
- )
327
- * CODE_BONUS_QUERY
328
- )
329
-
330
- # 2) If Places hints include codes (e.g., “JFK Terminal 4”), soft bonus
331
- if iata_from_places:
332
- text_scores += (
333
- np.fromiter(
334
- (
335
- (1.0 if (iata in iata_from_places) else 0.0)
336
- for iata, _ in data["codes"]
337
- ),
338
- float,
339
- count=n,
340
- )
341
- * CODE_BONUS_PLACES
342
- )
343
- if icao_from_places:
344
- text_scores += (
345
- np.fromiter(
346
- (
347
- (1.0 if (icao in icao_from_places) else 0.0)
348
- for _, icao in data["codes"]
349
- ),
350
- float,
351
- count=n,
352
- )
353
- * CODE_BONUS_PLACES
354
- )
355
-
356
385
  # Cap to 0..100
357
386
  text_scores = np.clip(text_scores, 0.0, 100.0)
358
387
 
@@ -4,11 +4,22 @@ Transport abstractions for Google Places clients.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- from typing import Any, Optional, Protocol
7
+ from typing import Any, Optional, Protocol, Type, TypeVar
8
8
 
9
9
  from google.api_core.client_options import ClientOptions
10
10
  from google.maps.places_v1 import PlacesAsyncClient, PlacesClient
11
11
 
12
+ T = TypeVar("T", PlacesClient, PlacesAsyncClient)
13
+
14
+
15
def _get_places_client(
    api_key: str,
    client: Optional[T],
    client_type: Type[T],
) -> T:
    """
    Return the Places client a transport should use.

    Args:
        api_key: Google API key; used only when a new client has to be built.
        client: A caller-supplied client, or None to build one.
        client_type: The client class to instantiate when ``client`` is None.

    Returns:
        ``client`` unchanged when one was supplied, otherwise a new
        ``client_type`` built with ``ClientOptions(api_key=api_key)``.
    """
    # Test for None explicitly instead of truthiness, so an injected client
    # that happens to be falsy (e.g. a test double defining __bool__ or
    # __len__) is never silently swapped for a real one.
    if client is not None:
        return client
    return client_type(client_options=ClientOptions(api_key=api_key))
22
+
12
23
 
13
24
  class SyncPlacesTransport(Protocol):
14
25
  """Protocol for synchronous Places API transport."""
@@ -48,9 +59,7 @@ class GoogleSyncTransport:
48
59
  """Synchronous transport implementation for Google Places API."""
49
60
 
50
61
  def __init__(self, api_key: str, client: Optional[PlacesClient] = None) -> None:
51
- self.client = client or PlacesClient(
52
- client_options=ClientOptions(api_key=api_key)
53
- )
62
+ self.client = _get_places_client(api_key, client, PlacesClient)
54
63
 
55
64
  def autocomplete_places(self, *, request: dict[str, Any], **kwargs: Any) -> Any:
56
65
  return self.client.autocomplete_places(request=request, **kwargs)
@@ -75,9 +84,7 @@ class GoogleAsyncTransport:
75
84
  def __init__(
76
85
  self, api_key: str, client: Optional[PlacesAsyncClient] = None
77
86
  ) -> None:
78
- self.client = client or PlacesAsyncClient(
79
- client_options=ClientOptions(api_key=api_key)
80
- )
87
+ self.client = _get_places_client(api_key, client, PlacesAsyncClient)
81
88
 
82
89
  async def autocomplete_places(
83
90
  self, *, request: dict[str, Any], **kwargs: Any
bookalimo/logging.py CHANGED
@@ -21,6 +21,7 @@ from collections.abc import Awaitable, Iterable, Mapping
21
21
  from functools import wraps
22
22
  from time import perf_counter
23
23
  from typing import Any, Callable, TypeVar
24
+ from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
24
25
 
25
26
  from typing_extensions import ParamSpec
26
27
 
@@ -65,6 +66,87 @@ else:
65
66
 
66
67
REDACTED = "******"

# Sensitive query parameter names (case-insensitive)
SENSITIVE_QUERY_PARAMS = {
    "token",
    "access_token",
    "refresh_token",
    "api_key",
    "apikey",
    "key",
    "password",
    "pass",
    "pwd",
    "secret",
    "auth",
    "authorization",
    "code",
    "auth_code",
    "verification_code",
    "otp",
    "session",
    "session_id",
    "sid",
    "csrf_token",
    "xsrf_token",
    "signature",
    "sig",
    "hash",
    "nonce",
    "state",
}


def redact_url(
    url: str, *, replacement: str = REDACTED, sensitive_params: set[str] | None = None
) -> str:
    """
    Redact sensitive query parameters from a URL.

    Only the values of sensitive parameters are rewritten; every other part
    of the query string is kept exactly as written (no re-encoding), and the
    replacement is inserted verbatim so the result stays readable in logs.

    Args:
        url: The URL to redact
        replacement: The replacement string for sensitive values
        sensitive_params: Set of parameter names to redact (case-insensitive).
                          None or an empty set falls back to
                          SENSITIVE_QUERY_PARAMS, so redaction is never
                          switched off by accident.

    Returns:
        The URL with sensitive query parameters redacted. If the URL cannot
        be parsed, the whole query string is replaced rather than returned
        as-is. Non-string or empty input is passed to ``_safe_str``.

    Example:
        >>> redact_url("https://api.example.com/auth?token=secret123&user=john")
        'https://api.example.com/auth?token=******&user=john'
    """
    if not isinstance(url, str) or not url:
        return _safe_str(url)

    try:
        parts = urlsplit(url)
        if not parts.query:
            return url

        sensitive = sensitive_params or SENSITIVE_QUERY_PARAMS
        sensitive_lower = {name.lower() for name in sensitive}

        redacted_pairs = []
        for pair in parts.query.split("&"):
            # Decode only to *compare* names (so "to%6Ben" still matches
            # "token"); the raw text is what gets written back.
            names = [name for name, _ in parse_qsl(pair, keep_blank_values=True)]
            if any(name.lower() in sensitive_lower for name in names):
                raw_key = pair.partition("=")[0]
                redacted_pairs.append(f"{raw_key}={replacement}")
            else:
                redacted_pairs.append(pair)

        # Join by hand instead of urlencode(): urlencode would percent-encode
        # the replacement ("******" -> "%2A%2A...") and rewrite untouched
        # parameters.
        redacted_query = "&".join(redacted_pairs)
        return urlunsplit(
            (parts.scheme, parts.netloc, parts.path, redacted_query, parts.fragment)
        )

    except Exception:
        # Logging helper: never raise, but fail closed. A URL that cannot be
        # parsed may still carry secrets after "?", so drop the whole query
        # instead of echoing the input back.
        base, has_query, _ = url.partition("?")
        if has_query:
            return f"{base}?{replacement}"
        return _safe_str(url)
149
+
68
150
 
69
151
  def mask_token(s: Any, *, show_prefix: int = 6, show_suffix: int = 2) -> str:
70
152
  if not isinstance(s, str) or not s:
@@ -179,6 +261,27 @@ def get_logger(name: str | None = None) -> logging.Logger:
179
261
  return logger
180
262
 
181
263
 
264
def configure_httpx_logging() -> None:
    """
    Raise the minimum level of the httpx/httpcore loggers while this module's
    logger has DEBUG enabled.

    httpx's own request/response records can include full URLs, which may
    carry sensitive query parameters. When our logger is enabled for DEBUG:

    - ``httpx`` is raised to WARNING if its own level is lower.
    - ``httpcore.http11`` is raised to INFO if its own level is lower.

    Levels that are already at or above those floors are left untouched, and
    nothing is changed when DEBUG is not enabled for our logger.
    NOTE(review): the original note says the transport classes call this
    automatically; those call sites are not visible here.
    """
    floors = (
        (logging.getLogger("httpx"), logging.WARNING),
        (logging.getLogger("httpcore.http11"), logging.INFO),
    )

    if not logger.isEnabledFor(logging.DEBUG):
        return

    for target, floor in floors:
        # .level is the logger's own setting; NOTSET (0) counts as "lower".
        if target.level < floor:
            target.setLevel(floor)
283
+
284
+
182
285
  # ---- decorator for async methods --------------------------------------------
183
286
 
184
287