affinity-sdk 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. affinity/__init__.py +139 -0
  2. affinity/cli/__init__.py +7 -0
  3. affinity/cli/click_compat.py +27 -0
  4. affinity/cli/commands/__init__.py +1 -0
  5. affinity/cli/commands/_entity_files_dump.py +219 -0
  6. affinity/cli/commands/_list_entry_fields.py +41 -0
  7. affinity/cli/commands/_v1_parsing.py +77 -0
  8. affinity/cli/commands/company_cmds.py +2139 -0
  9. affinity/cli/commands/completion_cmd.py +33 -0
  10. affinity/cli/commands/config_cmds.py +540 -0
  11. affinity/cli/commands/entry_cmds.py +33 -0
  12. affinity/cli/commands/field_cmds.py +413 -0
  13. affinity/cli/commands/interaction_cmds.py +875 -0
  14. affinity/cli/commands/list_cmds.py +3152 -0
  15. affinity/cli/commands/note_cmds.py +433 -0
  16. affinity/cli/commands/opportunity_cmds.py +1174 -0
  17. affinity/cli/commands/person_cmds.py +1980 -0
  18. affinity/cli/commands/query_cmd.py +444 -0
  19. affinity/cli/commands/relationship_strength_cmds.py +62 -0
  20. affinity/cli/commands/reminder_cmds.py +595 -0
  21. affinity/cli/commands/resolve_url_cmd.py +127 -0
  22. affinity/cli/commands/session_cmds.py +84 -0
  23. affinity/cli/commands/task_cmds.py +110 -0
  24. affinity/cli/commands/version_cmd.py +29 -0
  25. affinity/cli/commands/whoami_cmd.py +36 -0
  26. affinity/cli/config.py +108 -0
  27. affinity/cli/context.py +749 -0
  28. affinity/cli/csv_utils.py +195 -0
  29. affinity/cli/date_utils.py +42 -0
  30. affinity/cli/decorators.py +77 -0
  31. affinity/cli/errors.py +28 -0
  32. affinity/cli/field_utils.py +355 -0
  33. affinity/cli/formatters.py +551 -0
  34. affinity/cli/help_json.py +283 -0
  35. affinity/cli/logging.py +100 -0
  36. affinity/cli/main.py +261 -0
  37. affinity/cli/options.py +53 -0
  38. affinity/cli/paths.py +32 -0
  39. affinity/cli/progress.py +183 -0
  40. affinity/cli/query/__init__.py +163 -0
  41. affinity/cli/query/aggregates.py +357 -0
  42. affinity/cli/query/dates.py +194 -0
  43. affinity/cli/query/exceptions.py +147 -0
  44. affinity/cli/query/executor.py +1236 -0
  45. affinity/cli/query/filters.py +248 -0
  46. affinity/cli/query/models.py +333 -0
  47. affinity/cli/query/output.py +331 -0
  48. affinity/cli/query/parser.py +619 -0
  49. affinity/cli/query/planner.py +430 -0
  50. affinity/cli/query/progress.py +270 -0
  51. affinity/cli/query/schema.py +439 -0
  52. affinity/cli/render.py +1589 -0
  53. affinity/cli/resolve.py +222 -0
  54. affinity/cli/resolvers.py +249 -0
  55. affinity/cli/results.py +308 -0
  56. affinity/cli/runner.py +218 -0
  57. affinity/cli/serialization.py +65 -0
  58. affinity/cli/session_cache.py +276 -0
  59. affinity/cli/types.py +70 -0
  60. affinity/client.py +771 -0
  61. affinity/clients/__init__.py +19 -0
  62. affinity/clients/http.py +3664 -0
  63. affinity/clients/pipeline.py +165 -0
  64. affinity/compare.py +501 -0
  65. affinity/downloads.py +114 -0
  66. affinity/exceptions.py +615 -0
  67. affinity/filters.py +1128 -0
  68. affinity/hooks.py +198 -0
  69. affinity/inbound_webhooks.py +302 -0
  70. affinity/models/__init__.py +163 -0
  71. affinity/models/entities.py +798 -0
  72. affinity/models/pagination.py +513 -0
  73. affinity/models/rate_limit_snapshot.py +48 -0
  74. affinity/models/secondary.py +413 -0
  75. affinity/models/types.py +663 -0
  76. affinity/policies.py +40 -0
  77. affinity/progress.py +22 -0
  78. affinity/py.typed +0 -0
  79. affinity/services/__init__.py +42 -0
  80. affinity/services/companies.py +1286 -0
  81. affinity/services/lists.py +1892 -0
  82. affinity/services/opportunities.py +1330 -0
  83. affinity/services/persons.py +1348 -0
  84. affinity/services/rate_limits.py +173 -0
  85. affinity/services/tasks.py +193 -0
  86. affinity/services/v1_only.py +2445 -0
  87. affinity/types.py +83 -0
  88. affinity_sdk-0.9.5.dist-info/METADATA +622 -0
  89. affinity_sdk-0.9.5.dist-info/RECORD +92 -0
  90. affinity_sdk-0.9.5.dist-info/WHEEL +4 -0
  91. affinity_sdk-0.9.5.dist-info/entry_points.txt +2 -0
  92. affinity_sdk-0.9.5.dist-info/licenses/LICENSE +21 -0
affinity/filters.py ADDED
@@ -0,0 +1,1128 @@
1
+ """
2
+ Filter builder for V2 API filtering support.
3
+
4
+ Provides a type-safe, Pythonic way to build filter expressions for V2 list endpoints.
5
+ The builder handles proper escaping and quoting of user inputs.
6
+
7
+ Example:
8
+ from affinity.filters import Filter, F
9
+
10
+ # Using the builder (recommended)
11
+ filter = (
12
+ F.field("name").contains("Acme") &
13
+ F.field("status").equals("Active")
14
+ )
15
+ companies = client.companies.list(filter=filter)
16
+
17
+ # Or build complex filters
18
+ filter = (
19
+ (F.field("name").contains("Corp") | F.field("name").contains("Inc")) &
20
+ ~F.field("archived").equals(True)
21
+ )
22
+
23
+ # Raw filter string escape hatch (power users)
24
+ companies = client.companies.list(filter='name =~ "Acme"')
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ from abc import ABC, abstractmethod
30
+ from dataclasses import dataclass
31
+ from datetime import date, datetime
32
+ from enum import Enum, auto
33
+ from typing import Any, ClassVar
34
+
35
+ from affinity.compare import compare_values, map_operator, normalize_value
36
+
37
+
38
+ @dataclass(frozen=True)
39
+ class RawToken:
40
+ """
41
+ A raw token inserted into a filter expression without quoting.
42
+
43
+ Used for special Affinity Filtering Language literals like `*`.
44
+ """
45
+
46
+ token: str
47
+
48
+
49
+ def _escape_string(value: str) -> str:
50
+ """
51
+ Escape a string value for use in a filter expression.
52
+
53
+ Handles:
54
+ - Backslashes (must be doubled)
55
+ - Double quotes (must be escaped)
56
+ - Newlines and tabs (escaped as literals)
57
+ - NUL bytes (removed)
58
+ """
59
+ # Order matters: escape backslashes first
60
+ result = value.replace("\\", "\\\\")
61
+ result = result.replace('"', '\\"')
62
+ result = result.replace("\x00", "")
63
+ result = result.replace("\n", "\\n")
64
+ result = result.replace("\t", "\\t")
65
+ result = result.replace("\r", "\\r")
66
+ return result
67
+
68
+
69
+ def _format_value(value: Any) -> str:
70
+ """Format a Python value for use in a filter expression."""
71
+ if isinstance(value, RawToken):
72
+ return value.token
73
+ if value is None:
74
+ raise ValueError("None is not a valid filter literal; use is_null()/is_not_null().")
75
+ if isinstance(value, bool):
76
+ return "true" if value else "false"
77
+ if isinstance(value, (int, float)):
78
+ return str(value)
79
+ # Handle datetime before date (datetime is subclass of date)
80
+ if isinstance(value, datetime):
81
+ return f'"{value.isoformat()}"'
82
+ if isinstance(value, date):
83
+ return f'"{value.isoformat()}"'
84
+ # String and fallback
85
+ text = value if isinstance(value, str) else str(value)
86
+ return f'"{_escape_string(text)}"'
87
+
88
+
89
+ def _get_entity_value(entity: dict[str, Any], field_name: str) -> Any:
90
+ """
91
+ Get a field value from an entity dict with fallback normalization.
92
+
93
+ Tries multiple key formats to handle field name variations:
94
+ 1. Exact field name as provided
95
+ 2. Lowercase version
96
+ 3. With entity type prefix (person., company., opportunity.)
97
+ """
98
+ value = entity.get(field_name)
99
+ if value is None:
100
+ value = entity.get(field_name.lower())
101
+ if value is None:
102
+ for prefix in ["person.", "company.", "opportunity."]:
103
+ value = entity.get(f"{prefix}{field_name}")
104
+ if value is not None:
105
+ break
106
+ return value
107
+
108
+
109
class FilterExpression(ABC):
    """Abstract base for all filter expression nodes."""

    @abstractmethod
    def to_string(self) -> str:
        """Render this node as an Affinity filter string."""
        ...

    @abstractmethod
    def matches(self, entity: dict[str, Any]) -> bool:
        """
        Evaluate this filter against an entity dict on the client side.

        Supports --expand-filter in list export, where rows are filtered
        locally after being fetched from the API.
        """
        ...

    def __and__(self, other: FilterExpression) -> FilterExpression:
        """`a & b`: both sub-expressions must hold."""
        return AndExpression(self, other)

    def __or__(self, other: FilterExpression) -> FilterExpression:
        """`a | b`: at least one sub-expression must hold."""
        return OrExpression(self, other)

    def __invert__(self) -> FilterExpression:
        """`~a`: logical negation of the expression."""
        return NotExpression(self)

    def __str__(self) -> str:
        return self.to_string()

    def __repr__(self) -> str:
        return "Filter({!r})".format(self.to_string())
144
+
145
+
146
@dataclass
class FieldComparison(FilterExpression):
    """A single field/operator/value comparison."""

    field_name: str
    operator: str
    value: Any

    def to_string(self) -> str:
        return f"{self.field_name} {self.operator} {_format_value(self.value)}"

    def matches(self, entity: dict[str, Any]) -> bool:
        """Evaluate this comparison against an entity dict (client side).

        Multi-select dropdown fields (arrays) give the operators special
        semantics:
        - `=` with scalar: membership test (value IN the array)
        - `=` with list: order-insensitive set equality
        - `!=` with scalar: value NOT in the array
        - `!=` with list: set inequality
        - `=~` (contains): any element contains the substring
        - `=^` (starts_with): any element starts with the prefix
        - `=$` (ends_with): any element ends with the suffix
        - `>`, `>=`, `<`, `<=`: numeric/date comparisons

        Delegates to the shared compare module so the SDK and the Query
        tool behave identically.
        """
        raw = _get_entity_value(entity, self.field_name)

        # Collapse dropdown dicts / multi-select arrays to comparable values.
        field_value = normalize_value(raw)

        # Affinity NULL convention: `= *` means NOT NULL, `!= *` means IS NULL.
        if isinstance(self.value, RawToken) and self.value.token == "*":
            if self.operator == "=":
                return compare_values(field_value, None, "is_not_null")
            if self.operator == "!=":
                return compare_values(field_value, None, "is_null")

        # Unwrap RawToken so the comparison sees the bare token text.
        target = self.value.token if isinstance(self.value, RawToken) else self.value

        # Translate the SDK's operator symbol into a canonical name.
        try:
            canonical_op = map_operator(self.operator)
        except ValueError:
            raise ValueError(
                f"Unsupported operator '{self.operator}' for client-side matching. "
                f"Supported operators: =, !=, =~, =^, =$, >, >=, <, <=, "
                f"contains, starts_with, ends_with, gt, gte, lt, lte, "
                f"is null, is not null, is empty, "
                f"in, between, has_any, has_all, contains_any, contains_all"
            ) from None

        return compare_values(field_value, target, canonical_op)
201
+
202
+
203
@dataclass
class RawFilter(FilterExpression):
    """Pass-through filter string (power-user escape hatch)."""

    expression: str

    def to_string(self) -> str:
        # Forwarded to the API exactly as written — no quoting or escaping.
        return self.expression

    def matches(self, entity: dict[str, Any]) -> bool:
        """Raw strings are opaque here, so client-side evaluation is unsupported."""
        raise NotImplementedError(
            "RawFilter cannot be evaluated client-side. "
            "Use structured filter expressions for --expand-filter."
        )
218
+
219
+
220
@dataclass
class AndExpression(FilterExpression):
    """Conjunction (`&`) of two filter expressions."""

    left: FilterExpression
    right: FilterExpression

    def to_string(self) -> str:
        # Parenthesize both operands so precedence is always explicit.
        return f"({self.left.to_string()}) & ({self.right.to_string()})"

    def matches(self, entity: dict[str, Any]) -> bool:
        """True only when both operands match (short-circuits on the left)."""
        return self.left.matches(entity) and self.right.matches(entity)
236
+
237
+
238
@dataclass
class OrExpression(FilterExpression):
    """Disjunction (`|`) of two filter expressions."""

    left: FilterExpression
    right: FilterExpression

    def to_string(self) -> str:
        # Parenthesize both operands so precedence is always explicit.
        return f"({self.left.to_string()}) | ({self.right.to_string()})"

    def matches(self, entity: dict[str, Any]) -> bool:
        """True when at least one operand matches (short-circuits on the left)."""
        return self.left.matches(entity) or self.right.matches(entity)
253
+
254
+
255
@dataclass
class NotExpression(FilterExpression):
    """Negation (`!`) of a filter expression."""

    expr: FilterExpression

    def to_string(self) -> str:
        return f"!({self.expr.to_string()})"

    def matches(self, entity: dict[str, Any]) -> bool:
        """True when the wrapped expression does not match."""
        return not self.expr.matches(entity)
267
+
268
+
269
class FieldBuilder:
    """Fluent builder that produces comparisons for one named field."""

    def __init__(self, field_name: str):
        self._field_name = field_name

    def _cmp(self, operator: str, value: Any) -> FieldComparison:
        # Single funnel point for every comparison factory below.
        return FieldComparison(self._field_name, operator, value)

    def equals(self, value: Any) -> FieldComparison:
        """Field equals value (exact match)."""
        return self._cmp("=", value)

    def not_equals(self, value: Any) -> FieldComparison:
        """Field does not equal value."""
        return self._cmp("!=", value)

    def contains(self, value: str) -> FieldComparison:
        """Field contains substring (case-insensitive)."""
        return self._cmp("=~", value)

    def starts_with(self, value: str) -> FieldComparison:
        """Field starts with prefix."""
        return self._cmp("=^", value)

    def ends_with(self, value: str) -> FieldComparison:
        """Field ends with suffix."""
        return self._cmp("=$", value)

    def greater_than(self, value: int | float | datetime | date) -> FieldComparison:
        """Field is strictly greater than value."""
        return self._cmp(">", value)

    def greater_than_or_equal(self, value: int | float | datetime | date) -> FieldComparison:
        """Field is greater than or equal to value."""
        return self._cmp(">=", value)

    def less_than(self, value: int | float | datetime | date) -> FieldComparison:
        """Field is strictly less than value."""
        return self._cmp("<", value)

    def less_than_or_equal(self, value: int | float | datetime | date) -> FieldComparison:
        """Field is less than or equal to value."""
        return self._cmp("<=", value)

    def is_null(self) -> FieldComparison:
        """Field is null (Affinity encodes this as `!= *`)."""
        return self._cmp("!=", RawToken("*"))

    def is_not_null(self) -> FieldComparison:
        """Field is not null (Affinity encodes this as `= *`)."""
        return self._cmp("=", RawToken("*"))

    def in_list(self, values: list[Any]) -> FilterExpression:
        """Field value is in the given list (expands to an OR of equals)."""
        if not values:
            raise ValueError("in_list() requires at least one value")
        combined: FilterExpression = self.equals(values[0])
        for item in values[1:]:
            combined = combined | self.equals(item)
        return combined
328
+
329
+
330
class Filter:
    """
    Factory for building filter expressions.

    Example:
        # Simple comparison
        Filter.field("name").contains("Acme")

        # Complex boolean logic
        (Filter.field("status").equals("Active") &
         Filter.field("type").in_list(["customer", "prospect"]))

        # Negation
        ~Filter.field("archived").equals(True)
    """

    @staticmethod
    def field(name: str) -> FieldBuilder:
        """Start building a filter on a field."""
        return FieldBuilder(name)

    @staticmethod
    def raw(expression: str) -> RawFilter:
        """
        Create a raw filter expression (escape hatch).

        Use when the builder cannot express the syntax you need; the
        string is sent to the API verbatim.

        Args:
            expression: Raw filter string (e.g., 'name =~ "Acme"')
        """
        return RawFilter(expression)

    @staticmethod
    def and_(*expressions: FilterExpression) -> FilterExpression:
        """Combine multiple expressions with `&`."""
        if not expressions:
            raise ValueError("and_() requires at least one expression")
        combined = expressions[0]
        for expr in expressions[1:]:
            combined = combined & expr
        return combined

    @staticmethod
    def or_(*expressions: FilterExpression) -> FilterExpression:
        """Combine multiple expressions with `|`."""
        if not expressions:
            raise ValueError("or_() requires at least one expression")
        combined = expressions[0]
        for expr in expressions[1:]:
            combined = combined | expr
        return combined


# Shorthand alias for convenience
F = Filter
387
+
388
+
389
+ # =============================================================================
390
+ # Filter String Parser
391
+ # =============================================================================
392
+
393
+
394
class _TokenType(Enum):
    """Token types emitted by the filter-string tokenizer (`_Tokenizer`)."""

    FIELD = auto()  # Field name (quoted or unquoted)
    OPERATOR = auto()  # =, !=, =~
    VALUE = auto()  # Value (quoted, unquoted, or *)
    AND = auto()  # &
    OR = auto()  # |
    NOT = auto()  # !
    LPAREN = auto()  # (
    RPAREN = auto()  # )
    EOF = auto()  # End of input
406
+
407
+
408
@dataclass
class _Token:
    """A token from the filter string, tagged with its source position."""

    type: _TokenType
    value: str | list[str]  # str for most tokens, list for bracket values
    pos: int  # Position in original string for error messages
415
+
416
+
417
class _Tokenizer:
    """Tokenizer for filter strings.

    Converts raw filter text into a flat list of ``_Token`` objects,
    always terminated by an EOF token. Classification is context-sensitive:
    a bare or quoted word is emitted as FIELD when an operator follows it,
    and as VALUE otherwise, so the tokenizer peeks ahead (and restores
    ``self.pos``) in several places.
    """

    # Symbolic operators that can appear after field names
    # IMPORTANT: Multi-character operators MUST come first to avoid partial matches
    # e.g., ">=" must be checked before ">" or it will match as ">" + "="
    OPERATORS: ClassVar[tuple[str, ...]] = (">=", "<=", "!=", "=~", "=^", "=$", ">", "<", "=")

    # Single-word aliases for operators (SDK extensions for LLM/human clarity)
    WORD_OPERATORS: ClassVar[dict[str, str]] = {
        "contains": "=~",
        "starts_with": "=^",
        "ends_with": "=$",
        "gt": ">",
        "gte": ">=",
        "lt": "<",
        "lte": "<=",
        # Collection operators
        "in": "in",
        "between": "between",
        "has_any": "has_any",
        "has_all": "has_all",
        "contains_any": "contains_any",
        "contains_all": "contains_all",
    }

    # Multi-word aliases that need lookahead
    # Checked when we see "is" - peek ahead for "null", "not null", "empty"
    # These are stored as (operator_value, canonical_operator_name)
    # NOTE(review): this table is not referenced anywhere inside this class —
    # the "is ..." handling below hardcodes the same mappings. Possibly
    # consumed elsewhere (e.g. the parser) or dead; verify before removing.
    MULTI_WORD_OPERATORS: ClassVar[dict[str, tuple[str, str, str | None]]] = {
        # "is null" -> "!= *" equivalent (maps to is_null in compare)
        "is null": ("is null", "!=", "*"),
        # "is not null" -> "= *" equivalent (maps to is_not_null in compare)
        "is not null": ("is not null", "=", "*"),
        # "is empty" -> check for empty string or empty array
        "is empty": ("is empty", "is empty", None),
    }

    def __init__(self, text: str):
        # text: the full filter string; pos is the cursor into it.
        self.text = text
        self.pos = 0
        self.length = len(text)

    def _skip_whitespace(self) -> None:
        """Skip whitespace characters."""
        while self.pos < self.length and self.text[self.pos] in " \t\n\r":
            self.pos += 1

    def _read_quoted_string(self) -> str:
        """Read a quoted string, handling escapes.

        Leaves ``self.pos`` just past the closing quote; raises ValueError
        if the string is unterminated or ends in a lone backslash.
        """
        assert self.text[self.pos] == '"'
        start_pos = self.pos
        self.pos += 1  # Skip opening quote
        result: list[str] = []

        while self.pos < self.length:
            ch = self.text[self.pos]
            if ch == '"':
                self.pos += 1  # Skip closing quote
                return "".join(result)
            elif ch == "\\":
                self.pos += 1
                if self.pos >= self.length:
                    raise ValueError(
                        f"Unexpected end of string after backslash at position {self.pos}"
                    )
                escaped = self.text[self.pos]
                if escaped == "n":
                    result.append("\n")
                elif escaped == "t":
                    result.append("\t")
                elif escaped == "r":
                    result.append("\r")
                elif escaped in ('"', "\\"):
                    result.append(escaped)
                else:
                    # Unknown escape: keep the character as-is (lenient).
                    result.append(escaped)
                self.pos += 1
            else:
                result.append(ch)
                self.pos += 1

        raise ValueError(f"Unterminated quoted string starting at position {start_pos}")

    def _read_unquoted(self, stop_chars: str) -> str:
        """Read an unquoted token until a stop character or whitespace."""
        start = self.pos
        while self.pos < self.length:
            ch = self.text[self.pos]
            if ch in stop_chars or ch in " \t\n\r":
                break
            self.pos += 1
        return self.text[start : self.pos]

    def _read_bracket_list(self) -> list[str]:
        """Read a bracket-delimited list: [A, B, C] or ["A B", C].

        Returns a list of string values.
        Raises ValueError for syntax errors with helpful messages.
        """
        assert self.text[self.pos] == "["
        start_pos = self.pos
        self.pos += 1  # Skip opening bracket

        items: list[str] = []
        expect_value = True  # Start expecting a value

        while self.pos < self.length:
            self._skip_whitespace()

            if self.pos >= self.length:
                raise ValueError(
                    f"Unclosed bracket at position {start_pos}. "
                    f"Hint: Collection syntax requires closing bracket: [A, B]"
                )

            ch = self.text[self.pos]

            if ch == "]":
                # Check for trailing comma (expect_value=True after comma means trailing comma)
                if items and expect_value:
                    # We just got a comma and now see ]
                    raise ValueError(
                        f"Unexpected ']' after comma at position {self.pos}. "
                        f"Hint: Remove trailing comma: [A, B] not [A, B,]"
                    )
                self.pos += 1  # Skip closing bracket
                return items

            if ch == ",":
                if expect_value:
                    raise ValueError(
                        f"Unexpected ',' at position {self.pos}. Hint: Expected value before comma"
                    )
                self.pos += 1  # Skip comma
                expect_value = True
                continue

            if not expect_value:
                raise ValueError(f"Expected ',' or ']' at position {self.pos}, got '{ch}'")

            # Read a value (quoted or unquoted)
            # Unquoted values stop at comma, bracket, or whitespace
            value = self._read_quoted_string() if ch == '"' else self._read_unquoted(",]")

            if not value:
                raise ValueError(f"Empty value in collection at position {self.pos}")

            items.append(value)
            expect_value = False

        raise ValueError(
            f"Unclosed bracket at position {start_pos}. "
            f"Hint: Collection syntax requires closing bracket: [A, B]"
        )

    def _peek_operator(self) -> str | None:
        """Check if current position starts with a symbolic operator.

        Relies on OPERATORS listing multi-character operators first.
        """
        for op in self.OPERATORS:
            if self.text[self.pos : self.pos + len(op)] == op:
                return op
        return None

    def _peek_word_operator(self) -> tuple[str, str] | None:
        """Check if the next word(s) form a word-based operator.

        Returns (alias, canonical_op) if found, None otherwise.
        Does not advance position - just peeks (position is restored on
        every exit path via saved_pos).
        """
        # Save position for potential rollback
        saved_pos = self.pos
        self._skip_whitespace()

        if self.pos >= self.length:
            self.pos = saved_pos
            return None

        # Read the next word
        word = self._read_unquoted('=!&|()"')
        word_lower = word.lower()

        # Check single-word operators
        if word_lower in self.WORD_OPERATORS:
            self.pos = saved_pos
            return (word_lower, self.WORD_OPERATORS[word_lower])

        # Check multi-word operators starting with "is"
        if word_lower == "is":
            self._skip_whitespace()
            if self.pos < self.length:
                next_word = self._read_unquoted('=!&|()"')
                next_lower = next_word.lower()

                if next_lower == "null":
                    self.pos = saved_pos
                    return ("is null", "is null")
                elif next_lower == "not":
                    self._skip_whitespace()
                    if self.pos < self.length:
                        third_word = self._read_unquoted('=!&|()"')
                        if third_word.lower() == "null":
                            self.pos = saved_pos
                            return ("is not null", "is not null")
                elif next_lower == "empty":
                    self.pos = saved_pos
                    return ("is empty", "is empty")

        self.pos = saved_pos
        return None

    def _consume_word_operator(self, alias: str) -> None:
        """Consume a word operator from the input, advancing position."""
        words = alias.split()
        for expected in words:
            self._skip_whitespace()
            word = self._read_unquoted('=!&|()"')
            # Verify (should match since we already peeked)
            assert word.lower() == expected.lower()

    def tokenize(self) -> list[_Token]:
        """Tokenize the entire filter string.

        Returns a token list always ending with an EOF token. Multi-word
        "is ..." aliases are desugared inline: "is null" emits `!=` + `*`,
        "is not null" emits `=` + `*`, and "is empty" emits the "is empty"
        operator with an empty placeholder VALUE.
        """
        tokens: list[_Token] = []

        while True:
            self._skip_whitespace()

            if self.pos >= self.length:
                tokens.append(_Token(_TokenType.EOF, "", self.pos))
                break

            ch = self.text[self.pos]
            start_pos = self.pos

            # Single-character tokens
            if ch == "(":
                tokens.append(_Token(_TokenType.LPAREN, "(", start_pos))
                self.pos += 1
            elif ch == ")":
                tokens.append(_Token(_TokenType.RPAREN, ")", start_pos))
                self.pos += 1
            elif ch == "&":
                tokens.append(_Token(_TokenType.AND, "&", start_pos))
                self.pos += 1
            elif ch == "|":
                tokens.append(_Token(_TokenType.OR, "|", start_pos))
                self.pos += 1
            elif ch == "!":
                # Check if it's != operator or standalone NOT
                if self.pos + 1 < self.length and self.text[self.pos + 1] == "=":
                    # This is != operator, will be handled as OPERATOR
                    op = self._peek_operator()
                    if op:
                        tokens.append(_Token(_TokenType.OPERATOR, op, start_pos))
                        self.pos += len(op)
                    else:
                        raise ValueError(f"Unexpected character at position {start_pos}")
                else:
                    tokens.append(_Token(_TokenType.NOT, "!", start_pos))
                    self.pos += 1
            elif ch == '"':
                # Quoted string - could be field name or value depending on context
                value = self._read_quoted_string()
                # Determine token type based on context (what comes next)
                self._skip_whitespace()
                if self.pos < self.length and (self._peek_operator() or self._peek_word_operator()):
                    tokens.append(_Token(_TokenType.FIELD, value, start_pos))
                else:
                    tokens.append(_Token(_TokenType.VALUE, value, start_pos))
            elif ch == "*":
                # Wildcard value
                tokens.append(_Token(_TokenType.VALUE, "*", start_pos))
                self.pos += 1
            elif ch == "[":
                # Bracket list value: [A, B, C]
                items = self._read_bracket_list()
                tokens.append(_Token(_TokenType.VALUE, items, start_pos))
            else:
                # Check for symbolic operator first
                op = self._peek_operator()
                if op:
                    tokens.append(_Token(_TokenType.OPERATOR, op, start_pos))
                    self.pos += len(op)
                else:
                    # Unquoted field name, value, or word operator
                    # Read until operator, boolean, paren, or whitespace
                    value = self._read_unquoted('=!&|()"')
                    if not value:
                        raise ValueError(f"Unexpected character '{ch}' at position {start_pos}")

                    value_lower = value.lower()

                    # Check if this is a word operator
                    if value_lower in self.WORD_OPERATORS:
                        # Emit as OPERATOR with the canonical symbol
                        tokens.append(
                            _Token(_TokenType.OPERATOR, self.WORD_OPERATORS[value_lower], start_pos)
                        )
                    elif value_lower == "is":
                        # Check for multi-word operator: "is null", "is not null", "is empty"
                        saved_pos = self.pos
                        self._skip_whitespace()
                        if self.pos < self.length:
                            next_word = self._read_unquoted('=!&|()"')
                            next_lower = next_word.lower()
                            if next_lower == "null":
                                # "is null" -> != *
                                tokens.append(_Token(_TokenType.OPERATOR, "!=", start_pos))
                                tokens.append(_Token(_TokenType.VALUE, "*", self.pos))
                            elif next_lower == "empty":
                                # "is empty" -> is empty operator with placeholder value
                                tokens.append(_Token(_TokenType.OPERATOR, "is empty", start_pos))
                                tokens.append(_Token(_TokenType.VALUE, "", self.pos))  # placeholder
                            elif next_lower == "not":
                                # Could be "is not null"
                                self._skip_whitespace()
                                if self.pos < self.length:
                                    third_word = self._read_unquoted('=!&|()"')
                                    if third_word.lower() == "null":
                                        # "is not null" -> = *
                                        tokens.append(_Token(_TokenType.OPERATOR, "=", start_pos))
                                        tokens.append(_Token(_TokenType.VALUE, "*", self.pos))
                                    else:
                                        # Not a multi-word operator, restore
                                        self.pos = saved_pos
                                        self._skip_whitespace()
                                        if self._peek_operator() or self._peek_word_operator():
                                            tokens.append(
                                                _Token(_TokenType.FIELD, value, start_pos)
                                            )
                                        else:
                                            tokens.append(
                                                _Token(_TokenType.VALUE, value, start_pos)
                                            )
                                else:
                                    # Just "is not" with nothing after - restore
                                    self.pos = saved_pos
                                    tokens.append(_Token(_TokenType.VALUE, value, start_pos))
                            else:
                                # Not a multi-word operator, restore
                                self.pos = saved_pos
                                self._skip_whitespace()
                                if self._peek_operator() or self._peek_word_operator():
                                    tokens.append(_Token(_TokenType.FIELD, value, start_pos))
                                else:
                                    tokens.append(_Token(_TokenType.VALUE, value, start_pos))
                        else:
                            # "is" at end of input - treat as value
                            tokens.append(_Token(_TokenType.VALUE, value, start_pos))
                    else:
                        # Determine token type based on what comes next
                        self._skip_whitespace()
                        if self.pos < self.length and (
                            self._peek_operator() or self._peek_word_operator()
                        ):
                            tokens.append(_Token(_TokenType.FIELD, value, start_pos))
                        else:
                            tokens.append(_Token(_TokenType.VALUE, value, start_pos))

        return tokens
776
+
777
+
778
def _suggest_operator(unknown: str) -> str | None:
    """
    Suggest a similar operator for a misspelled word.

    Two heuristics, applied in order:
    1. Prefix matching (needs at least 3 characters)
    2. A crude edit distance (position-wise diffs plus the length delta)

    Returns the suggested operator, or None when nothing is close.
    """
    needle = unknown.lower()

    # Every word operator the tokenizer understands.
    candidates = [
        *_Tokenizer.WORD_OPERATORS.keys(),
        "is null",
        "is not null",
        "is empty",
    ]

    # Heuristic 1: prefix match (at least 3 chars)
    if len(needle) >= 3:
        compact_needle = needle.replace("_", "")
        for candidate in candidates:
            # unknown is a prefix of the operator
            if candidate.startswith(needle):
                return candidate
            # operator with separators stripped shares the first 3 chars
            # (e.g., "containsall" vs "contains_all")
            compact_op = candidate.replace(" ", "").replace("_", "")
            if compact_op.startswith(compact_needle[:3]):
                return candidate

    # Heuristic 2: simple typo tolerance (1 char for short ops, 2 for longer)
    for candidate in candidates:
        cand = candidate.lower()
        # Very different lengths can never be within tolerance.
        if abs(len(needle) - len(cand)) > 2:
            continue
        distance = sum(a != b for a, b in zip(needle, cand, strict=False))
        distance += abs(len(needle) - len(cand))
        allowed = 2 if len(cand) > 4 else 1
        if distance <= allowed:
            return candidate

    return None
824
+
825
+
826
class _Parser:
    """Recursive descent parser for filter expressions.

    Consumes the token stream produced by ``_Tokenizer`` and builds a
    ``FilterExpression`` AST. Precedence, lowest to highest:
    OR (``|``) < AND (``&``) < NOT (``!``) < atoms (comparisons and
    parenthesized sub-expressions).
    """

    def __init__(self, tokens: list[_Token]):
        # Token list; presumably terminated by an EOF token (the tokenizer
        # is outside this view — confirm). ``pos`` is the read cursor.
        self.tokens = tokens
        self.pos = 0

    def _current(self) -> _Token:
        """Get current token."""
        return self.tokens[self.pos]

    def _advance(self) -> _Token:
        """Advance to next token and return previous."""
        token = self.tokens[self.pos]
        # Deliberately never moves past the final token, so repeated calls
        # at end-of-stream keep returning it and _current() stays in bounds.
        if self.pos < len(self.tokens) - 1:
            self.pos += 1
        return token

    def _expect(self, token_type: _TokenType, context: str = "") -> _Token:
        """Expect a specific token type, raise ValueError if not found."""
        token = self._current()
        if token.type != token_type:
            ctx = f" {context}" if context else ""
            raise ValueError(
                f"Expected {token_type.name}{ctx} at position {token.pos}, "
                f"got {token.type.name} '{token.value}'"
            )
        return self._advance()

    def parse(self) -> FilterExpression:
        """Parse the token stream into a FilterExpression.

        Raises ValueError on empty input or if tokens remain after a
        complete expression; the error message includes a targeted hint
        (quote multi-word values, use '&'/'|' instead of AND/OR).
        """
        if self._current().type == _TokenType.EOF:
            raise ValueError("Empty filter expression")

        expr = self._parse_or_expr()

        # Anything left over after a full expression is an error; the rest of
        # this method only builds a helpful diagnostic.
        if self._current().type != _TokenType.EOF:
            token = self._current()
            # Check if this looks like a multi-word value (extra word after comparison)
            if token.type in (_TokenType.VALUE, _TokenType.FIELD):
                token_val = token.value if isinstance(token.value, str) else str(token.value)
                # Check for SQL-like boolean keywords
                upper_val = token_val.upper()
                if upper_val == "AND":
                    raise ValueError(
                        f"Unexpected 'AND' at position {token.pos}. "
                        f"Hint: Use '&' for AND: expr1 & expr2"
                    )
                if upper_val == "OR":
                    raise ValueError(
                        f"Unexpected 'OR' at position {token.pos}. "
                        f"Hint: Use '|' for OR: expr1 | expr2"
                    )
                # Look back to find the previous value to suggest quoting
                # Collect remaining words
                remaining_words: list[str] = [token_val]
                pos = self.pos + 1
                # len(tokens) - 1 excludes the trailing EOF token from the scan.
                while pos < len(self.tokens) - 1:
                    next_tok = self.tokens[pos]
                    if next_tok.type in (_TokenType.VALUE, _TokenType.FIELD):
                        next_val = (
                            next_tok.value
                            if isinstance(next_tok.value, str)
                            else str(next_tok.value)
                        )
                        remaining_words.append(next_val)
                        pos += 1
                    else:
                        break
                if len(remaining_words) == 1:
                    raise ValueError(
                        f"Unexpected token '{token_val}' at position {token.pos}. "
                        f'Hint: Values with spaces must be quoted: "... {token_val}"'
                    )
                else:
                    combined = " ".join(remaining_words)
                    raise ValueError(
                        f"Unexpected token '{token_val}' at position {token.pos}. "
                        f'Hint: Values with spaces must be quoted: "...{combined}"'
                    )
            raise ValueError(f"Unexpected token '{token.value}' at position {token.pos}")

        return expr

    def _parse_or_expr(self) -> FilterExpression:
        """Parse OR expressions (lowest precedence)."""
        left = self._parse_and_expr()

        # Left-associative fold: a | b | c -> Or(Or(a, b), c)
        while self._current().type == _TokenType.OR:
            self._advance()  # consume |
            right = self._parse_and_expr()
            left = OrExpression(left, right)

        return left

    def _parse_and_expr(self) -> FilterExpression:
        """Parse AND expressions (medium precedence)."""
        left = self._parse_not_expr()

        # Left-associative fold: a & b & c -> And(And(a, b), c)
        while self._current().type == _TokenType.AND:
            self._advance()  # consume &
            right = self._parse_not_expr()
            left = AndExpression(left, right)

        return left

    def _parse_not_expr(self) -> FilterExpression:
        """Parse NOT expressions (high precedence)."""
        if self._current().type == _TokenType.NOT:
            self._advance()  # consume !
            expr = self._parse_not_expr()  # NOT is right-associative (!!x nests)
            return NotExpression(expr)

        return self._parse_atom()

    def _parse_atom(self) -> FilterExpression:
        """Parse atomic expressions: comparisons or parenthesized expressions."""
        token = self._current()

        # Parenthesized expression
        if token.type == _TokenType.LPAREN:
            self._advance()  # consume (
            expr = self._parse_or_expr()
            closing = self._current()
            if closing.type != _TokenType.RPAREN:
                raise ValueError(f"Unbalanced parentheses: expected ')' at position {closing.pos}")
            self._advance()  # consume )
            return expr

        # Field comparison
        if token.type == _TokenType.FIELD:
            return self._parse_comparison()

        # Error cases
        if token.type == _TokenType.EOF:
            raise ValueError("Unexpected end of expression")
        if token.type == _TokenType.OPERATOR:
            raise ValueError(
                f"Missing field name before operator '{token.value}' at position {token.pos}"
            )
        if token.type == _TokenType.VALUE:
            # This could be an unquoted field name that wasn't recognized
            # Try to parse it as a comparison
            return self._parse_comparison_from_value()

        raise ValueError(f"Unexpected token '{token.value}' at position {token.pos}")

    def _parse_comparison(self) -> FilterExpression:
        """Parse a field comparison expression: FIELD OPERATOR VALUE."""
        field_token = self._expect(_TokenType.FIELD, "for field name")
        # Field names are always strings (not bracket lists)
        assert isinstance(field_token.value, str)
        field_name = field_token.value

        op_token = self._current()
        if op_token.type != _TokenType.OPERATOR:
            raise ValueError(
                f"Expected operator after field name at position {op_token.pos}, "
                f"got {op_token.type.name}"
            )
        self._advance()
        # Operators are always strings
        assert isinstance(op_token.value, str)
        operator = op_token.value

        value_token = self._current()
        # The RHS may itself have been tokenized as FIELD; both are accepted.
        if value_token.type not in (_TokenType.VALUE, _TokenType.FIELD):
            # Check for == instead of =
            if value_token.type == _TokenType.OPERATOR and value_token.value == "=":
                raise ValueError(
                    f"Unexpected '=' at position {value_token.pos}. "
                    f"Hint: Use single '=' for equality, not '=='"
                )
            raise ValueError(f"Expected value after operator at position {value_token.pos}")
        self._advance()

        # Convert value to appropriate type: bare '*' is the existence
        # wildcard, kept as a RawToken so matches() can treat it specially.
        if value_token.value == "*":
            value: Any = RawToken("*")
        else:
            value = value_token.value

        return FieldComparison(field_name, operator, value)

    def _parse_comparison_from_value(self) -> FilterExpression:
        """Parse a comparison where the field was tokenized as VALUE.

        Same shape as _parse_comparison, but with richer diagnostics since a
        VALUE in field position usually means a typo'd operator or an
        unquoted multi-word field name.
        """
        # This happens when field name isn't followed by operator immediately
        value_token = self._advance()
        # Field names are always strings (not bracket lists)
        assert isinstance(value_token.value, str)
        field_name = value_token.value

        op_token = self._current()
        if op_token.type != _TokenType.OPERATOR:
            # Check if this looks like a multi-word field name (next token is word, not operator)
            # Note: the next word might be tokenized as FIELD if it's followed by an operator
            if op_token.type in (_TokenType.VALUE, _TokenType.FIELD):
                op_val = op_token.value if isinstance(op_token.value, str) else str(op_token.value)
                # Check if it looks like an unsupported operator (e.g., <>, >>, <<)
                if op_val in ("<>", ">>", "<<"):
                    raise ValueError(
                        f"Unsupported operator '{op_val}' at position {op_token.pos}. "
                        f"Supported operators: = != =~ =^ =$ > >= < <="
                    )

                # Check if this looks like a misspelled operator
                suggestion = _suggest_operator(op_val)
                if suggestion:
                    raise ValueError(
                        f"Unknown operator '{op_val}' at position {op_token.pos}. "
                        f"Did you mean: {suggestion}?"
                    )

                # Collect subsequent words to suggest the full field name
                words: list[str] = [field_name, op_val]
                pos = self.pos + 1
                # len(tokens) - 1 excludes the trailing EOF token from the scan.
                while pos < len(self.tokens) - 1:
                    next_tok = self.tokens[pos]
                    if next_tok.type == _TokenType.OPERATOR:
                        break
                    if next_tok.type in (_TokenType.VALUE, _TokenType.FIELD):
                        next_val = (
                            next_tok.value
                            if isinstance(next_tok.value, str)
                            else str(next_tok.value)
                        )
                        # Skip unsupported operator-like tokens
                        if next_val in ("<>", ">>", "<<"):
                            break
                        words.append(next_val)
                        pos += 1
                    else:
                        break
                suggested_field = " ".join(words)
                raise ValueError(
                    f"Expected operator after '{field_name}' at position {op_token.pos}. "
                    f'Hint: For multi-word field names, use quotes: "{suggested_field}"'
                )
            raise ValueError(f"Expected operator after '{field_name}' at position {op_token.pos}")
        self._advance()
        # Operators are always strings
        assert isinstance(op_token.value, str)
        operator = op_token.value

        next_token = self._current()
        if next_token.type not in (_TokenType.VALUE, _TokenType.FIELD):
            # Check for == instead of =
            if next_token.type == _TokenType.OPERATOR and next_token.value == "=":
                raise ValueError(
                    f"Unexpected '=' at position {next_token.pos}. "
                    f"Hint: Use single '=' for equality, not '=='"
                )
            raise ValueError(f"Expected value after operator at position {next_token.pos}")
        self._advance()

        # Bare '*' is the existence wildcard (see _parse_comparison).
        if next_token.value == "*":
            value: Any = RawToken("*")
        else:
            value = next_token.value

        return FieldComparison(field_name, operator, value)
1087
+
1088
+
1089
def parse(filter_string: str) -> FilterExpression:
    """
    Parse a filter string into a FilterExpression AST.

    The returned FilterExpression is a structured representation of the
    human-readable filter text, suitable for client-side filtering via
    its matches() method.

    Args:
        filter_string: The filter expression to parse

    Returns:
        A FilterExpression AST representing the filter

    Raises:
        ValueError: If the filter string is invalid

    Examples:
        >>> expr = parse('name = "Alice"')
        >>> expr.matches({"name": "Alice"})
        True

        >>> expr = parse('status = Active | status = Pending')
        >>> expr.matches({"status": "Active"})
        True

        >>> expr = parse('email = *')  # IS NOT NULL
        >>> expr.matches({"email": "test@example.com"})
        True

        >>> expr = parse('email != *')  # IS NULL
        >>> expr.matches({"email": None})
        True
    """
    # Reject None/empty/whitespace-only input before tokenizing.
    stripped = filter_string.strip() if filter_string else ""
    if not stripped:
        raise ValueError("Empty filter expression")

    # Tokenize the raw string (positions must stay relative to the original
    # text, so the untrimmed input is handed to the tokenizer), then parse.
    token_stream = _Tokenizer(filter_string).tokenize()
    return _Parser(token_stream).parse()