affinity-sdk 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- affinity/__init__.py +139 -0
- affinity/cli/__init__.py +7 -0
- affinity/cli/click_compat.py +27 -0
- affinity/cli/commands/__init__.py +1 -0
- affinity/cli/commands/_entity_files_dump.py +219 -0
- affinity/cli/commands/_list_entry_fields.py +41 -0
- affinity/cli/commands/_v1_parsing.py +77 -0
- affinity/cli/commands/company_cmds.py +2139 -0
- affinity/cli/commands/completion_cmd.py +33 -0
- affinity/cli/commands/config_cmds.py +540 -0
- affinity/cli/commands/entry_cmds.py +33 -0
- affinity/cli/commands/field_cmds.py +413 -0
- affinity/cli/commands/interaction_cmds.py +875 -0
- affinity/cli/commands/list_cmds.py +3152 -0
- affinity/cli/commands/note_cmds.py +433 -0
- affinity/cli/commands/opportunity_cmds.py +1174 -0
- affinity/cli/commands/person_cmds.py +1980 -0
- affinity/cli/commands/query_cmd.py +444 -0
- affinity/cli/commands/relationship_strength_cmds.py +62 -0
- affinity/cli/commands/reminder_cmds.py +595 -0
- affinity/cli/commands/resolve_url_cmd.py +127 -0
- affinity/cli/commands/session_cmds.py +84 -0
- affinity/cli/commands/task_cmds.py +110 -0
- affinity/cli/commands/version_cmd.py +29 -0
- affinity/cli/commands/whoami_cmd.py +36 -0
- affinity/cli/config.py +108 -0
- affinity/cli/context.py +749 -0
- affinity/cli/csv_utils.py +195 -0
- affinity/cli/date_utils.py +42 -0
- affinity/cli/decorators.py +77 -0
- affinity/cli/errors.py +28 -0
- affinity/cli/field_utils.py +355 -0
- affinity/cli/formatters.py +551 -0
- affinity/cli/help_json.py +283 -0
- affinity/cli/logging.py +100 -0
- affinity/cli/main.py +261 -0
- affinity/cli/options.py +53 -0
- affinity/cli/paths.py +32 -0
- affinity/cli/progress.py +183 -0
- affinity/cli/query/__init__.py +163 -0
- affinity/cli/query/aggregates.py +357 -0
- affinity/cli/query/dates.py +194 -0
- affinity/cli/query/exceptions.py +147 -0
- affinity/cli/query/executor.py +1236 -0
- affinity/cli/query/filters.py +248 -0
- affinity/cli/query/models.py +333 -0
- affinity/cli/query/output.py +331 -0
- affinity/cli/query/parser.py +619 -0
- affinity/cli/query/planner.py +430 -0
- affinity/cli/query/progress.py +270 -0
- affinity/cli/query/schema.py +439 -0
- affinity/cli/render.py +1589 -0
- affinity/cli/resolve.py +222 -0
- affinity/cli/resolvers.py +249 -0
- affinity/cli/results.py +308 -0
- affinity/cli/runner.py +218 -0
- affinity/cli/serialization.py +65 -0
- affinity/cli/session_cache.py +276 -0
- affinity/cli/types.py +70 -0
- affinity/client.py +771 -0
- affinity/clients/__init__.py +19 -0
- affinity/clients/http.py +3664 -0
- affinity/clients/pipeline.py +165 -0
- affinity/compare.py +501 -0
- affinity/downloads.py +114 -0
- affinity/exceptions.py +615 -0
- affinity/filters.py +1128 -0
- affinity/hooks.py +198 -0
- affinity/inbound_webhooks.py +302 -0
- affinity/models/__init__.py +163 -0
- affinity/models/entities.py +798 -0
- affinity/models/pagination.py +513 -0
- affinity/models/rate_limit_snapshot.py +48 -0
- affinity/models/secondary.py +413 -0
- affinity/models/types.py +663 -0
- affinity/policies.py +40 -0
- affinity/progress.py +22 -0
- affinity/py.typed +0 -0
- affinity/services/__init__.py +42 -0
- affinity/services/companies.py +1286 -0
- affinity/services/lists.py +1892 -0
- affinity/services/opportunities.py +1330 -0
- affinity/services/persons.py +1348 -0
- affinity/services/rate_limits.py +173 -0
- affinity/services/tasks.py +193 -0
- affinity/services/v1_only.py +2445 -0
- affinity/types.py +83 -0
- affinity_sdk-0.9.5.dist-info/METADATA +622 -0
- affinity_sdk-0.9.5.dist-info/RECORD +92 -0
- affinity_sdk-0.9.5.dist-info/WHEEL +4 -0
- affinity_sdk-0.9.5.dist-info/entry_points.txt +2 -0
- affinity_sdk-0.9.5.dist-info/licenses/LICENSE +21 -0
affinity/filters.py
ADDED
|
@@ -0,0 +1,1128 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Filter builder for V2 API filtering support.
|
|
3
|
+
|
|
4
|
+
Provides a type-safe, Pythonic way to build filter expressions for V2 list endpoints.
|
|
5
|
+
The builder handles proper escaping and quoting of user inputs.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
from affinity.filters import Filter, F
|
|
9
|
+
|
|
10
|
+
# Using the builder (recommended)
|
|
11
|
+
filter = (
|
|
12
|
+
F.field("name").contains("Acme") &
|
|
13
|
+
F.field("status").equals("Active")
|
|
14
|
+
)
|
|
15
|
+
companies = client.companies.list(filter=filter)
|
|
16
|
+
|
|
17
|
+
# Or build complex filters
|
|
18
|
+
filter = (
|
|
19
|
+
(F.field("name").contains("Corp") | F.field("name").contains("Inc")) &
|
|
20
|
+
~F.field("archived").equals(True)
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Raw filter string escape hatch (power users)
|
|
24
|
+
companies = client.companies.list(filter='name =~ "Acme"')
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
from abc import ABC, abstractmethod
|
|
30
|
+
from dataclasses import dataclass
|
|
31
|
+
from datetime import date, datetime
|
|
32
|
+
from enum import Enum, auto
|
|
33
|
+
from typing import Any, ClassVar
|
|
34
|
+
|
|
35
|
+
from affinity.compare import compare_values, map_operator, normalize_value
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
class RawToken:
    """
    A raw token inserted into a filter expression without quoting.

    Used for special Affinity Filtering Language literals like `*`.
    """

    # The literal text emitted verbatim into the filter string; it bypasses
    # the quoting/escaping applied by _format_value.
    token: str
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _escape_string(value: str) -> str:
|
|
50
|
+
"""
|
|
51
|
+
Escape a string value for use in a filter expression.
|
|
52
|
+
|
|
53
|
+
Handles:
|
|
54
|
+
- Backslashes (must be doubled)
|
|
55
|
+
- Double quotes (must be escaped)
|
|
56
|
+
- Newlines and tabs (escaped as literals)
|
|
57
|
+
- NUL bytes (removed)
|
|
58
|
+
"""
|
|
59
|
+
# Order matters: escape backslashes first
|
|
60
|
+
result = value.replace("\\", "\\\\")
|
|
61
|
+
result = result.replace('"', '\\"')
|
|
62
|
+
result = result.replace("\x00", "")
|
|
63
|
+
result = result.replace("\n", "\\n")
|
|
64
|
+
result = result.replace("\t", "\\t")
|
|
65
|
+
result = result.replace("\r", "\\r")
|
|
66
|
+
return result
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _format_value(value: Any) -> str:
    """Format a Python value for use in a filter expression.

    RawToken passes through verbatim; booleans become lowercase literals;
    numbers are rendered bare; dates/datetimes are quoted ISO-8601 strings;
    everything else is stringified, escaped, and double-quoted.

    Raises:
        ValueError: if *value* is None (use is_null()/is_not_null() instead).
    """
    if isinstance(value, RawToken):
        return value.token
    if value is None:
        raise ValueError("None is not a valid filter literal; use is_null()/is_not_null().")
    # bool is a subclass of int, so it must be tested before the numeric branch.
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return str(value)
    # datetime is a subclass of date; one combined check covers both since the
    # rendering (quoted isoformat) is identical for the two types.
    if isinstance(value, (datetime, date)):
        return f'"{value.isoformat()}"'
    # String and fallback: anything else is coerced to str, then escaped.
    text = value if isinstance(value, str) else str(value)
    return f'"{_escape_string(text)}"'
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _get_entity_value(entity: dict[str, Any], field_name: str) -> Any:
|
|
90
|
+
"""
|
|
91
|
+
Get a field value from an entity dict with fallback normalization.
|
|
92
|
+
|
|
93
|
+
Tries multiple key formats to handle field name variations:
|
|
94
|
+
1. Exact field name as provided
|
|
95
|
+
2. Lowercase version
|
|
96
|
+
3. With entity type prefix (person., company., opportunity.)
|
|
97
|
+
"""
|
|
98
|
+
value = entity.get(field_name)
|
|
99
|
+
if value is None:
|
|
100
|
+
value = entity.get(field_name.lower())
|
|
101
|
+
if value is None:
|
|
102
|
+
for prefix in ["person.", "company.", "opportunity."]:
|
|
103
|
+
value = entity.get(f"{prefix}{field_name}")
|
|
104
|
+
if value is not None:
|
|
105
|
+
break
|
|
106
|
+
return value
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class FilterExpression(ABC):
    """Base class for filter expressions."""

    @abstractmethod
    def to_string(self) -> str:
        """Convert the expression to a filter string."""

    @abstractmethod
    def matches(self, entity: dict[str, Any]) -> bool:
        """
        Evaluate filter against an entity dict (client-side).

        Used for --expand-filter in list export where filtering happens
        after fetching data from the API.
        """

    def __and__(self, other: FilterExpression) -> FilterExpression:
        """Combine two expressions with `&`."""
        return AndExpression(self, other)

    def __or__(self, other: FilterExpression) -> FilterExpression:
        """Combine two expressions with `|`."""
        return OrExpression(self, other)

    def __invert__(self) -> FilterExpression:
        """Negate the expression with `!`."""
        return NotExpression(self)

    def __str__(self) -> str:
        return self.to_string()

    def __repr__(self) -> str:
        rendered = self.to_string()
        return f"Filter({rendered!r})"
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclass
class FieldComparison(FilterExpression):
    """A comparison operation on a field."""

    # Field name exactly as sent to the API (no quoting applied here).
    field_name: str
    # Operator symbol or word alias (e.g. "=", "!=", "=~", "contains").
    operator: str
    # Comparison target; RawToken values are emitted without quoting.
    value: Any

    def to_string(self) -> str:
        # Render as `<field> <op> <value>`; only the value is escaped/quoted.
        formatted_value = _format_value(self.value)
        return f"{self.field_name} {self.operator} {formatted_value}"

    def matches(self, entity: dict[str, Any]) -> bool:
        """Evaluate field comparison against an entity dict.

        For multi-select dropdown fields (arrays), the operators have special semantics:
        - `=` with scalar: checks if value is IN the array (membership)
        - `=` with list: checks set equality (order-insensitive)
        - `!=` with scalar: checks if value is NOT in the array
        - `!=` with list: checks set inequality
        - `=~` (contains): checks if any array element contains the substring
        - `=^` (starts_with): checks if any array element starts with the prefix
        - `=$` (ends_with): checks if any array element ends with the suffix
        - `>`, `>=`, `<`, `<=`: numeric/date comparisons

        Uses the shared compare module for consistent behavior across SDK and Query tool.
        """
        field_value = _get_entity_value(entity, self.field_name)

        # Normalize dropdown dicts and multi-select arrays
        field_value = normalize_value(field_value)

        # Handle NULL checks (Affinity convention: =* means NOT NULL, !=* means IS NULL)
        # NOTE(review): a RawToken "*" with any operator other than =/!= falls
        # through to the generic path below — confirm that is intended.
        if isinstance(self.value, RawToken) and self.value.token == "*":
            if self.operator == "=":
                return compare_values(field_value, None, "is_not_null")
            elif self.operator == "!=":
                return compare_values(field_value, None, "is_null")

        # Extract target value
        target = self.value if not isinstance(self.value, RawToken) else self.value.token

        # Map SDK operator symbol to canonical operator name
        # (map_operator is provided by affinity.compare; presumably it accepts
        # both symbols and word aliases — verify against that module.)
        try:
            canonical_op = map_operator(self.operator)
        except ValueError:
            # `from None` suppresses the original traceback so callers see a
            # single, user-facing error listing the supported operators.
            raise ValueError(
                f"Unsupported operator '{self.operator}' for client-side matching. "
                f"Supported operators: =, !=, =~, =^, =$, >, >=, <, <=, "
                f"contains, starts_with, ends_with, gt, gte, lt, lte, "
                f"is null, is not null, is empty, "
                f"in, between, has_any, has_all, contains_any, contains_all"
            ) from None

        return compare_values(field_value, target, canonical_op)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
@dataclass
class RawFilter(FilterExpression):
    """A raw filter string (escape hatch for power users)."""

    # Verbatim filter text; never escaped or validated by the SDK.
    expression: str

    def to_string(self) -> str:
        # Pass the user-supplied expression through untouched.
        return self.expression

    def matches(self, entity: dict[str, Any]) -> bool:
        """RawFilter cannot be evaluated client-side."""
        message = (
            "RawFilter cannot be evaluated client-side. "
            "Use structured filter expressions for --expand-filter."
        )
        raise NotImplementedError(message)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@dataclass
class AndExpression(FilterExpression):
    """`&` combination of two expressions."""

    left: FilterExpression
    right: FilterExpression

    def to_string(self) -> str:
        # Parenthesize both operands so precedence is explicit in the output.
        return f"({self.left.to_string()}) & ({self.right.to_string()})"

    def matches(self, entity: dict[str, Any]) -> bool:
        """Both sides must match."""
        # Short-circuits on the left side, like the original `and`.
        if not self.left.matches(entity):
            return False
        return self.right.matches(entity)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@dataclass
class OrExpression(FilterExpression):
    """`|` combination of two expressions."""

    left: FilterExpression
    right: FilterExpression

    def to_string(self) -> str:
        # Parenthesize both operands so precedence is explicit in the output.
        return f"({self.left.to_string()}) | ({self.right.to_string()})"

    def matches(self, entity: dict[str, Any]) -> bool:
        """Either side must match."""
        # Short-circuits on the left side, like the original `or`.
        if self.left.matches(entity):
            return True
        return self.right.matches(entity)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
@dataclass
class NotExpression(FilterExpression):
    """`!` negation of an expression."""

    expr: FilterExpression

    def to_string(self) -> str:
        inner = self.expr.to_string()
        return f"!({inner})"

    def matches(self, entity: dict[str, Any]) -> bool:
        """Invert the inner expression."""
        return not self.expr.matches(entity)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class FieldBuilder:
    """Builder for field-based filter expressions."""

    def __init__(self, field_name: str):
        self._field_name = field_name

    def _make(self, operator: str, value: Any) -> FieldComparison:
        # Single construction point shared by every comparison helper below.
        return FieldComparison(self._field_name, operator, value)

    def equals(self, value: Any) -> FieldComparison:
        """Field equals value (exact match)."""
        return self._make("=", value)

    def not_equals(self, value: Any) -> FieldComparison:
        """Field does not equal value."""
        return self._make("!=", value)

    def contains(self, value: str) -> FieldComparison:
        """Field contains substring (case-insensitive)."""
        return self._make("=~", value)

    def starts_with(self, value: str) -> FieldComparison:
        """Field starts with prefix."""
        return self._make("=^", value)

    def ends_with(self, value: str) -> FieldComparison:
        """Field ends with suffix."""
        return self._make("=$", value)

    def greater_than(self, value: int | float | datetime | date) -> FieldComparison:
        """Field is greater than value."""
        return self._make(">", value)

    def greater_than_or_equal(self, value: int | float | datetime | date) -> FieldComparison:
        """Field is greater than or equal to value."""
        return self._make(">=", value)

    def less_than(self, value: int | float | datetime | date) -> FieldComparison:
        """Field is less than value."""
        return self._make("<", value)

    def less_than_or_equal(self, value: int | float | datetime | date) -> FieldComparison:
        """Field is less than or equal to value."""
        return self._make("<=", value)

    def is_null(self) -> FieldComparison:
        """Field is null (Affinity encodes this as `!= *`)."""
        return self._make("!=", RawToken("*"))

    def is_not_null(self) -> FieldComparison:
        """Field is not null (Affinity encodes this as `= *`)."""
        return self._make("=", RawToken("*"))

    def in_list(self, values: list[Any]) -> FilterExpression:
        """Field value is in the given list (OR of equals)."""
        if not values:
            raise ValueError("in_list() requires at least one value")
        combined: FilterExpression = self.equals(values[0])
        for item in values[1:]:
            combined = combined | self.equals(item)
        return combined
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
class Filter:
    """
    Factory for building filter expressions.

    Example:
        # Simple comparison
        Filter.field("name").contains("Acme")

        # Complex boolean logic
        (Filter.field("status").equals("Active") &
         Filter.field("type").in_list(["customer", "prospect"]))

        # Negation
        ~Filter.field("archived").equals(True)
    """

    @staticmethod
    def field(name: str) -> FieldBuilder:
        """Start building a filter on a field."""
        return FieldBuilder(name)

    @staticmethod
    def raw(expression: str) -> RawFilter:
        """
        Create a raw filter expression (escape hatch).

        Use this when you need filter syntax not supported by the builder.
        The expression is passed directly to the API without modification.

        Args:
            expression: Raw filter string (e.g., 'name =~ "Acme"')
        """
        return RawFilter(expression)

    @staticmethod
    def and_(*expressions: FilterExpression) -> FilterExpression:
        """Combine multiple expressions with `&`."""
        if not expressions:
            raise ValueError("and_() requires at least one expression")
        combined, *rest = expressions
        for expr in rest:
            combined = combined & expr
        return combined

    @staticmethod
    def or_(*expressions: FilterExpression) -> FilterExpression:
        """Combine multiple expressions with `|`."""
        if not expressions:
            raise ValueError("or_() requires at least one expression")
        combined, *rest = expressions
        for expr in rest:
            combined = combined | expr
        return combined


# Shorthand alias for convenience
F = Filter
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
# =============================================================================
|
|
390
|
+
# Filter String Parser
|
|
391
|
+
# =============================================================================
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
class _TokenType(Enum):
    """Token types for the filter parser."""

    FIELD = auto()  # Field name (quoted or unquoted)
    OPERATOR = auto()  # =, !=, =~
    VALUE = auto()  # Value (quoted, unquoted, or *)
    AND = auto()  # &
    OR = auto()  # |
    NOT = auto()  # !
    LPAREN = auto()  # (
    RPAREN = auto()  # )
    EOF = auto()  # End of input
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
@dataclass
class _Token:
    """A token from the filter string."""

    type: _TokenType
    value: str | list[str]  # str for most tokens, list for bracket values
    pos: int  # Position in original string for error messages
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
class _Tokenizer:
    """Tokenizer for filter strings."""

    # Symbolic operators that can appear after field names
    # IMPORTANT: Multi-character operators MUST come first to avoid partial matches
    # e.g., ">=" must be checked before ">" or it will match as ">" + "="
    OPERATORS: ClassVar[tuple[str, ...]] = (">=", "<=", "!=", "=~", "=^", "=$", ">", "<", "=")

    # Single-word aliases for operators (SDK extensions for LLM/human clarity)
    WORD_OPERATORS: ClassVar[dict[str, str]] = {
        "contains": "=~",
        "starts_with": "=^",
        "ends_with": "=$",
        "gt": ">",
        "gte": ">=",
        "lt": "<",
        "lte": "<=",
        # Collection operators (these map to themselves; they have no
        # symbolic equivalent and are handled downstream by name)
        "in": "in",
        "between": "between",
        "has_any": "has_any",
        "has_all": "has_all",
        "contains_any": "contains_any",
        "contains_all": "contains_all",
    }

    # Multi-word aliases that need lookahead
    # Checked when we see "is" - peek ahead for "null", "not null", "empty"
    # These are stored as (operator_value, canonical_operator_name)
    # NOTE(review): entries are actually 3-tuples (alias, operator, value-token),
    # and _peek_word_operator hardcodes the same aliases without consulting this
    # table — confirm whether this mapping is still used by tokenize().
    MULTI_WORD_OPERATORS: ClassVar[dict[str, tuple[str, str, str | None]]] = {
        # "is null" -> "!= *" equivalent (maps to is_null in compare)
        "is null": ("is null", "!=", "*"),
        # "is not null" -> "= *" equivalent (maps to is_not_null in compare)
        "is not null": ("is not null", "=", "*"),
        # "is empty" -> check for empty string or empty array
        "is empty": ("is empty", "is empty", None),
    }
|
|
454
|
+
|
|
455
|
+
    def __init__(self, text: str):
        """Initialize the tokenizer over *text*, starting at position 0."""
        self.text = text
        self.pos = 0
        # Cached once; the input string is never mutated after construction.
        self.length = len(text)
|
|
459
|
+
|
|
460
|
+
def _skip_whitespace(self) -> None:
|
|
461
|
+
"""Skip whitespace characters."""
|
|
462
|
+
while self.pos < self.length and self.text[self.pos] in " \t\n\r":
|
|
463
|
+
self.pos += 1
|
|
464
|
+
|
|
465
|
+
    def _read_quoted_string(self) -> str:
        """Read a quoted string, handling escapes.

        Advances self.pos past the closing quote and returns the unescaped
        contents. Raises ValueError on a dangling backslash or a missing
        closing quote.
        """
        assert self.text[self.pos] == '"'
        start_pos = self.pos
        self.pos += 1  # Skip opening quote
        result: list[str] = []

        while self.pos < self.length:
            ch = self.text[self.pos]
            if ch == '"':
                self.pos += 1  # Skip closing quote
                return "".join(result)
            elif ch == "\\":
                self.pos += 1
                if self.pos >= self.length:
                    raise ValueError(
                        f"Unexpected end of string after backslash at position {self.pos}"
                    )
                escaped = self.text[self.pos]
                if escaped == "n":
                    result.append("\n")
                elif escaped == "t":
                    result.append("\t")
                elif escaped == "r":
                    result.append("\r")
                elif escaped in ('"', "\\"):
                    result.append(escaped)
                else:
                    # NOTE(review): unknown escapes (e.g. \x) drop the backslash
                    # and keep the character — this branch duplicates the one
                    # above; confirm the leniency is intentional.
                    result.append(escaped)
                self.pos += 1
            else:
                result.append(ch)
                self.pos += 1

        raise ValueError(f"Unterminated quoted string starting at position {start_pos}")
|
|
500
|
+
|
|
501
|
+
def _read_unquoted(self, stop_chars: str) -> str:
|
|
502
|
+
"""Read an unquoted token until a stop character or whitespace."""
|
|
503
|
+
start = self.pos
|
|
504
|
+
while self.pos < self.length:
|
|
505
|
+
ch = self.text[self.pos]
|
|
506
|
+
if ch in stop_chars or ch in " \t\n\r":
|
|
507
|
+
break
|
|
508
|
+
self.pos += 1
|
|
509
|
+
return self.text[start : self.pos]
|
|
510
|
+
|
|
511
|
+
    def _read_bracket_list(self) -> list[str]:
        """Read a bracket-delimited list: [A, B, C] or ["A B", C].

        Returns a list of string values.
        Raises ValueError for syntax errors with helpful messages.
        """
        assert self.text[self.pos] == "["
        start_pos = self.pos
        self.pos += 1  # Skip opening bracket

        items: list[str] = []
        expect_value = True  # Start expecting a value

        while self.pos < self.length:
            self._skip_whitespace()

            # Whitespace may have consumed the rest of the input.
            if self.pos >= self.length:
                raise ValueError(
                    f"Unclosed bracket at position {start_pos}. "
                    f"Hint: Collection syntax requires closing bracket: [A, B]"
                )

            ch = self.text[self.pos]

            if ch == "]":
                # Check for trailing comma (expect_value=True after comma means trailing comma)
                if items and expect_value:
                    # We just got a comma and now see ]
                    raise ValueError(
                        f"Unexpected ']' after comma at position {self.pos}. "
                        f"Hint: Remove trailing comma: [A, B] not [A, B,]"
                    )
                self.pos += 1  # Skip closing bracket
                return items

            if ch == ",":
                if expect_value:
                    raise ValueError(
                        f"Unexpected ',' at position {self.pos}. Hint: Expected value before comma"
                    )
                self.pos += 1  # Skip comma
                expect_value = True
                continue

            # Two values in a row without a separating comma is an error.
            if not expect_value:
                raise ValueError(f"Expected ',' or ']' at position {self.pos}, got '{ch}'")

            # Read a value (quoted or unquoted)
            # Unquoted values stop at comma, bracket, or whitespace
            value = self._read_quoted_string() if ch == '"' else self._read_unquoted(",]")

            if not value:
                raise ValueError(f"Empty value in collection at position {self.pos}")

            items.append(value)
            expect_value = False

        # Loop exhausted the input without seeing ']'.
        raise ValueError(
            f"Unclosed bracket at position {start_pos}. "
            f"Hint: Collection syntax requires closing bracket: [A, B]"
        )
|
|
572
|
+
|
|
573
|
+
def _peek_operator(self) -> str | None:
|
|
574
|
+
"""Check if current position starts with a symbolic operator."""
|
|
575
|
+
for op in self.OPERATORS:
|
|
576
|
+
if self.text[self.pos : self.pos + len(op)] == op:
|
|
577
|
+
return op
|
|
578
|
+
return None
|
|
579
|
+
|
|
580
|
+
    def _peek_word_operator(self) -> tuple[str, str] | None:
        """Check if the next word(s) form a word-based operator.

        Returns (alias, canonical_op) if found, None otherwise.
        Does not advance position - just peeks.
        """
        # Save position for potential rollback
        saved_pos = self.pos
        self._skip_whitespace()

        if self.pos >= self.length:
            self.pos = saved_pos
            return None

        # Read the next word
        word = self._read_unquoted('=!&|()"')
        word_lower = word.lower()

        # Check single-word operators
        if word_lower in self.WORD_OPERATORS:
            self.pos = saved_pos
            return (word_lower, self.WORD_OPERATORS[word_lower])

        # Check multi-word operators starting with "is"
        # NOTE(review): the aliases here mirror MULTI_WORD_OPERATORS but are
        # hardcoded rather than read from that table — keep the two in sync.
        if word_lower == "is":
            self._skip_whitespace()
            if self.pos < self.length:
                next_word = self._read_unquoted('=!&|()"')
                next_lower = next_word.lower()

                if next_lower == "null":
                    self.pos = saved_pos
                    return ("is null", "is null")
                elif next_lower == "not":
                    self._skip_whitespace()
                    if self.pos < self.length:
                        third_word = self._read_unquoted('=!&|()"')
                        if third_word.lower() == "null":
                            self.pos = saved_pos
                            return ("is not null", "is not null")
                elif next_lower == "empty":
                    self.pos = saved_pos
                    return ("is empty", "is empty")

        # No operator recognized: restore the saved position so the caller
        # can re-read the same input as a field or value.
        self.pos = saved_pos
        return None
|
|
626
|
+
|
|
627
|
+
    def _consume_word_operator(self, alias: str) -> None:
        """Consume a word operator from the input, advancing position.

        *alias* is the (possibly multi-word) operator previously returned by
        _peek_word_operator; each of its words is consumed in turn.
        """
        words = alias.split()
        for expected in words:
            self._skip_whitespace()
            word = self._read_unquoted('=!&|()"')
            # Verify (should match since we already peeked)
            # NOTE(review): assert is stripped under `python -O`; the invariant
            # then goes unchecked — acceptable for an internal sanity check.
            assert word.lower() == expected.lower()
|
|
636
|
+
    def tokenize(self) -> list[_Token]:
        """Tokenize the entire filter string.

        Scans the input left to right, emitting structural tokens
        (parens, `&`, `|`, `!`), operators (symbolic and word aliases),
        and FIELD/VALUE tokens. Whether an unquoted or quoted word is a
        FIELD or a VALUE is decided by lookahead: a word followed by an
        operator is a FIELD, otherwise a VALUE.

        Multi-word "is ..." forms are rewritten inline:
        "is null" -> `!= *`, "is not null" -> `= *` (the `*` wildcard
        encodes null checks; see parse()'s doctests), and "is empty"
        -> the "is empty" operator with an empty placeholder VALUE.

        Returns:
            The token list, always terminated by an EOF token.

        Raises:
            ValueError: On a character that cannot start any token, or a
                bare `!` followed by `=` that fails operator lookup.
        """
        tokens: list[_Token] = []

        while True:
            self._skip_whitespace()

            if self.pos >= self.length:
                # Always terminate with an explicit EOF sentinel so the
                # parser can look ahead without bounds checks.
                tokens.append(_Token(_TokenType.EOF, "", self.pos))
                break

            ch = self.text[self.pos]
            start_pos = self.pos

            # Single-character tokens
            if ch == "(":
                tokens.append(_Token(_TokenType.LPAREN, "(", start_pos))
                self.pos += 1
            elif ch == ")":
                tokens.append(_Token(_TokenType.RPAREN, ")", start_pos))
                self.pos += 1
            elif ch == "&":
                tokens.append(_Token(_TokenType.AND, "&", start_pos))
                self.pos += 1
            elif ch == "|":
                tokens.append(_Token(_TokenType.OR, "|", start_pos))
                self.pos += 1
            elif ch == "!":
                # Check if it's != operator or standalone NOT
                if self.pos + 1 < self.length and self.text[self.pos + 1] == "=":
                    # This is != operator, will be handled as OPERATOR
                    op = self._peek_operator()
                    if op:
                        tokens.append(_Token(_TokenType.OPERATOR, op, start_pos))
                        self.pos += len(op)
                    else:
                        raise ValueError(f"Unexpected character at position {start_pos}")
                else:
                    tokens.append(_Token(_TokenType.NOT, "!", start_pos))
                    self.pos += 1
            elif ch == '"':
                # Quoted string - could be field name or value depending on context
                value = self._read_quoted_string()
                # Determine token type based on context (what comes next):
                # followed by an operator means it was a field name.
                self._skip_whitespace()
                if self.pos < self.length and (self._peek_operator() or self._peek_word_operator()):
                    tokens.append(_Token(_TokenType.FIELD, value, start_pos))
                else:
                    tokens.append(_Token(_TokenType.VALUE, value, start_pos))
            elif ch == "*":
                # Wildcard value (used for null / not-null checks)
                tokens.append(_Token(_TokenType.VALUE, "*", start_pos))
                self.pos += 1
            elif ch == "[":
                # Bracket list value: [A, B, C]
                items = self._read_bracket_list()
                tokens.append(_Token(_TokenType.VALUE, items, start_pos))
            else:
                # Check for symbolic operator first
                op = self._peek_operator()
                if op:
                    tokens.append(_Token(_TokenType.OPERATOR, op, start_pos))
                    self.pos += len(op)
                else:
                    # Unquoted field name, value, or word operator
                    # Read until operator, boolean, paren, or whitespace
                    value = self._read_unquoted('=!&|()"')
                    if not value:
                        raise ValueError(f"Unexpected character '{ch}' at position {start_pos}")

                    value_lower = value.lower()

                    # Check if this is a word operator
                    if value_lower in self.WORD_OPERATORS:
                        # Emit as OPERATOR with the canonical symbol
                        tokens.append(
                            _Token(_TokenType.OPERATOR, self.WORD_OPERATORS[value_lower], start_pos)
                        )
                    elif value_lower == "is":
                        # Check for multi-word operator: "is null", "is not null", "is empty".
                        # saved_pos marks the point just after "is" so we can roll
                        # back if the following words do not complete an operator.
                        saved_pos = self.pos
                        self._skip_whitespace()
                        if self.pos < self.length:
                            next_word = self._read_unquoted('=!&|()"')
                            next_lower = next_word.lower()
                            if next_lower == "null":
                                # "is null" -> != *
                                tokens.append(_Token(_TokenType.OPERATOR, "!=", start_pos))
                                tokens.append(_Token(_TokenType.VALUE, "*", self.pos))
                            elif next_lower == "empty":
                                # "is empty" -> is empty operator with placeholder value
                                tokens.append(_Token(_TokenType.OPERATOR, "is empty", start_pos))
                                tokens.append(_Token(_TokenType.VALUE, "", self.pos))  # placeholder
                            elif next_lower == "not":
                                # Could be "is not null"
                                self._skip_whitespace()
                                if self.pos < self.length:
                                    third_word = self._read_unquoted('=!&|()"')
                                    if third_word.lower() == "null":
                                        # "is not null" -> = *
                                        tokens.append(_Token(_TokenType.OPERATOR, "=", start_pos))
                                        tokens.append(_Token(_TokenType.VALUE, "*", self.pos))
                                    else:
                                        # Not a multi-word operator, restore and
                                        # classify bare "is" as FIELD or VALUE by lookahead.
                                        self.pos = saved_pos
                                        self._skip_whitespace()
                                        if self._peek_operator() or self._peek_word_operator():
                                            tokens.append(
                                                _Token(_TokenType.FIELD, value, start_pos)
                                            )
                                        else:
                                            tokens.append(
                                                _Token(_TokenType.VALUE, value, start_pos)
                                            )
                                else:
                                    # Just "is not" with nothing after - restore
                                    self.pos = saved_pos
                                    tokens.append(_Token(_TokenType.VALUE, value, start_pos))
                            else:
                                # Not a multi-word operator, restore
                                self.pos = saved_pos
                                self._skip_whitespace()
                                if self._peek_operator() or self._peek_word_operator():
                                    tokens.append(_Token(_TokenType.FIELD, value, start_pos))
                                else:
                                    tokens.append(_Token(_TokenType.VALUE, value, start_pos))
                        else:
                            # "is" at end of input - treat as value
                            tokens.append(_Token(_TokenType.VALUE, value, start_pos))
                    else:
                        # Determine token type based on what comes next
                        self._skip_whitespace()
                        if self.pos < self.length and (
                            self._peek_operator() or self._peek_word_operator()
                        ):
                            tokens.append(_Token(_TokenType.FIELD, value, start_pos))
                        else:
                            tokens.append(_Token(_TokenType.VALUE, value, start_pos))

        return tokens
|
|
776
|
+
|
|
777
|
+
|
|
778
|
+
def _suggest_operator(unknown: str) -> str | None:
    """
    Suggest a similar known operator for a misspelled word.

    Two cheap heuristics are tried in order:
    1. Prefix matching (requires at least 3 characters of input).
    2. A positional character-difference count (1 allowed for short
       operators, 2 for longer ones).

    Args:
        unknown: The unrecognized word typed by the user.

    Returns:
        The closest known operator, or None when nothing is similar.
    """
    needle = unknown.lower()

    # Candidate pool: every word-operator alias plus the multi-word
    # "is ..." forms the tokenizer handles specially.
    candidates = [
        *_Tokenizer.WORD_OPERATORS.keys(),
        "is null",
        "is not null",
        "is empty",
    ]

    # Heuristic 1: prefix matching (only meaningful for 3+ chars).
    if len(needle) >= 3:
        stripped_needle = needle.replace("_", "")
        for candidate in candidates:
            # Direct prefix: user typed the start of an operator.
            if candidate.startswith(needle):
                return candidate
            # Separator-insensitive prefix, e.g. "containsall" should
            # still suggest "contains_all": compare with spaces and
            # underscores removed, on the first three characters.
            stripped_candidate = candidate.replace(" ", "").replace("_", "")
            if stripped_candidate.startswith(stripped_needle[:3]):
                return candidate

    # Heuristic 2: simple typo detection. This is a positional diff,
    # not a true edit distance: count mismatched characters position by
    # position and add the length difference.
    for candidate in candidates:
        cand_lower = candidate.lower()
        length_gap = abs(len(needle) - len(cand_lower))
        # Very different lengths cannot be near-misses.
        if length_gap > 2:
            continue
        mismatches = length_gap + sum(
            a != b for a, b in zip(needle, cand_lower, strict=False)
        )
        allowed = 2 if len(cand_lower) > 4 else 1
        if mismatches <= allowed:
            return candidate

    return None
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
class _Parser:
    """Recursive descent parser for filter expressions.

    Consumes the token list produced by _Tokenizer and builds a
    FilterExpression AST. Precedence, lowest to highest: OR, AND, NOT,
    then atoms (comparisons and parenthesized groups). Error paths are
    deliberately verbose, attaching "Hint:" text for common mistakes
    (SQL-style AND/OR, unquoted multi-word values, '==', typo'd
    operators).
    """

    def __init__(self, tokens: list[_Token]):
        # tokens must end with an EOF token (tokenize() guarantees this).
        self.tokens = tokens
        self.pos = 0

    def _current(self) -> _Token:
        """Get current token without consuming it."""
        return self.tokens[self.pos]

    def _advance(self) -> _Token:
        """Advance to next token and return previous.

        Never moves past the final (EOF) token, so _current() is always
        safe to call.
        """
        token = self.tokens[self.pos]
        if self.pos < len(self.tokens) - 1:
            self.pos += 1
        return token

    def _expect(self, token_type: _TokenType, context: str = "") -> _Token:
        """Consume and return the current token, raising if its type differs.

        Args:
            token_type: The required token type.
            context: Optional phrase inserted into the error message.

        Raises:
            ValueError: If the current token is not of token_type.
        """
        token = self._current()
        if token.type != token_type:
            ctx = f" {context}" if context else ""
            raise ValueError(
                f"Expected {token_type.name}{ctx} at position {token.pos}, "
                f"got {token.type.name} '{token.value}'"
            )
        return self._advance()

    def parse(self) -> FilterExpression:
        """Parse the token stream into a FilterExpression.

        Raises:
            ValueError: On an empty stream or trailing tokens after a
                complete expression; the message carries a targeted hint
                when the leftover tokens look like a known mistake.
        """
        if self._current().type == _TokenType.EOF:
            raise ValueError("Empty filter expression")

        expr = self._parse_or_expr()

        # A complete parse must consume everything up to EOF; anything
        # left over is an error, diagnosed below for better messages.
        if self._current().type != _TokenType.EOF:
            token = self._current()
            # Check if this looks like a multi-word value (extra word after comparison)
            if token.type in (_TokenType.VALUE, _TokenType.FIELD):
                token_val = token.value if isinstance(token.value, str) else str(token.value)
                # Check for SQL-like boolean keywords
                upper_val = token_val.upper()
                if upper_val == "AND":
                    raise ValueError(
                        f"Unexpected 'AND' at position {token.pos}. "
                        f"Hint: Use '&' for AND: expr1 & expr2"
                    )
                if upper_val == "OR":
                    raise ValueError(
                        f"Unexpected 'OR' at position {token.pos}. "
                        f"Hint: Use '|' for OR: expr1 | expr2"
                    )
                # Look back to find the previous value to suggest quoting
                # Collect remaining words (stop at any non-word token or EOF)
                remaining_words: list[str] = [token_val]
                pos = self.pos + 1
                while pos < len(self.tokens) - 1:
                    next_tok = self.tokens[pos]
                    if next_tok.type in (_TokenType.VALUE, _TokenType.FIELD):
                        next_val = (
                            next_tok.value
                            if isinstance(next_tok.value, str)
                            else str(next_tok.value)
                        )
                        remaining_words.append(next_val)
                        pos += 1
                    else:
                        break
                if len(remaining_words) == 1:
                    raise ValueError(
                        f"Unexpected token '{token_val}' at position {token.pos}. "
                        f'Hint: Values with spaces must be quoted: "... {token_val}"'
                    )
                else:
                    combined = " ".join(remaining_words)
                    raise ValueError(
                        f"Unexpected token '{token_val}' at position {token.pos}. "
                        f'Hint: Values with spaces must be quoted: "...{combined}"'
                    )
            raise ValueError(f"Unexpected token '{token.value}' at position {token.pos}")

        return expr

    def _parse_or_expr(self) -> FilterExpression:
        """Parse OR expressions (lowest precedence, left-associative)."""
        left = self._parse_and_expr()

        while self._current().type == _TokenType.OR:
            self._advance()  # consume |
            right = self._parse_and_expr()
            left = OrExpression(left, right)

        return left

    def _parse_and_expr(self) -> FilterExpression:
        """Parse AND expressions (medium precedence, left-associative)."""
        left = self._parse_not_expr()

        while self._current().type == _TokenType.AND:
            self._advance()  # consume &
            right = self._parse_not_expr()
            left = AndExpression(left, right)

        return left

    def _parse_not_expr(self) -> FilterExpression:
        """Parse NOT expressions (high precedence)."""
        if self._current().type == _TokenType.NOT:
            self._advance()  # consume !
            expr = self._parse_not_expr()  # NOT is right-associative
            return NotExpression(expr)

        return self._parse_atom()

    def _parse_atom(self) -> FilterExpression:
        """Parse atomic expressions: comparisons or parenthesized expressions.

        Raises:
            ValueError: On EOF, a dangling operator, unbalanced parens,
                or any other token that cannot start an atom.
        """
        token = self._current()

        # Parenthesized expression
        if token.type == _TokenType.LPAREN:
            self._advance()  # consume (
            expr = self._parse_or_expr()
            closing = self._current()
            if closing.type != _TokenType.RPAREN:
                raise ValueError(f"Unbalanced parentheses: expected ')' at position {closing.pos}")
            self._advance()  # consume )
            return expr

        # Field comparison
        if token.type == _TokenType.FIELD:
            return self._parse_comparison()

        # Error cases
        if token.type == _TokenType.EOF:
            raise ValueError("Unexpected end of expression")
        if token.type == _TokenType.OPERATOR:
            raise ValueError(
                f"Missing field name before operator '{token.value}' at position {token.pos}"
            )
        if token.type == _TokenType.VALUE:
            # This could be an unquoted field name that wasn't recognized
            # as FIELD by the tokenizer's lookahead.
            # Try to parse it as a comparison
            return self._parse_comparison_from_value()

        raise ValueError(f"Unexpected token '{token.value}' at position {token.pos}")

    def _parse_comparison(self) -> FilterExpression:
        """Parse a field comparison expression: FIELD OPERATOR VALUE.

        Returns:
            A FieldComparison node; a literal "*" value becomes RawToken("*")
            so the wildcard keeps its special (null-check) meaning.
        """
        field_token = self._expect(_TokenType.FIELD, "for field name")
        # Field names are always strings (not bracket lists)
        assert isinstance(field_token.value, str)
        field_name = field_token.value

        op_token = self._current()
        if op_token.type != _TokenType.OPERATOR:
            raise ValueError(
                f"Expected operator after field name at position {op_token.pos}, "
                f"got {op_token.type.name}"
            )
        self._advance()
        # Operators are always strings
        assert isinstance(op_token.value, str)
        operator = op_token.value

        value_token = self._current()
        if value_token.type not in (_TokenType.VALUE, _TokenType.FIELD):
            # Check for == instead of = (second '=' shows up as an extra operator)
            if value_token.type == _TokenType.OPERATOR and value_token.value == "=":
                raise ValueError(
                    f"Unexpected '=' at position {value_token.pos}. "
                    f"Hint: Use single '=' for equality, not '=='"
                )
            raise ValueError(f"Expected value after operator at position {value_token.pos}")
        self._advance()

        # Convert value to appropriate type
        if value_token.value == "*":
            value: Any = RawToken("*")
        else:
            value = value_token.value

        return FieldComparison(field_name, operator, value)

    def _parse_comparison_from_value(self) -> FilterExpression:
        """Parse a comparison where the field was tokenized as VALUE.

        Mirrors _parse_comparison but with extra diagnostics, since a
        VALUE in field position usually means an unquoted multi-word
        field name, an unsupported operator, or a typo'd word operator.
        """
        # This happens when field name isn't followed by operator immediately
        value_token = self._advance()
        # Field names are always strings (not bracket lists)
        assert isinstance(value_token.value, str)
        field_name = value_token.value

        op_token = self._current()
        if op_token.type != _TokenType.OPERATOR:
            # Check if this looks like a multi-word field name (next token is word, not operator)
            # Note: the next word might be tokenized as FIELD if it's followed by an operator
            if op_token.type in (_TokenType.VALUE, _TokenType.FIELD):
                op_val = op_token.value if isinstance(op_token.value, str) else str(op_token.value)
                # Check if it looks like an unsupported operator (e.g., <>, >>, <<)
                if op_val in ("<>", ">>", "<<"):
                    raise ValueError(
                        f"Unsupported operator '{op_val}' at position {op_token.pos}. "
                        f"Supported operators: = != =~ =^ =$ > >= < <="
                    )

                # Check if this looks like a misspelled operator
                suggestion = _suggest_operator(op_val)
                if suggestion:
                    raise ValueError(
                        f"Unknown operator '{op_val}' at position {op_token.pos}. "
                        f"Did you mean: {suggestion}?"
                    )

                # Collect subsequent words to suggest the full field name
                words: list[str] = [field_name, op_val]
                pos = self.pos + 1
                while pos < len(self.tokens) - 1:
                    next_tok = self.tokens[pos]
                    if next_tok.type == _TokenType.OPERATOR:
                        break
                    if next_tok.type in (_TokenType.VALUE, _TokenType.FIELD):
                        next_val = (
                            next_tok.value
                            if isinstance(next_tok.value, str)
                            else str(next_tok.value)
                        )
                        # Skip unsupported operator-like tokens
                        if next_val in ("<>", ">>", "<<"):
                            break
                        words.append(next_val)
                        pos += 1
                    else:
                        break
                suggested_field = " ".join(words)
                raise ValueError(
                    f"Expected operator after '{field_name}' at position {op_token.pos}. "
                    f'Hint: For multi-word field names, use quotes: "{suggested_field}"'
                )
            raise ValueError(f"Expected operator after '{field_name}' at position {op_token.pos}")
        self._advance()
        # Operators are always strings
        assert isinstance(op_token.value, str)
        operator = op_token.value

        next_token = self._current()
        if next_token.type not in (_TokenType.VALUE, _TokenType.FIELD):
            # Check for == instead of =
            if next_token.type == _TokenType.OPERATOR and next_token.value == "=":
                raise ValueError(
                    f"Unexpected '=' at position {next_token.pos}. "
                    f"Hint: Use single '=' for equality, not '=='"
                )
            raise ValueError(f"Expected value after operator at position {next_token.pos}")
        self._advance()

        # A literal "*" value is wrapped so the wildcard keeps its meaning.
        if next_token.value == "*":
            value: Any = RawToken("*")
        else:
            value = next_token.value

        return FieldComparison(field_name, operator, value)
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
def parse(filter_string: str) -> FilterExpression:
    """
    Parse a filter string into a FilterExpression AST.

    The resulting AST supports client-side evaluation via matches().

    Args:
        filter_string: The filter expression to parse

    Returns:
        A FilterExpression AST representing the filter

    Raises:
        ValueError: If the filter string is invalid or blank

    Examples:
        >>> expr = parse('name = "Alice"')
        >>> expr.matches({"name": "Alice"})
        True

        >>> expr = parse('status = Active | status = Pending')
        >>> expr.matches({"status": "Active"})
        True

        >>> expr = parse('email = *')  # IS NOT NULL
        >>> expr.matches({"email": "test@example.com"})
        True

        >>> expr = parse('email != *')  # IS NULL
        >>> expr.matches({"email": None})
        True
    """
    # Reject None-ish / whitespace-only input up front.
    if not (filter_string and filter_string.strip()):
        raise ValueError("Empty filter expression")

    # Tokenize, then hand the token stream to the recursive descent parser.
    token_stream = _Tokenizer(filter_string).tokenize()
    return _Parser(token_stream).parse()
|