python-jsonpath 1.3.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jsonpath/parse.py CHANGED
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING
8
8
  from typing import Callable
9
9
  from typing import Dict
10
10
  from typing import Iterable
11
+ from typing import Iterator
11
12
  from typing import List
12
13
  from typing import Optional
13
14
  from typing import Union
@@ -22,41 +23,45 @@ from .filter import FALSE
22
23
  from .filter import NIL
23
24
  from .filter import TRUE
24
25
  from .filter import UNDEFINED_LITERAL
25
- from .filter import BooleanExpression
26
+ from .filter import BaseExpression
26
27
  from .filter import FilterContextPath
27
28
  from .filter import FilterExpression
29
+ from .filter import FilterExpressionLiteral
30
+ from .filter import FilterQuery
28
31
  from .filter import FloatLiteral
29
32
  from .filter import FunctionExtension
30
33
  from .filter import InfixExpression
31
34
  from .filter import IntegerLiteral
32
35
  from .filter import ListLiteral
33
- from .filter import Literal
34
36
  from .filter import Nil
35
- from .filter import Path
36
37
  from .filter import PrefixExpression
37
38
  from .filter import RegexLiteral
38
- from .filter import RootPath
39
- from .filter import SelfPath
39
+ from .filter import RelativeFilterQuery
40
+ from .filter import RootFilterQuery
40
41
  from .filter import StringLiteral
41
42
  from .path import JSONPath
43
+ from .segments import JSONPathChildSegment
44
+ from .segments import JSONPathRecursiveDescentSegment
45
+ from .segments import JSONPathSegment
42
46
  from .selectors import Filter
43
47
  from .selectors import IndexSelector
44
48
  from .selectors import JSONPathSelector
49
+ from .selectors import KeySelector
50
+ from .selectors import KeysFilter
45
51
  from .selectors import KeysSelector
46
- from .selectors import ListSelector
47
- from .selectors import PropertySelector
48
- from .selectors import RecursiveDescentSelector
52
+ from .selectors import NameSelector
53
+ from .selectors import SingularQuerySelector
49
54
  from .selectors import SliceSelector
50
- from .selectors import WildSelector
55
+ from .selectors import WildcardSelector
51
56
  from .token import TOKEN_AND
52
- from .token import TOKEN_BARE_PROPERTY
57
+ from .token import TOKEN_COLON
53
58
  from .token import TOKEN_COMMA
54
59
  from .token import TOKEN_CONTAINS
55
60
  from .token import TOKEN_DDOT
61
+ from .token import TOKEN_DOT
56
62
  from .token import TOKEN_DOUBLE_QUOTE_STRING
57
63
  from .token import TOKEN_EOF
58
64
  from .token import TOKEN_EQ
59
- from .token import TOKEN_FAKE_ROOT
60
65
  from .token import TOKEN_FALSE
61
66
  from .token import TOKEN_FILTER
62
67
  from .token import TOKEN_FILTER_CONTEXT
@@ -68,20 +73,23 @@ from .token import TOKEN_IN
68
73
  from .token import TOKEN_INT
69
74
  from .token import TOKEN_INTERSECTION
70
75
  from .token import TOKEN_KEY
76
+ from .token import TOKEN_KEY_NAME
71
77
  from .token import TOKEN_KEYS
78
+ from .token import TOKEN_KEYS_FILTER
79
+ from .token import TOKEN_LBRACKET
72
80
  from .token import TOKEN_LE
73
81
  from .token import TOKEN_LG
74
- from .token import TOKEN_LIST_START
75
82
  from .token import TOKEN_LPAREN
76
83
  from .token import TOKEN_LT
77
84
  from .token import TOKEN_MISSING
85
+ from .token import TOKEN_NAME
78
86
  from .token import TOKEN_NE
79
87
  from .token import TOKEN_NIL
80
88
  from .token import TOKEN_NONE
81
89
  from .token import TOKEN_NOT
82
90
  from .token import TOKEN_NULL
83
91
  from .token import TOKEN_OR
84
- from .token import TOKEN_PROPERTY
92
+ from .token import TOKEN_PSEUDO_ROOT
85
93
  from .token import TOKEN_RBRACKET
86
94
  from .token import TOKEN_RE
87
95
  from .token import TOKEN_RE_FLAGS
@@ -90,14 +98,13 @@ from .token import TOKEN_ROOT
90
98
  from .token import TOKEN_RPAREN
91
99
  from .token import TOKEN_SELF
92
100
  from .token import TOKEN_SINGLE_QUOTE_STRING
93
- from .token import TOKEN_SLICE_START
94
- from .token import TOKEN_SLICE_STEP
95
- from .token import TOKEN_SLICE_STOP
96
101
  from .token import TOKEN_TRUE
97
102
  from .token import TOKEN_UNDEFINED
98
103
  from .token import TOKEN_UNION
104
+ from .token import TOKEN_WHITESPACE
99
105
  from .token import TOKEN_WILD
100
106
  from .token import Token
107
+ from .unescape import unescape_string
101
108
 
102
109
  if TYPE_CHECKING:
103
110
  from .env import JSONPathEnvironment
@@ -145,7 +152,6 @@ class Parser:
145
152
  """A JSONPath parser bound to a JSONPathEnvironment."""
146
153
 
147
154
  PRECEDENCE_LOWEST = 1
148
- PRECEDENCE_LOGICALRIGHT = 2
149
155
  PRECEDENCE_LOGICAL_OR = 3
150
156
  PRECEDENCE_LOGICAL_AND = 4
151
157
  PRECEDENCE_RELATIONAL = 5
@@ -234,16 +240,16 @@ class Parser:
234
240
  def __init__(self, *, env: JSONPathEnvironment) -> None:
235
241
  self.env = env
236
242
 
237
- self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = {
243
+ self.token_map: Dict[str, Callable[[TokenStream], BaseExpression]] = {
238
244
  TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
239
- TOKEN_FAKE_ROOT: self.parse_root_path,
245
+ TOKEN_PSEUDO_ROOT: self.parse_absolute_query,
240
246
  TOKEN_FALSE: self.parse_boolean,
241
247
  TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
242
248
  TOKEN_FLOAT: self.parse_float_literal,
243
249
  TOKEN_FUNCTION: self.parse_function_extension,
244
250
  TOKEN_INT: self.parse_integer_literal,
245
251
  TOKEN_KEY: self.parse_current_key,
246
- TOKEN_LIST_START: self.parse_list_literal,
252
+ TOKEN_LBRACKET: self.parse_list_literal,
247
253
  TOKEN_LPAREN: self.parse_grouped_expression,
248
254
  TOKEN_MISSING: self.parse_undefined,
249
255
  TOKEN_NIL: self.parse_nil,
@@ -251,14 +257,14 @@ class Parser:
251
257
  TOKEN_NOT: self.parse_prefix_expression,
252
258
  TOKEN_NULL: self.parse_nil,
253
259
  TOKEN_RE_PATTERN: self.parse_regex,
254
- TOKEN_ROOT: self.parse_root_path,
255
- TOKEN_SELF: self.parse_self_path,
260
+ TOKEN_ROOT: self.parse_absolute_query,
261
+ TOKEN_SELF: self.parse_relative_query,
256
262
  TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
257
263
  TOKEN_TRUE: self.parse_boolean,
258
264
  TOKEN_UNDEFINED: self.parse_undefined,
259
265
  }
260
266
 
261
- self.list_item_map: Dict[str, Callable[[TokenStream], FilterExpression]] = {
267
+ self.list_item_map: Dict[str, Callable[[TokenStream], BaseExpression]] = {
262
268
  TOKEN_FALSE: self.parse_boolean,
263
269
  TOKEN_FLOAT: self.parse_float_literal,
264
270
  TOKEN_INT: self.parse_integer_literal,
@@ -271,10 +277,10 @@ class Parser:
271
277
  }
272
278
 
273
279
  self.function_argument_map: Dict[
274
- str, Callable[[TokenStream], FilterExpression]
280
+ str, Callable[[TokenStream], BaseExpression]
275
281
  ] = {
276
282
  TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
277
- TOKEN_FAKE_ROOT: self.parse_root_path,
283
+ TOKEN_PSEUDO_ROOT: self.parse_absolute_query,
278
284
  TOKEN_FALSE: self.parse_boolean,
279
285
  TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
280
286
  TOKEN_FLOAT: self.parse_float_literal,
@@ -284,212 +290,291 @@ class Parser:
284
290
  TOKEN_NIL: self.parse_nil,
285
291
  TOKEN_NONE: self.parse_nil,
286
292
  TOKEN_NULL: self.parse_nil,
287
- TOKEN_ROOT: self.parse_root_path,
288
- TOKEN_SELF: self.parse_self_path,
293
+ TOKEN_ROOT: self.parse_absolute_query,
294
+ TOKEN_SELF: self.parse_relative_query,
289
295
  TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
290
296
  TOKEN_TRUE: self.parse_boolean,
291
297
  }
292
298
 
293
- def parse(self, stream: TokenStream) -> Iterable[JSONPathSelector]:
294
- """Parse a JSONPath from a stream of tokens."""
295
- if stream.current.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}:
296
- stream.next_token()
297
- yield from self.parse_path(stream, in_filter=False)
299
+ def parse(self, stream: TokenStream) -> Iterator[JSONPathSegment]:
300
+ """Parse a JSONPath query from a stream of tokens."""
301
+ # Leading whitespace is not allowed in strict mode.
302
+ if stream.skip_whitespace() and self.env.strict:
303
+ raise JSONPathSyntaxError(
304
+ "unexpected leading whitespace", token=stream.current()
305
+ )
306
+
307
+ # Trailing whitespace is not allowed in strict mode.
308
+ if (
309
+ self.env.strict
310
+ and stream.tokens
311
+ and stream.tokens[-1].kind == TOKEN_WHITESPACE
312
+ ):
313
+ raise JSONPathSyntaxError(
314
+ "unexpected trailing whitespace", token=stream.tokens[-1]
315
+ )
316
+
317
+ token = stream.current()
318
+
319
+ if token.kind == TOKEN_ROOT or (
320
+ token.kind == TOKEN_PSEUDO_ROOT and not self.env.strict
321
+ ):
322
+ stream.next()
323
+ elif self.env.strict:
324
+ # Raises a syntax error because the current token is not TOKEN_ROOT.
325
+ stream.expect(TOKEN_ROOT)
298
326
 
299
- if stream.current.kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION):
327
+ yield from self.parse_query(stream)
328
+
329
+ if stream.current().kind not in (TOKEN_EOF, TOKEN_INTERSECTION, TOKEN_UNION):
300
330
  raise JSONPathSyntaxError(
301
- f"unexpected token {stream.current.value!r}",
302
- token=stream.current,
331
+ f"unexpected token {stream.current().value!r}",
332
+ token=stream.current(),
303
333
  )
304
334
 
305
- def parse_path(
306
- self,
307
- stream: TokenStream,
308
- *,
309
- in_filter: bool = False,
310
- ) -> Iterable[JSONPathSelector]:
311
- """Parse a top-level JSONPath, or one that is nested in a filter."""
335
+ def parse_query(self, stream: TokenStream) -> Iterable[JSONPathSegment]:
336
+ """Parse a JSONPath query string.
337
+
338
+ This method assumes the root, current or pseudo root identifier has
339
+ already been consumed.
340
+ """
341
+ if not self.env.strict and stream.current().kind in {
342
+ TOKEN_NAME,
343
+ TOKEN_WILD,
344
+ TOKEN_KEYS,
345
+ TOKEN_KEY_NAME,
346
+ }:
347
+ # A non-standard "bare" path. One that starts with a shorthand selector
348
+ # without a leading identifier (`$`, `@`, `^` or `_`).
349
+ #
350
+ # When no identifier is given, a root query (`$`) is assumed.
351
+ token = stream.current()
352
+ selector = self.parse_shorthand_selector(stream)
353
+ yield JSONPathChildSegment(env=self.env, token=token, selectors=(selector,))
354
+
312
355
  while True:
313
- if stream.current.kind in (TOKEN_PROPERTY, TOKEN_BARE_PROPERTY):
314
- yield PropertySelector(
315
- env=self.env,
316
- token=stream.current,
317
- name=stream.current.value,
318
- shorthand=True,
319
- )
320
- elif stream.current.kind == TOKEN_SLICE_START:
321
- yield self.parse_slice(stream)
322
- elif stream.current.kind == TOKEN_WILD:
323
- yield WildSelector(
324
- env=self.env,
325
- token=stream.current,
326
- shorthand=True,
356
+ stream.skip_whitespace()
357
+ token = stream.next()
358
+
359
+ if token.kind == TOKEN_DOT:
360
+ selector = self.parse_shorthand_selector(stream)
361
+ yield JSONPathChildSegment(
362
+ env=self.env, token=token, selectors=(selector,)
327
363
  )
328
- elif stream.current.kind == TOKEN_KEYS:
329
- yield KeysSelector(
330
- env=self.env,
331
- token=stream.current,
332
- shorthand=True,
364
+ elif token.kind == TOKEN_DDOT:
365
+ if stream.current().kind == TOKEN_LBRACKET:
366
+ selectors = tuple(self.parse_bracketed_selection(stream))
367
+ else:
368
+ selectors = (self.parse_shorthand_selector(stream),)
369
+
370
+ yield JSONPathRecursiveDescentSegment(
371
+ env=self.env, token=token, selectors=selectors
333
372
  )
334
- elif stream.current.kind == TOKEN_DDOT:
335
- yield RecursiveDescentSelector(
373
+ elif token.kind == TOKEN_LBRACKET:
374
+ stream.pos -= 1
375
+ yield JSONPathChildSegment(
336
376
  env=self.env,
337
- token=stream.current,
377
+ token=token,
378
+ selectors=tuple(self.parse_bracketed_selection(stream)),
338
379
  )
339
- elif stream.current.kind == TOKEN_LIST_START:
340
- yield self.parse_selector_list(stream)
380
+ elif token.kind == TOKEN_EOF:
381
+ break
341
382
  else:
342
- if in_filter:
343
- stream.push(stream.current)
383
+ # An embedded query. Put the token back on the stream.
384
+ stream.pos -= 1
344
385
  break
345
386
 
346
- stream.next_token()
387
+ def parse_shorthand_selector(self, stream: TokenStream) -> JSONPathSelector:
388
+ token = stream.next()
347
389
 
348
- def parse_slice(self, stream: TokenStream) -> SliceSelector:
349
- """Parse a slice JSONPath expression from a stream of tokens."""
350
- start_token = stream.next_token()
351
- stream.expect(TOKEN_SLICE_STOP)
352
- stop_token = stream.next_token()
353
- stream.expect(TOKEN_SLICE_STEP)
354
- step_token = stream.current
355
-
356
- if not start_token.value:
357
- start: Optional[int] = None
358
- else:
359
- start = int(start_token.value)
360
-
361
- if not stop_token.value:
362
- stop: Optional[int] = None
363
- else:
364
- stop = int(stop_token.value)
365
-
366
- if not step_token.value:
367
- step: Optional[int] = None
368
- else:
369
- step = int(step_token.value)
390
+ if token.kind == TOKEN_NAME:
391
+ return NameSelector(
392
+ env=self.env,
393
+ token=token,
394
+ name=token.value,
395
+ )
370
396
 
371
- return SliceSelector(
372
- env=self.env,
373
- token=start_token,
374
- start=start,
375
- stop=stop,
376
- step=step,
377
- )
397
+ if token.kind == TOKEN_KEY_NAME:
398
+ return KeySelector(
399
+ env=self.env,
400
+ token=token,
401
+ key=token.value,
402
+ )
378
403
 
379
- def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR0912
380
- """Parse a comma separated list JSONPath selectors from a stream of tokens."""
381
- tok = stream.next_token()
382
- list_items: List[
383
- Union[
384
- IndexSelector,
385
- KeysSelector,
386
- PropertySelector,
387
- SliceSelector,
388
- WildSelector,
389
- Filter,
390
- ]
391
- ] = []
392
-
393
- while stream.current.kind != TOKEN_RBRACKET:
394
- if stream.current.kind == TOKEN_INT:
395
- if (
396
- len(stream.current.value) > 1
397
- and stream.current.value.startswith("0")
398
- ) or stream.current.value.startswith("-0"):
399
- raise JSONPathSyntaxError(
400
- "leading zero in index selector", token=stream.current
401
- )
402
- list_items.append(
403
- IndexSelector(
404
- env=self.env,
405
- token=stream.current,
406
- index=int(stream.current.value),
407
- )
408
- )
409
- elif stream.current.kind == TOKEN_BARE_PROPERTY:
410
- list_items.append(
411
- PropertySelector(
412
- env=self.env,
413
- token=stream.current,
414
- name=stream.current.value,
415
- shorthand=False,
416
- ),
404
+ if token.kind == TOKEN_WILD:
405
+ return WildcardSelector(
406
+ env=self.env,
407
+ token=token,
408
+ )
409
+
410
+ if token.kind == TOKEN_KEYS:
411
+ if stream.current().kind == TOKEN_NAME:
412
+ return KeySelector(
413
+ env=self.env,
414
+ token=token,
415
+ key=self._decode_string_literal(stream.next()),
417
416
  )
418
- elif stream.current.kind == TOKEN_KEYS:
419
- list_items.append(
420
- KeysSelector(
421
- env=self.env,
422
- token=stream.current,
423
- shorthand=False,
417
+
418
+ return KeysSelector(
419
+ env=self.env,
420
+ token=token,
421
+ )
422
+
423
+ raise JSONPathSyntaxError("expected a shorthand selector", token=token)
424
+
425
+ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]: # noqa: PLR0912, PLR0915
426
+ segment_token = stream.eat(TOKEN_LBRACKET)
427
+ selectors: List[JSONPathSelector] = []
428
+
429
+ while True:
430
+ stream.skip_whitespace()
431
+ token = stream.current()
432
+
433
+ if token.kind == TOKEN_RBRACKET:
434
+ break
435
+
436
+ if token.kind == TOKEN_INT:
437
+ if (
438
+ stream.peek().kind == TOKEN_COLON
439
+ or stream.peek(2).kind == TOKEN_COLON
440
+ ):
441
+ selectors.append(self.parse_slice(stream))
442
+ else:
443
+ self._raise_for_leading_zero(token)
444
+ selectors.append(
445
+ IndexSelector(
446
+ env=self.env,
447
+ token=token,
448
+ index=int(token.value),
449
+ )
424
450
  )
425
- )
426
- elif stream.current.kind in (
451
+ stream.next()
452
+ elif token.kind in (
427
453
  TOKEN_DOUBLE_QUOTE_STRING,
428
454
  TOKEN_SINGLE_QUOTE_STRING,
429
455
  ):
430
- if self.RE_INVALID_NAME_SELECTOR.search(stream.current.value):
431
- raise JSONPathSyntaxError(
432
- f"invalid name selector {stream.current.value!r}",
433
- token=stream.current,
434
- )
435
-
436
- list_items.append(
437
- PropertySelector(
456
+ selectors.append(
457
+ NameSelector(
438
458
  env=self.env,
439
- token=stream.current,
440
- name=self._decode_string_literal(stream.current),
441
- shorthand=False,
459
+ token=token,
460
+ name=self._decode_string_literal(token),
442
461
  ),
443
462
  )
444
- elif stream.current.kind == TOKEN_SLICE_START:
445
- list_items.append(self.parse_slice(stream))
446
- elif stream.current.kind == TOKEN_WILD:
447
- list_items.append(
448
- WildSelector(
449
- env=self.env,
450
- token=stream.current,
451
- shorthand=False,
463
+ stream.next()
464
+ elif token.kind == TOKEN_COLON:
465
+ selectors.append(self.parse_slice(stream))
466
+ elif token.kind == TOKEN_WILD:
467
+ selectors.append(WildcardSelector(env=self.env, token=token))
468
+ stream.next()
469
+ elif token.kind == TOKEN_KEYS:
470
+ stream.eat(TOKEN_KEYS)
471
+ if stream.current().kind in (
472
+ TOKEN_DOUBLE_QUOTE_STRING,
473
+ TOKEN_SINGLE_QUOTE_STRING,
474
+ ):
475
+ selectors.append(
476
+ KeySelector(
477
+ env=self.env,
478
+ token=token,
479
+ key=self._decode_string_literal(stream.next()),
480
+ )
452
481
  )
453
- )
454
- elif stream.current.kind == TOKEN_FILTER:
455
- list_items.append(self.parse_filter(stream))
456
- elif stream.current.kind == TOKEN_EOF:
457
- raise JSONPathSyntaxError(
458
- "unexpected end of query", token=stream.current
459
- )
482
+ else:
483
+ selectors.append(KeysSelector(env=self.env, token=token))
484
+
485
+ elif token.kind == TOKEN_FILTER:
486
+ selectors.append(self.parse_filter_selector(stream))
487
+ elif token.kind == TOKEN_KEYS_FILTER:
488
+ selectors.append(self.parse_filter_selector(stream, keys=True))
489
+ elif token.kind in (TOKEN_ROOT, TOKEN_NAME):
490
+ selectors.append(self.parse_singular_query_selector(stream))
491
+ elif token.kind == TOKEN_EOF:
492
+ raise JSONPathSyntaxError("unexpected end of query", token=token)
460
493
  else:
461
494
  raise JSONPathSyntaxError(
462
- f"unexpected token in bracketed selection {stream.current.kind!r}",
463
- token=stream.current,
495
+ f"unexpected token in bracketed selection {token.kind!r}",
496
+ token=token,
464
497
  )
465
498
 
466
- if stream.peek.kind == TOKEN_EOF:
499
+ stream.skip_whitespace()
500
+
501
+ if stream.current().kind == TOKEN_EOF:
467
502
  raise JSONPathSyntaxError(
468
- "unexpected end of selector list",
469
- token=stream.current,
503
+ "unexpected end of segment",
504
+ token=stream.current(),
470
505
  )
471
506
 
472
- if stream.peek.kind != TOKEN_RBRACKET:
473
- # TODO: error message .. expected a comma or logical operator
474
- stream.expect_peek(TOKEN_COMMA)
475
- stream.next_token()
476
-
477
- if stream.peek.kind == TOKEN_RBRACKET:
507
+ if stream.current().kind != TOKEN_RBRACKET:
508
+ stream.eat(TOKEN_COMMA)
509
+ stream.skip_whitespace()
510
+ if stream.current().kind == TOKEN_RBRACKET:
478
511
  raise JSONPathSyntaxError(
479
- "unexpected trailing comma",
480
- token=stream.peek,
512
+ "unexpected trailing comma", token=stream.current()
481
513
  )
482
514
 
483
- stream.next_token()
515
+ stream.eat(TOKEN_RBRACKET)
516
+
517
+ if not selectors:
518
+ raise JSONPathSyntaxError("empty bracketed segment", token=segment_token)
519
+
520
+ return selectors
484
521
 
485
- if not list_items:
486
- raise JSONPathSyntaxError("empty bracketed segment", token=tok)
522
+ def parse_slice(self, stream: TokenStream) -> SliceSelector:
523
+ """Parse a slice JSONPath expression from a stream of tokens."""
524
+ token = stream.current()
525
+ start: Optional[int] = None
526
+ stop: Optional[int] = None
527
+ step: Optional[int] = None
528
+
529
+ def _maybe_index(token: Token) -> bool:
530
+ if token.kind == TOKEN_INT:
531
+ if len(token.value) > 1 and token.value.startswith(("0", "-0")):
532
+ raise JSONPathSyntaxError(
533
+ f"invalid index {token.value!r}", token=token
534
+ )
535
+ return True
536
+ return False
537
+
538
+ # 1: or :
539
+ if _maybe_index(stream.current()):
540
+ start = int(stream.current().value)
541
+ stream.next()
542
+
543
+ stream.skip_whitespace()
544
+ stream.expect(TOKEN_COLON)
545
+ stream.next()
546
+ stream.skip_whitespace()
547
+
548
+ # 1 or 1: or : or ?
549
+ if _maybe_index(stream.current()):
550
+ stop = int(stream.current().value)
551
+ stream.next()
552
+ stream.skip_whitespace()
553
+ if stream.current().kind == TOKEN_COLON:
554
+ stream.next()
555
+ elif stream.current().kind == TOKEN_COLON:
556
+ stream.expect(TOKEN_COLON)
557
+ stream.next()
558
+
559
+ # 1 or ?
560
+ stream.skip_whitespace()
561
+ if _maybe_index(stream.current()):
562
+ step = int(stream.current().value)
563
+ stream.next()
487
564
 
488
- return ListSelector(env=self.env, token=tok, items=list_items)
565
+ return SliceSelector(
566
+ env=self.env,
567
+ token=token,
568
+ start=start,
569
+ stop=stop,
570
+ step=step,
571
+ )
489
572
 
490
- def parse_filter(self, stream: TokenStream) -> Filter:
491
- tok = stream.next_token()
492
- expr = self.parse_filter_selector(stream)
573
+ def parse_filter_selector(
574
+ self, stream: TokenStream, *, keys: bool = False
575
+ ) -> Union[Filter, KeysFilter]:
576
+ token = stream.next()
577
+ expr = self.parse_filter_expression(stream)
493
578
 
494
579
  if self.env.well_typed and isinstance(expr, FunctionExtension):
495
580
  func = self.env.function_extensions.get(expr.name)
@@ -499,236 +584,291 @@ class Parser:
499
584
  and func.return_type == ExpressionType.VALUE
500
585
  ):
501
586
  raise JSONPathTypeError(
502
- f"result of {expr.name}() must be compared", token=tok
587
+ f"result of {expr.name}() must be compared", token=token
503
588
  )
504
589
 
505
- if isinstance(expr, (Literal, Nil)):
590
+ if isinstance(expr, (FilterExpressionLiteral, Nil)):
506
591
  raise JSONPathSyntaxError(
507
592
  "filter expression literals outside of "
508
593
  "function expressions must be compared",
509
- token=tok,
594
+ token=token,
510
595
  )
511
596
 
512
- return Filter(env=self.env, token=tok, expression=BooleanExpression(expr))
597
+ if keys:
598
+ return KeysFilter(
599
+ env=self.env, token=token, expression=FilterExpression(expr)
600
+ )
601
+
602
+ return Filter(env=self.env, token=token, expression=FilterExpression(expr))
513
603
 
514
- def parse_boolean(self, stream: TokenStream) -> FilterExpression:
515
- if stream.current.kind == TOKEN_TRUE:
604
+ def parse_boolean(self, stream: TokenStream) -> BaseExpression:
605
+ if stream.next().kind == TOKEN_TRUE:
516
606
  return TRUE
517
607
  return FALSE
518
608
 
519
- def parse_nil(self, _: TokenStream) -> FilterExpression:
609
+ def parse_nil(self, stream: TokenStream) -> BaseExpression:
610
+ stream.next()
520
611
  return NIL
521
612
 
522
- def parse_undefined(self, _: TokenStream) -> FilterExpression:
613
+ def parse_undefined(self, stream: TokenStream) -> BaseExpression:
614
+ stream.next()
523
615
  return UNDEFINED_LITERAL
524
616
 
525
- def parse_string_literal(self, stream: TokenStream) -> FilterExpression:
526
- return StringLiteral(value=self._decode_string_literal(stream.current))
617
+ def parse_string_literal(self, stream: TokenStream) -> BaseExpression:
618
+ return StringLiteral(value=self._decode_string_literal(stream.next()))
619
+
620
+ def parse_integer_literal(self, stream: TokenStream) -> BaseExpression:
621
+ token = stream.next()
622
+ value = token.value
623
+
624
+ if self.env.strict and value.startswith("0") and len(value) > 1:
625
+ raise JSONPathSyntaxError("invalid integer literal", token=token)
527
626
 
528
- def parse_integer_literal(self, stream: TokenStream) -> FilterExpression:
529
627
  # Convert to float first to handle scientific notation.
530
- return IntegerLiteral(value=int(float(stream.current.value)))
628
+ return IntegerLiteral(value=int(float(value)))
629
+
630
+ def parse_float_literal(self, stream: TokenStream) -> BaseExpression:
631
+ token = stream.next()
632
+ value = token.value
531
633
 
532
- def parse_float_literal(self, stream: TokenStream) -> FilterExpression:
533
- return FloatLiteral(value=float(stream.current.value))
634
+ if value.startswith("0") and len(value.split(".")[0]) > 1:
635
+ raise JSONPathSyntaxError("invalid float literal", token=token)
534
636
 
535
- def parse_prefix_expression(self, stream: TokenStream) -> FilterExpression:
536
- tok = stream.next_token()
537
- assert tok.kind == TOKEN_NOT
637
+ return FloatLiteral(value=float(value))
638
+
639
+ def parse_prefix_expression(self, stream: TokenStream) -> BaseExpression:
640
+ token = stream.next()
641
+ assert token.kind == TOKEN_NOT
538
642
  return PrefixExpression(
539
643
  operator="!",
540
- right=self.parse_filter_selector(stream, precedence=self.PRECEDENCE_PREFIX),
644
+ right=self.parse_filter_expression(
645
+ stream, precedence=self.PRECEDENCE_PREFIX
646
+ ),
541
647
  )
542
648
 
543
649
  def parse_infix_expression(
544
- self, stream: TokenStream, left: FilterExpression
545
- ) -> FilterExpression:
546
- tok = stream.next_token()
547
- precedence = self.PRECEDENCES.get(tok.kind, self.PRECEDENCE_LOWEST)
548
- right = self.parse_filter_selector(stream, precedence)
549
- operator = self.BINARY_OPERATORS[tok.kind]
650
+ self, stream: TokenStream, left: BaseExpression
651
+ ) -> BaseExpression:
652
+ token = stream.next()
653
+ precedence = self.PRECEDENCES.get(token.kind, self.PRECEDENCE_LOWEST)
654
+ right = self.parse_filter_expression(stream, precedence)
655
+ operator = self.BINARY_OPERATORS[token.kind]
550
656
 
551
657
  if self.env.well_typed and operator in self.COMPARISON_OPERATORS:
552
- self._raise_for_non_comparable_function(left, tok)
553
- self._raise_for_non_comparable_function(right, tok)
658
+ self._raise_for_non_comparable_function(left, token)
659
+ self._raise_for_non_comparable_function(right, token)
554
660
 
555
661
  if operator not in self.INFIX_LITERAL_OPERATORS:
556
- if isinstance(left, (Literal, Nil)):
662
+ if isinstance(left, (FilterExpressionLiteral, Nil)):
557
663
  raise JSONPathSyntaxError(
558
664
  "filter expression literals outside of "
559
665
  "function expressions must be compared",
560
- token=tok,
666
+ token=token,
561
667
  )
562
- if isinstance(right, (Literal, Nil)):
668
+ if isinstance(right, (FilterExpressionLiteral, Nil)):
563
669
  raise JSONPathSyntaxError(
564
670
  "filter expression literals outside of "
565
671
  "function expressions must be compared",
566
- token=tok,
672
+ token=token,
567
673
  )
568
674
 
569
675
  return InfixExpression(left, operator, right)
570
676
 
571
- def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression:
572
- stream.next_token()
573
- expr = self.parse_filter_selector(stream)
574
- stream.next_token()
677
+ def parse_grouped_expression(self, stream: TokenStream) -> BaseExpression:
678
+ _token = stream.eat(TOKEN_LPAREN)
679
+ expr = self.parse_filter_expression(stream)
575
680
 
576
- while stream.current.kind != TOKEN_RPAREN:
577
- if stream.current.kind == TOKEN_EOF:
578
- raise JSONPathSyntaxError(
579
- "unbalanced parentheses", token=stream.current
580
- )
581
-
582
- if stream.current.kind not in self.BINARY_OPERATORS:
583
- raise JSONPathSyntaxError(
584
- f"expected an expression, found '{stream.current.value}'",
585
- token=stream.current,
586
- )
681
+ while stream.current().kind != TOKEN_RPAREN:
682
+ token = stream.current()
683
+ if token.kind in (TOKEN_EOF, TOKEN_RBRACKET):
684
+ raise JSONPathSyntaxError("unbalanced parentheses", token=_token)
587
685
 
588
686
  expr = self.parse_infix_expression(stream, expr)
589
687
 
590
- stream.expect(TOKEN_RPAREN)
688
+ stream.eat(TOKEN_RPAREN)
591
689
  return expr
592
690
 
593
- def parse_root_path(self, stream: TokenStream) -> FilterExpression:
594
- root = stream.next_token()
595
- return RootPath(
691
+ def parse_absolute_query(self, stream: TokenStream) -> BaseExpression:
692
+ root = stream.next() # Could be TOKEN_ROOT or TOKEN_PSEUDO_ROOT
693
+ return RootFilterQuery(
596
694
  JSONPath(
597
695
  env=self.env,
598
- selectors=self.parse_path(stream, in_filter=True),
599
- fake_root=root.kind == TOKEN_FAKE_ROOT,
696
+ segments=self.parse_query(stream),
697
+ pseudo_root=root.kind == TOKEN_PSEUDO_ROOT,
600
698
  )
601
699
  )
602
700
 
603
- def parse_self_path(self, stream: TokenStream) -> FilterExpression:
604
- stream.next_token()
605
- return SelfPath(
606
- JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True))
701
+ def parse_relative_query(self, stream: TokenStream) -> BaseExpression:
702
+ stream.eat(TOKEN_SELF)
703
+ return RelativeFilterQuery(
704
+ JSONPath(env=self.env, segments=self.parse_query(stream))
705
+ )
706
+
707
+ def parse_singular_query_selector(
708
+ self, stream: TokenStream
709
+ ) -> SingularQuerySelector:
710
+ token = (
711
+ stream.next() if stream.current().kind == TOKEN_ROOT else stream.current()
712
+ )
713
+
714
+ query = JSONPath(env=self.env, segments=self.parse_query(stream))
715
+
716
+ if not query.singular_query():
717
+ raise JSONPathSyntaxError(
718
+ "embedded query selectors must be singular queries", token=token
719
+ )
720
+
721
+ return SingularQuerySelector(
722
+ env=self.env,
723
+ token=token,
724
+ query=query,
607
725
  )
608
726
 
609
- def parse_current_key(self, _: TokenStream) -> FilterExpression:
727
+ def parse_current_key(self, stream: TokenStream) -> BaseExpression:
728
+ stream.next()
610
729
  return CURRENT_KEY
611
730
 
612
- def parse_filter_context_path(self, stream: TokenStream) -> FilterExpression:
613
- stream.next_token()
731
+ def parse_filter_context_path(self, stream: TokenStream) -> BaseExpression:
732
+ stream.next()
614
733
  return FilterContextPath(
615
- JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True))
734
+ JSONPath(env=self.env, segments=self.parse_query(stream))
616
735
  )
617
736
 
618
- def parse_regex(self, stream: TokenStream) -> FilterExpression:
619
- pattern = stream.current.value
737
+ def parse_regex(self, stream: TokenStream) -> BaseExpression:
738
+ pattern = stream.current().value
620
739
  flags = 0
621
- if stream.peek.kind == TOKEN_RE_FLAGS:
622
- stream.next_token()
623
- for flag in set(stream.current.value):
740
+ if stream.peek().kind == TOKEN_RE_FLAGS:
741
+ stream.next()
742
+ for flag in set(stream.next().value):
624
743
  flags |= self.RE_FLAG_MAP[flag]
625
744
  return RegexLiteral(value=re.compile(pattern, flags))
626
745
 
627
def parse_list_literal(self, stream: TokenStream) -> BaseExpression:
    """Parse a bracketed, comma separated list literal.

    Items are parsed by dispatching on the current token kind through
    `list_item_map`. Whitespace is skipped around items and commas.

    Args:
        stream: The token stream, positioned on the opening bracket.

    Returns:
        A `ListLiteral` containing the parsed items in source order.

    Raises:
        JSONPathSyntaxError: If a `KeyError` is raised while looking up or
            running the item parser for the current token.
    """
    stream.eat(TOKEN_LBRACKET)
    items: List[BaseExpression] = []

    stream.skip_whitespace()
    while stream.current().kind != TOKEN_RBRACKET:
        try:
            items.append(self.list_item_map[stream.current().kind](stream))
        except KeyError as err:
            # Report whichever token the stream is on when the lookup failed.
            offending = stream.current()
            raise JSONPathSyntaxError(
                f"unexpected {offending.value!r}",
                token=offending,
            ) from err

        stream.skip_whitespace()
        if stream.current().kind != TOKEN_RBRACKET:
            # Anything other than the closing bracket must be a separator.
            stream.eat(TOKEN_COMMA)
            stream.skip_whitespace()

        # Mirror the whitespace skip performed before every bracket check.
        stream.skip_whitespace()

    stream.eat(TOKEN_RBRACKET)
    return ListLiteral(items)
647
771
 
648
def parse_function_extension(self, stream: TokenStream) -> BaseExpression:
    """Parse a function call and its argument list.

    Each argument is parsed by dispatching on its first token kind through
    `function_argument_map`. While the token after an argument is one of
    `BINARY_OPERATORS`, the argument is extended with
    `parse_infix_expression`, so comparison and logical expressions are
    accepted as arguments.

    Args:
        stream: The token stream, positioned on the function name token.

    Returns:
        A `FunctionExtension` built from the function token's value and the
        arguments returned by the environment's
        `validate_function_extension_signature`.

    Raises:
        JSONPathSyntaxError: If an argument starts with a token kind that is
            not in `function_argument_map`.
    """
    name_token = stream.next()
    stream.eat(TOKEN_LPAREN)
    arguments: List[BaseExpression] = []

    while True:
        stream.skip_whitespace()
        first = stream.current()

        if first.kind == TOKEN_RPAREN:
            break

        try:
            parse_argument = self.function_argument_map[first.kind]
        except KeyError as err:
            raise JSONPathSyntaxError(
                f"unexpected {first.value!r}", token=first
            ) from err

        argument = parse_argument(stream)
        stream.skip_whitespace()

        # Fold any trailing binary operators into the argument expression.
        while stream.current().kind in self.BINARY_OPERATORS:
            argument = self.parse_infix_expression(stream, argument)

        arguments.append(argument)
        stream.skip_whitespace()

        # Anything other than the closing parenthesis must be a separator.
        if stream.current().kind != TOKEN_RPAREN:
            stream.eat(TOKEN_COMMA)

    stream.eat(TOKEN_RPAREN)

    checked_arguments = self.env.validate_function_extension_signature(
        name_token, arguments
    )
    return FunctionExtension(name_token.value, checked_arguments)
682
811
 
683
def parse_filter_expression(
    self, stream: TokenStream, precedence: int = PRECEDENCE_LOWEST
) -> BaseExpression:
    """Parse a filter expression using precedence climbing.

    The left-hand operand is parsed by dispatching on the first
    non-whitespace token kind through `token_map`. Binary operators whose
    precedence is not lower than `precedence` are then folded in with
    `parse_infix_expression`.

    Args:
        stream: The token stream being parsed.
        precedence: Operators with a precedence below this value end the
            expression.

    Returns:
        The parsed expression.

    Raises:
        JSONPathSyntaxError: If a `KeyError` is raised while looking up or
            running the parser for the first token.
    """
    stream.skip_whitespace()
    first = stream.current()

    try:
        left = self.token_map[first.kind](stream)
    except KeyError as err:
        described = (
            "end of expression"
            if first.kind in (TOKEN_EOF, TOKEN_RBRACKET)
            else repr(first.value)
        )
        raise JSONPathSyntaxError(f"unexpected {described}", token=first) from err

    while True:
        stream.skip_whitespace()
        kind = stream.current().kind

        # Stop at the first token that is not a binary operator.
        if kind not in self.BINARY_OPERATORS:
            break

        # Stop when the operator binds less tightly than the caller requires.
        if self.PRECEDENCES.get(kind, self.PRECEDENCE_LOWEST) < precedence:
            break

        left = self.parse_infix_expression(stream, left)

    return left
712
840
 
713
841
  def _decode_string_literal(self, token: Token) -> str:
842
+ if self.env.strict:
843
+ # For strict compliance with RC 9535, we must unescape string literals
844
+ # ourself. RFC 9535 is more strict than json.loads when it comes to
845
+ # parsing \uXXXX escape sequences.
846
+ return unescape_string(
847
+ token.value,
848
+ token,
849
+ "'" if token.kind == TOKEN_SINGLE_QUOTE_STRING else '"',
850
+ )
851
+
714
852
  if self.env.unicode_escape:
715
853
  if token.kind == TOKEN_SINGLE_QUOTE_STRING:
716
854
  value = token.value.replace('"', '\\"').replace("\\'", "'")
717
855
  else:
718
856
  value = token.value
857
+
719
858
  try:
720
859
  rv = json.loads(f'"{value}"')
721
860
  assert isinstance(rv, str)
722
861
  return rv
723
862
  except json.JSONDecodeError as err:
724
- raise JSONPathSyntaxError(str(err).split(":")[1], token=token) from None
863
+ message = f"decode error: {str(err).split(':')[1]}"
864
+ raise JSONPathSyntaxError(message, token=token) from None
725
865
 
726
866
  return token.value
727
867
 
728
868
  def _raise_for_non_comparable_function(
729
- self, expr: FilterExpression, token: Token
869
+ self, expr: BaseExpression, token: Token
730
870
  ) -> None:
731
- if isinstance(expr, Path) and not expr.path.singular_query():
871
+ if isinstance(expr, FilterQuery) and not expr.path.singular_query():
732
872
  raise JSONPathTypeError("non-singular query is not comparable", token=token)
733
873
 
734
874
  if isinstance(expr, FunctionExtension):
@@ -740,3 +880,9 @@ class Parser:
740
880
  raise JSONPathTypeError(
741
881
  f"result of {expr.name}() is not comparable", token
742
882
  )
883
+
884
def _raise_for_leading_zero(self, token: Token) -> None:
    """Reject a token whose value has a leading zero.

    A value is rejected when it is longer than one character and starts
    with "0", or when it starts with "-0".

    Args:
        token: The token whose `value` is checked.

    Raises:
        JSONPathSyntaxError: If the value has a leading zero.
    """
    text = token.value
    padded_positive = len(text) > 1 and text.startswith("0")
    if padded_positive or text.startswith("-0"):
        raise JSONPathSyntaxError("leading zero in index selector", token=token)