python-jsonpath 1.3.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jsonpath/env.py CHANGED
@@ -2,7 +2,20 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import re
5
+ try:
6
+ import regex # noqa: F401
7
+
8
+ REGEX_AVAILABLE = True
9
+ except ImportError:
10
+ REGEX_AVAILABLE = False
11
+
12
+ try:
13
+ import iregexp_check # noqa: F401
14
+
15
+ IREGEXP_AVAILABLE = True
16
+ except ImportError:
17
+ IREGEXP_AVAILABLE = False
18
+
6
19
  from decimal import Decimal
7
20
  from operator import getitem
8
21
  from typing import TYPE_CHECKING
@@ -24,10 +37,10 @@ from .exceptions import JSONPathSyntaxError
24
37
  from .exceptions import JSONPathTypeError
25
38
  from .filter import UNDEFINED
26
39
  from .filter import VALUE_TYPE_EXPRESSIONS
27
- from .filter import FilterExpression
40
+ from .filter import BaseExpression
41
+ from .filter import FilterQuery
28
42
  from .filter import FunctionExtension
29
43
  from .filter import InfixExpression
30
- from .filter import Path
31
44
  from .fluent_api import Query
32
45
  from .function_extensions import ExpressionType
33
46
  from .function_extensions import FilterFunction
@@ -40,14 +53,13 @@ from .path import CompoundJSONPath
40
53
  from .path import JSONPath
41
54
  from .stream import TokenStream
42
55
  from .token import TOKEN_EOF
43
- from .token import TOKEN_FAKE_ROOT
44
56
  from .token import TOKEN_INTERSECTION
57
+ from .token import TOKEN_PSEUDO_ROOT
45
58
  from .token import TOKEN_UNION
46
59
  from .token import Token
47
60
 
48
61
  if TYPE_CHECKING:
49
- from io import IOBase
50
-
62
+ from ._types import JSONData
51
63
  from .match import FilterContextVars
52
64
 
53
65
 
@@ -88,12 +100,14 @@ class JSONPathEnvironment:
88
100
  well-typedness at compile time.
89
101
 
90
102
  **New in version 0.10.0**
103
+ strict: When `True`, follow RFC 9535 strictly.
104
+ **New in version 2.0.0**
91
105
 
92
106
  ## Class attributes
93
107
 
94
108
  Attributes:
95
- fake_root_token (str): The pattern used to select a "fake" root node, one level
96
- above the real root node.
109
+ pseudo_root_token (str): The pattern used to select a "fake" root node, one
110
+ level above the real root node.
97
111
  filter_context_token (str): The pattern used to select extra filter context
98
112
  data. Defaults to `"_"`.
99
113
  intersection_token (str): The pattern used as the intersection operator.
@@ -102,11 +116,16 @@ class JSONPathEnvironment:
102
116
  filtering a mapping or sequence. Defaults to `"#"`.
103
117
  keys_selector_token (str): The pattern used as the "keys" selector. Defaults to
104
118
  `"~"`.
119
+ keys_filter_token (str): The pattern used as the "keys filter" selector.
120
+ Defaults to `"~?"`.
105
121
  lexer_class: The lexer to use when tokenizing path strings.
106
122
  max_int_index (int): The maximum integer allowed when selecting array items by
107
123
  index. Defaults to `(2**53) - 1`.
108
124
  min_int_index (int): The minimum integer allowed when selecting array items by
109
125
  index. Defaults to `-(2**53) + 1`.
126
+ max_recursion_depth (int): The maximum number of dict/objects and/or arrays/
127
+ lists the recursive descent selector can visit before a
128
+ `JSONPathRecursionError` is raised.
110
129
  parser_class: The parser to use when parsing tokens from the lexer.
111
130
  root_token (str): The pattern used to select the root node in a JSON document.
112
131
  Defaults to `"$"`.
@@ -115,19 +134,21 @@ class JSONPathEnvironment:
115
134
  union_token (str): The pattern used as the union operator. Defaults to `"|"`.
116
135
  """
117
136
 
118
- # These should be unescaped strings. `re.escape` will be called
119
- # on them automatically when compiling lexer rules.
120
- fake_root_token = "^"
137
+ # These should be unescaped strings. `re.escape` will be called on them
138
+ # automatically when compiling lexer rules.
139
+ pseudo_root_token = "^"
121
140
  filter_context_token = "_"
122
141
  intersection_token = "&"
123
142
  key_token = "#"
124
143
  keys_selector_token = "~"
144
+ keys_filter_token = "~?"
125
145
  root_token = "$"
126
146
  self_token = "@"
127
147
  union_token = "|"
128
148
 
129
149
  max_int_index = (2**53) - 1
130
150
  min_int_index = -(2**53) + 1
151
+ max_recursion_depth = 100
131
152
 
132
153
  # Override these to customize path tokenization and parsing.
133
154
  lexer_class: Type[Lexer] = Lexer
@@ -140,6 +161,7 @@ class JSONPathEnvironment:
140
161
  filter_caching: bool = True,
141
162
  unicode_escape: bool = True,
142
163
  well_typed: bool = True,
164
+ strict: bool = False,
143
165
  ) -> None:
144
166
  self.filter_caching: bool = filter_caching
145
167
  """Enable or disable filter expression caching."""
@@ -151,6 +173,24 @@ class JSONPathEnvironment:
151
173
  self.well_typed: bool = well_typed
152
174
  """Control well-typedness checks on filter function expressions."""
153
175
 
176
+ self.strict: bool = strict
177
+ """When `True`, follow RFC 9535 strictly.
178
+
179
+ This includes things like enforcing a leading root identifier and
180
+ ensuring there's no leading or trailing whitespace when parsing a
181
+ JSONPath query.
182
+ """
183
+
184
+ self.regex_available: bool = REGEX_AVAILABLE
185
+ """When `True`, the third party `regex` package is available."""
186
+
187
+ self.iregexp_available: bool = IREGEXP_AVAILABLE
188
+ """When `True`, the iregexp_check package is available.
189
+
190
+ iregexp_check will be used to validate regular expressions against RFC 9485,
191
+ if available.
192
+ """
193
+
154
194
  self.lexer: Lexer = self.lexer_class(env=self)
155
195
  """The lexer bound to this environment."""
156
196
 
@@ -180,46 +220,53 @@ class JSONPathEnvironment:
180
220
  """
181
221
  tokens = self.lexer.tokenize(path)
182
222
  stream = TokenStream(tokens)
183
- fake_root = stream.current.kind == TOKEN_FAKE_ROOT
223
+ pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT
184
224
  _path: Union[JSONPath, CompoundJSONPath] = JSONPath(
185
- env=self, selectors=self.parser.parse(stream), fake_root=fake_root
225
+ env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root
186
226
  )
187
227
 
188
- if stream.current.kind != TOKEN_EOF:
228
+ if stream.skip_whitespace() and self.strict:
229
+ raise JSONPathSyntaxError(
230
+ "unexpected whitespace", token=stream.tokens[stream.pos - 1]
231
+ )
232
+
233
+ if stream.current().kind != TOKEN_EOF:
189
234
  _path = CompoundJSONPath(env=self, path=_path)
190
- while stream.current.kind != TOKEN_EOF:
191
- if stream.peek.kind == TOKEN_EOF:
235
+ while stream.current().kind != TOKEN_EOF:
236
+ if stream.peek().kind == TOKEN_EOF:
192
237
  # trailing union or intersection
193
238
  raise JSONPathSyntaxError(
194
- f"expected a path after {stream.current.value!r}",
195
- token=stream.current,
239
+ f"expected a path after {stream.current().value!r}",
240
+ token=stream.current(),
196
241
  )
197
242
 
198
- if stream.current.kind == TOKEN_UNION:
199
- stream.next_token()
200
- fake_root = stream.current.kind == TOKEN_FAKE_ROOT
243
+ if stream.current().kind == TOKEN_UNION:
244
+ stream.next()
245
+ stream.skip_whitespace()
246
+ pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT
201
247
  _path = _path.union(
202
248
  JSONPath(
203
249
  env=self,
204
- selectors=self.parser.parse(stream),
205
- fake_root=fake_root,
250
+ segments=self.parser.parse(stream),
251
+ pseudo_root=pseudo_root,
206
252
  )
207
253
  )
208
- elif stream.current.kind == TOKEN_INTERSECTION:
209
- stream.next_token()
210
- fake_root = stream.current.kind == TOKEN_FAKE_ROOT
254
+ elif stream.current().kind == TOKEN_INTERSECTION:
255
+ stream.next()
256
+ stream.skip_whitespace()
257
+ pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT
211
258
  _path = _path.intersection(
212
259
  JSONPath(
213
260
  env=self,
214
- selectors=self.parser.parse(stream),
215
- fake_root=fake_root,
261
+ segments=self.parser.parse(stream),
262
+ pseudo_root=pseudo_root,
216
263
  )
217
264
  )
218
265
  else: # pragma: no cover
219
266
  # Parser.parse catches this too
220
267
  raise JSONPathSyntaxError( # noqa: TRY003
221
- f"unexpected token {stream.current.value!r}",
222
- token=stream.current,
268
+ f"unexpected token {stream.current().value!r}",
269
+ token=stream.current(),
223
270
  )
224
271
 
225
272
  return _path
@@ -227,7 +274,7 @@ class JSONPathEnvironment:
227
274
  def findall(
228
275
  self,
229
276
  path: str,
230
- data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]],
277
+ data: JSONData,
231
278
  *,
232
279
  filter_context: Optional[FilterContextVars] = None,
233
280
  ) -> List[object]:
@@ -257,7 +304,7 @@ class JSONPathEnvironment:
257
304
  def finditer(
258
305
  self,
259
306
  path: str,
260
- data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]],
307
+ data: JSONData,
261
308
  *,
262
309
  filter_context: Optional[FilterContextVars] = None,
263
310
  ) -> Iterable[JSONPathMatch]:
@@ -286,7 +333,7 @@ class JSONPathEnvironment:
286
333
  def match(
287
334
  self,
288
335
  path: str,
289
- data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]],
336
+ data: JSONData,
290
337
  *,
291
338
  filter_context: Optional[FilterContextVars] = None,
292
339
  ) -> Union[JSONPathMatch, None]:
@@ -315,7 +362,8 @@ class JSONPathEnvironment:
315
362
  def query(
316
363
  self,
317
364
  path: str,
318
- data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]],
365
+ data: JSONData,
366
+ *,
319
367
  filter_context: Optional[FilterContextVars] = None,
320
368
  ) -> Query:
321
369
  """Return a `Query` iterator over matches found by applying _path_ to _data_.
@@ -374,7 +422,7 @@ class JSONPathEnvironment:
374
422
  async def findall_async(
375
423
  self,
376
424
  path: str,
377
- data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]],
425
+ data: JSONData,
378
426
  *,
379
427
  filter_context: Optional[FilterContextVars] = None,
380
428
  ) -> List[object]:
@@ -386,7 +434,7 @@ class JSONPathEnvironment:
386
434
  async def finditer_async(
387
435
  self,
388
436
  path: str,
389
- data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]],
437
+ data: JSONData,
390
438
  *,
391
439
  filter_context: Optional[FilterContextVars] = None,
392
440
  ) -> AsyncIterable[JSONPathMatch]:
@@ -402,10 +450,13 @@ class JSONPathEnvironment:
402
450
  self.function_extensions["match"] = function_extensions.Match()
403
451
  self.function_extensions["search"] = function_extensions.Search()
404
452
  self.function_extensions["value"] = function_extensions.Value()
405
- self.function_extensions["isinstance"] = function_extensions.IsInstance()
406
- self.function_extensions["is"] = self.function_extensions["isinstance"]
407
- self.function_extensions["typeof"] = function_extensions.TypeOf()
408
- self.function_extensions["type"] = self.function_extensions["typeof"]
453
+
454
+ if not self.strict:
455
+ self.function_extensions["isinstance"] = function_extensions.IsInstance()
456
+ self.function_extensions["is"] = self.function_extensions["isinstance"]
457
+ self.function_extensions["typeof"] = function_extensions.TypeOf()
458
+ self.function_extensions["type"] = self.function_extensions["typeof"]
459
+ self.function_extensions["startswith"] = function_extensions.StartsWith()
409
460
 
410
461
  def validate_function_extension_signature(
411
462
  self, token: Token, args: List[Any]
@@ -440,13 +491,14 @@ class JSONPathEnvironment:
440
491
  self,
441
492
  token: Token,
442
493
  func: FilterFunction,
443
- args: List[FilterExpression],
494
+ args: List[BaseExpression],
444
495
  ) -> None:
445
496
  """Check the well-typedness of a function's arguments at compile-time."""
446
497
  # Correct number of arguments?
447
498
  if len(args) != len(func.arg_types):
499
+ plural = "" if len(func.arg_types) == 1 else "s"
448
500
  raise JSONPathTypeError(
449
- f"{token.value!r}() requires {len(func.arg_types)} arguments",
501
+ f"{token.value}() requires {len(func.arg_types)} argument{plural}",
450
502
  token=token,
451
503
  )
452
504
 
@@ -456,7 +508,7 @@ class JSONPathEnvironment:
456
508
  if typ == ExpressionType.VALUE:
457
509
  if not (
458
510
  isinstance(arg, VALUE_TYPE_EXPRESSIONS)
459
- or (isinstance(arg, Path) and arg.path.singular_query())
511
+ or (isinstance(arg, FilterQuery) and arg.path.singular_query())
460
512
  or (self._function_return_type(arg) == ExpressionType.VALUE)
461
513
  ):
462
514
  raise JSONPathTypeError(
@@ -464,13 +516,13 @@ class JSONPathEnvironment:
464
516
  token=token,
465
517
  )
466
518
  elif typ == ExpressionType.LOGICAL:
467
- if not isinstance(arg, (Path, InfixExpression)):
519
+ if not isinstance(arg, (FilterQuery, InfixExpression)):
468
520
  raise JSONPathTypeError(
469
521
  f"{token.value}() argument {idx} must be of LogicalType",
470
522
  token=token,
471
523
  )
472
524
  elif typ == ExpressionType.NODES and not (
473
- isinstance(arg, Path)
525
+ isinstance(arg, FilterQuery)
474
526
  or self._function_return_type(arg) == ExpressionType.NODES
475
527
  ):
476
528
  raise JSONPathTypeError(
@@ -478,7 +530,7 @@ class JSONPathEnvironment:
478
530
  token=token,
479
531
  )
480
532
 
481
- def _function_return_type(self, expr: FilterExpression) -> Optional[ExpressionType]:
533
+ def _function_return_type(self, expr: BaseExpression) -> Optional[ExpressionType]:
482
534
  """Return the type returned from a filter function.
483
535
 
484
536
  If _expr_ is not a `FunctionExtension` or the registered function definition is
@@ -568,7 +620,8 @@ class JSONPathEnvironment:
568
620
  return left in right
569
621
  if operator == "contains" and isinstance(left, (Mapping, Sequence)):
570
622
  return right in left
571
- if operator == "=~" and isinstance(right, re.Pattern) and isinstance(left, str):
623
+ if operator == "=~" and hasattr(right, "fullmatch") and isinstance(left, str):
624
+ # Right should be a regex.Pattern or an re.Pattern.
572
625
  return bool(right.fullmatch(left))
573
626
  return False
574
627
 
jsonpath/exceptions.py CHANGED
@@ -5,6 +5,8 @@ from __future__ import annotations
5
5
  from typing import TYPE_CHECKING
6
6
  from typing import Optional
7
7
 
8
+ from .token import TOKEN_EOF
9
+
8
10
  if TYPE_CHECKING:
9
11
  from .token import Token
10
12
 
@@ -22,13 +24,69 @@ class JSONPathError(Exception):
22
24
  self.token: Optional[Token] = token
23
25
 
24
26
  def __str__(self) -> str:
25
- msg = super().__str__()
27
+ return self.detailed_message()
26
28
 
29
+ def detailed_message(self) -> str:
30
+ """Return an error message formatted with extra context info."""
27
31
  if not self.token:
28
- return msg
32
+ return super().__str__()
29
33
 
30
- line, column = self.token.position()
31
- return f"{msg}, line {line}, column {column}"
34
+ lineno, col, _prev, current, _next = self._error_context(
35
+ self.token.path, self.token.index
36
+ )
37
+
38
+ if self.token.kind == TOKEN_EOF:
39
+ col = len(current)
40
+
41
+ pad = " " * len(str(lineno))
42
+ length = len(self.token.value)
43
+ pointer = (" " * col) + ("^" * max(length, 1))
44
+
45
+ return (
46
+ f"{self.message}\n"
47
+ f"{pad} -> {self.token.path!r} {lineno}:{col}\n"
48
+ f"{pad} |\n"
49
+ f"{lineno} | {current}\n"
50
+ f"{pad} | {pointer} {self.message}\n"
51
+ )
52
+
53
+ @property
54
+ def message(self) -> object:
55
+ """The exception's error message if one was given."""
56
+ if self.args:
57
+ return self.args[0]
58
+ return None
59
+
60
+ def _error_context(self, text: str, index: int) -> tuple[int, int, str, str, str]:
61
+ lines = text.splitlines(keepends=True)
62
+ cumulative_length = 0
63
+ target_line_index = -1
64
+
65
+ for i, line in enumerate(lines):
66
+ cumulative_length += len(line)
67
+ if index < cumulative_length:
68
+ target_line_index = i
69
+ break
70
+
71
+ if target_line_index == -1:
72
+ raise ValueError("index is out of bounds for the given string")
73
+
74
+ # Line number (1-based)
75
+ line_number = target_line_index + 1
76
+ # Column number within the line
77
+ column_number = index - (cumulative_length - len(lines[target_line_index]))
78
+
79
+ previous_line = (
80
+ lines[target_line_index - 1].rstrip() if target_line_index > 0 else ""
81
+ )
82
+ current_line = lines[target_line_index].rstrip()
83
+ next_line = (
84
+ lines[target_line_index + 1].rstrip()
85
+ if target_line_index < len(lines) - 1
86
+ else ""
87
+ )
88
+
89
+ return line_number, column_number, previous_line, current_line, next_line
32
90
 
33
91
 
34
92
  class JSONPathSyntaxError(JSONPathError):
@@ -77,6 +135,19 @@ class JSONPathNameError(JSONPathError):
77
135
  self.token = token
78
136
 
79
137
 
138
+ class JSONPathRecursionError(JSONPathError):
139
+ """An exception raised when the maximum recursion depth is reached.
140
+
141
+ Arguments:
142
+ args: Arguments passed to `Exception`.
143
+ token: The token that caused the error.
144
+ """
145
+
146
+ def __init__(self, *args: object, token: Token) -> None:
147
+ super().__init__(*args)
148
+ self.token = token
149
+
150
+
80
151
  class JSONPointerError(Exception):
81
152
  """Base class for all JSON Pointer errors."""
82
153