python-jsonpath 1.3.2__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,19 @@
1
1
  """The standard `search` function extension."""
2
2
 
3
- import re
3
+ from ._pattern import AbstractRegexFilterFunction
4
4
 
5
- from jsonpath.function_extensions import ExpressionType
6
- from jsonpath.function_extensions import FilterFunction
7
5
 
6
+ class Search(AbstractRegexFilterFunction):
7
+ """The standard `search` function."""
8
8
 
9
- class Search(FilterFunction):
10
- """A type-aware implementation of the standard `search` function."""
9
+ def __call__(self, value: object, pattern: object) -> bool:
10
+ """Return `True` if _value_ matches _pattern_, or `False` otherwise."""
11
+ if not isinstance(value, str) or not isinstance(pattern, str):
12
+ return False
11
13
 
12
- arg_types = [ExpressionType.VALUE, ExpressionType.VALUE]
13
- return_type = ExpressionType.LOGICAL
14
+ _pattern = self.check_cache(pattern)
14
15
 
15
- def __call__(self, string: str, pattern: str) -> bool:
16
- """Return `True` if _string_ contains _pattern_, or `False` otherwise."""
17
- try:
18
- # re.search caches compiled patterns internally
19
- return bool(re.search(pattern, string))
20
- except (TypeError, re.error):
16
+ if _pattern is None:
21
17
  return False
18
+
19
+ return bool(_pattern.search(value))
@@ -0,0 +1,21 @@
1
+ """The `startswith` function extension."""
2
+
3
+ from jsonpath.function_extensions import ExpressionType
4
+ from jsonpath.function_extensions import FilterFunction
5
+
6
+
7
class StartsWith(FilterFunction):
    """The `startswith` function extension.

    Declares two value-type arguments and a logical-type result.
    """

    arg_types = [ExpressionType.VALUE, ExpressionType.VALUE]
    return_type = ExpressionType.LOGICAL

    def __call__(self, value: object, prefix: object) -> bool:
        """Return `True` if `value` starts with `prefix`.

        Both arguments must be strings. If either is not a string the result
        is `False`; no exception is raised.
        """
        if not isinstance(value, str) or not isinstance(prefix, str):
            return False

        # Both operands are known to be `str` here, so `startswith` is always
        # available and no AttributeError guard is required.
        return value.startswith(prefix)
jsonpath/lex.py CHANGED
@@ -10,14 +10,16 @@ from typing import Pattern
10
10
 
11
11
  from .exceptions import JSONPathSyntaxError
12
12
  from .token import TOKEN_AND
13
- from .token import TOKEN_BARE_PROPERTY
13
+ from .token import TOKEN_COLON
14
14
  from .token import TOKEN_COMMA
15
15
  from .token import TOKEN_CONTAINS
16
16
  from .token import TOKEN_DDOT
17
+ from .token import TOKEN_DOT
18
+ from .token import TOKEN_DOT_KEY_PROPERTY
17
19
  from .token import TOKEN_DOT_PROPERTY
18
20
  from .token import TOKEN_DOUBLE_QUOTE_STRING
19
21
  from .token import TOKEN_EQ
20
- from .token import TOKEN_FAKE_ROOT
22
+ from .token import TOKEN_ERROR
21
23
  from .token import TOKEN_FALSE
22
24
  from .token import TOKEN_FILTER
23
25
  from .token import TOKEN_FILTER_CONTEXT
@@ -25,26 +27,27 @@ from .token import TOKEN_FLOAT
25
27
  from .token import TOKEN_FUNCTION
26
28
  from .token import TOKEN_GE
27
29
  from .token import TOKEN_GT
28
- from .token import TOKEN_ILLEGAL
29
30
  from .token import TOKEN_IN
30
31
  from .token import TOKEN_INT
31
32
  from .token import TOKEN_INTERSECTION
32
33
  from .token import TOKEN_KEY
34
+ from .token import TOKEN_KEY_NAME
33
35
  from .token import TOKEN_KEYS
36
+ from .token import TOKEN_KEYS_FILTER
37
+ from .token import TOKEN_LBRACKET
34
38
  from .token import TOKEN_LE
35
39
  from .token import TOKEN_LG
36
- from .token import TOKEN_LIST_SLICE
37
- from .token import TOKEN_LIST_START
38
40
  from .token import TOKEN_LPAREN
39
41
  from .token import TOKEN_LT
40
42
  from .token import TOKEN_MISSING
43
+ from .token import TOKEN_NAME
41
44
  from .token import TOKEN_NE
42
45
  from .token import TOKEN_NIL
43
46
  from .token import TOKEN_NONE
44
47
  from .token import TOKEN_NOT
45
48
  from .token import TOKEN_NULL
46
49
  from .token import TOKEN_OR
47
- from .token import TOKEN_PROPERTY
50
+ from .token import TOKEN_PSEUDO_ROOT
48
51
  from .token import TOKEN_RBRACKET
49
52
  from .token import TOKEN_RE
50
53
  from .token import TOKEN_RE_FLAGS
@@ -53,13 +56,10 @@ from .token import TOKEN_ROOT
53
56
  from .token import TOKEN_RPAREN
54
57
  from .token import TOKEN_SELF
55
58
  from .token import TOKEN_SINGLE_QUOTE_STRING
56
- from .token import TOKEN_SKIP
57
- from .token import TOKEN_SLICE_START
58
- from .token import TOKEN_SLICE_STEP
59
- from .token import TOKEN_SLICE_STOP
60
59
  from .token import TOKEN_TRUE
61
60
  from .token import TOKEN_UNDEFINED
62
61
  from .token import TOKEN_UNION
62
+ from .token import TOKEN_WHITESPACE
63
63
  from .token import TOKEN_WILD
64
64
  from .token import Token
65
65
 
@@ -87,7 +87,7 @@ class Lexer:
87
87
 
88
88
  key_pattern = r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*"
89
89
 
90
- # `not` or !
90
+ # ! or `not`
91
91
  logical_not_pattern = r"(?:not\b)|!"
92
92
 
93
93
  # && or `and`
@@ -103,45 +103,50 @@ class Lexer:
103
103
  self.single_quote_pattern = r"'(?P<G_SQUOTE>(?:(?!(?<!\\)').)*)'"
104
104
 
105
105
  # .thing
106
- self.dot_property_pattern = rf"\.(?P<G_PROP>{self.key_pattern})"
106
+ self.dot_property_pattern = rf"(?P<G_DOT>\.)(?P<G_PROP>{self.key_pattern})"
107
107
 
108
- self.slice_list_pattern = (
109
- r"(?P<G_LSLICE_START>\-?\d*)\s*"
110
- r":\s*(?P<G_LSLICE_STOP>\-?\d*)\s*"
111
- r"(?::\s*(?P<G_LSLICE_STEP>\-?\d*))?"
108
+ # .~thing
109
+ self.dot_key_pattern = (
110
+ r"(?P<G_DOT_KEY>\.)"
111
+ rf"(?P<G_KEY>{re.escape(env.keys_selector_token)})"
112
+ rf"(?P<G_PROP_KEY>{self.key_pattern})"
112
113
  )
113
114
 
114
115
  # /pattern/ or /pattern/flags
115
116
  self.re_pattern = r"/(?P<G_RE>.+?)/(?P<G_RE_FLAGS>[aims]*)"
116
117
 
117
118
  # func(
118
- self.function_pattern = r"(?P<G_FUNC>[a-z][a-z_0-9]+)\(\s*"
119
+ self.function_pattern = r"(?P<G_FUNC>[a-z][a-z_0-9]+)(?P<G_FUNC_PAREN>\()"
119
120
 
120
- self.rules = self.compile_rules()
121
+ self.rules = self.compile_strict_rules() if env.strict else self.compile_rules()
121
122
 
122
123
  def compile_rules(self) -> Pattern[str]:
123
124
  """Prepare regular expression rules."""
124
125
  env_tokens = [
125
126
  (TOKEN_ROOT, self.env.root_token),
126
- (TOKEN_FAKE_ROOT, self.env.fake_root_token),
127
+ (TOKEN_PSEUDO_ROOT, self.env.pseudo_root_token),
127
128
  (TOKEN_SELF, self.env.self_token),
128
129
  (TOKEN_KEY, self.env.key_token),
129
130
  (TOKEN_UNION, self.env.union_token),
130
131
  (TOKEN_INTERSECTION, self.env.intersection_token),
131
132
  (TOKEN_FILTER_CONTEXT, self.env.filter_context_token),
132
133
  (TOKEN_KEYS, self.env.keys_selector_token),
134
+ (TOKEN_KEYS_FILTER, self.env.keys_filter_token),
133
135
  ]
134
136
 
135
137
  rules = [
136
138
  (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
137
139
  (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
138
140
  (TOKEN_RE_PATTERN, self.re_pattern),
139
- (TOKEN_LIST_SLICE, self.slice_list_pattern),
140
- (TOKEN_FUNCTION, self.function_pattern),
141
+ (TOKEN_DOT_KEY_PROPERTY, self.dot_key_pattern),
141
142
  (TOKEN_DOT_PROPERTY, self.dot_property_pattern),
142
- (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"),
143
- (TOKEN_INT, r"-?\d+(?P<G_EXP>[eE][+\-]?\d+)?\b"),
143
+ (
144
+ TOKEN_FLOAT,
145
+ r"(:?-?[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?)|(-?[0-9]+[eE]-[0-9]+)",
146
+ ),
147
+ (TOKEN_INT, r"-?[0-9]+(?:[eE]\+?[0-9]+)?"),
144
148
  (TOKEN_DDOT, r"\.\."),
149
+ (TOKEN_DOT, r"\."),
145
150
  (TOKEN_AND, self.logical_and_pattern),
146
151
  (TOKEN_OR, self.logical_or_pattern),
147
152
  *[
@@ -162,9 +167,10 @@ class Lexer:
162
167
  (TOKEN_CONTAINS, r"contains\b"),
163
168
  (TOKEN_UNDEFINED, r"undefined\b"),
164
169
  (TOKEN_MISSING, r"missing\b"),
165
- (TOKEN_LIST_START, r"\["),
170
+ (TOKEN_LBRACKET, r"\["),
166
171
  (TOKEN_RBRACKET, r"]"),
167
172
  (TOKEN_COMMA, r","),
173
+ (TOKEN_COLON, r":"),
168
174
  (TOKEN_EQ, r"=="),
169
175
  (TOKEN_NE, r"!="),
170
176
  (TOKEN_LG, r"<>"),
@@ -173,12 +179,70 @@ class Lexer:
173
179
  (TOKEN_RE, r"=~"),
174
180
  (TOKEN_LT, r"<"),
175
181
  (TOKEN_GT, r">"),
176
- (TOKEN_NOT, self.logical_not_pattern),
177
- (TOKEN_BARE_PROPERTY, self.key_pattern),
182
+ (TOKEN_NOT, self.logical_not_pattern), # Must go after "!="
183
+ (TOKEN_FUNCTION, self.function_pattern),
184
+ (TOKEN_NAME, self.key_pattern), # Must go after reserved words
178
185
  (TOKEN_LPAREN, r"\("),
179
186
  (TOKEN_RPAREN, r"\)"),
180
- (TOKEN_SKIP, r"[ \n\t\r\.]+"),
181
- (TOKEN_ILLEGAL, r"."),
187
+ (TOKEN_WHITESPACE, r"[ \n\t\r]+"),
188
+ (TOKEN_ERROR, r"."),
189
+ ]
190
+
191
+ return re.compile(
192
+ "|".join(f"(?P<{token}>{pattern})" for token, pattern in rules),
193
+ re.DOTALL,
194
+ )
195
+
196
+ def compile_strict_rules(self) -> Pattern[str]:
197
+ """Prepare regular expression rules in strict mode."""
198
+ env_tokens = [
199
+ (TOKEN_ROOT, self.env.root_token),
200
+ (TOKEN_SELF, self.env.self_token),
201
+ ]
202
+
203
+ rules = [
204
+ (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
205
+ (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
206
+ (TOKEN_DOT_PROPERTY, self.dot_property_pattern),
207
+ (
208
+ TOKEN_FLOAT,
209
+ r"(:?-?[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?)|(-?[0-9]+[eE]-[0-9]+)",
210
+ ),
211
+ (TOKEN_INT, r"-?[0-9]+(?:[eE]\+?[0-9]+)?"),
212
+ (TOKEN_DDOT, r"\.\."),
213
+ (TOKEN_DOT, r"\."),
214
+ (TOKEN_AND, r"&&"),
215
+ (TOKEN_OR, r"\|\|"),
216
+ *[
217
+ (token, re.escape(pattern))
218
+ for token, pattern in sorted(
219
+ env_tokens, key=lambda x: len(x[1]), reverse=True
220
+ )
221
+ if pattern
222
+ ],
223
+ (TOKEN_WILD, r"\*"),
224
+ (TOKEN_FILTER, r"\?"),
225
+ (TOKEN_TRUE, r"true\b"),
226
+ (TOKEN_FALSE, r"false\b"),
227
+ (TOKEN_NULL, r"null\b"),
228
+ (TOKEN_LBRACKET, r"\["),
229
+ (TOKEN_RBRACKET, r"]"),
230
+ (TOKEN_COMMA, r","),
231
+ (TOKEN_COLON, r":"),
232
+ (TOKEN_EQ, r"=="),
233
+ (TOKEN_NE, r"!="),
234
+ (TOKEN_LG, r"<>"),
235
+ (TOKEN_LE, r"<="),
236
+ (TOKEN_GE, r">="),
237
+ (TOKEN_LT, r"<"),
238
+ (TOKEN_GT, r">"),
239
+ (TOKEN_NOT, r"!"), # Must go after "!="
240
+ (TOKEN_FUNCTION, self.function_pattern),
241
+ (TOKEN_NAME, self.key_pattern), # Must go after reserved words
242
+ (TOKEN_LPAREN, r"\("),
243
+ (TOKEN_RPAREN, r"\)"),
244
+ (TOKEN_WHITESPACE, r"[ \n\t\r]+"),
245
+ (TOKEN_ERROR, r"."),
182
246
  ]
183
247
 
184
248
  return re.compile(
@@ -196,31 +260,25 @@ class Lexer:
196
260
 
197
261
  if kind == TOKEN_DOT_PROPERTY:
198
262
  yield _token(
199
- kind=TOKEN_PROPERTY,
200
- value=match.group("G_PROP"),
201
- index=match.start("G_PROP"),
263
+ kind=TOKEN_DOT,
264
+ value=match.group("G_DOT"),
265
+ index=match.start("G_DOT"),
202
266
  )
203
- elif kind == TOKEN_BARE_PROPERTY:
204
267
  yield _token(
205
- kind=TOKEN_BARE_PROPERTY,
206
- value=match.group(),
207
- index=match.start(),
208
- )
209
- elif kind == TOKEN_LIST_SLICE:
210
- yield _token(
211
- kind=TOKEN_SLICE_START,
212
- value=match.group("G_LSLICE_START"),
213
- index=match.start("G_LSLICE_START"),
268
+ kind=TOKEN_NAME,
269
+ value=match.group("G_PROP"),
270
+ index=match.start("G_PROP"),
214
271
  )
272
+ elif kind == TOKEN_DOT_KEY_PROPERTY:
215
273
  yield _token(
216
- kind=TOKEN_SLICE_STOP,
217
- value=match.group("G_LSLICE_STOP"),
218
- index=match.start("G_LSLICE_STOP"),
274
+ kind=TOKEN_DOT,
275
+ value=match.group("G_DOT_KEY"),
276
+ index=match.start("G_DOT_KEY"),
219
277
  )
220
278
  yield _token(
221
- kind=TOKEN_SLICE_STEP,
222
- value=match.group("G_LSLICE_STEP") or "",
223
- index=match.start("G_LSLICE_STEP"),
279
+ kind=TOKEN_KEY_NAME,
280
+ value=match.group("G_PROP_KEY"),
281
+ index=match.start("G_PROP_KEY"),
224
282
  )
225
283
  elif kind == TOKEN_DOUBLE_QUOTE_STRING:
226
284
  yield _token(
@@ -234,19 +292,6 @@ class Lexer:
234
292
  value=match.group("G_SQUOTE"),
235
293
  index=match.start("G_SQUOTE"),
236
294
  )
237
- elif kind == TOKEN_INT:
238
- if match.group("G_EXP") and match.group("G_EXP")[1] == "-":
239
- yield _token(
240
- kind=TOKEN_FLOAT,
241
- value=match.group(),
242
- index=match.start(),
243
- )
244
- else:
245
- yield _token(
246
- kind=TOKEN_INT,
247
- value=match.group(),
248
- index=match.start(),
249
- )
250
295
  elif kind == TOKEN_RE_PATTERN:
251
296
  yield _token(
252
297
  kind=TOKEN_RE_PATTERN,
@@ -270,13 +315,17 @@ class Lexer:
270
315
  value=match.group("G_FUNC"),
271
316
  index=match.start("G_FUNC"),
272
317
  )
273
- elif kind == TOKEN_SKIP:
274
- continue
275
- elif kind == TOKEN_ILLEGAL:
318
+
319
+ yield _token(
320
+ kind=TOKEN_LPAREN,
321
+ value=match.group("G_FUNC_PAREN"),
322
+ index=match.start("G_FUNC_PAREN"),
323
+ )
324
+ elif kind == TOKEN_ERROR:
276
325
  raise JSONPathSyntaxError(
277
326
  f"unexpected token {match.group()!r}",
278
327
  token=_token(
279
- TOKEN_ILLEGAL,
328
+ TOKEN_ERROR,
280
329
  value=match.group(),
281
330
  index=match.start(),
282
331
  ),
jsonpath/lru_cache.py ADDED
@@ -0,0 +1,130 @@
1
+ """An LRU cache with a mapping interface implemented using an ordered dict."""
2
+
3
+ from collections import OrderedDict
4
+ from threading import Lock
5
+ from typing import Generic
6
+ from typing import Iterator
7
+ from typing import Optional
8
+ from typing import Tuple
9
+ from typing import TypeVar
10
+ from typing import Union
11
+ from typing import overload
12
+
13
_KT = TypeVar("_KT")
_VT = TypeVar("_VT")
_T = TypeVar("_T")


class LRUCache(Generic[_KT, _VT]):
    """An LRU cache with a mapping interface.

    Items are stored in an ordered dict with the least recently used item
    first. Reading an item with `cache[key]` or `get`, or writing it with
    `cache[key] = value`, makes it the most recently used item. Iteration
    (`__iter__`, `keys`, `values` and `items`) runs from most to least
    recently used and does not change that ordering.
    """

    def __init__(self, capacity: int):
        """Create an empty cache holding at most _capacity_ items.

        Raises:
            ValueError: If _capacity_ is less than one.
        """
        if capacity < 1:
            raise ValueError("cache capacity must be greater than zero")

        self.capacity = capacity
        self._cache: OrderedDict[_KT, _VT] = OrderedDict()

    def __getitem__(self, key: _KT) -> _VT:
        value = self._cache[key]  # This will raise a KeyError if key is not cached
        self._cache.move_to_end(key)
        return value

    def __setitem__(self, key: _KT, value: _VT) -> None:
        try:
            # An existing key becomes the most recently used item.
            self._cache.move_to_end(key)
        except KeyError:
            # A new key: make room by evicting the least recently used item.
            if len(self._cache) >= self.capacity:
                self._cache.popitem(last=False)

        self._cache[key] = value

    def __delitem__(self, key: _KT) -> None:
        del self._cache[key]

    def __len__(self) -> int:
        return len(self._cache)

    def __iter__(self) -> Iterator[_KT]:
        return reversed(self._cache)

    def __contains__(self, key: _KT) -> bool:
        # A membership test does not count as a use of the item.
        return key in self._cache

    @overload
    def get(self, key: _KT) -> Optional[_VT]: ...
    @overload
    def get(self, key: _KT, default: _VT) -> _VT: ...
    @overload
    def get(self, key: _KT, default: _T) -> Union[_VT, _T]: ...
    def get(self, key: _KT, default: object = None) -> object:
        """Return the cached value for _key_ if _key_ is in the cache, else default."""
        try:
            return self[key]
        except KeyError:
            return default

    def keys(self) -> Iterator[_KT]:
        """Return an iterator over this cache's keys."""
        return reversed(self._cache.keys())

    def values(self) -> Iterator[_VT]:
        """Return an iterator over this cache's values."""
        return reversed(self._cache.values())

    def items(self) -> Iterator[Tuple[_KT, _VT]]:
        """Return an iterator over this cache's key/value pairs."""
        return reversed(self._cache.items())


class ThreadSafeLRUCache(LRUCache[_KT, _VT]):
    """A thread safe LRU cache.

    Every operation that touches the underlying ordered dict runs while
    holding a lock. Iterators returned by `__iter__`, `keys`, `values` and
    `items` walk a snapshot taken under the lock, so they stay valid if the
    cache is modified while they are being consumed.
    """

    def __init__(self, capacity: int):
        super().__init__(capacity)
        self._lock = Lock()

    def __getitem__(self, key: _KT) -> _VT:
        with self._lock:
            return super().__getitem__(key)

    def __setitem__(self, key: _KT, value: _VT) -> None:
        with self._lock:
            return super().__setitem__(key, value)

    def __delitem__(self, key: _KT) -> None:
        with self._lock:
            return super().__delitem__(key)

    def __len__(self) -> int:
        with self._lock:
            return super().__len__()

    def __iter__(self) -> Iterator[_KT]:
        # Snapshot under the lock; a lazy iterator would be consumed after the
        # lock is released and fail if the cache is modified in the meantime.
        with self._lock:
            return iter(list(super().__iter__()))

    def __contains__(self, key: _KT) -> bool:
        with self._lock:
            return super().__contains__(key)

    @overload
    def get(self, key: _KT) -> Optional[_VT]: ...
    @overload
    def get(self, key: _KT, default: _VT) -> _VT: ...
    @overload
    def get(self, key: _KT, default: _T) -> Union[_VT, _T]: ...
    def get(self, key: _KT, default: object = None) -> object:
        """Return the cached value for _key_ if _key_ is in the cache, else default."""
        # NOTE: self.__getitem__ is already acquiring the lock, and the lock is
        # not reentrant, so it must not be taken here as well.
        try:
            return self[key]
        except KeyError:
            return default

    def keys(self) -> Iterator[_KT]:
        """Return an iterator over a snapshot of this cache's keys."""
        with self._lock:
            return iter(list(super().keys()))

    def values(self) -> Iterator[_VT]:
        """Return an iterator over a snapshot of this cache's values."""
        with self._lock:
            return iter(list(super().values()))

    def items(self) -> Iterator[Tuple[_KT, _VT]]:
        """Return an iterator over a snapshot of this cache's key/value pairs."""
        with self._lock:
            return iter(list(super().items()))
jsonpath/match.py CHANGED
@@ -11,6 +11,7 @@ from typing import Tuple
11
11
  from typing import Union
12
12
 
13
13
  from .pointer import JSONPointer
14
+ from .serialize import canonical_string
14
15
 
15
16
  FilterContextVars = Mapping[str, Any]
16
17
  PathPart = Union[int, str]
@@ -69,6 +70,18 @@ class JSONPathMatch:
69
70
  """Append one or more children to this match."""
70
71
  self.children.extend(children)
71
72
 
73
+ def new_child(self, obj: object, key: Union[int, str]) -> JSONPathMatch:
74
+ """Return a new JSONPathMatch instance with this instance as its parent."""
75
+ return self.__class__(
76
+ filter_context=self.filter_context(),
77
+ obj=obj,
78
+ parent=self,
79
+ parts=self.parts + (key,),
80
+ path=self.path
81
+ + f"[{canonical_string(key) if isinstance(key, str) else key}]",
82
+ root=self.root,
83
+ )
84
+
72
85
  def filter_context(self) -> FilterContextVars:
73
86
  """Return filter context data for this match."""
74
87
  return self._filter_context