simple_jsonpath 0.2.2__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/.gitignore +2 -1
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/Cargo.lock +1 -1
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/Cargo.toml +1 -1
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/PKG-INFO +152 -1
- simple_jsonpath-0.3.3/README.md +300 -0
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/pyproject.toml +4 -0
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/python/simple_jsonpath/_simple_jsonpath.pyi +12 -4
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/python/simple_jsonpath/jsonpath.py +63 -6
- simple_jsonpath-0.3.3/src/lib.rs +330 -0
- simple_jsonpath-0.2.2/README.md +0 -150
- simple_jsonpath-0.2.2/src/lib.rs +0 -289
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/.github/workflows/CI.yml +0 -0
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/LICENSE +0 -0
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/python/simple_jsonpath/__init__.py +0 -0
- {simple_jsonpath-0.2.2 → simple_jsonpath-0.3.3}/python/simple_jsonpath/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: simple_jsonpath
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Classifier: Intended Audience :: Developers
|
|
5
5
|
Classifier: Programming Language :: Rust
|
|
6
6
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.13
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.14
|
|
14
|
+
Requires-Dist: orjson>=3.11.7
|
|
14
15
|
License-File: LICENSE
|
|
15
16
|
Summary: A simple - yet quick - JSONPath implementation for querying JSON data.
|
|
16
17
|
Keywords: jsonpath
|
|
@@ -39,6 +40,29 @@ This module exposes a single simple type - a ***JsonPath*** which has two method
|
|
|
39
40
|
- ***set_data()***: sets the data that will be queried against. If multiple queries will be performed against a single piece of JSON data, this helps with the type conversion cost involved. This function can be called again with new data whenever the held data needs to change, while retaining already compiled paths (useful for querying multiple similarly structured documents).
|
|
40
41
|
- ***find()***: given a path that is wished to be found in the previously set data, this function will perform the query logic. Multiple calls to **find()** will query against the previously 'set' data.
|
|
41
42
|
- ***find_located()***: given a path that is wished to be found in the previously set data, this function can return a list of ***LocatedNode*** objects. Each ***LocatedNode*** object will have attributes related to the path where the node was located as well as their corresponding data. This method is slower than ***find()***, so should ideally only be used when path information for the found nodes is needed.
|
|
43
|
+
- ***child()***: spawns a child instance of the ***JsonPath*** object that does not inherit its data, but maintains shared mutable access to the collection of
|
|
44
|
+
compiled paths.
|
|
45
|
+
|
|
46
|
+
## Child behavior
|
|
47
|
+
|
|
48
|
+
When the ***child()*** method is invoked, a child will be spawned from the current instance of the ***JsonPath*** object.
|
|
49
|
+
|
|
50
|
+
The child will not inherit the data from the parent, so ***set_data()*** needs to be called on it for it to function.
|
|
51
|
+
|
|
52
|
+
It does however retain shared mutable access to the parent's collection of pre-parsed path objects which is shared across all spawned children.
|
|
53
|
+
|
|
54
|
+
This is useful for the pattern of:
|
|
55
|
+
|
|
56
|
+
1. Searching a document for a path query.
|
|
57
|
+
|
|
58
|
+
2. Then using those results returned as the basis of a new 'root element' for 'deeper' searches into a document.
|
|
59
|
+
|
|
60
|
+
Instead of assigning the query results to current instance, it can be beneficial to spawn a child for each result, and assign the result
|
|
61
|
+
data to the child or multiple children if more than one query result was returned.
|
|
62
|
+
|
|
63
|
+
With this pattern the 'base' parent object will automatically contain all parsed paths for the document that were searched by any descendant instance spawned from it, and children will have access to updates to the 'base' instance that any of their siblings make.
|
|
64
|
+
|
|
65
|
+
Then the 'base' parent object can be efficiently used on the next similarly structured document as all previously compiled queries against the document are retained.
|
|
42
66
|
|
|
43
67
|
## Examples
|
|
44
68
|
|
|
@@ -171,3 +195,130 @@ for data in results:
|
|
|
171
195
|
# 2001:db8::1/64
|
|
172
196
|
```
|
|
173
197
|
|
|
198
|
+
### 'Child' Example
|
|
199
|
+
|
|
200
|
+
The child pattern can be useful for speeding up processing of multiple similarly structured documents to avoid overhead
|
|
201
|
+
of parsing the same query strings many times. Children are independent objects from the 'base' instance, and children
|
|
202
|
+
can also spawn their own children.
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from simple_jsonpath import JsonPath
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
json_data_1 = {
|
|
209
|
+
"items": [
|
|
210
|
+
{
|
|
211
|
+
"address": {
|
|
212
|
+
"prefix-list": [
|
|
213
|
+
{
|
|
214
|
+
"prefix": "2001:db8::1/64",
|
|
215
|
+
"eui-64": [
|
|
216
|
+
None
|
|
217
|
+
]
|
|
218
|
+
}
|
|
219
|
+
],
|
|
220
|
+
"link-local-address": [
|
|
221
|
+
{
|
|
222
|
+
"address": "fe80::1",
|
|
223
|
+
"link-local": [
|
|
224
|
+
None
|
|
225
|
+
]
|
|
226
|
+
}
|
|
227
|
+
]
|
|
228
|
+
}
|
|
229
|
+
},
|
|
230
|
+
{
|
|
231
|
+
"address": {
|
|
232
|
+
"prefix-list": [
|
|
233
|
+
{
|
|
234
|
+
"prefix": "2001:db8::1/64",
|
|
235
|
+
"eui-64": [
|
|
236
|
+
None
|
|
237
|
+
]
|
|
238
|
+
}
|
|
239
|
+
],
|
|
240
|
+
"link-local-address": [
|
|
241
|
+
{
|
|
242
|
+
"address": "fe80::1",
|
|
243
|
+
"link-local": [
|
|
244
|
+
None
|
|
245
|
+
]
|
|
246
|
+
}
|
|
247
|
+
]
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
]
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
json_data_2 = {
|
|
254
|
+
"items": [
|
|
255
|
+
{
|
|
256
|
+
"address": {
|
|
257
|
+
"prefix-list": [
|
|
258
|
+
{
|
|
259
|
+
"prefix": "2001:db8::2/64",
|
|
260
|
+
"eui-64": [
|
|
261
|
+
None
|
|
262
|
+
]
|
|
263
|
+
}
|
|
264
|
+
]
|
|
265
|
+
}
|
|
266
|
+
},
|
|
267
|
+
{
|
|
268
|
+
"address": {
|
|
269
|
+
"prefix-list": [
|
|
270
|
+
{
|
|
271
|
+
"prefix": "2001:db8::2/64",
|
|
272
|
+
"eui-64": [
|
|
273
|
+
None
|
|
274
|
+
]
|
|
275
|
+
}
|
|
276
|
+
]
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
]
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
def process_document(data, finder: JsonPath):
|
|
283
|
+
# The 'base' instance was instantiated outside of this fn below.
|
|
284
|
+
|
|
285
|
+
# Sets the data that is desired to be queried against
|
|
286
|
+
finder.set_data(data)
|
|
287
|
+
|
|
288
|
+
# Search for interested data. This pattern will be cached in the base instance, which will
|
|
289
|
+
# then be available to all descendants of the base instance.
|
|
290
|
+
results: list[Any] = finder.find("$.items[*]")
|
|
291
|
+
|
|
292
|
+
# Iterate through each found result
|
|
293
|
+
for data in results:
|
|
294
|
+
# Spawn a child for each result
|
|
295
|
+
child = finder.child()
|
|
296
|
+
# Set the result data for the child
|
|
297
|
+
child.set_data(data)
|
|
298
|
+
|
|
299
|
+
# The first child that requests to find a pattern that has not yet been seen by the 'base' instance
|
|
300
|
+
# will parse the pattern and insert it into the 'base' instance's cache of compiled patterns.
|
|
301
|
+
#
|
|
302
|
+
# The 'base' instance now has the pre-compiled pattern should it need to search for it.
|
|
303
|
+
#
|
|
304
|
+
# All descendants of the 'base' instance now have access to the pre-compiled pattern to include
|
|
305
|
+
# the child that will be spawned on the next iteration of this loop which will allow it to
|
|
306
|
+
# process its own searches faster.
|
|
307
|
+
results = child.find("$.address.'prefix-list'[*]")
|
|
308
|
+
|
|
309
|
+
# .... further processing....
|
|
310
|
+
all_documents = [json_data_1, json_data_2]
|
|
311
|
+
|
|
312
|
+
# create a single base JsonPath
|
|
313
|
+
finder = JsonPath()
|
|
314
|
+
|
|
315
|
+
for document in all_documents:
|
|
316
|
+
# For each document to be processed that is known to be similar in structure, pass in the same 'base' instance.
|
|
317
|
+
#
|
|
318
|
+
# By the time it has processed the first document (depending on how deep either itself, or its child instances were able to traverse the document)
|
|
319
|
+
# some/most/all of the possible paths that will need to be compiled have been. Which makes processing the next document in the series
|
|
320
|
+
# quicker.
|
|
321
|
+
process_document(document, finder)
|
|
322
|
+
|
|
323
|
+
```
|
|
324
|
+
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
# simple_jsonpath
|
|
2
|
+
|
|
3
|
+
## Installation
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install simple_jsonpath
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## About
|
|
10
|
+
|
|
11
|
+
This module is a JSONPath [RFC9535 - JSONPath: Query Expressions for JSON](https://datatracker.ietf.org/doc/html/rfc9535) utility library.
|
|
12
|
+
|
|
13
|
+
## Use
|
|
14
|
+
|
|
15
|
+
This module exposes a single simple type - a ***JsonPath*** which provides the following methods after instantiation.
|
|
16
|
+
|
|
17
|
+
- ***set_data()***: sets the data that will be queried against. If multiple queries will be performed against a single piece of JSON data, this helps with the type conversion cost involved. This function can be called again with new data whenever the held data needs to change, while retaining already compiled paths (useful for querying multiple similarly structured documents).
|
|
18
|
+
- ***find()***: given a path that is wished to be found in the previously set data, this function will perform the query logic. Multiple calls to **find()** will query against the previously 'set' data.
|
|
19
|
+
- ***find_located()***: given a path that is wished to be found in the previously set data, this function can return a list of ***LocatedNode*** objects. Each ***LocatedNode*** object will have attributes related to the path where the node was located as well as their corresponding data. This method is slower than ***find()***, so should ideally only be used when path information for the found nodes is needed.
|
|
20
|
+
- ***child()***: spawns a child instance of the ***JsonPath*** object that does not inherit its data, but maintains shared mutable access to the collection of
|
|
21
|
+
compiled paths.
|
|
22
|
+
|
|
23
|
+
## Child behavior
|
|
24
|
+
|
|
25
|
+
When the ***child()*** method is invoked, a child will be spawned from the current instance of the ***JsonPath*** object.
|
|
26
|
+
|
|
27
|
+
The child will not inherit the data from the parent, so ***set_data()*** needs to be called on it for it to function.
|
|
28
|
+
|
|
29
|
+
It does however retain shared mutable access to the parent's collection of pre-parsed path objects which is shared across all spawned children.
|
|
30
|
+
|
|
31
|
+
This is useful for the pattern of:
|
|
32
|
+
|
|
33
|
+
1. Searching a document for a path query.
|
|
34
|
+
|
|
35
|
+
2. Then using those results returned as the basis of a new 'root element' for 'deeper' searches into a document.
|
|
36
|
+
|
|
37
|
+
Instead of assigning the query results to current instance, it can be beneficial to spawn a child for each result, and assign the result
|
|
38
|
+
data to the child or multiple children if more than one query result was returned.
|
|
39
|
+
|
|
40
|
+
With this pattern the 'base' parent object will automatically contain all parsed paths for the document that were searched by any descendant instance spawned from it, and children will have access to updates to the 'base' instance that any of their siblings make.
|
|
41
|
+
|
|
42
|
+
Then the 'base' parent object can be efficiently used on the next similarly structured document as all previously compiled queries against the document are retained.
|
|
43
|
+
|
|
44
|
+
## Examples
|
|
45
|
+
|
|
46
|
+
### 'Find' Example
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from simple_jsonpath import JsonPath
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
json_data = {
|
|
53
|
+
"address": {
|
|
54
|
+
"prefix-list": [
|
|
55
|
+
{
|
|
56
|
+
"prefix": "2001:db8::1/64",
|
|
57
|
+
"eui-64": [
|
|
58
|
+
None
|
|
59
|
+
]
|
|
60
|
+
}
|
|
61
|
+
],
|
|
62
|
+
"link-local-address": [
|
|
63
|
+
{
|
|
64
|
+
"address": "fe80::1",
|
|
65
|
+
"link-local": [
|
|
66
|
+
None
|
|
67
|
+
]
|
|
68
|
+
}
|
|
69
|
+
]
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
# Instantiates the primary class
|
|
74
|
+
finder = JsonPath()
|
|
75
|
+
|
|
76
|
+
# Sets the data that is desired to be queried against
|
|
77
|
+
finder.set_data(json_data)
|
|
78
|
+
|
|
79
|
+
# A path is provided to query against the 'set' data. The path is internally parsed, then used to query against the 'set' dataset.
|
|
80
|
+
# Notice that this implementation allows escaping special characters in the shorthand path syntax with single or double quotes
|
|
81
|
+
results = finder.find("$.address.'prefix-list'[*].prefix")
|
|
82
|
+
|
|
83
|
+
for data in results:
|
|
84
|
+
# Access the found node.
|
|
85
|
+
print(f"{data}")
|
|
86
|
+
# 2001:db8::1/64
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
The inner implementation stores previously parsed 'paths'. This allows repeatedly used paths to bypass the parsing step involved.
|
|
91
|
+
|
|
92
|
+
This is ideal for situations where multiple similar JSON documents will be searched in succession.
|
|
93
|
+
|
|
94
|
+
The same ***JsonPath*** object can then be reused with new data sets by calling ***set_data()*** on it again, and any previously parsed paths by the object will be retained.
|
|
95
|
+
|
|
96
|
+
Only when moving onto data of differing structure would it be potentially advisable to instantiate a new ***JsonPath*** object.
|
|
97
|
+
|
|
98
|
+
### 'Find Located' Example
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from simple_jsonpath import JsonPath, LocatedNode
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
json_data = {
|
|
105
|
+
"items": [
|
|
106
|
+
{
|
|
107
|
+
"address": {
|
|
108
|
+
"prefix-list": [
|
|
109
|
+
{
|
|
110
|
+
"prefix": "2001:db8::1/64",
|
|
111
|
+
"eui-64": [
|
|
112
|
+
None
|
|
113
|
+
]
|
|
114
|
+
}
|
|
115
|
+
],
|
|
116
|
+
"link-local-address": [
|
|
117
|
+
{
|
|
118
|
+
"address": "fe80::1",
|
|
119
|
+
"link-local": [
|
|
120
|
+
None
|
|
121
|
+
]
|
|
122
|
+
}
|
|
123
|
+
]
|
|
124
|
+
}
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
"address": {
|
|
128
|
+
"prefix-list": [
|
|
129
|
+
{
|
|
130
|
+
"prefix": "2001:db8::1/64",
|
|
131
|
+
"eui-64": [
|
|
132
|
+
None
|
|
133
|
+
]
|
|
134
|
+
}
|
|
135
|
+
],
|
|
136
|
+
"link-local-address": [
|
|
137
|
+
{
|
|
138
|
+
"address": "fe80::1",
|
|
139
|
+
"link-local": [
|
|
140
|
+
None
|
|
141
|
+
]
|
|
142
|
+
}
|
|
143
|
+
]
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
]
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
# Instantiates the primary class
|
|
150
|
+
finder = JsonPath()
|
|
151
|
+
|
|
152
|
+
# Sets the data that is desired to be queried against
|
|
153
|
+
finder.set_data(json_data)
|
|
154
|
+
|
|
155
|
+
# Now we are interested in the path information where matches were found as well as the data
|
|
156
|
+
results: list[LocatedNode] = finder.find_located("$.items[*].address.'prefix-list'[*].prefix")
|
|
157
|
+
|
|
158
|
+
# Iterate through each found LocatedNode object
|
|
159
|
+
for data in results:
|
|
160
|
+
|
|
161
|
+
# Print the normalized full path where the node was found
|
|
162
|
+
print(f"{data.full_path}")
|
|
163
|
+
# $['items'][0]['address']['prefix-list'][0]['prefix']
|
|
164
|
+
|
|
165
|
+
# Iterate over the components of the found path
|
|
166
|
+
# Returned elements will either be a 'str' for keys or 'int' for index values
|
|
167
|
+
print(f"{', '.join([str(component) for component in data.path_components])}")
|
|
168
|
+
# $, items, 0, address, prefix-list, 0, prefix
|
|
169
|
+
|
|
170
|
+
# Access the found node.
|
|
171
|
+
print(f"{data.node}")
|
|
172
|
+
# 2001:db8::1/64
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### 'Child' Example
|
|
176
|
+
|
|
177
|
+
The child pattern can be useful for speeding up processing of multiple similarly structured documents to avoid overhead
|
|
178
|
+
of parsing the same query strings many times. Children are independent objects from the 'base' instance, and children
|
|
179
|
+
can also spawn their own children.
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from simple_jsonpath import JsonPath
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
json_data_1 = {
|
|
186
|
+
"items": [
|
|
187
|
+
{
|
|
188
|
+
"address": {
|
|
189
|
+
"prefix-list": [
|
|
190
|
+
{
|
|
191
|
+
"prefix": "2001:db8::1/64",
|
|
192
|
+
"eui-64": [
|
|
193
|
+
None
|
|
194
|
+
]
|
|
195
|
+
}
|
|
196
|
+
],
|
|
197
|
+
"link-local-address": [
|
|
198
|
+
{
|
|
199
|
+
"address": "fe80::1",
|
|
200
|
+
"link-local": [
|
|
201
|
+
None
|
|
202
|
+
]
|
|
203
|
+
}
|
|
204
|
+
]
|
|
205
|
+
}
|
|
206
|
+
},
|
|
207
|
+
{
|
|
208
|
+
"address": {
|
|
209
|
+
"prefix-list": [
|
|
210
|
+
{
|
|
211
|
+
"prefix": "2001:db8::1/64",
|
|
212
|
+
"eui-64": [
|
|
213
|
+
None
|
|
214
|
+
]
|
|
215
|
+
}
|
|
216
|
+
],
|
|
217
|
+
"link-local-address": [
|
|
218
|
+
{
|
|
219
|
+
"address": "fe80::1",
|
|
220
|
+
"link-local": [
|
|
221
|
+
None
|
|
222
|
+
]
|
|
223
|
+
}
|
|
224
|
+
]
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
]
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
json_data_2 = {
|
|
231
|
+
"items": [
|
|
232
|
+
{
|
|
233
|
+
"address": {
|
|
234
|
+
"prefix-list": [
|
|
235
|
+
{
|
|
236
|
+
"prefix": "2001:db8::2/64",
|
|
237
|
+
"eui-64": [
|
|
238
|
+
None
|
|
239
|
+
]
|
|
240
|
+
}
|
|
241
|
+
]
|
|
242
|
+
}
|
|
243
|
+
},
|
|
244
|
+
{
|
|
245
|
+
"address": {
|
|
246
|
+
"prefix-list": [
|
|
247
|
+
{
|
|
248
|
+
"prefix": "2001:db8::2/64",
|
|
249
|
+
"eui-64": [
|
|
250
|
+
None
|
|
251
|
+
]
|
|
252
|
+
}
|
|
253
|
+
]
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
]
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
def process_document(data, finder: JsonPath):
|
|
260
|
+
# The 'base' instance was instantiated outside of this fn below.
|
|
261
|
+
|
|
262
|
+
# Sets the data that is desired to be queried against
|
|
263
|
+
finder.set_data(data)
|
|
264
|
+
|
|
265
|
+
# Search for interested data. This pattern will be cached in the base instance, which will
|
|
266
|
+
# then be available to all descendants of the base instance.
|
|
267
|
+
results: list[Any] = finder.find("$.items[*]")
|
|
268
|
+
|
|
269
|
+
# Iterate through each found result
|
|
270
|
+
for data in results:
|
|
271
|
+
# Spawn a child for each result
|
|
272
|
+
child = finder.child()
|
|
273
|
+
# Set the result data for the child
|
|
274
|
+
child.set_data(data)
|
|
275
|
+
|
|
276
|
+
# The first child that requests to find a pattern that has not yet been seen by the 'base' instance
|
|
277
|
+
# will parse the pattern and insert it into the 'base' instance's cache of compiled patterns.
|
|
278
|
+
#
|
|
279
|
+
# The 'base' instance now has the pre-compiled pattern should it need to search for it.
|
|
280
|
+
#
|
|
281
|
+
# All descendants of the 'base' instance now have access to the pre-compiled pattern to include
|
|
282
|
+
# the child that will be spawned on the next iteration of this loop which will allow it to
|
|
283
|
+
# process its own searches faster.
|
|
284
|
+
results = child.find("$.address.'prefix-list'[*]")
|
|
285
|
+
|
|
286
|
+
# .... further processing....
|
|
287
|
+
all_documents = [json_data_1, json_data_2]
|
|
288
|
+
|
|
289
|
+
# create a single base JsonPath
|
|
290
|
+
finder = JsonPath()
|
|
291
|
+
|
|
292
|
+
for document in all_documents:
|
|
293
|
+
# For each document to be processed that is known to be similar in structure, pass in the same 'base' instance.
|
|
294
|
+
#
|
|
295
|
+
# By the time it has processed the first document (depending on how deep either itself, or its child instances were able to traverse the document)
|
|
296
|
+
# some/most/all of the possible paths that will need to be compiled have been. Which makes processing the next document in the series
|
|
297
|
+
# quicker.
|
|
298
|
+
process_document(document, finder)
|
|
299
|
+
|
|
300
|
+
```
|
|
@@ -1,16 +1,24 @@
|
|
|
1
1
|
"""A Python module for querying JSON data using JSONPath expressions."""
|
|
2
2
|
|
|
3
|
+
from typing import Any
|
|
3
4
|
|
|
4
5
|
class SimpleJsonPath:
|
|
5
6
|
"""A parser object that can be reused for multiple queries on the same JSON data."""
|
|
6
7
|
def __init__(self) -> None: ...
|
|
7
|
-
|
|
8
|
+
|
|
9
|
+
def child(self, value: str) -> SimpleJsonPath:
|
|
10
|
+
"""Spawn a child instance fo the Parser"""
|
|
11
|
+
...
|
|
12
|
+
def has_data(self) -> bool:
|
|
13
|
+
"""Checks if the underlying parser has data already."""
|
|
14
|
+
...
|
|
15
|
+
def set_data(self, input_data: bytes) -> None:
|
|
8
16
|
"""Set the JSON data for the parser from a JSON string."""
|
|
9
17
|
...
|
|
10
|
-
def
|
|
18
|
+
def find(self, path: str) -> list[Any]:
|
|
11
19
|
"""Find the value(s) in the JSON data that match the given JSONPath expression, using a cache for parsed paths."""
|
|
12
20
|
...
|
|
13
21
|
|
|
14
|
-
def
|
|
22
|
+
def find_located(self, path: str) -> list[Any]:
|
|
15
23
|
"""Find the value(s) in the JSON data that match the given JSONPath expression, along with their locations, using a cache for parsed paths."""
|
|
16
|
-
...
|
|
24
|
+
...
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
from ._simple_jsonpath import SimpleJsonPath as RustSimpleJsonPath
|
|
2
|
-
|
|
2
|
+
import sys
|
|
3
|
+
if sys.version_info >= (3, 11):
|
|
4
|
+
from typing import Self, Union, Any
|
|
5
|
+
else:
|
|
6
|
+
from typing_extensions import Self, Union, Any
|
|
7
|
+
import orjson
|
|
3
8
|
from dataclasses import dataclass
|
|
4
9
|
import builtins
|
|
5
10
|
import json
|
|
@@ -12,7 +17,6 @@ class PathComponentsIter:
|
|
|
12
17
|
self._items: list[tuple[int,int]] = nodes
|
|
13
18
|
|
|
14
19
|
def __next__(self) -> Union[str, int]:
|
|
15
|
-
|
|
16
20
|
if self._current >= self._end:
|
|
17
21
|
raise StopIteration
|
|
18
22
|
if self._current == 0:
|
|
@@ -100,8 +104,48 @@ class JsonPath:
|
|
|
100
104
|
def __init__(self) -> None:
|
|
101
105
|
self._parser = RustSimpleJsonPath()
|
|
102
106
|
|
|
107
|
+
|
|
108
|
+
def child(self) -> Self:
|
|
109
|
+
"""Spawns a child instance of the class.
|
|
110
|
+
|
|
111
|
+
The child will not inherit the data of the parent, so a call
|
|
112
|
+
to set_data() needs to be made on it for it to function.
|
|
113
|
+
|
|
114
|
+
It does however retain shared mutable access to the parent's collection
|
|
115
|
+
of pre-parsed path objects across all spawned children.
|
|
116
|
+
|
|
117
|
+
This is useful for the pattern of:
|
|
118
|
+
1. Searching a document for a path query.
|
|
119
|
+
2. Then using those results returned as the basis of a new 'root element'
|
|
120
|
+
for 'deeper' searches into a document.
|
|
121
|
+
|
|
122
|
+
Instead of assigning the original query results to current instance, it can
|
|
123
|
+
be beneficial to spawn a child for each result, and assign the result
|
|
124
|
+
data to the child or multiple children if more than one result was returned.
|
|
125
|
+
|
|
126
|
+
With this pattern the 'base' parent object will automatically contain
|
|
127
|
+
all parsed paths for the document that were searched by
|
|
128
|
+
any descendant instance spawned from it.
|
|
129
|
+
|
|
130
|
+
Then the 'base' parent object can be efficiently used on the next similarly
|
|
131
|
+
structured document as all previously compiled queries are retained.
|
|
132
|
+
"""
|
|
133
|
+
# Jesus this took too long to figure out...
|
|
134
|
+
child_cls = self.__class__
|
|
135
|
+
child = child_cls()
|
|
136
|
+
return child
|
|
137
|
+
|
|
138
|
+
def has_data(self) -> bool:
|
|
139
|
+
"""Returns True if this instance has data set to it, False otherwise."""
|
|
140
|
+
return self._parser.has_data()
|
|
141
|
+
|
|
103
142
|
def set_data(self, input_data: Union[dict[str, Any], list[Any]]) -> None:
|
|
104
|
-
"""Set the JSON data for the
|
|
143
|
+
"""Set the JSON data for the query engine from a Python object.
|
|
144
|
+
|
|
145
|
+
Once set, this data will be used for any find() or find_located()
|
|
146
|
+
operations performed against this instance.
|
|
147
|
+
|
|
148
|
+
Calling the function consecutively will replace any existing 'set' data.
|
|
105
149
|
|
|
106
150
|
Args:
|
|
107
151
|
input_data: The JSON data to set, as a Python dictionary or list.
|
|
@@ -112,10 +156,13 @@ class JsonPath:
|
|
|
112
156
|
Raises:
|
|
113
157
|
ValueError: If the input data is not a valid JSON object or array.
|
|
114
158
|
"""
|
|
115
|
-
self._parser.
|
|
159
|
+
self._parser.set_data(orjson.dumps(input_data))
|
|
116
160
|
|
|
117
161
|
def find(self, path: str) -> list[Any]:
|
|
118
162
|
"""Find the value(s) in the JSON data that match the given JSONPath expression.
|
|
163
|
+
|
|
164
|
+
The path expression is first parsed, then executed against the data previously
|
|
165
|
+
'set'. Parsed path expressions are cached for efficient future use.
|
|
119
166
|
|
|
120
167
|
Args:
|
|
121
168
|
path: The JSONPath expression to evaluate.
|
|
@@ -125,11 +172,17 @@ class JsonPath:
|
|
|
125
172
|
|
|
126
173
|
Raises:
|
|
127
174
|
ValueError: If the JSONPath expression is invalid.
|
|
175
|
+
LookupError: If this is called before data has been set on this object through 'set_data()'.
|
|
128
176
|
"""
|
|
129
|
-
|
|
177
|
+
if not self._parser.has_data():
|
|
178
|
+
raise LookupError("Data must be set through calling 'set_data()' before attempting a query")
|
|
179
|
+
return self._parser.find(path)
|
|
130
180
|
|
|
131
181
|
def find_located(self, path: str) -> list[LocatedNode]:
|
|
132
182
|
"""Find the value(s) in the JSON data that match the given JSONPath expression, along with their locations.
|
|
183
|
+
|
|
184
|
+
The path expression is first parsed, then executed against the data previously
|
|
185
|
+
'set'. Parsed path expressions are cached for efficient future use.
|
|
133
186
|
|
|
134
187
|
Args:
|
|
135
188
|
path: The JSONPath expression to evaluate.
|
|
@@ -139,6 +192,10 @@ class JsonPath:
|
|
|
139
192
|
|
|
140
193
|
Raises:
|
|
141
194
|
ValueError: If the JSONPath expression is invalid.
|
|
195
|
+
LookupError: If this is called before data has been set on this object through 'set_data()'.
|
|
142
196
|
"""
|
|
143
|
-
|
|
197
|
+
if not self._parser.has_data():
|
|
198
|
+
raise LookupError("Data must be set through calling 'set_data()' before attempting a query")
|
|
199
|
+
result = self._parser.find_located(path)
|
|
144
200
|
return [LocatedNode(item['full_path'], item['path_components'], item['node']) for item in result]
|
|
201
|
+
|