jsonata-python 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jsonata/tokenizer.py ADDED
@@ -0,0 +1,309 @@
1
+ #
2
+ # Copyright Robert Yokota
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License")
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ #
17
+ # Derived from the following code:
18
+ #
19
+ # Project name: jsonata-java
20
+ # Copyright Dashjoin GmbH. https://dashjoin.com
21
+ # Licensed under the Apache License, Version 2.0 (the "License")
22
+ #
23
+ # Project name: JSONata
24
+ # © Copyright IBM Corp. 2016, 2018 All Rights Reserved
25
+ # This project is licensed under the MIT License, see LICENSE
26
+ #
27
+
28
+ import math
29
+ import re
30
+ from dataclasses import dataclass
31
+ from typing import Any, Optional
32
+
33
+ from jsonata import jexception, utils
34
+
35
+
36
class Tokenizer:
    """Tokenizer (lexer) - invoked by the parser to return one token at a time.

    The tokenizer walks ``path`` (the expression source text) from left to
    right. Each call to :meth:`next` skips whitespace and ``/* ... */``
    comments and returns the next :class:`Tokenizer.Token`, or ``None`` once
    the input is exhausted.
    """

    # Symbol -> integer weight. NOTE(review): the integers look like operator
    # binding powers consumed by the parser - confirm against the parser.
    # Within this class only the *keys* matter: they identify operator
    # characters and act as stop characters when scanning names.
    operators = {
        '.': 75,
        '[': 80,
        ']': 0,
        '{': 70,
        '}': 0,
        '(': 80,
        ')': 0,
        ',': 0,
        '@': 80,
        '#': 80,
        ';': 80,
        ':': 80,
        '?': 20,
        '+': 50,
        '-': 50,
        '*': 60,
        '/': 60,
        '%': 60,
        '|': 20,
        '=': 40,
        '<': 40,
        '>': 40,
        '^': 40,
        '**': 60,
        '..': 20,
        ':=': 10,
        '!=': 40,
        '<=': 40,
        '>=': 40,
        '~>': 40,
        'and': 30,
        'or': 25,
        'in': 40,
        '&': 50,
        '!': 0,  # not an operator, but needed as a stop character for name tokens
        '~': 0  # not an operator, but needed as a stop character for name tokens
    }

    # Character following a backslash inside a string literal -> the
    # character it stands for.
    escapes = {
        '"': '"',
        '\\': '\\',
        '/': '/',
        'b': '\b',
        'f': '\f',
        'n': '\n',
        'r': '\r',
        't': '\t'
    }

    # Private lexing tables, built once instead of on every call to next().
    _WHITESPACE = " \t\n\r"
    _DOUBLE_CHAR_OPERATORS = frozenset(("..", ":=", "!=", ">=", "<=", "**", "~>"))
    # No leading '^': the pattern is applied with Pattern.match(text, pos),
    # which already anchors the match at ``pos``.
    _NUMBER_REGEX = re.compile(r"-?(0|([1-9][0-9]*))(\.[0-9]+)?([Ee][-+]?[0-9]+)?")
    _UNICODE_ESCAPE_REGEX = re.compile(r"[0-9a-fA-F]{4}")

    position: int  # index of the next unread character in ``path``
    depth: int  # bracket nesting counter maintained while scanning a regex
    path: str  # the expression text being tokenized
    length: int  # len(path), cached

    def __init__(self, path):
        """Create a tokenizer positioned at the start of ``path``."""
        self.position = 0
        self.depth = 0

        self.path = path
        self.length = len(path)

    @dataclass
    class Token:
        """A lexical token: its kind, its value and where it was produced."""
        type: Optional[str]
        value: Optional[Any]
        position: int
        id: Optional[Any] = None

    def create(self, type: Optional[str], value: Optional[Any]) -> Token:
        """Build a token stamped with the tokenizer's current position.

        Callers in this class advance ``self.position`` past the token text
        before calling this, so the recorded position is the index just after
        the token.
        """
        return Tokenizer.Token(type, value, self.position)

    def _preceded_by_backslash(self, position: int) -> bool:
        """Return True if the character just before ``position`` is a backslash."""
        # Guard position 0 explicitly: a negative index would silently wrap
        # around to the end of the string.
        return position > 0 and self.path[position - 1] == '\\'

    def is_closing_slash(self, position: int) -> bool:
        """Return True if ``path[position]`` is an unescaped '/' outside brackets.

        A slash is escaped when it is preceded by an odd number of
        backslashes. A slash seen while ``self.depth`` is non-zero (inside
        brackets of the regex being scanned) never closes the regex.
        """
        if self.path[position] == '/' and self.depth == 0:
            backslash_count = 0
            probe = position - 1
            # Stop at the start of the input rather than wrapping around.
            while probe >= 0 and self.path[probe] == '\\':
                backslash_count += 1
                probe -= 1
            if backslash_count % 2 == 0:
                return True
        return False

    def scan_regex(self) -> re.Pattern:
        """Scan a regex literal and return it compiled.

        The opening '/' must already have been consumed. The scan searches
        for the closing '/', ignoring any that are escaped or inside
        brackets, then consumes any trailing 'i' / 'm' flag characters.

        Raises:
            JException: "S0301" if the regex is empty, "S0302" if no closing
                '/' is found before the end of the input.
        """
        start = self.position

        while self.position < self.length:
            current_char = self.path[self.position]
            if self.is_closing_slash(self.position):
                # end of regex found
                pattern = self.path[start:self.position]
                if pattern == "":
                    raise jexception.JException("S0301", self.position)
                self.position += 1
                # flags - bounds-checked so a regex that ends the input works
                flags_start = self.position
                while self.position < self.length and self.path[self.position] in ('i', 'm'):
                    self.position += 1
                flag_chars = self.path[flags_start:self.position]

                # Convert the flag letters to Python re flags
                compiled_flags = 0
                if "i" in flag_chars:
                    compiled_flags |= re.I
                if "m" in flag_chars:
                    compiled_flags |= re.M
                return re.compile(pattern, compiled_flags)
            escaped = self._preceded_by_backslash(self.position)
            if current_char in ('(', '[', '{') and not escaped:
                self.depth += 1
            if current_char in (')', ']', '}') and not escaped:
                self.depth -= 1
            self.position += 1
        raise jexception.JException("S0302", self.position)

    def next(self, prefix: bool) -> Optional[Token]:
        """Return the next token, or ``None`` at the end of the input.

        Args:
            prefix: when false, a '/' at the current position starts a regex
                literal; when true it is tokenized as the '/' operator.

        Raises:
            JException: "S0106" unterminated comment, "S0101" unterminated
                string literal, "S0103" unsupported escape sequence, "S0104"
                malformed \\u escape, "S0102" number out of range, "S0105"
                unterminated backtick-quoted name, plus the codes raised by
                :meth:`scan_regex`.
        """
        if self.position >= self.length:
            return None
        current_char = self.path[self.position]
        # skip whitespace
        while current_char in Tokenizer._WHITESPACE:
            self.position += 1
            if self.position >= self.length:
                return None
            current_char = self.path[self.position]
        # skip comments
        if self.path.startswith('/*', self.position):
            comment_start = self.position
            comment_end = self.path.find('*/', self.position + 2)
            if comment_end == -1:
                # no closing tag
                self.position = self.length
                raise jexception.JException("S0106", comment_start)
            self.position = comment_end + 2
            # recurse to swallow any following whitespace / comments; next()
            # itself handles the case where the comment ended the input
            return self.next(prefix)
        # test for regex
        if not prefix and current_char == '/':
            self.position += 1
            return self.create("regex", self.scan_regex())
        # handle double-char operators (the slice is shorter than two
        # characters at the very end of the input and then never matches)
        pair = self.path[self.position:self.position + 2]
        if pair in Tokenizer._DOUBLE_CHAR_OPERATORS:
            self.position += 2
            return self.create("operator", pair)
        # test for single char operators
        if current_char in Tokenizer.operators:
            self.position += 1
            return self.create("operator", current_char)
        # test for string literals
        if current_char == '"' or current_char == '\'':
            return self._scan_string(current_char)
        # test for numbers
        match_ = Tokenizer._NUMBER_REGEX.match(self.path, self.position)
        if match_ is not None:
            num = float(match_.group(0))
            if math.isfinite(num):
                self.position = match_.end()
                # If the number is integral, it is converted to an int
                return self.create("number", utils.Utils.convert_number(num))
            raise jexception.JException("S0102", self.position)

        # test for quoted names (backticks)
        if current_char == '`':
            # scan for closing quote
            self.position += 1
            end = self.path.find('`', self.position)
            if end != -1:
                name = self.path[self.position:end]
                self.position = end + 1
                return self.create("name", name)
            self.position = self.length
            raise jexception.JException("S0105", self.position)
        # test for names: read up to the next whitespace / operator character
        end = self.position
        while end < self.length:
            ch = self.path[end]
            if ch in Tokenizer._WHITESPACE or ch in Tokenizer.operators:
                break
            end += 1
        if self.path[self.position] == '$':
            # variable reference
            name = self.path[self.position + 1:end]
            self.position = end
            return self.create("variable", name)
        name = self.path[self.position:end]
        self.position = end
        if name in ("or", "in", "and"):
            return self.create("operator", name)
        if name == "true":
            return self.create("value", True)
        if name == "false":
            return self.create("value", False)
        if name == "null":
            return self.create("value", None)
        if self.position == self.length and name == "":
            # whitespace at end of input
            return None
        return self.create("name", name)

    def _scan_string(self, quote_type: str) -> Token:
        """Scan a string literal whose opening quote is at ``self.position``.

        Backslash escapes are decoded through ``escapes``; ``\\uXXXX`` is
        decoded to the character with that code point.
        """
        self.position += 1
        pieces = []
        while self.position < self.length:
            current_char = self.path[self.position]
            if current_char == '\\':
                self.position += 1
                if self.position >= self.length:
                    # dangling backslash: the literal can never be closed
                    break
                current_char = self.path[self.position]
                if current_char in Tokenizer.escapes:
                    pieces.append(Tokenizer.escapes[current_char])
                elif current_char == 'u':
                    # u should be followed by 4 hex digits
                    octets = self.path[self.position + 1:self.position + 5]
                    if Tokenizer._UNICODE_ESCAPE_REGEX.fullmatch(octets):
                        pieces.append(chr(int(octets, 16)))
                        self.position += 4
                    else:
                        raise jexception.JException("S0104", self.position)
                else:
                    # illegal escape sequence (S0103; S0301 is the
                    # empty-regex code and was wrong here)
                    raise jexception.JException("S0103", self.position, current_char)
            elif current_char == quote_type:
                self.position += 1
                return self.create("string", "".join(pieces))
            else:
                pieces.append(current_char)
            self.position += 1
        raise jexception.JException("S0101", self.position)
jsonata/utils.py ADDED
@@ -0,0 +1,178 @@
1
+ #
2
+ # Copyright Robert Yokota
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License")
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ # Derived from the following code:
17
+ #
18
+ # Project name: jsonata-java
19
+ # Copyright Dashjoin GmbH. https://dashjoin.com
20
+ # Licensed under the Apache License, Version 2.0 (the "License")
21
+ #
22
+
23
+ import math
24
+ from typing import Any, MutableMapping, MutableSequence, Optional, Iterable
25
+
26
+ from jsonata import jexception
27
+
28
+
29
class Utils:
    """Static helpers shared by the JSONata implementation.

    Groups type predicates, the list subclasses used for result sequences and
    integer ranges, number normalisation, and conversion of the ``NULL_VALUE``
    placeholder back to ``None``.
    """

    class NullValue:
        """Placeholder type whose single instance stands in for JSON null."""

        def __repr__(self):
            return "null"

    # Singleton placeholder; convert_value() maps it back to None.
    NULL_VALUE = NullValue()

    @staticmethod
    def is_numeric(v: Optional[Any]) -> bool:
        """Return True if ``v`` is an int or a non-NaN float (bools excluded).

        Raises:
            JException: "D1001" if ``v`` is an infinite float.
        """
        # bool is a subclass of int, so it has to be rejected first
        if isinstance(v, bool):
            return False
        if isinstance(v, int):
            return True
        if not isinstance(v, float) or math.isnan(v):
            return False
        if not math.isfinite(v):
            raise jexception.JException("D1001", 0, v)
        return True

    @staticmethod
    def is_array_of_strings(v: Optional[Any]) -> bool:
        """Return True if ``v`` is a list whose elements are all ``str``."""
        return isinstance(v, list) and all(isinstance(o, str) for o in v)

    @staticmethod
    def is_array_of_numbers(v: Optional[Any]) -> bool:
        """Return True if ``v`` is a list whose elements all pass is_numeric."""
        return isinstance(v, list) and all(Utils.is_numeric(o) for o in v)

    @staticmethod
    def is_function(o: Optional[Any]) -> bool:
        """Return True if ``o`` is a ``Jsonata.JFunctionCallable``."""
        # Imported inside the function, as in the original code.
        # NOTE(review): presumably this avoids a circular import - confirm.
        from jsonata import jsonata
        return isinstance(o, jsonata.Jsonata.JFunctionCallable)

    # Sentinel meaning "no argument supplied" (None is a legitimate element).
    NONE = object()

    @staticmethod
    def create_sequence(el: Optional[Any] = NONE) -> list:
        """Create a ``JList`` flagged as a sequence.

        With no argument the sequence is empty. A one-element list is copied
        into the sequence; any other value (including longer lists) becomes
        the sequence's single element.
        """
        if el is Utils.NONE:
            sequence = Utils.JList()
        elif isinstance(el, list) and len(el) == 1:
            sequence = Utils.JList(el)
        else:
            # This case does NOT exist in Javascript! Why?
            sequence = Utils.JList([el])
        sequence.sequence = True
        return sequence

    @staticmethod
    def create_sequence_from_iter(it: Iterable) -> list:
        """Create a sequence-flagged ``JList`` holding the items of ``it``."""
        sequence = Utils.JList(it)
        sequence.sequence = True
        return sequence

    class JList(list):
        """A list carrying JSONata-specific boolean flags (all False initially)."""
        sequence: bool
        outer_wrapper: bool
        tuple_stream: bool
        keep_singleton: bool
        cons: bool

        def __init__(self, c=()):
            super().__init__(c)
            # Jsonata specific flags
            self.sequence = False
            self.outer_wrapper = False
            self.tuple_stream = False
            self.keep_singleton = False
            self.cons = False

    class RangeList(list):
        """Read-only view of the inclusive integer range ``left..right``.

        The underlying list storage stays empty; length, indexing and
        iteration are computed from the two bounds.
        """
        a: int  # first value (inclusive)
        b: int  # last value (inclusive)
        size: int  # number of values, b - a + 1

        def __init__(self, left, right):
            super().__init__()
            self.a = left
            self.b = right
            self.size = self.b - self.a + 1

        def __len__(self):
            return self.size

        def __getitem__(self, index):
            if isinstance(index, slice):
                return [Utils.convert_number(self.a + i)
                        for i in range(*index.indices(self.size))]
            # Negative indices count from the end, as for any Python sequence
            offset = index + self.size if index < 0 else index
            if 0 <= offset < self.size:
                return Utils.convert_number(self.a + offset)
            raise IndexError(index)

        def __iter__(self):
            # The range is inclusive of ``b``, matching __len__ / __getitem__
            return iter(range(self.a, self.b + 1))

    @staticmethod
    def is_sequence(result: Optional[Any]) -> bool:
        """Return True if ``result`` is a ``JList`` with its sequence flag set."""
        return isinstance(result, Utils.JList) and result.sequence

    @staticmethod
    def convert_number(n: float) -> Optional[float]:
        """Normalise a number: integral values become ``int``, others ``float``.

        Returns ``None`` if ``n`` is not numeric according to is_numeric.
        """
        if not Utils.is_numeric(n):
            return None
        whole = int(n)
        if whole == float(n):
            return whole
        return float(n)

    @staticmethod
    def convert_value(val: Optional[Any]) -> Optional[Any]:
        """Map the ``NULL_VALUE`` placeholder to ``None``; pass others through."""
        return val if val is not Utils.NULL_VALUE else None

    @staticmethod
    def convert_dict_nulls(res: MutableMapping[str, Any]) -> None:
        """Replace ``NULL_VALUE`` in ``res`` in place, recursing into containers."""
        # Only values of existing keys are reassigned, so the mapping's size
        # does not change while it is being iterated.
        for key, val in res.items():
            v = Utils.convert_value(val)
            if v is not val:
                res[key] = v
            Utils.recurse(val)

    @staticmethod
    def convert_list_nulls(res: MutableSequence[Any]) -> None:
        """Replace ``NULL_VALUE`` in ``res`` in place, recursing into containers."""
        for i, val in enumerate(res):
            v = Utils.convert_value(val)
            if v is not val:
                res[i] = v
            Utils.recurse(val)

    @staticmethod
    def recurse(val: Optional[Any]) -> None:
        """Convert nulls inside ``val`` if it is a dict or a list; else no-op."""
        if isinstance(val, dict):
            Utils.convert_dict_nulls(val)
        if isinstance(val, list):
            Utils.convert_list_nulls(val)

    @staticmethod
    def convert_nulls(res: Optional[Any]) -> Optional[Any]:
        """Convert every ``NULL_VALUE`` in ``res`` (in place) and return the result."""
        Utils.recurse(res)
        return Utils.convert_value(res)