jsonata-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jsonata/tokenizer.py ADDED
@@ -0,0 +1,306 @@
1
+ #
2
+ # Copyright Robert Yokota
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License")
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ #
17
+ # Derived from the following code:
18
+ #
19
+ # Project name: jsonata-java
20
+ # Copyright Dashjoin GmbH. https://dashjoin.com
21
+ # Licensed under the Apache License, Version 2.0 (the "License")
22
+ #
23
+ # Project name: JSONata
24
+ # © Copyright IBM Corp. 2016, 2018 All Rights Reserved
25
+ # This project is licensed under the MIT License, see LICENSE
26
+ #
27
+
28
+ import math
29
+ import re
30
+ from dataclasses import dataclass
31
+ from typing import Any, Optional
32
+
33
+ from jsonata import jexception, utils
34
+
35
+
36
class Tokenizer:
    """Lexer for JSONata expressions.

    The parser calls :meth:`next` repeatedly. Each call consumes one token
    from ``path`` starting at ``position`` and returns it, or returns ``None``
    once the input is exhausted.
    """

    # Operator symbols and keywords. Membership in this table is what makes a
    # character terminate an unquoted name. The numeric values are presumably
    # the parser's binding powers -- NOTE(review): confirm against the parser.
    operators = {
        '.': 75,
        '[': 80,
        ']': 0,
        '{': 70,
        '}': 0,
        '(': 80,
        ')': 0,
        ',': 0,
        '@': 80,
        '#': 80,
        ';': 80,
        ':': 80,
        '?': 20,
        '+': 50,
        '-': 50,
        '*': 60,
        '/': 60,
        '%': 60,
        '|': 20,
        '=': 40,
        '<': 40,
        '>': 40,
        '^': 40,
        '**': 60,
        '..': 20,
        ':=': 10,
        '!=': 40,
        '<=': 40,
        '>=': 40,
        '~>': 40,
        'and': 30,
        'or': 25,
        'in': 40,
        '&': 50,
        '!': 0,  # not an operator, but needed as a stop character for name tokens
        '~': 0  # not an operator, but needed as a stop character for name tokens
    }

    # Single-character escapes allowed after a backslash in a string literal.
    escapes = {
        '"': '"',
        '\\': '\\',
        '/': '/',
        'b': '\b',
        'f': '\f',
        'n': '\n',
        'r': '\r',
        't': '\t'
    }

    # Characters skipped between tokens.
    _WHITESPACE = " \t\n\r"

    # Operators made of two punctuation characters; tried before single ones.
    _DOUBLE_CHAR_OPERATORS = frozenset(("..", ":=", "!=", ">=", "<=", "**", "~>"))

    # Bare names that denote literal values rather than field names.
    _LITERALS = {"true": True, "false": False, "null": None}

    # Numeric literal. Compiled once and applied with Pattern.match(path, pos),
    # which anchors at ``pos``; hence no leading '^' and no slicing of the input.
    _NUMBER_RE = re.compile(r"-?(0|([1-9][0-9]*))(\.[0-9]+)?([Ee][-+]?[0-9]+)?")

    # Exactly four hex digits, as required after a "\u" escape.
    _HEX4_RE = re.compile(r"[0-9a-fA-F]{4}")

    position: int
    depth: int
    path: str
    length: int

    def __init__(self, path):
        """Prepare to tokenize the expression text ``path`` from offset 0."""
        self.position = 0
        # Bracket nesting level tracked while scanning a regex literal.
        self.depth = 0

        self.path = path
        self.length = len(path)

    @dataclass
    class Token:
        """One lexical token produced by the tokenizer."""
        # "operator", "string", "number", "name", "variable", "value" or "regex"
        type: Optional[str]
        value: Optional[Any]
        # Offset in the expression just past the token (position at creation).
        position: int
        # Never set by the tokenizer itself.
        id: Optional[Any] = None

    def create(self, type: Optional[str], value: Optional[Any]) -> Token:
        """Build a token stamped with the current (post-consumption) position."""
        return Tokenizer.Token(type, value, self.position)

    def is_closing_slash(self, position: int) -> bool:
        """Return True if ``path[position]`` is an unescaped '/' at bracket depth 0.

        A slash preceded by an odd number of backslashes is escaped and
        therefore does not terminate the regex.
        """
        if self.path[position] != '/' or self.depth != 0:
            return False
        backslash_count = 0
        idx = position - 1
        # Stop at the start of the input instead of wrapping to path[-1].
        while idx >= 0 and self.path[idx] == '\\':
            backslash_count += 1
            idx -= 1
        return backslash_count % 2 == 0

    def scan_regex(self) -> re.Pattern:
        """Scan a regex literal whose opening '/' has already been consumed.

        Finds the closing '/', ignoring any that are escaped or inside
        brackets, then reads optional ``i`` / ``m`` flags.

        Returns:
            The compiled pattern (``re.I`` / ``re.M`` set according to flags).

        Raises:
            jexception.JException: "S0301" for an empty pattern, "S0302" when
                no closing slash is found.
        """
        start = self.position

        while self.position < self.length:
            current_char = self.path[self.position]
            if self.is_closing_slash(self.position):
                # end of regex found
                pattern = self.path[start:self.position]
                if pattern == "":
                    raise jexception.JException("S0301", self.position)
                self.position += 1
                # Flags: bounds-checked so a regex at the very end of the
                # expression does not read past the input.
                flags_start = self.position
                while self.position < self.length and self.path[self.position] in "im":
                    self.position += 1
                flags = self.path[flags_start:self.position]

                _flags = 0
                if "i" in flags:
                    _flags |= re.I
                if "m" in flags:
                    _flags |= re.M
                return re.compile(pattern, _flags)
            escaped = self.position > 0 and self.path[self.position - 1] == '\\'
            if not escaped:
                if current_char in "([{":
                    self.depth += 1
                elif current_char in ")]}":
                    self.depth -= 1
            self.position += 1
        raise jexception.JException("S0302", self.position)

    def _skip_trivia(self) -> bool:
        """Advance past whitespace and ``/* ... */`` comments.

        Returns:
            True if a token character remains at ``position``, False at end
            of input.

        Raises:
            jexception.JException: "S0106" (at the comment start) when a
                comment is never closed.
        """
        while self.position < self.length:
            if self.path[self.position] in Tokenizer._WHITESPACE:
                self.position += 1
            elif self.path.startswith("/*", self.position):
                close = self.path.find("*/", self.position + 2)
                if close == -1:
                    # no closing tag
                    raise jexception.JException("S0106", self.position)
                self.position = close + 2
            else:
                return True
        return False

    @staticmethod
    def _join_surrogate_pairs(text: str) -> str:
        """Merge UTF-16 surrogate pairs (from ``\\uXXXX\\uXXXX``) into one code point.

        Lone surrogates are passed through unchanged.
        """
        return text.encode("utf-16-le", "surrogatepass").decode("utf-16-le", "surrogatepass")

    def _scan_string(self, quote_type: str) -> Token:
        """Scan a string literal; ``position`` is on the opening quote.

        Raises:
            jexception.JException: "S0101" if the literal is not terminated,
                "S0103" for an unsupported escape, "S0104" if ``\\u`` is not
                followed by four hex digits.
        """
        self.position += 1  # skip the opening quote
        parts = []
        saw_unicode_escape = False
        while self.position < self.length:
            current_char = self.path[self.position]
            if current_char == '\\':
                self.position += 1
                if self.position >= self.length:
                    # A trailing backslash means the literal can never close.
                    raise jexception.JException("S0101", self.position)
                current_char = self.path[self.position]
                if current_char in Tokenizer.escapes:
                    parts.append(Tokenizer.escapes[current_char])
                elif current_char == 'u':
                    # u should be followed by 4 hex digits
                    octets = self.path[self.position + 1:self.position + 5]
                    if Tokenizer._HEX4_RE.fullmatch(octets) is None:
                        raise jexception.JException("S0104", self.position)
                    parts.append(chr(int(octets, 16)))
                    saw_unicode_escape = True
                    self.position += 4
                else:
                    # Illegal escape sequence. S0103 is JSONata's code for an
                    # unsupported escape (S0301 means "empty regex").
                    raise jexception.JException("S0103", self.position, current_char)
            elif current_char == quote_type:
                self.position += 1
                text = "".join(parts)
                if saw_unicode_escape:
                    text = Tokenizer._join_surrogate_pairs(text)
                return self.create("string", text)
            else:
                parts.append(current_char)
            self.position += 1
        raise jexception.JException("S0101", self.position)

    def _scan_name(self) -> Token:
        """Scan an unquoted name, ``$variable``, keyword operator or literal.

        Must be called with ``position`` on a character that is neither
        whitespace nor an operator, so at least one character is consumed.
        """
        start = self.position
        end = start
        while end < self.length:
            ch = self.path[end]
            if ch in Tokenizer._WHITESPACE or ch in Tokenizer.operators:
                break
            end += 1
        self.position = end
        if self.path[start] == '$':
            # variable reference
            return self.create("variable", self.path[start + 1:end])
        name = self.path[start:end]
        if name in ("or", "in", "and"):
            return self.create("operator", name)
        if name in Tokenizer._LITERALS:
            return self.create("value", Tokenizer._LITERALS[name])
        return self.create("name", name)

    def next(self, prefix: bool) -> Optional[Token]:
        """Consume and return the next token, or None at end of input.

        Args:
            prefix: when False, a '/' at the current position starts a regex
                literal; when True it is returned as the '/' operator.

        Raises:
            jexception.JException: for unterminated comments, strings,
                regexes or backtick names, bad escapes, and out-of-range
                numbers.
        """
        if not self._skip_trivia():
            return None
        current_char = self.path[self.position]
        # test for regex
        if not prefix and current_char == '/':
            self.position += 1
            return self.create("regex", self.scan_regex())
        # handle double-char operators (the slice is shorter at end of input)
        pair = self.path[self.position:self.position + 2]
        if pair in Tokenizer._DOUBLE_CHAR_OPERATORS:
            self.position += 2
            return self.create("operator", pair)
        # test for single char operators
        if current_char in Tokenizer.operators:
            self.position += 1
            return self.create("operator", current_char)
        # test for string literals
        if current_char == '"' or current_char == '\'':
            return self._scan_string(current_char)
        # test for numbers
        match_ = Tokenizer._NUMBER_RE.match(self.path, self.position)
        if match_ is not None:
            literal = match_.group(0)
            num = float(literal)
            if math.isfinite(num):
                self.position += len(literal)
                # If the number is integral, an int is returned
                return self.create("number", utils.Utils.convert_number(num))
            raise jexception.JException("S0102", self.position)
        # test for quoted names (backticks)
        if current_char == '`':
            # scan for closing quote
            self.position += 1
            end = self.path.find('`', self.position)
            if end != -1:
                name = self.path[self.position:end]
                self.position = end + 1
                return self.create("name", name)
            self.position = self.length
            raise jexception.JException("S0105", self.position)
        # test for names
        return self._scan_name()
jsonata/utils.py ADDED
@@ -0,0 +1,150 @@
1
+ #
2
+ # Copyright Robert Yokota
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License")
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ # Derived from the following code:
17
+ #
18
+ # Project name: jsonata-java
19
+ # Copyright Dashjoin GmbH. https://dashjoin.com
20
+ # Licensed under the Apache License, Version 2.0 (the "License")
21
+ #
22
+
23
+ import math
24
+ from typing import Any, MutableMapping, MutableSequence, Optional
25
+
26
+ from jsonata import jexception
27
+
28
+
29
class Utils:
    """Static helpers: type predicates, sequence construction, null conversion."""

    class NullValue:
        """Type of the ``NULL_VALUE`` sentinel; its repr is ``null``."""

        def __repr__(self):
            return "null"

    # Sentinel that convert_value() / convert_nulls() translate to None.
    # Presumably marks an explicit JSON null -- verify against the evaluator.
    NULL_VALUE = NullValue()

    @staticmethod
    def is_numeric(v: Optional[Any]) -> bool:
        """Return True for ints and for finite, non-NaN floats.

        Booleans are not treated as numbers; NaN yields False.

        Raises:
            jexception.JException: "D1001" if ``v`` is an infinite float.
        """
        if isinstance(v, bool):
            return False
        if isinstance(v, int):
            return True
        if not isinstance(v, float) or math.isnan(v):
            return False
        if not math.isfinite(v):
            raise jexception.JException("D1001", 0, v)
        return True

    @staticmethod
    def is_array_of_strings(v: Optional[Any]) -> bool:
        """Return True if ``v`` is a list whose items are all ``str`` (empty list included)."""
        return isinstance(v, list) and all(isinstance(o, str) for o in v)

    @staticmethod
    def is_array_of_numbers(v: Optional[Any]) -> bool:
        """Return True if ``v`` is a list whose items all satisfy is_numeric().

        May raise the same "D1001" error as is_numeric() for an infinite item.
        """
        return isinstance(v, list) and all(Utils.is_numeric(o) for o in v)

    @staticmethod
    def is_function(o: Optional[Any]) -> bool:
        """Return True if ``o`` is a ``Jsonata.JFunction`` or ``Jsonata.JFunctionCallable``."""
        # Imported here rather than at module level (presumably to avoid a
        # circular import between the two modules).
        from jsonata import jsonata
        return isinstance(o, (jsonata.Jsonata.JFunction, jsonata.Jsonata.JFunctionCallable))

    # Default marker for "no argument supplied", distinct from None.
    NONE = object()

    @staticmethod
    def create_sequence(el: Optional[Any] = NONE) -> list:
        """Create a JList flagged as a sequence, optionally seeded with ``el``.

        A one-element list is unwrapped and its single item appended; any
        other value (including None and longer lists) is appended as-is.
        """
        sequence = Utils.JList()
        sequence.sequence = True
        if el is not Utils.NONE:
            if isinstance(el, list) and len(el) == 1:
                sequence.append(el[0])
            else:
                # This case does NOT exist in Javascript! Why?
                sequence.append(el)
        return sequence

    class JList(list):
        """A list carrying JSONata-specific boolean flags, all False initially."""

        sequence: bool
        outer_wrapper: bool
        tuple_stream: bool
        keep_singleton: bool
        cons: bool

        def __init__(self, c=()):
            super().__init__(c)
            self.sequence = False
            self.outer_wrapper = False
            self.tuple_stream = False
            self.keep_singleton = False
            self.cons = False

    @staticmethod
    def is_sequence(result: Optional[Any]) -> bool:
        """Return True if ``result`` is a JList whose ``sequence`` flag is set."""
        return isinstance(result, Utils.JList) and result.sequence

    @staticmethod
    def convert_number(n: float) -> Optional[float]:
        """Normalise a number: integral values become ``int``, others ``float``.

        Returns None when ``n`` is not numeric according to is_numeric().
        """
        if not Utils.is_numeric(n):
            return None
        # The former second "int(v) == v" check was always true, so its else
        # branch was unreachable; a single comparison is sufficient.
        if int(n) == float(n):
            return int(n)
        return float(n)

    @staticmethod
    def convert_value(val: Optional[Any]) -> Optional[Any]:
        """Map the NULL_VALUE sentinel to None; return anything else unchanged."""
        return None if val is Utils.NULL_VALUE else val

    @staticmethod
    def convert_dict_nulls(res: MutableMapping[str, Any]) -> None:
        """Replace NULL_VALUE entries in ``res`` in place, descending into nested dicts/lists."""
        # Only existing keys are reassigned, so the dict's size never changes
        # during iteration.
        for key, val in res.items():
            v = Utils.convert_value(val)
            if v is not val:
                res[key] = v
            Utils.recurse(val)

    @staticmethod
    def convert_list_nulls(res: MutableSequence[Any]) -> None:
        """Replace NULL_VALUE items in ``res`` in place, descending into nested dicts/lists."""
        for i, val in enumerate(res):
            v = Utils.convert_value(val)
            if v is not val:
                res[i] = v
            Utils.recurse(val)

    @staticmethod
    def recurse(val: Optional[Any]) -> None:
        """Dispatch in-place null conversion for a dict or list; ignore other values."""
        if isinstance(val, dict):
            Utils.convert_dict_nulls(val)
        if isinstance(val, list):
            Utils.convert_list_nulls(val)

    @staticmethod
    def convert_nulls(res: Optional[Any]) -> Optional[Any]:
        """Convert NULL_VALUE to None throughout ``res``.

        Containers are modified in place; the (converted) top-level value is
        returned.
        """
        Utils.recurse(res)
        return Utils.convert_value(res)