jmespath 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of jmespath might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5ee5e74d561de8c291c5ff6dbb805638edf8a20a
4
- data.tar.gz: 73efab83f2a20e968b53f4d67dfb9bb95195b7a0
3
+ metadata.gz: d772ec4a8596343c31e4c9a6be128f965a6f46e8
4
+ data.tar.gz: 08dcbd47adb5a32f0d0b27a3ab161af48bd068c7
5
5
  SHA512:
6
- metadata.gz: 242d19aae8a4a56715e0a2c6faad19a1a204627efba6e72ac71385c9f5fbc3f46d431b116119283f1753776d9cafd706d2c205342fabd34deb68938d00ae56b5
7
- data.tar.gz: 0b29709b7df66280b5727689f2cc1bf35635fab5ddd8df885553d9a2d765000b655845f5816f01e0317e6f5ca189d08531f6fdd1131170274afb71997a911170
6
+ metadata.gz: bd3806fd6b131304b5199d871cbcfc92d0a5cae5b8c6fe551899a0a076d4868c0a3fb02a7583c19b42d2d0ed81e9ca0b806e4adb18a78c498133104f7b755999
7
+ data.tar.gz: c7fd708540946c3ed5697478ee114f7942ea0d933329d0f01f9ba326cba7784c40f41431749735c72702e80978d0ee3e1a2902aa24395f2bf9f1d68b3c09c722
data/lib/jmespath.rb CHANGED
@@ -1,4 +1,5 @@
1
- require 'multi_json'
1
+ require 'json'
2
+ require 'stringio'
2
3
  require 'pathname'
3
4
 
4
5
  module JMESPath
@@ -7,11 +8,11 @@ module JMESPath
7
8
  autoload :Errors, 'jmespath/errors'
8
9
  autoload :ExprNode, 'jmespath/expr_node'
9
10
  autoload :Lexer, 'jmespath/lexer'
11
+ autoload :Nodes, 'jmespath/nodes'
10
12
  autoload :Parser, 'jmespath/parser'
11
13
  autoload :Runtime, 'jmespath/runtime'
12
14
  autoload :Token, 'jmespath/token'
13
15
  autoload :TokenStream, 'jmespath/token_stream'
14
- autoload :TreeInterpreter, 'jmespath/tree_interpreter'
15
16
  autoload :VERSION, 'jmespath/version'
16
17
 
17
18
  class << self
@@ -26,7 +27,7 @@ module JMESPath
26
27
  data = case data
27
28
  when Hash, Struct then data # check for most common case first
28
29
  when Pathname then load_json(data)
29
- when IO, StringIO then MultiJson.load(data.read)
30
+ when IO, StringIO then JSON.load(data.read)
30
31
  else data
31
32
  end
32
33
  Runtime.new.search(expression, data)
@@ -34,7 +35,7 @@ module JMESPath
34
35
 
35
36
  # @api private
36
37
  def load_json(path)
37
- MultiJson.load(File.open(path, 'r', encoding: 'UTF-8') { |f| f.read })
38
+ JSON.load(File.open(path, 'r', encoding: 'UTF-8') { |f| f.read })
38
39
  end
39
40
 
40
41
  end
data/lib/jmespath/errors.rb CHANGED
@@ -9,6 +9,8 @@ module JMESPath
9
9
 
10
10
  class InvalidTypeError < Error; end
11
11
 
12
+ class InvalidValueError < Error; end
13
+
12
14
  class InvalidArityError < Error; end
13
15
 
14
16
  class UnknownFunctionError < Error; end
data/lib/jmespath/lexer.rb CHANGED
@@ -1,116 +1,323 @@
1
+ require 'json'
2
+ require 'set'
3
+
1
4
  module JMESPath
2
5
  # @api private
3
6
  class Lexer
4
7
 
5
- # @api private
6
- TOKEN_PATTERNS = {}
7
-
8
- # @api private
9
- TOKEN_TYPES = {}
10
-
11
- {
12
- '[a-zA-Z_][a-zA-Z_0-9]*' => :identifier,
13
- '\.' => :dot,
14
- '\*' => :star,
15
- '\[\]' => :flatten,
16
- '-?\d+' => :number,
17
- '\|\|' => :or,
18
- '\|' => :pipe,
19
- '\[\?' => :filter,
20
- '\[' => :lbracket,
21
- '\]' => :rbracket,
22
- '"(?:\\\\\\\\|\\\\"|[^"])*"' => :quoted_identifier,
23
- '`(?:\\\\\\\\|\\\\`|[^`])*`' => :literal,
24
- ',' => :comma,
25
- ':' => :colon,
26
- '@' => :current,
27
- '&' => :expref,
28
- '\(' => :lparen,
29
- '\)' => :rparen,
30
- '\{' => :lbrace,
31
- '\}' => :rbrace,
32
- '!=' => :comparator,
33
- '==' => :comparator,
34
- '<=' => :comparator,
35
- '>=' => :comparator,
36
- '<' => :comparator,
37
- '>' => :comparator,
38
- '[ \t]' => :skip,
39
- }.each.with_index do |(pattern, type), n|
40
- TOKEN_PATTERNS[n] = pattern
41
- TOKEN_TYPES[n] = type
42
- end
8
+ T_DOT = :dot
9
+ T_STAR = :star
10
+ T_COMMA = :comma
11
+ T_COLON = :colon
12
+ T_CURRENT = :current
13
+ T_EXPREF = :expref
14
+ T_LPAREN = :lparen
15
+ T_RPAREN = :rparen
16
+ T_LBRACE = :lbrace
17
+ T_RBRACE = :rbrace
18
+ T_LBRACKET = :lbracket
19
+ T_RBRACKET = :rbracket
20
+ T_FLATTEN = :flatten
21
+ T_IDENTIFIER = :identifier
22
+ T_NUMBER = :number
23
+ T_QUOTED_IDENTIFIER = :quoted_identifier
24
+ T_UNKNOWN = :unknown
25
+ T_PIPE = :pipe
26
+ T_OR = :or
27
+ T_FILTER = :filter
28
+ T_LITERAL = :literal
29
+ T_EOF = :eof
30
+ T_COMPARATOR = :comparator
31
+
32
+ STATE_IDENTIFIER = 0
33
+ STATE_NUMBER = 1
34
+ STATE_SINGLE_CHAR = 2
35
+ STATE_WHITESPACE = 3
36
+ STATE_STRING_LITERAL = 4
37
+ STATE_QUOTED_STRING = 5
38
+ STATE_JSON_LITERAL = 6
39
+ STATE_LBRACKET = 7
40
+ STATE_PIPE = 8
41
+ STATE_LT = 9
42
+ STATE_GT = 10
43
+ STATE_EQ = 11
44
+ STATE_NOT = 12
43
45
 
44
- # @api private
45
- TOKEN_REGEX = /(#{TOKEN_PATTERNS.values.join(')|(')})/
46
+ TRANSLATION_TABLE = {
47
+ '<' => STATE_LT,
48
+ '>' => STATE_GT,
49
+ '=' => STATE_EQ,
50
+ '!' => STATE_NOT,
51
+ '[' => STATE_LBRACKET,
52
+ '|' => STATE_PIPE,
53
+ '`' => STATE_JSON_LITERAL,
54
+ '"' => STATE_QUOTED_STRING,
55
+ "'" => STATE_STRING_LITERAL,
56
+ '-' => STATE_NUMBER,
57
+ '0' => STATE_NUMBER,
58
+ '1' => STATE_NUMBER,
59
+ '2' => STATE_NUMBER,
60
+ '3' => STATE_NUMBER,
61
+ '4' => STATE_NUMBER,
62
+ '5' => STATE_NUMBER,
63
+ '6' => STATE_NUMBER,
64
+ '7' => STATE_NUMBER,
65
+ '8' => STATE_NUMBER,
66
+ '9' => STATE_NUMBER,
67
+ ' ' => STATE_WHITESPACE,
68
+ "\t" => STATE_WHITESPACE,
69
+ "\n" => STATE_WHITESPACE,
70
+ "\r" => STATE_WHITESPACE,
71
+ '.' => STATE_SINGLE_CHAR,
72
+ '*' => STATE_SINGLE_CHAR,
73
+ ']' => STATE_SINGLE_CHAR,
74
+ ',' => STATE_SINGLE_CHAR,
75
+ ':' => STATE_SINGLE_CHAR,
76
+ '@' => STATE_SINGLE_CHAR,
77
+ '&' => STATE_SINGLE_CHAR,
78
+ '(' => STATE_SINGLE_CHAR,
79
+ ')' => STATE_SINGLE_CHAR,
80
+ '{' => STATE_SINGLE_CHAR,
81
+ '}' => STATE_SINGLE_CHAR,
82
+ '_' => STATE_IDENTIFIER,
83
+ 'A' => STATE_IDENTIFIER,
84
+ 'B' => STATE_IDENTIFIER,
85
+ 'C' => STATE_IDENTIFIER,
86
+ 'D' => STATE_IDENTIFIER,
87
+ 'E' => STATE_IDENTIFIER,
88
+ 'F' => STATE_IDENTIFIER,
89
+ 'G' => STATE_IDENTIFIER,
90
+ 'H' => STATE_IDENTIFIER,
91
+ 'I' => STATE_IDENTIFIER,
92
+ 'J' => STATE_IDENTIFIER,
93
+ 'K' => STATE_IDENTIFIER,
94
+ 'L' => STATE_IDENTIFIER,
95
+ 'M' => STATE_IDENTIFIER,
96
+ 'N' => STATE_IDENTIFIER,
97
+ 'O' => STATE_IDENTIFIER,
98
+ 'P' => STATE_IDENTIFIER,
99
+ 'Q' => STATE_IDENTIFIER,
100
+ 'R' => STATE_IDENTIFIER,
101
+ 'S' => STATE_IDENTIFIER,
102
+ 'T' => STATE_IDENTIFIER,
103
+ 'U' => STATE_IDENTIFIER,
104
+ 'V' => STATE_IDENTIFIER,
105
+ 'W' => STATE_IDENTIFIER,
106
+ 'X' => STATE_IDENTIFIER,
107
+ 'Y' => STATE_IDENTIFIER,
108
+ 'Z' => STATE_IDENTIFIER,
109
+ 'a' => STATE_IDENTIFIER,
110
+ 'b' => STATE_IDENTIFIER,
111
+ 'c' => STATE_IDENTIFIER,
112
+ 'd' => STATE_IDENTIFIER,
113
+ 'e' => STATE_IDENTIFIER,
114
+ 'f' => STATE_IDENTIFIER,
115
+ 'g' => STATE_IDENTIFIER,
116
+ 'h' => STATE_IDENTIFIER,
117
+ 'i' => STATE_IDENTIFIER,
118
+ 'j' => STATE_IDENTIFIER,
119
+ 'k' => STATE_IDENTIFIER,
120
+ 'l' => STATE_IDENTIFIER,
121
+ 'm' => STATE_IDENTIFIER,
122
+ 'n' => STATE_IDENTIFIER,
123
+ 'o' => STATE_IDENTIFIER,
124
+ 'p' => STATE_IDENTIFIER,
125
+ 'q' => STATE_IDENTIFIER,
126
+ 'r' => STATE_IDENTIFIER,
127
+ 's' => STATE_IDENTIFIER,
128
+ 't' => STATE_IDENTIFIER,
129
+ 'u' => STATE_IDENTIFIER,
130
+ 'v' => STATE_IDENTIFIER,
131
+ 'w' => STATE_IDENTIFIER,
132
+ 'x' => STATE_IDENTIFIER,
133
+ 'y' => STATE_IDENTIFIER,
134
+ 'z' => STATE_IDENTIFIER,
135
+ }
46
136
 
47
- # @api private
48
- JSON_VALUE = /^[\["{]/
137
+ VALID_IDENTIFIERS = Set.new(%w(
138
+ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
139
+ a b c d e f g h i j k l m n o p q r s t u v w x y z
140
+ _ 0 1 2 3 4 5 6 7 8 9
141
+ ))
49
142
 
50
- # @api private
51
- JSON_NUMBER = /^\-?[0-9]*(\.[0-9]+)?([e|E][+|\-][0-9]+)?$/
143
+ NUMBERS = Set.new(%w(0 1 2 3 4 5 6 7 8 9))
144
+
145
+ SIMPLE_TOKENS = {
146
+ '.' => T_DOT,
147
+ '*' => T_STAR,
148
+ ']' => T_RBRACKET,
149
+ ',' => T_COMMA,
150
+ ':' => T_COLON,
151
+ '@' => T_CURRENT,
152
+ '&' => T_EXPREF,
153
+ '(' => T_LPAREN,
154
+ ')' => T_RPAREN,
155
+ '{' => T_LBRACE,
156
+ '}' => T_RBRACE,
157
+ }
52
158
 
53
159
  # @param [String<JMESPath>] expression
54
160
  # @return [Array<Hash>]
55
161
  def tokenize(expression)
56
- offset = 0
162
+
57
163
  tokens = []
58
- expression.scan(TOKEN_REGEX).each do |match|
59
- match_index = match.find_index { |token| !token.nil? }
60
- match_value = match[match_index]
61
- type = TOKEN_TYPES[match_index]
62
- token = Token.new(type, match_value, offset)
63
- if token.type != :skip
64
- case token.type
65
- when :number then token_number(token, expression, offset)
66
- when :literal then token_literal(token, expression, offset)
67
- when :quoted_identifier
68
- token_quoted_identifier(token, expression, offset)
164
+ chars = CharacterStream.new(expression.chars)
165
+
166
+ while chars.current
167
+ case TRANSLATION_TABLE[chars.current]
168
+ when nil
169
+ tokens << Token.new(
170
+ T_UNKNOWN,
171
+ chars.current,
172
+ chars.position
173
+ )
174
+ chars.next
175
+ when STATE_SINGLE_CHAR
176
+ # consume simple tokens like ".", ",", "@", etc.
177
+ tokens << Token.new(
178
+ SIMPLE_TOKENS[chars.current],
179
+ chars.current,
180
+ chars.position
181
+ )
182
+ chars.next
183
+ when STATE_IDENTIFIER
184
+ start = chars.position
185
+ buffer = []
186
+ begin
187
+ buffer << chars.current
188
+ chars.next
189
+ end while VALID_IDENTIFIERS.include?(chars.current)
190
+ tokens << Token.new(
191
+ T_IDENTIFIER,
192
+ buffer.join,
193
+ start
194
+ )
195
+ when STATE_WHITESPACE
196
+ # skip whitespace
197
+ chars.next
198
+ when STATE_LBRACKET
199
+ # consume "[", "[?" and "[]"
200
+ position = chars.position
201
+ actual = chars.next
202
+ if actual == ']'
203
+ chars.next
204
+ tokens << Token.new(T_FLATTEN, '[]', position)
205
+ elsif actual == '?'
206
+ chars.next
207
+ tokens << Token.new(T_FILTER, '[?', position)
208
+ else
209
+ tokens << Token.new(T_LBRACKET, '[', position)
210
+ end
211
+ when STATE_STRING_LITERAL
212
+ # consume raw string literals
213
+ tokens << inside(chars, "'", T_LITERAL)
214
+ when STATE_PIPE
215
+ # consume pipe and OR
216
+ tokens << match_or(chars, '|', '|', T_OR, T_PIPE)
217
+ when STATE_JSON_LITERAL
218
+ # consume JSON literals
219
+ token = inside(chars, '`', T_LITERAL)
220
+ if token.type == T_LITERAL
221
+ token.value = token.value.gsub('\\`', '`')
222
+ token = parse_json(token)
223
+ end
224
+ tokens << token
225
+ when STATE_NUMBER
226
+ start = chars.position
227
+ buffer = []
228
+ begin
229
+ buffer << chars.current
230
+ chars.next
231
+ end while NUMBERS.include?(chars.current)
232
+ tokens << Token.new(
233
+ T_NUMBER,
234
+ buffer.join.to_i,
235
+ start
236
+ )
237
+ when STATE_QUOTED_STRING
238
+ # consume quoted identifiers
239
+ token = inside(chars, '"', T_QUOTED_IDENTIFIER)
240
+ if token.type == T_QUOTED_IDENTIFIER
241
+ token.value = "\"#{token.value}\""
242
+ token = parse_json(token)
69
243
  end
70
244
  tokens << token
245
+ when STATE_EQ
246
+ # consume equals
247
+ tokens << match_or(chars, '=', '=', T_COMPARATOR, T_UNKNOWN)
248
+ when STATE_NOT
249
+ # consume not equals
250
+ tokens << match_or(chars, '!', '=', T_COMPARATOR, T_UNKNOWN)
251
+ else
252
+ # either '<' or '>'
253
+ # consume less than and greater than
254
+ tokens << match_or(chars, chars.current, '=', T_COMPARATOR, T_COMPARATOR)
71
255
  end
72
- offset += match_value.size
73
- end
74
- tokens << Token.new(:eof, nil, offset)
75
- unless expression.size == offset
76
- syntax_error('invalid expression', expression, offset)
77
256
  end
257
+ tokens << Token.new(T_EOF, nil, chars.position)
78
258
  tokens
79
259
  end
80
260
 
81
261
  private
82
262
 
83
- def token_number(token, expression, offset)
84
- token[:value] = token[:value].to_i
263
+ def match_or(chars, current, expected, type, or_type)
264
+ if chars.next == expected
265
+ chars.next
266
+ Token.new(type, current + expected, chars.position - 1)
267
+ else
268
+ Token.new(or_type, current, chars.position - 1)
269
+ end
85
270
  end
86
271
 
87
- def token_literal(token, expression, offset)
88
- token[:value] = token[:value][1..-2].lstrip.gsub('\`', '`')
89
- token[:value] =
90
- case token[:value]
91
- when 'true', 'false' then token[:value] == 'true'
92
- when 'null' then nil
93
- when '' then syntax_error("empty json literal", expression, offset)
94
- when JSON_VALUE then decode_json(token[:value], expression, offset)
95
- when JSON_NUMBER then decode_json(token[:value], expression, offset)
96
- else decode_json('"' + token[:value] + '"', expression, offset)
272
+ def inside(chars, delim, type)
273
+ position = chars.position
274
+ current = chars.next
275
+ buffer = []
276
+ while current != delim
277
+ if current == '\\'
278
+ buffer << current
279
+ current = chars.next
280
+ end
281
+ if current.nil?
282
+ # unclosed delimiter
283
+ return Token.new(T_UNKNOWN, buffer.join, position)
97
284
  end
285
+ buffer << current
286
+ current = chars.next
287
+ end
288
+ chars.next
289
+ Token.new(type, buffer.join, position)
98
290
  end
99
291
 
100
- def token_quoted_identifier(token, expression, offset)
101
- token[:value] = decode_json(token[:value], expression, offset)
292
+ def parse_json(token)
293
+ begin
294
+ token.value = JSON.load(token.value)
295
+ rescue JSON::ParserError
296
+ token.type = T_UNKNOWN
297
+ end
298
+ token
102
299
  end
103
300
 
104
- def decode_json(json, expression, offset)
105
- MultiJson.load(json)
106
- rescue MultiJson::ParseError => e
107
- syntax_error(e.message, expression, offset)
108
- end
301
+ class CharacterStream
109
302
 
110
- def syntax_error(message, expression, offset)
111
- msg = message + "in #{expression.inspect} at #{offset}"
112
- raise Errors::SyntaxError.new(msg)
113
- end
303
+ def initialize(chars)
304
+ @chars = chars
305
+ @position = 0
306
+ end
307
+
308
+ def current
309
+ @chars[@position]
310
+ end
311
+
312
+ def next
313
+ @position += 1
314
+ @chars[@position]
315
+ end
114
316
 
317
+ def position
318
+ @position
319
+ end
320
+
321
+ end
115
322
  end
116
323
  end