prism 0.13.0 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
data/include/prism/ast.h CHANGED
@@ -358,8 +358,10 @@ typedef uint16_t pm_node_flags_t;
358
358
 
359
359
  // We store the flags enum in every node in the tree. Some flags are common to
360
360
  // all nodes (the ones listed below). Others are specific to certain node types.
361
- static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = 0x1;
362
- static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = 0x2;
361
+ #define PM_NODE_FLAG_BITS (sizeof(pm_node_flags_t) * 8)
362
+ static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = (1 << (PM_NODE_FLAG_BITS - 1));
363
+ static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = (1 << (PM_NODE_FLAG_BITS - 2));
364
+ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = PM_NODE_FLAG_NEWLINE | PM_NODE_FLAG_STATIC_LITERAL;
363
365
 
364
366
  // For easy access, we define some macros to check node type
365
367
  #define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
@@ -562,8 +564,8 @@ typedef struct pm_call_and_write_node {
562
564
  pm_location_t opening_loc;
563
565
  struct pm_arguments_node *arguments;
564
566
  pm_location_t closing_loc;
565
- pm_string_t read_name;
566
- pm_string_t write_name;
567
+ pm_constant_id_t read_name;
568
+ pm_constant_id_t write_name;
567
569
  pm_location_t operator_loc;
568
570
  struct pm_node *value;
569
571
  } pm_call_and_write_node_t;
@@ -583,7 +585,7 @@ typedef struct pm_call_node {
583
585
  struct pm_arguments_node *arguments;
584
586
  pm_location_t closing_loc;
585
587
  struct pm_node *block;
586
- pm_string_t name;
588
+ pm_constant_id_t name;
587
589
  } pm_call_node_t;
588
590
 
589
591
  // CallOperatorWriteNode
@@ -600,8 +602,8 @@ typedef struct pm_call_operator_write_node {
600
602
  pm_location_t opening_loc;
601
603
  struct pm_arguments_node *arguments;
602
604
  pm_location_t closing_loc;
603
- pm_string_t read_name;
604
- pm_string_t write_name;
605
+ pm_constant_id_t read_name;
606
+ pm_constant_id_t write_name;
605
607
  pm_constant_id_t operator;
606
608
  pm_location_t operator_loc;
607
609
  struct pm_node *value;
@@ -621,8 +623,8 @@ typedef struct pm_call_or_write_node {
621
623
  pm_location_t opening_loc;
622
624
  struct pm_arguments_node *arguments;
623
625
  pm_location_t closing_loc;
624
- pm_string_t read_name;
625
- pm_string_t write_name;
626
+ pm_constant_id_t read_name;
627
+ pm_constant_id_t write_name;
626
628
  pm_location_t operator_loc;
627
629
  struct pm_node *value;
628
630
  } pm_call_or_write_node_t;
@@ -1193,11 +1195,11 @@ typedef struct pm_integer_node {
1193
1195
  // PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
1194
1196
  // PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
1195
1197
  // PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
1198
+ // PM_REGULAR_EXPRESSION_FLAGS_ONCE
1196
1199
  // PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
1197
1200
  // PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
1198
1201
  // PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
1199
1202
  // PM_REGULAR_EXPRESSION_FLAGS_UTF_8
1200
- // PM_REGULAR_EXPRESSION_FLAGS_ONCE
1201
1203
  typedef struct pm_interpolated_match_last_line_node {
1202
1204
  pm_node_t base;
1203
1205
  pm_location_t opening_loc;
@@ -1212,11 +1214,11 @@ typedef struct pm_interpolated_match_last_line_node {
1212
1214
  // PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
1213
1215
  // PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
1214
1216
  // PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
1217
+ // PM_REGULAR_EXPRESSION_FLAGS_ONCE
1215
1218
  // PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
1216
1219
  // PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
1217
1220
  // PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
1218
1221
  // PM_REGULAR_EXPRESSION_FLAGS_UTF_8
1219
- // PM_REGULAR_EXPRESSION_FLAGS_ONCE
1220
1222
  typedef struct pm_interpolated_regular_expression_node {
1221
1223
  pm_node_t base;
1222
1224
  pm_location_t opening_loc;
@@ -1369,11 +1371,11 @@ typedef struct pm_local_variable_write_node {
1369
1371
  // PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
1370
1372
  // PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
1371
1373
  // PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
1374
+ // PM_REGULAR_EXPRESSION_FLAGS_ONCE
1372
1375
  // PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
1373
1376
  // PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
1374
1377
  // PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
1375
1378
  // PM_REGULAR_EXPRESSION_FLAGS_UTF_8
1376
- // PM_REGULAR_EXPRESSION_FLAGS_ONCE
1377
1379
  typedef struct pm_match_last_line_node {
1378
1380
  pm_node_t base;
1379
1381
  pm_location_t opening_loc;
@@ -1616,11 +1618,11 @@ typedef struct pm_redo_node {
1616
1618
  // PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
1617
1619
  // PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
1618
1620
  // PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
1621
+ // PM_REGULAR_EXPRESSION_FLAGS_ONCE
1619
1622
  // PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
1620
1623
  // PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
1621
1624
  // PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
1622
1625
  // PM_REGULAR_EXPRESSION_FLAGS_UTF_8
1623
- // PM_REGULAR_EXPRESSION_FLAGS_ONCE
1624
1626
  typedef struct pm_regular_expression_node {
1625
1627
  pm_node_t base;
1626
1628
  pm_location_t opening_loc;
@@ -1887,44 +1889,44 @@ typedef struct pm_yield_node {
1887
1889
  } pm_yield_node_t;
1888
1890
 
1889
1891
  // CallNodeFlags
1890
- typedef enum {
1891
- PM_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 << 2,
1892
- PM_CALL_NODE_FLAGS_VARIABLE_CALL = 1 << 3,
1892
+ typedef enum pm_call_node_flags {
1893
+ PM_CALL_NODE_FLAGS_SAFE_NAVIGATION = 1 << 0,
1894
+ PM_CALL_NODE_FLAGS_VARIABLE_CALL = 1 << 1,
1893
1895
  } pm_call_node_flags_t;
1894
1896
 
1895
1897
  // IntegerBaseFlags
1896
- typedef enum {
1897
- PM_INTEGER_BASE_FLAGS_BINARY = 1 << 2,
1898
- PM_INTEGER_BASE_FLAGS_OCTAL = 1 << 3,
1899
- PM_INTEGER_BASE_FLAGS_DECIMAL = 1 << 4,
1900
- PM_INTEGER_BASE_FLAGS_HEXADECIMAL = 1 << 5,
1898
+ typedef enum pm_integer_base_flags {
1899
+ PM_INTEGER_BASE_FLAGS_BINARY = 1 << 0,
1900
+ PM_INTEGER_BASE_FLAGS_OCTAL = 1 << 1,
1901
+ PM_INTEGER_BASE_FLAGS_DECIMAL = 1 << 2,
1902
+ PM_INTEGER_BASE_FLAGS_HEXADECIMAL = 1 << 3,
1901
1903
  } pm_integer_base_flags_t;
1902
1904
 
1903
1905
  // LoopFlags
1904
- typedef enum {
1905
- PM_LOOP_FLAGS_BEGIN_MODIFIER = 1 << 2,
1906
+ typedef enum pm_loop_flags {
1907
+ PM_LOOP_FLAGS_BEGIN_MODIFIER = 1 << 0,
1906
1908
  } pm_loop_flags_t;
1907
1909
 
1908
1910
  // RangeFlags
1909
- typedef enum {
1910
- PM_RANGE_FLAGS_EXCLUDE_END = 1 << 2,
1911
+ typedef enum pm_range_flags {
1912
+ PM_RANGE_FLAGS_EXCLUDE_END = 1 << 0,
1911
1913
  } pm_range_flags_t;
1912
1914
 
1913
1915
  // RegularExpressionFlags
1914
- typedef enum {
1915
- PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 << 2,
1916
- PM_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 << 3,
1917
- PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 << 4,
1918
- PM_REGULAR_EXPRESSION_FLAGS_EUC_JP = 1 << 5,
1919
- PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT = 1 << 6,
1920
- PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J = 1 << 7,
1921
- PM_REGULAR_EXPRESSION_FLAGS_UTF_8 = 1 << 8,
1922
- PM_REGULAR_EXPRESSION_FLAGS_ONCE = 1 << 9,
1916
+ typedef enum pm_regular_expression_flags {
1917
+ PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE = 1 << 0,
1918
+ PM_REGULAR_EXPRESSION_FLAGS_EXTENDED = 1 << 1,
1919
+ PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE = 1 << 2,
1920
+ PM_REGULAR_EXPRESSION_FLAGS_ONCE = 1 << 3,
1921
+ PM_REGULAR_EXPRESSION_FLAGS_EUC_JP = 1 << 4,
1922
+ PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT = 1 << 5,
1923
+ PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J = 1 << 6,
1924
+ PM_REGULAR_EXPRESSION_FLAGS_UTF_8 = 1 << 7,
1923
1925
  } pm_regular_expression_flags_t;
1924
1926
 
1925
1927
  // StringFlags
1926
- typedef enum {
1927
- PM_STRING_FLAGS_FROZEN = 1 << 2,
1928
+ typedef enum pm_string_flags {
1929
+ PM_STRING_FLAGS_FROZEN = 1 << 0,
1928
1930
  } pm_string_flags_t;
1929
1931
 
1930
1932
  #define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS false
data/include/prism/node.h CHANGED
@@ -33,6 +33,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
33
33
  // declare them here to avoid generating them.
34
34
  typedef struct pm_scope_node {
35
35
  pm_node_t base;
36
+ pm_node_t *ast_node;
36
37
  struct pm_parameters_node *parameters;
37
38
  pm_node_t *body;
38
39
  pm_constant_id_list_t locals;
@@ -8,6 +8,7 @@
8
8
  #include "prism/util/pm_list.h"
9
9
  #include "prism/util/pm_newline_list.h"
10
10
  #include "prism/util/pm_state_stack.h"
11
+ #include "prism/util/pm_string.h"
11
12
 
12
13
  #include <stdbool.h>
13
14
 
@@ -172,6 +173,11 @@ typedef struct pm_lex_mode {
172
173
  // This is the pointer to the character where lexing should resume
173
174
  // once the heredoc has been completely processed.
174
175
  const uint8_t *next_start;
176
+
177
+ // This is used to track the amount of common whitespace on each
178
+ // line so that we know how much to dedent each line in the case of
179
+ // a tilde heredoc.
180
+ size_t common_whitespace;
175
181
  } heredoc;
176
182
  } as;
177
183
 
@@ -244,6 +250,16 @@ typedef struct pm_comment {
244
250
  pm_comment_type_t type;
245
251
  } pm_comment_t;
246
252
 
253
+ // This is a node in the linked list of magic comments that we've found while
254
+ // parsing.
255
+ typedef struct {
256
+ pm_list_node_t node;
257
+ const uint8_t *key_start;
258
+ const uint8_t *value_start;
259
+ uint32_t key_length;
260
+ uint32_t value_length;
261
+ } pm_magic_comment_t;
262
+
247
263
  // When the encoding that is being used to parse the source is changed by prism,
248
264
  // we provide the ability here to call out to a user-defined function.
249
265
  typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
@@ -293,6 +309,11 @@ typedef struct pm_scope {
293
309
  // This is necessary to determine if child blocks are allowed to use
294
310
  // numbered parameters.
295
311
  bool numbered_params;
312
+
313
+ // A transparent scope is a scope that cannot have locals set on itself.
314
+ // When a local is set on this scope, it will instead be set on the parent
315
+ // scope's local table.
316
+ bool transparent;
296
317
  } pm_scope_t;
297
318
 
298
319
  // This struct represents the overall parser. It contains a reference to the
@@ -342,6 +363,7 @@ struct pm_parser {
342
363
  const uint8_t *heredoc_end;
343
364
 
344
365
  pm_list_t comment_list; // the list of comments that have been found while parsing
366
+ pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing.
345
367
  pm_list_t warning_list; // the list of warnings that have been found while parsing
346
368
  pm_list_t error_list; // the list of errors that have been found while parsing
347
369
  pm_scope_t *current_scope; // the current local scope
@@ -388,6 +410,10 @@ struct pm_parser {
388
410
  // when we find tokens that we need it for.
389
411
  pm_node_flags_t integer_base;
390
412
 
413
+ // This string is used to pass information from the lexer to the parser. It
414
+ // is particularly necessary because of escape sequences.
415
+ pm_string_t current_string;
416
+
391
417
  // Whether or not we're at the beginning of a command
392
418
  bool command_start;
393
419
 
@@ -21,6 +21,9 @@ typedef struct {
21
21
  // Return the size of the pm_buffer_t struct.
22
22
  PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
23
23
 
24
+ // Initialize a pm_buffer_t with the given capacity.
25
+ bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
26
+
24
27
  // Initialize a pm_buffer_t with its default values.
25
28
  PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
26
29
 
@@ -63,6 +63,11 @@ typedef struct {
63
63
  // Initialize a new constant pool with a given capacity.
64
64
  bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
65
65
 
66
+ static inline pm_constant_t* pm_constant_pool_id_to_constant(pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
67
+ assert(constant_id > 0 && constant_id <= pool->size);
68
+ return &pool->constants[constant_id - 1];
69
+ }
70
+
66
71
  // Insert a constant into a constant pool that is a slice of a source string.
67
72
  // Returns the id of the constant, or 0 if any potential calls to resize fail.
68
73
  pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
@@ -11,9 +11,10 @@
11
11
 
12
12
  // This struct represents a string value.
13
13
  typedef struct {
14
- enum { PM_STRING_SHARED, PM_STRING_OWNED, PM_STRING_CONSTANT, PM_STRING_MAPPED } type;
15
14
  const uint8_t *source;
16
15
  size_t length;
16
+ // This field is deliberately not the first one: if it were, an initializer like .pm_string_t_field = 123 (or a pm_constant_id_t value) would not produce a warning or an error.
17
+ enum { PM_STRING_SHARED, PM_STRING_OWNED, PM_STRING_CONSTANT, PM_STRING_MAPPED } type;
17
18
  } pm_string_t;
18
19
 
19
20
  #define PM_EMPTY_STRING ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
@@ -1,4 +1,4 @@
1
1
  #define PRISM_VERSION_MAJOR 0
2
- #define PRISM_VERSION_MINOR 13
2
+ #define PRISM_VERSION_MINOR 14
3
3
  #define PRISM_VERSION_PATCH 0
4
- #define PRISM_VERSION "0.13.0"
4
+ #define PRISM_VERSION "0.14.0"
data/include/prism.h CHANGED
@@ -8,7 +8,6 @@
8
8
  #include "prism/pack.h"
9
9
  #include "prism/parser.h"
10
10
  #include "prism/regexp.h"
11
- #include "prism/unescape.h"
12
11
  #include "prism/util/pm_buffer.h"
13
12
  #include "prism/util/pm_char.h"
14
13
  #include "prism/util/pm_memchr.h"