yarp 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
@@ -0,0 +1,54 @@
1
+ #ifndef YARP_DEFINES_H
2
+ #define YARP_DEFINES_H
3
+
4
+ // This file should be included first by any *.h or *.c in YARP
5
+
6
+ #include "yarp/config.h"
7
+
8
+ #include <ctype.h>
9
+ #include <stdarg.h>
10
+ #include <stddef.h>
11
+ #include <stdio.h>
12
+ #include <string.h>
13
+
14
+ // YP_EXPORTED_FUNCTION: prefix placed on exported function declarations. Unless it is already defined, it expands to a dllexport (_WIN32) or default-visibility extern when YP_EXPORT_SYMBOLS is defined, and to nothing otherwise.
15
+ #ifndef YP_EXPORTED_FUNCTION
16
+ # ifdef YP_EXPORT_SYMBOLS
17
+ # ifdef _WIN32
18
+ # define YP_EXPORTED_FUNCTION __declspec(dllexport) extern
19
+ # else
20
+ # define YP_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
21
+ # endif // _WIN32
22
+ # else
23
+ # define YP_EXPORTED_FUNCTION
24
+ # endif // YP_EXPORT_SYMBOLS
25
+ #endif // YP_EXPORTED_FUNCTION
26
+
27
+ // YP_ATTRIBUTE_UNUSED: expands to __attribute__((unused)) when __GNUC__ is defined, and to nothing on other compilers.
28
+ #if defined(__GNUC__)
29
+ # define YP_ATTRIBUTE_UNUSED __attribute__((unused))
30
+ #else
31
+ # define YP_ATTRIBUTE_UNUSED
32
+ #endif
33
+
34
+ // inline: when _MSC_VER is defined and no inline macro exists yet, map inline onto the compiler's __inline keyword.
35
+ #if defined(_MSC_VER) && !defined(inline)
36
+ # define inline __inline
37
+ #endif
38
+
39
+ int yp_strncasecmp(const char *string1, const char *string2, size_t length); // NOTE(review): presumably a case-insensitive comparison of at most length bytes, like POSIX strncasecmp - definition not visible in this header, confirm.
40
+
41
+ int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...); // Prototype only; both branches below redefine this name as a macro for later uses.
42
+
43
+ #if defined(HAVE_SNPRINTF)
44
+ // We use snprintf if it's available, so yp_snprintf becomes a plain alias for it.
45
+ # define yp_snprintf snprintf
46
+
47
+ #else
48
+ // In case snprintf isn't present on the system, we provide our own that simply
49
+ // forwards to the less-safe sprintf. The size argument is discarded, so writes are not bounds-checked.
50
+ # define yp_snprintf(dest, size, ...) sprintf((dest), __VA_ARGS__)
51
+
52
+ #endif
53
+
54
+ #endif
@@ -0,0 +1,24 @@
1
+ #ifndef YARP_DIAGNOSTIC_H
2
+ #define YARP_DIAGNOSTIC_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/util/yp_list.h"
6
+
7
+ #include <stdbool.h>
8
+ #include <stdlib.h>
9
+
10
+ // This struct represents a diagnostic found during parsing.
11
+ typedef struct {
12
+ yp_list_node_t node; // Embedded list node; presumably links this diagnostic into the yp_list_t passed to yp_diagnostic_list_append - confirm.
13
+ const char *start; // NOTE(review): presumably the start of the source range the diagnostic refers to - confirm.
14
+ const char *end; // NOTE(review): presumably the end of that source range - confirm whether it is inclusive.
15
+ const char *message; // Text of the diagnostic; ownership is not visible from this header.
16
+ } yp_diagnostic_t;
17
+
18
+ // Append a diagnostic to the given list of diagnostics.
19
+ bool yp_diagnostic_list_append(yp_list_t *list, const char *start, const char *end, const char *message);
20
+
21
+ // Deallocate the internal state of the given diagnostic list.
22
+ void yp_diagnostic_list_free(yp_list_t *list);
23
+
24
+ #endif
@@ -0,0 +1,94 @@
1
+ #ifndef YARP_ENCODING_H
2
+ #define YARP_ENCODING_H
3
+
4
+ #include "yarp/defines.h"
5
+
6
+ #include <assert.h>
7
+ #include <stdbool.h>
8
+ #include <stddef.h>
9
+ #include <stdint.h>
10
+
11
+ // This struct defines the functions necessary to implement the encoding
12
+ // interface so we can determine how many bytes the subsequent character takes.
13
+ // Each width callback returns the number of bytes, or 0 if the next bytes are
14
+ // invalid for the encoding and type; isupper_char returns a bool instead.
15
+ typedef struct {
16
+ // Return the number of bytes that the next character takes if it is valid
17
+ // in the encoding. Does not read more than n bytes. It is assumed that n is
18
+ // at least 1.
19
+ size_t (*char_width)(const char *c, ptrdiff_t n);
20
+
21
+ // Return the number of bytes that the next character takes if it is valid
22
+ // in the encoding and is alphabetical. Does not read more than n bytes. It
23
+ // is assumed that n is at least 1.
24
+ size_t (*alpha_char)(const char *c, ptrdiff_t n);
25
+
26
+ // Return the number of bytes that the next character takes if it is valid
27
+ // in the encoding and is alphanumeric. Does not read more than n bytes. It
28
+ // is assumed that n is at least 1.
29
+ size_t (*alnum_char)(const char *c, ptrdiff_t n);
30
+
31
+ // Return true if the next character is valid in the encoding and is an
32
+ // uppercase character. Does not read more than n bytes. It is assumed that
33
+ // n is at least 1.
34
+ bool (*isupper_char)(const char *c, ptrdiff_t n);
35
+
36
+ // The name of the encoding. This should correspond to a value that can be
37
+ // passed to Encoding.find in Ruby.
38
+ const char *name;
39
+
40
+ // True if the encoding is a multibyte encoding (a plain flag, not a callback).
41
+ bool multibyte;
42
+ } yp_encoding_t;
43
+
44
+ // These bits define the location of each bit of metadata within the various
45
+ // lookup tables that are used to determine the properties of a character. Each expansion is parenthesized so the macro stays a single operand next to tighter-binding operators such as ~, * or +.
46
+ #define YP_ENCODING_ALPHABETIC_BIT (1 << 0)
47
+ #define YP_ENCODING_ALPHANUMERIC_BIT (1 << 1)
48
+ #define YP_ENCODING_UPPERCASE_BIT (1 << 2)
49
+
50
+ // These functions are reused by some other encodings, so they are defined here
51
+ // so they can be shared.
52
+ size_t yp_encoding_ascii_alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
53
+ size_t yp_encoding_ascii_alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
54
+ bool yp_encoding_ascii_isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
55
+
56
+ // These functions are shared between the actual encoding and the fast path in
57
+ // the parser so they need to be internally visible.
58
+ size_t yp_encoding_utf_8_alpha_char(const char *c, ptrdiff_t n);
59
+ size_t yp_encoding_utf_8_alnum_char(const char *c, ptrdiff_t n);
60
+
61
+ // This lookup table is referenced in both the UTF-8 encoding file and the
62
+ // parser directly in order to speed up the default encoding processing.
63
+ extern unsigned char yp_encoding_unicode_table[256];
64
+
65
+ // These are the encodings that are supported by the parser. They are defined in
66
+ // their own files in the src/enc directory.
67
+ extern yp_encoding_t yp_encoding_ascii;
68
+ extern yp_encoding_t yp_encoding_ascii_8bit;
69
+ extern yp_encoding_t yp_encoding_big5;
70
+ extern yp_encoding_t yp_encoding_euc_jp;
71
+ extern yp_encoding_t yp_encoding_gbk;
72
+ extern yp_encoding_t yp_encoding_iso_8859_1;
73
+ extern yp_encoding_t yp_encoding_iso_8859_2;
74
+ extern yp_encoding_t yp_encoding_iso_8859_3;
75
+ extern yp_encoding_t yp_encoding_iso_8859_4;
76
+ extern yp_encoding_t yp_encoding_iso_8859_5;
77
+ extern yp_encoding_t yp_encoding_iso_8859_6;
78
+ extern yp_encoding_t yp_encoding_iso_8859_7;
79
+ extern yp_encoding_t yp_encoding_iso_8859_8;
80
+ extern yp_encoding_t yp_encoding_iso_8859_9;
81
+ extern yp_encoding_t yp_encoding_iso_8859_10;
82
+ extern yp_encoding_t yp_encoding_iso_8859_11;
83
+ extern yp_encoding_t yp_encoding_iso_8859_13;
84
+ extern yp_encoding_t yp_encoding_iso_8859_14;
85
+ extern yp_encoding_t yp_encoding_iso_8859_15;
86
+ extern yp_encoding_t yp_encoding_iso_8859_16;
87
+ extern yp_encoding_t yp_encoding_koi8_r;
88
+ extern yp_encoding_t yp_encoding_shift_jis;
89
+ extern yp_encoding_t yp_encoding_utf_8;
90
+ extern yp_encoding_t yp_encoding_windows_31j;
91
+ extern yp_encoding_t yp_encoding_windows_1251;
92
+ extern yp_encoding_t yp_encoding_windows_1252;
93
+
94
+ #endif
@@ -0,0 +1,36 @@
1
+ #ifndef YARP_NODE_H
2
+ #define YARP_NODE_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/parser.h"
6
+
7
+ // Append a token to the given list.
8
+ void yp_location_list_append(yp_location_list_t *list, const yp_token_t *token);
9
+
10
+ // Append a new node onto the end of the node list.
11
+ void yp_node_list_append(yp_node_list_t *list, yp_node_t *node);
12
+
13
+ // Clear the node but preserve its location.
14
+ void yp_node_clear(yp_node_t *node);
15
+
16
+ // Deallocate a node and all of its children.
17
+ YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
18
+
19
+ // This struct stores the information gathered by the yp_node_memsize function.
20
+ // It contains both the memory footprint and additional metadata about the
21
+ // shape of the tree.
22
+ typedef struct {
23
+ size_t memsize; // The memory footprint (presumably in bytes - confirm in src/node.c).
24
+ size_t node_count; // Tree-shape metadata; presumably the number of nodes visited - confirm.
25
+ } yp_memsize_t;
26
+
27
+ // Calculates the memory footprint of a given node.
28
+ YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
29
+
30
+ // Returns a string representation of the given node type.
31
+ YP_EXPORTED_FUNCTION const char * yp_node_type_to_str(yp_node_type_t node_type);
32
+
33
+ #define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 }) // Compound literal for a node list with no storage, zero size and zero capacity.
34
+ #define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 }) // Compound literal for a location list with no storage, zero size and zero capacity.
35
+
36
+ #endif // YARP_NODE_H
@@ -0,0 +1,141 @@
1
+ #ifndef YARP_PACK_H
2
+ #define YARP_PACK_H
3
+
4
+ #include "yarp/defines.h"
5
+
6
+ #include <stdint.h>
7
+ #include <stdlib.h>
8
+
9
+ typedef enum yp_pack_version { // The version of Ruby whose pack/unpack rules apply; only 3.2.0 is listed.
10
+ YP_PACK_VERSION_3_2_0
11
+ } yp_pack_version;
12
+
13
+ typedef enum yp_pack_variant { // Tells yp_pack_parse whether the format string is a pack or an unpack format.
14
+ YP_PACK_VARIANT_PACK,
15
+ YP_PACK_VARIANT_UNPACK
16
+ } yp_pack_variant;
17
+
18
+ typedef enum yp_pack_type { // The type of a parsed directive, as reported through the type out-parameter of yp_pack_parse.
19
+ YP_PACK_SPACE,
20
+ YP_PACK_COMMENT,
21
+ YP_PACK_INTEGER,
22
+ YP_PACK_UTF8,
23
+ YP_PACK_BER,
24
+ YP_PACK_FLOAT,
25
+ YP_PACK_STRING_SPACE_PADDED,
26
+ YP_PACK_STRING_NULL_PADDED,
27
+ YP_PACK_STRING_NULL_TERMINATED,
28
+ YP_PACK_STRING_MSB,
29
+ YP_PACK_STRING_LSB,
30
+ YP_PACK_STRING_HEX_HIGH,
31
+ YP_PACK_STRING_HEX_LOW,
32
+ YP_PACK_STRING_UU,
33
+ YP_PACK_STRING_MIME,
34
+ YP_PACK_STRING_BASE64,
35
+ YP_PACK_STRING_FIXED,
36
+ YP_PACK_STRING_POINTER,
37
+ YP_PACK_MOVE,
38
+ YP_PACK_BACK,
39
+ YP_PACK_NULL,
40
+ YP_PACK_END // NOTE(review): presumably reported once the format string is exhausted - confirm in src/pack.c.
41
+ } yp_pack_type;
42
+
43
+ typedef enum yp_pack_signed { // Whether the value of a directive is signed, as reported by yp_pack_parse.
44
+ YP_PACK_UNSIGNED,
45
+ YP_PACK_SIGNED,
46
+ YP_PACK_SIGNED_NA // NOTE(review): presumably "not applicable" for directives without signedness - confirm.
47
+ } yp_pack_signed;
48
+
49
+ typedef enum yp_pack_endian { // The endianness of a directive's value, as reported by yp_pack_parse.
50
+ YP_PACK_AGNOSTIC_ENDIAN,
51
+ YP_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
52
+ YP_PACK_BIG_ENDIAN, // aka 'network', or 'N'
53
+ YP_PACK_NATIVE_ENDIAN,
54
+ YP_PACK_ENDIAN_NA // NOTE(review): presumably "not applicable" for directives without a byte order - confirm.
55
+ } yp_pack_endian;
56
+
57
+ typedef enum yp_pack_size { // Abstract size of a directive's value; yp_size_to_native converts it to a native size.
58
+ YP_PACK_SIZE_SHORT,
59
+ YP_PACK_SIZE_INT,
60
+ YP_PACK_SIZE_LONG,
61
+ YP_PACK_SIZE_LONG_LONG,
62
+ YP_PACK_SIZE_8,
63
+ YP_PACK_SIZE_16,
64
+ YP_PACK_SIZE_32,
65
+ YP_PACK_SIZE_64,
66
+ YP_PACK_SIZE_P, // NOTE(review): presumably pointer-sized - confirm in yp_size_to_native.
67
+ YP_PACK_SIZE_NA // NOTE(review): presumably "not applicable" for directives without a size - confirm.
68
+ } yp_pack_size;
69
+
70
+ typedef enum yp_pack_length_type { // What kind of length a directive specifies, as reported by yp_pack_parse.
71
+ YP_PACK_LENGTH_FIXED,
72
+ YP_PACK_LENGTH_MAX,
73
+ YP_PACK_LENGTH_RELATIVE, // special case for unpack @*
74
+ YP_PACK_LENGTH_NA // NOTE(review): presumably "not applicable" for directives that take no length - confirm.
75
+ } yp_pack_length_type;
76
+
77
+ typedef enum yp_pack_encoding { // Encoding of the string that parsing the format would produce; threaded in and out of yp_pack_parse.
78
+ YP_PACK_ENCODING_START, // Value to pass when yp_pack_parse is called for the first directive of a format string.
79
+ YP_PACK_ENCODING_ASCII_8BIT,
80
+ YP_PACK_ENCODING_US_ASCII,
81
+ YP_PACK_ENCODING_UTF_8
82
+ } yp_pack_encoding;
83
+
84
+ typedef enum yp_pack_result { // Return status of yp_pack_parse: YP_PACK_OK on success, a YP_PACK_ERROR_* value on error.
85
+ YP_PACK_OK,
86
+ YP_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
87
+ YP_PACK_ERROR_UNKNOWN_DIRECTIVE,
88
+ YP_PACK_ERROR_LENGTH_TOO_BIG,
89
+ YP_PACK_ERROR_BANG_NOT_ALLOWED,
90
+ YP_PACK_ERROR_DOUBLE_ENDIAN
91
+ } yp_pack_result;
92
+
93
+ // Parse a single directive from a pack or unpack format string.
94
+ //
95
+ // Parameters:
96
+ // - [in] yp_pack_version version the version of Ruby (NOTE: the declaration below takes no version argument)
97
+ // - [in] yp_pack_variant variant_arg pack or unpack
98
+ // - [in out] const char **format the start of the next directive to parse
99
+ // on calling, and advanced beyond the parsed directive on return, or as
100
+ // much of it as was consumed until an error was encountered
101
+ // - [in] const char *format_end the end of the format string
102
+ // - [out] yp_pack_type *type the type of the directive
103
+ // - [out] yp_pack_signed *signed_type
104
+ // whether the value is signed
105
+ // - [out] yp_pack_endian *endian the endianness of the value
106
+ // - [out] yp_pack_size *size the size of the value
107
+ // - [out] yp_pack_length_type *length_type
108
+ // what kind of length is specified
109
+ // - [out] uint64_t *length the length of the directive
110
+ // - [in out] yp_pack_encoding *encoding
111
+ // takes the current encoding of the string
112
+ // which would result from parsing the whole format string, and returns a
113
+ // possibly changed encoding - the encoding should be
114
+ // YP_PACK_ENCODING_START when yp_pack_parse is called for the first
115
+ // directive in a format string
116
+ //
117
+ // Return:
118
+ // - YP_PACK_OK on success
119
+ // - YP_PACK_ERROR_* on error
120
+ //
121
+ // Notes:
122
+ // Consult Ruby documentation for the meaning of directives.
123
+ YP_EXPORTED_FUNCTION yp_pack_result
124
+ yp_pack_parse(
125
+ yp_pack_variant variant_arg,
126
+ const char **format,
127
+ const char *format_end,
128
+ yp_pack_type *type,
129
+ yp_pack_signed *signed_type,
130
+ yp_pack_endian *endian,
131
+ yp_pack_size *size,
132
+ yp_pack_length_type *length_type,
133
+ uint64_t *length,
134
+ yp_pack_encoding *encoding
135
+ );
136
+
137
+ // YARP abstracts sizes away from the native system - this converts an abstract
138
+ // size to a native size.
139
+ YP_EXPORTED_FUNCTION size_t yp_size_to_native(yp_pack_size size);
140
+
141
+ #endif