yarp 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +76 -0
  3. data/CONTRIBUTING.md +51 -0
  4. data/LICENSE.md +7 -0
  5. data/Makefile.in +79 -0
  6. data/README.md +86 -0
  7. data/config.h.in +25 -0
  8. data/config.yml +2147 -0
  9. data/configure +4487 -0
  10. data/docs/build_system.md +85 -0
  11. data/docs/building.md +26 -0
  12. data/docs/configuration.md +56 -0
  13. data/docs/design.md +53 -0
  14. data/docs/encoding.md +116 -0
  15. data/docs/extension.md +20 -0
  16. data/docs/fuzzing.md +93 -0
  17. data/docs/heredocs.md +36 -0
  18. data/docs/mapping.md +117 -0
  19. data/docs/ripper.md +36 -0
  20. data/docs/serialization.md +130 -0
  21. data/docs/testing.md +55 -0
  22. data/ext/yarp/api_node.c +3680 -0
  23. data/ext/yarp/api_pack.c +256 -0
  24. data/ext/yarp/extconf.rb +131 -0
  25. data/ext/yarp/extension.c +547 -0
  26. data/ext/yarp/extension.h +18 -0
  27. data/include/yarp/ast.h +1412 -0
  28. data/include/yarp/defines.h +54 -0
  29. data/include/yarp/diagnostic.h +24 -0
  30. data/include/yarp/enc/yp_encoding.h +94 -0
  31. data/include/yarp/node.h +36 -0
  32. data/include/yarp/pack.h +141 -0
  33. data/include/yarp/parser.h +389 -0
  34. data/include/yarp/regexp.h +19 -0
  35. data/include/yarp/unescape.h +42 -0
  36. data/include/yarp/util/yp_buffer.h +39 -0
  37. data/include/yarp/util/yp_char.h +75 -0
  38. data/include/yarp/util/yp_constant_pool.h +64 -0
  39. data/include/yarp/util/yp_list.h +67 -0
  40. data/include/yarp/util/yp_memchr.h +14 -0
  41. data/include/yarp/util/yp_newline_list.h +54 -0
  42. data/include/yarp/util/yp_state_stack.h +24 -0
  43. data/include/yarp/util/yp_string.h +57 -0
  44. data/include/yarp/util/yp_string_list.h +28 -0
  45. data/include/yarp/util/yp_strpbrk.h +29 -0
  46. data/include/yarp/version.h +5 -0
  47. data/include/yarp.h +69 -0
  48. data/lib/yarp/lex_compat.rb +759 -0
  49. data/lib/yarp/node.rb +7428 -0
  50. data/lib/yarp/pack.rb +185 -0
  51. data/lib/yarp/ripper_compat.rb +174 -0
  52. data/lib/yarp/serialize.rb +389 -0
  53. data/lib/yarp.rb +330 -0
  54. data/src/diagnostic.c +25 -0
  55. data/src/enc/yp_big5.c +79 -0
  56. data/src/enc/yp_euc_jp.c +85 -0
  57. data/src/enc/yp_gbk.c +88 -0
  58. data/src/enc/yp_shift_jis.c +83 -0
  59. data/src/enc/yp_tables.c +509 -0
  60. data/src/enc/yp_unicode.c +2320 -0
  61. data/src/enc/yp_windows_31j.c +83 -0
  62. data/src/node.c +2011 -0
  63. data/src/pack.c +493 -0
  64. data/src/prettyprint.c +1782 -0
  65. data/src/regexp.c +580 -0
  66. data/src/serialize.c +1576 -0
  67. data/src/token_type.c +347 -0
  68. data/src/unescape.c +576 -0
  69. data/src/util/yp_buffer.c +78 -0
  70. data/src/util/yp_char.c +229 -0
  71. data/src/util/yp_constant_pool.c +147 -0
  72. data/src/util/yp_list.c +50 -0
  73. data/src/util/yp_memchr.c +31 -0
  74. data/src/util/yp_newline_list.c +119 -0
  75. data/src/util/yp_state_stack.c +25 -0
  76. data/src/util/yp_string.c +207 -0
  77. data/src/util/yp_string_list.c +32 -0
  78. data/src/util/yp_strncasecmp.c +20 -0
  79. data/src/util/yp_strpbrk.c +66 -0
  80. data/src/yarp.c +13211 -0
  81. data/yarp.gemspec +100 -0
  82. metadata +125 -0
@@ -0,0 +1,54 @@
1
+ #ifndef YARP_DEFINES_H
2
+ #define YARP_DEFINES_H
3
+
4
+ // This file should be included first by any *.h or *.c in YARP
5
+
6
+ #include "yarp/config.h"
7
+
8
+ #include <ctype.h>
9
+ #include <stdarg.h>
10
+ #include <stddef.h>
11
+ #include <stdio.h>
12
+ #include <string.h>
13
+
14
+ // YP_EXPORTED_FUNCTION
15
+ #ifndef YP_EXPORTED_FUNCTION
16
+ # ifdef YP_EXPORT_SYMBOLS
17
+ # ifdef _WIN32
18
+ # define YP_EXPORTED_FUNCTION __declspec(dllexport) extern
19
+ # else
20
+ # define YP_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
21
+ # endif
22
+ # else
23
+ # define YP_EXPORTED_FUNCTION
24
+ # endif
25
+ #endif
26
+
27
+ // YP_ATTRIBUTE_UNUSED
28
+ #if defined(__GNUC__)
29
+ # define YP_ATTRIBUTE_UNUSED __attribute__((unused))
30
+ #else
31
+ # define YP_ATTRIBUTE_UNUSED
32
+ #endif
33
+
34
+ // inline
35
+ #if defined(_MSC_VER) && !defined(inline)
36
+ # define inline __inline
37
+ #endif
38
+
39
+ int yp_strncasecmp(const char *string1, const char *string2, size_t length);
40
+
41
+ int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...);
42
+
43
+ #if defined(HAVE_SNPRINTF)
44
+ // We use snprintf if it's available
45
+ # define yp_snprintf snprintf
46
+
47
+ #else
48
+ // In case snprintf isn't present on the system, we provide our own that simply
49
+ // forwards to the less-safe sprintf.
50
+ # define yp_snprintf(dest, size, ...) sprintf((dest), __VA_ARGS__)
51
+
52
+ #endif
53
+
54
+ #endif
@@ -0,0 +1,24 @@
1
+ #ifndef YARP_DIAGNOSTIC_H
2
+ #define YARP_DIAGNOSTIC_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/util/yp_list.h"
6
+
7
+ #include <stdbool.h>
8
+ #include <stdlib.h>
9
+
10
+ // This struct represents a diagnostic found during parsing.
11
+ typedef struct {
12
+ yp_list_node_t node;
13
+ const char *start;
14
+ const char *end;
15
+ const char *message;
16
+ } yp_diagnostic_t;
17
+
18
+ // Append a diagnostic to the given list of diagnostics.
19
+ bool yp_diagnostic_list_append(yp_list_t *list, const char *start, const char *end, const char *message);
20
+
21
+ // Deallocate the internal state of the given diagnostic list.
22
+ void yp_diagnostic_list_free(yp_list_t *list);
23
+
24
+ #endif
@@ -0,0 +1,94 @@
1
+ #ifndef YARP_ENCODING_H
2
+ #define YARP_ENCODING_H
3
+
4
+ #include "yarp/defines.h"
5
+
6
+ #include <assert.h>
7
+ #include <stdbool.h>
8
+ #include <stddef.h>
9
+ #include <stdint.h>
10
+
11
+ // This struct defines the functions necessary to implement the encoding
12
+ // interface so we can determine how many bytes the subsequent character takes.
13
+ // Each callback should return the number of bytes, or 0 if the next bytes are
14
+ // invalid for the encoding and type.
15
+ typedef struct {
16
+ // Return the number of bytes that the next character takes if it is valid
17
+ // in the encoding. Does not read more than n bytes. It is assumed that n is
18
+ // at least 1.
19
+ size_t (*char_width)(const char *c, ptrdiff_t n);
20
+
21
+ // Return the number of bytes that the next character takes if it is valid
22
+ // in the encoding and is alphabetical. Does not read more than n bytes. It
23
+ // is assumed that n is at least 1.
24
+ size_t (*alpha_char)(const char *c, ptrdiff_t n);
25
+
26
+ // Return the number of bytes that the next character takes if it is valid
27
+ // in the encoding and is alphanumeric. Does not read more than n bytes. It
28
+ // is assumed that n is at least 1.
29
+ size_t (*alnum_char)(const char *c, ptrdiff_t n);
30
+
31
+ // Return true if the next character is valid in the encoding and is an
32
+ // uppercase character. Does not read more than n bytes. It is assumed that
33
+ // n is at least 1.
34
+ bool (*isupper_char)(const char *c, ptrdiff_t n);
35
+
36
+ // The name of the encoding. This should correspond to a value that can be
37
+ // passed to Encoding.find in Ruby.
38
+ const char *name;
39
+
40
+ // True if the encoding is a multibyte encoding.
41
+ bool multibyte;
42
+ } yp_encoding_t;
43
+
44
+ // These bits define the location of each bit of metadata within the various
45
+ // lookup tables that are used to determine the properties of a character.
46
+ #define YP_ENCODING_ALPHABETIC_BIT (1 << 0) // bit 0: alphabetic flag
47
+ #define YP_ENCODING_ALPHANUMERIC_BIT (1 << 1) // bit 1: alphanumeric flag
48
+ #define YP_ENCODING_UPPERCASE_BIT (1 << 2) // bit 2: uppercase flag
49
+
50
+ // These functions are reused by some other encodings, so they are defined here
51
+ // so they can be shared.
52
+ size_t yp_encoding_ascii_alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
53
+ size_t yp_encoding_ascii_alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
54
+ bool yp_encoding_ascii_isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
55
+
56
+ // These functions are shared between the actual encoding and the fast path in
57
+ // the parser so they need to be internally visible.
58
+ size_t yp_encoding_utf_8_alpha_char(const char *c, ptrdiff_t n);
59
+ size_t yp_encoding_utf_8_alnum_char(const char *c, ptrdiff_t n);
60
+
61
+ // This lookup table is referenced in both the UTF-8 encoding file and the
62
+ // parser directly in order to speed up the default encoding processing.
63
+ extern unsigned char yp_encoding_unicode_table[256];
64
+
65
+ // These are the encodings that are supported by the parser. They are defined in
66
+ // their own files in the src/enc directory.
67
+ extern yp_encoding_t yp_encoding_ascii;
68
+ extern yp_encoding_t yp_encoding_ascii_8bit;
69
+ extern yp_encoding_t yp_encoding_big5;
70
+ extern yp_encoding_t yp_encoding_euc_jp;
71
+ extern yp_encoding_t yp_encoding_gbk;
72
+ extern yp_encoding_t yp_encoding_iso_8859_1;
73
+ extern yp_encoding_t yp_encoding_iso_8859_2;
74
+ extern yp_encoding_t yp_encoding_iso_8859_3;
75
+ extern yp_encoding_t yp_encoding_iso_8859_4;
76
+ extern yp_encoding_t yp_encoding_iso_8859_5;
77
+ extern yp_encoding_t yp_encoding_iso_8859_6;
78
+ extern yp_encoding_t yp_encoding_iso_8859_7;
79
+ extern yp_encoding_t yp_encoding_iso_8859_8;
80
+ extern yp_encoding_t yp_encoding_iso_8859_9;
81
+ extern yp_encoding_t yp_encoding_iso_8859_10;
82
+ extern yp_encoding_t yp_encoding_iso_8859_11;
83
+ extern yp_encoding_t yp_encoding_iso_8859_13;
84
+ extern yp_encoding_t yp_encoding_iso_8859_14;
85
+ extern yp_encoding_t yp_encoding_iso_8859_15;
86
+ extern yp_encoding_t yp_encoding_iso_8859_16;
87
+ extern yp_encoding_t yp_encoding_koi8_r;
88
+ extern yp_encoding_t yp_encoding_shift_jis;
89
+ extern yp_encoding_t yp_encoding_utf_8;
90
+ extern yp_encoding_t yp_encoding_windows_31j;
91
+ extern yp_encoding_t yp_encoding_windows_1251;
92
+ extern yp_encoding_t yp_encoding_windows_1252;
93
+
94
+ #endif
@@ -0,0 +1,36 @@
1
+ #ifndef YARP_NODE_H
2
+ #define YARP_NODE_H
3
+
4
+ #include "yarp/defines.h"
5
+ #include "yarp/parser.h"
6
+
7
+ // Append a token to the given list.
8
+ void yp_location_list_append(yp_location_list_t *list, const yp_token_t *token);
9
+
10
+ // Append a new node onto the end of the node list.
11
+ void yp_node_list_append(yp_node_list_t *list, yp_node_t *node);
12
+
13
+ // Clear the node but preserve its location.
14
+ void yp_node_clear(yp_node_t *node);
15
+
16
+ // Deallocate a node and all of its children.
17
+ YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
18
+
19
+ // This struct stores the information gathered by the yp_node_memsize function.
20
+ // It contains both the memory footprint and additional metadata about the
21
+ // shape of the tree.
22
+ typedef struct {
23
+ size_t memsize;
24
+ size_t node_count;
25
+ } yp_memsize_t;
26
+
27
+ // Calculates the memory footprint of a given node.
28
+ YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
29
+
30
+ // Returns a string representation of the given node type.
31
+ YP_EXPORTED_FUNCTION const char * yp_node_type_to_str(yp_node_type_t node_type);
32
+
33
+ #define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
34
+ #define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 })
35
+
36
+ #endif // YARP_NODE_H
@@ -0,0 +1,141 @@
1
+ #ifndef YARP_PACK_H
2
+ #define YARP_PACK_H
3
+
4
+ #include "yarp/defines.h"
5
+
6
+ #include <stdint.h>
7
+ #include <stdlib.h>
8
+
9
+ typedef enum yp_pack_version {
10
+ YP_PACK_VERSION_3_2_0
11
+ } yp_pack_version;
12
+
13
+ typedef enum yp_pack_variant {
14
+ YP_PACK_VARIANT_PACK,
15
+ YP_PACK_VARIANT_UNPACK
16
+ } yp_pack_variant;
17
+
18
+ typedef enum yp_pack_type {
19
+ YP_PACK_SPACE,
20
+ YP_PACK_COMMENT,
21
+ YP_PACK_INTEGER,
22
+ YP_PACK_UTF8,
23
+ YP_PACK_BER,
24
+ YP_PACK_FLOAT,
25
+ YP_PACK_STRING_SPACE_PADDED,
26
+ YP_PACK_STRING_NULL_PADDED,
27
+ YP_PACK_STRING_NULL_TERMINATED,
28
+ YP_PACK_STRING_MSB,
29
+ YP_PACK_STRING_LSB,
30
+ YP_PACK_STRING_HEX_HIGH,
31
+ YP_PACK_STRING_HEX_LOW,
32
+ YP_PACK_STRING_UU,
33
+ YP_PACK_STRING_MIME,
34
+ YP_PACK_STRING_BASE64,
35
+ YP_PACK_STRING_FIXED,
36
+ YP_PACK_STRING_POINTER,
37
+ YP_PACK_MOVE,
38
+ YP_PACK_BACK,
39
+ YP_PACK_NULL,
40
+ YP_PACK_END
41
+ } yp_pack_type;
42
+
43
+ typedef enum yp_pack_signed {
44
+ YP_PACK_UNSIGNED,
45
+ YP_PACK_SIGNED,
46
+ YP_PACK_SIGNED_NA
47
+ } yp_pack_signed;
48
+
49
+ typedef enum yp_pack_endian {
50
+ YP_PACK_AGNOSTIC_ENDIAN,
51
+ YP_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
52
+ YP_PACK_BIG_ENDIAN, // aka 'network', or 'N'
53
+ YP_PACK_NATIVE_ENDIAN,
54
+ YP_PACK_ENDIAN_NA
55
+ } yp_pack_endian;
56
+
57
+ typedef enum yp_pack_size {
58
+ YP_PACK_SIZE_SHORT,
59
+ YP_PACK_SIZE_INT,
60
+ YP_PACK_SIZE_LONG,
61
+ YP_PACK_SIZE_LONG_LONG,
62
+ YP_PACK_SIZE_8,
63
+ YP_PACK_SIZE_16,
64
+ YP_PACK_SIZE_32,
65
+ YP_PACK_SIZE_64,
66
+ YP_PACK_SIZE_P,
67
+ YP_PACK_SIZE_NA
68
+ } yp_pack_size;
69
+
70
+ typedef enum yp_pack_length_type {
71
+ YP_PACK_LENGTH_FIXED,
72
+ YP_PACK_LENGTH_MAX,
73
+ YP_PACK_LENGTH_RELATIVE, // special case for unpack @*
74
+ YP_PACK_LENGTH_NA
75
+ } yp_pack_length_type;
76
+
77
+ typedef enum yp_pack_encoding {
78
+ YP_PACK_ENCODING_START,
79
+ YP_PACK_ENCODING_ASCII_8BIT,
80
+ YP_PACK_ENCODING_US_ASCII,
81
+ YP_PACK_ENCODING_UTF_8
82
+ } yp_pack_encoding;
83
+
84
+ typedef enum yp_pack_result {
85
+ YP_PACK_OK,
86
+ YP_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
87
+ YP_PACK_ERROR_UNKNOWN_DIRECTIVE,
88
+ YP_PACK_ERROR_LENGTH_TOO_BIG,
89
+ YP_PACK_ERROR_BANG_NOT_ALLOWED,
90
+ YP_PACK_ERROR_DOUBLE_ENDIAN
91
+ } yp_pack_result;
92
+
93
+ // Parse a single directive from a pack or unpack format string.
94
+ //
95
+ // Parameters:
96
+ // - [in] yp_pack_version version the version of Ruby (documented here, but not a parameter of the prototype below)
97
+ // - [in] yp_pack_variant variant_arg pack or unpack
98
+ // - [in out] const char **format the start of the next directive to parse
99
+ // on calling, and advanced beyond the parsed directive on return, or as
100
+ // much of it as was consumed until an error was encountered
101
+ // - [in] const char *format_end the end of the format string
102
+ // - [out] yp_pack_type *type the type of the directive
103
+ // - [out] yp_pack_signed *signed_type
104
+ // whether the value is signed
105
+ // - [out] yp_pack_endian *endian the endianness of the value
106
+ // - [out] yp_pack_size *size the size of the value
107
+ // - [out] yp_pack_length_type *length_type
108
+ // what kind of length is specified
109
+ // - [out] uint64_t *length the length of the directive
110
+ // - [in out] yp_pack_encoding *encoding
111
+ // takes the current encoding of the string
112
+ // which would result from parsing the whole format string, and returns a
113
+ // possibly changed encoding - the encoding should be
114
+ // YP_PACK_ENCODING_START when yp_pack_parse is called for the first
115
+ // directive in a format string
116
+ //
117
+ // Return:
118
+ // - YP_PACK_OK on success
119
+ // - YP_PACK_ERROR_* on error
120
+ //
121
+ // Notes:
122
+ // Consult Ruby documentation for the meaning of directives.
123
+ YP_EXPORTED_FUNCTION yp_pack_result
124
+ yp_pack_parse(
125
+ yp_pack_variant variant_arg,
126
+ const char **format,
127
+ const char *format_end,
128
+ yp_pack_type *type,
129
+ yp_pack_signed *signed_type,
130
+ yp_pack_endian *endian,
131
+ yp_pack_size *size,
132
+ yp_pack_length_type *length_type,
133
+ uint64_t *length,
134
+ yp_pack_encoding *encoding
135
+ );
136
+
137
+ // YARP abstracts sizes away from the native system - this converts an abstract
138
+ // size to a native size.
139
+ YP_EXPORTED_FUNCTION size_t yp_size_to_native(yp_pack_size size);
140
+
141
+ #endif