yarp 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +51 -0
- data/LICENSE.md +7 -0
- data/Makefile.in +79 -0
- data/README.md +86 -0
- data/config.h.in +25 -0
- data/config.yml +2147 -0
- data/configure +4487 -0
- data/docs/build_system.md +85 -0
- data/docs/building.md +26 -0
- data/docs/configuration.md +56 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +116 -0
- data/docs/extension.md +20 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/serialization.md +130 -0
- data/docs/testing.md +55 -0
- data/ext/yarp/api_node.c +3680 -0
- data/ext/yarp/api_pack.c +256 -0
- data/ext/yarp/extconf.rb +131 -0
- data/ext/yarp/extension.c +547 -0
- data/ext/yarp/extension.h +18 -0
- data/include/yarp/ast.h +1412 -0
- data/include/yarp/defines.h +54 -0
- data/include/yarp/diagnostic.h +24 -0
- data/include/yarp/enc/yp_encoding.h +94 -0
- data/include/yarp/node.h +36 -0
- data/include/yarp/pack.h +141 -0
- data/include/yarp/parser.h +389 -0
- data/include/yarp/regexp.h +19 -0
- data/include/yarp/unescape.h +42 -0
- data/include/yarp/util/yp_buffer.h +39 -0
- data/include/yarp/util/yp_char.h +75 -0
- data/include/yarp/util/yp_constant_pool.h +64 -0
- data/include/yarp/util/yp_list.h +67 -0
- data/include/yarp/util/yp_memchr.h +14 -0
- data/include/yarp/util/yp_newline_list.h +54 -0
- data/include/yarp/util/yp_state_stack.h +24 -0
- data/include/yarp/util/yp_string.h +57 -0
- data/include/yarp/util/yp_string_list.h +28 -0
- data/include/yarp/util/yp_strpbrk.h +29 -0
- data/include/yarp/version.h +5 -0
- data/include/yarp.h +69 -0
- data/lib/yarp/lex_compat.rb +759 -0
- data/lib/yarp/node.rb +7428 -0
- data/lib/yarp/pack.rb +185 -0
- data/lib/yarp/ripper_compat.rb +174 -0
- data/lib/yarp/serialize.rb +389 -0
- data/lib/yarp.rb +330 -0
- data/src/diagnostic.c +25 -0
- data/src/enc/yp_big5.c +79 -0
- data/src/enc/yp_euc_jp.c +85 -0
- data/src/enc/yp_gbk.c +88 -0
- data/src/enc/yp_shift_jis.c +83 -0
- data/src/enc/yp_tables.c +509 -0
- data/src/enc/yp_unicode.c +2320 -0
- data/src/enc/yp_windows_31j.c +83 -0
- data/src/node.c +2011 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +1782 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1576 -0
- data/src/token_type.c +347 -0
- data/src/unescape.c +576 -0
- data/src/util/yp_buffer.c +78 -0
- data/src/util/yp_char.c +229 -0
- data/src/util/yp_constant_pool.c +147 -0
- data/src/util/yp_list.c +50 -0
- data/src/util/yp_memchr.c +31 -0
- data/src/util/yp_newline_list.c +119 -0
- data/src/util/yp_state_stack.c +25 -0
- data/src/util/yp_string.c +207 -0
- data/src/util/yp_string_list.c +32 -0
- data/src/util/yp_strncasecmp.c +20 -0
- data/src/util/yp_strpbrk.c +66 -0
- data/src/yarp.c +13211 -0
- data/yarp.gemspec +100 -0
- metadata +125 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
#ifndef YARP_DEFINES_H
|
2
|
+
#define YARP_DEFINES_H
|
3
|
+
|
4
|
+
// This file should be included first by any *.h or *.c in YARP
|
5
|
+
|
6
|
+
#include "yarp/config.h"
|
7
|
+
|
8
|
+
#include <ctype.h>
|
9
|
+
#include <stdarg.h>
|
10
|
+
#include <stddef.h>
|
11
|
+
#include <stdio.h>
|
12
|
+
#include <string.h>
|
13
|
+
|
14
|
+
// YP_EXPORTED_FUNCTION
|
15
|
+
#ifndef YP_EXPORTED_FUNCTION
|
16
|
+
# ifdef YP_EXPORT_SYMBOLS
|
17
|
+
# ifdef _WIN32
|
18
|
+
# define YP_EXPORTED_FUNCTION __declspec(dllexport) extern
|
19
|
+
# else
|
20
|
+
# define YP_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
|
21
|
+
# endif
|
22
|
+
# else
|
23
|
+
# define YP_EXPORTED_FUNCTION
|
24
|
+
# endif
|
25
|
+
#endif
|
26
|
+
|
27
|
+
// YP_ATTRIBUTE_UNUSED
|
28
|
+
#if defined(__GNUC__)
|
29
|
+
# define YP_ATTRIBUTE_UNUSED __attribute__((unused))
|
30
|
+
#else
|
31
|
+
# define YP_ATTRIBUTE_UNUSED
|
32
|
+
#endif
|
33
|
+
|
34
|
+
// inline
|
35
|
+
#if defined(_MSC_VER) && !defined(inline)
|
36
|
+
# define inline __inline
|
37
|
+
#endif
|
38
|
+
|
39
|
+
int yp_strncasecmp(const char *string1, const char *string2, size_t length);
|
40
|
+
|
41
|
+
int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...);
|
42
|
+
|
43
|
+
#if defined(HAVE_SNPRINTF)
|
44
|
+
// We use snprintf if it's available
|
45
|
+
# define yp_snprintf snprintf
|
46
|
+
|
47
|
+
#else
|
48
|
+
// In case snprintf isn't present on the system, we provide our own that simply
|
49
|
+
// forwards to the less-safe sprintf.
|
50
|
+
# define yp_snprintf(dest, size, ...) sprintf((dest), __VA_ARGS__)
|
51
|
+
|
52
|
+
#endif
|
53
|
+
|
54
|
+
#endif
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#ifndef YARP_DIAGNOSTIC_H
|
2
|
+
#define YARP_DIAGNOSTIC_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
#include "yarp/util/yp_list.h"
|
6
|
+
|
7
|
+
#include <stdbool.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
|
10
|
+
// This struct represents a diagnostic found during parsing.
|
11
|
+
typedef struct {
|
12
|
+
yp_list_node_t node; // embedded list node; presumably links this diagnostic into the yp_list_t passed to yp_diagnostic_list_append -- confirm
|
13
|
+
const char *start; // presumably the start of the source range the diagnostic refers to -- confirm against callers
|
14
|
+
const char *end; // presumably the end of that source range -- confirm against callers
|
15
|
+
const char *message; // text of the diagnostic; ownership/lifetime is not visible in this header
|
16
|
+
} yp_diagnostic_t;
|
17
|
+
|
18
|
+
// Append a diagnostic to the given list of diagnostics.
|
19
|
+
bool yp_diagnostic_list_append(yp_list_t *list, const char *start, const char *end, const char *message);
|
20
|
+
|
21
|
+
// Deallocate the internal state of the given diagnostic list.
|
22
|
+
void yp_diagnostic_list_free(yp_list_t *list);
|
23
|
+
|
24
|
+
#endif
|
@@ -0,0 +1,94 @@
|
|
1
|
+
#ifndef YARP_ENCODING_H
|
2
|
+
#define YARP_ENCODING_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
|
6
|
+
#include <assert.h>
|
7
|
+
#include <stdbool.h>
|
8
|
+
#include <stddef.h>
|
9
|
+
#include <stdint.h>
|
10
|
+
|
11
|
+
// This struct defines the functions necessary to implement the encoding
|
12
|
+
// interface so we can determine how many bytes the subsequent character takes.
|
13
|
+
// Each callback should return the number of bytes, or 0 if the next bytes are
|
14
|
+
// invalid for the encoding and type.
|
15
|
+
typedef struct {
|
16
|
+
// Return the number of bytes that the next character takes if it is valid
|
17
|
+
// in the encoding. Does not read more than n bytes. It is assumed that n is
|
18
|
+
// at least 1.
|
19
|
+
size_t (*char_width)(const char *c, ptrdiff_t n);
|
20
|
+
|
21
|
+
// Return the number of bytes that the next character takes if it is valid
|
22
|
+
// in the encoding and is alphabetical. Does not read more than n bytes. It
|
23
|
+
// is assumed that n is at least 1.
|
24
|
+
size_t (*alpha_char)(const char *c, ptrdiff_t n);
|
25
|
+
|
26
|
+
// Return the number of bytes that the next character takes if it is valid
|
27
|
+
// in the encoding and is alphanumeric. Does not read more than n bytes. It
|
28
|
+
// is assumed that n is at least 1.
|
29
|
+
size_t (*alnum_char)(const char *c, ptrdiff_t n);
|
30
|
+
|
31
|
+
// Return true if the next character is valid in the encoding and is an
|
32
|
+
// uppercase character. Does not read more than n bytes. It is assumed that
|
33
|
+
// n is at least 1.
|
34
|
+
bool (*isupper_char)(const char *c, ptrdiff_t n);
|
35
|
+
|
36
|
+
// The name of the encoding. This should correspond to a value that can be
|
37
|
+
// passed to Encoding.find in Ruby.
|
38
|
+
const char *name;
|
39
|
+
|
40
|
+
// True if the encoding is a multibyte encoding.
|
41
|
+
bool multibyte;
|
42
|
+
} yp_encoding_t;
|
43
|
+
|
44
|
+
// These bits define the location of each bit of metadata within the various
|
45
|
+
// lookup tables that are used to determine the properties of a character.
|
46
|
+
// Flag bit marking a character as alphabetic in the lookup tables. The
// expansion is parenthesized so it stays a single operand next to
// higher-precedence operators at the use site.
#define YP_ENCODING_ALPHABETIC_BIT (1 << 0)
|
47
|
+
// Flag bit marking a character as alphanumeric in the lookup tables. The
// expansion is parenthesized so it stays a single operand next to
// higher-precedence operators at the use site.
#define YP_ENCODING_ALPHANUMERIC_BIT (1 << 1)
|
48
|
+
// Flag bit marking a character as uppercase in the lookup tables. The
// expansion is parenthesized so it stays a single operand next to
// higher-precedence operators at the use site.
#define YP_ENCODING_UPPERCASE_BIT (1 << 2)
|
49
|
+
|
50
|
+
// These functions are reused by some other encodings, so they are defined here
|
51
|
+
// so they can be shared.
|
52
|
+
size_t yp_encoding_ascii_alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
53
|
+
size_t yp_encoding_ascii_alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
54
|
+
bool yp_encoding_ascii_isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
55
|
+
|
56
|
+
// These functions are shared between the actual encoding and the fast path in
|
57
|
+
// the parser so they need to be internally visible.
|
58
|
+
size_t yp_encoding_utf_8_alpha_char(const char *c, ptrdiff_t n);
|
59
|
+
size_t yp_encoding_utf_8_alnum_char(const char *c, ptrdiff_t n);
|
60
|
+
|
61
|
+
// This lookup table is referenced in both the UTF-8 encoding file and the
|
62
|
+
// parser directly in order to speed up the default encoding processing.
|
63
|
+
extern unsigned char yp_encoding_unicode_table[256];
|
64
|
+
|
65
|
+
// These are the encodings that are supported by the parser. They are defined in
|
66
|
+
// their own files in the src/enc directory.
|
67
|
+
extern yp_encoding_t yp_encoding_ascii;
|
68
|
+
extern yp_encoding_t yp_encoding_ascii_8bit;
|
69
|
+
extern yp_encoding_t yp_encoding_big5;
|
70
|
+
extern yp_encoding_t yp_encoding_euc_jp;
|
71
|
+
extern yp_encoding_t yp_encoding_gbk;
|
72
|
+
extern yp_encoding_t yp_encoding_iso_8859_1;
|
73
|
+
extern yp_encoding_t yp_encoding_iso_8859_2;
|
74
|
+
extern yp_encoding_t yp_encoding_iso_8859_3;
|
75
|
+
extern yp_encoding_t yp_encoding_iso_8859_4;
|
76
|
+
extern yp_encoding_t yp_encoding_iso_8859_5;
|
77
|
+
extern yp_encoding_t yp_encoding_iso_8859_6;
|
78
|
+
extern yp_encoding_t yp_encoding_iso_8859_7;
|
79
|
+
extern yp_encoding_t yp_encoding_iso_8859_8;
|
80
|
+
extern yp_encoding_t yp_encoding_iso_8859_9;
|
81
|
+
extern yp_encoding_t yp_encoding_iso_8859_10;
|
82
|
+
extern yp_encoding_t yp_encoding_iso_8859_11;
|
83
|
+
extern yp_encoding_t yp_encoding_iso_8859_13;
|
84
|
+
extern yp_encoding_t yp_encoding_iso_8859_14;
|
85
|
+
extern yp_encoding_t yp_encoding_iso_8859_15;
|
86
|
+
extern yp_encoding_t yp_encoding_iso_8859_16;
|
87
|
+
extern yp_encoding_t yp_encoding_koi8_r;
|
88
|
+
extern yp_encoding_t yp_encoding_shift_jis;
|
89
|
+
extern yp_encoding_t yp_encoding_utf_8;
|
90
|
+
extern yp_encoding_t yp_encoding_windows_31j;
|
91
|
+
extern yp_encoding_t yp_encoding_windows_1251;
|
92
|
+
extern yp_encoding_t yp_encoding_windows_1252;
|
93
|
+
|
94
|
+
#endif
|
data/include/yarp/node.h
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#ifndef YARP_NODE_H
|
2
|
+
#define YARP_NODE_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
#include "yarp/parser.h"
|
6
|
+
|
7
|
+
// Append a token to the given list.
|
8
|
+
void yp_location_list_append(yp_location_list_t *list, const yp_token_t *token);
|
9
|
+
|
10
|
+
// Append a new node onto the end of the node list.
|
11
|
+
void yp_node_list_append(yp_node_list_t *list, yp_node_t *node);
|
12
|
+
|
13
|
+
// Clear the node but preserve its location.
|
14
|
+
void yp_node_clear(yp_node_t *node);
|
15
|
+
|
16
|
+
// Deallocate a node and all of its children.
|
17
|
+
YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
|
18
|
+
|
19
|
+
// This struct stores the information gathered by the yp_node_memsize function.
|
20
|
+
// It contains both the memory footprint and additional metadata about the
|
21
|
+
// shape of the tree.
|
22
|
+
typedef struct {
|
23
|
+
size_t memsize; // memory footprint reported by yp_node_memsize (unit presumably bytes -- confirm)
|
24
|
+
size_t node_count; // tree-shape metadata; presumably the number of nodes counted -- confirm against the implementation
|
25
|
+
} yp_memsize_t;
|
26
|
+
|
27
|
+
// Calculates the memory footprint of a given node.
|
28
|
+
YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
|
29
|
+
|
30
|
+
// Returns a string representation of the given node type.
|
31
|
+
YP_EXPORTED_FUNCTION const char * yp_node_type_to_str(yp_node_type_t node_type);
|
32
|
+
|
33
|
+
#define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
|
34
|
+
#define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 })
|
35
|
+
|
36
|
+
#endif // YARP_NODE_H
|
data/include/yarp/pack.h
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
#ifndef YARP_PACK_H
|
2
|
+
#define YARP_PACK_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
|
6
|
+
#include <stdint.h>
|
7
|
+
#include <stdlib.h>
|
8
|
+
|
9
|
+
typedef enum yp_pack_version {
|
10
|
+
YP_PACK_VERSION_3_2_0
|
11
|
+
} yp_pack_version;
|
12
|
+
|
13
|
+
typedef enum yp_pack_variant {
|
14
|
+
YP_PACK_VARIANT_PACK,
|
15
|
+
YP_PACK_VARIANT_UNPACK
|
16
|
+
} yp_pack_variant;
|
17
|
+
|
18
|
+
typedef enum yp_pack_type {
|
19
|
+
YP_PACK_SPACE,
|
20
|
+
YP_PACK_COMMENT,
|
21
|
+
YP_PACK_INTEGER,
|
22
|
+
YP_PACK_UTF8,
|
23
|
+
YP_PACK_BER,
|
24
|
+
YP_PACK_FLOAT,
|
25
|
+
YP_PACK_STRING_SPACE_PADDED,
|
26
|
+
YP_PACK_STRING_NULL_PADDED,
|
27
|
+
YP_PACK_STRING_NULL_TERMINATED,
|
28
|
+
YP_PACK_STRING_MSB,
|
29
|
+
YP_PACK_STRING_LSB,
|
30
|
+
YP_PACK_STRING_HEX_HIGH,
|
31
|
+
YP_PACK_STRING_HEX_LOW,
|
32
|
+
YP_PACK_STRING_UU,
|
33
|
+
YP_PACK_STRING_MIME,
|
34
|
+
YP_PACK_STRING_BASE64,
|
35
|
+
YP_PACK_STRING_FIXED,
|
36
|
+
YP_PACK_STRING_POINTER,
|
37
|
+
YP_PACK_MOVE,
|
38
|
+
YP_PACK_BACK,
|
39
|
+
YP_PACK_NULL,
|
40
|
+
YP_PACK_END
|
41
|
+
} yp_pack_type;
|
42
|
+
|
43
|
+
typedef enum yp_pack_signed {
|
44
|
+
YP_PACK_UNSIGNED,
|
45
|
+
YP_PACK_SIGNED,
|
46
|
+
YP_PACK_SIGNED_NA
|
47
|
+
} yp_pack_signed;
|
48
|
+
|
49
|
+
typedef enum yp_pack_endian {
|
50
|
+
YP_PACK_AGNOSTIC_ENDIAN,
|
51
|
+
YP_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
|
52
|
+
YP_PACK_BIG_ENDIAN, // aka 'network', or 'N'
|
53
|
+
YP_PACK_NATIVE_ENDIAN,
|
54
|
+
YP_PACK_ENDIAN_NA
|
55
|
+
} yp_pack_endian;
|
56
|
+
|
57
|
+
typedef enum yp_pack_size {
|
58
|
+
YP_PACK_SIZE_SHORT,
|
59
|
+
YP_PACK_SIZE_INT,
|
60
|
+
YP_PACK_SIZE_LONG,
|
61
|
+
YP_PACK_SIZE_LONG_LONG,
|
62
|
+
YP_PACK_SIZE_8,
|
63
|
+
YP_PACK_SIZE_16,
|
64
|
+
YP_PACK_SIZE_32,
|
65
|
+
YP_PACK_SIZE_64,
|
66
|
+
YP_PACK_SIZE_P,
|
67
|
+
YP_PACK_SIZE_NA
|
68
|
+
} yp_pack_size;
|
69
|
+
|
70
|
+
typedef enum yp_pack_length_type {
|
71
|
+
YP_PACK_LENGTH_FIXED,
|
72
|
+
YP_PACK_LENGTH_MAX,
|
73
|
+
YP_PACK_LENGTH_RELATIVE, // special case for unpack @*
|
74
|
+
YP_PACK_LENGTH_NA
|
75
|
+
} yp_pack_length_type;
|
76
|
+
|
77
|
+
typedef enum yp_pack_encoding {
|
78
|
+
YP_PACK_ENCODING_START,
|
79
|
+
YP_PACK_ENCODING_ASCII_8BIT,
|
80
|
+
YP_PACK_ENCODING_US_ASCII,
|
81
|
+
YP_PACK_ENCODING_UTF_8
|
82
|
+
} yp_pack_encoding;
|
83
|
+
|
84
|
+
typedef enum yp_pack_result {
|
85
|
+
YP_PACK_OK,
|
86
|
+
YP_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
|
87
|
+
YP_PACK_ERROR_UNKNOWN_DIRECTIVE,
|
88
|
+
YP_PACK_ERROR_LENGTH_TOO_BIG,
|
89
|
+
YP_PACK_ERROR_BANG_NOT_ALLOWED,
|
90
|
+
YP_PACK_ERROR_DOUBLE_ENDIAN
|
91
|
+
} yp_pack_result;
|
92
|
+
|
93
|
+
// Parse a single directive from a pack or unpack format string.
|
94
|
+
//
|
95
|
+
// Parameters:
|
96
|
+
// - [in] yp_pack_version version the version of Ruby (not currently a parameter of yp_pack_parse)
|
97
|
+
// - [in] yp_pack_variant variant_arg pack or unpack
|
98
|
+
// - [in out] const char **format the start of the next directive to parse
|
99
|
+
// on calling, and advanced beyond the parsed directive on return, or as
|
100
|
+
// much of it as was consumed until an error was encountered
|
101
|
+
// - [in] const char *format_end the end of the format string
|
102
|
+
// - [out] yp_pack_type *type the type of the directive
|
103
|
+
// - [out] yp_pack_signed *signed_type
|
104
|
+
// whether the value is signed
|
105
|
+
// - [out] yp_pack_endian *endian the endianness of the value
|
106
|
+
// - [out] yp_pack_size *size the size of the value
|
107
|
+
// - [out] yp_pack_length_type *length_type
|
108
|
+
// what kind of length is specified
|
109
|
+
// - [out] uint64_t *length the length of the directive
|
110
|
+
// - [in out] yp_pack_encoding *encoding
|
111
|
+
// takes the current encoding of the string
|
112
|
+
// which would result from parsing the whole format string, and returns a
|
113
|
+
// possibly changed encoding - the encoding should be
|
114
|
+
// YP_PACK_ENCODING_START when yp_pack_parse is called for the first
|
115
|
+
// directive in a format string
|
116
|
+
//
|
117
|
+
// Return:
|
118
|
+
// - YP_PACK_OK on success
|
119
|
+
// - YP_PACK_ERROR_* on error
|
120
|
+
//
|
121
|
+
// Notes:
|
122
|
+
// Consult Ruby documentation for the meaning of directives.
|
123
|
+
YP_EXPORTED_FUNCTION yp_pack_result
|
124
|
+
yp_pack_parse(
|
125
|
+
yp_pack_variant variant_arg,
|
126
|
+
const char **format,
|
127
|
+
const char *format_end,
|
128
|
+
yp_pack_type *type,
|
129
|
+
yp_pack_signed *signed_type,
|
130
|
+
yp_pack_endian *endian,
|
131
|
+
yp_pack_size *size,
|
132
|
+
yp_pack_length_type *length_type,
|
133
|
+
uint64_t *length,
|
134
|
+
yp_pack_encoding *encoding
|
135
|
+
);
|
136
|
+
|
137
|
+
// YARP abstracts sizes away from the native system - this converts an abstract
|
138
|
+
// size to a native size.
|
139
|
+
YP_EXPORTED_FUNCTION size_t yp_size_to_native(yp_pack_size size);
|
140
|
+
|
141
|
+
#endif
|