yarp 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +51 -0
- data/LICENSE.md +7 -0
- data/Makefile.in +79 -0
- data/README.md +86 -0
- data/config.h.in +25 -0
- data/config.yml +2147 -0
- data/configure +4487 -0
- data/docs/build_system.md +85 -0
- data/docs/building.md +26 -0
- data/docs/configuration.md +56 -0
- data/docs/design.md +53 -0
- data/docs/encoding.md +116 -0
- data/docs/extension.md +20 -0
- data/docs/fuzzing.md +93 -0
- data/docs/heredocs.md +36 -0
- data/docs/mapping.md +117 -0
- data/docs/ripper.md +36 -0
- data/docs/serialization.md +130 -0
- data/docs/testing.md +55 -0
- data/ext/yarp/api_node.c +3680 -0
- data/ext/yarp/api_pack.c +256 -0
- data/ext/yarp/extconf.rb +131 -0
- data/ext/yarp/extension.c +547 -0
- data/ext/yarp/extension.h +18 -0
- data/include/yarp/ast.h +1412 -0
- data/include/yarp/defines.h +54 -0
- data/include/yarp/diagnostic.h +24 -0
- data/include/yarp/enc/yp_encoding.h +94 -0
- data/include/yarp/node.h +36 -0
- data/include/yarp/pack.h +141 -0
- data/include/yarp/parser.h +389 -0
- data/include/yarp/regexp.h +19 -0
- data/include/yarp/unescape.h +42 -0
- data/include/yarp/util/yp_buffer.h +39 -0
- data/include/yarp/util/yp_char.h +75 -0
- data/include/yarp/util/yp_constant_pool.h +64 -0
- data/include/yarp/util/yp_list.h +67 -0
- data/include/yarp/util/yp_memchr.h +14 -0
- data/include/yarp/util/yp_newline_list.h +54 -0
- data/include/yarp/util/yp_state_stack.h +24 -0
- data/include/yarp/util/yp_string.h +57 -0
- data/include/yarp/util/yp_string_list.h +28 -0
- data/include/yarp/util/yp_strpbrk.h +29 -0
- data/include/yarp/version.h +5 -0
- data/include/yarp.h +69 -0
- data/lib/yarp/lex_compat.rb +759 -0
- data/lib/yarp/node.rb +7428 -0
- data/lib/yarp/pack.rb +185 -0
- data/lib/yarp/ripper_compat.rb +174 -0
- data/lib/yarp/serialize.rb +389 -0
- data/lib/yarp.rb +330 -0
- data/src/diagnostic.c +25 -0
- data/src/enc/yp_big5.c +79 -0
- data/src/enc/yp_euc_jp.c +85 -0
- data/src/enc/yp_gbk.c +88 -0
- data/src/enc/yp_shift_jis.c +83 -0
- data/src/enc/yp_tables.c +509 -0
- data/src/enc/yp_unicode.c +2320 -0
- data/src/enc/yp_windows_31j.c +83 -0
- data/src/node.c +2011 -0
- data/src/pack.c +493 -0
- data/src/prettyprint.c +1782 -0
- data/src/regexp.c +580 -0
- data/src/serialize.c +1576 -0
- data/src/token_type.c +347 -0
- data/src/unescape.c +576 -0
- data/src/util/yp_buffer.c +78 -0
- data/src/util/yp_char.c +229 -0
- data/src/util/yp_constant_pool.c +147 -0
- data/src/util/yp_list.c +50 -0
- data/src/util/yp_memchr.c +31 -0
- data/src/util/yp_newline_list.c +119 -0
- data/src/util/yp_state_stack.c +25 -0
- data/src/util/yp_string.c +207 -0
- data/src/util/yp_string_list.c +32 -0
- data/src/util/yp_strncasecmp.c +20 -0
- data/src/util/yp_strpbrk.c +66 -0
- data/src/yarp.c +13211 -0
- data/yarp.gemspec +100 -0
- metadata +125 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
#ifndef YARP_DEFINES_H
|
2
|
+
#define YARP_DEFINES_H
|
3
|
+
|
4
|
+
// This file should be included first by any *.h or *.c in YARP
|
5
|
+
|
6
|
+
#include "yarp/config.h"
|
7
|
+
|
8
|
+
#include <ctype.h>
|
9
|
+
#include <stdarg.h>
|
10
|
+
#include <stddef.h>
|
11
|
+
#include <stdio.h>
|
12
|
+
#include <string.h>
|
13
|
+
|
14
|
+
// YP_EXPORTED_FUNCTION
|
15
|
+
#ifndef YP_EXPORTED_FUNCTION
|
16
|
+
# ifdef YP_EXPORT_SYMBOLS
|
17
|
+
# ifdef _WIN32
|
18
|
+
# define YP_EXPORTED_FUNCTION __declspec(dllexport) extern
|
19
|
+
# else
|
20
|
+
# define YP_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
|
21
|
+
# endif
|
22
|
+
# else
|
23
|
+
# define YP_EXPORTED_FUNCTION
|
24
|
+
# endif
|
25
|
+
#endif
|
26
|
+
|
27
|
+
// YP_ATTRIBUTE_UNUSED
|
28
|
+
#if defined(__GNUC__)
|
29
|
+
# define YP_ATTRIBUTE_UNUSED __attribute__((unused))
|
30
|
+
#else
|
31
|
+
# define YP_ATTRIBUTE_UNUSED
|
32
|
+
#endif
|
33
|
+
|
34
|
+
// inline
|
35
|
+
#if defined(_MSC_VER) && !defined(inline)
|
36
|
+
# define inline __inline
|
37
|
+
#endif
|
38
|
+
|
39
|
+
int yp_strncasecmp(const char *string1, const char *string2, size_t length);
|
40
|
+
|
41
|
+
int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...);
|
42
|
+
|
43
|
+
#if defined(HAVE_SNPRINTF)
|
44
|
+
// We use snprintf if it's available
|
45
|
+
# define yp_snprintf snprintf
|
46
|
+
|
47
|
+
#else
|
48
|
+
// In case snprintf isn't present on the system, we provide our own that simply
|
49
|
+
// forwards to the less-safe sprintf.
|
50
|
+
# define yp_snprintf(dest, size, ...) sprintf((dest), __VA_ARGS__)
|
51
|
+
|
52
|
+
#endif
|
53
|
+
|
54
|
+
#endif
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#ifndef YARP_DIAGNOSTIC_H
|
2
|
+
#define YARP_DIAGNOSTIC_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
#include "yarp/util/yp_list.h"
|
6
|
+
|
7
|
+
#include <stdbool.h>
|
8
|
+
#include <stdlib.h>
|
9
|
+
|
10
|
+
// This struct represents a diagnostic found during parsing.
|
11
|
+
typedef struct {
|
12
|
+
yp_list_node_t node;
|
13
|
+
const char *start;
|
14
|
+
const char *end;
|
15
|
+
const char *message;
|
16
|
+
} yp_diagnostic_t;
|
17
|
+
|
18
|
+
// Append a diagnostic to the given list of diagnostics.
|
19
|
+
bool yp_diagnostic_list_append(yp_list_t *list, const char *start, const char *end, const char *message);
|
20
|
+
|
21
|
+
// Deallocate the internal state of the given diagnostic list.
|
22
|
+
void yp_diagnostic_list_free(yp_list_t *list);
|
23
|
+
|
24
|
+
#endif
|
@@ -0,0 +1,94 @@
|
|
1
|
+
#ifndef YARP_ENCODING_H
|
2
|
+
#define YARP_ENCODING_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
|
6
|
+
#include <assert.h>
|
7
|
+
#include <stdbool.h>
|
8
|
+
#include <stddef.h>
|
9
|
+
#include <stdint.h>
|
10
|
+
|
11
|
+
// This struct defines the functions necessary to implement the encoding
|
12
|
+
// interface so we can determine how many bytes the subsequent character takes.
|
13
|
+
// Each callback should return the number of bytes, or 0 if the next bytes are
|
14
|
+
// invalid for the encoding and type.
|
15
|
+
typedef struct {
|
16
|
+
// Return the number of bytes that the next character takes if it is valid
|
17
|
+
// in the encoding. Does not read more than n bytes. It is assumed that n is
|
18
|
+
// at least 1.
|
19
|
+
size_t (*char_width)(const char *c, ptrdiff_t n);
|
20
|
+
|
21
|
+
// Return the number of bytes that the next character takes if it is valid
|
22
|
+
// in the encoding and is alphabetical. Does not read more than n bytes. It
|
23
|
+
// is assumed that n is at least 1.
|
24
|
+
size_t (*alpha_char)(const char *c, ptrdiff_t n);
|
25
|
+
|
26
|
+
// Return the number of bytes that the next character takes if it is valid
|
27
|
+
// in the encoding and is alphanumeric. Does not read more than n bytes. It
|
28
|
+
// is assumed that n is at least 1.
|
29
|
+
size_t (*alnum_char)(const char *c, ptrdiff_t n);
|
30
|
+
|
31
|
+
// Return true if the next character is valid in the encoding and is an
|
32
|
+
// uppercase character. Does not read more than n bytes. It is assumed that
|
33
|
+
// n is at least 1.
|
34
|
+
bool (*isupper_char)(const char *c, ptrdiff_t n);
|
35
|
+
|
36
|
+
// The name of the encoding. This should correspond to a value that can be
|
37
|
+
// passed to Encoding.find in Ruby.
|
38
|
+
const char *name;
|
39
|
+
|
40
|
+
// True if the encoding is a multibyte encoding.
|
41
|
+
bool multibyte;
|
42
|
+
} yp_encoding_t;
|
43
|
+
|
44
|
+
// These bits define the location of each bit of metadata within the various
|
45
|
+
// lookup tables that are used to determine the properties of a character.
|
46
|
+
#define YP_ENCODING_ALPHABETIC_BIT (1 << 0) // alphabetic flag; parenthesized so the mask expands safely inside larger expressions
|
47
|
+
#define YP_ENCODING_ALPHANUMERIC_BIT (1 << 1) // alphanumeric flag; parenthesized so the mask expands safely inside larger expressions
|
48
|
+
#define YP_ENCODING_UPPERCASE_BIT (1 << 2) // uppercase flag; parenthesized so the mask expands safely inside larger expressions
|
49
|
+
|
50
|
+
// These functions are reused by some other encodings, so they are defined here
|
51
|
+
// so they can be shared.
|
52
|
+
size_t yp_encoding_ascii_alpha_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
53
|
+
size_t yp_encoding_ascii_alnum_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
54
|
+
bool yp_encoding_ascii_isupper_char(const char *c, YP_ATTRIBUTE_UNUSED ptrdiff_t n);
|
55
|
+
|
56
|
+
// These functions are shared between the actual encoding and the fast path in
|
57
|
+
// the parser so they need to be internally visible.
|
58
|
+
size_t yp_encoding_utf_8_alpha_char(const char *c, ptrdiff_t n);
|
59
|
+
size_t yp_encoding_utf_8_alnum_char(const char *c, ptrdiff_t n);
|
60
|
+
|
61
|
+
// This lookup table is referenced in both the UTF-8 encoding file and the
|
62
|
+
// parser directly in order to speed up the default encoding processing.
|
63
|
+
extern unsigned char yp_encoding_unicode_table[256];
|
64
|
+
|
65
|
+
// These are the encodings that are supported by the parser. They are defined in
|
66
|
+
// their own files in the src/enc directory.
|
67
|
+
extern yp_encoding_t yp_encoding_ascii;
|
68
|
+
extern yp_encoding_t yp_encoding_ascii_8bit;
|
69
|
+
extern yp_encoding_t yp_encoding_big5;
|
70
|
+
extern yp_encoding_t yp_encoding_euc_jp;
|
71
|
+
extern yp_encoding_t yp_encoding_gbk;
|
72
|
+
extern yp_encoding_t yp_encoding_iso_8859_1;
|
73
|
+
extern yp_encoding_t yp_encoding_iso_8859_2;
|
74
|
+
extern yp_encoding_t yp_encoding_iso_8859_3;
|
75
|
+
extern yp_encoding_t yp_encoding_iso_8859_4;
|
76
|
+
extern yp_encoding_t yp_encoding_iso_8859_5;
|
77
|
+
extern yp_encoding_t yp_encoding_iso_8859_6;
|
78
|
+
extern yp_encoding_t yp_encoding_iso_8859_7;
|
79
|
+
extern yp_encoding_t yp_encoding_iso_8859_8;
|
80
|
+
extern yp_encoding_t yp_encoding_iso_8859_9;
|
81
|
+
extern yp_encoding_t yp_encoding_iso_8859_10;
|
82
|
+
extern yp_encoding_t yp_encoding_iso_8859_11;
|
83
|
+
extern yp_encoding_t yp_encoding_iso_8859_13;
|
84
|
+
extern yp_encoding_t yp_encoding_iso_8859_14;
|
85
|
+
extern yp_encoding_t yp_encoding_iso_8859_15;
|
86
|
+
extern yp_encoding_t yp_encoding_iso_8859_16;
|
87
|
+
extern yp_encoding_t yp_encoding_koi8_r;
|
88
|
+
extern yp_encoding_t yp_encoding_shift_jis;
|
89
|
+
extern yp_encoding_t yp_encoding_utf_8;
|
90
|
+
extern yp_encoding_t yp_encoding_windows_31j;
|
91
|
+
extern yp_encoding_t yp_encoding_windows_1251;
|
92
|
+
extern yp_encoding_t yp_encoding_windows_1252;
|
93
|
+
|
94
|
+
#endif
|
data/include/yarp/node.h
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
#ifndef YARP_NODE_H
|
2
|
+
#define YARP_NODE_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
#include "yarp/parser.h"
|
6
|
+
|
7
|
+
// Append a token to the given list.
|
8
|
+
void yp_location_list_append(yp_location_list_t *list, const yp_token_t *token);
|
9
|
+
|
10
|
+
// Append a new node onto the end of the node list.
|
11
|
+
void yp_node_list_append(yp_node_list_t *list, yp_node_t *node);
|
12
|
+
|
13
|
+
// Clear the node but preserve its location.
|
14
|
+
void yp_node_clear(yp_node_t *node);
|
15
|
+
|
16
|
+
// Deallocate a node and all of its children.
|
17
|
+
YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
|
18
|
+
|
19
|
+
// This struct stores the information gathered by the yp_node_memsize function.
|
20
|
+
// It contains both the memory footprint and additional metadata about the
|
21
|
+
// shape of the tree.
|
22
|
+
typedef struct {
|
23
|
+
size_t memsize;
|
24
|
+
size_t node_count;
|
25
|
+
} yp_memsize_t;
|
26
|
+
|
27
|
+
// Calculates the memory footprint of a given node.
|
28
|
+
YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
|
29
|
+
|
30
|
+
// Returns a string representation of the given node type.
|
31
|
+
YP_EXPORTED_FUNCTION const char * yp_node_type_to_str(yp_node_type_t node_type);
|
32
|
+
|
33
|
+
#define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
|
34
|
+
#define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 })
|
35
|
+
|
36
|
+
#endif // YARP_NODE_H
|
data/include/yarp/pack.h
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
#ifndef YARP_PACK_H
|
2
|
+
#define YARP_PACK_H
|
3
|
+
|
4
|
+
#include "yarp/defines.h"
|
5
|
+
|
6
|
+
#include <stdint.h>
|
7
|
+
#include <stdlib.h>
|
8
|
+
|
9
|
+
typedef enum yp_pack_version {
|
10
|
+
YP_PACK_VERSION_3_2_0
|
11
|
+
} yp_pack_version;
|
12
|
+
|
13
|
+
typedef enum yp_pack_variant {
|
14
|
+
YP_PACK_VARIANT_PACK,
|
15
|
+
YP_PACK_VARIANT_UNPACK
|
16
|
+
} yp_pack_variant;
|
17
|
+
|
18
|
+
typedef enum yp_pack_type {
|
19
|
+
YP_PACK_SPACE,
|
20
|
+
YP_PACK_COMMENT,
|
21
|
+
YP_PACK_INTEGER,
|
22
|
+
YP_PACK_UTF8,
|
23
|
+
YP_PACK_BER,
|
24
|
+
YP_PACK_FLOAT,
|
25
|
+
YP_PACK_STRING_SPACE_PADDED,
|
26
|
+
YP_PACK_STRING_NULL_PADDED,
|
27
|
+
YP_PACK_STRING_NULL_TERMINATED,
|
28
|
+
YP_PACK_STRING_MSB,
|
29
|
+
YP_PACK_STRING_LSB,
|
30
|
+
YP_PACK_STRING_HEX_HIGH,
|
31
|
+
YP_PACK_STRING_HEX_LOW,
|
32
|
+
YP_PACK_STRING_UU,
|
33
|
+
YP_PACK_STRING_MIME,
|
34
|
+
YP_PACK_STRING_BASE64,
|
35
|
+
YP_PACK_STRING_FIXED,
|
36
|
+
YP_PACK_STRING_POINTER,
|
37
|
+
YP_PACK_MOVE,
|
38
|
+
YP_PACK_BACK,
|
39
|
+
YP_PACK_NULL,
|
40
|
+
YP_PACK_END
|
41
|
+
} yp_pack_type;
|
42
|
+
|
43
|
+
typedef enum yp_pack_signed {
|
44
|
+
YP_PACK_UNSIGNED,
|
45
|
+
YP_PACK_SIGNED,
|
46
|
+
YP_PACK_SIGNED_NA
|
47
|
+
} yp_pack_signed;
|
48
|
+
|
49
|
+
typedef enum yp_pack_endian {
|
50
|
+
YP_PACK_AGNOSTIC_ENDIAN,
|
51
|
+
YP_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
|
52
|
+
YP_PACK_BIG_ENDIAN, // aka 'network', or 'N'
|
53
|
+
YP_PACK_NATIVE_ENDIAN,
|
54
|
+
YP_PACK_ENDIAN_NA
|
55
|
+
} yp_pack_endian;
|
56
|
+
|
57
|
+
typedef enum yp_pack_size {
|
58
|
+
YP_PACK_SIZE_SHORT,
|
59
|
+
YP_PACK_SIZE_INT,
|
60
|
+
YP_PACK_SIZE_LONG,
|
61
|
+
YP_PACK_SIZE_LONG_LONG,
|
62
|
+
YP_PACK_SIZE_8,
|
63
|
+
YP_PACK_SIZE_16,
|
64
|
+
YP_PACK_SIZE_32,
|
65
|
+
YP_PACK_SIZE_64,
|
66
|
+
YP_PACK_SIZE_P,
|
67
|
+
YP_PACK_SIZE_NA
|
68
|
+
} yp_pack_size;
|
69
|
+
|
70
|
+
typedef enum yp_pack_length_type {
|
71
|
+
YP_PACK_LENGTH_FIXED,
|
72
|
+
YP_PACK_LENGTH_MAX,
|
73
|
+
YP_PACK_LENGTH_RELATIVE, // special case for unpack @*
|
74
|
+
YP_PACK_LENGTH_NA
|
75
|
+
} yp_pack_length_type;
|
76
|
+
|
77
|
+
typedef enum yp_pack_encoding {
|
78
|
+
YP_PACK_ENCODING_START,
|
79
|
+
YP_PACK_ENCODING_ASCII_8BIT,
|
80
|
+
YP_PACK_ENCODING_US_ASCII,
|
81
|
+
YP_PACK_ENCODING_UTF_8
|
82
|
+
} yp_pack_encoding;
|
83
|
+
|
84
|
+
typedef enum yp_pack_result {
|
85
|
+
YP_PACK_OK,
|
86
|
+
YP_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
|
87
|
+
YP_PACK_ERROR_UNKNOWN_DIRECTIVE,
|
88
|
+
YP_PACK_ERROR_LENGTH_TOO_BIG,
|
89
|
+
YP_PACK_ERROR_BANG_NOT_ALLOWED,
|
90
|
+
YP_PACK_ERROR_DOUBLE_ENDIAN
|
91
|
+
} yp_pack_result;
|
92
|
+
|
93
|
+
// Parse a single directive from a pack or unpack format string.
|
94
|
+
//
|
95
|
+
// Parameters:
|
96
|
+
// - (note: yp_pack_version, the version of Ruby, is not a parameter of the declaration below)
|
97
|
+
// - [in] yp_pack_variant variant_arg pack or unpack
|
98
|
+
// - [in out] const char **format the start of the next directive to parse
|
99
|
+
// on calling, and advanced beyond the parsed directive on return, or as
|
100
|
+
// much of it as was consumed until an error was encountered
|
101
|
+
// - [in] const char *format_end the end of the format string
|
102
|
+
// - [out] yp_pack_type *type the type of the directive
|
103
|
+
// - [out] yp_pack_signed *signed_type
|
104
|
+
// whether the value is signed
|
105
|
+
// - [out] yp_pack_endian *endian the endianness of the value
|
106
|
+
// - [out] yp_pack_size *size the size of the value
|
107
|
+
// - [out] yp_pack_length_type *length_type
|
108
|
+
// what kind of length is specified
|
109
|
+
// - [out] uint64_t *length the length of the directive
|
110
|
+
// - [in out] yp_pack_encoding *encoding
|
111
|
+
// takes the current encoding of the string
|
112
|
+
// which would result from parsing the whole format string, and returns a
|
113
|
+
// possibly changed encoding - the encoding should be
|
114
|
+
// YP_PACK_ENCODING_START when yp_pack_parse is called for the first
|
115
|
+
// directive in a format string
|
116
|
+
//
|
117
|
+
// Return:
|
118
|
+
// - YP_PACK_OK on success
|
119
|
+
// - YP_PACK_ERROR_* on error
|
120
|
+
//
|
121
|
+
// Notes:
|
122
|
+
// Consult Ruby documentation for the meaning of directives.
|
123
|
+
YP_EXPORTED_FUNCTION yp_pack_result
|
124
|
+
yp_pack_parse(
|
125
|
+
yp_pack_variant variant_arg,
|
126
|
+
const char **format,
|
127
|
+
const char *format_end,
|
128
|
+
yp_pack_type *type,
|
129
|
+
yp_pack_signed *signed_type,
|
130
|
+
yp_pack_endian *endian,
|
131
|
+
yp_pack_size *size,
|
132
|
+
yp_pack_length_type *length_type,
|
133
|
+
uint64_t *length,
|
134
|
+
yp_pack_encoding *encoding
|
135
|
+
);
|
136
|
+
|
137
|
+
// YARP abstracts sizes away from the native system - this converts an abstract
|
138
|
+
// size to a native size.
|
139
|
+
YP_EXPORTED_FUNCTION size_t yp_size_to_native(yp_pack_size size);
|
140
|
+
|
141
|
+
#endif
|