rbs 1.6.1 → 1.7.0.beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +18 -3
- data/.gitignore +10 -1
- data/CHANGELOG.md +25 -0
- data/Gemfile +1 -0
- data/Rakefile +22 -22
- data/core/enumerator.rbs +1 -0
- data/core/io.rbs +1 -1
- data/core/kernel.rbs +4 -4
- data/core/trace_point.rbs +1 -1
- data/ext/rbs_extension/constants.c +139 -0
- data/ext/rbs_extension/constants.h +72 -0
- data/ext/rbs_extension/extconf.rb +3 -0
- data/ext/rbs_extension/lexer.c +2533 -0
- data/ext/rbs_extension/lexer.h +161 -0
- data/ext/rbs_extension/lexer.re +140 -0
- data/ext/rbs_extension/lexstate.c +139 -0
- data/ext/rbs_extension/location.c +295 -0
- data/ext/rbs_extension/location.h +59 -0
- data/ext/rbs_extension/main.c +9 -0
- data/ext/rbs_extension/parser.c +2390 -0
- data/ext/rbs_extension/parser.h +18 -0
- data/ext/rbs_extension/parserstate.c +313 -0
- data/ext/rbs_extension/parserstate.h +141 -0
- data/ext/rbs_extension/rbs_extension.h +40 -0
- data/ext/rbs_extension/ruby_objs.c +521 -0
- data/ext/rbs_extension/ruby_objs.h +46 -0
- data/ext/rbs_extension/unescape.c +65 -0
- data/goodcheck.yml +1 -1
- data/lib/rbs/ast/comment.rb +0 -12
- data/lib/rbs/buffer.rb +4 -0
- data/lib/rbs/cli.rb +5 -8
- data/lib/rbs/collection/installer.rb +1 -0
- data/lib/rbs/collection/sources/git.rb +18 -3
- data/lib/rbs/errors.rb +28 -1
- data/lib/rbs/location.rb +221 -217
- data/lib/rbs/location_aux.rb +121 -0
- data/lib/rbs/locator.rb +10 -7
- data/lib/rbs/parser_aux.rb +63 -0
- data/lib/rbs/parser_compat/lexer_error.rb +4 -0
- data/lib/rbs/parser_compat/located_value.rb +5 -0
- data/lib/rbs/parser_compat/semantics_error.rb +4 -0
- data/lib/rbs/parser_compat/syntax_error.rb +4 -0
- data/lib/rbs/types.rb +2 -3
- data/lib/rbs/version.rb +1 -1
- data/lib/rbs/writer.rb +4 -2
- data/lib/rbs.rb +14 -7
- data/rbs.gemspec +2 -1
- data/sig/ancestor_builder.rbs +2 -2
- data/sig/annotation.rbs +2 -2
- data/sig/comment.rbs +7 -7
- data/sig/constant_table.rbs +1 -1
- data/sig/declarations.rbs +9 -9
- data/sig/definition.rbs +1 -1
- data/sig/definition_builder.rbs +2 -2
- data/sig/errors.rbs +40 -25
- data/sig/location.rbs +46 -78
- data/sig/locator.rbs +2 -2
- data/sig/members.rbs +7 -7
- data/sig/method_types.rbs +3 -3
- data/sig/parser.rbs +15 -20
- data/sig/rbs.rbs +4 -0
- data/sig/types.rbs +45 -27
- data/sig/writer.rbs +1 -1
- data/stdlib/io-console/0/io-console.rbs +137 -0
- data/stdlib/json/0/json.rbs +3 -3
- data/stdlib/net-http/0/net-http.rbs +2 -1
- data/stdlib/tempfile/0/tempfile.rbs +4 -6
- metadata +32 -7
- data/lib/rbs/parser.rb +0 -3614
@@ -0,0 +1,161 @@
|
|
1
|
+
#ifndef RBS__LEXER_H
|
2
|
+
#define RBS__LEXER_H
|
3
|
+
|
4
|
+
enum TokenType {
|
5
|
+
NullType, /* (Nothing) */
|
6
|
+
pEOF, /* EOF */
|
7
|
+
ErrorToken, /* Error */
|
8
|
+
|
9
|
+
pLPAREN, /* ( */
|
10
|
+
pRPAREN, /* ) */
|
11
|
+
pCOLON, /* : */
|
12
|
+
pCOLON2, /* :: */
|
13
|
+
pLBRACKET, /* [ */
|
14
|
+
pRBRACKET, /* ] */
|
15
|
+
pLBRACE, /* { */
|
16
|
+
pRBRACE, /* } */
|
17
|
+
pHAT, /* ^ */
|
18
|
+
pARROW, /* -> */
|
19
|
+
pFATARROW, /* => */
|
20
|
+
pCOMMA, /* , */
|
21
|
+
pBAR, /* | */
|
22
|
+
pAMP, /* & */
|
23
|
+
pSTAR, /* * */
|
24
|
+
pSTAR2, /* ** */
|
25
|
+
pDOT, /* . */
|
26
|
+
pDOT3, /* ... */
|
27
|
+
pBANG, /* ! */
|
28
|
+
pQUESTION, /* ? */
|
29
|
+
pLT, /* < */
|
30
|
+
pEQ, /* = */
|
31
|
+
|
32
|
+
kALIAS, /* alias */
|
33
|
+
kATTRACCESSOR, /* attr_accessor */
|
34
|
+
kATTRREADER, /* attr_reader */
|
35
|
+
kATTRWRITER, /* attr_writer */
|
36
|
+
kBOOL, /* bool */
|
37
|
+
kBOT, /* bot */
|
38
|
+
kCLASS, /* class */
|
39
|
+
kDEF, /* def */
|
40
|
+
kEND, /* end */
|
41
|
+
kEXTEND, /* extend */
|
42
|
+
kFALSE, /* false */
|
43
|
+
kIN, /* in */
|
44
|
+
kINCLUDE, /* include */
|
45
|
+
kINSTANCE, /* instance */
|
46
|
+
kINTERFACE, /* interface */
|
47
|
+
kMODULE, /* module */
|
48
|
+
kNIL, /* nil */
|
49
|
+
kOUT, /* out */
|
50
|
+
kPREPEND, /* prepend */
|
51
|
+
kPRIVATE, /* private */
|
52
|
+
kPUBLIC, /* public */
|
53
|
+
kSELF, /* self */
|
54
|
+
kSINGLETON, /* singleton */
|
55
|
+
kTOP, /* top */
|
56
|
+
kTRUE, /* true */
|
57
|
+
kTYPE, /* type */
|
58
|
+
kUNCHECKED, /* unchecked */
|
59
|
+
kUNTYPED, /* untyped */
|
60
|
+
kVOID, /* void */
|
61
|
+
|
62
|
+
tLIDENT, /* Identifiers starting with lower case */
|
63
|
+
tUIDENT, /* Identifiers starting with upper case */
|
64
|
+
tULIDENT, /* Identifiers starting with `_` followed by upper case */
|
65
|
+
tULLIDENT, /* Identifiers starting with `_` followed by lower case */
|
66
|
+
tGIDENT, /* Identifiers starting with `$` */
|
67
|
+
tAIDENT, /* Identifiers starting with `@` */
|
68
|
+
tA2IDENT, /* Identifiers starting with `@@` */
|
69
|
+
tBANGIDENT, /* Identifiers ending with `!` */
|
70
|
+
tEQIDENT, /* Identifiers ending with `=` */
|
71
|
+
tQIDENT, /* Quoted identifier */
|
72
|
+
tOPERATOR, /* Operator identifier */
|
73
|
+
|
74
|
+
tCOMMENT, /* Comment */
|
75
|
+
tLINECOMMENT, /* Comment of all line */
|
76
|
+
|
77
|
+
tDQSTRING, /* Double quoted string */
|
78
|
+
tSQSTRING, /* Single quoted string */
|
79
|
+
tINTEGER, /* Integer */
|
80
|
+
tSYMBOL, /* Symbol */
|
81
|
+
tDQSYMBOL, /* Double quoted symbol */
|
82
|
+
tSQSYMBOL, /* Single quoted symbol */
|
83
|
+
tANNOTATION, /* Annotation */
|
84
|
+
};
|
85
|
+
|
86
|
+
/**
|
87
|
+
* The `byte_pos` (or `char_pos`) is the primary data.
|
88
|
+
* The rest are cache.
|
89
|
+
*
|
90
|
+
* They can be computed from `byte_pos` (or `char_pos`), but it needs full scan from the beginning of the string (depending on the encoding).
|
91
|
+
* */
|
92
|
+
typedef struct {
|
93
|
+
int byte_pos;
|
94
|
+
int char_pos;
|
95
|
+
int line;
|
96
|
+
int column;
|
97
|
+
} position;
|
98
|
+
|
99
|
+
typedef struct {
|
100
|
+
position start;
|
101
|
+
position end;
|
102
|
+
} range;
|
103
|
+
|
104
|
+
typedef struct {
|
105
|
+
enum TokenType type;
|
106
|
+
range range;
|
107
|
+
} token;
|
108
|
+
|
109
|
+
/**
|
110
|
+
* The lexer state is the current token.
|
111
|
+
*
|
112
|
+
* ```
|
113
|
+
* ... "a string token"
|
114
|
+
* ^ start position
|
115
|
+
* ^ current position
|
116
|
+
* ~~~~~~ Token => "a str
|
117
|
+
* ```
|
118
|
+
* */
|
119
|
+
typedef struct {
|
120
|
+
VALUE string;
|
121
|
+
position current; /* The current position */
|
122
|
+
position start; /* The start position of the current token */
|
123
|
+
bool first_token_of_line; /* This flag is used for tLINECOMMENT */
|
124
|
+
unsigned int last_char; /* Last peeked character */
|
125
|
+
} lexstate;
|
126
|
+
|
127
|
+
extern token NullToken;
|
128
|
+
extern position NullPosition;
|
129
|
+
extern range NULL_RANGE;
|
130
|
+
|
131
|
+
char *peek_token(lexstate *state, token tok);
|
132
|
+
int token_chars(token tok);
|
133
|
+
int token_bytes(token tok);
|
134
|
+
|
135
|
+
/* True when `pos` is the null position, i.e. its byte_pos is -1. The argument is parenthesized so expressions such as `*p` expand correctly. */
#define null_position_p(pos) ((pos).byte_pos == -1)
|
136
|
+
/* True when the start position of `range` has byte_pos -1. The argument is parenthesized so expressions such as `*r` expand correctly. */
#define null_range_p(range) ((range).start.byte_pos == -1)
|
137
|
+
/* Yields `pos1` unless it is the null position, in which case `pos2` is used. Every argument use is parenthesized (including the one forwarded to null_position_p) so arbitrary expressions are safe. */
#define nonnull_pos_or(pos1, pos2) (null_position_p((pos1)) ? (pos2) : (pos1))
|
138
|
+
/* Number of bytes covered by `range`: end.byte_pos minus start.byte_pos. The argument is parenthesized so expressions such as `*r` expand correctly. */
#define RANGE_BYTES(range) ((range).end.byte_pos - (range).start.byte_pos)
|
139
|
+
|
140
|
+
const char *token_type_str(enum TokenType type);
|
141
|
+
|
142
|
+
/**
|
143
|
+
* Peek at the character at the current position without consuming it.
|
144
|
+
* */
|
145
|
+
unsigned int peek(lexstate *state);
|
146
|
+
|
147
|
+
/**
|
148
|
+
* Skip one character.
|
149
|
+
* */
|
150
|
+
void skip(lexstate *state);
|
151
|
+
|
152
|
+
/**
|
153
|
+
* Return new token with given type.
|
154
|
+
* */
|
155
|
+
token next_token(lexstate *state, enum TokenType type);
|
156
|
+
|
157
|
+
token rbsparser_next_token(lexstate *state);
|
158
|
+
|
159
|
+
void print_token(token tok);
|
160
|
+
|
161
|
+
#endif
|
@@ -0,0 +1,140 @@
|
|
1
|
+
#include "rbs_extension.h"
|
2
|
+
|
3
|
+
token rbsparser_next_token(lexstate *state) {
|
4
|
+
lexstate backup;
|
5
|
+
|
6
|
+
start:
|
7
|
+
backup = *state;
|
8
|
+
|
9
|
+
/*!re2c
|
10
|
+
re2c:flags:u = 1;
|
11
|
+
re2c:api:style = free-form;
|
12
|
+
re2c:flags:input = custom;
|
13
|
+
re2c:define:YYCTYPE = "unsigned int";
|
14
|
+
re2c:define:YYPEEK = "peek(state)";
|
15
|
+
re2c:define:YYSKIP = "skip(state);";
|
16
|
+
re2c:define:YYBACKUP = "backup = *state;";
|
17
|
+
re2c:define:YYRESTORE = "*state = backup;";
|
18
|
+
re2c:yyfill:enable = 0;
|
19
|
+
|
20
|
+
word = [a-zA-Z0-9_];
|
21
|
+
|
22
|
+
operator = "/" | "~" | "[]" | "[]=" | "!" | "!=" | "!~" | "-" | "-@" | "+" | "+@"
|
23
|
+
| "==" | "===" | "=~" | "<<" | "<=" | "<=>" | ">" | ">=" | ">>" | "%";
|
24
|
+
|
25
|
+
"(" { return next_token(state, pLPAREN); }
|
26
|
+
")" { return next_token(state, pRPAREN); }
|
27
|
+
"[" { return next_token(state, pLBRACKET); }
|
28
|
+
"]" { return next_token(state, pRBRACKET); }
|
29
|
+
"{" { return next_token(state, pLBRACE); }
|
30
|
+
"}" { return next_token(state, pRBRACE); }
|
31
|
+
"," { return next_token(state, pCOMMA); }
|
32
|
+
"|" { return next_token(state, pBAR); }
|
33
|
+
"^" { return next_token(state, pHAT); }
|
34
|
+
"&" { return next_token(state, pAMP); }
|
35
|
+
"?" { return next_token(state, pQUESTION); }
|
36
|
+
"*" { return next_token(state, pSTAR); }
|
37
|
+
"**" { return next_token(state, pSTAR2); }
|
38
|
+
"." { return next_token(state, pDOT); }
|
39
|
+
"..." { return next_token(state, pDOT3); }
|
40
|
+
"`" { return next_token(state, tOPERATOR); }
|
41
|
+
"`" [^ :\x00] [^`\x00]* "`" { return next_token(state, tQIDENT); }
|
42
|
+
"->" { return next_token(state, pARROW); }
|
43
|
+
"=>" { return next_token(state, pFATARROW); }
|
44
|
+
"=" { return next_token(state, pEQ); }
|
45
|
+
":" { return next_token(state, pCOLON); }
|
46
|
+
"::" { return next_token(state, pCOLON2); }
|
47
|
+
"<" { return next_token(state, pLT); }
|
48
|
+
operator { return next_token(state, tOPERATOR); }
|
49
|
+
|
50
|
+
number = [0-9] [0-9_]*;
|
51
|
+
("-"|"+")? number { return next_token(state, tINTEGER); }
|
52
|
+
|
53
|
+
"%a{" [^}\x00]* "}" { return next_token(state, tANNOTATION); }
|
54
|
+
"%a(" [^)\x00]* ")" { return next_token(state, tANNOTATION); }
|
55
|
+
"%a[" [^\]\x00]* "]" { return next_token(state, tANNOTATION); }
|
56
|
+
"%a|" [^|\x00]* "|" { return next_token(state, tANNOTATION); }
|
57
|
+
"%a<" [^>\x00]* ">" { return next_token(state, tANNOTATION); }
|
58
|
+
|
59
|
+
"#" (. \ [\x00])* {
|
60
|
+
return next_token(
|
61
|
+
state,
|
62
|
+
state->first_token_of_line ? tLINECOMMENT : tCOMMENT
|
63
|
+
);
|
64
|
+
}
|
65
|
+
|
66
|
+
"alias" { return next_token(state, kALIAS); }
|
67
|
+
"attr_accessor" { return next_token(state, kATTRACCESSOR); }
|
68
|
+
"attr_reader" { return next_token(state, kATTRREADER); }
|
69
|
+
"attr_writer" { return next_token(state, kATTRWRITER); }
|
70
|
+
"bool" { return next_token(state, kBOOL); }
|
71
|
+
"bot" { return next_token(state, kBOT); }
|
72
|
+
"class" { return next_token(state, kCLASS); }
|
73
|
+
"def" { return next_token(state, kDEF); }
|
74
|
+
"end" { return next_token(state, kEND); }
|
75
|
+
"extend" { return next_token(state, kEXTEND); }
|
76
|
+
"false" { return next_token(state, kFALSE); }
|
77
|
+
"in" { return next_token(state, kIN); }
|
78
|
+
"include" { return next_token(state, kINCLUDE); }
|
79
|
+
"instance" { return next_token(state, kINSTANCE); }
|
80
|
+
"interface" { return next_token(state, kINTERFACE); }
|
81
|
+
"module" { return next_token(state, kMODULE); }
|
82
|
+
"nil" { return next_token(state, kNIL); }
|
83
|
+
"out" { return next_token(state, kOUT); }
|
84
|
+
"prepend" { return next_token(state, kPREPEND); }
|
85
|
+
"private" { return next_token(state, kPRIVATE); }
|
86
|
+
"public" { return next_token(state, kPUBLIC); }
|
87
|
+
"self" { return next_token(state, kSELF); }
|
88
|
+
"singleton" { return next_token(state, kSINGLETON); }
|
89
|
+
"top" { return next_token(state, kTOP); }
|
90
|
+
"true" { return next_token(state, kTRUE); }
|
91
|
+
"type" { return next_token(state, kTYPE); }
|
92
|
+
"unchecked" { return next_token(state, kUNCHECKED); }
|
93
|
+
"untyped" { return next_token(state, kUNTYPED); }
|
94
|
+
"void" { return next_token(state, kVOID); }
|
95
|
+
|
96
|
+
dqstring = ["] ("\\"["] | [^"\x00])* ["];
|
97
|
+
sqstring = ['] ("\\"['] | [^'\x00])* ['];
|
98
|
+
|
99
|
+
dqstring { return next_token(state, tDQSTRING); }
|
100
|
+
sqstring { return next_token(state, tSQSTRING); }
|
101
|
+
":" dqstring { return next_token(state, tDQSYMBOL); }
|
102
|
+
":" sqstring { return next_token(state, tSQSYMBOL); }
|
103
|
+
|
104
|
+
identifier = [a-zA-Z_] word* [!?=]?;
|
105
|
+
symbol_opr = ":|" | ":&" | ":/" | ":%" | ":~" | ":`" | ":^"
|
106
|
+
| ":==" | ":=~" | ":===" | ":!" | ":!=" | ":!~"
|
107
|
+
| ":<" | ":<=" | ":<<" | ":<=>" | ":>" | ":>=" | ":>>"
|
108
|
+
| ":-" | ":-@" | ":+" | ":+@" | ":*" | ":**" | ":[]" | ":[]=";
|
109
|
+
|
110
|
+
global_ident = [0-9]+
|
111
|
+
| "-" [a-zA-Z0-9_]
|
112
|
+
| [~*$?!@\\/;,.=:<>"&'`+]
|
113
|
+
| [^ \t\r\n:;=.,!"$%&()-+~|\\'[\]{}*/<>^\x00]+;
|
114
|
+
|
115
|
+
":" identifier { return next_token(state, tSYMBOL); }
|
116
|
+
":@" identifier { return next_token(state, tSYMBOL); }
|
117
|
+
":@@" identifier { return next_token(state, tSYMBOL); }
|
118
|
+
":$" global_ident { return next_token(state, tSYMBOL); }
|
119
|
+
symbol_opr { return next_token(state, tSYMBOL); }
|
120
|
+
|
121
|
+
[a-z] word* { return next_token(state, tLIDENT); }
|
122
|
+
[A-Z] word* { return next_token(state, tUIDENT); }
|
123
|
+
"_" [a-z0-9_] word* { return next_token(state, tULLIDENT); }
|
124
|
+
"_" [A-Z] word* { return next_token(state, tULIDENT); }
|
125
|
+
"_" { return next_token(state, tULLIDENT); }
|
126
|
+
[a-zA-Z_] word* "!" { return next_token(state, tBANGIDENT); }
|
127
|
+
[a-zA-Z_] word* "=" { return next_token(state, tEQIDENT); }
|
128
|
+
|
129
|
+
"@" [a-zA-Z_] word* { return next_token(state, tAIDENT); }
|
130
|
+
"@@" [a-zA-Z_] word* { return next_token(state, tA2IDENT); }
|
131
|
+
|
132
|
+
"$" global_ident { return next_token(state, tGIDENT); }
|
133
|
+
|
134
|
+
skip = [ \t\n]+;
|
135
|
+
|
136
|
+
skip { state->start = state->current; goto start; }
|
137
|
+
"\x00" { return next_token(state, pEOF); }
|
138
|
+
* { return next_token(state, ErrorToken); }
|
139
|
+
*/
|
140
|
+
}
|
@@ -0,0 +1,139 @@
|
|
1
|
+
#include "rbs_extension.h"
|
2
|
+
|
3
|
+
static const char *RBS_TOKENTYPE_NAMES[] = {
|
4
|
+
"NullType",
|
5
|
+
"pEOF",
|
6
|
+
"ErrorToken",
|
7
|
+
|
8
|
+
"pLPAREN", /* ( */
|
9
|
+
"pRPAREN", /* ) */
|
10
|
+
"pCOLON", /* : */
|
11
|
+
"pCOLON2", /* :: */
|
12
|
+
"pLBRACKET", /* [ */
|
13
|
+
"pRBRACKET", /* ] */
|
14
|
+
"pLBRACE", /* { */
|
15
|
+
"pRBRACE", /* } */
|
16
|
+
"pHAT", /* ^ */
|
17
|
+
"pARROW", /* -> */
|
18
|
+
"pFATARROW", /* => */
|
19
|
+
"pCOMMA", /* , */
|
20
|
+
"pBAR", /* | */
|
21
|
+
"pAMP", /* & */
|
22
|
+
"pSTAR", /* * */
|
23
|
+
"pSTAR2", /* ** */
|
24
|
+
"pDOT", /* . */
|
25
|
+
"pDOT3", /* ... */
|
26
|
+
"pBANG", /* ! */
|
27
|
+
"pQUESTION", /* ? */
|
28
|
+
"pLT", /* < */
|
29
|
+
"pEQ", /* = */
|
30
|
+
|
31
|
+
"kALIAS", /* alias */
|
32
|
+
"kATTRACCESSOR", /* attr_accessor */
|
33
|
+
"kATTRREADER", /* attr_reader */
|
34
|
+
"kATTRWRITER", /* attr_writer */
|
35
|
+
"kBOOL", /* bool */
|
36
|
+
"kBOT", /* bot */
|
37
|
+
"kCLASS", /* class */
|
38
|
+
"kDEF", /* def */
|
39
|
+
"kEND", /* end */
|
40
|
+
"kEXTEND", /* extend */
|
41
|
+
"kFALSE", /* kFALSE */
|
42
|
+
"kIN", /* in */
|
43
|
+
"kINCLUDE", /* include */
|
44
|
+
"kINSTANCE", /* instance */
|
45
|
+
"kINTERFACE", /* interface */
|
46
|
+
"kMODULE", /* module */
|
47
|
+
"kNIL", /* nil */
|
48
|
+
"kOUT", /* out */
|
49
|
+
"kPREPEND", /* prepend */
|
50
|
+
"kPRIVATE", /* private */
|
51
|
+
"kPUBLIC", /* public */
|
52
|
+
"kSELF", /* self */
|
53
|
+
"kSINGLETON", /* singleton */
|
54
|
+
"kTOP", /* top */
|
55
|
+
"kTRUE", /* true */
|
56
|
+
"kTYPE", /* type */
|
57
|
+
"kUNCHECKED", /* unchecked */
|
58
|
+
"kUNTYPED", /* untyped */
|
59
|
+
"kVOID", /* void */
|
60
|
+
|
61
|
+
"tLIDENT", /* Identifiers starting with lower case */
|
62
|
+
"tUIDENT", /* Identifiers starting with upper case */
|
63
|
+
"tULIDENT", /* Identifiers starting with `_` */
|
64
|
+
"tULLIDENT",
|
65
|
+
"tGIDENT", /* Identifiers starting with `$` */
|
66
|
+
"tAIDENT", /* Identifiers starting with `@` */
|
67
|
+
"tA2IDENT", /* Identifiers starting with `@@` */
|
68
|
+
"tBANGIDENT",
|
69
|
+
"tEQIDENT",
|
70
|
+
"tQIDENT", /* Quoted identifier */
|
71
|
+
"tOPERATOR", /* Operator identifier */
|
72
|
+
|
73
|
+
"tCOMMENT",
|
74
|
+
"tLINECOMMENT",
|
75
|
+
|
76
|
+
"tDQSTRING", /* Double quoted string */
|
77
|
+
"tSQSTRING", /* Single quoted string */
|
78
|
+
"tINTEGER", /* Integer */
|
79
|
+
"tSYMBOL", /* Symbol */
|
80
|
+
"tDQSYMBOL",
|
81
|
+
"tSQSYMBOL",
|
82
|
+
"tANNOTATION", /* Annotation */
|
83
|
+
};
|
84
|
+
|
85
|
+
token NullToken = { NullType };
|
86
|
+
position NullPosition = { -1, -1, -1, -1 };
|
87
|
+
range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
|
88
|
+
|
89
|
+
const char *token_type_str(enum TokenType type) {
|
90
|
+
return RBS_TOKENTYPE_NAMES[type];
|
91
|
+
}
|
92
|
+
|
93
|
+
/**
 * Length of the token in characters: the difference between the
 * `char_pos` of the end and the start of its range.
 * */
int token_chars(token tok) {
  const int first_char = tok.range.start.char_pos;
  const int last_char = tok.range.end.char_pos;

  return last_char - first_char;
}
|
96
|
+
|
97
|
+
/**
 * Length of the token in bytes: the difference between the
 * `byte_pos` of the end and the start of its range.
 * */
int token_bytes(token tok) {
  const int first_byte = tok.range.start.byte_pos;
  const int last_byte = tok.range.end.byte_pos;

  return last_byte - first_byte;
}
|
100
|
+
|
101
|
+
/**
 * Decodes the character at the current position without advancing.
 *
 * The decoded code point is stored in `state->last_char` and returned.
 * When the current byte position is at or beyond the end of the string,
 * nothing is decoded and 0 is returned, so no byte outside the string
 * buffer is read. The lexer rules treat "\x00" as end of input.
 * */
unsigned int peek(lexstate *state) {
  const char *ptr = RSTRING_PTR(state->string) + state->current.byte_pos;
  const char *end = RSTRING_END(state->string);
  unsigned int c = 0;

  if (ptr < end) {
    c = rb_enc_mbc_to_codepoint(ptr, end, rb_enc_get(state->string));
  }

  state->last_char = c;
  return c;
}
|
106
|
+
|
107
|
+
/**
 * Builds a token of the given type covering the span from `state->start`
 * to `state->current`.
 *
 * Afterwards `state->start` is moved to `state->current` and
 * `state->first_token_of_line` is cleared.
 * */
token next_token(lexstate *state, enum TokenType type) {
  token result;

  /* Capture the span before the start position is moved forward. */
  result.range.end = state->current;
  result.range.start = state->start;
  result.type = type;

  state->first_token_of_line = false;
  state->start = state->current;

  return result;
}
|
118
|
+
|
119
|
+
/**
 * Advances the current position past one character.
 *
 * The character consumed is `state->last_char`; when that is 0, `peek` is
 * called first to refresh it. `byte_pos` grows by the encoded length of the
 * character and `char_pos` by one. A newline bumps `line`, resets `column`
 * to 0 and sets `first_token_of_line`; any other character bumps `column`.
 * */
void skip(lexstate *state) {
  if (state->last_char == 0) {
    peek(state);
  }

  int width = rb_enc_codelen(state->last_char, rb_enc_get(state->string));

  state->current.char_pos += 1;
  state->current.byte_pos += width;

  if (state->last_char != '\n') {
    state->current.column += 1;
    return;
  }

  /* Newline: move to the beginning of the following line. */
  state->current.line += 1;
  state->current.column = 0;
  state->first_token_of_line = true;
}
|
136
|
+
|
137
|
+
/**
 * Returns a pointer into the lexer's string buffer at the byte position
 * where `tok` starts. No copy is made.
 * */
char *peek_token(lexstate *state, token tok) {
  char *buffer = RSTRING_PTR(state->string);

  return buffer + tok.range.start.byte_pos;
}
|