rbs 1.7.0.beta.2 → 1.7.0.beta.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -29,35 +29,35 @@ enum TokenType {
29
29
  pLT, /* < */
30
30
  pEQ, /* = */
31
31
 
32
+ kALIAS, /* alias */
33
+ kATTRACCESSOR, /* attr_accessor */
34
+ kATTRREADER, /* attr_reader */
35
+ kATTRWRITER, /* attr_writer */
32
36
  kBOOL, /* bool */
33
37
  kBOT, /* bot */
34
38
  kCLASS, /* class */
39
+ kDEF, /* def */
40
+ kEND, /* end */
41
+ kEXTEND, /* extend */
35
42
  kFALSE, /* false */
43
+ kIN, /* in */
44
+ kINCLUDE, /* include */
36
45
  kINSTANCE, /* instance */
37
46
  kINTERFACE, /* interface */
47
+ kMODULE, /* module */
38
48
  kNIL, /* nil */
49
+ kOUT, /* out */
50
+ kPREPEND, /* prepend */
51
+ kPRIVATE, /* private */
52
+ kPUBLIC, /* public */
39
53
  kSELF, /* self */
40
54
  kSINGLETON, /* singleton */
41
55
  kTOP, /* top */
42
56
  kTRUE, /* true */
43
- kVOID, /* void */
44
57
  kTYPE, /* type */
45
58
  kUNCHECKED, /* unchecked */
46
- kIN, /* in */
47
- kOUT, /* out */
48
- kEND, /* end */
49
- kDEF, /* def */
50
- kINCLUDE, /* include */
51
- kEXTEND, /* extend */
52
- kPREPEND, /* prepend */
53
- kALIAS, /* alias */
54
- kMODULE, /* module */
55
- kATTRREADER, /* attr_reader */
56
- kATTRWRITER, /* attr_writer */
57
- kATTRACCESSOR, /* attr_accessor */
58
- kPUBLIC, /* public */
59
- kPRIVATE, /* private */
60
59
  kUNTYPED, /* untyped */
60
+ kVOID, /* void */
61
61
 
62
62
  tLIDENT, /* Identifiers starting with lower case */
63
63
  tUIDENT, /* Identifiers starting with upper case */
@@ -121,14 +121,13 @@ typedef struct {
121
121
  position current; /* The current position */
122
122
  position start; /* The start position of the current token */
123
123
  bool first_token_of_line; /* This flag is used for tLINECOMMENT */
124
+ unsigned int last_char; /* Last peeked character */
124
125
  } lexstate;
125
126
 
126
127
  extern token NullToken;
127
128
  extern position NullPosition;
128
129
  extern range NULL_RANGE;
129
130
 
130
- token rbsparser_next_token(lexstate *state);
131
-
132
131
  char *peek_token(lexstate *state, token tok);
133
132
  int token_chars(token tok);
134
133
  int token_bytes(token tok);
@@ -140,6 +139,23 @@ int token_bytes(token tok);
140
139
 
141
140
  const char *token_type_str(enum TokenType type);
142
141
 
142
+ /**
143
+ * Return the character at the current position without advancing; the value is also cached in state->last_char.
144
+ * */
145
+ unsigned int peek(lexstate *state);
146
+
147
+ /**
148
+ * Skip one character.
149
+ * */
150
+ void skip(lexstate *state);
151
+
152
+ /**
153
+ * Return new token with given type.
154
+ * */
155
+ token next_token(lexstate *state, enum TokenType type);
156
+
157
+ token rbsparser_next_token(lexstate *state);
158
+
143
159
  void print_token(token tok);
144
160
 
145
161
  #endif
@@ -0,0 +1,140 @@
1
+ #include "rbs_extension.h"
2
+
3
+ token rbsparser_next_token(lexstate *state) {
4
+ lexstate backup;
5
+
6
+ start:
7
+ backup = *state;
8
+
9
+ /*!re2c
10
+ re2c:flags:u = 1;
11
+ re2c:api:style = free-form;
12
+ re2c:flags:input = custom;
13
+ re2c:define:YYCTYPE = "unsigned int";
14
+ re2c:define:YYPEEK = "peek(state)";
15
+ re2c:define:YYSKIP = "skip(state);";
16
+ re2c:define:YYBACKUP = "backup = *state;";
17
+ re2c:define:YYRESTORE = "*state = backup;";
18
+ re2c:yyfill:enable = 0;
19
+
20
+ word = [a-zA-Z0-9_];
21
+
22
+ operator = "/" | "~" | "[]" | "[]=" | "!" | "!=" | "!~" | "-" | "-@" | "+" | "+@"
23
+ | "==" | "===" | "=~" | "<<" | "<=" | "<=>" | ">" | ">=" | ">>" | "%";
24
+
25
+ "(" { return next_token(state, pLPAREN); }
26
+ ")" { return next_token(state, pRPAREN); }
27
+ "[" { return next_token(state, pLBRACKET); }
28
+ "]" { return next_token(state, pRBRACKET); }
29
+ "{" { return next_token(state, pLBRACE); }
30
+ "}" { return next_token(state, pRBRACE); }
31
+ "," { return next_token(state, pCOMMA); }
32
+ "|" { return next_token(state, pBAR); }
33
+ "^" { return next_token(state, pHAT); }
34
+ "&" { return next_token(state, pAMP); }
35
+ "?" { return next_token(state, pQUESTION); }
36
+ "*" { return next_token(state, pSTAR); }
37
+ "**" { return next_token(state, pSTAR2); }
38
+ "." { return next_token(state, pDOT); }
39
+ "..." { return next_token(state, pDOT3); }
40
+ "`" { return next_token(state, tOPERATOR); }
41
+ "`" [^ :\x00] [^`\x00]* "`" { return next_token(state, tQIDENT); }
42
+ "->" { return next_token(state, pARROW); }
43
+ "=>" { return next_token(state, pFATARROW); }
44
+ "=" { return next_token(state, pEQ); }
45
+ ":" { return next_token(state, pCOLON); }
46
+ "::" { return next_token(state, pCOLON2); }
47
+ "<" { return next_token(state, pLT); }
48
+ operator { return next_token(state, tOPERATOR); }
49
+
50
+ number = [0-9] [0-9_]*;
51
+ ("-"|"+")? number { return next_token(state, tINTEGER); }
52
+
53
+ "%a{" [^}\x00]* "}" { return next_token(state, tANNOTATION); }
54
+ "%a(" [^)\x00]* ")" { return next_token(state, tANNOTATION); }
55
+ "%a[" [^\]\x00]* "]" { return next_token(state, tANNOTATION); }
56
+ "%a|" [^|\x00]* "|" { return next_token(state, tANNOTATION); }
57
+ "%a<" [^>\x00]* ">" { return next_token(state, tANNOTATION); }
58
+
59
+ "#" (. \ [\x00])* {
60
+ return next_token(
61
+ state,
62
+ state->first_token_of_line ? tLINECOMMENT : tCOMMENT
63
+ );
64
+ }
65
+
66
+ "alias" { return next_token(state, kALIAS); }
67
+ "attr_accessor" { return next_token(state, kATTRACCESSOR); }
68
+ "attr_reader" { return next_token(state, kATTRREADER); }
69
+ "attr_writer" { return next_token(state, kATTRWRITER); }
70
+ "bool" { return next_token(state, kBOOL); }
71
+ "bot" { return next_token(state, kBOT); }
72
+ "class" { return next_token(state, kCLASS); }
73
+ "def" { return next_token(state, kDEF); }
74
+ "end" { return next_token(state, kEND); }
75
+ "extend" { return next_token(state, kEXTEND); }
76
+ "false" { return next_token(state, kFALSE); }
77
+ "in" { return next_token(state, kIN); }
78
+ "include" { return next_token(state, kINCLUDE); }
79
+ "instance" { return next_token(state, kINSTANCE); }
80
+ "interface" { return next_token(state, kINTERFACE); }
81
+ "module" { return next_token(state, kMODULE); }
82
+ "nil" { return next_token(state, kNIL); }
83
+ "out" { return next_token(state, kOUT); }
84
+ "prepend" { return next_token(state, kPREPEND); }
85
+ "private" { return next_token(state, kPRIVATE); }
86
+ "public" { return next_token(state, kPUBLIC); }
87
+ "self" { return next_token(state, kSELF); }
88
+ "singleton" { return next_token(state, kSINGLETON); }
89
+ "top" { return next_token(state, kTOP); }
90
+ "true" { return next_token(state, kTRUE); }
91
+ "type" { return next_token(state, kTYPE); }
92
+ "unchecked" { return next_token(state, kUNCHECKED); }
93
+ "untyped" { return next_token(state, kUNTYPED); }
94
+ "void" { return next_token(state, kVOID); }
95
+
96
+ dqstring = ["] ("\\"["] | [^"\x00])* ["];
97
+ sqstring = ['] ("\\"['] | [^'\x00])* ['];
98
+
99
+ dqstring { return next_token(state, tDQSTRING); }
100
+ sqstring { return next_token(state, tSQSTRING); }
101
+ ":" dqstring { return next_token(state, tDQSYMBOL); }
102
+ ":" sqstring { return next_token(state, tSQSYMBOL); }
103
+
104
+ identifier = [a-zA-Z_] word* [!?=]?;
105
+ symbol_opr = ":|" | ":&" | ":/" | ":%" | ":~" | ":`" | ":^"
106
+ | ":==" | ":=~" | ":===" | ":!" | ":!=" | ":!~"
107
+ | ":<" | ":<=" | ":<<" | ":<=>" | ":>" | ":>=" | ":>>"
108
+ | ":-" | ":-@" | ":+" | ":+@" | ":*" | ":**" | ":[]" | ":[]=";
109
+
110
+ global_ident = [0-9]+
111
+ | "-" [a-zA-Z0-9_]
112
+ | [~*$?!@\\/;,.=:<>"&'`+]
113
+ | [^ \t\r\n:;=.,!"$%&()-+~|\\'[\]{}*/<>^\x00]+;
114
+
115
+ ":" identifier { return next_token(state, tSYMBOL); }
116
+ ":@" identifier { return next_token(state, tSYMBOL); }
117
+ ":@@" identifier { return next_token(state, tSYMBOL); }
118
+ ":$" global_ident { return next_token(state, tSYMBOL); }
119
+ symbol_opr { return next_token(state, tSYMBOL); }
120
+
121
+ [a-z] word* { return next_token(state, tLIDENT); }
122
+ [A-Z] word* { return next_token(state, tUIDENT); }
123
+ "_" [a-z0-9_] word* { return next_token(state, tULLIDENT); }
124
+ "_" [A-Z] word* { return next_token(state, tULIDENT); }
125
+ "_" { return next_token(state, tULLIDENT); }
126
+ [a-zA-Z_] word* "!" { return next_token(state, tBANGIDENT); }
127
+ [a-zA-Z_] word* "=" { return next_token(state, tEQIDENT); }
128
+
129
+ "@" [a-zA-Z_] word* { return next_token(state, tAIDENT); }
130
+ "@@" [a-zA-Z_] word* { return next_token(state, tA2IDENT); }
131
+
132
+ "$" global_ident { return next_token(state, tGIDENT); }
133
+
134
+ skip = [ \t\n]+;
135
+
136
+ skip { state->start = state->current; goto start; }
137
+ "\x00" { return next_token(state, pEOF); }
138
+ * { return next_token(state, ErrorToken); }
139
+ */
140
+ }
@@ -0,0 +1,139 @@
1
+ #include "rbs_extension.h"
2
+
3
/*
 * Printable names of the token types. token_type_str() indexes this table
 * directly with an `enum TokenType` value, so the entries have to stay in
 * the same order as the enum constants.
 */
+ static const char *RBS_TOKENTYPE_NAMES[] = {
4
+ "NullType",
5
+ "pEOF", /* NUL character, end of input */
6
+ "ErrorToken", /* Input that matches no lexer rule */
7
+
8
+ "pLPAREN", /* ( */
9
+ "pRPAREN", /* ) */
10
+ "pCOLON", /* : */
11
+ "pCOLON2", /* :: */
12
+ "pLBRACKET", /* [ */
13
+ "pRBRACKET", /* ] */
14
+ "pLBRACE", /* { */
15
+ "pRBRACE", /* } */
16
+ "pHAT", /* ^ */
17
+ "pARROW", /* -> */
18
+ "pFATARROW", /* => */
19
+ "pCOMMA", /* , */
20
+ "pBAR", /* | */
21
+ "pAMP", /* & */
22
+ "pSTAR", /* * */
23
+ "pSTAR2", /* ** */
24
+ "pDOT", /* . */
25
+ "pDOT3", /* ... */
26
+ "pBANG", /* ! */
27
+ "pQUESTION", /* ? */
28
+ "pLT", /* < */
29
+ "pEQ", /* = */
30
+
31
+ "kALIAS", /* alias */
32
+ "kATTRACCESSOR", /* attr_accessor */
33
+ "kATTRREADER", /* attr_reader */
34
+ "kATTRWRITER", /* attr_writer */
35
+ "kBOOL", /* bool */
36
+ "kBOT", /* bot */
37
+ "kCLASS", /* class */
38
+ "kDEF", /* def */
39
+ "kEND", /* end */
40
+ "kEXTEND", /* extend */
41
+ "kFALSE", /* false */
42
+ "kIN", /* in */
43
+ "kINCLUDE", /* include */
44
+ "kINSTANCE", /* instance */
45
+ "kINTERFACE", /* interface */
46
+ "kMODULE", /* module */
47
+ "kNIL", /* nil */
48
+ "kOUT", /* out */
49
+ "kPREPEND", /* prepend */
50
+ "kPRIVATE", /* private */
51
+ "kPUBLIC", /* public */
52
+ "kSELF", /* self */
53
+ "kSINGLETON", /* singleton */
54
+ "kTOP", /* top */
55
+ "kTRUE", /* true */
56
+ "kTYPE", /* type */
57
+ "kUNCHECKED", /* unchecked */
58
+ "kUNTYPED", /* untyped */
59
+ "kVOID", /* void */
60
+
61
+ "tLIDENT", /* Identifiers starting with lower case */
62
+ "tUIDENT", /* Identifiers starting with upper case */
63
+ "tULIDENT", /* Identifiers starting with `_` followed by an upper case letter */
64
+ "tULLIDENT", /* A bare `_`, or `_` followed by a lower case letter, digit, or `_` */
65
+ "tGIDENT", /* Identifiers starting with `$` */
66
+ "tAIDENT", /* Identifiers starting with `@` */
67
+ "tA2IDENT", /* Identifiers starting with `@@` */
68
+ "tBANGIDENT", /* Identifiers ending with `!` */
69
+ "tEQIDENT", /* Identifiers ending with `=` */
70
+ "tQIDENT", /* Quoted identifier */
71
+ "tOPERATOR", /* Operator identifier */
72
+
73
+ "tCOMMENT", /* `#` comment lexed while first_token_of_line is clear */
74
+ "tLINECOMMENT", /* `#` comment lexed while first_token_of_line is set */
75
+
76
+ "tDQSTRING", /* Double quoted string */
77
+ "tSQSTRING", /* Single quoted string */
78
+ "tINTEGER", /* Integer */
79
+ "tSYMBOL", /* Symbol */
80
+ "tDQSYMBOL", /* `:` followed by a double quoted string */
81
+ "tSQSYMBOL", /* `:` followed by a single quoted string */
82
+ "tANNOTATION", /* Annotation */
83
+ };
84
+
85
+ token NullToken = { NullType }; /* Placeholder token whose type is NullType; members without an initializer are zero */
86
+ position NullPosition = { -1, -1, -1, -1 }; /* Placeholder position initialised with -1 values */
87
+ range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } }; /* Placeholder range whose start and end are both initialised with -1 values */
88
+
89
+ const char *token_type_str(enum TokenType type) {
90
+ return RBS_TOKENTYPE_NAMES[type];
91
+ }
92
+
93
+ int token_chars(token tok) {
94
+ return tok.range.end.char_pos - tok.range.start.char_pos;
95
+ }
96
+
97
+ int token_bytes(token tok) {
98
+ return RANGE_BYTES(tok.range);
99
+ }
100
+
101
+ unsigned int peek(lexstate *state) {
102
+ unsigned int c = rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string));
103
+ state->last_char = c;
104
+ return c;
105
+ }
106
+
107
+ token next_token(lexstate *state, enum TokenType type) {
108
+ token t;
109
+
110
+ t.type = type;
111
+ t.range.start = state->start;
112
+ t.range.end = state->current;
113
+ state->start = state->current;
114
+ state->first_token_of_line = false;
115
+
116
+ return t;
117
+ }
118
+
119
+ void skip(lexstate *state) {
120
+ if (!state->last_char) {
121
+ peek(state);
122
+ }
123
+ int byte_len = rb_enc_codelen(state->last_char, rb_enc_get(state->string));
124
+
125
+ state->current.char_pos += 1;
126
+ state->current.byte_pos += byte_len;
127
+
128
+ if (state->last_char == '\n') {
129
+ state->current.line += 1;
130
+ state->current.column = 0;
131
+ state->first_token_of_line = true;
132
+ } else {
133
+ state->current.column += 1;
134
+ }
135
+ }
136
+
137
+ char *peek_token(lexstate *state, token tok) {
138
+ return RSTRING_PTR(state->string) + tok.range.start.byte_pos;
139
+ }
@@ -2387,36 +2387,4 @@ void rbs__init_parser() {
2387
2387
  rb_define_singleton_method(RBS_Parser, "_parse_type", rbsparser_parse_type, 4);
2388
2388
  rb_define_singleton_method(RBS_Parser, "_parse_method_type", rbsparser_parse_method_type, 4);
2389
2389
  rb_define_singleton_method(RBS_Parser, "_parse_signature", rbsparser_parse_signature, 3);
2390
-
2391
- RBS_Parser_KEYWORDS = rb_hash_new();
2392
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("bool"), INT2FIX(kBOOL));
2393
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("bot"), INT2FIX(kBOT));
2394
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("class"), INT2FIX(kCLASS));
2395
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("instance"), INT2FIX(kINSTANCE));
2396
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("interface"), INT2FIX(kINTERFACE));
2397
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("nil"), INT2FIX(kNIL));
2398
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("self"), INT2FIX(kSELF));
2399
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("singleton"), INT2FIX(kSINGLETON));
2400
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("top"), INT2FIX(kTOP));
2401
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("void"), INT2FIX(kVOID));
2402
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("type"), INT2FIX(kTYPE));
2403
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("unchecked"), INT2FIX(kUNCHECKED));
2404
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("in"), INT2FIX(kIN));
2405
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("out"), INT2FIX(kOUT));
2406
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("end"), INT2FIX(kEND));
2407
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("def"), INT2FIX(kDEF));
2408
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("include"), INT2FIX(kINCLUDE));
2409
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("extend"), INT2FIX(kEXTEND));
2410
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("prepend"), INT2FIX(kPREPEND));
2411
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("alias"), INT2FIX(kALIAS));
2412
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("module"), INT2FIX(kMODULE));
2413
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("attr_reader"), INT2FIX(kATTRREADER));
2414
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("attr_writer"), INT2FIX(kATTRWRITER));
2415
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("attr_accessor"), INT2FIX(kATTRACCESSOR));
2416
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("public"), INT2FIX(kPUBLIC));
2417
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("private"), INT2FIX(kPRIVATE));
2418
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("untyped"), INT2FIX(kUNTYPED));
2419
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("true"), INT2FIX(kTRUE));
2420
- rb_hash_aset(RBS_Parser_KEYWORDS, rb_str_new_literal("false"), INT2FIX(kFALSE));
2421
- rb_define_const(RBS_Parser, "KEYWORDS", RBS_Parser_KEYWORDS);
2422
2390
  }
@@ -9,11 +9,6 @@
9
9
  * */
10
10
  extern VALUE RBS_Parser;
11
11
 
12
- /**
13
- * RBS::Parser::KEYWORDS constant, which stores a hash from keyword string to token type fixnum
14
- * */
15
- extern VALUE RBS_Parser_KEYWORDS;
16
-
17
12
  VALUE parse_type(parserstate *state);
18
13
  VALUE parse_method_type(parserstate *state);
19
14
  VALUE parse_signature(parserstate *state);