rbs 1.6.2 → 1.7.0.beta.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +0 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +6 -0
- data/Gemfile +1 -0
- data/Rakefile +7 -22
- data/core/kernel.rbs +4 -4
- data/core/trace_point.rbs +1 -1
- data/ext/rbs/extension/constants.c +140 -0
- data/ext/rbs/extension/constants.h +72 -0
- data/ext/rbs/extension/extconf.rb +3 -0
- data/ext/rbs/extension/lexer.c +1070 -0
- data/ext/rbs/extension/lexer.h +145 -0
- data/ext/rbs/extension/location.c +295 -0
- data/ext/rbs/extension/location.h +59 -0
- data/ext/rbs/extension/main.c +9 -0
- data/ext/rbs/extension/parser.c +2418 -0
- data/ext/rbs/extension/parser.h +23 -0
- data/ext/rbs/extension/parserstate.c +313 -0
- data/ext/rbs/extension/parserstate.h +141 -0
- data/ext/rbs/extension/rbs_extension.h +40 -0
- data/ext/rbs/extension/ruby_objs.c +585 -0
- data/ext/rbs/extension/ruby_objs.h +46 -0
- data/ext/rbs/extension/unescape.c +65 -0
- data/goodcheck.yml +1 -1
- data/lib/rbs/ast/comment.rb +0 -12
- data/lib/rbs/buffer.rb +4 -0
- data/lib/rbs/cli.rb +5 -8
- data/lib/rbs/collection/sources/git.rb +18 -3
- data/lib/rbs/errors.rb +14 -1
- data/lib/rbs/location.rb +221 -217
- data/lib/rbs/location_aux.rb +108 -0
- data/lib/rbs/locator.rb +10 -7
- data/lib/rbs/parser_aux.rb +24 -0
- data/lib/rbs/types.rb +2 -3
- data/lib/rbs/version.rb +1 -1
- data/lib/rbs/writer.rb +4 -2
- data/lib/rbs.rb +3 -7
- data/rbs.gemspec +2 -1
- data/sig/ancestor_builder.rbs +2 -2
- data/sig/annotation.rbs +2 -2
- data/sig/comment.rbs +7 -7
- data/sig/constant_table.rbs +1 -1
- data/sig/declarations.rbs +9 -9
- data/sig/definition.rbs +1 -1
- data/sig/definition_builder.rbs +2 -2
- data/sig/errors.rbs +30 -25
- data/sig/location.rbs +42 -79
- data/sig/locator.rbs +2 -2
- data/sig/members.rbs +7 -7
- data/sig/method_types.rbs +3 -3
- data/sig/parser.rbs +11 -21
- data/sig/types.rbs +45 -27
- data/sig/writer.rbs +1 -1
- data/stdlib/json/0/json.rbs +3 -3
- metadata +24 -6
- data/lib/rbs/parser.rb +0 -3614
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f2e7e51d08feb7b1a562ac43c679da8fc6f0a7cfc2ecef65e11c1e8c3d0d2b9
|
4
|
+
data.tar.gz: d41ed33c0b5de01cd1746faa3572e56f651ecfc808c046cdb4166a0ec357138e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bc198dc7d40caaa802604cb1f633098955540d838c4e1adcca635da85880703378d981a4f37c528e7f4b3720fc6f152d7fc6c7bcb11ce3e0abba76fd14a6d47c
|
7
|
+
data.tar.gz: 68c6643f9adf4761bec2a2ffb211e223cb732a37c341652c4625191b5ab6d6791c91196f3c3bc4fd5624d6d853558c33e28f8462f483f923badad8f38498a675
|
data/.github/workflows/ruby.yml
CHANGED
@@ -19,10 +19,6 @@ jobs:
|
|
19
19
|
- test
|
20
20
|
- stdlib_test
|
21
21
|
- rubocop validate test_doc build test_generate_stdlib
|
22
|
-
- confirm_parser
|
23
|
-
exclude:
|
24
|
-
- container_tag: master-nightly-focal
|
25
|
-
job: confirm_parser
|
26
22
|
container:
|
27
23
|
image: rubylang/ruby:${{ matrix.container_tag }}
|
28
24
|
steps:
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
2
|
require "rake/testtask"
|
3
3
|
require "rbconfig"
|
4
|
+
require 'rake/extensiontask'
|
4
5
|
|
5
6
|
$LOAD_PATH << File.join(__dir__, "test")
|
6
7
|
|
7
8
|
ruby = ENV["RUBY"] || RbConfig.ruby
|
8
|
-
racc = ENV.fetch("RACC", "racc")
|
9
9
|
rbs = File.join(__dir__, "exe/rbs")
|
10
10
|
bin = File.join(__dir__, "bin")
|
11
11
|
|
12
|
-
Rake::
|
12
|
+
Rake::ExtensionTask.new("rbs/extension")
|
13
|
+
|
14
|
+
Rake::TestTask.new(:test => :compile) do |t|
|
13
15
|
t.libs << "test"
|
14
16
|
t.libs << "lib"
|
15
17
|
t.test_files = FileList["test/**/*_test.rb"].reject do |path|
|
@@ -19,7 +21,7 @@ end
|
|
19
21
|
|
20
22
|
multitask :default => [:test, :stdlib_test, :rubocop, :validate, :test_doc]
|
21
23
|
|
22
|
-
task :test_doc => :parser do
|
24
|
+
task :test_doc do
|
23
25
|
files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
24
26
|
`git ls-files -z`.split("\x0").select do |file| Pathname(file).extname == ".md" end
|
25
27
|
end
|
@@ -27,7 +29,7 @@ task :test_doc => :parser do
|
|
27
29
|
sh "#{ruby} #{__dir__}/bin/run_in_md.rb #{files.join(" ")}"
|
28
30
|
end
|
29
31
|
|
30
|
-
task :validate => :parser do
|
32
|
+
task :validate => :compile do
|
31
33
|
sh "#{ruby} #{rbs} validate --silent"
|
32
34
|
|
33
35
|
FileList["stdlib/*"].each do |path|
|
@@ -72,7 +74,7 @@ task :validate => :parser do
|
|
72
74
|
end
|
73
75
|
|
74
76
|
FileList["test/stdlib/**/*_test.rb"].each do |test|
|
75
|
-
task test => :parser do
|
77
|
+
task test => :compile do
|
76
78
|
sh "#{ruby} -Ilib #{bin}/test_runner.rb #{test}"
|
77
79
|
end
|
78
80
|
task stdlib_test: test
|
@@ -82,21 +84,6 @@ task :rubocop do
|
|
82
84
|
sh "rubocop --parallel"
|
83
85
|
end
|
84
86
|
|
85
|
-
rule ".rb" => ".y" do |t|
|
86
|
-
sh "#{racc} -v -o #{t.name} #{t.source}"
|
87
|
-
end
|
88
|
-
|
89
|
-
task :parser => "lib/rbs/parser.rb"
|
90
|
-
task :test => :parser
|
91
|
-
task :stdlib_test => :parser
|
92
|
-
task :build => :parser
|
93
|
-
|
94
|
-
task :confirm_parser do
|
95
|
-
puts "Testing if parser.rb is updated with respect to parser.y"
|
96
|
-
sh "#{racc} -v -o lib/rbs/parser.rb lib/rbs/parser.y"
|
97
|
-
sh "git diff --exit-code lib/rbs/parser.rb"
|
98
|
-
end
|
99
|
-
|
100
87
|
namespace :generate do
|
101
88
|
desc "Generate a test file for a stdlib class signatures"
|
102
89
|
task :stdlib_test, [:class] do |_task, args|
|
@@ -225,5 +212,3 @@ task :test_generate_stdlib do
|
|
225
212
|
sh "RBS_GENERATE_TEST_PATH=/tmp/Array_test.rb rake 'generate:stdlib_test[Array]'"
|
226
213
|
sh "ruby -c /tmp/Array_test.rb"
|
227
214
|
end
|
228
|
-
|
229
|
-
CLEAN.include("lib/rbs/parser.rb")
|
data/core/kernel.rbs
CHANGED
@@ -482,8 +482,8 @@ module Kernel : BasicObject
|
|
482
482
|
| [:child, int] # redirect to the redirected file descriptor
|
483
483
|
| :close # close the file descriptor in child process
|
484
484
|
|
485
|
-
def self?.spawn: (String command, *String args, ?unsetenv_others: boolish, ?pgroup
|
486
|
-
| (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup
|
485
|
+
def self?.spawn: (String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> Integer
|
486
|
+
| (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> Integer
|
487
487
|
|
488
488
|
# Executes *command…* in a subshell. *command…* is one of following forms.
|
489
489
|
#
|
@@ -510,8 +510,8 @@ module Kernel : BasicObject
|
|
510
510
|
# *
|
511
511
|
#
|
512
512
|
# See `Kernel.exec` for the standard shell.
|
513
|
-
def self?.system: (String command, *String args, ?unsetenv_others: boolish, ?pgroup
|
514
|
-
| (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup
|
513
|
+
def self?.system: (String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> (NilClass | FalseClass | TrueClass)
|
514
|
+
| (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> (NilClass | FalseClass | TrueClass)
|
515
515
|
end
|
516
516
|
|
517
517
|
Kernel::RUBYGEMS_ACTIVATION_MONITOR: untyped
|
data/core/trace_point.rbs
CHANGED
@@ -0,0 +1,140 @@
|
|
1
|
+
#include "rbs_extension.h"
|
2
|
+
|
3
|
+
VALUE RBS_Parser;
|
4
|
+
VALUE RBS_Parser_KEYWORDS;
|
5
|
+
|
6
|
+
VALUE RBS;
|
7
|
+
VALUE RBS_AST;
|
8
|
+
VALUE RBS_AST_Comment;
|
9
|
+
VALUE RBS_AST_Annotation;
|
10
|
+
|
11
|
+
VALUE RBS_AST_Declarations;
|
12
|
+
|
13
|
+
VALUE RBS_AST_Declarations_ModuleTypeParams;
|
14
|
+
VALUE RBS_AST_Declarations_ModuleTypeParams_TypeParam;
|
15
|
+
|
16
|
+
VALUE RBS_AST_Declarations_Alias;
|
17
|
+
VALUE RBS_AST_Declarations_Constant;
|
18
|
+
VALUE RBS_AST_Declarations_Global;
|
19
|
+
VALUE RBS_AST_Declarations_Interface;
|
20
|
+
VALUE RBS_AST_Declarations_Module;
|
21
|
+
VALUE RBS_AST_Declarations_Module_Self;
|
22
|
+
VALUE RBS_AST_Declarations_Class;
|
23
|
+
VALUE RBS_AST_Declarations_Class_Super;
|
24
|
+
|
25
|
+
VALUE RBS_AST_Members;
|
26
|
+
VALUE RBS_AST_Members_Alias;
|
27
|
+
VALUE RBS_AST_Members_AttrAccessor;
|
28
|
+
VALUE RBS_AST_Members_AttrReader;
|
29
|
+
VALUE RBS_AST_Members_AttrWriter;
|
30
|
+
VALUE RBS_AST_Members_ClassInstanceVariable;
|
31
|
+
VALUE RBS_AST_Members_ClassVariable;
|
32
|
+
VALUE RBS_AST_Members_Extend;
|
33
|
+
VALUE RBS_AST_Members_Include;
|
34
|
+
VALUE RBS_AST_Members_InstanceVariable;
|
35
|
+
VALUE RBS_AST_Members_MethodDefinition;
|
36
|
+
VALUE RBS_AST_Members_Prepend;
|
37
|
+
VALUE RBS_AST_Members_Private;
|
38
|
+
VALUE RBS_AST_Members_Public;
|
39
|
+
|
40
|
+
VALUE RBS_Namespace;
|
41
|
+
VALUE RBS_TypeName;
|
42
|
+
|
43
|
+
VALUE RBS_Types_Alias;
|
44
|
+
VALUE RBS_Types_Bases_Any;
|
45
|
+
VALUE RBS_Types_Bases_Bool;
|
46
|
+
VALUE RBS_Types_Bases_Bottom;
|
47
|
+
VALUE RBS_Types_Bases_Class;
|
48
|
+
VALUE RBS_Types_Bases_Instance;
|
49
|
+
VALUE RBS_Types_Bases_Nil;
|
50
|
+
VALUE RBS_Types_Bases_Self;
|
51
|
+
VALUE RBS_Types_Bases_Top;
|
52
|
+
VALUE RBS_Types_Bases_Void;
|
53
|
+
VALUE RBS_Types_Bases;
|
54
|
+
VALUE RBS_Types_Block;
|
55
|
+
VALUE RBS_Types_ClassInstance;
|
56
|
+
VALUE RBS_Types_ClassSingleton;
|
57
|
+
VALUE RBS_Types_Function_Param;
|
58
|
+
VALUE RBS_Types_Function;
|
59
|
+
VALUE RBS_Types_Interface;
|
60
|
+
VALUE RBS_Types_Intersection;
|
61
|
+
VALUE RBS_Types_Literal;
|
62
|
+
VALUE RBS_Types_Optional;
|
63
|
+
VALUE RBS_Types_Proc;
|
64
|
+
VALUE RBS_Types_Record;
|
65
|
+
VALUE RBS_Types_Tuple;
|
66
|
+
VALUE RBS_Types_Union;
|
67
|
+
VALUE RBS_Types_Variable;
|
68
|
+
VALUE RBS_Types;
|
69
|
+
VALUE RBS_MethodType;
|
70
|
+
|
71
|
+
VALUE RBS_ParsingError;
|
72
|
+
|
73
|
+
void rbs__init_constants() {
|
74
|
+
ID id_RBS = rb_intern_const("RBS");
|
75
|
+
|
76
|
+
RBS = rb_const_get(rb_cObject, id_RBS);
|
77
|
+
RBS_ParsingError = rb_const_get(RBS, rb_intern("ParsingError"));
|
78
|
+
RBS_AST = rb_const_get(RBS, rb_intern("AST"));
|
79
|
+
RBS_AST_Comment = rb_const_get(RBS_AST, rb_intern("Comment"));
|
80
|
+
RBS_AST_Annotation = rb_const_get(RBS_AST, rb_intern("Annotation"));
|
81
|
+
|
82
|
+
RBS_AST_Declarations = rb_const_get(RBS_AST, rb_intern("Declarations"));
|
83
|
+
|
84
|
+
RBS_AST_Declarations_ModuleTypeParams = rb_const_get(RBS_AST_Declarations, rb_intern("ModuleTypeParams"));
|
85
|
+
RBS_AST_Declarations_ModuleTypeParams_TypeParam = rb_const_get(RBS_AST_Declarations_ModuleTypeParams, rb_intern("TypeParam"));
|
86
|
+
|
87
|
+
RBS_AST_Declarations_Alias = rb_const_get(RBS_AST_Declarations, rb_intern("Alias"));
|
88
|
+
RBS_AST_Declarations_Constant = rb_const_get(RBS_AST_Declarations, rb_intern("Constant"));
|
89
|
+
RBS_AST_Declarations_Global = rb_const_get(RBS_AST_Declarations, rb_intern("Global"));
|
90
|
+
RBS_AST_Declarations_Interface = rb_const_get(RBS_AST_Declarations, rb_intern("Interface"));
|
91
|
+
RBS_AST_Declarations_Module = rb_const_get(RBS_AST_Declarations, rb_intern("Module"));
|
92
|
+
RBS_AST_Declarations_Module_Self = rb_const_get(RBS_AST_Declarations_Module, rb_intern("Self"));
|
93
|
+
RBS_AST_Declarations_Class = rb_const_get(RBS_AST_Declarations, rb_intern("Class"));
|
94
|
+
RBS_AST_Declarations_Class_Super = rb_const_get(RBS_AST_Declarations_Class, rb_intern("Super"));
|
95
|
+
|
96
|
+
RBS_AST_Members = rb_const_get(RBS_AST, rb_intern("Members"));
|
97
|
+
RBS_AST_Members_Alias = rb_const_get(RBS_AST_Members, rb_intern("Alias"));
|
98
|
+
RBS_AST_Members_AttrAccessor = rb_const_get(RBS_AST_Members, rb_intern("AttrAccessor"));
|
99
|
+
RBS_AST_Members_AttrReader = rb_const_get(RBS_AST_Members, rb_intern("AttrReader"));
|
100
|
+
RBS_AST_Members_AttrWriter = rb_const_get(RBS_AST_Members, rb_intern("AttrWriter"));
|
101
|
+
RBS_AST_Members_ClassInstanceVariable = rb_const_get(RBS_AST_Members, rb_intern("ClassInstanceVariable"));
|
102
|
+
RBS_AST_Members_ClassVariable = rb_const_get(RBS_AST_Members, rb_intern("ClassVariable"));
|
103
|
+
RBS_AST_Members_Extend = rb_const_get(RBS_AST_Members, rb_intern("Extend"));
|
104
|
+
RBS_AST_Members_Include = rb_const_get(RBS_AST_Members, rb_intern("Include"));
|
105
|
+
RBS_AST_Members_InstanceVariable = rb_const_get(RBS_AST_Members, rb_intern("InstanceVariable"));
|
106
|
+
RBS_AST_Members_MethodDefinition = rb_const_get(RBS_AST_Members, rb_intern("MethodDefinition"));
|
107
|
+
RBS_AST_Members_Prepend = rb_const_get(RBS_AST_Members, rb_intern("Prepend"));
|
108
|
+
RBS_AST_Members_Private = rb_const_get(RBS_AST_Members, rb_intern("Private"));
|
109
|
+
RBS_AST_Members_Public = rb_const_get(RBS_AST_Members, rb_intern("Public"));
|
110
|
+
|
111
|
+
RBS_Namespace = rb_const_get(RBS, rb_intern("Namespace"));
|
112
|
+
RBS_TypeName = rb_const_get(RBS, rb_intern("TypeName"));
|
113
|
+
RBS_Types = rb_const_get(RBS, rb_intern("Types"));
|
114
|
+
RBS_Types_Alias = rb_const_get(RBS_Types, rb_intern("Alias"));
|
115
|
+
RBS_Types_Bases = rb_const_get(RBS_Types, rb_intern("Bases"));
|
116
|
+
RBS_Types_Bases_Any = rb_const_get(RBS_Types_Bases, rb_intern("Any"));
|
117
|
+
RBS_Types_Bases_Bool = rb_const_get(RBS_Types_Bases, rb_intern("Bool"));
|
118
|
+
RBS_Types_Bases_Bottom = rb_const_get(RBS_Types_Bases, rb_intern("Bottom"));
|
119
|
+
RBS_Types_Bases_Class = rb_const_get(RBS_Types_Bases, rb_intern("Class"));
|
120
|
+
RBS_Types_Bases_Instance = rb_const_get(RBS_Types_Bases, rb_intern("Instance"));
|
121
|
+
RBS_Types_Bases_Nil = rb_const_get(RBS_Types_Bases, rb_intern("Nil"));
|
122
|
+
RBS_Types_Bases_Self = rb_const_get(RBS_Types_Bases, rb_intern("Self"));
|
123
|
+
RBS_Types_Bases_Top = rb_const_get(RBS_Types_Bases, rb_intern("Top"));
|
124
|
+
RBS_Types_Bases_Void = rb_const_get(RBS_Types_Bases, rb_intern("Void"));
|
125
|
+
RBS_Types_Block = rb_const_get(RBS_Types, rb_intern("Block"));
|
126
|
+
RBS_Types_ClassInstance = rb_const_get(RBS_Types, rb_intern("ClassInstance"));
|
127
|
+
RBS_Types_ClassSingleton = rb_const_get(RBS_Types, rb_intern("ClassSingleton"));
|
128
|
+
RBS_Types_Function = rb_const_get(RBS_Types, rb_intern("Function"));
|
129
|
+
RBS_Types_Function_Param = rb_const_get(RBS_Types_Function, rb_intern("Param"));
|
130
|
+
RBS_Types_Interface = rb_const_get(RBS_Types, rb_intern("Interface"));
|
131
|
+
RBS_Types_Intersection = rb_const_get(RBS_Types, rb_intern("Intersection"));
|
132
|
+
RBS_Types_Literal = rb_const_get(RBS_Types, rb_intern("Literal"));
|
133
|
+
RBS_Types_Optional = rb_const_get(RBS_Types, rb_intern("Optional"));
|
134
|
+
RBS_Types_Proc = rb_const_get(RBS_Types, rb_intern("Proc"));
|
135
|
+
RBS_Types_Record = rb_const_get(RBS_Types, rb_intern("Record"));
|
136
|
+
RBS_Types_Tuple = rb_const_get(RBS_Types, rb_intern("Tuple"));
|
137
|
+
RBS_Types_Union = rb_const_get(RBS_Types, rb_intern("Union"));
|
138
|
+
RBS_Types_Variable = rb_const_get(RBS_Types, rb_intern("Variable"));
|
139
|
+
RBS_MethodType = rb_const_get(RBS, rb_intern("MethodType"));
|
140
|
+
}
|
@@ -0,0 +1,72 @@
|
|
1
|
+
#ifndef RBS__CONSTANTS_H
|
2
|
+
#define RBS__CONSTANTS_H
|
3
|
+
|
4
|
+
extern VALUE RBS;
|
5
|
+
|
6
|
+
extern VALUE RBS_AST;
|
7
|
+
extern VALUE RBS_AST_Annotation;
|
8
|
+
extern VALUE RBS_AST_Comment;
|
9
|
+
|
10
|
+
extern VALUE RBS_AST_Declarations;
|
11
|
+
extern VALUE RBS_AST_Declarations_Alias;
|
12
|
+
extern VALUE RBS_AST_Declarations_Class_Super;
|
13
|
+
extern VALUE RBS_AST_Declarations_Class;
|
14
|
+
extern VALUE RBS_AST_Declarations_Constant;
|
15
|
+
extern VALUE RBS_AST_Declarations_Global;
|
16
|
+
extern VALUE RBS_AST_Declarations_Interface;
|
17
|
+
extern VALUE RBS_AST_Declarations_Module_Self;
|
18
|
+
extern VALUE RBS_AST_Declarations_Module;
|
19
|
+
extern VALUE RBS_AST_Declarations_ModuleTypeParams_TypeParam;
|
20
|
+
extern VALUE RBS_AST_Declarations_ModuleTypeParams;
|
21
|
+
|
22
|
+
extern VALUE RBS_AST_Members;
|
23
|
+
extern VALUE RBS_AST_Members_Alias;
|
24
|
+
extern VALUE RBS_AST_Members_AttrAccessor;
|
25
|
+
extern VALUE RBS_AST_Members_AttrReader;
|
26
|
+
extern VALUE RBS_AST_Members_AttrWriter;
|
27
|
+
extern VALUE RBS_AST_Members_ClassInstanceVariable;
|
28
|
+
extern VALUE RBS_AST_Members_ClassVariable;
|
29
|
+
extern VALUE RBS_AST_Members_Extend;
|
30
|
+
extern VALUE RBS_AST_Members_Include;
|
31
|
+
extern VALUE RBS_AST_Members_InstanceVariable;
|
32
|
+
extern VALUE RBS_AST_Members_MethodDefinition;
|
33
|
+
extern VALUE RBS_AST_Members_Prepend;
|
34
|
+
extern VALUE RBS_AST_Members_Private;
|
35
|
+
extern VALUE RBS_AST_Members_Public;
|
36
|
+
|
37
|
+
extern VALUE RBS_MethodType;
|
38
|
+
extern VALUE RBS_Namespace;
|
39
|
+
|
40
|
+
extern VALUE RBS_ParsingError;
|
41
|
+
extern VALUE RBS_TypeName;
|
42
|
+
|
43
|
+
extern VALUE RBS_Types;
|
44
|
+
extern VALUE RBS_Types_Alias;
|
45
|
+
extern VALUE RBS_Types_Bases;
|
46
|
+
extern VALUE RBS_Types_Bases_Any;
|
47
|
+
extern VALUE RBS_Types_Bases_Bool;
|
48
|
+
extern VALUE RBS_Types_Bases_Bottom;
|
49
|
+
extern VALUE RBS_Types_Bases_Class;
|
50
|
+
extern VALUE RBS_Types_Bases_Instance;
|
51
|
+
extern VALUE RBS_Types_Bases_Nil;
|
52
|
+
extern VALUE RBS_Types_Bases_Self;
|
53
|
+
extern VALUE RBS_Types_Bases_Top;
|
54
|
+
extern VALUE RBS_Types_Bases_Void;
|
55
|
+
extern VALUE RBS_Types_Block;
|
56
|
+
extern VALUE RBS_Types_ClassInstance;
|
57
|
+
extern VALUE RBS_Types_ClassSingleton;
|
58
|
+
extern VALUE RBS_Types_Function_Param;
|
59
|
+
extern VALUE RBS_Types_Function;
|
60
|
+
extern VALUE RBS_Types_Interface;
|
61
|
+
extern VALUE RBS_Types_Intersection;
|
62
|
+
extern VALUE RBS_Types_Literal;
|
63
|
+
extern VALUE RBS_Types_Optional;
|
64
|
+
extern VALUE RBS_Types_Proc;
|
65
|
+
extern VALUE RBS_Types_Record;
|
66
|
+
extern VALUE RBS_Types_Tuple;
|
67
|
+
extern VALUE RBS_Types_Union;
|
68
|
+
extern VALUE RBS_Types_Variable;
|
69
|
+
|
70
|
+
void rbs__init_constants();
|
71
|
+
|
72
|
+
#endif
|
@@ -0,0 +1,1070 @@
|
|
1
|
+
#include "rbs_extension.h"
|
2
|
+
|
3
|
+
#define ONE_CHAR_PATTERN(c, t) case c: tok = next_token(state, t); break
|
4
|
+
|
5
|
+
/**
|
6
|
+
* Returns one character at current.
|
7
|
+
*
|
8
|
+
* ... A B C ...
|
9
|
+
* ^ current => A
|
10
|
+
* */
|
11
|
+
#define peek(state) rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string))
|
12
|
+
|
13
|
+
static const char *RBS_TOKENTYPE_NAMES[] = {
|
14
|
+
"NullType",
|
15
|
+
"pEOF",
|
16
|
+
"ErrorToken",
|
17
|
+
|
18
|
+
"pLPAREN", /* ( */
|
19
|
+
"pRPAREN", /* ) */
|
20
|
+
"pCOLON", /* : */
|
21
|
+
"pCOLON2", /* :: */
|
22
|
+
"pLBRACKET", /* [ */
|
23
|
+
"pRBRACKET", /* ] */
|
24
|
+
"pLBRACE", /* { */
|
25
|
+
"pRBRACE", /* } */
|
26
|
+
"pHAT", /* ^ */
|
27
|
+
"pARROW", /* -> */
|
28
|
+
"pFATARROW", /* => */
|
29
|
+
"pCOMMA", /* , */
|
30
|
+
"pBAR", /* | */
|
31
|
+
"pAMP", /* & */
|
32
|
+
"pSTAR", /* * */
|
33
|
+
"pSTAR2", /* ** */
|
34
|
+
"pDOT", /* . */
|
35
|
+
"pDOT3", /* ... */
|
36
|
+
"pBANG", /* ! */
|
37
|
+
"pQUESTION", /* ? */
|
38
|
+
"pLT", /* < */
|
39
|
+
"pEQ", /* = */
|
40
|
+
|
41
|
+
"kBOOL", /* bool */
|
42
|
+
"kBOT", /* bot */
|
43
|
+
"kCLASS", /* class */
|
44
|
+
"kFALSE", /* false */
|
45
|
+
"kINSTANCE", /* instance */
|
46
|
+
"kINTERFACE", /* interface */
|
47
|
+
"kNIL", /* nil */
|
48
|
+
"kSELF", /* self */
|
49
|
+
"kSINGLETON", /* singleton */
|
50
|
+
"kTOP", /* top */
|
51
|
+
"kTRUE", /* true */
|
52
|
+
"kVOID", /* void */
|
53
|
+
"kTYPE", /* type */
|
54
|
+
"kUNCHECKED", /* unchecked */
|
55
|
+
"kIN", /* in */
|
56
|
+
"kOUT", /* out */
|
57
|
+
"kEND", /* end */
|
58
|
+
"kDEF", /* def */
|
59
|
+
"kINCLUDE", /* include */
|
60
|
+
"kEXTEND", /* extend */
|
61
|
+
"kPREPEND", /* prepend */
|
62
|
+
"kALIAS", /* alias */
|
63
|
+
"kMODULE", /* module */
|
64
|
+
"kATTRREADER", /* attr_reader */
|
65
|
+
"kATTRWRITER", /* attr_writer */
|
66
|
+
"kATTRACCESSOR", /* attr_accessor */
|
67
|
+
"kPUBLIC", /* public */
|
68
|
+
"kPRIVATE", /* private */
|
69
|
+
"kUNTYPED", /* untyped */
|
70
|
+
|
71
|
+
"tLIDENT", /* Identifiers starting with lower case */
|
72
|
+
"tUIDENT", /* Identifiers starting with upper case */
|
73
|
+
"tULIDENT", /* Identifiers starting with `_` */
|
74
|
+
"tULLIDENT",
|
75
|
+
"tGIDENT", /* Identifiers starting with `$` */
|
76
|
+
"tAIDENT", /* Identifiers starting with `@` */
|
77
|
+
"tA2IDENT", /* Identifiers starting with `@@` */
|
78
|
+
"tBANGIDENT",
|
79
|
+
"tEQIDENT",
|
80
|
+
"tQIDENT", /* Quoted identifier */
|
81
|
+
"tOPERATOR", /* Operator identifier */
|
82
|
+
|
83
|
+
"tCOMMENT",
|
84
|
+
"tLINECOMMENT",
|
85
|
+
|
86
|
+
"tDQSTRING", /* Double quoted string */
|
87
|
+
"tSQSTRING", /* Single quoted string */
|
88
|
+
"tINTEGER", /* Integer */
|
89
|
+
"tSYMBOL", /* Symbol */
|
90
|
+
"tDQSYMBOL",
|
91
|
+
"tSQSYMBOL",
|
92
|
+
"tANNOTATION", /* Annotation */
|
93
|
+
};
|
94
|
+
|
95
|
+
token NullToken = { NullType };
|
96
|
+
position NullPosition = { -1, -1, -1, -1 };
|
97
|
+
range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
|
98
|
+
|
99
|
+
const char *token_type_str(enum TokenType type) {
|
100
|
+
return RBS_TOKENTYPE_NAMES[type];
|
101
|
+
}
|
102
|
+
|
103
|
+
unsigned int peekn(lexstate *state, unsigned int chars[], size_t length) {
|
104
|
+
int byteoffset = 0;
|
105
|
+
|
106
|
+
rb_encoding *encoding = rb_enc_get(state->string);
|
107
|
+
char *start = RSTRING_PTR(state->string) + state->current.byte_pos;
|
108
|
+
char *end = RSTRING_END(state->string);
|
109
|
+
|
110
|
+
for (size_t i = 0; i < length; i++)
|
111
|
+
{
|
112
|
+
chars[i] = rb_enc_mbc_to_codepoint(start + byteoffset, end, encoding);
|
113
|
+
byteoffset += rb_enc_codelen(chars[i], rb_enc_get(state->string));
|
114
|
+
}
|
115
|
+
|
116
|
+
return byteoffset;
|
117
|
+
}
|
118
|
+
|
119
|
+
int token_chars(token tok) {
|
120
|
+
return tok.range.end.char_pos - tok.range.start.char_pos;
|
121
|
+
}
|
122
|
+
|
123
|
+
int token_bytes(token tok) {
|
124
|
+
return RANGE_BYTES(tok.range);
|
125
|
+
}
|
126
|
+
|
127
|
+
/**
|
128
|
+
* ... token ...
|
129
|
+
* ^ start
|
130
|
+
* ^ current
|
131
|
+
*
|
132
|
+
* */
|
133
|
+
token next_token(lexstate *state, enum TokenType type) {
|
134
|
+
token t;
|
135
|
+
|
136
|
+
t.type = type;
|
137
|
+
t.range.start = state->start;
|
138
|
+
t.range.end = state->current;
|
139
|
+
state->start = state->current;
|
140
|
+
state->first_token_of_line = false;
|
141
|
+
|
142
|
+
return t;
|
143
|
+
}
|
144
|
+
|
145
|
+
void advance_skip(lexstate *state, unsigned int c, bool skip) {
|
146
|
+
int len = rb_enc_codelen(c, rb_enc_get(state->string));
|
147
|
+
|
148
|
+
state->current.char_pos += 1;
|
149
|
+
state->current.byte_pos += len;
|
150
|
+
|
151
|
+
if (c == '\n') {
|
152
|
+
state->current.line += 1;
|
153
|
+
state->current.column = 0;
|
154
|
+
state->first_token_of_line = true;
|
155
|
+
} else {
|
156
|
+
state->current.column += 1;
|
157
|
+
}
|
158
|
+
|
159
|
+
if (skip) {
|
160
|
+
state->start = state->current;
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
void advance_char(lexstate *state, unsigned int c) {
|
165
|
+
advance_skip(state, c, false);
|
166
|
+
}
|
167
|
+
|
168
|
+
void skip_char(lexstate *state, unsigned int c) {
|
169
|
+
advance_skip(state, c, true);
|
170
|
+
}
|
171
|
+
|
172
|
+
void skip(lexstate *state) {
|
173
|
+
unsigned char c = peek(state);
|
174
|
+
skip_char(state, c);
|
175
|
+
}
|
176
|
+
|
177
|
+
void advance(lexstate *state) {
|
178
|
+
unsigned char c = peek(state);
|
179
|
+
advance_char(state, c);
|
180
|
+
}
|
181
|
+
|
182
|
+
/*
|
183
|
+
1. Peek one character from state
|
184
|
+
2. If the read character equals the given `c`, skip the character and return true.
|
185
|
+
3. Return false otherwise.
|
186
|
+
*/
|
187
|
+
static bool advance_next_character_if(lexstate *state, unsigned int c) {
|
188
|
+
if (peek(state) == c) {
|
189
|
+
advance_char(state, c);
|
190
|
+
return true;
|
191
|
+
} else {
|
192
|
+
return false;
|
193
|
+
}
|
194
|
+
}
|
195
|
+
|
196
|
+
/*
|
197
|
+
... 0 1 ...
|
198
|
+
^ current
|
199
|
+
^ current (return)
|
200
|
+
*/
|
201
|
+
static token lex_number(lexstate *state) {
|
202
|
+
unsigned int c;
|
203
|
+
|
204
|
+
while (true) {
|
205
|
+
c = peek(state);
|
206
|
+
|
207
|
+
if (rb_isdigit(c) || c == '_') {
|
208
|
+
advance_char(state, c);
|
209
|
+
} else {
|
210
|
+
break;
|
211
|
+
}
|
212
|
+
}
|
213
|
+
|
214
|
+
return next_token(state, tINTEGER);
|
215
|
+
}
|
216
|
+
|
217
|
+
/*
|
218
|
+
lex_hyphen ::= - (tOPERATOR)
|
219
|
+
| - @ (tOPERATOR)
|
220
|
+
| - > (pARROW)
|
221
|
+
| - 1 ... (tINTEGER)
|
222
|
+
*/
|
223
|
+
static token lex_hyphen(lexstate* state) {
|
224
|
+
if (advance_next_character_if(state, '>')) {
|
225
|
+
return next_token(state, pARROW);
|
226
|
+
} else if (advance_next_character_if(state, '@')) {
|
227
|
+
return next_token(state, tOPERATOR);
|
228
|
+
} else {
|
229
|
+
unsigned int c = peek(state);
|
230
|
+
|
231
|
+
if (rb_isdigit(c)) {
|
232
|
+
advance_char(state, c);
|
233
|
+
return lex_number(state);
|
234
|
+
} else {
|
235
|
+
return next_token(state, tOPERATOR);
|
236
|
+
}
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
/*
|
241
|
+
lex_plus ::= +
|
242
|
+
| + @
|
243
|
+
| + \d
|
244
|
+
*/
|
245
|
+
static token lex_plus(lexstate *state) {
|
246
|
+
if (advance_next_character_if(state, '@')) {
|
247
|
+
return next_token(state, tOPERATOR);
|
248
|
+
} else if (rb_isdigit(peek(state))) {
|
249
|
+
return lex_number(state);
|
250
|
+
} else {
|
251
|
+
return next_token(state, tOPERATOR);
|
252
|
+
}
|
253
|
+
}
|
254
|
+
|
255
|
+
/*
|
256
|
+
lex_dot ::= . pDOT
|
257
|
+
| . . . pDOT3
|
258
|
+
*/
|
259
|
+
static token lex_dot(lexstate *state) {
|
260
|
+
unsigned int cs[2];
|
261
|
+
|
262
|
+
peekn(state, cs, 2);
|
263
|
+
|
264
|
+
if (cs[0] == '.' && cs[1] == '.') {
|
265
|
+
advance_char(state, '.');
|
266
|
+
advance_char(state, '.');
|
267
|
+
return next_token(state, pDOT3);
|
268
|
+
} else {
|
269
|
+
return next_token(state, pDOT);
|
270
|
+
}
|
271
|
+
}
|
272
|
+
|
273
|
+
/*
|
274
|
+
lex_eq ::= =
|
275
|
+
| ==
|
276
|
+
| ===
|
277
|
+
| =~
|
278
|
+
| =>
|
279
|
+
*/
|
280
|
+
static token lex_eq(lexstate *state) {
|
281
|
+
unsigned int cs[2];
|
282
|
+
peekn(state, cs, 2);
|
283
|
+
|
284
|
+
if (cs[0] == '=' && cs[1] == '=') {
|
285
|
+
// ===
|
286
|
+
advance_char(state, cs[0]);
|
287
|
+
advance_char(state, cs[1]);
|
288
|
+
return next_token(state, tOPERATOR);
|
289
|
+
} else if (cs[0] == '=') {
|
290
|
+
// ==
|
291
|
+
advance_char(state, cs[0]);
|
292
|
+
return next_token(state, tOPERATOR);
|
293
|
+
} else if (cs[0] == '~') {
|
294
|
+
// =~
|
295
|
+
advance_char(state, cs[0]);
|
296
|
+
return next_token(state, tOPERATOR);
|
297
|
+
} else if (cs[0] == '>') {
|
298
|
+
// =>
|
299
|
+
advance_char(state, cs[0]);
|
300
|
+
return next_token(state, pFATARROW);
|
301
|
+
} else {
|
302
|
+
return next_token(state, pEQ);
|
303
|
+
}
|
304
|
+
}
|
305
|
+
|
306
|
+
/*
|
307
|
+
underscore ::= _A tULIDENT
|
308
|
+
| _a tULLIDENT
|
309
|
+
| _ tULLIDENT
|
310
|
+
*/
|
311
|
+
static token lex_underscore(lexstate *state) {
|
312
|
+
unsigned int c;
|
313
|
+
|
314
|
+
c = peek(state);
|
315
|
+
|
316
|
+
if ('A' <= c && c <= 'Z') {
|
317
|
+
advance_char(state, c);
|
318
|
+
|
319
|
+
while (true) {
|
320
|
+
c = peek(state);
|
321
|
+
|
322
|
+
if (rb_isalnum(c) || c == '_') {
|
323
|
+
// ok
|
324
|
+
advance_char(state, c);
|
325
|
+
} else {
|
326
|
+
break;
|
327
|
+
}
|
328
|
+
}
|
329
|
+
|
330
|
+
return next_token(state, tULIDENT);
|
331
|
+
} else if (rb_isalnum(c) || c == '_') {
|
332
|
+
advance_char(state, c);
|
333
|
+
|
334
|
+
while (true) {
|
335
|
+
c = peek(state);
|
336
|
+
|
337
|
+
if (rb_isalnum(c) || c == '_') {
|
338
|
+
// ok
|
339
|
+
advance_char(state, c);
|
340
|
+
} else {
|
341
|
+
break;
|
342
|
+
}
|
343
|
+
}
|
344
|
+
|
345
|
+
if (c == '!') {
|
346
|
+
advance_char(state, c);
|
347
|
+
return next_token(state, tBANGIDENT);
|
348
|
+
} else if (c == '=') {
|
349
|
+
advance_char(state, c);
|
350
|
+
return next_token(state, tEQIDENT);
|
351
|
+
} else {
|
352
|
+
return next_token(state, tULLIDENT);
|
353
|
+
}
|
354
|
+
} else {
|
355
|
+
return next_token(state, tULLIDENT);
|
356
|
+
}
|
357
|
+
}
|
358
|
+
|
359
|
+
static bool is_opr(unsigned int c) {
|
360
|
+
switch (c) {
|
361
|
+
case ':':
|
362
|
+
case ';':
|
363
|
+
case '=':
|
364
|
+
case '.':
|
365
|
+
case ',':
|
366
|
+
case '!':
|
367
|
+
case '"':
|
368
|
+
case '$':
|
369
|
+
case '%':
|
370
|
+
case '&':
|
371
|
+
case '(':
|
372
|
+
case ')':
|
373
|
+
case '-':
|
374
|
+
case '+':
|
375
|
+
case '~':
|
376
|
+
case '|':
|
377
|
+
case '\\':
|
378
|
+
case '\'':
|
379
|
+
case '[':
|
380
|
+
case ']':
|
381
|
+
case '{':
|
382
|
+
case '}':
|
383
|
+
case '*':
|
384
|
+
case '/':
|
385
|
+
case '<':
|
386
|
+
case '>':
|
387
|
+
case '^':
|
388
|
+
return true;
|
389
|
+
default:
|
390
|
+
return false;
|
391
|
+
}
|
392
|
+
}
|
393
|
+
|
394
|
+
static token lex_global(lexstate *state) {
|
395
|
+
unsigned int c;
|
396
|
+
|
397
|
+
c = peek(state);
|
398
|
+
|
399
|
+
if (rb_isspace(c) || c == 0) {
|
400
|
+
return next_token(state, ErrorToken);
|
401
|
+
}
|
402
|
+
|
403
|
+
if (rb_isdigit(c)) {
|
404
|
+
// `$` [`0`-`9`]+
|
405
|
+
advance_char(state, c);
|
406
|
+
|
407
|
+
while (true) {
|
408
|
+
c = peek(state);
|
409
|
+
if (rb_isdigit(c)) {
|
410
|
+
advance_char(state, c);
|
411
|
+
} else {
|
412
|
+
return next_token(state, tGIDENT);
|
413
|
+
}
|
414
|
+
}
|
415
|
+
}
|
416
|
+
|
417
|
+
if (c == '-') {
|
418
|
+
// `$` `-` [a-zA-Z0-9_]
|
419
|
+
advance_char(state, c);
|
420
|
+
c = peek(state);
|
421
|
+
|
422
|
+
if (rb_isalnum(c) || c == '_') {
|
423
|
+
advance_char(state, c);
|
424
|
+
return next_token(state, tGIDENT);
|
425
|
+
} else {
|
426
|
+
return next_token(state, ErrorToken);
|
427
|
+
}
|
428
|
+
}
|
429
|
+
|
430
|
+
switch (c) {
|
431
|
+
case '~':
|
432
|
+
case '*':
|
433
|
+
case '$':
|
434
|
+
case '?':
|
435
|
+
case '!':
|
436
|
+
case '@':
|
437
|
+
case '\\':
|
438
|
+
case '/':
|
439
|
+
case ';':
|
440
|
+
case ',':
|
441
|
+
case '.':
|
442
|
+
case '=':
|
443
|
+
case ':':
|
444
|
+
case '<':
|
445
|
+
case '>':
|
446
|
+
case '"':
|
447
|
+
case '&':
|
448
|
+
case '\'':
|
449
|
+
case '`':
|
450
|
+
case '+':
|
451
|
+
advance_char(state, c);
|
452
|
+
return next_token(state, tGIDENT);
|
453
|
+
|
454
|
+
default:
|
455
|
+
if (is_opr(c) || c == 0) {
|
456
|
+
return next_token(state, ErrorToken);
|
457
|
+
}
|
458
|
+
|
459
|
+
while (true) {
|
460
|
+
advance_char(state, c);
|
461
|
+
c = peek(state);
|
462
|
+
|
463
|
+
if (rb_isspace(c) || is_opr(c) || c == 0) {
|
464
|
+
break;
|
465
|
+
}
|
466
|
+
}
|
467
|
+
|
468
|
+
return next_token(state, tGIDENT);
|
469
|
+
}
|
470
|
+
}
|
471
|
+
|
472
|
+
void pp(VALUE object) {
|
473
|
+
VALUE inspect = rb_funcall(object, rb_intern("inspect"), 0);
|
474
|
+
printf("pp >> %s\n", RSTRING_PTR(inspect));
|
475
|
+
}
|
476
|
+
|
477
|
+
/*
  Lexes an identifier starting from the current position.

  Consumes a run of `[A-Za-z0-9_]` characters. A `!` or `=` directly after
  the run is consumed too and turns the token into tBANGIDENT / tEQIDENT
  respectively; otherwise the token gets `default_type`.

  When the resulting token is a tLIDENT, its text is looked up in
  RBS_Parser_KEYWORDS and, if the table holds a Fixnum for it, the token
  type is replaced by that value.
*/
static token lex_ident(lexstate *state, enum TokenType default_type) {
  enum TokenType type = default_type;

  for (;;) {
    unsigned int ch = peek(state);

    if (rb_isalnum(ch) || ch == '_') {
      advance_char(state, ch);
      continue;
    }

    // The identifier body is over; a single trailing `!` or `=` is part of the token.
    if (ch == '!') {
      advance_char(state, ch);
      type = tBANGIDENT;
    } else if (ch == '=') {
      advance_char(state, ch);
      type = tEQIDENT;
    }

    break;
  }

  token tok = next_token(state, type);

  if (tok.type != tLIDENT) {
    return tok;
  }

  // Lower-case identifiers may actually be keywords.
  const char *head = RSTRING_PTR(state->string) + tok.range.start.byte_pos;
  VALUE name = rb_enc_str_new(head, RANGE_BYTES(tok.range), rb_enc_get(state->string));
  VALUE keyword = rb_hash_aref(RBS_Parser_KEYWORDS, name);

  if (FIXNUM_P(keyword)) {
    tok.type = FIX2INT(keyword);
  }

  return tok;
}
|
514
|
+
|
515
|
+
/*
  Lexes a comment up to (but not including) the end of the line.

  One space directly after the current position, if present, is consumed
  first. Then everything up to the next `\n` or NUL is consumed and a token
  of the given `type` is produced. Finally the terminating character is
  passed to skip_char, so it is not part of the returned token.
*/
static token lex_comment(lexstate *state, enum TokenType type) {
  unsigned int ch = peek(state);

  if (ch == ' ') {
    advance_char(state, ch);
  }

  for (ch = peek(state); ch != '\n' && ch != '\0'; ch = peek(state)) {
    advance_char(state, ch);
  }

  token tok = next_token(state, type);

  // `ch` is either '\n' or '\0' here.
  // NOTE(review): when the comment ends at NUL this still calls skip_char;
  // confirm that moving past the end of input is harmless for peek().
  skip_char(state, ch);

  return tok;
}
|
539
|
+
|
540
|
+
/*
|
541
|
+
... " ... " ...
|
542
|
+
^ start
|
543
|
+
^ current
|
544
|
+
^ current (after)
|
545
|
+
*/
|
546
|
+
static token lex_dqstring(lexstate *state) {
|
547
|
+
unsigned int c;
|
548
|
+
|
549
|
+
while (true) {
|
550
|
+
c = peek(state);
|
551
|
+
advance_char(state, c);
|
552
|
+
|
553
|
+
if (c == '\\') {
|
554
|
+
if (peek(state) == '"') {
|
555
|
+
advance_char(state, c);
|
556
|
+
c = peek(state);
|
557
|
+
}
|
558
|
+
} else if (c == '"') {
|
559
|
+
break;
|
560
|
+
}
|
561
|
+
}
|
562
|
+
|
563
|
+
return next_token(state, tDQSTRING);
|
564
|
+
}
|
565
|
+
|
566
|
+
/*
|
567
|
+
... @ foo ...
|
568
|
+
^ start
|
569
|
+
^ current
|
570
|
+
^ current (return)
|
571
|
+
|
572
|
+
... @ @ foo ...
|
573
|
+
^ start
|
574
|
+
^ current
|
575
|
+
^ current (return)
|
576
|
+
*/
|
577
|
+
static token lex_ivar(lexstate *state) {
|
578
|
+
unsigned int c;
|
579
|
+
|
580
|
+
enum TokenType type = tAIDENT;
|
581
|
+
|
582
|
+
c = peek(state);
|
583
|
+
|
584
|
+
if (c == '@') {
|
585
|
+
type = tA2IDENT;
|
586
|
+
advance_char(state, c);
|
587
|
+
c = peek(state);
|
588
|
+
}
|
589
|
+
|
590
|
+
if (rb_isalpha(c) || c == '_') {
|
591
|
+
advance_char(state, c);
|
592
|
+
c = peek(state);
|
593
|
+
} else {
|
594
|
+
return next_token(state, ErrorToken);
|
595
|
+
}
|
596
|
+
|
597
|
+
while (rb_isalnum(c) || c == '_') {
|
598
|
+
advance_char(state, c);
|
599
|
+
c = peek(state);
|
600
|
+
}
|
601
|
+
|
602
|
+
return next_token(state, type);
|
603
|
+
}
|
604
|
+
|
605
|
+
/*
|
606
|
+
... ' ... ' ...
|
607
|
+
^ start
|
608
|
+
^ current
|
609
|
+
^ current (after)
|
610
|
+
*/
|
611
|
+
static token lex_sqstring(lexstate *state) {
|
612
|
+
unsigned int c;
|
613
|
+
|
614
|
+
c = peek(state);
|
615
|
+
|
616
|
+
while (true) {
|
617
|
+
c = peek(state);
|
618
|
+
advance_char(state, c);
|
619
|
+
|
620
|
+
if (c == '\\') {
|
621
|
+
if (peek(state) == '\'') {
|
622
|
+
advance_char(state, c);
|
623
|
+
c = peek(state);
|
624
|
+
}
|
625
|
+
} else if (c == '\'') {
|
626
|
+
break;
|
627
|
+
}
|
628
|
+
}
|
629
|
+
|
630
|
+
return next_token(state, tSQSTRING);
|
631
|
+
}
|
632
|
+
|
633
|
+
// True when the code points c0, c1 (and c2) equal the first two (three)
// characters of the string `s`. Arguments are parenthesized so that
// arbitrary expressions can be passed safely.
#define EQPOINTS2(c0, c1, s) ((c0) == (s)[0] && (c1) == (s)[1])
#define EQPOINTS3(c0, c1, c2, s) ((c0) == (s)[0] && (c1) == (s)[1] && (c2) == (s)[2])
|
635
|
+
|
636
|
+
/*
|
637
|
+
... : @ ...
|
638
|
+
^ start
|
639
|
+
^ current
|
640
|
+
^ current (return)
|
641
|
+
*/
|
642
|
+
static token lex_colon_symbol(lexstate *state) {
|
643
|
+
unsigned int c[3];
|
644
|
+
peekn(state, c, 3);
|
645
|
+
|
646
|
+
switch (c[0]) {
|
647
|
+
case '|':
|
648
|
+
case '&':
|
649
|
+
case '/':
|
650
|
+
case '%':
|
651
|
+
case '~':
|
652
|
+
case '`':
|
653
|
+
case '^':
|
654
|
+
advance_char(state, c[0]);
|
655
|
+
return next_token(state, tSYMBOL);
|
656
|
+
case '=':
|
657
|
+
if (EQPOINTS2(c[0], c[1], "=~")) {
|
658
|
+
// :=~
|
659
|
+
advance_char(state, c[0]);
|
660
|
+
advance_char(state, c[1]);
|
661
|
+
return next_token(state, tSYMBOL);
|
662
|
+
} else if (EQPOINTS3(c[0], c[1], c[2], "===")) {
|
663
|
+
// :===
|
664
|
+
advance_char(state, c[0]);
|
665
|
+
advance_char(state, c[1]);
|
666
|
+
advance_char(state, c[2]);
|
667
|
+
return next_token(state, tSYMBOL);
|
668
|
+
} else if (EQPOINTS2(c[0], c[1], "==")) {
|
669
|
+
// :==
|
670
|
+
advance_char(state, c[0]);
|
671
|
+
advance_char(state, c[1]);
|
672
|
+
return next_token(state, tSYMBOL);
|
673
|
+
}
|
674
|
+
break;
|
675
|
+
case '<':
|
676
|
+
if (EQPOINTS3(c[0], c[1], c[2], "<=>")) {
|
677
|
+
advance_char(state, c[0]);
|
678
|
+
advance_char(state, c[1]);
|
679
|
+
advance_char(state, c[2]);
|
680
|
+
} else if (EQPOINTS2(c[0], c[1], "<=") || EQPOINTS2(c[0], c[1], "<<")) {
|
681
|
+
advance_char(state, c[0]);
|
682
|
+
advance_char(state, c[1]);
|
683
|
+
} else {
|
684
|
+
advance_char(state, c[0]);
|
685
|
+
}
|
686
|
+
return next_token(state, tSYMBOL);
|
687
|
+
case '>':
|
688
|
+
if (EQPOINTS2(c[0], c[1], ">=") || EQPOINTS2(c[0], c[1], ">>")) {
|
689
|
+
advance_char(state, c[0]);
|
690
|
+
advance_char(state, c[1]);
|
691
|
+
} else {
|
692
|
+
advance_char(state, c[0]);
|
693
|
+
}
|
694
|
+
return next_token(state, tSYMBOL);
|
695
|
+
case '-':
|
696
|
+
case '+':
|
697
|
+
if (EQPOINTS2(c[0], c[1], "+@") || EQPOINTS2(c[0], c[1], "-@")) {
|
698
|
+
advance_char(state, c[0]);
|
699
|
+
advance_char(state, c[1]);
|
700
|
+
} else {
|
701
|
+
advance_char(state, c[0]);
|
702
|
+
}
|
703
|
+
return next_token(state, tSYMBOL);
|
704
|
+
case '*':
|
705
|
+
if (EQPOINTS2(c[0], c[1], "**")) {
|
706
|
+
advance_char(state, c[0]);
|
707
|
+
advance_char(state, c[1]);
|
708
|
+
} else {
|
709
|
+
advance_char(state, c[0]);
|
710
|
+
}
|
711
|
+
return next_token(state, tSYMBOL);
|
712
|
+
case '[':
|
713
|
+
if (EQPOINTS3(c[0], c[1], c[2], "[]=")) {
|
714
|
+
advance_char(state, c[0]);
|
715
|
+
advance_char(state, c[1]);
|
716
|
+
advance_char(state, c[2]);
|
717
|
+
} else if (EQPOINTS2(c[0], c[1], "[]")) {
|
718
|
+
advance_char(state, c[0]);
|
719
|
+
advance_char(state, c[1]);
|
720
|
+
} else {
|
721
|
+
break;
|
722
|
+
}
|
723
|
+
return next_token(state, tSYMBOL);
|
724
|
+
case '!':
|
725
|
+
if (EQPOINTS2(c[0], c[1], "!=") || EQPOINTS2(c[0], c[1], "!~")) {
|
726
|
+
advance_char(state, c[0]);
|
727
|
+
advance_char(state, c[1]);
|
728
|
+
} else {
|
729
|
+
advance_char(state, c[0]);
|
730
|
+
}
|
731
|
+
return next_token(state, tSYMBOL);
|
732
|
+
case '@': {
|
733
|
+
advance_char(state, '@');
|
734
|
+
token tok = lex_ivar(state);
|
735
|
+
if (tok.type != ErrorToken) {
|
736
|
+
tok.type = tSYMBOL;
|
737
|
+
}
|
738
|
+
return tok;
|
739
|
+
}
|
740
|
+
case '$': {
|
741
|
+
advance_char(state, '$');
|
742
|
+
token tok = lex_global(state);
|
743
|
+
if (tok.type != ErrorToken) {
|
744
|
+
tok.type = tSYMBOL;
|
745
|
+
}
|
746
|
+
return tok;
|
747
|
+
}
|
748
|
+
case '\'': {
|
749
|
+
position start = state->start;
|
750
|
+
advance_char(state, '\'');
|
751
|
+
token tok = lex_sqstring(state);
|
752
|
+
tok.type = tSQSYMBOL;
|
753
|
+
tok.range.start = start;
|
754
|
+
return tok;
|
755
|
+
}
|
756
|
+
case '"': {
|
757
|
+
position start = state->start;
|
758
|
+
advance_char(state, '"');
|
759
|
+
token tok = lex_dqstring(state);
|
760
|
+
tok.type = tDQSYMBOL;
|
761
|
+
tok.range.start = start;
|
762
|
+
return tok;
|
763
|
+
}
|
764
|
+
default:
|
765
|
+
if (rb_isalpha(c[0]) || c[0] == '_') {
|
766
|
+
position start = state->start;
|
767
|
+
token tok = lex_ident(state, NullType);
|
768
|
+
tok.range.start = start;
|
769
|
+
|
770
|
+
if (peek(state) == '?') {
|
771
|
+
if (tok.type != tBANGIDENT && tok.type != tEQIDENT) {
|
772
|
+
skip_char(state, '?');
|
773
|
+
tok.range.end = state->current;
|
774
|
+
}
|
775
|
+
}
|
776
|
+
|
777
|
+
tok.type = tSYMBOL;
|
778
|
+
return tok;
|
779
|
+
}
|
780
|
+
}
|
781
|
+
|
782
|
+
return next_token(state, pCOLON);
|
783
|
+
}
|
784
|
+
|
785
|
+
/*
|
786
|
+
... : : ...
|
787
|
+
^ start
|
788
|
+
^ current
|
789
|
+
^ current (return)
|
790
|
+
|
791
|
+
... : ...
|
792
|
+
^ start
|
793
|
+
^ current (lex_colon_symbol)
|
794
|
+
*/
|
795
|
+
static token lex_colon(lexstate *state) {
|
796
|
+
unsigned int c = peek(state);
|
797
|
+
|
798
|
+
if (c == ':') {
|
799
|
+
advance_char(state, c);
|
800
|
+
return next_token(state, pCOLON2);
|
801
|
+
} else {
|
802
|
+
return lex_colon_symbol(state);
|
803
|
+
}
|
804
|
+
}
|
805
|
+
|
806
|
+
/*
|
807
|
+
lex_lt ::= < (pLT)
|
808
|
+
| < < (tOPERATOR)
|
809
|
+
| < = (tOPERATOR)
|
810
|
+
| < = > (tOPERATOR)
|
811
|
+
*/
|
812
|
+
static token lex_lt(lexstate *state) {
|
813
|
+
if (advance_next_character_if(state, '<')) {
|
814
|
+
return next_token(state, tOPERATOR);
|
815
|
+
} else if (advance_next_character_if(state, '=')) {
|
816
|
+
advance_next_character_if(state, '>');
|
817
|
+
return next_token(state, tOPERATOR);
|
818
|
+
} else {
|
819
|
+
return next_token(state, pLT);
|
820
|
+
}
|
821
|
+
}
|
822
|
+
|
823
|
+
/*
|
824
|
+
lex_gt ::= >
|
825
|
+
| > =
|
826
|
+
| > >
|
827
|
+
*/
|
828
|
+
static token lex_gt(lexstate *state) {
|
829
|
+
advance_next_character_if(state, '=') || advance_next_character_if(state, '>');
|
830
|
+
return next_token(state, tOPERATOR);
|
831
|
+
}
|
832
|
+
|
833
|
+
/*
|
834
|
+
... `%` `a` `{` ... `}` ...
|
835
|
+
^ start
|
836
|
+
^ current
|
837
|
+
^ current (exit)
|
838
|
+
--- token
|
839
|
+
*/
|
840
|
+
static token lex_percent(lexstate *state) {
|
841
|
+
unsigned int cs[2];
|
842
|
+
unsigned int end_char;
|
843
|
+
|
844
|
+
peekn(state, cs, 2);
|
845
|
+
|
846
|
+
if (cs[0] != 'a') {
|
847
|
+
return next_token(state, tOPERATOR);
|
848
|
+
}
|
849
|
+
|
850
|
+
switch (cs[1])
|
851
|
+
{
|
852
|
+
case '{':
|
853
|
+
end_char = '}';
|
854
|
+
break;
|
855
|
+
case '(':
|
856
|
+
end_char = ')';
|
857
|
+
break;
|
858
|
+
case '[':
|
859
|
+
end_char = ']';
|
860
|
+
break;
|
861
|
+
case '|':
|
862
|
+
end_char = '|';
|
863
|
+
break;
|
864
|
+
case '<':
|
865
|
+
end_char = '>';
|
866
|
+
break;
|
867
|
+
default:
|
868
|
+
return next_token(state, tOPERATOR);
|
869
|
+
}
|
870
|
+
|
871
|
+
advance_char(state, cs[0]);
|
872
|
+
advance_char(state, cs[1]);
|
873
|
+
|
874
|
+
unsigned int c;
|
875
|
+
|
876
|
+
while ((c = peek(state))) {
|
877
|
+
if (c == end_char) {
|
878
|
+
advance_char(state, c);
|
879
|
+
return next_token(state, tANNOTATION);
|
880
|
+
}
|
881
|
+
advance_char(state, c);
|
882
|
+
}
|
883
|
+
|
884
|
+
return next_token(state, ErrorToken);
|
885
|
+
}
|
886
|
+
|
887
|
+
/*
|
888
|
+
bracket ::= [ (pLBRACKET)
|
889
|
+
* ^
|
890
|
+
| [ ] (tOPERATOR)
|
891
|
+
* ^ $
|
892
|
+
| [ ] = (tOPERATOR)
|
893
|
+
* ^ $
|
894
|
+
*/
|
895
|
+
static token lex_bracket(lexstate *state) {
|
896
|
+
if (advance_next_character_if(state, ']')) {
|
897
|
+
advance_next_character_if(state, '=');
|
898
|
+
return next_token(state, tOPERATOR);
|
899
|
+
} else {
|
900
|
+
return next_token(state, pLBRACKET);
|
901
|
+
}
|
902
|
+
}
|
903
|
+
|
904
|
+
/*
|
905
|
+
bracket ::= *
|
906
|
+
| * *
|
907
|
+
*/
|
908
|
+
static token lex_star(lexstate *state) {
|
909
|
+
if (advance_next_character_if(state, '*')) {
|
910
|
+
return next_token(state, pSTAR2);
|
911
|
+
} else {
|
912
|
+
return next_token(state, pSTAR);
|
913
|
+
}
|
914
|
+
}
|
915
|
+
|
916
|
+
/*
|
917
|
+
bang ::= !
|
918
|
+
| ! =
|
919
|
+
| ! ~
|
920
|
+
*/
|
921
|
+
static token lex_bang(lexstate *state) {
|
922
|
+
advance_next_character_if(state, '=') || advance_next_character_if(state, '~');
|
923
|
+
return next_token(state, tOPERATOR);
|
924
|
+
}
|
925
|
+
|
926
|
+
/*
|
927
|
+
backquote ::= ` (tOPERATOR)
|
928
|
+
| `[^ :][^`]` (tQIDENT)
|
929
|
+
*/
|
930
|
+
static token lex_backquote(lexstate *state) {
|
931
|
+
unsigned int c = peek(state);
|
932
|
+
|
933
|
+
if (c == ' ' || c == ':') {
|
934
|
+
return next_token(state, tOPERATOR);
|
935
|
+
} else {
|
936
|
+
while (true) {
|
937
|
+
if (c == '`') {
|
938
|
+
break;
|
939
|
+
}
|
940
|
+
|
941
|
+
c = peek(state);
|
942
|
+
advance_char(state, c);
|
943
|
+
}
|
944
|
+
|
945
|
+
return next_token(state, tQIDENT);
|
946
|
+
}
|
947
|
+
}
|
948
|
+
|
949
|
+
/*
  Returns the next token from the lexer state.

  Spaces, tabs and newlines are skipped first. A NUL yields pEOF. Otherwise
  the first character of the token is consumed and the rest of the token is
  lexed by the helper selected from that character. If no helper produces a
  token (the type is still NullType), an ErrorToken is returned.
*/
token rbsparser_next_token(lexstate *state) {
  token tok = NullToken;

  unsigned int c;
  bool skipping = true;

  while (skipping) {
    c = peek(state);

    switch (c) {
      case ' ':
      case '\t':
      case '\n':
        // nop
        skip_char(state, c);
        break;
      case '\0':
        return next_token(state, pEOF);
      default:
        advance_char(state, c);
        skipping = false;
        break;
    }
  }

  /* ... c d ..                      */
  /*      ^     state->current       */
  /*    ^       start                */
  switch (c) {
    case '\0':
      // Not reachable in practice (the loop above returns on NUL), but make
      // sure it can never fall through into the patterns below.
      tok = next_token(state, pEOF);
      break;
    ONE_CHAR_PATTERN('(', pLPAREN);
    ONE_CHAR_PATTERN(')', pRPAREN);
    ONE_CHAR_PATTERN(']', pRBRACKET);
    ONE_CHAR_PATTERN('{', pLBRACE);
    ONE_CHAR_PATTERN('}', pRBRACE);
    ONE_CHAR_PATTERN(',', pCOMMA);
    ONE_CHAR_PATTERN('|', pBAR);
    ONE_CHAR_PATTERN('^', pHAT);
    ONE_CHAR_PATTERN('&', pAMP);
    ONE_CHAR_PATTERN('?', pQUESTION);
    ONE_CHAR_PATTERN('/', tOPERATOR);
    ONE_CHAR_PATTERN('~', tOPERATOR);
    case '[':
      tok = lex_bracket(state);
      break;
    case '-':
      tok = lex_hyphen(state);
      break;
    case '+':
      tok = lex_plus(state);
      break;
    case '*':
      tok = lex_star(state);
      break;
    case '<':
      tok = lex_lt(state);
      break;
    case '=':
      tok = lex_eq(state);
      break;
    case '>':
      tok = lex_gt(state);
      break;
    case '!':
      tok = lex_bang(state);
      break;
    case '#':
      // A comment that starts its line is a line comment.
      if (state->first_token_of_line) {
        tok = lex_comment(state, tLINECOMMENT);
      } else {
        tok = lex_comment(state, tCOMMENT);
      }
      break;
    case ':':
      tok = lex_colon(state);
      break;
    case '.':
      tok = lex_dot(state);
      break;
    case '_':
      tok = lex_underscore(state);
      break;
    case '$':
      tok = lex_global(state);
      break;
    case '@':
      tok = lex_ivar(state);
      break;
    case '"':
      tok = lex_dqstring(state);
      break;
    case '\'':
      tok = lex_sqstring(state);
      break;
    case '%':
      tok = lex_percent(state);
      break;
    case '`':
      tok = lex_backquote(state);
      break;
    default:
      // The three classes are mutually exclusive, so at most one helper runs.
      if (rb_isalpha(c) && rb_isupper(c)) {
        tok = lex_ident(state, tUIDENT);
      } else if (rb_isalpha(c) && rb_islower(c)) {
        tok = lex_ident(state, tLIDENT);
      } else if (rb_isdigit(c)) {
        tok = lex_number(state);
      }
  }

  // No helper recognized the character.
  if (tok.type == NullType) {
    tok = next_token(state, ErrorToken);
  }

  return tok;
}
|
1067
|
+
|
1068
|
+
/*
  Returns a pointer to the first byte of the given token inside the
  lexer's source string. The pointer refers into the source buffer itself;
  nothing is copied.
*/
char *peek_token(lexstate *state, token tok) {
  char *source = RSTRING_PTR(state->string);

  return source + tok.range.start.byte_pos;
}
|