rbs 1.6.2 → 1.7.0.beta.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +0 -4
  3. data/.gitignore +1 -0
  4. data/CHANGELOG.md +6 -0
  5. data/Gemfile +1 -0
  6. data/Rakefile +7 -22
  7. data/core/kernel.rbs +4 -4
  8. data/core/trace_point.rbs +1 -1
  9. data/ext/rbs/extension/constants.c +140 -0
  10. data/ext/rbs/extension/constants.h +72 -0
  11. data/ext/rbs/extension/extconf.rb +3 -0
  12. data/ext/rbs/extension/lexer.c +1070 -0
  13. data/ext/rbs/extension/lexer.h +145 -0
  14. data/ext/rbs/extension/location.c +295 -0
  15. data/ext/rbs/extension/location.h +59 -0
  16. data/ext/rbs/extension/main.c +9 -0
  17. data/ext/rbs/extension/parser.c +2418 -0
  18. data/ext/rbs/extension/parser.h +23 -0
  19. data/ext/rbs/extension/parserstate.c +313 -0
  20. data/ext/rbs/extension/parserstate.h +141 -0
  21. data/ext/rbs/extension/rbs_extension.h +40 -0
  22. data/ext/rbs/extension/ruby_objs.c +585 -0
  23. data/ext/rbs/extension/ruby_objs.h +46 -0
  24. data/ext/rbs/extension/unescape.c +65 -0
  25. data/goodcheck.yml +1 -1
  26. data/lib/rbs/ast/comment.rb +0 -12
  27. data/lib/rbs/buffer.rb +4 -0
  28. data/lib/rbs/cli.rb +5 -8
  29. data/lib/rbs/collection/sources/git.rb +18 -3
  30. data/lib/rbs/errors.rb +14 -1
  31. data/lib/rbs/location.rb +221 -217
  32. data/lib/rbs/location_aux.rb +108 -0
  33. data/lib/rbs/locator.rb +10 -7
  34. data/lib/rbs/parser_aux.rb +24 -0
  35. data/lib/rbs/types.rb +2 -3
  36. data/lib/rbs/version.rb +1 -1
  37. data/lib/rbs/writer.rb +4 -2
  38. data/lib/rbs.rb +3 -7
  39. data/rbs.gemspec +2 -1
  40. data/sig/ancestor_builder.rbs +2 -2
  41. data/sig/annotation.rbs +2 -2
  42. data/sig/comment.rbs +7 -7
  43. data/sig/constant_table.rbs +1 -1
  44. data/sig/declarations.rbs +9 -9
  45. data/sig/definition.rbs +1 -1
  46. data/sig/definition_builder.rbs +2 -2
  47. data/sig/errors.rbs +30 -25
  48. data/sig/location.rbs +42 -79
  49. data/sig/locator.rbs +2 -2
  50. data/sig/members.rbs +7 -7
  51. data/sig/method_types.rbs +3 -3
  52. data/sig/parser.rbs +11 -21
  53. data/sig/types.rbs +45 -27
  54. data/sig/writer.rbs +1 -1
  55. data/stdlib/json/0/json.rbs +3 -3
  56. metadata +24 -6
  57. data/lib/rbs/parser.rb +0 -3614
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de88dd41e7cee057e4668ffd3ed94e91d5d09746cdbfcd26602cc04b9ef2abd1
4
- data.tar.gz: '06092d8a703d157bc214f81155bc24009491db68073fbcedc5fde9b09aca85e7'
3
+ metadata.gz: 6f2e7e51d08feb7b1a562ac43c679da8fc6f0a7cfc2ecef65e11c1e8c3d0d2b9
4
+ data.tar.gz: d41ed33c0b5de01cd1746faa3572e56f651ecfc808c046cdb4166a0ec357138e
5
5
  SHA512:
6
- metadata.gz: 2a53e2b25733fd63c832748ae629275becb31badd9b120cf2b8c72c66cea7e084d0aefefeaa09c00f2364ab1335ee030b0b54e8a14f3f7dbe08e1523cfe1d7e2
7
- data.tar.gz: 66b02700d2a81f5d8367451356af01a0cac8b0152194adb5f844abf92ffdfc16bcec975f2e21302477b32ee344242d3e0407105c3cf4e7a78387fa454003a418
6
+ metadata.gz: bc198dc7d40caaa802604cb1f633098955540d838c4e1adcca635da85880703378d981a4f37c528e7f4b3720fc6f152d7fc6c7bcb11ce3e0abba76fd14a6d47c
7
+ data.tar.gz: 68c6643f9adf4761bec2a2ffb211e223cb732a37c341652c4625191b5ab6d6791c91196f3c3bc4fd5624d6d853558c33e28f8462f483f923badad8f38498a675
@@ -19,10 +19,6 @@ jobs:
19
19
  - test
20
20
  - stdlib_test
21
21
  - rubocop validate test_doc build test_generate_stdlib
22
- - confirm_parser
23
- exclude:
24
- - container_tag: master-nightly-focal
25
- job: confirm_parser
26
22
  container:
27
23
  image: rubylang/ruby:${{ matrix.container_tag }}
28
24
  steps:
data/.gitignore CHANGED
@@ -11,3 +11,4 @@
11
11
  /vendor/sigs
12
12
  /Gemfile.lock
13
13
 
14
+ lib/**/*.bundle
data/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  ## master
4
4
 
5
+ ## 1.7.0 (beta.1)
6
+
7
+ ## Library changes
8
+
9
+ * Replace RBS::Parser ([#788](https://github.com/ruby/rbs/pull/788))
10
+
5
11
  ## 1.6.2 (2021-09-09)
6
12
 
7
13
  ## Signature updates
data/Gemfile CHANGED
@@ -5,6 +5,7 @@ gemspec
5
5
 
6
6
  # Development dependencies
7
7
  gem "rake"
8
+ gem "rake-compiler"
8
9
  gem "test-unit"
9
10
  gem "rspec"
10
11
  gem "racc"
data/Rakefile CHANGED
@@ -1,15 +1,17 @@
1
1
  require "bundler/gem_tasks"
2
2
  require "rake/testtask"
3
3
  require "rbconfig"
4
+ require 'rake/extensiontask'
4
5
 
5
6
  $LOAD_PATH << File.join(__dir__, "test")
6
7
 
7
8
  ruby = ENV["RUBY"] || RbConfig.ruby
8
- racc = ENV.fetch("RACC", "racc")
9
9
  rbs = File.join(__dir__, "exe/rbs")
10
10
  bin = File.join(__dir__, "bin")
11
11
 
12
- Rake::TestTask.new(:test) do |t|
12
+ Rake::ExtensionTask.new("rbs/extension")
13
+
14
+ Rake::TestTask.new(:test => :compile) do |t|
13
15
  t.libs << "test"
14
16
  t.libs << "lib"
15
17
  t.test_files = FileList["test/**/*_test.rb"].reject do |path|
@@ -19,7 +21,7 @@ end
19
21
 
20
22
  multitask :default => [:test, :stdlib_test, :rubocop, :validate, :test_doc]
21
23
 
22
- task :test_doc => :parser do
24
+ task :test_doc do
23
25
  files = Dir.chdir(File.expand_path('..', __FILE__)) do
24
26
  `git ls-files -z`.split("\x0").select do |file| Pathname(file).extname == ".md" end
25
27
  end
@@ -27,7 +29,7 @@ task :test_doc => :parser do
27
29
  sh "#{ruby} #{__dir__}/bin/run_in_md.rb #{files.join(" ")}"
28
30
  end
29
31
 
30
- task :validate => :parser do
32
+ task :validate => :compile do
31
33
  sh "#{ruby} #{rbs} validate --silent"
32
34
 
33
35
  FileList["stdlib/*"].each do |path|
@@ -72,7 +74,7 @@ task :validate => :parser do
72
74
  end
73
75
 
74
76
  FileList["test/stdlib/**/*_test.rb"].each do |test|
75
- task test => :parser do
77
+ task test => :compile do
76
78
  sh "#{ruby} -Ilib #{bin}/test_runner.rb #{test}"
77
79
  end
78
80
  task stdlib_test: test
@@ -82,21 +84,6 @@ task :rubocop do
82
84
  sh "rubocop --parallel"
83
85
  end
84
86
 
85
- rule ".rb" => ".y" do |t|
86
- sh "#{racc} -v -o #{t.name} #{t.source}"
87
- end
88
-
89
- task :parser => "lib/rbs/parser.rb"
90
- task :test => :parser
91
- task :stdlib_test => :parser
92
- task :build => :parser
93
-
94
- task :confirm_parser do
95
- puts "Testing if parser.rb is updated with respect to parser.y"
96
- sh "#{racc} -v -o lib/rbs/parser.rb lib/rbs/parser.y"
97
- sh "git diff --exit-code lib/rbs/parser.rb"
98
- end
99
-
100
87
  namespace :generate do
101
88
  desc "Generate a test file for a stdlib class signatures"
102
89
  task :stdlib_test, [:class] do |_task, args|
@@ -225,5 +212,3 @@ task :test_generate_stdlib do
225
212
  sh "RBS_GENERATE_TEST_PATH=/tmp/Array_test.rb rake 'generate:stdlib_test[Array]'"
226
213
  sh "ruby -c /tmp/Array_test.rb"
227
214
  end
228
-
229
- CLEAN.include("lib/rbs/parser.rb")
data/core/kernel.rbs CHANGED
@@ -482,8 +482,8 @@ module Kernel : BasicObject
482
482
  | [:child, int] # redirect to the redirected file descriptor
483
483
  | :close # close the file descriptor in child process
484
484
 
485
- def self?.spawn: (String command, *String args, ?unsetenv_others: boolish, ?pgroup?: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> Integer
486
- | (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup?: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> Integer
485
+ def self?.spawn: (String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> Integer
486
+ | (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> Integer
487
487
 
488
488
  # Executes *command…* in a subshell. *command…* is one of following forms.
489
489
  #
@@ -510,8 +510,8 @@ module Kernel : BasicObject
510
510
  # *
511
511
  #
512
512
  # See `Kernel.exec` for the standard shell.
513
- def self?.system: (String command, *String args, ?unsetenv_others: boolish, ?pgroup?: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> (NilClass | FalseClass | TrueClass)
514
- | (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup?: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> (NilClass | FalseClass | TrueClass)
513
+ def self?.system: (String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> (NilClass | FalseClass | TrueClass)
514
+ | (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> (NilClass | FalseClass | TrueClass)
515
515
  end
516
516
 
517
517
  Kernel::RUBYGEMS_ACTIVATION_MONITOR: untyped
data/core/trace_point.rbs CHANGED
@@ -286,5 +286,5 @@ class TracePoint < Object
286
286
  # Same as TracePoint#binding:
287
287
  # trace.binding.eval('self')
288
288
  #
289
- def `self`: () -> Binding
289
+ def self: () -> Binding
290
290
  end
@@ -0,0 +1,140 @@
1
+ #include "rbs_extension.h"
2
+
3
+ VALUE RBS_Parser;
4
+ VALUE RBS_Parser_KEYWORDS;
5
+
6
+ VALUE RBS;
7
+ VALUE RBS_AST;
8
+ VALUE RBS_AST_Comment;
9
+ VALUE RBS_AST_Annotation;
10
+
11
+ VALUE RBS_AST_Declarations;
12
+
13
+ VALUE RBS_AST_Declarations_ModuleTypeParams;
14
+ VALUE RBS_AST_Declarations_ModuleTypeParams_TypeParam;
15
+
16
+ VALUE RBS_AST_Declarations_Alias;
17
+ VALUE RBS_AST_Declarations_Constant;
18
+ VALUE RBS_AST_Declarations_Global;
19
+ VALUE RBS_AST_Declarations_Interface;
20
+ VALUE RBS_AST_Declarations_Module;
21
+ VALUE RBS_AST_Declarations_Module_Self;
22
+ VALUE RBS_AST_Declarations_Class;
23
+ VALUE RBS_AST_Declarations_Class_Super;
24
+
25
+ VALUE RBS_AST_Members;
26
+ VALUE RBS_AST_Members_Alias;
27
+ VALUE RBS_AST_Members_AttrAccessor;
28
+ VALUE RBS_AST_Members_AttrReader;
29
+ VALUE RBS_AST_Members_AttrWriter;
30
+ VALUE RBS_AST_Members_ClassInstanceVariable;
31
+ VALUE RBS_AST_Members_ClassVariable;
32
+ VALUE RBS_AST_Members_Extend;
33
+ VALUE RBS_AST_Members_Include;
34
+ VALUE RBS_AST_Members_InstanceVariable;
35
+ VALUE RBS_AST_Members_MethodDefinition;
36
+ VALUE RBS_AST_Members_Prepend;
37
+ VALUE RBS_AST_Members_Private;
38
+ VALUE RBS_AST_Members_Public;
39
+
40
+ VALUE RBS_Namespace;
41
+ VALUE RBS_TypeName;
42
+
43
+ VALUE RBS_Types_Alias;
44
+ VALUE RBS_Types_Bases_Any;
45
+ VALUE RBS_Types_Bases_Bool;
46
+ VALUE RBS_Types_Bases_Bottom;
47
+ VALUE RBS_Types_Bases_Class;
48
+ VALUE RBS_Types_Bases_Instance;
49
+ VALUE RBS_Types_Bases_Nil;
50
+ VALUE RBS_Types_Bases_Self;
51
+ VALUE RBS_Types_Bases_Top;
52
+ VALUE RBS_Types_Bases_Void;
53
+ VALUE RBS_Types_Bases;
54
+ VALUE RBS_Types_Block;
55
+ VALUE RBS_Types_ClassInstance;
56
+ VALUE RBS_Types_ClassSingleton;
57
+ VALUE RBS_Types_Function_Param;
58
+ VALUE RBS_Types_Function;
59
+ VALUE RBS_Types_Interface;
60
+ VALUE RBS_Types_Intersection;
61
+ VALUE RBS_Types_Literal;
62
+ VALUE RBS_Types_Optional;
63
+ VALUE RBS_Types_Proc;
64
+ VALUE RBS_Types_Record;
65
+ VALUE RBS_Types_Tuple;
66
+ VALUE RBS_Types_Union;
67
+ VALUE RBS_Types_Variable;
68
+ VALUE RBS_Types;
69
+ VALUE RBS_MethodType;
70
+
71
+ VALUE RBS_ParsingError;
72
+
73
+ void rbs__init_constants() {
74
+ ID id_RBS = rb_intern_const("RBS");
75
+
76
+ RBS = rb_const_get(rb_cObject, id_RBS);
77
+ RBS_ParsingError = rb_const_get(RBS, rb_intern("ParsingError"));
78
+ RBS_AST = rb_const_get(RBS, rb_intern("AST"));
79
+ RBS_AST_Comment = rb_const_get(RBS_AST, rb_intern("Comment"));
80
+ RBS_AST_Annotation = rb_const_get(RBS_AST, rb_intern("Annotation"));
81
+
82
+ RBS_AST_Declarations = rb_const_get(RBS_AST, rb_intern("Declarations"));
83
+
84
+ RBS_AST_Declarations_ModuleTypeParams = rb_const_get(RBS_AST_Declarations, rb_intern("ModuleTypeParams"));
85
+ RBS_AST_Declarations_ModuleTypeParams_TypeParam = rb_const_get(RBS_AST_Declarations_ModuleTypeParams, rb_intern("TypeParam"));
86
+
87
+ RBS_AST_Declarations_Alias = rb_const_get(RBS_AST_Declarations, rb_intern("Alias"));
88
+ RBS_AST_Declarations_Constant = rb_const_get(RBS_AST_Declarations, rb_intern("Constant"));
89
+ RBS_AST_Declarations_Global = rb_const_get(RBS_AST_Declarations, rb_intern("Global"));
90
+ RBS_AST_Declarations_Interface = rb_const_get(RBS_AST_Declarations, rb_intern("Interface"));
91
+ RBS_AST_Declarations_Module = rb_const_get(RBS_AST_Declarations, rb_intern("Module"));
92
+ RBS_AST_Declarations_Module_Self = rb_const_get(RBS_AST_Declarations_Module, rb_intern("Self"));
93
+ RBS_AST_Declarations_Class = rb_const_get(RBS_AST_Declarations, rb_intern("Class"));
94
+ RBS_AST_Declarations_Class_Super = rb_const_get(RBS_AST_Declarations_Class, rb_intern("Super"));
95
+
96
+ RBS_AST_Members = rb_const_get(RBS_AST, rb_intern("Members"));
97
+ RBS_AST_Members_Alias = rb_const_get(RBS_AST_Members, rb_intern("Alias"));
98
+ RBS_AST_Members_AttrAccessor = rb_const_get(RBS_AST_Members, rb_intern("AttrAccessor"));
99
+ RBS_AST_Members_AttrReader = rb_const_get(RBS_AST_Members, rb_intern("AttrReader"));
100
+ RBS_AST_Members_AttrWriter = rb_const_get(RBS_AST_Members, rb_intern("AttrWriter"));
101
+ RBS_AST_Members_ClassInstanceVariable = rb_const_get(RBS_AST_Members, rb_intern("ClassInstanceVariable"));
102
+ RBS_AST_Members_ClassVariable = rb_const_get(RBS_AST_Members, rb_intern("ClassVariable"));
103
+ RBS_AST_Members_Extend = rb_const_get(RBS_AST_Members, rb_intern("Extend"));
104
+ RBS_AST_Members_Include = rb_const_get(RBS_AST_Members, rb_intern("Include"));
105
+ RBS_AST_Members_InstanceVariable = rb_const_get(RBS_AST_Members, rb_intern("InstanceVariable"));
106
+ RBS_AST_Members_MethodDefinition = rb_const_get(RBS_AST_Members, rb_intern("MethodDefinition"));
107
+ RBS_AST_Members_Prepend = rb_const_get(RBS_AST_Members, rb_intern("Prepend"));
108
+ RBS_AST_Members_Private = rb_const_get(RBS_AST_Members, rb_intern("Private"));
109
+ RBS_AST_Members_Public = rb_const_get(RBS_AST_Members, rb_intern("Public"));
110
+
111
+ RBS_Namespace = rb_const_get(RBS, rb_intern("Namespace"));
112
+ RBS_TypeName = rb_const_get(RBS, rb_intern("TypeName"));
113
+ RBS_Types = rb_const_get(RBS, rb_intern("Types"));
114
+ RBS_Types_Alias = rb_const_get(RBS_Types, rb_intern("Alias"));
115
+ RBS_Types_Bases = rb_const_get(RBS_Types, rb_intern("Bases"));
116
+ RBS_Types_Bases_Any = rb_const_get(RBS_Types_Bases, rb_intern("Any"));
117
+ RBS_Types_Bases_Bool = rb_const_get(RBS_Types_Bases, rb_intern("Bool"));
118
+ RBS_Types_Bases_Bottom = rb_const_get(RBS_Types_Bases, rb_intern("Bottom"));
119
+ RBS_Types_Bases_Class = rb_const_get(RBS_Types_Bases, rb_intern("Class"));
120
+ RBS_Types_Bases_Instance = rb_const_get(RBS_Types_Bases, rb_intern("Instance"));
121
+ RBS_Types_Bases_Nil = rb_const_get(RBS_Types_Bases, rb_intern("Nil"));
122
+ RBS_Types_Bases_Self = rb_const_get(RBS_Types_Bases, rb_intern("Self"));
123
+ RBS_Types_Bases_Top = rb_const_get(RBS_Types_Bases, rb_intern("Top"));
124
+ RBS_Types_Bases_Void = rb_const_get(RBS_Types_Bases, rb_intern("Void"));
125
+ RBS_Types_Block = rb_const_get(RBS_Types, rb_intern("Block"));
126
+ RBS_Types_ClassInstance = rb_const_get(RBS_Types, rb_intern("ClassInstance"));
127
+ RBS_Types_ClassSingleton = rb_const_get(RBS_Types, rb_intern("ClassSingleton"));
128
+ RBS_Types_Function = rb_const_get(RBS_Types, rb_intern("Function"));
129
+ RBS_Types_Function_Param = rb_const_get(RBS_Types_Function, rb_intern("Param"));
130
+ RBS_Types_Interface = rb_const_get(RBS_Types, rb_intern("Interface"));
131
+ RBS_Types_Intersection = rb_const_get(RBS_Types, rb_intern("Intersection"));
132
+ RBS_Types_Literal = rb_const_get(RBS_Types, rb_intern("Literal"));
133
+ RBS_Types_Optional = rb_const_get(RBS_Types, rb_intern("Optional"));
134
+ RBS_Types_Proc = rb_const_get(RBS_Types, rb_intern("Proc"));
135
+ RBS_Types_Record = rb_const_get(RBS_Types, rb_intern("Record"));
136
+ RBS_Types_Tuple = rb_const_get(RBS_Types, rb_intern("Tuple"));
137
+ RBS_Types_Union = rb_const_get(RBS_Types, rb_intern("Union"));
138
+ RBS_Types_Variable = rb_const_get(RBS_Types, rb_intern("Variable"));
139
+ RBS_MethodType = rb_const_get(RBS, rb_intern("MethodType"));
140
+ }
@@ -0,0 +1,72 @@
1
+ #ifndef RBS__CONSTANTS_H
2
+ #define RBS__CONSTANTS_H
3
+
4
+ extern VALUE RBS;
5
+
6
+ extern VALUE RBS_AST;
7
+ extern VALUE RBS_AST_Annotation;
8
+ extern VALUE RBS_AST_Comment;
9
+
10
+ extern VALUE RBS_AST_Declarations;
11
+ extern VALUE RBS_AST_Declarations_Alias;
12
+ extern VALUE RBS_AST_Declarations_Class_Super;
13
+ extern VALUE RBS_AST_Declarations_Class;
14
+ extern VALUE RBS_AST_Declarations_Constant;
15
+ extern VALUE RBS_AST_Declarations_Global;
16
+ extern VALUE RBS_AST_Declarations_Interface;
17
+ extern VALUE RBS_AST_Declarations_Module_Self;
18
+ extern VALUE RBS_AST_Declarations_Module;
19
+ extern VALUE RBS_AST_Declarations_ModuleTypeParams_TypeParam;
20
+ extern VALUE RBS_AST_Declarations_ModuleTypeParams;
21
+
22
+ extern VALUE RBS_AST_Members;
23
+ extern VALUE RBS_AST_Members_Alias;
24
+ extern VALUE RBS_AST_Members_AttrAccessor;
25
+ extern VALUE RBS_AST_Members_AttrReader;
26
+ extern VALUE RBS_AST_Members_AttrWriter;
27
+ extern VALUE RBS_AST_Members_ClassInstanceVariable;
28
+ extern VALUE RBS_AST_Members_ClassVariable;
29
+ extern VALUE RBS_AST_Members_Extend;
30
+ extern VALUE RBS_AST_Members_Include;
31
+ extern VALUE RBS_AST_Members_InstanceVariable;
32
+ extern VALUE RBS_AST_Members_MethodDefinition;
33
+ extern VALUE RBS_AST_Members_Prepend;
34
+ extern VALUE RBS_AST_Members_Private;
35
+ extern VALUE RBS_AST_Members_Public;
36
+
37
+ extern VALUE RBS_MethodType;
38
+ extern VALUE RBS_Namespace;
39
+
40
+ extern VALUE RBS_ParsingError;
41
+ extern VALUE RBS_TypeName;
42
+
43
+ extern VALUE RBS_Types;
44
+ extern VALUE RBS_Types_Alias;
45
+ extern VALUE RBS_Types_Bases;
46
+ extern VALUE RBS_Types_Bases_Any;
47
+ extern VALUE RBS_Types_Bases_Bool;
48
+ extern VALUE RBS_Types_Bases_Bottom;
49
+ extern VALUE RBS_Types_Bases_Class;
50
+ extern VALUE RBS_Types_Bases_Instance;
51
+ extern VALUE RBS_Types_Bases_Nil;
52
+ extern VALUE RBS_Types_Bases_Self;
53
+ extern VALUE RBS_Types_Bases_Top;
54
+ extern VALUE RBS_Types_Bases_Void;
55
+ extern VALUE RBS_Types_Block;
56
+ extern VALUE RBS_Types_ClassInstance;
57
+ extern VALUE RBS_Types_ClassSingleton;
58
+ extern VALUE RBS_Types_Function_Param;
59
+ extern VALUE RBS_Types_Function;
60
+ extern VALUE RBS_Types_Interface;
61
+ extern VALUE RBS_Types_Intersection;
62
+ extern VALUE RBS_Types_Literal;
63
+ extern VALUE RBS_Types_Optional;
64
+ extern VALUE RBS_Types_Proc;
65
+ extern VALUE RBS_Types_Record;
66
+ extern VALUE RBS_Types_Tuple;
67
+ extern VALUE RBS_Types_Union;
68
+ extern VALUE RBS_Types_Variable;
69
+
70
+ void rbs__init_constants();
71
+
72
+ #endif
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+ $INCFLAGS << " -I$(top_srcdir)" if $extmk
3
+ create_makefile 'extension'
@@ -0,0 +1,1070 @@
1
+ #include "rbs_extension.h"
2
+
3
+ #define ONE_CHAR_PATTERN(c, t) case c: tok = next_token(state, t); break
4
+
5
+ /**
6
+ * Returns one character at current.
7
+ *
8
+ * ... A B C ...
9
+ * ^ current => A
10
+ * */
11
+ #define peek(state) rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string))
12
+
13
+ static const char *RBS_TOKENTYPE_NAMES[] = {
14
+ "NullType",
15
+ "pEOF",
16
+ "ErrorToken",
17
+
18
+ "pLPAREN", /* ( */
19
+ "pRPAREN", /* ) */
20
+ "pCOLON", /* : */
21
+ "pCOLON2", /* :: */
22
+ "pLBRACKET", /* [ */
23
+ "pRBRACKET", /* ] */
24
+ "pLBRACE", /* { */
25
+ "pRBRACE", /* } */
26
+ "pHAT", /* ^ */
27
+ "pARROW", /* -> */
28
+ "pFATARROW", /* => */
29
+ "pCOMMA", /* , */
30
+ "pBAR", /* | */
31
+ "pAMP", /* & */
32
+ "pSTAR", /* * */
33
+ "pSTAR2", /* ** */
34
+ "pDOT", /* . */
35
+ "pDOT3", /* ... */
36
+ "pBANG", /* ! */
37
+ "pQUESTION", /* ? */
38
+ "pLT", /* < */
39
+ "pEQ", /* = */
40
+
41
+ "kBOOL", /* bool */
42
+ "kBOT", /* bot */
43
+ "kCLASS", /* class */
44
+ "kFALSE", /* false */
45
+ "kINSTANCE", /* instance */
46
+ "kINTERFACE", /* interface */
47
+ "kNIL", /* nil */
48
+ "kSELF", /* self */
49
+ "kSINGLETON", /* singleton */
50
+ "kTOP", /* top */
51
+ "kTRUE", /* true */
52
+ "kVOID", /* void */
53
+ "kTYPE", /* type */
54
+ "kUNCHECKED", /* unchecked */
55
+ "kIN", /* in */
56
+ "kOUT", /* out */
57
+ "kEND", /* end */
58
+ "kDEF", /* def */
59
+ "kINCLUDE", /* include */
60
+ "kEXTEND", /* extend */
61
+ "kPREPEND", /* prepend */
62
+ "kALIAS", /* alias */
63
+ "kMODULE", /* module */
64
+ "kATTRREADER", /* attr_reader */
65
+ "kATTRWRITER", /* attr_writer */
66
+ "kATTRACCESSOR", /* attr_accessor */
67
+ "kPUBLIC", /* public */
68
+ "kPRIVATE", /* private */
69
+ "kUNTYPED", /* untyped */
70
+
71
+ "tLIDENT", /* Identifiers starting with lower case */
72
+ "tUIDENT", /* Identifiers starting with upper case */
73
+ "tULIDENT", /* Identifiers starting with `_` */
74
+ "tULLIDENT",
75
+ "tGIDENT", /* Identifiers starting with `$` */
76
+ "tAIDENT", /* Identifiers starting with `@` */
77
+ "tA2IDENT", /* Identifiers starting with `@@` */
78
+ "tBANGIDENT",
79
+ "tEQIDENT",
80
+ "tQIDENT", /* Quoted identifier */
81
+ "tOPERATOR", /* Operator identifier */
82
+
83
+ "tCOMMENT",
84
+ "tLINECOMMENT",
85
+
86
+ "tDQSTRING", /* Double quoted string */
87
+ "tSQSTRING", /* Single quoted string */
88
+ "tINTEGER", /* Integer */
89
+ "tSYMBOL", /* Symbol */
90
+ "tDQSYMBOL",
91
+ "tSQSYMBOL",
92
+ "tANNOTATION", /* Annotation */
93
+ };
94
+
95
+ token NullToken = { NullType };
96
+ position NullPosition = { -1, -1, -1, -1 };
97
+ range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
98
+
99
+ const char *token_type_str(enum TokenType type) {
100
+ return RBS_TOKENTYPE_NAMES[type];
101
+ }
102
+
103
+ unsigned int peekn(lexstate *state, unsigned int chars[], size_t length) { /* decode up to `length` codepoints at `current` into `chars` without moving the lexer; returns the bytes they occupy */
104
+ int byteoffset = 0;
105
+
106
+ rb_encoding *encoding = rb_enc_get(state->string);
107
+ char *start = RSTRING_PTR(state->string) + state->current.byte_pos;
108
+ char *end = RSTRING_END(state->string);
109
+
110
+ for (size_t i = 0; i < length; i++)
111
+ {
112
+ chars[i] = (start + byteoffset < end) ? rb_enc_mbc_to_codepoint(start + byteoffset, end, encoding) : 0; /* at or past the end of input: report 0 instead of decoding bytes beyond the buffer */
113
+ byteoffset += (start + byteoffset < end) ? rb_enc_codelen(chars[i], encoding) : 0; /* reuse the encoding fetched above; never count bytes past the end */
114
+ }
115
+
116
+ return byteoffset;
117
+ }
118
+
119
+ int token_chars(token tok) {
120
+ return tok.range.end.char_pos - tok.range.start.char_pos;
121
+ }
122
+
123
+ int token_bytes(token tok) {
124
+ return RANGE_BYTES(tok.range);
125
+ }
126
+
127
+ /**
128
+ * ... token ...
129
+ * ^ start
130
+ * ^ current
131
+ *
132
+ * */
133
+ token next_token(lexstate *state, enum TokenType type) {
134
+ token t;
135
+
136
+ t.type = type;
137
+ t.range.start = state->start;
138
+ t.range.end = state->current;
139
+ state->start = state->current;
140
+ state->first_token_of_line = false;
141
+
142
+ return t;
143
+ }
144
+
145
+ void advance_skip(lexstate *state, unsigned int c, bool skip) {
146
+ int len = rb_enc_codelen(c, rb_enc_get(state->string));
147
+
148
+ state->current.char_pos += 1;
149
+ state->current.byte_pos += len;
150
+
151
+ if (c == '\n') {
152
+ state->current.line += 1;
153
+ state->current.column = 0;
154
+ state->first_token_of_line = true;
155
+ } else {
156
+ state->current.column += 1;
157
+ }
158
+
159
+ if (skip) {
160
+ state->start = state->current;
161
+ }
162
+ }
163
+
164
+ void advance_char(lexstate *state, unsigned int c) {
165
+ advance_skip(state, c, false);
166
+ }
167
+
168
+ void skip_char(lexstate *state, unsigned int c) {
169
+ advance_skip(state, c, true);
170
+ }
171
+
172
+ void skip(lexstate *state) { /* consume the character at `current` and move `start` past it as well */
173
+ unsigned int c = peek(state); /* keep the full codepoint: `unsigned char` truncated multibyte characters, so advance_skip computed the wrong byte length */
174
+ skip_char(state, c);
175
+ }
176
+
177
+ void advance(lexstate *state) { /* consume the character at `current`, leaving `start` where it is */
178
+ unsigned int c = peek(state); /* keep the full codepoint: `unsigned char` truncated multibyte characters, so advance_skip computed the wrong byte length */
179
+ advance_char(state, c);
180
+ }
181
+
182
+ /*
183
+ 1. Peek one character from state
184
+ 2. If the character read equals the given `c`, advance past it and return true.
185
+ 3. Return false otherwise.
186
+ */
187
+ static bool advance_next_character_if(lexstate *state, unsigned int c) {
188
+ if (peek(state) == c) {
189
+ advance_char(state, c);
190
+ return true;
191
+ } else {
192
+ return false;
193
+ }
194
+ }
195
+
196
+ /*
197
+ ... 0 1 ...
198
+ ^ current
199
+ ^ current (return)
200
+ */
201
+ static token lex_number(lexstate *state) {
202
+ unsigned int c;
203
+
204
+ while (true) {
205
+ c = peek(state);
206
+
207
+ if (rb_isdigit(c) || c == '_') {
208
+ advance_char(state, c);
209
+ } else {
210
+ break;
211
+ }
212
+ }
213
+
214
+ return next_token(state, tINTEGER);
215
+ }
216
+
217
+ /*
218
+ lex_hyphen ::= - (tOPERATOR)
219
+ | - @ (tOPERATOR)
220
+ | - > (pARROW)
221
+ | - 1 ... (tINTEGER)
222
+ */
223
+ static token lex_hyphen(lexstate* state) {
224
+ if (advance_next_character_if(state, '>')) {
225
+ return next_token(state, pARROW);
226
+ } else if (advance_next_character_if(state, '@')) {
227
+ return next_token(state, tOPERATOR);
228
+ } else {
229
+ unsigned int c = peek(state);
230
+
231
+ if (rb_isdigit(c)) {
232
+ advance_char(state, c);
233
+ return lex_number(state);
234
+ } else {
235
+ return next_token(state, tOPERATOR);
236
+ }
237
+ }
238
+ }
239
+
240
+ /*
241
+ lex_plus ::= +
242
+ | + @
243
+ | + \d
244
+ */
245
+ static token lex_plus(lexstate *state) {
246
+ if (advance_next_character_if(state, '@')) {
247
+ return next_token(state, tOPERATOR);
248
+ } else if (rb_isdigit(peek(state))) {
249
+ return lex_number(state);
250
+ } else {
251
+ return next_token(state, tOPERATOR);
252
+ }
253
+ }
254
+
255
+ /*
256
+ lex_dot ::= . pDOT
257
+ | . . . pDOT3
258
+ */
259
+ static token lex_dot(lexstate *state) {
260
+ unsigned int cs[2];
261
+
262
+ peekn(state, cs, 2);
263
+
264
+ if (cs[0] == '.' && cs[1] == '.') {
265
+ advance_char(state, '.');
266
+ advance_char(state, '.');
267
+ return next_token(state, pDOT3);
268
+ } else {
269
+ return next_token(state, pDOT);
270
+ }
271
+ }
272
+
273
+ /*
274
+ lex_eq ::= =
275
+ | ==
276
+ | ===
277
+ | =~
278
+ | =>
279
+ */
280
+ static token lex_eq(lexstate *state) {
281
+ unsigned int cs[2];
282
+ peekn(state, cs, 2);
283
+
284
+ if (cs[0] == '=' && cs[1] == '=') {
285
+ // ===
286
+ advance_char(state, cs[0]);
287
+ advance_char(state, cs[1]);
288
+ return next_token(state, tOPERATOR);
289
+ } else if (cs[0] == '=') {
290
+ // ==
291
+ advance_char(state, cs[0]);
292
+ return next_token(state, tOPERATOR);
293
+ } else if (cs[0] == '~') {
294
+ // =~
295
+ advance_char(state, cs[0]);
296
+ return next_token(state, tOPERATOR);
297
+ } else if (cs[0] == '>') {
298
+ // =>
299
+ advance_char(state, cs[0]);
300
+ return next_token(state, pFATARROW);
301
+ } else {
302
+ return next_token(state, pEQ);
303
+ }
304
+ }
305
+
306
+ /*
307
+ underscore ::= _A tULIDENT
308
+ | _a tULLIDENT
309
+ | _ tULLIDENT
310
+ */
311
+ static token lex_underscore(lexstate *state) {
312
+ unsigned int c;
313
+
314
+ c = peek(state);
315
+
316
+ if ('A' <= c && c <= 'Z') {
317
+ advance_char(state, c);
318
+
319
+ while (true) {
320
+ c = peek(state);
321
+
322
+ if (rb_isalnum(c) || c == '_') {
323
+ // ok
324
+ advance_char(state, c);
325
+ } else {
326
+ break;
327
+ }
328
+ }
329
+
330
+ return next_token(state, tULIDENT);
331
+ } else if (rb_isalnum(c) || c == '_') {
332
+ advance_char(state, c);
333
+
334
+ while (true) {
335
+ c = peek(state);
336
+
337
+ if (rb_isalnum(c) || c == '_') {
338
+ // ok
339
+ advance_char(state, c);
340
+ } else {
341
+ break;
342
+ }
343
+ }
344
+
345
+ if (c == '!') {
346
+ advance_char(state, c);
347
+ return next_token(state, tBANGIDENT);
348
+ } else if (c == '=') {
349
+ advance_char(state, c);
350
+ return next_token(state, tEQIDENT);
351
+ } else {
352
+ return next_token(state, tULLIDENT);
353
+ }
354
+ } else {
355
+ return next_token(state, tULLIDENT);
356
+ }
357
+ }
358
+
359
+ static bool is_opr(unsigned int c) {
360
+ switch (c) {
361
+ case ':':
362
+ case ';':
363
+ case '=':
364
+ case '.':
365
+ case ',':
366
+ case '!':
367
+ case '"':
368
+ case '$':
369
+ case '%':
370
+ case '&':
371
+ case '(':
372
+ case ')':
373
+ case '-':
374
+ case '+':
375
+ case '~':
376
+ case '|':
377
+ case '\\':
378
+ case '\'':
379
+ case '[':
380
+ case ']':
381
+ case '{':
382
+ case '}':
383
+ case '*':
384
+ case '/':
385
+ case '<':
386
+ case '>':
387
+ case '^':
388
+ return true;
389
+ default:
390
+ return false;
391
+ }
392
+ }
393
+
394
+ static token lex_global(lexstate *state) {
395
+ unsigned int c;
396
+
397
+ c = peek(state);
398
+
399
+ if (rb_isspace(c) || c == 0) {
400
+ return next_token(state, ErrorToken);
401
+ }
402
+
403
+ if (rb_isdigit(c)) {
404
+ // `$` [`0`-`9`]+
405
+ advance_char(state, c);
406
+
407
+ while (true) {
408
+ c = peek(state);
409
+ if (rb_isdigit(c)) {
410
+ advance_char(state, c);
411
+ } else {
412
+ return next_token(state, tGIDENT);
413
+ }
414
+ }
415
+ }
416
+
417
+ if (c == '-') {
418
+ // `$` `-` [a-zA-Z0-9_]
419
+ advance_char(state, c);
420
+ c = peek(state);
421
+
422
+ if (rb_isalnum(c) || c == '_') {
423
+ advance_char(state, c);
424
+ return next_token(state, tGIDENT);
425
+ } else {
426
+ return next_token(state, ErrorToken);
427
+ }
428
+ }
429
+
430
+ switch (c) {
431
+ case '~':
432
+ case '*':
433
+ case '$':
434
+ case '?':
435
+ case '!':
436
+ case '@':
437
+ case '\\':
438
+ case '/':
439
+ case ';':
440
+ case ',':
441
+ case '.':
442
+ case '=':
443
+ case ':':
444
+ case '<':
445
+ case '>':
446
+ case '"':
447
+ case '&':
448
+ case '\'':
449
+ case '`':
450
+ case '+':
451
+ advance_char(state, c);
452
+ return next_token(state, tGIDENT);
453
+
454
+ default:
455
+ if (is_opr(c) || c == 0) {
456
+ return next_token(state, ErrorToken);
457
+ }
458
+
459
+ while (true) {
460
+ advance_char(state, c);
461
+ c = peek(state);
462
+
463
+ if (rb_isspace(c) || is_opr(c) || c == 0) {
464
+ break;
465
+ }
466
+ }
467
+
468
+ return next_token(state, tGIDENT);
469
+ }
470
+ }
471
+
472
+ void pp(VALUE object) {
473
+ VALUE inspect = rb_funcall(object, rb_intern("inspect"), 0);
474
+ printf("pp >> %s\n", RSTRING_PTR(inspect));
475
+ }
476
+
477
+ static token lex_ident(lexstate *state, enum TokenType default_type) {
478
+ unsigned int c;
479
+ token tok;
480
+
481
+ while (true) {
482
+ c = peek(state);
483
+ if (rb_isalnum(c) || c == '_') {
484
+ advance_char(state, c);
485
+ } else if (c == '!') {
486
+ advance_char(state, c);
487
+ tok = next_token(state, tBANGIDENT);
488
+ break;
489
+ } else if (c == '=') {
490
+ advance_char(state, c);
491
+ tok = next_token(state, tEQIDENT);
492
+ break;
493
+ } else {
494
+ tok = next_token(state, default_type);
495
+ break;
496
+ }
497
+ }
498
+
499
+ if (tok.type == tLIDENT) {
500
+ VALUE string = rb_enc_str_new(
501
+ RSTRING_PTR(state->string) + tok.range.start.byte_pos,
502
+ RANGE_BYTES(tok.range),
503
+ rb_enc_get(state->string)
504
+ );
505
+
506
+ VALUE type = rb_hash_aref(RBS_Parser_KEYWORDS, string);
507
+ if (FIXNUM_P(type)) {
508
+ tok.type = FIX2INT(type);
509
+ }
510
+ }
511
+
512
+ return tok;
513
+ }
514
+
515
+ static token lex_comment(lexstate *state, enum TokenType type) {
516
+ unsigned int c;
517
+ /* Builds a `type` token from the rest of the current line; one leading space, if present, is consumed first. */
518
+ c = peek(state);
519
+ if (c == ' ') {
520
+ advance_char(state, c);
521
+ }
522
+
523
+ while (true) {
524
+ c = peek(state);
525
+
526
+ if (c == '\n' || c == '\0') {
527
+ break;
528
+ } else {
529
+ advance_char(state, c);
530
+ }
531
+ }
532
+ /* The token ends just before the line terminator. */
533
+ token tok = next_token(state, type);
534
+
535
+ if (c == '\n') skip_char(state, c); /* drop the newline only; at end of input ('\0') stay put so `current` never moves past the buffer */
536
+
537
+ return tok;
538
+ }
539
+
540
+ /*
541
+ ... " ... " ...
542
+ ^ start
543
+ ^ current
544
+ ^ current (after)
545
+ */
546
+ static token lex_dqstring(lexstate *state) {
547
+ unsigned int c;
548
+
549
+ while (true) {
550
+ c = peek(state);
551
+ advance_char(state, c);
552
+
553
+ if (c == '\\') {
554
+ if (peek(state) == '"') {
555
+ advance_char(state, c);
556
+ c = peek(state);
557
+ }
558
+ } else if (c == '"') {
559
+ break;
560
+ }
561
+ }
562
+
563
+ return next_token(state, tDQSTRING);
564
+ }
565
+
566
+ /*
567
+ ... @ foo ...
568
+ ^ start
569
+ ^ current
570
+ ^ current (return)
571
+
572
+ ... @ @ foo ...
573
+ ^ start
574
+ ^ current
575
+ ^ current (return)
576
+ */
577
+ static token lex_ivar(lexstate *state) {
578
+ unsigned int c;
579
+
580
+ enum TokenType type = tAIDENT;
581
+
582
+ c = peek(state);
583
+
584
+ if (c == '@') {
585
+ type = tA2IDENT;
586
+ advance_char(state, c);
587
+ c = peek(state);
588
+ }
589
+
590
+ if (rb_isalpha(c) || c == '_') {
591
+ advance_char(state, c);
592
+ c = peek(state);
593
+ } else {
594
+ return next_token(state, ErrorToken);
595
+ }
596
+
597
+ while (rb_isalnum(c) || c == '_') {
598
+ advance_char(state, c);
599
+ c = peek(state);
600
+ }
601
+
602
+ return next_token(state, type);
603
+ }
604
+
605
+ /*
606
+ ... ' ... ' ...
607
+ ^ start
608
+ ^ current
609
+ ^ current (after)
610
+ */
611
+ static token lex_sqstring(lexstate *state) {
612
+ unsigned int c;
613
+
614
+ c = peek(state);
615
+
616
+ while (true) {
617
+ c = peek(state);
618
+ advance_char(state, c);
619
+
620
+ if (c == '\\') {
621
+ if (peek(state) == '\'') {
622
+ advance_char(state, c);
623
+ c = peek(state);
624
+ }
625
+ } else if (c == '\'') {
626
+ break;
627
+ }
628
+ }
629
+
630
+ return next_token(state, tSQSTRING);
631
+ }
632
+
633
/*
  EQPOINTS2 / EQPOINTS3: true when the given code points equal the first two
  (resp. three) characters of the string `s`. Every argument is parenthesized
  so that arbitrary expressions can be passed safely.
*/
#define EQPOINTS2(c0, c1, s) ((c0) == (s)[0] && (c1) == (s)[1])
#define EQPOINTS3(c0, c1, c2, s) ((c0) == (s)[0] && (c1) == (s)[1] && (c2) == (s)[2])
635
+
636
+ /*
637
+ ... : @ ...
638
+ ^ start
639
+ ^ current
640
+ ^ current (return)
641
+ */
642
+ static token lex_colon_symbol(lexstate *state) {
643
+ unsigned int c[3];
644
+ peekn(state, c, 3);
645
+
646
+ switch (c[0]) {
647
+ case '|':
648
+ case '&':
649
+ case '/':
650
+ case '%':
651
+ case '~':
652
+ case '`':
653
+ case '^':
654
+ advance_char(state, c[0]);
655
+ return next_token(state, tSYMBOL);
656
+ case '=':
657
+ if (EQPOINTS2(c[0], c[1], "=~")) {
658
+ // :=~
659
+ advance_char(state, c[0]);
660
+ advance_char(state, c[1]);
661
+ return next_token(state, tSYMBOL);
662
+ } else if (EQPOINTS3(c[0], c[1], c[2], "===")) {
663
+ // :===
664
+ advance_char(state, c[0]);
665
+ advance_char(state, c[1]);
666
+ advance_char(state, c[2]);
667
+ return next_token(state, tSYMBOL);
668
+ } else if (EQPOINTS2(c[0], c[1], "==")) {
669
+ // :==
670
+ advance_char(state, c[0]);
671
+ advance_char(state, c[1]);
672
+ return next_token(state, tSYMBOL);
673
+ }
674
+ break;
675
+ case '<':
676
+ if (EQPOINTS3(c[0], c[1], c[2], "<=>")) {
677
+ advance_char(state, c[0]);
678
+ advance_char(state, c[1]);
679
+ advance_char(state, c[2]);
680
+ } else if (EQPOINTS2(c[0], c[1], "<=") || EQPOINTS2(c[0], c[1], "<<")) {
681
+ advance_char(state, c[0]);
682
+ advance_char(state, c[1]);
683
+ } else {
684
+ advance_char(state, c[0]);
685
+ }
686
+ return next_token(state, tSYMBOL);
687
+ case '>':
688
+ if (EQPOINTS2(c[0], c[1], ">=") || EQPOINTS2(c[0], c[1], ">>")) {
689
+ advance_char(state, c[0]);
690
+ advance_char(state, c[1]);
691
+ } else {
692
+ advance_char(state, c[0]);
693
+ }
694
+ return next_token(state, tSYMBOL);
695
+ case '-':
696
+ case '+':
697
+ if (EQPOINTS2(c[0], c[1], "+@") || EQPOINTS2(c[0], c[1], "-@")) {
698
+ advance_char(state, c[0]);
699
+ advance_char(state, c[1]);
700
+ } else {
701
+ advance_char(state, c[0]);
702
+ }
703
+ return next_token(state, tSYMBOL);
704
+ case '*':
705
+ if (EQPOINTS2(c[0], c[1], "**")) {
706
+ advance_char(state, c[0]);
707
+ advance_char(state, c[1]);
708
+ } else {
709
+ advance_char(state, c[0]);
710
+ }
711
+ return next_token(state, tSYMBOL);
712
+ case '[':
713
+ if (EQPOINTS3(c[0], c[1], c[2], "[]=")) {
714
+ advance_char(state, c[0]);
715
+ advance_char(state, c[1]);
716
+ advance_char(state, c[2]);
717
+ } else if (EQPOINTS2(c[0], c[1], "[]")) {
718
+ advance_char(state, c[0]);
719
+ advance_char(state, c[1]);
720
+ } else {
721
+ break;
722
+ }
723
+ return next_token(state, tSYMBOL);
724
+ case '!':
725
+ if (EQPOINTS2(c[0], c[1], "!=") || EQPOINTS2(c[0], c[1], "!~")) {
726
+ advance_char(state, c[0]);
727
+ advance_char(state, c[1]);
728
+ } else {
729
+ advance_char(state, c[0]);
730
+ }
731
+ return next_token(state, tSYMBOL);
732
+ case '@': {
733
+ advance_char(state, '@');
734
+ token tok = lex_ivar(state);
735
+ if (tok.type != ErrorToken) {
736
+ tok.type = tSYMBOL;
737
+ }
738
+ return tok;
739
+ }
740
+ case '$': {
741
+ advance_char(state, '$');
742
+ token tok = lex_global(state);
743
+ if (tok.type != ErrorToken) {
744
+ tok.type = tSYMBOL;
745
+ }
746
+ return tok;
747
+ }
748
+ case '\'': {
749
+ position start = state->start;
750
+ advance_char(state, '\'');
751
+ token tok = lex_sqstring(state);
752
+ tok.type = tSQSYMBOL;
753
+ tok.range.start = start;
754
+ return tok;
755
+ }
756
+ case '"': {
757
+ position start = state->start;
758
+ advance_char(state, '"');
759
+ token tok = lex_dqstring(state);
760
+ tok.type = tDQSYMBOL;
761
+ tok.range.start = start;
762
+ return tok;
763
+ }
764
+ default:
765
+ if (rb_isalpha(c[0]) || c[0] == '_') {
766
+ position start = state->start;
767
+ token tok = lex_ident(state, NullType);
768
+ tok.range.start = start;
769
+
770
+ if (peek(state) == '?') {
771
+ if (tok.type != tBANGIDENT && tok.type != tEQIDENT) {
772
+ skip_char(state, '?');
773
+ tok.range.end = state->current;
774
+ }
775
+ }
776
+
777
+ tok.type = tSYMBOL;
778
+ return tok;
779
+ }
780
+ }
781
+
782
+ return next_token(state, pCOLON);
783
+ }
784
+
785
+ /*
786
+ ... : : ...
787
+ ^ start
788
+ ^ current
789
+ ^ current (return)
790
+
791
+ ... : ...
792
+ ^ start
793
+ ^ current (lex_colon_symbol)
794
+ */
795
+ static token lex_colon(lexstate *state) {
796
+ unsigned int c = peek(state);
797
+
798
+ if (c == ':') {
799
+ advance_char(state, c);
800
+ return next_token(state, pCOLON2);
801
+ } else {
802
+ return lex_colon_symbol(state);
803
+ }
804
+ }
805
+
806
+ /*
807
+ lex_lt ::= < (pLT)
808
+ | < < (tOPERATOR)
809
+ | < = (tOPERATOR)
810
+ | < = > (tOPERATOR)
811
+ */
812
+ static token lex_lt(lexstate *state) {
813
+ if (advance_next_character_if(state, '<')) {
814
+ return next_token(state, tOPERATOR);
815
+ } else if (advance_next_character_if(state, '=')) {
816
+ advance_next_character_if(state, '>');
817
+ return next_token(state, tOPERATOR);
818
+ } else {
819
+ return next_token(state, pLT);
820
+ }
821
+ }
822
+
823
+ /*
824
+ lex_gt ::= >
825
+ | > =
826
+ | > >
827
+ */
828
+ static token lex_gt(lexstate *state) {
829
+ advance_next_character_if(state, '=') || advance_next_character_if(state, '>');
830
+ return next_token(state, tOPERATOR);
831
+ }
832
+
833
+ /*
834
+ ... `%` `a` `{` ... `}` ...
835
+ ^ start
836
+ ^ current
837
+ ^ current (exit)
838
+ --- token
839
+ */
840
+ static token lex_percent(lexstate *state) {
841
+ unsigned int cs[2];
842
+ unsigned int end_char;
843
+
844
+ peekn(state, cs, 2);
845
+
846
+ if (cs[0] != 'a') {
847
+ return next_token(state, tOPERATOR);
848
+ }
849
+
850
+ switch (cs[1])
851
+ {
852
+ case '{':
853
+ end_char = '}';
854
+ break;
855
+ case '(':
856
+ end_char = ')';
857
+ break;
858
+ case '[':
859
+ end_char = ']';
860
+ break;
861
+ case '|':
862
+ end_char = '|';
863
+ break;
864
+ case '<':
865
+ end_char = '>';
866
+ break;
867
+ default:
868
+ return next_token(state, tOPERATOR);
869
+ }
870
+
871
+ advance_char(state, cs[0]);
872
+ advance_char(state, cs[1]);
873
+
874
+ unsigned int c;
875
+
876
+ while ((c = peek(state))) {
877
+ if (c == end_char) {
878
+ advance_char(state, c);
879
+ return next_token(state, tANNOTATION);
880
+ }
881
+ advance_char(state, c);
882
+ }
883
+
884
+ return next_token(state, ErrorToken);
885
+ }
886
+
887
+ /*
888
+ bracket ::= [ (pLBRACKET)
889
+ * ^
890
+ | [ ] (tOPERATOR)
891
+ * ^ $
892
+ | [ ] = (tOPERATOR)
893
+ * ^ $
894
+ */
895
+ static token lex_bracket(lexstate *state) {
896
+ if (advance_next_character_if(state, ']')) {
897
+ advance_next_character_if(state, '=');
898
+ return next_token(state, tOPERATOR);
899
+ } else {
900
+ return next_token(state, pLBRACKET);
901
+ }
902
+ }
903
+
904
+ /*
905
+ bracket ::= *
906
+ | * *
907
+ */
908
+ static token lex_star(lexstate *state) {
909
+ if (advance_next_character_if(state, '*')) {
910
+ return next_token(state, pSTAR2);
911
+ } else {
912
+ return next_token(state, pSTAR);
913
+ }
914
+ }
915
+
916
+ /*
917
+ bang ::= !
918
+ | ! =
919
+ | ! ~
920
+ */
921
+ static token lex_bang(lexstate *state) {
922
+ advance_next_character_if(state, '=') || advance_next_character_if(state, '~');
923
+ return next_token(state, tOPERATOR);
924
+ }
925
+
926
+ /*
927
+ backquote ::= ` (tOPERATOR)
928
+ | `[^ :][^`]` (tQIDENT)
929
+ */
930
+ static token lex_backquote(lexstate *state) {
931
+ unsigned int c = peek(state);
932
+
933
+ if (c == ' ' || c == ':') {
934
+ return next_token(state, tOPERATOR);
935
+ } else {
936
+ while (true) {
937
+ if (c == '`') {
938
+ break;
939
+ }
940
+
941
+ c = peek(state);
942
+ advance_char(state, c);
943
+ }
944
+
945
+ return next_token(state, tQIDENT);
946
+ }
947
+ }
948
+
949
+ token rbsparser_next_token(lexstate *state) {
950
+ token tok = NullToken;
951
+
952
+ unsigned int c;
953
+ bool skipping = true;
954
+
955
+ while (skipping) {
956
+ c = peek(state);
957
+
958
+ switch (c) {
959
+ case ' ':
960
+ case '\t':
961
+ case '\n':
962
+ // nop
963
+ skip_char(state, c);
964
+ break;
965
+ case '\0':
966
+ return next_token(state, pEOF);
967
+ default:
968
+ advance_char(state, c);
969
+ skipping = false;
970
+ break;
971
+ }
972
+ }
973
+
974
+ /* ... c d .. */
975
+ /* ^ state->current */
976
+ /* ^ start */
977
+ switch (c) {
978
+ case '\0': tok = next_token(state, pEOF);
979
+ ONE_CHAR_PATTERN('(', pLPAREN);
980
+ ONE_CHAR_PATTERN(')', pRPAREN);
981
+ ONE_CHAR_PATTERN(']', pRBRACKET);
982
+ ONE_CHAR_PATTERN('{', pLBRACE);
983
+ ONE_CHAR_PATTERN('}', pRBRACE);
984
+ ONE_CHAR_PATTERN(',', pCOMMA);
985
+ ONE_CHAR_PATTERN('|', pBAR);
986
+ ONE_CHAR_PATTERN('^', pHAT);
987
+ ONE_CHAR_PATTERN('&', pAMP);
988
+ ONE_CHAR_PATTERN('?', pQUESTION);
989
+ ONE_CHAR_PATTERN('/', tOPERATOR);
990
+ ONE_CHAR_PATTERN('~', tOPERATOR);
991
+ case '[':
992
+ tok = lex_bracket(state);
993
+ break;
994
+ case '-':
995
+ tok = lex_hyphen(state);
996
+ break;
997
+ case '+':
998
+ tok = lex_plus(state);
999
+ break;
1000
+ case '*':
1001
+ tok = lex_star(state);
1002
+ break;
1003
+ case '<':
1004
+ tok = lex_lt(state);
1005
+ break;
1006
+ case '=':
1007
+ tok = lex_eq(state);
1008
+ break;
1009
+ case '>':
1010
+ tok = lex_gt(state);
1011
+ break;
1012
+ case '!':
1013
+ tok = lex_bang(state);
1014
+ break;
1015
+ case '#':
1016
+ if (state->first_token_of_line) {
1017
+ tok = lex_comment(state, tLINECOMMENT);
1018
+ } else {
1019
+ tok = lex_comment(state, tCOMMENT);
1020
+ }
1021
+ break;
1022
+ case ':':
1023
+ tok = lex_colon(state);
1024
+ break;
1025
+ case '.':
1026
+ tok = lex_dot(state);
1027
+ break;
1028
+ case '_':
1029
+ tok = lex_underscore(state);
1030
+ break;
1031
+ case '$':
1032
+ tok = lex_global(state);
1033
+ break;
1034
+ case '@':
1035
+ tok = lex_ivar(state);
1036
+ break;
1037
+ case '"':
1038
+ tok = lex_dqstring(state);
1039
+ break;
1040
+ case '\'':
1041
+ tok = lex_sqstring(state);
1042
+ break;
1043
+ case '%':
1044
+ tok = lex_percent(state);
1045
+ break;
1046
+ case '`':
1047
+ tok = lex_backquote(state);
1048
+ break;
1049
+ default:
1050
+ if (rb_isalpha(c) && rb_isupper(c)) {
1051
+ tok = lex_ident(state, tUIDENT);
1052
+ }
1053
+ if (rb_isalpha(c) && rb_islower(c)) {
1054
+ tok = lex_ident(state, tLIDENT);
1055
+ }
1056
+ if (rb_isdigit(c)) {
1057
+ tok = lex_number(state);
1058
+ }
1059
+ }
1060
+
1061
+ if (tok.type == NullType) {
1062
+ tok = next_token(state, ErrorToken);
1063
+ }
1064
+
1065
+ return tok;
1066
+ }
1067
+
1068
+ char *peek_token(lexstate *state, token tok) {
1069
+ return RSTRING_PTR(state->string) + tok.range.start.byte_pos;
1070
+ }