rbs 1.6.2 → 1.7.0.beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +0 -4
  3. data/.gitignore +1 -0
  4. data/CHANGELOG.md +6 -0
  5. data/Gemfile +1 -0
  6. data/Rakefile +7 -22
  7. data/core/kernel.rbs +4 -4
  8. data/core/trace_point.rbs +1 -1
  9. data/ext/rbs/extension/constants.c +140 -0
  10. data/ext/rbs/extension/constants.h +72 -0
  11. data/ext/rbs/extension/extconf.rb +3 -0
  12. data/ext/rbs/extension/lexer.c +1070 -0
  13. data/ext/rbs/extension/lexer.h +145 -0
  14. data/ext/rbs/extension/location.c +295 -0
  15. data/ext/rbs/extension/location.h +59 -0
  16. data/ext/rbs/extension/main.c +9 -0
  17. data/ext/rbs/extension/parser.c +2418 -0
  18. data/ext/rbs/extension/parser.h +23 -0
  19. data/ext/rbs/extension/parserstate.c +313 -0
  20. data/ext/rbs/extension/parserstate.h +141 -0
  21. data/ext/rbs/extension/rbs_extension.h +40 -0
  22. data/ext/rbs/extension/ruby_objs.c +585 -0
  23. data/ext/rbs/extension/ruby_objs.h +46 -0
  24. data/ext/rbs/extension/unescape.c +65 -0
  25. data/goodcheck.yml +1 -1
  26. data/lib/rbs/ast/comment.rb +0 -12
  27. data/lib/rbs/buffer.rb +4 -0
  28. data/lib/rbs/cli.rb +5 -8
  29. data/lib/rbs/collection/sources/git.rb +18 -3
  30. data/lib/rbs/errors.rb +14 -1
  31. data/lib/rbs/location.rb +221 -217
  32. data/lib/rbs/location_aux.rb +108 -0
  33. data/lib/rbs/locator.rb +10 -7
  34. data/lib/rbs/parser_aux.rb +24 -0
  35. data/lib/rbs/types.rb +2 -3
  36. data/lib/rbs/version.rb +1 -1
  37. data/lib/rbs/writer.rb +4 -2
  38. data/lib/rbs.rb +3 -7
  39. data/rbs.gemspec +2 -1
  40. data/sig/ancestor_builder.rbs +2 -2
  41. data/sig/annotation.rbs +2 -2
  42. data/sig/comment.rbs +7 -7
  43. data/sig/constant_table.rbs +1 -1
  44. data/sig/declarations.rbs +9 -9
  45. data/sig/definition.rbs +1 -1
  46. data/sig/definition_builder.rbs +2 -2
  47. data/sig/errors.rbs +30 -25
  48. data/sig/location.rbs +42 -79
  49. data/sig/locator.rbs +2 -2
  50. data/sig/members.rbs +7 -7
  51. data/sig/method_types.rbs +3 -3
  52. data/sig/parser.rbs +11 -21
  53. data/sig/types.rbs +45 -27
  54. data/sig/writer.rbs +1 -1
  55. data/stdlib/json/0/json.rbs +3 -3
  56. metadata +24 -6
  57. data/lib/rbs/parser.rb +0 -3614
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de88dd41e7cee057e4668ffd3ed94e91d5d09746cdbfcd26602cc04b9ef2abd1
4
- data.tar.gz: '06092d8a703d157bc214f81155bc24009491db68073fbcedc5fde9b09aca85e7'
3
+ metadata.gz: 6f2e7e51d08feb7b1a562ac43c679da8fc6f0a7cfc2ecef65e11c1e8c3d0d2b9
4
+ data.tar.gz: d41ed33c0b5de01cd1746faa3572e56f651ecfc808c046cdb4166a0ec357138e
5
5
  SHA512:
6
- metadata.gz: 2a53e2b25733fd63c832748ae629275becb31badd9b120cf2b8c72c66cea7e084d0aefefeaa09c00f2364ab1335ee030b0b54e8a14f3f7dbe08e1523cfe1d7e2
7
- data.tar.gz: 66b02700d2a81f5d8367451356af01a0cac8b0152194adb5f844abf92ffdfc16bcec975f2e21302477b32ee344242d3e0407105c3cf4e7a78387fa454003a418
6
+ metadata.gz: bc198dc7d40caaa802604cb1f633098955540d838c4e1adcca635da85880703378d981a4f37c528e7f4b3720fc6f152d7fc6c7bcb11ce3e0abba76fd14a6d47c
7
+ data.tar.gz: 68c6643f9adf4761bec2a2ffb211e223cb732a37c341652c4625191b5ab6d6791c91196f3c3bc4fd5624d6d853558c33e28f8462f483f923badad8f38498a675
@@ -19,10 +19,6 @@ jobs:
19
19
  - test
20
20
  - stdlib_test
21
21
  - rubocop validate test_doc build test_generate_stdlib
22
- - confirm_parser
23
- exclude:
24
- - container_tag: master-nightly-focal
25
- job: confirm_parser
26
22
  container:
27
23
  image: rubylang/ruby:${{ matrix.container_tag }}
28
24
  steps:
data/.gitignore CHANGED
@@ -11,3 +11,4 @@
11
11
  /vendor/sigs
12
12
  /Gemfile.lock
13
13
 
14
+ lib/**/*.bundle
data/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  ## master
4
4
 
5
+ ## 1.7.0 (beta.1)
6
+
7
+ ## Library changes
8
+
9
+ * Replace RBS::Parser ([#788](https://github.com/ruby/rbs/pull/788))
10
+
5
11
  ## 1.6.2 (2021-09-09)
6
12
 
7
13
  ## Signature updates
data/Gemfile CHANGED
@@ -5,6 +5,7 @@ gemspec
5
5
 
6
6
  # Development dependencies
7
7
  gem "rake"
8
+ gem "rake-compiler"
8
9
  gem "test-unit"
9
10
  gem "rspec"
10
11
  gem "racc"
data/Rakefile CHANGED
@@ -1,15 +1,17 @@
1
1
  require "bundler/gem_tasks"
2
2
  require "rake/testtask"
3
3
  require "rbconfig"
4
+ require 'rake/extensiontask'
4
5
 
5
6
  $LOAD_PATH << File.join(__dir__, "test")
6
7
 
7
8
  ruby = ENV["RUBY"] || RbConfig.ruby
8
- racc = ENV.fetch("RACC", "racc")
9
9
  rbs = File.join(__dir__, "exe/rbs")
10
10
  bin = File.join(__dir__, "bin")
11
11
 
12
- Rake::TestTask.new(:test) do |t|
12
+ Rake::ExtensionTask.new("rbs/extension")
13
+
14
+ Rake::TestTask.new(:test => :compile) do |t|
13
15
  t.libs << "test"
14
16
  t.libs << "lib"
15
17
  t.test_files = FileList["test/**/*_test.rb"].reject do |path|
@@ -19,7 +21,7 @@ end
19
21
 
20
22
  multitask :default => [:test, :stdlib_test, :rubocop, :validate, :test_doc]
21
23
 
22
- task :test_doc => :parser do
24
+ task :test_doc do
23
25
  files = Dir.chdir(File.expand_path('..', __FILE__)) do
24
26
  `git ls-files -z`.split("\x0").select do |file| Pathname(file).extname == ".md" end
25
27
  end
@@ -27,7 +29,7 @@ task :test_doc => :parser do
27
29
  sh "#{ruby} #{__dir__}/bin/run_in_md.rb #{files.join(" ")}"
28
30
  end
29
31
 
30
- task :validate => :parser do
32
+ task :validate => :compile do
31
33
  sh "#{ruby} #{rbs} validate --silent"
32
34
 
33
35
  FileList["stdlib/*"].each do |path|
@@ -72,7 +74,7 @@ task :validate => :parser do
72
74
  end
73
75
 
74
76
  FileList["test/stdlib/**/*_test.rb"].each do |test|
75
- task test => :parser do
77
+ task test => :compile do
76
78
  sh "#{ruby} -Ilib #{bin}/test_runner.rb #{test}"
77
79
  end
78
80
  task stdlib_test: test
@@ -82,21 +84,6 @@ task :rubocop do
82
84
  sh "rubocop --parallel"
83
85
  end
84
86
 
85
- rule ".rb" => ".y" do |t|
86
- sh "#{racc} -v -o #{t.name} #{t.source}"
87
- end
88
-
89
- task :parser => "lib/rbs/parser.rb"
90
- task :test => :parser
91
- task :stdlib_test => :parser
92
- task :build => :parser
93
-
94
- task :confirm_parser do
95
- puts "Testing if parser.rb is updated with respect to parser.y"
96
- sh "#{racc} -v -o lib/rbs/parser.rb lib/rbs/parser.y"
97
- sh "git diff --exit-code lib/rbs/parser.rb"
98
- end
99
-
100
87
  namespace :generate do
101
88
  desc "Generate a test file for a stdlib class signatures"
102
89
  task :stdlib_test, [:class] do |_task, args|
@@ -225,5 +212,3 @@ task :test_generate_stdlib do
225
212
  sh "RBS_GENERATE_TEST_PATH=/tmp/Array_test.rb rake 'generate:stdlib_test[Array]'"
226
213
  sh "ruby -c /tmp/Array_test.rb"
227
214
  end
228
-
229
- CLEAN.include("lib/rbs/parser.rb")
data/core/kernel.rbs CHANGED
@@ -482,8 +482,8 @@ module Kernel : BasicObject
482
482
  | [:child, int] # redirect to the redirected file descriptor
483
483
  | :close # close the file descriptor in child process
484
484
 
485
- def self?.spawn: (String command, *String args, ?unsetenv_others: boolish, ?pgroup?: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> Integer
486
- | (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup?: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> Integer
485
+ def self?.spawn: (String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> Integer
486
+ | (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> Integer
487
487
 
488
488
  # Executes *command…* in a subshell. *command…* is one of following forms.
489
489
  #
@@ -510,8 +510,8 @@ module Kernel : BasicObject
510
510
  # *
511
511
  #
512
512
  # See `Kernel.exec` for the standard shell.
513
- def self?.system: (String command, *String args, ?unsetenv_others: boolish, ?pgroup?: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> (NilClass | FalseClass | TrueClass)
514
- | (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup?: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> (NilClass | FalseClass | TrueClass)
513
+ def self?.system: (String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> (NilClass | FalseClass | TrueClass)
514
+ | (Hash[string, string?] env, String command, *String args, ?unsetenv_others: boolish, ?pgroup: (true | Integer), ?umask: Integer, ?in: redirect_fd, ?out: redirect_fd, ?err: redirect_fd, ?close_others: boolish, ?chdir: String) -> (NilClass | FalseClass | TrueClass)
515
515
  end
516
516
 
517
517
  Kernel::RUBYGEMS_ACTIVATION_MONITOR: untyped
data/core/trace_point.rbs CHANGED
@@ -286,5 +286,5 @@ class TracePoint < Object
286
286
  # Same as TracePoint#binding:
287
287
  # trace.binding.eval('self')
288
288
  #
289
- def `self`: () -> Binding
289
+ def self: () -> Binding
290
290
  end
@@ -0,0 +1,140 @@
1
+ #include "rbs_extension.h"
2
+
3
+ VALUE RBS_Parser;
4
+ VALUE RBS_Parser_KEYWORDS;
5
+
6
+ VALUE RBS;
7
+ VALUE RBS_AST;
8
+ VALUE RBS_AST_Comment;
9
+ VALUE RBS_AST_Annotation;
10
+
11
+ VALUE RBS_AST_Declarations;
12
+
13
+ VALUE RBS_AST_Declarations_ModuleTypeParams;
14
+ VALUE RBS_AST_Declarations_ModuleTypeParams_TypeParam;
15
+
16
+ VALUE RBS_AST_Declarations_Alias;
17
+ VALUE RBS_AST_Declarations_Constant;
18
+ VALUE RBS_AST_Declarations_Global;
19
+ VALUE RBS_AST_Declarations_Interface;
20
+ VALUE RBS_AST_Declarations_Module;
21
+ VALUE RBS_AST_Declarations_Module_Self;
22
+ VALUE RBS_AST_Declarations_Class;
23
+ VALUE RBS_AST_Declarations_Class_Super;
24
+
25
+ VALUE RBS_AST_Members;
26
+ VALUE RBS_AST_Members_Alias;
27
+ VALUE RBS_AST_Members_AttrAccessor;
28
+ VALUE RBS_AST_Members_AttrReader;
29
+ VALUE RBS_AST_Members_AttrWriter;
30
+ VALUE RBS_AST_Members_ClassInstanceVariable;
31
+ VALUE RBS_AST_Members_ClassVariable;
32
+ VALUE RBS_AST_Members_Extend;
33
+ VALUE RBS_AST_Members_Include;
34
+ VALUE RBS_AST_Members_InstanceVariable;
35
+ VALUE RBS_AST_Members_MethodDefinition;
36
+ VALUE RBS_AST_Members_Prepend;
37
+ VALUE RBS_AST_Members_Private;
38
+ VALUE RBS_AST_Members_Public;
39
+
40
+ VALUE RBS_Namespace;
41
+ VALUE RBS_TypeName;
42
+
43
+ VALUE RBS_Types_Alias;
44
+ VALUE RBS_Types_Bases_Any;
45
+ VALUE RBS_Types_Bases_Bool;
46
+ VALUE RBS_Types_Bases_Bottom;
47
+ VALUE RBS_Types_Bases_Class;
48
+ VALUE RBS_Types_Bases_Instance;
49
+ VALUE RBS_Types_Bases_Nil;
50
+ VALUE RBS_Types_Bases_Self;
51
+ VALUE RBS_Types_Bases_Top;
52
+ VALUE RBS_Types_Bases_Void;
53
+ VALUE RBS_Types_Bases;
54
+ VALUE RBS_Types_Block;
55
+ VALUE RBS_Types_ClassInstance;
56
+ VALUE RBS_Types_ClassSingleton;
57
+ VALUE RBS_Types_Function_Param;
58
+ VALUE RBS_Types_Function;
59
+ VALUE RBS_Types_Interface;
60
+ VALUE RBS_Types_Intersection;
61
+ VALUE RBS_Types_Literal;
62
+ VALUE RBS_Types_Optional;
63
+ VALUE RBS_Types_Proc;
64
+ VALUE RBS_Types_Record;
65
+ VALUE RBS_Types_Tuple;
66
+ VALUE RBS_Types_Union;
67
+ VALUE RBS_Types_Variable;
68
+ VALUE RBS_Types;
69
+ VALUE RBS_MethodType;
70
+
71
+ VALUE RBS_ParsingError;
72
+
73
+ void rbs__init_constants() {
74
+ ID id_RBS = rb_intern_const("RBS");
75
+
76
+ RBS = rb_const_get(rb_cObject, id_RBS);
77
+ RBS_ParsingError = rb_const_get(RBS, rb_intern("ParsingError"));
78
+ RBS_AST = rb_const_get(RBS, rb_intern("AST"));
79
+ RBS_AST_Comment = rb_const_get(RBS_AST, rb_intern("Comment"));
80
+ RBS_AST_Annotation = rb_const_get(RBS_AST, rb_intern("Annotation"));
81
+
82
+ RBS_AST_Declarations = rb_const_get(RBS_AST, rb_intern("Declarations"));
83
+
84
+ RBS_AST_Declarations_ModuleTypeParams = rb_const_get(RBS_AST_Declarations, rb_intern("ModuleTypeParams"));
85
+ RBS_AST_Declarations_ModuleTypeParams_TypeParam = rb_const_get(RBS_AST_Declarations_ModuleTypeParams, rb_intern("TypeParam"));
86
+
87
+ RBS_AST_Declarations_Alias = rb_const_get(RBS_AST_Declarations, rb_intern("Alias"));
88
+ RBS_AST_Declarations_Constant = rb_const_get(RBS_AST_Declarations, rb_intern("Constant"));
89
+ RBS_AST_Declarations_Global = rb_const_get(RBS_AST_Declarations, rb_intern("Global"));
90
+ RBS_AST_Declarations_Interface = rb_const_get(RBS_AST_Declarations, rb_intern("Interface"));
91
+ RBS_AST_Declarations_Module = rb_const_get(RBS_AST_Declarations, rb_intern("Module"));
92
+ RBS_AST_Declarations_Module_Self = rb_const_get(RBS_AST_Declarations_Module, rb_intern("Self"));
93
+ RBS_AST_Declarations_Class = rb_const_get(RBS_AST_Declarations, rb_intern("Class"));
94
+ RBS_AST_Declarations_Class_Super = rb_const_get(RBS_AST_Declarations_Class, rb_intern("Super"));
95
+
96
+ RBS_AST_Members = rb_const_get(RBS_AST, rb_intern("Members"));
97
+ RBS_AST_Members_Alias = rb_const_get(RBS_AST_Members, rb_intern("Alias"));
98
+ RBS_AST_Members_AttrAccessor = rb_const_get(RBS_AST_Members, rb_intern("AttrAccessor"));
99
+ RBS_AST_Members_AttrReader = rb_const_get(RBS_AST_Members, rb_intern("AttrReader"));
100
+ RBS_AST_Members_AttrWriter = rb_const_get(RBS_AST_Members, rb_intern("AttrWriter"));
101
+ RBS_AST_Members_ClassInstanceVariable = rb_const_get(RBS_AST_Members, rb_intern("ClassInstanceVariable"));
102
+ RBS_AST_Members_ClassVariable = rb_const_get(RBS_AST_Members, rb_intern("ClassVariable"));
103
+ RBS_AST_Members_Extend = rb_const_get(RBS_AST_Members, rb_intern("Extend"));
104
+ RBS_AST_Members_Include = rb_const_get(RBS_AST_Members, rb_intern("Include"));
105
+ RBS_AST_Members_InstanceVariable = rb_const_get(RBS_AST_Members, rb_intern("InstanceVariable"));
106
+ RBS_AST_Members_MethodDefinition = rb_const_get(RBS_AST_Members, rb_intern("MethodDefinition"));
107
+ RBS_AST_Members_Prepend = rb_const_get(RBS_AST_Members, rb_intern("Prepend"));
108
+ RBS_AST_Members_Private = rb_const_get(RBS_AST_Members, rb_intern("Private"));
109
+ RBS_AST_Members_Public = rb_const_get(RBS_AST_Members, rb_intern("Public"));
110
+
111
+ RBS_Namespace = rb_const_get(RBS, rb_intern("Namespace"));
112
+ RBS_TypeName = rb_const_get(RBS, rb_intern("TypeName"));
113
+ RBS_Types = rb_const_get(RBS, rb_intern("Types"));
114
+ RBS_Types_Alias = rb_const_get(RBS_Types, rb_intern("Alias"));
115
+ RBS_Types_Bases = rb_const_get(RBS_Types, rb_intern("Bases"));
116
+ RBS_Types_Bases_Any = rb_const_get(RBS_Types_Bases, rb_intern("Any"));
117
+ RBS_Types_Bases_Bool = rb_const_get(RBS_Types_Bases, rb_intern("Bool"));
118
+ RBS_Types_Bases_Bottom = rb_const_get(RBS_Types_Bases, rb_intern("Bottom"));
119
+ RBS_Types_Bases_Class = rb_const_get(RBS_Types_Bases, rb_intern("Class"));
120
+ RBS_Types_Bases_Instance = rb_const_get(RBS_Types_Bases, rb_intern("Instance"));
121
+ RBS_Types_Bases_Nil = rb_const_get(RBS_Types_Bases, rb_intern("Nil"));
122
+ RBS_Types_Bases_Self = rb_const_get(RBS_Types_Bases, rb_intern("Self"));
123
+ RBS_Types_Bases_Top = rb_const_get(RBS_Types_Bases, rb_intern("Top"));
124
+ RBS_Types_Bases_Void = rb_const_get(RBS_Types_Bases, rb_intern("Void"));
125
+ RBS_Types_Block = rb_const_get(RBS_Types, rb_intern("Block"));
126
+ RBS_Types_ClassInstance = rb_const_get(RBS_Types, rb_intern("ClassInstance"));
127
+ RBS_Types_ClassSingleton = rb_const_get(RBS_Types, rb_intern("ClassSingleton"));
128
+ RBS_Types_Function = rb_const_get(RBS_Types, rb_intern("Function"));
129
+ RBS_Types_Function_Param = rb_const_get(RBS_Types_Function, rb_intern("Param"));
130
+ RBS_Types_Interface = rb_const_get(RBS_Types, rb_intern("Interface"));
131
+ RBS_Types_Intersection = rb_const_get(RBS_Types, rb_intern("Intersection"));
132
+ RBS_Types_Literal = rb_const_get(RBS_Types, rb_intern("Literal"));
133
+ RBS_Types_Optional = rb_const_get(RBS_Types, rb_intern("Optional"));
134
+ RBS_Types_Proc = rb_const_get(RBS_Types, rb_intern("Proc"));
135
+ RBS_Types_Record = rb_const_get(RBS_Types, rb_intern("Record"));
136
+ RBS_Types_Tuple = rb_const_get(RBS_Types, rb_intern("Tuple"));
137
+ RBS_Types_Union = rb_const_get(RBS_Types, rb_intern("Union"));
138
+ RBS_Types_Variable = rb_const_get(RBS_Types, rb_intern("Variable"));
139
+ RBS_MethodType = rb_const_get(RBS, rb_intern("MethodType"));
140
+ }
@@ -0,0 +1,72 @@
1
+ #ifndef RBS__CONSTANTS_H
2
+ #define RBS__CONSTANTS_H
3
+
4
+ extern VALUE RBS;
5
+
6
+ extern VALUE RBS_AST;
7
+ extern VALUE RBS_AST_Annotation;
8
+ extern VALUE RBS_AST_Comment;
9
+
10
+ extern VALUE RBS_AST_Declarations;
11
+ extern VALUE RBS_AST_Declarations_Alias;
12
+ extern VALUE RBS_AST_Declarations_Class_Super;
13
+ extern VALUE RBS_AST_Declarations_Class;
14
+ extern VALUE RBS_AST_Declarations_Constant;
15
+ extern VALUE RBS_AST_Declarations_Global;
16
+ extern VALUE RBS_AST_Declarations_Interface;
17
+ extern VALUE RBS_AST_Declarations_Module_Self;
18
+ extern VALUE RBS_AST_Declarations_Module;
19
+ extern VALUE RBS_AST_Declarations_ModuleTypeParams_TypeParam;
20
+ extern VALUE RBS_AST_Declarations_ModuleTypeParams;
21
+
22
+ extern VALUE RBS_AST_Members;
23
+ extern VALUE RBS_AST_Members_Alias;
24
+ extern VALUE RBS_AST_Members_AttrAccessor;
25
+ extern VALUE RBS_AST_Members_AttrReader;
26
+ extern VALUE RBS_AST_Members_AttrWriter;
27
+ extern VALUE RBS_AST_Members_ClassInstanceVariable;
28
+ extern VALUE RBS_AST_Members_ClassVariable;
29
+ extern VALUE RBS_AST_Members_Extend;
30
+ extern VALUE RBS_AST_Members_Include;
31
+ extern VALUE RBS_AST_Members_InstanceVariable;
32
+ extern VALUE RBS_AST_Members_MethodDefinition;
33
+ extern VALUE RBS_AST_Members_Prepend;
34
+ extern VALUE RBS_AST_Members_Private;
35
+ extern VALUE RBS_AST_Members_Public;
36
+
37
+ extern VALUE RBS_MethodType;
38
+ extern VALUE RBS_Namespace;
39
+
40
+ extern VALUE RBS_ParsingError;
41
+ extern VALUE RBS_TypeName;
42
+
43
+ extern VALUE RBS_Types;
44
+ extern VALUE RBS_Types_Alias;
45
+ extern VALUE RBS_Types_Bases;
46
+ extern VALUE RBS_Types_Bases_Any;
47
+ extern VALUE RBS_Types_Bases_Bool;
48
+ extern VALUE RBS_Types_Bases_Bottom;
49
+ extern VALUE RBS_Types_Bases_Class;
50
+ extern VALUE RBS_Types_Bases_Instance;
51
+ extern VALUE RBS_Types_Bases_Nil;
52
+ extern VALUE RBS_Types_Bases_Self;
53
+ extern VALUE RBS_Types_Bases_Top;
54
+ extern VALUE RBS_Types_Bases_Void;
55
+ extern VALUE RBS_Types_Block;
56
+ extern VALUE RBS_Types_ClassInstance;
57
+ extern VALUE RBS_Types_ClassSingleton;
58
+ extern VALUE RBS_Types_Function_Param;
59
+ extern VALUE RBS_Types_Function;
60
+ extern VALUE RBS_Types_Interface;
61
+ extern VALUE RBS_Types_Intersection;
62
+ extern VALUE RBS_Types_Literal;
63
+ extern VALUE RBS_Types_Optional;
64
+ extern VALUE RBS_Types_Proc;
65
+ extern VALUE RBS_Types_Record;
66
+ extern VALUE RBS_Types_Tuple;
67
+ extern VALUE RBS_Types_Union;
68
+ extern VALUE RBS_Types_Variable;
69
+
70
+ void rbs__init_constants();
71
+
72
+ #endif
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+ $INCFLAGS << " -I$(top_srcdir)" if $extmk
3
+ create_makefile 'extension'
@@ -0,0 +1,1070 @@
1
+ #include "rbs_extension.h"
2
+
3
+ #define ONE_CHAR_PATTERN(c, t) case c: tok = next_token(state, t); break
4
+
5
+ /**
6
+ * Returns one character at current.
7
+ *
8
+ * ... A B C ...
9
+ * ^ current => A
10
+ * */
11
+ #define peek(state) rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string))
12
+
13
+ static const char *RBS_TOKENTYPE_NAMES[] = {
14
+ "NullType",
15
+ "pEOF",
16
+ "ErrorToken",
17
+
18
+ "pLPAREN", /* ( */
19
+ "pRPAREN", /* ) */
20
+ "pCOLON", /* : */
21
+ "pCOLON2", /* :: */
22
+ "pLBRACKET", /* [ */
23
+ "pRBRACKET", /* ] */
24
+ "pLBRACE", /* { */
25
+ "pRBRACE", /* } */
26
+ "pHAT", /* ^ */
27
+ "pARROW", /* -> */
28
+ "pFATARROW", /* => */
29
+ "pCOMMA", /* , */
30
+ "pBAR", /* | */
31
+ "pAMP", /* & */
32
+ "pSTAR", /* * */
33
+ "pSTAR2", /* ** */
34
+ "pDOT", /* . */
35
+ "pDOT3", /* ... */
36
+ "pBANG", /* ! */
37
+ "pQUESTION", /* ? */
38
+ "pLT", /* < */
39
+ "pEQ", /* = */
40
+
41
+ "kBOOL", /* bool */
42
+ "kBOT", /* bot */
43
+ "kCLASS", /* class */
44
+ "kFALSE", /* false */
45
+ "kINSTANCE", /* instance */
46
+ "kINTERFACE", /* interface */
47
+ "kNIL", /* nil */
48
+ "kSELF", /* self */
49
+ "kSINGLETON", /* singleton */
50
+ "kTOP", /* top */
51
+ "kTRUE", /* true */
52
+ "kVOID", /* void */
53
+ "kTYPE", /* type */
54
+ "kUNCHECKED", /* unchecked */
55
+ "kIN", /* in */
56
+ "kOUT", /* out */
57
+ "kEND", /* end */
58
+ "kDEF", /* def */
59
+ "kINCLUDE", /* include */
60
+ "kEXTEND", /* extend */
61
+ "kPREPEND", /* prepend */
62
+ "kALIAS", /* alias */
63
+ "kMODULE", /* module */
64
+ "kATTRREADER", /* attr_reader */
65
+ "kATTRWRITER", /* attr_writer */
66
+ "kATTRACCESSOR", /* attr_accessor */
67
+ "kPUBLIC", /* public */
68
+ "kPRIVATE", /* private */
69
+ "kUNTYPED", /* untyped */
70
+
71
+ "tLIDENT", /* Identifiers starting with lower case */
72
+ "tUIDENT", /* Identifiers starting with upper case */
73
+ "tULIDENT", /* Identifiers starting with `_` followed by upper case */
74
+ "tULLIDENT",
75
+ "tGIDENT", /* Identifiers starting with `$` */
76
+ "tAIDENT", /* Identifiers starting with `@` */
77
+ "tA2IDENT", /* Identifiers starting with `@@` */
78
+ "tBANGIDENT",
79
+ "tEQIDENT",
80
+ "tQIDENT", /* Quoted identifier */
81
+ "tOPERATOR", /* Operator identifier */
82
+
83
+ "tCOMMENT",
84
+ "tLINECOMMENT",
85
+
86
+ "tDQSTRING", /* Double quoted string */
87
+ "tSQSTRING", /* Single quoted string */
88
+ "tINTEGER", /* Integer */
89
+ "tSYMBOL", /* Symbol */
90
+ "tDQSYMBOL",
91
+ "tSQSYMBOL",
92
+ "tANNOTATION", /* Annotation */
93
+ };
94
+
95
+ token NullToken = { NullType };
96
+ position NullPosition = { -1, -1, -1, -1 };
97
+ range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
98
+
99
+ const char *token_type_str(enum TokenType type) {
100
+ return RBS_TOKENTYPE_NAMES[type];
101
+ }
102
+
103
+ unsigned int peekn(lexstate *state, unsigned int chars[], size_t length) { /* Decodes `length` codepoints starting at `current` into `chars` without advancing the lexer; returns the summed encoded byte length of those codepoints. */
104
+ int byteoffset = 0;
105
+
106
+ rb_encoding *encoding = rb_enc_get(state->string);
107
+ char *start = RSTRING_PTR(state->string) + state->current.byte_pos;
108
+ char *end = RSTRING_END(state->string);
109
+
110
+ for (size_t i = 0; i < length; i++)
111
+ {
112
+ chars[i] = (start + byteoffset < end) ? rb_enc_mbc_to_codepoint(start + byteoffset, end, encoding) : 0; /* at or past the end of the buffer: report NUL instead of decoding out-of-bounds bytes */
113
+ byteoffset += rb_enc_codelen(chars[i], encoding); /* reuse the encoding looked up above instead of fetching it on every iteration */
114
+ }
115
+
116
+ return byteoffset;
117
+ }
118
+
119
+ int token_chars(token tok) {
120
+ return tok.range.end.char_pos - tok.range.start.char_pos;
121
+ }
122
+
123
+ int token_bytes(token tok) {
124
+ return RANGE_BYTES(tok.range);
125
+ }
126
+
127
+ /**
128
+ * ... token ...
129
+ * ^ start
130
+ * ^ current
131
+ *
132
+ * */
133
+ token next_token(lexstate *state, enum TokenType type) {
134
+ token t;
135
+
136
+ t.type = type;
137
+ t.range.start = state->start;
138
+ t.range.end = state->current;
139
+ state->start = state->current;
140
+ state->first_token_of_line = false;
141
+
142
+ return t;
143
+ }
144
+
145
+ void advance_skip(lexstate *state, unsigned int c, bool skip) {
146
+ int len = rb_enc_codelen(c, rb_enc_get(state->string));
147
+
148
+ state->current.char_pos += 1;
149
+ state->current.byte_pos += len;
150
+
151
+ if (c == '\n') {
152
+ state->current.line += 1;
153
+ state->current.column = 0;
154
+ state->first_token_of_line = true;
155
+ } else {
156
+ state->current.column += 1;
157
+ }
158
+
159
+ if (skip) {
160
+ state->start = state->current;
161
+ }
162
+ }
163
+
164
+ void advance_char(lexstate *state, unsigned int c) {
165
+ advance_skip(state, c, false);
166
+ }
167
+
168
+ void skip_char(lexstate *state, unsigned int c) {
169
+ advance_skip(state, c, true);
170
+ }
171
+
172
+ void skip(lexstate *state) { /* Consumes the character at `current` and, via skip_char, moves the token start to the new position. */
173
+ unsigned int c = peek(state); /* keep the full codepoint: `unsigned char` truncated multibyte characters, giving a wrong byte length and spurious '\n' matches */
174
+ skip_char(state, c);
175
+ }
176
+
177
+ void advance(lexstate *state) { /* Consumes the character at `current` via advance_char, leaving the token start where it is. */
178
+ unsigned int c = peek(state); /* keep the full codepoint: `unsigned char` truncated multibyte characters, giving a wrong byte length and spurious '\n' matches */
179
+ advance_char(state, c);
180
+ }
181
+
182
+ /*
183
+ 1. Peek one character from state
184
+ 2. If the character read equals the given `c`, advance past it and return true.
185
+ 3. Return false otherwise.
186
+ */
187
+ static bool advance_next_character_if(lexstate *state, unsigned int c) {
188
+ if (peek(state) == c) {
189
+ advance_char(state, c);
190
+ return true;
191
+ } else {
192
+ return false;
193
+ }
194
+ }
195
+
196
+ /*
197
+ ... 0 1 ...
198
+ ^ current
199
+ ^ current (return)
200
+ */
201
+ static token lex_number(lexstate *state) {
202
+ unsigned int c;
203
+
204
+ while (true) {
205
+ c = peek(state);
206
+
207
+ if (rb_isdigit(c) || c == '_') {
208
+ advance_char(state, c);
209
+ } else {
210
+ break;
211
+ }
212
+ }
213
+
214
+ return next_token(state, tINTEGER);
215
+ }
216
+
217
+ /*
218
+ lex_hyphen ::= - (tOPERATOR)
219
+ | - @ (tOPERATOR)
220
+ | - > (pARROW)
221
+ | - 1 ... (tINTEGER)
222
+ */
223
+ static token lex_hyphen(lexstate* state) {
224
+ if (advance_next_character_if(state, '>')) {
225
+ return next_token(state, pARROW);
226
+ } else if (advance_next_character_if(state, '@')) {
227
+ return next_token(state, tOPERATOR);
228
+ } else {
229
+ unsigned int c = peek(state);
230
+
231
+ if (rb_isdigit(c)) {
232
+ advance_char(state, c);
233
+ return lex_number(state);
234
+ } else {
235
+ return next_token(state, tOPERATOR);
236
+ }
237
+ }
238
+ }
239
+
240
+ /*
241
+ lex_plus ::= +
242
+ | + @
243
+ | + \d
244
+ */
245
+ static token lex_plus(lexstate *state) {
246
+ if (advance_next_character_if(state, '@')) {
247
+ return next_token(state, tOPERATOR);
248
+ } else if (rb_isdigit(peek(state))) {
249
+ return lex_number(state);
250
+ } else {
251
+ return next_token(state, tOPERATOR);
252
+ }
253
+ }
254
+
255
+ /*
256
+ lex_dot ::= . pDOT
257
+ | . . . pDOT3
258
+ */
259
+ static token lex_dot(lexstate *state) {
260
+ unsigned int cs[2];
261
+
262
+ peekn(state, cs, 2);
263
+
264
+ if (cs[0] == '.' && cs[1] == '.') {
265
+ advance_char(state, '.');
266
+ advance_char(state, '.');
267
+ return next_token(state, pDOT3);
268
+ } else {
269
+ return next_token(state, pDOT);
270
+ }
271
+ }
272
+
273
+ /*
274
+ lex_eq ::= =
275
+ | ==
276
+ | ===
277
+ | =~
278
+ | =>
279
+ */
280
+ static token lex_eq(lexstate *state) {
281
+ unsigned int cs[2];
282
+ peekn(state, cs, 2);
283
+
284
+ if (cs[0] == '=' && cs[1] == '=') {
285
+ // ===
286
+ advance_char(state, cs[0]);
287
+ advance_char(state, cs[1]);
288
+ return next_token(state, tOPERATOR);
289
+ } else if (cs[0] == '=') {
290
+ // ==
291
+ advance_char(state, cs[0]);
292
+ return next_token(state, tOPERATOR);
293
+ } else if (cs[0] == '~') {
294
+ // =~
295
+ advance_char(state, cs[0]);
296
+ return next_token(state, tOPERATOR);
297
+ } else if (cs[0] == '>') {
298
+ // =>
299
+ advance_char(state, cs[0]);
300
+ return next_token(state, pFATARROW);
301
+ } else {
302
+ return next_token(state, pEQ);
303
+ }
304
+ }
305
+
306
+ /*
307
+ underscore ::= _A tULIDENT
308
+ | _a tULLIDENT
309
+ | _ tULLIDENT
310
+ */
311
+ static token lex_underscore(lexstate *state) {
312
+ unsigned int c;
313
+
314
+ c = peek(state);
315
+
316
+ if ('A' <= c && c <= 'Z') {
317
+ advance_char(state, c);
318
+
319
+ while (true) {
320
+ c = peek(state);
321
+
322
+ if (rb_isalnum(c) || c == '_') {
323
+ // ok
324
+ advance_char(state, c);
325
+ } else {
326
+ break;
327
+ }
328
+ }
329
+
330
+ return next_token(state, tULIDENT);
331
+ } else if (rb_isalnum(c) || c == '_') {
332
+ advance_char(state, c);
333
+
334
+ while (true) {
335
+ c = peek(state);
336
+
337
+ if (rb_isalnum(c) || c == '_') {
338
+ // ok
339
+ advance_char(state, c);
340
+ } else {
341
+ break;
342
+ }
343
+ }
344
+
345
+ if (c == '!') {
346
+ advance_char(state, c);
347
+ return next_token(state, tBANGIDENT);
348
+ } else if (c == '=') {
349
+ advance_char(state, c);
350
+ return next_token(state, tEQIDENT);
351
+ } else {
352
+ return next_token(state, tULLIDENT);
353
+ }
354
+ } else {
355
+ return next_token(state, tULLIDENT);
356
+ }
357
+ }
358
+
359
+ static bool is_opr(unsigned int c) {
360
+ switch (c) {
361
+ case ':':
362
+ case ';':
363
+ case '=':
364
+ case '.':
365
+ case ',':
366
+ case '!':
367
+ case '"':
368
+ case '$':
369
+ case '%':
370
+ case '&':
371
+ case '(':
372
+ case ')':
373
+ case '-':
374
+ case '+':
375
+ case '~':
376
+ case '|':
377
+ case '\\':
378
+ case '\'':
379
+ case '[':
380
+ case ']':
381
+ case '{':
382
+ case '}':
383
+ case '*':
384
+ case '/':
385
+ case '<':
386
+ case '>':
387
+ case '^':
388
+ return true;
389
+ default:
390
+ return false;
391
+ }
392
+ }
393
+
394
+ static token lex_global(lexstate *state) {
395
+ unsigned int c;
396
+
397
+ c = peek(state);
398
+
399
+ if (rb_isspace(c) || c == 0) {
400
+ return next_token(state, ErrorToken);
401
+ }
402
+
403
+ if (rb_isdigit(c)) {
404
+ // `$` [`0`-`9`]+
405
+ advance_char(state, c);
406
+
407
+ while (true) {
408
+ c = peek(state);
409
+ if (rb_isdigit(c)) {
410
+ advance_char(state, c);
411
+ } else {
412
+ return next_token(state, tGIDENT);
413
+ }
414
+ }
415
+ }
416
+
417
+ if (c == '-') {
418
+ // `$` `-` [a-zA-Z0-9_]
419
+ advance_char(state, c);
420
+ c = peek(state);
421
+
422
+ if (rb_isalnum(c) || c == '_') {
423
+ advance_char(state, c);
424
+ return next_token(state, tGIDENT);
425
+ } else {
426
+ return next_token(state, ErrorToken);
427
+ }
428
+ }
429
+
430
+ switch (c) {
431
+ case '~':
432
+ case '*':
433
+ case '$':
434
+ case '?':
435
+ case '!':
436
+ case '@':
437
+ case '\\':
438
+ case '/':
439
+ case ';':
440
+ case ',':
441
+ case '.':
442
+ case '=':
443
+ case ':':
444
+ case '<':
445
+ case '>':
446
+ case '"':
447
+ case '&':
448
+ case '\'':
449
+ case '`':
450
+ case '+':
451
+ advance_char(state, c);
452
+ return next_token(state, tGIDENT);
453
+
454
+ default:
455
+ if (is_opr(c) || c == 0) {
456
+ return next_token(state, ErrorToken);
457
+ }
458
+
459
+ while (true) {
460
+ advance_char(state, c);
461
+ c = peek(state);
462
+
463
+ if (rb_isspace(c) || is_opr(c) || c == 0) {
464
+ break;
465
+ }
466
+ }
467
+
468
+ return next_token(state, tGIDENT);
469
+ }
470
+ }
471
+
472
+ void pp(VALUE object) { /* Debug helper: prints the result of `object.inspect` to stdout, prefixed with "pp >> ". */
473
+ VALUE inspect = rb_funcall(object, rb_intern("inspect"), 0);
474
+ printf("pp >> %.*s\n", (int) RSTRING_LEN(inspect), RSTRING_PTR(inspect)); /* length-bounded: RSTRING_PTR is not guaranteed to be NUL-terminated */
475
+ }
476
+
477
+ static token lex_ident(lexstate *state, enum TokenType default_type) {
478
+ unsigned int c;
479
+ token tok;
480
+
481
+ while (true) {
482
+ c = peek(state);
483
+ if (rb_isalnum(c) || c == '_') {
484
+ advance_char(state, c);
485
+ } else if (c == '!') {
486
+ advance_char(state, c);
487
+ tok = next_token(state, tBANGIDENT);
488
+ break;
489
+ } else if (c == '=') {
490
+ advance_char(state, c);
491
+ tok = next_token(state, tEQIDENT);
492
+ break;
493
+ } else {
494
+ tok = next_token(state, default_type);
495
+ break;
496
+ }
497
+ }
498
+
499
+ if (tok.type == tLIDENT) {
500
+ VALUE string = rb_enc_str_new(
501
+ RSTRING_PTR(state->string) + tok.range.start.byte_pos,
502
+ RANGE_BYTES(tok.range),
503
+ rb_enc_get(state->string)
504
+ );
505
+
506
+ VALUE type = rb_hash_aref(RBS_Parser_KEYWORDS, string);
507
+ if (FIXNUM_P(type)) {
508
+ tok.type = FIX2INT(type);
509
+ }
510
+ }
511
+
512
+ return tok;
513
+ }
514
+
515
+ static token lex_comment(lexstate *state, enum TokenType type) {
516
+ unsigned int c;
517
+
518
+ c = peek(state);
519
+ if (c == ' ') {
520
+ advance_char(state, c);
521
+ }
522
+
523
+ while (true) {
524
+ c = peek(state);
525
+
526
+ if (c == '\n' || c == '\0') {
527
+ break;
528
+ } else {
529
+ advance_char(state, c);
530
+ }
531
+ }
532
+
533
+ token tok = next_token(state, type);
534
+
535
+ skip_char(state, c);
536
+
537
+ return tok;
538
+ }
539
+
540
+ /*
541
+ ... " ... " ...
542
+ ^ start
543
+ ^ current
544
+ ^ current (after)
545
+ */
546
+ static token lex_dqstring(lexstate *state) {
547
+ unsigned int c;
548
+
549
+ while (true) {
550
+ c = peek(state);
551
+ advance_char(state, c);
552
+
553
+ if (c == '\\') {
554
+ if (peek(state) == '"') {
555
+ advance_char(state, c);
556
+ c = peek(state);
557
+ }
558
+ } else if (c == '"') {
559
+ break;
560
+ }
561
+ }
562
+
563
+ return next_token(state, tDQSTRING);
564
+ }
565
+
566
+ /*
567
+ ... @ foo ...
568
+ ^ start
569
+ ^ current
570
+ ^ current (return)
571
+
572
+ ... @ @ foo ...
573
+ ^ start
574
+ ^ current
575
+ ^ current (return)
576
+ */
577
+ static token lex_ivar(lexstate *state) {
578
+ unsigned int c;
579
+
580
+ enum TokenType type = tAIDENT;
581
+
582
+ c = peek(state);
583
+
584
+ if (c == '@') {
585
+ type = tA2IDENT;
586
+ advance_char(state, c);
587
+ c = peek(state);
588
+ }
589
+
590
+ if (rb_isalpha(c) || c == '_') {
591
+ advance_char(state, c);
592
+ c = peek(state);
593
+ } else {
594
+ return next_token(state, ErrorToken);
595
+ }
596
+
597
+ while (rb_isalnum(c) || c == '_') {
598
+ advance_char(state, c);
599
+ c = peek(state);
600
+ }
601
+
602
+ return next_token(state, type);
603
+ }
604
+
605
+ /*
606
+ ... ' ... ' ...
607
+ ^ start
608
+ ^ current
609
+ ^ current (after)
610
+ */
611
+ static token lex_sqstring(lexstate *state) {
612
+ unsigned int c;
613
+
614
+ c = peek(state);
615
+
616
+ while (true) {
617
+ c = peek(state);
618
+ advance_char(state, c);
619
+
620
+ if (c == '\\') {
621
+ if (peek(state) == '\'') {
622
+ advance_char(state, c);
623
+ c = peek(state);
624
+ }
625
+ } else if (c == '\'') {
626
+ break;
627
+ }
628
+ }
629
+
630
+ return next_token(state, tSQSTRING);
631
+ }
632
+
633
/* True when the code points c0, c1 equal the first two characters of the string s. */
#define EQPOINTS2(c0, c1, s) ((c0) == (s)[0] && (c1) == (s)[1])
/* True when the code points c0, c1, c2 equal the first three characters of the string s. */
#define EQPOINTS3(c0, c1, c2, s) ((c0) == (s)[0] && (c1) == (s)[1] && (c2) == (s)[2])
635
+
636
+ /*
637
+ ... : @ ...
638
+ ^ start
639
+ ^ current
640
+ ^ current (return)
641
+ */
642
+ static token lex_colon_symbol(lexstate *state) {
643
+ unsigned int c[3];
644
+ peekn(state, c, 3);
645
+
646
+ switch (c[0]) {
647
+ case '|':
648
+ case '&':
649
+ case '/':
650
+ case '%':
651
+ case '~':
652
+ case '`':
653
+ case '^':
654
+ advance_char(state, c[0]);
655
+ return next_token(state, tSYMBOL);
656
+ case '=':
657
+ if (EQPOINTS2(c[0], c[1], "=~")) {
658
+ // :=~
659
+ advance_char(state, c[0]);
660
+ advance_char(state, c[1]);
661
+ return next_token(state, tSYMBOL);
662
+ } else if (EQPOINTS3(c[0], c[1], c[2], "===")) {
663
+ // :===
664
+ advance_char(state, c[0]);
665
+ advance_char(state, c[1]);
666
+ advance_char(state, c[2]);
667
+ return next_token(state, tSYMBOL);
668
+ } else if (EQPOINTS2(c[0], c[1], "==")) {
669
+ // :==
670
+ advance_char(state, c[0]);
671
+ advance_char(state, c[1]);
672
+ return next_token(state, tSYMBOL);
673
+ }
674
+ break;
675
+ case '<':
676
+ if (EQPOINTS3(c[0], c[1], c[2], "<=>")) {
677
+ advance_char(state, c[0]);
678
+ advance_char(state, c[1]);
679
+ advance_char(state, c[2]);
680
+ } else if (EQPOINTS2(c[0], c[1], "<=") || EQPOINTS2(c[0], c[1], "<<")) {
681
+ advance_char(state, c[0]);
682
+ advance_char(state, c[1]);
683
+ } else {
684
+ advance_char(state, c[0]);
685
+ }
686
+ return next_token(state, tSYMBOL);
687
+ case '>':
688
+ if (EQPOINTS2(c[0], c[1], ">=") || EQPOINTS2(c[0], c[1], ">>")) {
689
+ advance_char(state, c[0]);
690
+ advance_char(state, c[1]);
691
+ } else {
692
+ advance_char(state, c[0]);
693
+ }
694
+ return next_token(state, tSYMBOL);
695
+ case '-':
696
+ case '+':
697
+ if (EQPOINTS2(c[0], c[1], "+@") || EQPOINTS2(c[0], c[1], "-@")) {
698
+ advance_char(state, c[0]);
699
+ advance_char(state, c[1]);
700
+ } else {
701
+ advance_char(state, c[0]);
702
+ }
703
+ return next_token(state, tSYMBOL);
704
+ case '*':
705
+ if (EQPOINTS2(c[0], c[1], "**")) {
706
+ advance_char(state, c[0]);
707
+ advance_char(state, c[1]);
708
+ } else {
709
+ advance_char(state, c[0]);
710
+ }
711
+ return next_token(state, tSYMBOL);
712
+ case '[':
713
+ if (EQPOINTS3(c[0], c[1], c[2], "[]=")) {
714
+ advance_char(state, c[0]);
715
+ advance_char(state, c[1]);
716
+ advance_char(state, c[2]);
717
+ } else if (EQPOINTS2(c[0], c[1], "[]")) {
718
+ advance_char(state, c[0]);
719
+ advance_char(state, c[1]);
720
+ } else {
721
+ break;
722
+ }
723
+ return next_token(state, tSYMBOL);
724
+ case '!':
725
+ if (EQPOINTS2(c[0], c[1], "!=") || EQPOINTS2(c[0], c[1], "!~")) {
726
+ advance_char(state, c[0]);
727
+ advance_char(state, c[1]);
728
+ } else {
729
+ advance_char(state, c[0]);
730
+ }
731
+ return next_token(state, tSYMBOL);
732
+ case '@': {
733
+ advance_char(state, '@');
734
+ token tok = lex_ivar(state);
735
+ if (tok.type != ErrorToken) {
736
+ tok.type = tSYMBOL;
737
+ }
738
+ return tok;
739
+ }
740
+ case '$': {
741
+ advance_char(state, '$');
742
+ token tok = lex_global(state);
743
+ if (tok.type != ErrorToken) {
744
+ tok.type = tSYMBOL;
745
+ }
746
+ return tok;
747
+ }
748
+ case '\'': {
749
+ position start = state->start;
750
+ advance_char(state, '\'');
751
+ token tok = lex_sqstring(state);
752
+ tok.type = tSQSYMBOL;
753
+ tok.range.start = start;
754
+ return tok;
755
+ }
756
+ case '"': {
757
+ position start = state->start;
758
+ advance_char(state, '"');
759
+ token tok = lex_dqstring(state);
760
+ tok.type = tDQSYMBOL;
761
+ tok.range.start = start;
762
+ return tok;
763
+ }
764
+ default:
765
+ if (rb_isalpha(c[0]) || c[0] == '_') {
766
+ position start = state->start;
767
+ token tok = lex_ident(state, NullType);
768
+ tok.range.start = start;
769
+
770
+ if (peek(state) == '?') {
771
+ if (tok.type != tBANGIDENT && tok.type != tEQIDENT) {
772
+ skip_char(state, '?');
773
+ tok.range.end = state->current;
774
+ }
775
+ }
776
+
777
+ tok.type = tSYMBOL;
778
+ return tok;
779
+ }
780
+ }
781
+
782
+ return next_token(state, pCOLON);
783
+ }
784
+
785
+ /*
786
+ ... : : ...
787
+ ^ start
788
+ ^ current
789
+ ^ current (return)
790
+
791
+ ... : ...
792
+ ^ start
793
+ ^ current (lex_colon_symbol)
794
+ */
795
+ static token lex_colon(lexstate *state) {
796
+ unsigned int c = peek(state);
797
+
798
+ if (c == ':') {
799
+ advance_char(state, c);
800
+ return next_token(state, pCOLON2);
801
+ } else {
802
+ return lex_colon_symbol(state);
803
+ }
804
+ }
805
+
806
+ /*
807
+ lex_lt ::= < (pLT)
808
+ | < < (tOPERATOR)
809
+ | < = (tOPERATOR)
810
+ | < = > (tOPERATOR)
811
+ */
812
+ static token lex_lt(lexstate *state) {
813
+ if (advance_next_character_if(state, '<')) {
814
+ return next_token(state, tOPERATOR);
815
+ } else if (advance_next_character_if(state, '=')) {
816
+ advance_next_character_if(state, '>');
817
+ return next_token(state, tOPERATOR);
818
+ } else {
819
+ return next_token(state, pLT);
820
+ }
821
+ }
822
+
823
+ /*
824
+ lex_gt ::= >
825
+ | > =
826
+ | > >
827
+ */
828
+ static token lex_gt(lexstate *state) {
829
+ advance_next_character_if(state, '=') || advance_next_character_if(state, '>');
830
+ return next_token(state, tOPERATOR);
831
+ }
832
+
833
+ /*
834
+ ... `%` `a` `{` ... `}` ...
835
+ ^ start
836
+ ^ current
837
+ ^ current (exit)
838
+ --- token
839
+ */
840
+ static token lex_percent(lexstate *state) {
841
+ unsigned int cs[2];
842
+ unsigned int end_char;
843
+
844
+ peekn(state, cs, 2);
845
+
846
+ if (cs[0] != 'a') {
847
+ return next_token(state, tOPERATOR);
848
+ }
849
+
850
+ switch (cs[1])
851
+ {
852
+ case '{':
853
+ end_char = '}';
854
+ break;
855
+ case '(':
856
+ end_char = ')';
857
+ break;
858
+ case '[':
859
+ end_char = ']';
860
+ break;
861
+ case '|':
862
+ end_char = '|';
863
+ break;
864
+ case '<':
865
+ end_char = '>';
866
+ break;
867
+ default:
868
+ return next_token(state, tOPERATOR);
869
+ }
870
+
871
+ advance_char(state, cs[0]);
872
+ advance_char(state, cs[1]);
873
+
874
+ unsigned int c;
875
+
876
+ while ((c = peek(state))) {
877
+ if (c == end_char) {
878
+ advance_char(state, c);
879
+ return next_token(state, tANNOTATION);
880
+ }
881
+ advance_char(state, c);
882
+ }
883
+
884
+ return next_token(state, ErrorToken);
885
+ }
886
+
887
+ /*
888
+ bracket ::= [ (pLBRACKET)
889
+ * ^
890
+ | [ ] (tOPERATOR)
891
+ * ^ $
892
+ | [ ] = (tOPERATOR)
893
+ * ^ $
894
+ */
895
+ static token lex_bracket(lexstate *state) {
896
+ if (advance_next_character_if(state, ']')) {
897
+ advance_next_character_if(state, '=');
898
+ return next_token(state, tOPERATOR);
899
+ } else {
900
+ return next_token(state, pLBRACKET);
901
+ }
902
+ }
903
+
904
+ /*
905
+ bracket ::= *
906
+ | * *
907
+ */
908
+ static token lex_star(lexstate *state) {
909
+ if (advance_next_character_if(state, '*')) {
910
+ return next_token(state, pSTAR2);
911
+ } else {
912
+ return next_token(state, pSTAR);
913
+ }
914
+ }
915
+
916
+ /*
917
+ bang ::= !
918
+ | ! =
919
+ | ! ~
920
+ */
921
+ static token lex_bang(lexstate *state) {
922
+ advance_next_character_if(state, '=') || advance_next_character_if(state, '~');
923
+ return next_token(state, tOPERATOR);
924
+ }
925
+
926
+ /*
927
+ backquote ::= ` (tOPERATOR)
928
+ | `[^ :][^`]` (tQIDENT)
929
+ */
930
+ static token lex_backquote(lexstate *state) {
931
+ unsigned int c = peek(state);
932
+
933
+ if (c == ' ' || c == ':') {
934
+ return next_token(state, tOPERATOR);
935
+ } else {
936
+ while (true) {
937
+ if (c == '`') {
938
+ break;
939
+ }
940
+
941
+ c = peek(state);
942
+ advance_char(state, c);
943
+ }
944
+
945
+ return next_token(state, tQIDENT);
946
+ }
947
+ }
948
+
949
+ token rbsparser_next_token(lexstate *state) {
950
+ token tok = NullToken;
951
+
952
+ unsigned int c;
953
+ bool skipping = true;
954
+
955
+ while (skipping) {
956
+ c = peek(state);
957
+
958
+ switch (c) {
959
+ case ' ':
960
+ case '\t':
961
+ case '\n':
962
+ // nop
963
+ skip_char(state, c);
964
+ break;
965
+ case '\0':
966
+ return next_token(state, pEOF);
967
+ default:
968
+ advance_char(state, c);
969
+ skipping = false;
970
+ break;
971
+ }
972
+ }
973
+
974
+ /* ... c d .. */
975
+ /* ^ state->current */
976
+ /* ^ start */
977
+ switch (c) {
978
+ case '\0': tok = next_token(state, pEOF);
979
+ ONE_CHAR_PATTERN('(', pLPAREN);
980
+ ONE_CHAR_PATTERN(')', pRPAREN);
981
+ ONE_CHAR_PATTERN(']', pRBRACKET);
982
+ ONE_CHAR_PATTERN('{', pLBRACE);
983
+ ONE_CHAR_PATTERN('}', pRBRACE);
984
+ ONE_CHAR_PATTERN(',', pCOMMA);
985
+ ONE_CHAR_PATTERN('|', pBAR);
986
+ ONE_CHAR_PATTERN('^', pHAT);
987
+ ONE_CHAR_PATTERN('&', pAMP);
988
+ ONE_CHAR_PATTERN('?', pQUESTION);
989
+ ONE_CHAR_PATTERN('/', tOPERATOR);
990
+ ONE_CHAR_PATTERN('~', tOPERATOR);
991
+ case '[':
992
+ tok = lex_bracket(state);
993
+ break;
994
+ case '-':
995
+ tok = lex_hyphen(state);
996
+ break;
997
+ case '+':
998
+ tok = lex_plus(state);
999
+ break;
1000
+ case '*':
1001
+ tok = lex_star(state);
1002
+ break;
1003
+ case '<':
1004
+ tok = lex_lt(state);
1005
+ break;
1006
+ case '=':
1007
+ tok = lex_eq(state);
1008
+ break;
1009
+ case '>':
1010
+ tok = lex_gt(state);
1011
+ break;
1012
+ case '!':
1013
+ tok = lex_bang(state);
1014
+ break;
1015
+ case '#':
1016
+ if (state->first_token_of_line) {
1017
+ tok = lex_comment(state, tLINECOMMENT);
1018
+ } else {
1019
+ tok = lex_comment(state, tCOMMENT);
1020
+ }
1021
+ break;
1022
+ case ':':
1023
+ tok = lex_colon(state);
1024
+ break;
1025
+ case '.':
1026
+ tok = lex_dot(state);
1027
+ break;
1028
+ case '_':
1029
+ tok = lex_underscore(state);
1030
+ break;
1031
+ case '$':
1032
+ tok = lex_global(state);
1033
+ break;
1034
+ case '@':
1035
+ tok = lex_ivar(state);
1036
+ break;
1037
+ case '"':
1038
+ tok = lex_dqstring(state);
1039
+ break;
1040
+ case '\'':
1041
+ tok = lex_sqstring(state);
1042
+ break;
1043
+ case '%':
1044
+ tok = lex_percent(state);
1045
+ break;
1046
+ case '`':
1047
+ tok = lex_backquote(state);
1048
+ break;
1049
+ default:
1050
+ if (rb_isalpha(c) && rb_isupper(c)) {
1051
+ tok = lex_ident(state, tUIDENT);
1052
+ }
1053
+ if (rb_isalpha(c) && rb_islower(c)) {
1054
+ tok = lex_ident(state, tLIDENT);
1055
+ }
1056
+ if (rb_isdigit(c)) {
1057
+ tok = lex_number(state);
1058
+ }
1059
+ }
1060
+
1061
+ if (tok.type == NullType) {
1062
+ tok = next_token(state, ErrorToken);
1063
+ }
1064
+
1065
+ return tok;
1066
+ }
1067
+
1068
+ char *peek_token(lexstate *state, token tok) {
1069
+ return RSTRING_PTR(state->string) + tok.range.start.byte_pos;
1070
+ }