natalie_parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Dockerfile +26 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +55 -0
  7. data/Rakefile +242 -0
  8. data/ext/natalie_parser/extconf.rb +9 -0
  9. data/ext/natalie_parser/mri_creator.hpp +139 -0
  10. data/ext/natalie_parser/natalie_parser.cpp +144 -0
  11. data/include/natalie_parser/creator/debug_creator.hpp +113 -0
  12. data/include/natalie_parser/creator.hpp +108 -0
  13. data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
  14. data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
  15. data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
  16. data/include/natalie_parser/lexer.hpp +135 -0
  17. data/include/natalie_parser/node/alias_node.hpp +35 -0
  18. data/include/natalie_parser/node/arg_node.hpp +74 -0
  19. data/include/natalie_parser/node/array_node.hpp +34 -0
  20. data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
  21. data/include/natalie_parser/node/assignment_node.hpp +34 -0
  22. data/include/natalie_parser/node/back_ref_node.hpp +28 -0
  23. data/include/natalie_parser/node/begin_block_node.hpp +25 -0
  24. data/include/natalie_parser/node/begin_node.hpp +52 -0
  25. data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
  26. data/include/natalie_parser/node/bignum_node.hpp +37 -0
  27. data/include/natalie_parser/node/block_node.hpp +55 -0
  28. data/include/natalie_parser/node/block_pass_node.hpp +33 -0
  29. data/include/natalie_parser/node/break_node.hpp +32 -0
  30. data/include/natalie_parser/node/call_node.hpp +85 -0
  31. data/include/natalie_parser/node/case_in_node.hpp +40 -0
  32. data/include/natalie_parser/node/case_node.hpp +52 -0
  33. data/include/natalie_parser/node/case_when_node.hpp +43 -0
  34. data/include/natalie_parser/node/class_node.hpp +39 -0
  35. data/include/natalie_parser/node/colon2_node.hpp +44 -0
  36. data/include/natalie_parser/node/colon3_node.hpp +34 -0
  37. data/include/natalie_parser/node/constant_node.hpp +26 -0
  38. data/include/natalie_parser/node/def_node.hpp +55 -0
  39. data/include/natalie_parser/node/defined_node.hpp +33 -0
  40. data/include/natalie_parser/node/encoding_node.hpp +26 -0
  41. data/include/natalie_parser/node/end_block_node.hpp +25 -0
  42. data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
  43. data/include/natalie_parser/node/false_node.hpp +23 -0
  44. data/include/natalie_parser/node/fixnum_node.hpp +36 -0
  45. data/include/natalie_parser/node/float_node.hpp +36 -0
  46. data/include/natalie_parser/node/hash_node.hpp +34 -0
  47. data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
  48. data/include/natalie_parser/node/identifier_node.hpp +123 -0
  49. data/include/natalie_parser/node/if_node.hpp +43 -0
  50. data/include/natalie_parser/node/infix_op_node.hpp +46 -0
  51. data/include/natalie_parser/node/interpolated_node.hpp +33 -0
  52. data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
  53. data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
  54. data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
  55. data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
  56. data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
  57. data/include/natalie_parser/node/iter_node.hpp +45 -0
  58. data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
  59. data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
  60. data/include/natalie_parser/node/logical_and_node.hpp +40 -0
  61. data/include/natalie_parser/node/logical_or_node.hpp +40 -0
  62. data/include/natalie_parser/node/match_node.hpp +38 -0
  63. data/include/natalie_parser/node/module_node.hpp +32 -0
  64. data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
  65. data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
  66. data/include/natalie_parser/node/next_node.hpp +37 -0
  67. data/include/natalie_parser/node/nil_node.hpp +23 -0
  68. data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
  69. data/include/natalie_parser/node/node.hpp +155 -0
  70. data/include/natalie_parser/node/node_with_args.hpp +47 -0
  71. data/include/natalie_parser/node/not_match_node.hpp +35 -0
  72. data/include/natalie_parser/node/not_node.hpp +37 -0
  73. data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
  74. data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
  75. data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
  76. data/include/natalie_parser/node/op_assign_node.hpp +47 -0
  77. data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
  78. data/include/natalie_parser/node/pin_node.hpp +33 -0
  79. data/include/natalie_parser/node/range_node.hpp +52 -0
  80. data/include/natalie_parser/node/redo_node.hpp +20 -0
  81. data/include/natalie_parser/node/regexp_node.hpp +36 -0
  82. data/include/natalie_parser/node/retry_node.hpp +20 -0
  83. data/include/natalie_parser/node/return_node.hpp +34 -0
  84. data/include/natalie_parser/node/safe_call_node.hpp +31 -0
  85. data/include/natalie_parser/node/sclass_node.hpp +37 -0
  86. data/include/natalie_parser/node/self_node.hpp +23 -0
  87. data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
  88. data/include/natalie_parser/node/shell_node.hpp +32 -0
  89. data/include/natalie_parser/node/splat_node.hpp +39 -0
  90. data/include/natalie_parser/node/splat_value_node.hpp +32 -0
  91. data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
  92. data/include/natalie_parser/node/string_node.hpp +42 -0
  93. data/include/natalie_parser/node/super_node.hpp +44 -0
  94. data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
  95. data/include/natalie_parser/node/symbol_node.hpp +30 -0
  96. data/include/natalie_parser/node/to_array_node.hpp +33 -0
  97. data/include/natalie_parser/node/true_node.hpp +23 -0
  98. data/include/natalie_parser/node/unary_op_node.hpp +41 -0
  99. data/include/natalie_parser/node/undef_node.hpp +31 -0
  100. data/include/natalie_parser/node/until_node.hpp +21 -0
  101. data/include/natalie_parser/node/while_node.hpp +52 -0
  102. data/include/natalie_parser/node/yield_node.hpp +29 -0
  103. data/include/natalie_parser/node.hpp +89 -0
  104. data/include/natalie_parser/parser.hpp +218 -0
  105. data/include/natalie_parser/token.hpp +842 -0
  106. data/include/tm/defer.hpp +34 -0
  107. data/include/tm/hashmap.hpp +826 -0
  108. data/include/tm/macros.hpp +16 -0
  109. data/include/tm/optional.hpp +223 -0
  110. data/include/tm/owned_ptr.hpp +186 -0
  111. data/include/tm/recursion_guard.hpp +156 -0
  112. data/include/tm/shared_ptr.hpp +259 -0
  113. data/include/tm/string.hpp +1447 -0
  114. data/include/tm/tests.hpp +78 -0
  115. data/include/tm/vector.hpp +796 -0
  116. data/lib/natalie_parser/sexp.rb +36 -0
  117. data/lib/natalie_parser/version.rb +5 -0
  118. data/lib/natalie_parser.rb +3 -0
  119. data/natalie_parser.gemspec +23 -0
  120. data/src/lexer/interpolated_string_lexer.cpp +88 -0
  121. data/src/lexer/regexp_lexer.cpp +95 -0
  122. data/src/lexer/word_array_lexer.cpp +134 -0
  123. data/src/lexer.cpp +1703 -0
  124. data/src/node/alias_node.cpp +11 -0
  125. data/src/node/assignment_node.cpp +33 -0
  126. data/src/node/begin_node.cpp +29 -0
  127. data/src/node/begin_rescue_node.cpp +33 -0
  128. data/src/node/class_node.cpp +22 -0
  129. data/src/node/interpolated_regexp_node.cpp +19 -0
  130. data/src/node/interpolated_shell_node.cpp +25 -0
  131. data/src/node/interpolated_string_node.cpp +111 -0
  132. data/src/node/interpolated_symbol_node.cpp +25 -0
  133. data/src/node/match_node.cpp +14 -0
  134. data/src/node/module_node.cpp +21 -0
  135. data/src/node/multiple_assignment_node.cpp +37 -0
  136. data/src/node/node.cpp +10 -0
  137. data/src/node/node_with_args.cpp +35 -0
  138. data/src/node/op_assign_node.cpp +36 -0
  139. data/src/node/string_node.cpp +33 -0
  140. data/src/parser.cpp +2972 -0
  141. data/src/token.cpp +27 -0
  142. metadata +186 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: '048e048ede42652f8a81298a3310313f8bf9929ba5cd952a89d3269d8333b363'
4
+ data.tar.gz: 5c2388c8125b1444c454c0ee3a0d4a07f305cc3fd668d8e03dbc4404201a50f7
5
+ SHA512:
6
+ metadata.gz: 55932c3dc24ceeeab109a4a980ddf96babf276ced15dffb68d7c013ca5cc246d2551b59acc94f82c1ec5c0073859267ace3a264a3d8b9470465672ea5aebb59d
7
+ data.tar.gz: bcbaaad396877bcbf6453e8d55ad3f46dcb68ba09bcfe2f6cf0f1fa6d96966c646f530e3872d77736ef40b5818feed888d69fe33e62beeb95dcfd1c6f5115203
data/CHANGELOG.md ADDED
@@ -0,0 +1,22 @@
1
+ # Changelog
2
+
3
+ ## 1.0.0 (2022-06-03)
4
+
5
+ ### Summary
6
+
7
+ This is the initial public release. The 1.0 milestone was chosen as soon
8
+ as NatalieParser was useful for integration back with the upstream Natalie
9
+ compiler project, i.e. it could fully replace RubyParser as the parser
10
+ in use by Natalie.
11
+
12
+ That is not to say that NatalieParser is _complete_ -- it is merely _useful_.
13
+
14
+ These are the features known to still be missing in this release:
15
+
16
+ - [ ] Support different source encodings
17
+ - [ ] Support more of the Ruby 3.0 syntax
18
+ - [ ] Argument forwarding (`...`)
19
+ - [ ] Pattern matching
20
+ - [ ] Numbered block parameters (`_1`, `_2`, etc.)
21
+ - [ ] Non-ASCII identifiers
22
+ - [ ] Rational and Complex literals (`1r` and `2i`)
data/Dockerfile ADDED
@@ -0,0 +1,26 @@
1
# Base image is a build argument so other Ruby images can be substituted
# (the Rakefile passes IMAGE="ruby:2.7" for one of its builds).
ARG IMAGE=ruby:3.0
FROM $IMAGE

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y -q build-essential clang
RUN gem install bundler --no-doc

ENV LC_ALL=C.UTF-8

# Absolute path: must name the same directory the Gemfile is copied into below,
# regardless of the working directory inherited from the base image.
WORKDIR /natalie_parser

# Copy the Gemfile on its own first so the `bundle install` layer is only
# rebuilt when the Gemfile changes.
COPY Gemfile /natalie_parser/
RUN bundle install

# Compiler selection; the Rakefile reads CC/CXX from the environment.
ARG CC=gcc
ENV CC=$CC
ARG CXX=g++
ENV CXX=$CXX

COPY Rakefile Rakefile
COPY ext ext
COPY lib lib
COPY src src
COPY include include
RUN rake

# Tests are copied after the build step, so editing tests does not redo `RUN rake`.
COPY test test
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
+ # NOTE: This Gemfile is used for testing the project.
2
+ # These dependencies are not needed to consume the library as a C extension for MRI.
3
+
4
+ source 'https://rubygems.org'
5
+
6
+ gem 'minitest'
7
+ gem 'minitest-focus'
8
+ gem 'minitest-reporters'
9
+ gem 'ruby_parser'
10
+ gem 'rake'
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Tim Morgan and contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,55 @@
1
+ # Natalie Parser
2
+
3
+ [![github build status](https://github.com/natalie-lang/natalie_parser/actions/workflows/build.yml/badge.svg)](https://github.com/natalie-lang/natalie_parser/actions?query=workflow%3ABuild+branch%3Amaster)
4
+ [![MIT License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/natalie-lang/natalie_parser/blob/master/LICENSE)
5
+
6
+ This is a parser for the Ruby programming language, written in C++.
7
+ It was extracted from the [Natalie](https://github.com/natalie-lang/natalie) project.
8
+
9
+ You can use this library directly from a C/C++ project, or you can
10
+ build it as a Ruby gem and use it from Ruby itself.
11
+
12
+ We are currently targeting Ruby 3.0 syntax, but that will probably
13
+ change over time, depending on what things we want to support and
14
+ what kind of help we get from the community.
15
+
16
+ NOTE: This project is still very new and there are certainly bugs.
17
+ See the list below for things we already know about, but expect there
18
+ are more we don't know about yet. **We don't recommend you use this in
19
+ production applications.**
20
+
21
+ ## To Do
22
+
23
+ - [x] Parse the [Natalie](https://github.com/natalie-lang/natalie) compiler and standard library
24
+ - [x] Pass (mostly) the [RubyParser](https://github.com/seattlerb/ruby_parser) test suite
25
+ - [ ] Support different source encodings
26
+ - [ ] Support more of the Ruby 3.0 syntax
27
+ - [x] "Endless" method definition (`def foo = bar`)
28
+ - [ ] Argument forwarding (`...`)
29
+ - [ ] Pattern matching
30
+ - [ ] Numbered block parameters (`_1`, `_2`, etc.)
31
+ - [ ] Non-ASCII identifiers
32
+ - [ ] Rational and Complex literals (`1r` and `2i`)
33
+
34
+ ## Development
35
+
36
+ ```sh
37
+ rake
38
+ ruby -I lib:ext -r natalie_parser -e "p NatalieParser.parse('1 + 2')"
39
+ # => s(:block, s(:call, s(:lit, 1), :+, s(:lit, 2)))
40
+ ```
41
+
42
+ ### Running Tests
43
+
44
+ ```sh
45
+ rake test
46
+ ```
47
+
48
+ ## Copyright & License
49
+
50
+ Natalie is copyright 2022, Tim Morgan and contributors. Natalie is licensed
51
+ under the MIT License; see the `LICENSE` file in this directory for the full text.
52
+
53
+ ### Note about Outside Sources
54
+
55
+ The file `test/test_ruby_parser.rb` is copyright Ryan Davis and is licensed MIT.
data/Rakefile ADDED
@@ -0,0 +1,242 @@
1
+ task default: :build
2
+
3
+ desc 'Build Natalie Parser library and MRI C extension'
4
+ task build: %i[
5
+ bundle_install
6
+ build_dir
7
+ library
8
+ parser_c_ext
9
+ write_compile_database
10
+ ]
11
+
12
+ so_ext = RUBY_PLATFORM =~ /darwin/ ? 'bundle' : 'so'
13
+
14
+ desc 'Build Natalie Parser library'
15
+ task library: [:build_dir, "build/libnatalie_parser.a"]
16
+
17
+ desc 'Build Natalie Parser MRI C extension'
18
+ task parser_c_ext: [:build_dir, "ext/natalie_parser/natalie_parser.#{so_ext}"]
19
+
20
+ desc 'Remove temporary files created during build'
21
+ task :clean do
22
+ Rake::FileList[%w[
23
+ build/build.log
24
+ build/*.o
25
+ build/node
26
+ build/asan_test
27
+ ext/natalie_parser/*.{h,log,o}
28
+ ]].each { |path| rm_rf path if File.exist?(path) }
29
+ end
30
+
31
+ desc 'Remove all generated files'
32
+ task :clobber do
33
+ Rake::FileList[%w[
34
+ build
35
+ ext/natalie_parser/*.{so,bundle,h,log,o}
36
+ ]].each { |path| rm_rf path if File.exist?(path) }
37
+ end
38
+
39
+ task distclean: :clobber
40
+
41
+ desc 'Run the test suite'
42
+ task test: [:build, 'build/asan_test'] do
43
+ sh 'bundle exec ruby test/all.rb'
44
+ sh 'build/asan_test'
45
+ sh 'bundle exec ruby test/test_ruby_parser.rb'
46
+ end
47
+
48
desc 'Run the test suite when changes are made (requires entr binary)'
task :watch do
  # Feed every C++/Ruby source path to entr, which re-runs `rake test` on change.
  files = Rake::FileList['**/*.cpp', '**/*.hpp', '**/*.rb']
  sh "ls #{files} | entr -c -s 'rake test'"
end
53
+
54
+ desc 'Show line counts for the project'
55
+ task :cloc do
56
+ sh 'cloc include lib src test'
57
+ end
58
+
59
+ desc 'Generate tags file for development'
60
+ task :ctags do
61
+ sh 'ctags -R --exclude=.cquery_cache --exclude=ext --exclude=build --append=no .'
62
+ end
63
+ task tags: :ctags
64
+
65
+ desc 'Format C++ code with clang-format'
66
+ task :format do
67
+ sh "find include -type f -name '*.hpp' -exec clang-format -i --style=file {} +"
68
+ sh "find src -type f -name '*.cpp' -exec clang-format -i --style=file {} +"
69
+ end
70
+
71
+ desc 'Show TODO and FIXME comments in the project'
72
+ task :todo do
73
+ sh "egrep -r 'FIXME|TODO' src include lib"
74
+ end
75
+
76
+ desc 'Run the benchmark script'
77
+ task benchmark: :build do
78
+ require_relative './test/benchmark'
79
+ end
80
+
81
+ # # # # Docker Tasks (used for CI) # # # #
82
+
83
# Extra flags for `docker run`:
#   - on CI: forward the CI environment variable into the container
#   - interactive terminal (and not CI): allocate a TTY
#   - otherwise: nil (interpolates as an empty string)
DOCKER_FLAGS =
  if ENV['CI']
    "-e CI=#{ENV['CI']}"
  elsif STDOUT.isatty
    '-i -t'
  end
89
+
90
+ task :docker_build do
91
+ sh 'docker build -t natalie-parser .'
92
+ end
93
+
94
+ task docker_bash: :docker_build do
95
+ sh 'docker run -it --rm --entrypoint bash natalie-parser'
96
+ end
97
+
98
+ task :docker_build_clang do
99
+ sh 'docker build -t natalie-parser-clang --build-arg CC=clang --build-arg CXX=clang++ .'
100
+ end
101
+
102
+ task :docker_build_ruby27 do
103
+ sh 'docker build -t natalie-parser-ruby27 --build-arg IMAGE="ruby:2.7" .'
104
+ end
105
+
106
+ task docker_test: %i[docker_test_gcc docker_test_clang]
107
+
108
+ task docker_test_gcc: :docker_build do
109
+ sh "docker run #{DOCKER_FLAGS} --rm --entrypoint rake natalie-parser test"
110
+ end
111
+
112
+ task docker_test_clang: :docker_build_clang do
113
+ sh "docker run #{DOCKER_FLAGS} --rm --entrypoint rake natalie-parser-clang test"
114
+ end
115
+
116
+ # # # # Build Compile Database # # # #
117
+
118
# Only record build commands when the `compiledb` tool is available.
# stdout is redirected first and stderr then pointed at it, so the probe is
# completely silent (the reverse order leaves stderr attached to the terminal).
if system('which compiledb >/dev/null 2>&1')
  $compiledb_out = []

  # Tee everything written through $stderr.puts into $compiledb_out
  # (presumably this is how rake echoes the commands it runs -- verify against rake).
  def $stderr.puts(str)
    write(str + "\n")
    $compiledb_out << str
  end

  # Dump the captured lines to build/build.log and let compiledb turn them
  # into a compile database. Does nothing when no lines were captured.
  task :write_compile_database do
    if $compiledb_out.any?
      File.write('build/build.log', $compiledb_out.join("\n"))
      sh 'compiledb < build/build.log'
    end
  end
else
  task :write_compile_database do
    # noop
  end
end
137
+
138
+ # # # # Internal Tasks and Rules # # # #
139
+
140
+ STANDARD = 'c++17'
141
+ HEADERS = Rake::FileList['include/**/{*.h,*.hpp}']
142
+ SOURCES = Rake::FileList['src/**/*.{c,cpp}']
143
+ OBJECT_FILES = SOURCES.sub('src/', 'build/').pathmap('%p.o')
144
+
145
+ require 'tempfile'
146
+
147
+ task :build_dir do
148
+ mkdir_p 'build/lexer' unless File.exist?('build/lexer')
149
+ mkdir_p 'build/node' unless File.exist?('build/node')
150
+ end
151
+
152
+ rule '.cpp.o' => ['src/%n'] + HEADERS do |t|
153
+ sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
154
+ end
155
+
156
+ rule %r{lexer/.*\.cpp\.o$} => ['src/lexer/%n'] + HEADERS do |t|
157
+ sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
158
+ end
159
+
160
+ rule %r{node/.*\.cpp\.o$} => ['src/node/%n'] + HEADERS do |t|
161
+ sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
162
+ end
163
+
164
+ multitask objects: OBJECT_FILES
165
+
166
+ file 'build/libnatalie_parser.a' => HEADERS + [:objects] do |t|
167
+ sh "ar rcs #{t.name} #{OBJECT_FILES}"
168
+ end
169
+
170
# Builds the MRI C extension via extconf.rb + make inside ext/natalie_parser.
# Rebuilt whenever the extension sources or any library source/header changes.
file "ext/natalie_parser/natalie_parser.#{so_ext}" => [
  'ext/natalie_parser/natalie_parser.cpp',
  'ext/natalie_parser/mri_creator.hpp',
] + SOURCES + HEADERS do |t|
  build_dir = File.expand_path('ext/natalie_parser', __dir__)
  # Start from a clean slate: drop stale objects and the previously built
  # extension. Use the task's own target name so the right file is removed on
  # every platform (`.bundle` on darwin, `.so` elsewhere).
  Rake::FileList['ext/natalie_parser/*.o'].each { |path| rm path }
  rm_rf t.name
  sh <<-SH
    cd #{build_dir} && \
    ruby extconf.rb && \
    make -j
  SH
end
183
+
184
+ file 'build/fragments.hpp' => ['test/parser_test.rb', 'test/support/extract_parser_test_fragments.rb'] do
185
+ sh 'ruby -I lib:ext test/support/extract_parser_test_fragments.rb'
186
+ end
187
+
188
+ file 'build/asan_test' => ['test/asan_test.cpp', 'build/fragments.hpp', :library] do |t|
189
+ sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -I build -I include -o #{t.name} #{t.source} -L build -lnatalie_parser"
190
+ end
191
+
192
+ task :bundle_install do
193
+ sh 'bundle check || bundle install'
194
+ end
195
+
196
# C compiler command (memoized).
# Preference order: $CC from the environment, then `ccache cc` when ccache is
# on the PATH, then plain `cc`.
def cc
  @cc ||=
    if ENV['CC']
      ENV['CC']
    # stdout first, then stderr onto it, so the probe prints nothing at all
    elsif system('which ccache >/dev/null 2>&1')
      'ccache cc'
    else
      'cc'
    end
end
206
+
207
# C++ compiler command (memoized).
# Preference order: $CXX from the environment, then `ccache c++` when ccache
# is on the PATH, then plain `c++`.
def cxx
  @cxx ||=
    if ENV['CXX']
      ENV['CXX']
    # stdout first, then stderr onto it, so the probe prints nothing at all
    elsif system('which ccache >/dev/null 2>&1')
      'ccache c++'
    else
      'c++'
    end
end
217
+
218
# Compiler flags for the library build.
# BUILD=release selects optimized flags; anything else selects the strict,
# AddressSanitizer-enabled development flags. Include paths are appended as
# `-I <path>` entries.
def cxx_flags
  release_flags = %w[-fPIC -g -O2]
  development_flags = %w[-fPIC -g -Wall -Wextra -Werror -fsanitize=address]

  mode_flags = ENV['BUILD'] == 'release' ? release_flags : development_flags
  include_flags = include_paths.map { |path| "-I #{path}" }
  mode_flags + include_flags
end
239
+
240
# Absolute header search paths: just the `include` directory next to this file.
def include_paths
  %w[include].map { |dir| File.expand_path(dir, __dir__) }
end
@@ -0,0 +1,9 @@
1
# mkmf configuration for the natalie_parser MRI extension.
require 'mkmf'

# Build as C++17 with debug info, against the shared project headers.
$CXXFLAGS += ' -g -std=c++17'
$INCFLAGS += ' -I ../../include'

# Compile the library sources together with the extension glue file.
$srcs = Dir['../../src/**/*.cpp', 'natalie_parser.cpp']

# Let make locate those sources in each library source directory.
%w[src src/lexer src/node].each do |source_dir|
  $VPATH << "$(srcdir)/../../#{source_dir}"
end

create_header
create_makefile 'natalie_parser'
@@ -0,0 +1,139 @@
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
3
+ #include "ruby/intern.h"
4
+
5
+ #include "natalie_parser/creator.hpp"
6
+ #include "natalie_parser/node.hpp"
7
+
8
+ extern VALUE Sexp;
9
+
10
+ namespace NatalieParser {
11
+
12
+ class MRICreator : public Creator {
13
+ public:
14
+ MRICreator(const Node &node)
15
+ : Creator { node.file().static_cast_as<const String>(), node.line(), node.column() } {
16
+ reset_sexp();
17
+ }
18
+
19
+ MRICreator(const MRICreator &other)
20
+ : Creator { other.file(), other.line(), other.column() } {
21
+ reset_sexp();
22
+ }
23
+
24
+ virtual ~MRICreator() { }
25
+
26
+ virtual void reset_sexp() override {
27
+ m_sexp = rb_class_new_instance(0, nullptr, Sexp);
28
+ rb_ivar_set(m_sexp, rb_intern("@file"), get_file_string(file()));
29
+ rb_ivar_set(m_sexp, rb_intern("@line"), rb_int_new(line() + 1));
30
+ rb_ivar_set(m_sexp, rb_intern("@column"), rb_int_new(column() + 1));
31
+ }
32
+
33
+ virtual void set_comments(const TM::String &comments) override {
34
+ auto string_obj = rb_utf8_str_new(comments.c_str(), comments.length());
35
+ rb_ivar_set(m_sexp, rb_intern("@comments"), string_obj);
36
+ }
37
+
38
+ virtual void set_type(const char *type) override {
39
+ rb_ary_store(m_sexp, 0, ID2SYM(rb_intern(type)));
40
+ }
41
+
42
+ virtual void append(const Node &node) override {
43
+ if (node.type() == Node::Type::Nil) {
44
+ rb_ary_push(m_sexp, Qnil);
45
+ return;
46
+ }
47
+ MRICreator creator { node };
48
+ creator.set_assignment(assignment());
49
+ node.transform(&creator);
50
+ rb_ary_push(m_sexp, creator.sexp());
51
+ }
52
+
53
+ virtual void append_array(const ArrayNode &array) override {
54
+ MRICreator creator { array };
55
+ creator.set_assignment(assignment());
56
+ array.ArrayNode::transform(&creator);
57
+ rb_ary_push(m_sexp, creator.sexp());
58
+ }
59
+
60
+ virtual void append_false() override {
61
+ rb_ary_push(m_sexp, Qfalse);
62
+ }
63
+
64
+ virtual void append_float(double number) override {
65
+ rb_ary_push(m_sexp, rb_float_new(number));
66
+ }
67
+
68
+ virtual void append_integer(long long number) override {
69
+ rb_ary_push(m_sexp, rb_int_new(number));
70
+ }
71
+
72
+ virtual void append_integer(TM::String &number) override {
73
+ auto string_obj = rb_utf8_str_new(number.c_str(), number.length());
74
+ rb_ary_push(m_sexp, rb_Integer(string_obj));
75
+ }
76
+
77
+ virtual void append_nil() override {
78
+ rb_ary_push(m_sexp, Qnil);
79
+ }
80
+
81
+ virtual void append_range(long long first, long long last, bool exclude_end) override {
82
+ rb_ary_push(m_sexp, rb_range_new(rb_int_new(first), rb_int_new(last), exclude_end ? Qtrue : Qfalse));
83
+ }
84
+
85
+ virtual void append_regexp(TM::String &pattern, int options) override {
86
+ auto encoding = pattern.contains_utf8_encoded_multibyte_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
87
+ auto regexp = rb_enc_reg_new(pattern.c_str(), pattern.size(), encoding, options);
88
+ rb_ary_push(m_sexp, regexp);
89
+ }
90
+
91
+ virtual void append_sexp(std::function<void(Creator *)> fn) override {
92
+ MRICreator creator { *this };
93
+ fn(&creator);
94
+ rb_ary_push(m_sexp, creator.sexp());
95
+ }
96
+
97
+ virtual void append_string(TM::String &string) override {
98
+ auto encoding = string.contains_seemingly_valid_utf8_encoded_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
99
+ rb_ary_push(m_sexp, rb_enc_str_new(string.c_str(), string.length(), encoding));
100
+ }
101
+
102
+ virtual void append_symbol(TM::String &name) override {
103
+ auto encoding = name.contains_utf8_encoded_multibyte_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
104
+ auto symbol = ID2SYM(rb_intern3(name.c_str(), name.size(), encoding));
105
+ rb_ary_push(m_sexp, symbol);
106
+ }
107
+
108
+ virtual void append_true() override {
109
+ rb_ary_push(m_sexp, Qtrue);
110
+ }
111
+
112
+ virtual void wrap(const char *type) override {
113
+ auto inner = m_sexp;
114
+ reset_sexp();
115
+ set_type(type);
116
+ rb_ary_push(m_sexp, inner);
117
+ }
118
+
119
+ VALUE sexp() const { return m_sexp; }
120
+
121
+ private:
122
+ VALUE m_sexp { Qnil };
123
+
124
+ static VALUE get_file_string(SharedPtr<const String> file) {
125
+ auto file_string = s_file_cache.get(*file);
126
+ if (!file_string) {
127
+ file_string = rb_str_new(file->c_str(), file->length());
128
+ // FIXME: Seems there is no way to un-register and object. :-(
129
+ rb_gc_register_mark_object(file_string);
130
+ s_file_cache.put(*file, file_string);
131
+ }
132
+ return file_string;
133
+ }
134
+
135
+ // TODO: Move this to the Parser object, pass it in, clean it up when finished with it.
136
+ // (Otherwise we leak memory if the user parses lots of different files in a long-running process.)
137
+ inline static TM::Hashmap<const String, VALUE> s_file_cache { TM::HashType::TMString };
138
+ };
139
+ }
@@ -0,0 +1,144 @@
1
+ #include "extconf.h"
2
+ #include "ruby.h"
3
+ #include "ruby/encoding.h"
4
+ #include "ruby/intern.h"
5
+ #include "stdio.h"
6
+
7
+ // these includes MUST come after the ones above
8
+ #include "mri_creator.hpp"
9
+ #include "natalie_parser/parser.hpp"
10
+
11
+ VALUE Parser;
12
+ VALUE Sexp;
13
+
14
+ extern "C" {
15
+
16
+ VALUE initialize(int argc, VALUE *argv, VALUE self) {
17
+ if (argc < 1 || argc > 2)
18
+ rb_raise(rb_eSyntaxError,
19
+ "wrong number of arguments (given %d, expected 1..2)", argc);
20
+ rb_ivar_set(self, rb_intern("@code"), argv[0]);
21
+ VALUE path;
22
+ if (argc > 1)
23
+ path = argv[1];
24
+ else
25
+ path = rb_str_new_cstr("(string)");
26
+ rb_ivar_set(self, rb_intern("@path"), path);
27
+ return self;
28
+ }
29
+
30
+ VALUE node_to_ruby(TM::SharedPtr<NatalieParser::Node> node) {
31
+ NatalieParser::MRICreator creator { node.ref() };
32
+ node->transform(&creator);
33
+ return creator.sexp();
34
+ }
35
+
36
+ VALUE parse_on_instance(VALUE self) {
37
+ VALUE code = rb_ivar_get(self, rb_intern("@code"));
38
+ VALUE path = rb_ivar_get(self, rb_intern("@path"));
39
+ auto code_string = new TM::String { StringValueCStr(code) };
40
+ auto path_string = new TM::String { StringValueCStr(path) };
41
+ auto parser = NatalieParser::Parser { code_string, path_string };
42
+ try {
43
+ auto tree = parser.tree();
44
+ VALUE ast = node_to_ruby(tree);
45
+ return ast;
46
+ } catch (NatalieParser::Parser::SyntaxError &error) {
47
+ rb_raise(rb_eSyntaxError, "%s", error.message());
48
+ }
49
+ }
50
+
51
+ VALUE parse(int argc, VALUE *argv, VALUE self) {
52
+ VALUE parser = rb_class_new_instance(argc, argv, Parser);
53
+ return parse_on_instance(parser);
54
+ }
55
+
56
+ VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
57
+ if (token.is_eof())
58
+ return Qnil;
59
+ try {
60
+ token.validate();
61
+ } catch (NatalieParser::Parser::SyntaxError &error) {
62
+ rb_raise(rb_eSyntaxError, "%s", error.message());
63
+ }
64
+ const char *type = token.type_value();
65
+ if (!type) abort(); // FIXME: assert no workie?
66
+ auto hash = rb_hash_new();
67
+ rb_hash_aset(hash, ID2SYM(rb_intern("type")), ID2SYM(rb_intern(type)));
68
+ auto lit = token.literal_or_blank();
69
+ switch (token.type()) {
70
+ case NatalieParser::Token::Type::Bignum:
71
+ case NatalieParser::Token::Type::Doc:
72
+ case NatalieParser::Token::Type::String: {
73
+ auto literal = token.literal_string();
74
+ rb_hash_aset(hash, ID2SYM(rb_intern("literal")), rb_utf8_str_new(literal->c_str(), literal->length()));
75
+ break;
76
+ }
77
+ case NatalieParser::Token::Type::BackRef:
78
+ case NatalieParser::Token::Type::BareName:
79
+ case NatalieParser::Token::Type::ClassVariable:
80
+ case NatalieParser::Token::Type::Constant:
81
+ case NatalieParser::Token::Type::GlobalVariable:
82
+ case NatalieParser::Token::Type::InstanceVariable:
83
+ case NatalieParser::Token::Type::Symbol:
84
+ case NatalieParser::Token::Type::SymbolKey: {
85
+ auto literal = token.literal_string();
86
+ rb_hash_aset(hash, ID2SYM(rb_intern("literal")), ID2SYM(rb_intern3(literal->c_str(), literal->size(), rb_utf8_encoding())));
87
+ break;
88
+ }
89
+ case NatalieParser::Token::Type::Fixnum:
90
+ case NatalieParser::Token::Type::NthRef:
91
+ rb_hash_aset(hash, ID2SYM(rb_intern("literal")), rb_int_new(token.get_fixnum()));
92
+ break;
93
+ case NatalieParser::Token::Type::Float:
94
+ rb_hash_aset(hash, ID2SYM(rb_intern("literal")), rb_float_new(token.get_double()));
95
+ break;
96
+ case NatalieParser::Token::Type::InterpolatedRegexpEnd:
97
+ if (token.has_literal()) {
98
+ auto options = token.literal_string();
99
+ rb_hash_aset(hash, ID2SYM(rb_intern("options")), rb_str_new(options->c_str(), options->length()));
100
+ }
101
+ break;
102
+ default:
103
+ void();
104
+ }
105
+ if (include_location_info) {
106
+ rb_hash_aset(hash, ID2SYM(rb_intern("line")), rb_int_new(token.line()));
107
+ rb_hash_aset(hash, ID2SYM(rb_intern("column")), rb_int_new(token.column()));
108
+ }
109
+ return hash;
110
+ }
111
+
112
+ VALUE tokens_on_instance(VALUE self, VALUE include_location_info = Qfalse) {
113
+ VALUE code = rb_ivar_get(self, rb_intern("@code"));
114
+ VALUE path = rb_ivar_get(self, rb_intern("@path"));
115
+ auto code_string = new TM::String { StringValueCStr(code) };
116
+ auto path_string = new TM::String { StringValueCStr(path) };
117
+ auto lexer = NatalieParser::Lexer { code_string, path_string };
118
+ auto array = rb_ary_new();
119
+ auto the_tokens = lexer.tokens();
120
+ for (auto token : *the_tokens) {
121
+ auto token_value = token_to_ruby(token, RTEST(include_location_info));
122
+ if (token_value != Qnil && token_value != Qfalse)
123
+ rb_ary_push(array, token_value);
124
+ }
125
+ return array;
126
+ }
127
+
128
+ VALUE tokens(int argc, VALUE *argv, VALUE self) {
129
+ VALUE parser = rb_class_new_instance(1, argv, Parser);
130
+ VALUE include_location_info = argc > 1 ? argv[1] : Qfalse;
131
+ return tokens_on_instance(parser, include_location_info);
132
+ }
133
+
134
+ void Init_natalie_parser() {
135
+ int error;
136
+ Sexp = rb_const_get(rb_cObject, rb_intern("Sexp"));
137
+ Parser = rb_define_class("NatalieParser", rb_cObject);
138
+ rb_define_method(Parser, "initialize", initialize, -1);
139
+ rb_define_method(Parser, "parse", parse_on_instance, 0);
140
+ rb_define_method(Parser, "tokens", tokens_on_instance, 1);
141
+ rb_define_singleton_method(Parser, "parse", parse, -1);
142
+ rb_define_singleton_method(Parser, "tokens", tokens, -1);
143
+ }
144
+ }