natalie_parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Dockerfile +26 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +55 -0
  7. data/Rakefile +242 -0
  8. data/ext/natalie_parser/extconf.rb +9 -0
  9. data/ext/natalie_parser/mri_creator.hpp +139 -0
  10. data/ext/natalie_parser/natalie_parser.cpp +144 -0
  11. data/include/natalie_parser/creator/debug_creator.hpp +113 -0
  12. data/include/natalie_parser/creator.hpp +108 -0
  13. data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
  14. data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
  15. data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
  16. data/include/natalie_parser/lexer.hpp +135 -0
  17. data/include/natalie_parser/node/alias_node.hpp +35 -0
  18. data/include/natalie_parser/node/arg_node.hpp +74 -0
  19. data/include/natalie_parser/node/array_node.hpp +34 -0
  20. data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
  21. data/include/natalie_parser/node/assignment_node.hpp +34 -0
  22. data/include/natalie_parser/node/back_ref_node.hpp +28 -0
  23. data/include/natalie_parser/node/begin_block_node.hpp +25 -0
  24. data/include/natalie_parser/node/begin_node.hpp +52 -0
  25. data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
  26. data/include/natalie_parser/node/bignum_node.hpp +37 -0
  27. data/include/natalie_parser/node/block_node.hpp +55 -0
  28. data/include/natalie_parser/node/block_pass_node.hpp +33 -0
  29. data/include/natalie_parser/node/break_node.hpp +32 -0
  30. data/include/natalie_parser/node/call_node.hpp +85 -0
  31. data/include/natalie_parser/node/case_in_node.hpp +40 -0
  32. data/include/natalie_parser/node/case_node.hpp +52 -0
  33. data/include/natalie_parser/node/case_when_node.hpp +43 -0
  34. data/include/natalie_parser/node/class_node.hpp +39 -0
  35. data/include/natalie_parser/node/colon2_node.hpp +44 -0
  36. data/include/natalie_parser/node/colon3_node.hpp +34 -0
  37. data/include/natalie_parser/node/constant_node.hpp +26 -0
  38. data/include/natalie_parser/node/def_node.hpp +55 -0
  39. data/include/natalie_parser/node/defined_node.hpp +33 -0
  40. data/include/natalie_parser/node/encoding_node.hpp +26 -0
  41. data/include/natalie_parser/node/end_block_node.hpp +25 -0
  42. data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
  43. data/include/natalie_parser/node/false_node.hpp +23 -0
  44. data/include/natalie_parser/node/fixnum_node.hpp +36 -0
  45. data/include/natalie_parser/node/float_node.hpp +36 -0
  46. data/include/natalie_parser/node/hash_node.hpp +34 -0
  47. data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
  48. data/include/natalie_parser/node/identifier_node.hpp +123 -0
  49. data/include/natalie_parser/node/if_node.hpp +43 -0
  50. data/include/natalie_parser/node/infix_op_node.hpp +46 -0
  51. data/include/natalie_parser/node/interpolated_node.hpp +33 -0
  52. data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
  53. data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
  54. data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
  55. data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
  56. data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
  57. data/include/natalie_parser/node/iter_node.hpp +45 -0
  58. data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
  59. data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
  60. data/include/natalie_parser/node/logical_and_node.hpp +40 -0
  61. data/include/natalie_parser/node/logical_or_node.hpp +40 -0
  62. data/include/natalie_parser/node/match_node.hpp +38 -0
  63. data/include/natalie_parser/node/module_node.hpp +32 -0
  64. data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
  65. data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
  66. data/include/natalie_parser/node/next_node.hpp +37 -0
  67. data/include/natalie_parser/node/nil_node.hpp +23 -0
  68. data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
  69. data/include/natalie_parser/node/node.hpp +155 -0
  70. data/include/natalie_parser/node/node_with_args.hpp +47 -0
  71. data/include/natalie_parser/node/not_match_node.hpp +35 -0
  72. data/include/natalie_parser/node/not_node.hpp +37 -0
  73. data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
  74. data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
  75. data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
  76. data/include/natalie_parser/node/op_assign_node.hpp +47 -0
  77. data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
  78. data/include/natalie_parser/node/pin_node.hpp +33 -0
  79. data/include/natalie_parser/node/range_node.hpp +52 -0
  80. data/include/natalie_parser/node/redo_node.hpp +20 -0
  81. data/include/natalie_parser/node/regexp_node.hpp +36 -0
  82. data/include/natalie_parser/node/retry_node.hpp +20 -0
  83. data/include/natalie_parser/node/return_node.hpp +34 -0
  84. data/include/natalie_parser/node/safe_call_node.hpp +31 -0
  85. data/include/natalie_parser/node/sclass_node.hpp +37 -0
  86. data/include/natalie_parser/node/self_node.hpp +23 -0
  87. data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
  88. data/include/natalie_parser/node/shell_node.hpp +32 -0
  89. data/include/natalie_parser/node/splat_node.hpp +39 -0
  90. data/include/natalie_parser/node/splat_value_node.hpp +32 -0
  91. data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
  92. data/include/natalie_parser/node/string_node.hpp +42 -0
  93. data/include/natalie_parser/node/super_node.hpp +44 -0
  94. data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
  95. data/include/natalie_parser/node/symbol_node.hpp +30 -0
  96. data/include/natalie_parser/node/to_array_node.hpp +33 -0
  97. data/include/natalie_parser/node/true_node.hpp +23 -0
  98. data/include/natalie_parser/node/unary_op_node.hpp +41 -0
  99. data/include/natalie_parser/node/undef_node.hpp +31 -0
  100. data/include/natalie_parser/node/until_node.hpp +21 -0
  101. data/include/natalie_parser/node/while_node.hpp +52 -0
  102. data/include/natalie_parser/node/yield_node.hpp +29 -0
  103. data/include/natalie_parser/node.hpp +89 -0
  104. data/include/natalie_parser/parser.hpp +218 -0
  105. data/include/natalie_parser/token.hpp +842 -0
  106. data/include/tm/defer.hpp +34 -0
  107. data/include/tm/hashmap.hpp +826 -0
  108. data/include/tm/macros.hpp +16 -0
  109. data/include/tm/optional.hpp +223 -0
  110. data/include/tm/owned_ptr.hpp +186 -0
  111. data/include/tm/recursion_guard.hpp +156 -0
  112. data/include/tm/shared_ptr.hpp +259 -0
  113. data/include/tm/string.hpp +1447 -0
  114. data/include/tm/tests.hpp +78 -0
  115. data/include/tm/vector.hpp +796 -0
  116. data/lib/natalie_parser/sexp.rb +36 -0
  117. data/lib/natalie_parser/version.rb +5 -0
  118. data/lib/natalie_parser.rb +3 -0
  119. data/natalie_parser.gemspec +23 -0
  120. data/src/lexer/interpolated_string_lexer.cpp +88 -0
  121. data/src/lexer/regexp_lexer.cpp +95 -0
  122. data/src/lexer/word_array_lexer.cpp +134 -0
  123. data/src/lexer.cpp +1703 -0
  124. data/src/node/alias_node.cpp +11 -0
  125. data/src/node/assignment_node.cpp +33 -0
  126. data/src/node/begin_node.cpp +29 -0
  127. data/src/node/begin_rescue_node.cpp +33 -0
  128. data/src/node/class_node.cpp +22 -0
  129. data/src/node/interpolated_regexp_node.cpp +19 -0
  130. data/src/node/interpolated_shell_node.cpp +25 -0
  131. data/src/node/interpolated_string_node.cpp +111 -0
  132. data/src/node/interpolated_symbol_node.cpp +25 -0
  133. data/src/node/match_node.cpp +14 -0
  134. data/src/node/module_node.cpp +21 -0
  135. data/src/node/multiple_assignment_node.cpp +37 -0
  136. data/src/node/node.cpp +10 -0
  137. data/src/node/node_with_args.cpp +35 -0
  138. data/src/node/op_assign_node.cpp +36 -0
  139. data/src/node/string_node.cpp +33 -0
  140. data/src/parser.cpp +2972 -0
  141. data/src/token.cpp +27 -0
  142. metadata +186 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: '048e048ede42652f8a81298a3310313f8bf9929ba5cd952a89d3269d8333b363'
4
+ data.tar.gz: 5c2388c8125b1444c454c0ee3a0d4a07f305cc3fd668d8e03dbc4404201a50f7
5
+ SHA512:
6
+ metadata.gz: 55932c3dc24ceeeab109a4a980ddf96babf276ced15dffb68d7c013ca5cc246d2551b59acc94f82c1ec5c0073859267ace3a264a3d8b9470465672ea5aebb59d
7
+ data.tar.gz: bcbaaad396877bcbf6453e8d55ad3f46dcb68ba09bcfe2f6cf0f1fa6d96966c646f530e3872d77736ef40b5818feed888d69fe33e62beeb95dcfd1c6f5115203
data/CHANGELOG.md ADDED
@@ -0,0 +1,22 @@
1
+ # Changelog
2
+
3
+ ## 1.0.0 (2022-06-03)
4
+
5
+ ### Summary
6
+
7
+ This is the initial public release. The 1.0 milestone was chosen as soon
8
+ as NatalieParser was useful for integration back with the upstream Natalie
9
+ compiler project, i.e. it could fully replace RubyParser as the parser
10
+ in use by Natalie.
11
+
12
+ That is not to say that NatalieParser is _complete_ -- it is merely _useful_.
13
+
14
+ These are the features known to still be missing in this release:
15
+
16
+ - [ ] Support different source encodings
17
+ - [ ] Support more of the Ruby 3.0 syntax
18
+ - [ ] Argument forwarding (`...`)
19
+ - [ ] Pattern matching
20
+ - [ ] Numbered block parameters (`_1`, `_2`, etc.)
21
+ - [ ] Non-ASCII identifiers
22
+ - [ ] Rational and Complex literals (`1r` and `2i`)
data/Dockerfile ADDED
@@ -0,0 +1,26 @@
# Image used to build and test Natalie Parser.
# IMAGE selects the Ruby base image; CC/CXX select the compiler toolchain.
ARG IMAGE=ruby:3.0
FROM $IMAGE

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y -q build-essential clang
RUN gem install bundler --no-doc

ENV LC_ALL=C.UTF-8

# Absolute path so the working directory always matches the COPY target below,
# regardless of the working directory configured by the base image.
WORKDIR /natalie_parser

# Install gems before copying the sources so this layer is reused when only code changes.
COPY Gemfile /natalie_parser/
RUN bundle install

ARG CC=gcc
ENV CC=$CC
ARG CXX=g++
ENV CXX=$CXX

COPY Rakefile Rakefile
COPY ext ext
COPY lib lib
COPY src src
COPY include include
RUN rake

# Tests are copied last so editing them does not invalidate the build layer.
COPY test test
data/Gemfile ADDED
@@ -0,0 +1,10 @@
# NOTE: These gems are only required to develop and test this project.
# Consuming the library as a C extension for MRI needs none of them.

source "https://rubygems.org"

# Build tooling
gem "rake"

# Test framework and helpers
gem "minitest"
gem "minitest-focus"
gem "minitest-reporters"

# Listed as a test dependency alongside the test framework above.
gem "ruby_parser"
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Tim Morgan and contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,55 @@
1
+ # Natalie Parser
2
+
3
+ [![github build status](https://github.com/natalie-lang/natalie_parser/actions/workflows/build.yml/badge.svg)](https://github.com/natalie-lang/natalie_parser/actions?query=workflow%3ABuild+branch%3Amaster)
4
+ [![MIT License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/natalie-lang/natalie_parser/blob/master/LICENSE)
5
+
6
+ This is a parser for the Ruby programming language, written in C++.
7
+ It was extracted from the [Natalie](https://github.com/natalie-lang/natalie) project.
8
+
9
+ You can use this library directly from a C/C++ project, or you can
10
+ build it as a Ruby gem and use it from Ruby itself.
11
+
12
+ We are currently targeting Ruby 3.0 syntax, but that will probably
13
+ change over time, depending on what things we want to support and
14
+ what kind of help we get from the community.
15
+
16
+ NOTE: This project is still very new and there are certainly bugs.
17
+ See the list below for things we already know about, but expect there
18
+ are more we don't know about yet. **We don't recommend you use this in
19
+ production applications.**
20
+
21
+ ## To Do
22
+
23
+ - [x] Parse the [Natalie](https://github.com/natalie-lang/natalie) compiler and standard library
24
+ - [x] Pass (mostly) the [RubyParser](https://github.com/seattlerb/ruby_parser) test suite
25
+ - [ ] Support different source encodings
26
+ - [ ] Support more of the Ruby 3.0 syntax
27
+ - [x] "Endless" method definition (`def foo = bar`)
28
+ - [ ] Argument forwarding (`...`)
29
+ - [ ] Pattern matching
30
+ - [ ] Numbered block parameters (`_1`, `_2`, etc.)
31
+ - [ ] Non-ASCII identifiers
32
+ - [ ] Rational and Complex literals (`1r` and `2i`)
33
+
34
+ ## Development
35
+
36
+ ```sh
37
+ rake
38
+ ruby -I lib:ext -r natalie_parser -e "p NatalieParser.parse('1 + 2')"
39
+ # => s(:block, s(:call, s(:lit, 1), :+, s(:lit, 2)))
40
+ ```
41
+
42
+ ### Running Tests
43
+
44
+ ```sh
45
+ rake test
46
+ ```
47
+
48
+ ## Copyright & License
49
+
50
+ Natalie Parser is copyright 2022, Tim Morgan and contributors. Natalie Parser is licensed
51
+ under the MIT License; see the `LICENSE` file in this directory for the full text.
52
+
53
+ ### Note about Outside Sources
54
+
55
+ The file `test/test_ruby_parser.rb` is copyright Ryan Davis and is licensed MIT.
data/Rakefile ADDED
@@ -0,0 +1,242 @@
task default: :build

desc 'Build Natalie Parser library and MRI C extension'
task build: %i[
  bundle_install
  build_dir
  library
  parser_c_ext
  write_compile_database
]

# File extension of the compiled MRI extension ('bundle' on macOS, 'so' elsewhere).
so_ext = RUBY_PLATFORM =~ /darwin/ ? 'bundle' : 'so'

desc 'Build Natalie Parser library'
task library: [:build_dir, "build/libnatalie_parser.a"]

desc 'Build Natalie Parser MRI C extension'
task parser_c_ext: [:build_dir, "ext/natalie_parser/natalie_parser.#{so_ext}"]

desc 'Remove temporary files created during build'
task :clean do
  Rake::FileList[%w[
    build/build.log
    build/*.o
    build/node
    build/asan_test
    ext/natalie_parser/*.{h,log,o}
  ]].each { |path| rm_rf path if File.exist?(path) }
end

desc 'Remove all generated files'
task :clobber do
  Rake::FileList[%w[
    build
    ext/natalie_parser/*.{so,bundle,h,log,o}
  ]].each { |path| rm_rf path if File.exist?(path) }
end

task distclean: :clobber

desc 'Run the test suite'
task test: [:build, 'build/asan_test'] do
  sh 'bundle exec ruby test/all.rb'
  sh 'build/asan_test'
  sh 'bundle exec ruby test/test_ruby_parser.rb'
end

desc 'Run the test suite when changes are made (requires entr binary)'
task :watch do
  files = Rake::FileList['**/*.cpp', '**/*.hpp', '**/*.rb']
  sh "ls #{files} | entr -c -s 'rake test'"
end

desc 'Show line counts for the project'
task :cloc do
  sh 'cloc include lib src test'
end

desc 'Generate tags file for development'
task :ctags do
  sh 'ctags -R --exclude=.cquery_cache --exclude=ext --exclude=build --append=no .'
end
task tags: :ctags

desc 'Format C++ code with clang-format'
task :format do
  sh "find include -type f -name '*.hpp' -exec clang-format -i --style=file {} +"
  sh "find src -type f -name '*.cpp' -exec clang-format -i --style=file {} +"
end

desc 'Show TODO and FIXME comments in the project'
task :todo do
  # `grep -E` replaces the obsolescent `egrep` alias (same extended-regexp matching).
  sh "grep -rE 'FIXME|TODO' src include lib"
end

desc 'Run the benchmark script'
task benchmark: :build do
  require_relative './test/benchmark'
end
80
+
# # # # Docker Tasks (used for CI) # # # #

# Extra flags for `docker run`:
# - under CI, forward the CI variable into the container;
# - on an interactive terminal, attach stdin and a TTY;
# - otherwise nil (interpolates as an empty string).
DOCKER_FLAGS =
  if ENV['CI']
    "-e CI=#{ENV['CI']}"
  elsif STDOUT.isatty
    '-i -t'
  end

# Image built with the Dockerfile defaults.
task :docker_build do
  sh 'docker build -t natalie-parser .'
end

# Interactive shell inside the default image.
task docker_bash: :docker_build do
  sh 'docker run -it --rm --entrypoint bash natalie-parser'
end

# Image built with clang/clang++ instead of the Dockerfile defaults.
task :docker_build_clang do
  sh 'docker build -t natalie-parser-clang --build-arg CC=clang --build-arg CXX=clang++ .'
end

# Image built on top of the Ruby 2.7 base image.
task :docker_build_ruby27 do
  sh 'docker build -t natalie-parser-ruby27 --build-arg IMAGE="ruby:2.7" .'
end

# Runs the test suite in both the gcc and clang images.
task docker_test: %i[docker_test_gcc docker_test_clang]

task docker_test_gcc: :docker_build do
  sh "docker run #{DOCKER_FLAGS} --rm --entrypoint rake natalie-parser test"
end

task docker_test_clang: :docker_build_clang do
  sh "docker run #{DOCKER_FLAGS} --rm --entrypoint rake natalie-parser-clang test"
end
115
+
# # # # Build Compile Database # # # #

# Redirect order matters: stdout must be pointed at /dev/null first, then
# stderr duplicated onto it; otherwise `which` errors still reach the terminal.
if system('which compiledb > /dev/null 2>&1')
  # Every line written through $stderr.puts during this rake run.
  $compiledb_out = []

  # Tee $stderr.puts into $compiledb_out while still writing to the real stderr.
  # NOTE(review): presumably this captures the commands echoed by `sh` — confirm.
  def $stderr.puts(str)
    write(str + "\n")
    $compiledb_out << str
  end

  # Writes the captured output to build/build.log and feeds it to compiledb.
  # Does nothing when nothing was captured (e.g. everything was up to date).
  task :write_compile_database do
    if $compiledb_out.any?
      File.write('build/build.log', $compiledb_out.join("\n"))
      sh 'compiledb < build/build.log'
    end
  end
else
  task :write_compile_database do
    # noop
  end
end
137
+
# # # # Internal Tasks and Rules # # # #

STANDARD = 'c++17'
HEADERS = Rake::FileList['include/**/{*.h,*.hpp}']
SOURCES = Rake::FileList['src/**/*.{c,cpp}']
# Each source maps to an object under build/, e.g. src/node/x.cpp -> build/node/x.cpp.o
OBJECT_FILES = SOURCES.sub('src/', 'build/').pathmap('%p.o')

require 'tempfile'

# Creates the output directories that mirror the src/ layout.
task :build_dir do
  mkdir_p 'build/lexer' unless File.exist?('build/lexer')
  mkdir_p 'build/node' unless File.exist?('build/node')
end

# Compile rules: one per source directory. Every object depends on all headers.
rule '.cpp.o' => ['src/%n'] + HEADERS do |t|
  sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
end

rule %r{lexer/.*\.cpp\.o$} => ['src/lexer/%n'] + HEADERS do |t|
  sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
end

rule %r{node/.*\.cpp\.o$} => ['src/node/%n'] + HEADERS do |t|
  sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
end

# multitask builds its prerequisites (the object files) in parallel.
multitask objects: OBJECT_FILES

file 'build/libnatalie_parser.a' => HEADERS + [:objects] do |t|
  sh "ar rcs #{t.name} #{OBJECT_FILES}"
end

# Builds the MRI extension with mkmf (extconf.rb + make) inside ext/natalie_parser.
file "ext/natalie_parser/natalie_parser.#{so_ext}" => [
  'ext/natalie_parser/natalie_parser.cpp',
  'ext/natalie_parser/mri_creator.hpp',
] + SOURCES + HEADERS do |t|
  build_dir = File.expand_path('ext/natalie_parser', __dir__)
  Rake::FileList['ext/natalie_parser/*.o'].each { |path| rm path }
  # Remove the stale target itself; t.name carries the platform-specific
  # extension (.so or .bundle) instead of a hard-coded .so.
  rm_rf t.name
  sh <<-SH
    cd #{build_dir} && \
    ruby extconf.rb && \
    make -j
  SH
end

file 'build/fragments.hpp' => ['test/parser_test.rb', 'test/support/extract_parser_test_fragments.rb'] do
  sh 'ruby -I lib:ext test/support/extract_parser_test_fragments.rb'
end

file 'build/asan_test' => ['test/asan_test.cpp', 'build/fragments.hpp', :library] do |t|
  sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -I build -I include -o #{t.name} #{t.source} -L build -lnatalie_parser"
end

# Installs gems only when `bundle check` reports something missing.
task :bundle_install do
  sh 'bundle check || bundle install'
end
195
+
# Returns the C compiler command (memoized).
# Uses ENV['CC'] when set; otherwise `cc`, wrapped in ccache if ccache is on PATH.
def cc
  @cc ||=
    if ENV['CC']
      ENV['CC']
    # stdout goes to /dev/null first, then stderr follows it, so the probe is silent.
    elsif system('which ccache > /dev/null 2>&1')
      'ccache cc'
    else
      'cc'
    end
end
206
+
# Returns the C++ compiler command (memoized).
# Uses ENV['CXX'] when set; otherwise `c++`, wrapped in ccache if ccache is on PATH.
def cxx
  @cxx ||=
    if ENV['CXX']
      ENV['CXX']
    # stdout goes to /dev/null first, then stderr follows it, so the probe is silent.
    elsif system('which ccache > /dev/null 2>&1')
      'ccache c++'
    else
      'c++'
    end
end
217
+
# Returns the compiler flags as an array of strings.
# BUILD=release selects optimized flags; any other value (or none) selects the
# strict warnings + AddressSanitizer set. Include paths are appended as `-I` flags.
def cxx_flags
  release_build = ENV['BUILD'] == 'release'
  base_flags =
    if release_build
      %w[-fPIC -g -O2]
    else
      %w[-fPIC -g -Wall -Wextra -Werror -fsanitize=address]
    end
  include_flags = include_paths.map { |path| "-I #{path}" }
  base_flags + include_flags
end
239
+
# Returns the absolute header search paths: the `include` directory next to this file.
def include_paths
  include_dir = File.expand_path('include', __dir__)
  [include_dir]
end
@@ -0,0 +1,9 @@
# mkmf configuration for the natalie_parser MRI extension.
require 'mkmf'

# Compile as C++17 with debug symbols, with the project headers on the include path.
$CXXFLAGS += ' -g -std=c++17'
$INCFLAGS += ' -I ../../include'

# Compile the parser sources together with the extension glue code.
$srcs = Dir['../../src/**/*.cpp', 'natalie_parser.cpp']

# Add each source directory to the make search path.
%w[src src/lexer src/node].each do |dir|
  $VPATH << "$(srcdir)/../../#{dir}"
end

create_header
create_makefile 'natalie_parser'
@@ -0,0 +1,139 @@
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
3
+ #include "ruby/intern.h"
4
+
5
+ #include "natalie_parser/creator.hpp"
6
+ #include "natalie_parser/node.hpp"
7
+
8
+ extern VALUE Sexp;
9
+
10
+ namespace NatalieParser {
11
+
12
+ class MRICreator : public Creator {
13
+ public:
14
+ MRICreator(const Node &node)
15
+ : Creator { node.file().static_cast_as<const String>(), node.line(), node.column() } {
16
+ reset_sexp();
17
+ }
18
+
19
+ MRICreator(const MRICreator &other)
20
+ : Creator { other.file(), other.line(), other.column() } {
21
+ reset_sexp();
22
+ }
23
+
24
+ virtual ~MRICreator() { }
25
+
26
+ virtual void reset_sexp() override {
27
+ m_sexp = rb_class_new_instance(0, nullptr, Sexp);
28
+ rb_ivar_set(m_sexp, rb_intern("@file"), get_file_string(file()));
29
+ rb_ivar_set(m_sexp, rb_intern("@line"), rb_int_new(line() + 1));
30
+ rb_ivar_set(m_sexp, rb_intern("@column"), rb_int_new(column() + 1));
31
+ }
32
+
33
+ virtual void set_comments(const TM::String &comments) override {
34
+ auto string_obj = rb_utf8_str_new(comments.c_str(), comments.length());
35
+ rb_ivar_set(m_sexp, rb_intern("@comments"), string_obj);
36
+ }
37
+
38
+ virtual void set_type(const char *type) override {
39
+ rb_ary_store(m_sexp, 0, ID2SYM(rb_intern(type)));
40
+ }
41
+
42
+ virtual void append(const Node &node) override {
43
+ if (node.type() == Node::Type::Nil) {
44
+ rb_ary_push(m_sexp, Qnil);
45
+ return;
46
+ }
47
+ MRICreator creator { node };
48
+ creator.set_assignment(assignment());
49
+ node.transform(&creator);
50
+ rb_ary_push(m_sexp, creator.sexp());
51
+ }
52
+
53
+ virtual void append_array(const ArrayNode &array) override {
54
+ MRICreator creator { array };
55
+ creator.set_assignment(assignment());
56
+ array.ArrayNode::transform(&creator);
57
+ rb_ary_push(m_sexp, creator.sexp());
58
+ }
59
+
60
+ virtual void append_false() override {
61
+ rb_ary_push(m_sexp, Qfalse);
62
+ }
63
+
64
+ virtual void append_float(double number) override {
65
+ rb_ary_push(m_sexp, rb_float_new(number));
66
+ }
67
+
68
+ virtual void append_integer(long long number) override {
69
+ rb_ary_push(m_sexp, rb_int_new(number));
70
+ }
71
+
72
+ virtual void append_integer(TM::String &number) override {
73
+ auto string_obj = rb_utf8_str_new(number.c_str(), number.length());
74
+ rb_ary_push(m_sexp, rb_Integer(string_obj));
75
+ }
76
+
77
+ virtual void append_nil() override {
78
+ rb_ary_push(m_sexp, Qnil);
79
+ }
80
+
81
+ virtual void append_range(long long first, long long last, bool exclude_end) override {
82
+ rb_ary_push(m_sexp, rb_range_new(rb_int_new(first), rb_int_new(last), exclude_end ? Qtrue : Qfalse));
83
+ }
84
+
85
+ virtual void append_regexp(TM::String &pattern, int options) override {
86
+ auto encoding = pattern.contains_utf8_encoded_multibyte_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
87
+ auto regexp = rb_enc_reg_new(pattern.c_str(), pattern.size(), encoding, options);
88
+ rb_ary_push(m_sexp, regexp);
89
+ }
90
+
91
+ virtual void append_sexp(std::function<void(Creator *)> fn) override {
92
+ MRICreator creator { *this };
93
+ fn(&creator);
94
+ rb_ary_push(m_sexp, creator.sexp());
95
+ }
96
+
97
+ virtual void append_string(TM::String &string) override {
98
+ auto encoding = string.contains_seemingly_valid_utf8_encoded_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
99
+ rb_ary_push(m_sexp, rb_enc_str_new(string.c_str(), string.length(), encoding));
100
+ }
101
+
102
+ virtual void append_symbol(TM::String &name) override {
103
+ auto encoding = name.contains_utf8_encoded_multibyte_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
104
+ auto symbol = ID2SYM(rb_intern3(name.c_str(), name.size(), encoding));
105
+ rb_ary_push(m_sexp, symbol);
106
+ }
107
+
108
+ virtual void append_true() override {
109
+ rb_ary_push(m_sexp, Qtrue);
110
+ }
111
+
112
+ virtual void wrap(const char *type) override {
113
+ auto inner = m_sexp;
114
+ reset_sexp();
115
+ set_type(type);
116
+ rb_ary_push(m_sexp, inner);
117
+ }
118
+
119
+ VALUE sexp() const { return m_sexp; }
120
+
121
+ private:
122
+ VALUE m_sexp { Qnil };
123
+
124
+ static VALUE get_file_string(SharedPtr<const String> file) {
125
+ auto file_string = s_file_cache.get(*file);
126
+ if (!file_string) {
127
+ file_string = rb_str_new(file->c_str(), file->length());
128
+ // FIXME: Seems there is no way to un-register and object. :-(
129
+ rb_gc_register_mark_object(file_string);
130
+ s_file_cache.put(*file, file_string);
131
+ }
132
+ return file_string;
133
+ }
134
+
135
+ // TODO: Move this to the Parser object, pass it in, clean it up when finished with it.
136
+ // (Otherwise we leak memory if the user parses lots of different files in a long-running process.)
137
+ inline static TM::Hashmap<const String, VALUE> s_file_cache { TM::HashType::TMString };
138
+ };
139
+ }
@@ -0,0 +1,144 @@
1
+ #include "extconf.h"
2
+ #include "ruby.h"
3
+ #include "ruby/encoding.h"
4
+ #include "ruby/intern.h"
5
+ #include "stdio.h"
6
+
7
+ // this includes MUST come after
8
+ #include "mri_creator.hpp"
9
+ #include "natalie_parser/parser.hpp"
10
+
11
+ VALUE Parser;
12
+ VALUE Sexp;
13
+
14
+ extern "C" {
15
+
16
+ VALUE initialize(int argc, VALUE *argv, VALUE self) {
17
+ if (argc < 1 || argc > 2)
18
+ rb_raise(rb_eSyntaxError,
19
+ "wrong number of arguments (given %d, expected 1..2)", argc);
20
+ rb_ivar_set(self, rb_intern("@code"), argv[0]);
21
+ VALUE path;
22
+ if (argc > 1)
23
+ path = argv[1];
24
+ else
25
+ path = rb_str_new_cstr("(string)");
26
+ rb_ivar_set(self, rb_intern("@path"), path);
27
+ return self;
28
+ }
29
+
30
+ VALUE node_to_ruby(TM::SharedPtr<NatalieParser::Node> node) {
31
+ NatalieParser::MRICreator creator { node.ref() };
32
+ node->transform(&creator);
33
+ return creator.sexp();
34
+ }
35
+
36
+ VALUE parse_on_instance(VALUE self) {
37
+ VALUE code = rb_ivar_get(self, rb_intern("@code"));
38
+ VALUE path = rb_ivar_get(self, rb_intern("@path"));
39
+ auto code_string = new TM::String { StringValueCStr(code) };
40
+ auto path_string = new TM::String { StringValueCStr(path) };
41
+ auto parser = NatalieParser::Parser { code_string, path_string };
42
+ try {
43
+ auto tree = parser.tree();
44
+ VALUE ast = node_to_ruby(tree);
45
+ return ast;
46
+ } catch (NatalieParser::Parser::SyntaxError &error) {
47
+ rb_raise(rb_eSyntaxError, "%s", error.message());
48
+ }
49
+ }
50
+
51
+ VALUE parse(int argc, VALUE *argv, VALUE self) {
52
+ VALUE parser = rb_class_new_instance(argc, argv, Parser);
53
+ return parse_on_instance(parser);
54
+ }
55
+
56
+ VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
57
+ if (token.is_eof())
58
+ return Qnil;
59
+ try {
60
+ token.validate();
61
+ } catch (NatalieParser::Parser::SyntaxError &error) {
62
+ rb_raise(rb_eSyntaxError, "%s", error.message());
63
+ }
64
+ const char *type = token.type_value();
65
+ if (!type) abort(); // FIXME: assert no workie?
66
+ auto hash = rb_hash_new();
67
+ rb_hash_aset(hash, ID2SYM(rb_intern("type")), ID2SYM(rb_intern(type)));
68
+ auto lit = token.literal_or_blank();
69
+ switch (token.type()) {
70
+ case NatalieParser::Token::Type::Bignum:
71
+ case NatalieParser::Token::Type::Doc:
72
+ case NatalieParser::Token::Type::String: {
73
+ auto literal = token.literal_string();
74
+ rb_hash_aset(hash, ID2SYM(rb_intern("literal")), rb_utf8_str_new(literal->c_str(), literal->length()));
75
+ break;
76
+ }
77
+ case NatalieParser::Token::Type::BackRef:
78
+ case NatalieParser::Token::Type::BareName:
79
+ case NatalieParser::Token::Type::ClassVariable:
80
+ case NatalieParser::Token::Type::Constant:
81
+ case NatalieParser::Token::Type::GlobalVariable:
82
+ case NatalieParser::Token::Type::InstanceVariable:
83
+ case NatalieParser::Token::Type::Symbol:
84
+ case NatalieParser::Token::Type::SymbolKey: {
85
+ auto literal = token.literal_string();
86
+ rb_hash_aset(hash, ID2SYM(rb_intern("literal")), ID2SYM(rb_intern3(literal->c_str(), literal->size(), rb_utf8_encoding())));
87
+ break;
88
+ }
89
+ case NatalieParser::Token::Type::Fixnum:
90
+ case NatalieParser::Token::Type::NthRef:
91
+ rb_hash_aset(hash, ID2SYM(rb_intern("literal")), rb_int_new(token.get_fixnum()));
92
+ break;
93
+ case NatalieParser::Token::Type::Float:
94
+ rb_hash_aset(hash, ID2SYM(rb_intern("literal")), rb_float_new(token.get_double()));
95
+ break;
96
+ case NatalieParser::Token::Type::InterpolatedRegexpEnd:
97
+ if (token.has_literal()) {
98
+ auto options = token.literal_string();
99
+ rb_hash_aset(hash, ID2SYM(rb_intern("options")), rb_str_new(options->c_str(), options->length()));
100
+ }
101
+ break;
102
+ default:
103
+ void();
104
+ }
105
+ if (include_location_info) {
106
+ rb_hash_aset(hash, ID2SYM(rb_intern("line")), rb_int_new(token.line()));
107
+ rb_hash_aset(hash, ID2SYM(rb_intern("column")), rb_int_new(token.column()));
108
+ }
109
+ return hash;
110
+ }
111
+
112
+ VALUE tokens_on_instance(VALUE self, VALUE include_location_info = Qfalse) {
113
+ VALUE code = rb_ivar_get(self, rb_intern("@code"));
114
+ VALUE path = rb_ivar_get(self, rb_intern("@path"));
115
+ auto code_string = new TM::String { StringValueCStr(code) };
116
+ auto path_string = new TM::String { StringValueCStr(path) };
117
+ auto lexer = NatalieParser::Lexer { code_string, path_string };
118
+ auto array = rb_ary_new();
119
+ auto the_tokens = lexer.tokens();
120
+ for (auto token : *the_tokens) {
121
+ auto token_value = token_to_ruby(token, RTEST(include_location_info));
122
+ if (token_value != Qnil && token_value != Qfalse)
123
+ rb_ary_push(array, token_value);
124
+ }
125
+ return array;
126
+ }
127
+
128
+ VALUE tokens(int argc, VALUE *argv, VALUE self) {
129
+ VALUE parser = rb_class_new_instance(1, argv, Parser);
130
+ VALUE include_location_info = argc > 1 ? argv[1] : Qfalse;
131
+ return tokens_on_instance(parser, include_location_info);
132
+ }
133
+
134
+ void Init_natalie_parser() {
135
+ int error;
136
+ Sexp = rb_const_get(rb_cObject, rb_intern("Sexp"));
137
+ Parser = rb_define_class("NatalieParser", rb_cObject);
138
+ rb_define_method(Parser, "initialize", initialize, -1);
139
+ rb_define_method(Parser, "parse", parse_on_instance, 0);
140
+ rb_define_method(Parser, "tokens", tokens_on_instance, 1);
141
+ rb_define_singleton_method(Parser, "parse", parse, -1);
142
+ rb_define_singleton_method(Parser, "tokens", tokens, -1);
143
+ }
144
+ }