natalie_parser 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/Dockerfile +26 -0
- data/Gemfile +10 -0
- data/LICENSE +21 -0
- data/README.md +55 -0
- data/Rakefile +242 -0
- data/ext/natalie_parser/extconf.rb +9 -0
- data/ext/natalie_parser/mri_creator.hpp +139 -0
- data/ext/natalie_parser/natalie_parser.cpp +144 -0
- data/include/natalie_parser/creator/debug_creator.hpp +113 -0
- data/include/natalie_parser/creator.hpp +108 -0
- data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
- data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
- data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
- data/include/natalie_parser/lexer.hpp +135 -0
- data/include/natalie_parser/node/alias_node.hpp +35 -0
- data/include/natalie_parser/node/arg_node.hpp +74 -0
- data/include/natalie_parser/node/array_node.hpp +34 -0
- data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
- data/include/natalie_parser/node/assignment_node.hpp +34 -0
- data/include/natalie_parser/node/back_ref_node.hpp +28 -0
- data/include/natalie_parser/node/begin_block_node.hpp +25 -0
- data/include/natalie_parser/node/begin_node.hpp +52 -0
- data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
- data/include/natalie_parser/node/bignum_node.hpp +37 -0
- data/include/natalie_parser/node/block_node.hpp +55 -0
- data/include/natalie_parser/node/block_pass_node.hpp +33 -0
- data/include/natalie_parser/node/break_node.hpp +32 -0
- data/include/natalie_parser/node/call_node.hpp +85 -0
- data/include/natalie_parser/node/case_in_node.hpp +40 -0
- data/include/natalie_parser/node/case_node.hpp +52 -0
- data/include/natalie_parser/node/case_when_node.hpp +43 -0
- data/include/natalie_parser/node/class_node.hpp +39 -0
- data/include/natalie_parser/node/colon2_node.hpp +44 -0
- data/include/natalie_parser/node/colon3_node.hpp +34 -0
- data/include/natalie_parser/node/constant_node.hpp +26 -0
- data/include/natalie_parser/node/def_node.hpp +55 -0
- data/include/natalie_parser/node/defined_node.hpp +33 -0
- data/include/natalie_parser/node/encoding_node.hpp +26 -0
- data/include/natalie_parser/node/end_block_node.hpp +25 -0
- data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
- data/include/natalie_parser/node/false_node.hpp +23 -0
- data/include/natalie_parser/node/fixnum_node.hpp +36 -0
- data/include/natalie_parser/node/float_node.hpp +36 -0
- data/include/natalie_parser/node/hash_node.hpp +34 -0
- data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
- data/include/natalie_parser/node/identifier_node.hpp +123 -0
- data/include/natalie_parser/node/if_node.hpp +43 -0
- data/include/natalie_parser/node/infix_op_node.hpp +46 -0
- data/include/natalie_parser/node/interpolated_node.hpp +33 -0
- data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
- data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
- data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
- data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
- data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
- data/include/natalie_parser/node/iter_node.hpp +45 -0
- data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
- data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
- data/include/natalie_parser/node/logical_and_node.hpp +40 -0
- data/include/natalie_parser/node/logical_or_node.hpp +40 -0
- data/include/natalie_parser/node/match_node.hpp +38 -0
- data/include/natalie_parser/node/module_node.hpp +32 -0
- data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
- data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
- data/include/natalie_parser/node/next_node.hpp +37 -0
- data/include/natalie_parser/node/nil_node.hpp +23 -0
- data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
- data/include/natalie_parser/node/node.hpp +155 -0
- data/include/natalie_parser/node/node_with_args.hpp +47 -0
- data/include/natalie_parser/node/not_match_node.hpp +35 -0
- data/include/natalie_parser/node/not_node.hpp +37 -0
- data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
- data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
- data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
- data/include/natalie_parser/node/op_assign_node.hpp +47 -0
- data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
- data/include/natalie_parser/node/pin_node.hpp +33 -0
- data/include/natalie_parser/node/range_node.hpp +52 -0
- data/include/natalie_parser/node/redo_node.hpp +20 -0
- data/include/natalie_parser/node/regexp_node.hpp +36 -0
- data/include/natalie_parser/node/retry_node.hpp +20 -0
- data/include/natalie_parser/node/return_node.hpp +34 -0
- data/include/natalie_parser/node/safe_call_node.hpp +31 -0
- data/include/natalie_parser/node/sclass_node.hpp +37 -0
- data/include/natalie_parser/node/self_node.hpp +23 -0
- data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
- data/include/natalie_parser/node/shell_node.hpp +32 -0
- data/include/natalie_parser/node/splat_node.hpp +39 -0
- data/include/natalie_parser/node/splat_value_node.hpp +32 -0
- data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
- data/include/natalie_parser/node/string_node.hpp +42 -0
- data/include/natalie_parser/node/super_node.hpp +44 -0
- data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
- data/include/natalie_parser/node/symbol_node.hpp +30 -0
- data/include/natalie_parser/node/to_array_node.hpp +33 -0
- data/include/natalie_parser/node/true_node.hpp +23 -0
- data/include/natalie_parser/node/unary_op_node.hpp +41 -0
- data/include/natalie_parser/node/undef_node.hpp +31 -0
- data/include/natalie_parser/node/until_node.hpp +21 -0
- data/include/natalie_parser/node/while_node.hpp +52 -0
- data/include/natalie_parser/node/yield_node.hpp +29 -0
- data/include/natalie_parser/node.hpp +89 -0
- data/include/natalie_parser/parser.hpp +218 -0
- data/include/natalie_parser/token.hpp +842 -0
- data/include/tm/defer.hpp +34 -0
- data/include/tm/hashmap.hpp +826 -0
- data/include/tm/macros.hpp +16 -0
- data/include/tm/optional.hpp +223 -0
- data/include/tm/owned_ptr.hpp +186 -0
- data/include/tm/recursion_guard.hpp +156 -0
- data/include/tm/shared_ptr.hpp +259 -0
- data/include/tm/string.hpp +1447 -0
- data/include/tm/tests.hpp +78 -0
- data/include/tm/vector.hpp +796 -0
- data/lib/natalie_parser/sexp.rb +36 -0
- data/lib/natalie_parser/version.rb +5 -0
- data/lib/natalie_parser.rb +3 -0
- data/natalie_parser.gemspec +23 -0
- data/src/lexer/interpolated_string_lexer.cpp +88 -0
- data/src/lexer/regexp_lexer.cpp +95 -0
- data/src/lexer/word_array_lexer.cpp +134 -0
- data/src/lexer.cpp +1703 -0
- data/src/node/alias_node.cpp +11 -0
- data/src/node/assignment_node.cpp +33 -0
- data/src/node/begin_node.cpp +29 -0
- data/src/node/begin_rescue_node.cpp +33 -0
- data/src/node/class_node.cpp +22 -0
- data/src/node/interpolated_regexp_node.cpp +19 -0
- data/src/node/interpolated_shell_node.cpp +25 -0
- data/src/node/interpolated_string_node.cpp +111 -0
- data/src/node/interpolated_symbol_node.cpp +25 -0
- data/src/node/match_node.cpp +14 -0
- data/src/node/module_node.cpp +21 -0
- data/src/node/multiple_assignment_node.cpp +37 -0
- data/src/node/node.cpp +10 -0
- data/src/node/node_with_args.cpp +35 -0
- data/src/node/op_assign_node.cpp +36 -0
- data/src/node/string_node.cpp +33 -0
- data/src/parser.cpp +2972 -0
- data/src/token.cpp +27 -0
- metadata +186 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: '048e048ede42652f8a81298a3310313f8bf9929ba5cd952a89d3269d8333b363'
|
4
|
+
data.tar.gz: 5c2388c8125b1444c454c0ee3a0d4a07f305cc3fd668d8e03dbc4404201a50f7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 55932c3dc24ceeeab109a4a980ddf96babf276ced15dffb68d7c013ca5cc246d2551b59acc94f82c1ec5c0073859267ace3a264a3d8b9470465672ea5aebb59d
|
7
|
+
data.tar.gz: bcbaaad396877bcbf6453e8d55ad3f46dcb68ba09bcfe2f6cf0f1fa6d96966c646f530e3872d77736ef40b5818feed888d69fe33e62beeb95dcfd1c6f5115203
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
## 1.0.0 (2022-06-03)
|
4
|
+
|
5
|
+
### Summary
|
6
|
+
|
7
|
+
This is the initial public release. The 1.0 milestone was chosen as soon
|
8
|
+
as NatalieParser was useful for integration back with the upstream Natalie
|
9
|
+
compiler project, i.e. it could fully replace RubyParser as the parser
|
10
|
+
in use by Natalie.
|
11
|
+
|
12
|
+
That is not to say that NatalieParser is _complete_ -- it is merely _useful_.
|
13
|
+
|
14
|
+
These are the features known to still be missing in this release:
|
15
|
+
|
16
|
+
- [ ] Support different source encodings
|
17
|
+
- [ ] Support more of the Ruby 3.0 syntax
|
18
|
+
- [ ] Argument forwarding (`...`)
|
19
|
+
- [ ] Pattern matching
|
20
|
+
- [ ] Numbered block parameters (`_1`, `_2`, etc.)
|
21
|
+
- [ ] Non-ASCII identifiers
|
22
|
+
- [ ] Rational and Complex literals (`1r` and `2i`)
|
data/Dockerfile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Base image is a build argument so other Ruby versions can be selected
# (e.g. --build-arg IMAGE="ruby:2.7").
ARG IMAGE=ruby:3.0
FROM $IMAGE

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y -q build-essential clang
RUN gem install bundler --no-doc

# Use the key=value form; the space-separated ENV form is a legacy syntax.
ENV LC_ALL=C.UTF-8

# Absolute path so the working directory always matches the absolute
# COPY destination below, regardless of the base image's default WORKDIR.
WORKDIR /natalie_parser

# Install gems before copying sources so this layer is reused when only code changes.
COPY Gemfile /natalie_parser/
RUN bundle install

# Compiler selection is a build argument exported to the environment for rake.
ARG CC=gcc
ENV CC=$CC
ARG CXX=g++
ENV CXX=$CXX

COPY Rakefile Rakefile
COPY ext ext
COPY lib lib
COPY src src
COPY include include
RUN rake

# Tests are copied last so editing them does not invalidate the build layer.
COPY test test
|
data/Gemfile
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# NOTE: This Gemfile is used for testing the project.
|
2
|
+
# These dependencies are not needed to consume the library as a C extension for MRI.
|
3
|
+
|
4
|
+
source 'https://rubygems.org'
|
5
|
+
|
6
|
+
gem 'minitest'
|
7
|
+
gem 'minitest-focus'
|
8
|
+
gem 'minitest-reporters'
|
9
|
+
gem 'ruby_parser'
|
10
|
+
gem 'rake'
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2022 Tim Morgan and contributors
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# Natalie Parser
|
2
|
+
|
3
|
+
[![github build status](https://github.com/natalie-lang/natalie_parser/actions/workflows/build.yml/badge.svg)](https://github.com/natalie-lang/natalie_parser/actions?query=workflow%3ABuild+branch%3Amaster)
|
4
|
+
[![MIT License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/natalie-lang/natalie_parser/blob/master/LICENSE)
|
5
|
+
|
6
|
+
This is a parser for the Ruby programming language, written in C++.
|
7
|
+
It was extracted from the [Natalie](https://github.com/natalie-lang/natalie) project.
|
8
|
+
|
9
|
+
You can use this library directly from a C/C++ project, or you can
|
10
|
+
build it as a Ruby gem and use it from Ruby itself.
|
11
|
+
|
12
|
+
We are currently targeting Ruby 3.0 syntax, but that will probably
|
13
|
+
change over time, depending on what things we want to support and
|
14
|
+
what kind of help we get from the community.
|
15
|
+
|
16
|
+
NOTE: This project is still very new and there are certainly bugs.
|
17
|
+
See the list below for things we already know about, but expect there
|
18
|
+
are more we don't know about yet. **We don't recommend you use this in
|
19
|
+
production applications.**
|
20
|
+
|
21
|
+
## To Do
|
22
|
+
|
23
|
+
- [x] Parse the [Natalie](https://github.com/natalie-lang/natalie) compiler and standard library
|
24
|
+
- [x] Pass (mostly) the [RubyParser](https://github.com/seattlerb/ruby_parser) test suite
|
25
|
+
- [ ] Support different source encodings
|
26
|
+
- [ ] Support more of the Ruby 3.0 syntax
|
27
|
+
- [x] "Endless" method definition (`def foo = bar`)
|
28
|
+
- [ ] Argument forwarding (`...`)
|
29
|
+
- [ ] Pattern matching
|
30
|
+
- [ ] Numbered block parameters (`_1`, `_2`, etc.)
|
31
|
+
- [ ] Non-ASCII identifiers
|
32
|
+
- [ ] Rational and Complex literals (`1r` and `2i`)
|
33
|
+
|
34
|
+
## Development
|
35
|
+
|
36
|
+
```sh
|
37
|
+
rake
|
38
|
+
ruby -I lib:ext -r natalie_parser -e "p NatalieParser.parse('1 + 2')"
|
39
|
+
# => s(:block, s(:call, s(:lit, 1), :+, s(:lit, 2)))
|
40
|
+
```
|
41
|
+
|
42
|
+
### Running Tests
|
43
|
+
|
44
|
+
```sh
|
45
|
+
rake test
|
46
|
+
```
|
47
|
+
|
48
|
+
## Copyright & License
|
49
|
+
|
50
|
+
Natalie is copyright 2022, Tim Morgan and contributors. Natalie is licensed
|
51
|
+
under the MIT License; see the `LICENSE` file in this directory for the full text.
|
52
|
+
|
53
|
+
### Note about Outside Sources
|
54
|
+
|
55
|
+
The file `test/test_ruby_parser.rb` is copyright Ryan Davis and is licensed MIT.
|
data/Rakefile
ADDED
@@ -0,0 +1,242 @@
|
|
1
|
+
task default: :build
|
2
|
+
|
3
|
+
desc 'Build Natalie Parser library and MRI C extension'
|
4
|
+
task build: %i[
|
5
|
+
bundle_install
|
6
|
+
build_dir
|
7
|
+
library
|
8
|
+
parser_c_ext
|
9
|
+
write_compile_database
|
10
|
+
]
|
11
|
+
|
12
|
+
so_ext = RUBY_PLATFORM =~ /darwin/ ? 'bundle' : 'so'
|
13
|
+
|
14
|
+
desc 'Build Natalie Parser library'
|
15
|
+
task library: [:build_dir, "build/libnatalie_parser.a"]
|
16
|
+
|
17
|
+
desc 'Build Natalie Parser MRI C extension'
|
18
|
+
task parser_c_ext: [:build_dir, "ext/natalie_parser/natalie_parser.#{so_ext}"]
|
19
|
+
|
20
|
+
desc 'Remove temporary files created during build'
|
21
|
+
task :clean do
|
22
|
+
Rake::FileList[%w[
|
23
|
+
build/build.log
|
24
|
+
build/*.o
|
25
|
+
build/node
|
26
|
+
build/asan_test
|
27
|
+
ext/natalie_parser/*.{h,log,o}
|
28
|
+
]].each { |path| rm_rf path if File.exist?(path) }
|
29
|
+
end
|
30
|
+
|
31
|
+
desc 'Remove all generated files'
|
32
|
+
task :clobber do
|
33
|
+
Rake::FileList[%w[
|
34
|
+
build
|
35
|
+
ext/natalie_parser/*.{so,bundle,h,log,o}
|
36
|
+
]].each { |path| rm_rf path if File.exist?(path) }
|
37
|
+
end
|
38
|
+
|
39
|
+
task distclean: :clobber
|
40
|
+
|
41
|
+
desc 'Run the test suite'
|
42
|
+
task test: [:build, 'build/asan_test'] do
|
43
|
+
sh 'bundle exec ruby test/all.rb'
|
44
|
+
sh 'build/asan_test'
|
45
|
+
sh 'bundle exec ruby test/test_ruby_parser.rb'
|
46
|
+
end
|
47
|
+
|
48
|
+
# Re-runs `rake test` whenever a C++ or Ruby file changes.
# The file list is piped to the external `entr` tool, which must be installed.
desc 'Run the test suite when changes are made (requires entr binary)'
task :watch do
  files = Rake::FileList['**/*.cpp', '**/*.hpp', '**/*.rb']
  sh "ls #{files} | entr -c -s 'rake test'"
end
|
53
|
+
|
54
|
+
desc 'Show line counts for the project'
|
55
|
+
task :cloc do
|
56
|
+
sh 'cloc include lib src test'
|
57
|
+
end
|
58
|
+
|
59
|
+
desc 'Generate tags file for development'
|
60
|
+
task :ctags do
|
61
|
+
sh 'ctags -R --exclude=.cquery_cache --exclude=ext --exclude=build --append=no .'
|
62
|
+
end
|
63
|
+
task tags: :ctags
|
64
|
+
|
65
|
+
desc 'Format C++ code with clang-format'
|
66
|
+
task :format do
|
67
|
+
sh "find include -type f -name '*.hpp' -exec clang-format -i --style=file {} +"
|
68
|
+
sh "find src -type f -name '*.cpp' -exec clang-format -i --style=file {} +"
|
69
|
+
end
|
70
|
+
|
71
|
+
desc 'Show TODO and FIXME comments in the project'
|
72
|
+
task :todo do
|
73
|
+
sh "egrep -r 'FIXME|TODO' src include lib"
|
74
|
+
end
|
75
|
+
|
76
|
+
desc 'Run the benchmark script'
|
77
|
+
task benchmark: :build do
|
78
|
+
require_relative './test/benchmark'
|
79
|
+
end
|
80
|
+
|
81
|
+
# # # # Docker Tasks (used for CI) # # # #
|
82
|
+
|
83
|
+
DOCKER_FLAGS =
|
84
|
+
if !ENV['CI'] && STDOUT.isatty
|
85
|
+
'-i -t'
|
86
|
+
elsif ENV['CI']
|
87
|
+
"-e CI=#{ENV['CI']}"
|
88
|
+
end
|
89
|
+
|
90
|
+
task :docker_build do
|
91
|
+
sh 'docker build -t natalie-parser .'
|
92
|
+
end
|
93
|
+
|
94
|
+
task docker_bash: :docker_build do
|
95
|
+
sh 'docker run -it --rm --entrypoint bash natalie-parser'
|
96
|
+
end
|
97
|
+
|
98
|
+
task :docker_build_clang do
|
99
|
+
sh 'docker build -t natalie-parser-clang --build-arg CC=clang --build-arg CXX=clang++ .'
|
100
|
+
end
|
101
|
+
|
102
|
+
task :docker_build_ruby27 do
|
103
|
+
sh 'docker build -t natalie-parser-ruby27 --build-arg IMAGE="ruby:2.7" .'
|
104
|
+
end
|
105
|
+
|
106
|
+
task docker_test: %i[docker_test_gcc docker_test_clang]
|
107
|
+
|
108
|
+
task docker_test_gcc: :docker_build do
|
109
|
+
sh "docker run #{DOCKER_FLAGS} --rm --entrypoint rake natalie-parser test"
|
110
|
+
end
|
111
|
+
|
112
|
+
task docker_test_clang: :docker_build_clang do
|
113
|
+
sh "docker run #{DOCKER_FLAGS} --rm --entrypoint rake natalie-parser-clang test"
|
114
|
+
end
|
115
|
+
|
116
|
+
# # # # Build Compile Database # # # #
|
117
|
+
|
118
|
+
# Only record build commands when the `compiledb` tool is installed.
# Redirect stdout first and then stderr, so that both streams are silenced.
if system('which compiledb >/dev/null 2>&1')
  # Every line written through $stderr.puts while rake runs is collected here.
  $compiledb_out = []

  # Tee $stderr.puts: still write the line, but also remember it.
  # NOTE(review): presumably this captures the commands rake echoes for `sh` — confirm.
  def $stderr.puts(str)
    write(str + "\n")
    $compiledb_out << str
  end

  # Dumps the captured lines to build/build.log and feeds them to compiledb.
  task :write_compile_database do
    if $compiledb_out.any?
      File.write('build/build.log', $compiledb_out.join("\n"))
      sh 'compiledb < build/build.log'
    end
  end
else
  # Without compiledb the task still exists (the build task depends on it) but does nothing.
  task :write_compile_database do
    # noop
  end
end
|
137
|
+
|
138
|
+
# # # # Internal Tasks and Rules # # # #
|
139
|
+
|
140
|
+
STANDARD = 'c++17'
|
141
|
+
HEADERS = Rake::FileList['include/**/{*.h,*.hpp}']
|
142
|
+
SOURCES = Rake::FileList['src/**/*.{c,cpp}']
|
143
|
+
OBJECT_FILES = SOURCES.sub('src/', 'build/').pathmap('%p.o')
|
144
|
+
|
145
|
+
require 'tempfile'
|
146
|
+
|
147
|
+
task :build_dir do
|
148
|
+
mkdir_p 'build/lexer' unless File.exist?('build/lexer')
|
149
|
+
mkdir_p 'build/node' unless File.exist?('build/node')
|
150
|
+
end
|
151
|
+
|
152
|
+
rule '.cpp.o' => ['src/%n'] + HEADERS do |t|
|
153
|
+
sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
|
154
|
+
end
|
155
|
+
|
156
|
+
rule %r{lexer/.*\.cpp\.o$} => ['src/lexer/%n'] + HEADERS do |t|
|
157
|
+
sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
|
158
|
+
end
|
159
|
+
|
160
|
+
rule %r{node/.*\.cpp\.o$} => ['src/node/%n'] + HEADERS do |t|
|
161
|
+
sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
|
162
|
+
end
|
163
|
+
|
164
|
+
multitask objects: OBJECT_FILES
|
165
|
+
|
166
|
+
file 'build/libnatalie_parser.a' => HEADERS + [:objects] do |t|
|
167
|
+
sh "ar rcs #{t.name} #{OBJECT_FILES}"
|
168
|
+
end
|
169
|
+
|
170
|
+
# Builds the MRI C extension by running extconf.rb and make inside ext/natalie_parser.
# Rebuilt whenever the extension sources or any library source/header changes.
file "ext/natalie_parser/natalie_parser.#{so_ext}" => [
  'ext/natalie_parser/natalie_parser.cpp',
  'ext/natalie_parser/mri_creator.hpp',
] + SOURCES + HEADERS do |t|
  build_dir = File.expand_path('ext/natalie_parser', __dir__)
  # Start from a clean slate: drop old object files and the previous artifact.
  Rake::FileList['ext/natalie_parser/*.o'].each { |path| rm path }
  # Remove the target itself (t.name) rather than a hard-coded ".so" path,
  # so the ".bundle" produced on darwin is cleaned up as well.
  rm_rf t.name
  sh <<-SH
    cd #{build_dir} && \
    ruby extconf.rb && \
    make -j
  SH
end
|
183
|
+
|
184
|
+
file 'build/fragments.hpp' => ['test/parser_test.rb', 'test/support/extract_parser_test_fragments.rb'] do
|
185
|
+
sh 'ruby -I lib:ext test/support/extract_parser_test_fragments.rb'
|
186
|
+
end
|
187
|
+
|
188
|
+
file 'build/asan_test' => ['test/asan_test.cpp', 'build/fragments.hpp', :library] do |t|
|
189
|
+
sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -I build -I include -o #{t.name} #{t.source} -L build -lnatalie_parser"
|
190
|
+
end
|
191
|
+
|
192
|
+
task :bundle_install do
|
193
|
+
sh 'bundle check || bundle install'
|
194
|
+
end
|
195
|
+
|
196
|
+
# Returns the C compiler command (memoized).
# Preference order: the CC environment variable, then `ccache cc` when ccache
# is found on the PATH, otherwise plain `cc`.
def cc
  @cc ||=
    if ENV['CC']
      ENV['CC']
    # Redirect stdout first, then stderr, so the probe prints nothing.
    elsif system('which ccache >/dev/null 2>&1')
      'ccache cc'
    else
      'cc'
    end
end
|
206
|
+
|
207
|
+
# Returns the C++ compiler command (memoized).
# Preference order: the CXX environment variable, then `ccache c++` when ccache
# is found on the PATH, otherwise plain `c++`.
def cxx
  @cxx ||=
    if ENV['CXX']
      ENV['CXX']
    # Redirect stdout first, then stderr, so the probe prints nothing.
    elsif system('which ccache >/dev/null 2>&1')
      'ccache c++'
    else
      'c++'
    end
end
|
217
|
+
|
218
|
+
# Returns the compiler flags as an array of strings.
# BUILD=release selects optimized flags; any other value selects the
# warnings-as-errors, AddressSanitizer-enabled set. One "-I <path>" entry
# per include path is appended in both cases.
def cxx_flags
  release_flags = %w[-fPIC -g -O2]
  debug_flags = %w[-fPIC -g -Wall -Wextra -Werror -fsanitize=address]
  selected = ENV['BUILD'] == 'release' ? release_flags : debug_flags
  selected + include_paths.map { |dir| "-I #{dir}" }
end
|
239
|
+
|
240
|
+
# Returns the list of header search directories: currently just the
# `include` directory next to this Rakefile, as an absolute path.
def include_paths
  project_include_dir = File.expand_path('include', __dir__)
  [project_include_dir]
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# mkmf configuration for the natalie_parser MRI extension.
require 'mkmf'

# Build as C++17 with debug info and find the library headers two levels up.
$CXXFLAGS += ' -g -std=c++17'
$INCFLAGS += ' -I ../../include'

# Compile every library source together with the extension entry point.
$srcs = Dir['../../src/**/*.cpp', 'natalie_parser.cpp']

# Let make locate the library sources in each source directory.
%w[src src/lexer src/node].each do |subdir|
  $VPATH << "$(srcdir)/../../#{subdir}"
end

# NOTE(review): create_header presumably produces the extconf.h that
# natalie_parser.cpp includes — confirm.
create_header
create_makefile 'natalie_parser'
|
@@ -0,0 +1,139 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "ruby/encoding.h"
|
3
|
+
#include "ruby/intern.h"
|
4
|
+
|
5
|
+
#include "natalie_parser/creator.hpp"
|
6
|
+
#include "natalie_parser/node.hpp"
|
7
|
+
|
8
|
+
extern VALUE Sexp;
|
9
|
+
|
10
|
+
namespace NatalieParser {
|
11
|
+
|
12
|
+
class MRICreator : public Creator {
|
13
|
+
public:
|
14
|
+
MRICreator(const Node &node)
|
15
|
+
: Creator { node.file().static_cast_as<const String>(), node.line(), node.column() } {
|
16
|
+
reset_sexp();
|
17
|
+
}
|
18
|
+
|
19
|
+
MRICreator(const MRICreator &other)
|
20
|
+
: Creator { other.file(), other.line(), other.column() } {
|
21
|
+
reset_sexp();
|
22
|
+
}
|
23
|
+
|
24
|
+
virtual ~MRICreator() { }
|
25
|
+
|
26
|
+
virtual void reset_sexp() override {
|
27
|
+
m_sexp = rb_class_new_instance(0, nullptr, Sexp);
|
28
|
+
rb_ivar_set(m_sexp, rb_intern("@file"), get_file_string(file()));
|
29
|
+
rb_ivar_set(m_sexp, rb_intern("@line"), rb_int_new(line() + 1));
|
30
|
+
rb_ivar_set(m_sexp, rb_intern("@column"), rb_int_new(column() + 1));
|
31
|
+
}
|
32
|
+
|
33
|
+
virtual void set_comments(const TM::String &comments) override {
|
34
|
+
auto string_obj = rb_utf8_str_new(comments.c_str(), comments.length());
|
35
|
+
rb_ivar_set(m_sexp, rb_intern("@comments"), string_obj);
|
36
|
+
}
|
37
|
+
|
38
|
+
virtual void set_type(const char *type) override {
|
39
|
+
rb_ary_store(m_sexp, 0, ID2SYM(rb_intern(type)));
|
40
|
+
}
|
41
|
+
|
42
|
+
virtual void append(const Node &node) override {
|
43
|
+
if (node.type() == Node::Type::Nil) {
|
44
|
+
rb_ary_push(m_sexp, Qnil);
|
45
|
+
return;
|
46
|
+
}
|
47
|
+
MRICreator creator { node };
|
48
|
+
creator.set_assignment(assignment());
|
49
|
+
node.transform(&creator);
|
50
|
+
rb_ary_push(m_sexp, creator.sexp());
|
51
|
+
}
|
52
|
+
|
53
|
+
virtual void append_array(const ArrayNode &array) override {
|
54
|
+
MRICreator creator { array };
|
55
|
+
creator.set_assignment(assignment());
|
56
|
+
array.ArrayNode::transform(&creator);
|
57
|
+
rb_ary_push(m_sexp, creator.sexp());
|
58
|
+
}
|
59
|
+
|
60
|
+
virtual void append_false() override {
|
61
|
+
rb_ary_push(m_sexp, Qfalse);
|
62
|
+
}
|
63
|
+
|
64
|
+
virtual void append_float(double number) override {
|
65
|
+
rb_ary_push(m_sexp, rb_float_new(number));
|
66
|
+
}
|
67
|
+
|
68
|
+
virtual void append_integer(long long number) override {
|
69
|
+
rb_ary_push(m_sexp, rb_int_new(number));
|
70
|
+
}
|
71
|
+
|
72
|
+
virtual void append_integer(TM::String &number) override {
|
73
|
+
auto string_obj = rb_utf8_str_new(number.c_str(), number.length());
|
74
|
+
rb_ary_push(m_sexp, rb_Integer(string_obj));
|
75
|
+
}
|
76
|
+
|
77
|
+
virtual void append_nil() override {
|
78
|
+
rb_ary_push(m_sexp, Qnil);
|
79
|
+
}
|
80
|
+
|
81
|
+
virtual void append_range(long long first, long long last, bool exclude_end) override {
|
82
|
+
rb_ary_push(m_sexp, rb_range_new(rb_int_new(first), rb_int_new(last), exclude_end ? Qtrue : Qfalse));
|
83
|
+
}
|
84
|
+
|
85
|
+
virtual void append_regexp(TM::String &pattern, int options) override {
|
86
|
+
auto encoding = pattern.contains_utf8_encoded_multibyte_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
|
87
|
+
auto regexp = rb_enc_reg_new(pattern.c_str(), pattern.size(), encoding, options);
|
88
|
+
rb_ary_push(m_sexp, regexp);
|
89
|
+
}
|
90
|
+
|
91
|
+
virtual void append_sexp(std::function<void(Creator *)> fn) override {
|
92
|
+
MRICreator creator { *this };
|
93
|
+
fn(&creator);
|
94
|
+
rb_ary_push(m_sexp, creator.sexp());
|
95
|
+
}
|
96
|
+
|
97
|
+
virtual void append_string(TM::String &string) override {
|
98
|
+
auto encoding = string.contains_seemingly_valid_utf8_encoded_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
|
99
|
+
rb_ary_push(m_sexp, rb_enc_str_new(string.c_str(), string.length(), encoding));
|
100
|
+
}
|
101
|
+
|
102
|
+
virtual void append_symbol(TM::String &name) override {
|
103
|
+
auto encoding = name.contains_utf8_encoded_multibyte_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
|
104
|
+
auto symbol = ID2SYM(rb_intern3(name.c_str(), name.size(), encoding));
|
105
|
+
rb_ary_push(m_sexp, symbol);
|
106
|
+
}
|
107
|
+
|
108
|
+
virtual void append_true() override {
|
109
|
+
rb_ary_push(m_sexp, Qtrue);
|
110
|
+
}
|
111
|
+
|
112
|
+
virtual void wrap(const char *type) override {
|
113
|
+
auto inner = m_sexp;
|
114
|
+
reset_sexp();
|
115
|
+
set_type(type);
|
116
|
+
rb_ary_push(m_sexp, inner);
|
117
|
+
}
|
118
|
+
|
119
|
+
VALUE sexp() const { return m_sexp; }
|
120
|
+
|
121
|
+
private:
|
122
|
+
VALUE m_sexp { Qnil };
|
123
|
+
|
124
|
+
static VALUE get_file_string(SharedPtr<const String> file) {
|
125
|
+
auto file_string = s_file_cache.get(*file);
|
126
|
+
if (!file_string) {
|
127
|
+
file_string = rb_str_new(file->c_str(), file->length());
|
128
|
+
// FIXME: Seems there is no way to un-register and object. :-(
|
129
|
+
rb_gc_register_mark_object(file_string);
|
130
|
+
s_file_cache.put(*file, file_string);
|
131
|
+
}
|
132
|
+
return file_string;
|
133
|
+
}
|
134
|
+
|
135
|
+
// TODO: Move this to the Parser object, pass it in, clean it up when finished with it.
|
136
|
+
// (Otherwise we leak memory if the user parses lots of different files in a long-running process.)
|
137
|
+
inline static TM::Hashmap<const String, VALUE> s_file_cache { TM::HashType::TMString };
|
138
|
+
};
|
139
|
+
}
|
@@ -0,0 +1,144 @@
|
|
1
|
+
#include "extconf.h"
|
2
|
+
#include "ruby.h"
|
3
|
+
#include "ruby/encoding.h"
|
4
|
+
#include "ruby/intern.h"
|
5
|
+
#include "stdio.h"
|
6
|
+
|
7
|
+
// these includes MUST come after
|
8
|
+
#include "mri_creator.hpp"
|
9
|
+
#include "natalie_parser/parser.hpp"
|
10
|
+
|
11
|
+
VALUE Parser;
|
12
|
+
VALUE Sexp;
|
13
|
+
|
14
|
+
extern "C" {
|
15
|
+
|
16
|
+
// NatalieParser#initialize(code, path = "(string)")
// Stores the source code in @code and the path in @path, then returns self.
// NOTE(review): an arity error is reported as SyntaxError; ArgumentError would be
// the conventional class, but it is kept here so existing rescuers keep working.
VALUE initialize(int argc, VALUE *argv, VALUE self) {
    const bool arity_ok = argc >= 1 && argc <= 2;
    if (!arity_ok) {
        rb_raise(rb_eSyntaxError,
            "wrong number of arguments (given %d, expected 1..2)", argc);
    }

    rb_ivar_set(self, rb_intern("@code"), argv[0]);

    // The path is optional; fall back to a placeholder name.
    VALUE path = (argc > 1) ? argv[1] : rb_str_new_cstr("(string)");
    rb_ivar_set(self, rb_intern("@path"), path);

    return self;
}
|
29
|
+
|
30
|
+
// Converts a parsed node tree into a Ruby Sexp by running the node's
// transform against an MRICreator seeded with the node's location.
VALUE node_to_ruby(TM::SharedPtr<NatalieParser::Node> node) {
    auto &root = node.ref();
    NatalieParser::MRICreator sexp_builder { root };
    root.transform(&sexp_builder);
    return sexp_builder.sexp();
}
|
35
|
+
|
36
|
+
// NatalieParser#parse
// Parses @code (reported as coming from @path) and returns the resulting Sexp.
// Raises Ruby's SyntaxError with the parser's message when parsing fails.
VALUE parse_on_instance(VALUE self) {
    VALUE code = rb_ivar_get(self, rb_intern("@code"));
    VALUE path = rb_ivar_get(self, rb_intern("@path"));

    // Ruby raises with longjmp, which runs no C++ destructors and never leaves
    // a catch handler properly. So the handler only records the message; the
    // raise happens after the parser's scope has been closed normally.
    VALUE error_message = Qnil;
    {
        auto code_string = new TM::String { StringValueCStr(code) };
        auto path_string = new TM::String { StringValueCStr(path) };
        auto parser = NatalieParser::Parser { code_string, path_string };
        try {
            auto tree = parser.tree();
            return node_to_ruby(tree);
        } catch (NatalieParser::Parser::SyntaxError &error) {
            // Copy the message into a Ruby string while the exception is still alive.
            error_message = rb_sprintf("%s", error.message());
        }
    }

    // Only reached on a syntax error; rb_exc_raise does not return.
    rb_exc_raise(rb_exc_new_str(rb_eSyntaxError, error_message));
}
|
50
|
+
|
51
|
+
// NatalieParser.parse(code, path = "(string)")
// Convenience wrapper: builds a parser instance from the given arguments
// and immediately parses it.
VALUE parse(int argc, VALUE *argv, VALUE self) {
    return parse_on_instance(rb_class_new_instance(argc, argv, Parser));
}
|
55
|
+
|
56
|
+
// Converts one lexer token into a Ruby Hash with a :type key and, depending on
// the token type, a :literal (or :options) key. :line and :column are added
// when include_location_info is true. Returns nil for the EOF token.
// Raises Ruby's SyntaxError when the token fails validation.
VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
    if (token.is_eof())
        return Qnil;

    // Ruby raises with longjmp, so never raise from inside the catch handler:
    // record the message there and raise once the handler has been left.
    VALUE validation_error = Qnil;
    try {
        token.validate();
    } catch (NatalieParser::Parser::SyntaxError &error) {
        validation_error = rb_sprintf("%s", error.message());
    }
    if (validation_error != Qnil)
        rb_exc_raise(rb_exc_new_str(rb_eSyntaxError, validation_error));

    const char *type = token.type_value();
    if (!type) abort(); // FIXME: assert no workie?

    auto hash = rb_hash_new();
    rb_hash_aset(hash, ID2SYM(rb_intern("type")), ID2SYM(rb_intern(type)));

    switch (token.type()) {
    // Literal exposed as a UTF-8 String.
    case NatalieParser::Token::Type::Bignum:
    case NatalieParser::Token::Type::Doc:
    case NatalieParser::Token::Type::String: {
        auto literal = token.literal_string();
        rb_hash_aset(hash, ID2SYM(rb_intern("literal")), rb_utf8_str_new(literal->c_str(), literal->length()));
        break;
    }
    // Literal exposed as a Symbol.
    case NatalieParser::Token::Type::BackRef:
    case NatalieParser::Token::Type::BareName:
    case NatalieParser::Token::Type::ClassVariable:
    case NatalieParser::Token::Type::Constant:
    case NatalieParser::Token::Type::GlobalVariable:
    case NatalieParser::Token::Type::InstanceVariable:
    case NatalieParser::Token::Type::Symbol:
    case NatalieParser::Token::Type::SymbolKey: {
        auto literal = token.literal_string();
        rb_hash_aset(hash, ID2SYM(rb_intern("literal")), ID2SYM(rb_intern3(literal->c_str(), literal->size(), rb_utf8_encoding())));
        break;
    }
    // Literal exposed as an Integer.
    case NatalieParser::Token::Type::Fixnum:
    case NatalieParser::Token::Type::NthRef:
        rb_hash_aset(hash, ID2SYM(rb_intern("literal")), rb_int_new(token.get_fixnum()));
        break;
    // Literal exposed as a Float.
    case NatalieParser::Token::Type::Float:
        rb_hash_aset(hash, ID2SYM(rb_intern("literal")), rb_float_new(token.get_double()));
        break;
    // The end of a regexp may carry its option letters.
    case NatalieParser::Token::Type::InterpolatedRegexpEnd:
        if (token.has_literal()) {
            auto options = token.literal_string();
            rb_hash_aset(hash, ID2SYM(rb_intern("options")), rb_str_new(options->c_str(), options->length()));
        }
        break;
    // Every other token type contributes only its :type.
    default:
        break;
    }

    if (include_location_info) {
        rb_hash_aset(hash, ID2SYM(rb_intern("line")), rb_int_new(token.line()));
        rb_hash_aset(hash, ID2SYM(rb_intern("column")), rb_int_new(token.column()));
    }
    return hash;
}
|
111
|
+
|
112
|
+
// NatalieParser#tokens(include_location_info)
// Lexes @code and returns an Array with one Hash per token (see token_to_ruby).
// Tokens that convert to nil or false (e.g. EOF) are left out.
VALUE tokens_on_instance(VALUE self, VALUE include_location_info = Qfalse) {
    VALUE code = rb_ivar_get(self, rb_intern("@code"));
    VALUE path = rb_ivar_get(self, rb_intern("@path"));
    auto code_string = new TM::String { StringValueCStr(code) };
    auto path_string = new TM::String { StringValueCStr(path) };
    auto lexer = NatalieParser::Lexer { code_string, path_string };
    auto array = rb_ary_new();
    auto the_tokens = lexer.tokens();

    // The flag does not change during the loop, so test it once.
    const bool with_location = RTEST(include_location_info);

    // Iterate by reference: token_to_ruby already takes its own copy, so
    // copying each token into the loop variable as well is redundant.
    for (const auto &token : *the_tokens) {
        auto token_value = token_to_ruby(token, with_location);
        if (RTEST(token_value))
            rb_ary_push(array, token_value);
    }
    return array;
}
|
127
|
+
|
128
|
+
// NatalieParser.tokens(code, include_location_info = false)
// Convenience wrapper: builds a parser instance from the code argument and
// returns its tokens.
VALUE tokens(int argc, VALUE *argv, VALUE self) {
    // argv[0] is read unconditionally below, so at least one argument is
    // required. Extra arguments remain accepted (and ignored), as before.
    rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
    VALUE parser = rb_class_new_instance(1, argv, Parser);
    VALUE include_location_info = argc > 1 ? argv[1] : Qfalse;
    return tokens_on_instance(parser, include_location_info);
}
|
133
|
+
|
134
|
+
// Extension entry point, called by Ruby when the shared library is loaded.
// Looks up the already-defined Sexp class and defines the NatalieParser class
// with its instance and singleton methods.
void Init_natalie_parser() {
    // Sexp must have been defined before this extension is loaded.
    Sexp = rb_const_get(rb_cObject, rb_intern("Sexp"));
    Parser = rb_define_class("NatalieParser", rb_cObject);

    // Instance API.
    rb_define_method(Parser, "initialize", initialize, -1);
    rb_define_method(Parser, "parse", parse_on_instance, 0);
    rb_define_method(Parser, "tokens", tokens_on_instance, 1);

    // Class-level shortcuts that create a throwaway instance.
    rb_define_singleton_method(Parser, "parse", parse, -1);
    rb_define_singleton_method(Parser, "tokens", tokens, -1);
}
|
144
|
+
}
|