natalie_parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/Dockerfile +26 -0
- data/Gemfile +10 -0
- data/LICENSE +21 -0
- data/README.md +55 -0
- data/Rakefile +242 -0
- data/ext/natalie_parser/extconf.rb +9 -0
- data/ext/natalie_parser/mri_creator.hpp +139 -0
- data/ext/natalie_parser/natalie_parser.cpp +144 -0
- data/include/natalie_parser/creator/debug_creator.hpp +113 -0
- data/include/natalie_parser/creator.hpp +108 -0
- data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
- data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
- data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
- data/include/natalie_parser/lexer.hpp +135 -0
- data/include/natalie_parser/node/alias_node.hpp +35 -0
- data/include/natalie_parser/node/arg_node.hpp +74 -0
- data/include/natalie_parser/node/array_node.hpp +34 -0
- data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
- data/include/natalie_parser/node/assignment_node.hpp +34 -0
- data/include/natalie_parser/node/back_ref_node.hpp +28 -0
- data/include/natalie_parser/node/begin_block_node.hpp +25 -0
- data/include/natalie_parser/node/begin_node.hpp +52 -0
- data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
- data/include/natalie_parser/node/bignum_node.hpp +37 -0
- data/include/natalie_parser/node/block_node.hpp +55 -0
- data/include/natalie_parser/node/block_pass_node.hpp +33 -0
- data/include/natalie_parser/node/break_node.hpp +32 -0
- data/include/natalie_parser/node/call_node.hpp +85 -0
- data/include/natalie_parser/node/case_in_node.hpp +40 -0
- data/include/natalie_parser/node/case_node.hpp +52 -0
- data/include/natalie_parser/node/case_when_node.hpp +43 -0
- data/include/natalie_parser/node/class_node.hpp +39 -0
- data/include/natalie_parser/node/colon2_node.hpp +44 -0
- data/include/natalie_parser/node/colon3_node.hpp +34 -0
- data/include/natalie_parser/node/constant_node.hpp +26 -0
- data/include/natalie_parser/node/def_node.hpp +55 -0
- data/include/natalie_parser/node/defined_node.hpp +33 -0
- data/include/natalie_parser/node/encoding_node.hpp +26 -0
- data/include/natalie_parser/node/end_block_node.hpp +25 -0
- data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
- data/include/natalie_parser/node/false_node.hpp +23 -0
- data/include/natalie_parser/node/fixnum_node.hpp +36 -0
- data/include/natalie_parser/node/float_node.hpp +36 -0
- data/include/natalie_parser/node/hash_node.hpp +34 -0
- data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
- data/include/natalie_parser/node/identifier_node.hpp +123 -0
- data/include/natalie_parser/node/if_node.hpp +43 -0
- data/include/natalie_parser/node/infix_op_node.hpp +46 -0
- data/include/natalie_parser/node/interpolated_node.hpp +33 -0
- data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
- data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
- data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
- data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
- data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
- data/include/natalie_parser/node/iter_node.hpp +45 -0
- data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
- data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
- data/include/natalie_parser/node/logical_and_node.hpp +40 -0
- data/include/natalie_parser/node/logical_or_node.hpp +40 -0
- data/include/natalie_parser/node/match_node.hpp +38 -0
- data/include/natalie_parser/node/module_node.hpp +32 -0
- data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
- data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
- data/include/natalie_parser/node/next_node.hpp +37 -0
- data/include/natalie_parser/node/nil_node.hpp +23 -0
- data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
- data/include/natalie_parser/node/node.hpp +155 -0
- data/include/natalie_parser/node/node_with_args.hpp +47 -0
- data/include/natalie_parser/node/not_match_node.hpp +35 -0
- data/include/natalie_parser/node/not_node.hpp +37 -0
- data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
- data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
- data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
- data/include/natalie_parser/node/op_assign_node.hpp +47 -0
- data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
- data/include/natalie_parser/node/pin_node.hpp +33 -0
- data/include/natalie_parser/node/range_node.hpp +52 -0
- data/include/natalie_parser/node/redo_node.hpp +20 -0
- data/include/natalie_parser/node/regexp_node.hpp +36 -0
- data/include/natalie_parser/node/retry_node.hpp +20 -0
- data/include/natalie_parser/node/return_node.hpp +34 -0
- data/include/natalie_parser/node/safe_call_node.hpp +31 -0
- data/include/natalie_parser/node/sclass_node.hpp +37 -0
- data/include/natalie_parser/node/self_node.hpp +23 -0
- data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
- data/include/natalie_parser/node/shell_node.hpp +32 -0
- data/include/natalie_parser/node/splat_node.hpp +39 -0
- data/include/natalie_parser/node/splat_value_node.hpp +32 -0
- data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
- data/include/natalie_parser/node/string_node.hpp +42 -0
- data/include/natalie_parser/node/super_node.hpp +44 -0
- data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
- data/include/natalie_parser/node/symbol_node.hpp +30 -0
- data/include/natalie_parser/node/to_array_node.hpp +33 -0
- data/include/natalie_parser/node/true_node.hpp +23 -0
- data/include/natalie_parser/node/unary_op_node.hpp +41 -0
- data/include/natalie_parser/node/undef_node.hpp +31 -0
- data/include/natalie_parser/node/until_node.hpp +21 -0
- data/include/natalie_parser/node/while_node.hpp +52 -0
- data/include/natalie_parser/node/yield_node.hpp +29 -0
- data/include/natalie_parser/node.hpp +89 -0
- data/include/natalie_parser/parser.hpp +218 -0
- data/include/natalie_parser/token.hpp +842 -0
- data/include/tm/defer.hpp +34 -0
- data/include/tm/hashmap.hpp +826 -0
- data/include/tm/macros.hpp +16 -0
- data/include/tm/optional.hpp +223 -0
- data/include/tm/owned_ptr.hpp +186 -0
- data/include/tm/recursion_guard.hpp +156 -0
- data/include/tm/shared_ptr.hpp +259 -0
- data/include/tm/string.hpp +1447 -0
- data/include/tm/tests.hpp +78 -0
- data/include/tm/vector.hpp +796 -0
- data/lib/natalie_parser/sexp.rb +36 -0
- data/lib/natalie_parser/version.rb +5 -0
- data/lib/natalie_parser.rb +3 -0
- data/natalie_parser.gemspec +23 -0
- data/src/lexer/interpolated_string_lexer.cpp +88 -0
- data/src/lexer/regexp_lexer.cpp +95 -0
- data/src/lexer/word_array_lexer.cpp +134 -0
- data/src/lexer.cpp +1703 -0
- data/src/node/alias_node.cpp +11 -0
- data/src/node/assignment_node.cpp +33 -0
- data/src/node/begin_node.cpp +29 -0
- data/src/node/begin_rescue_node.cpp +33 -0
- data/src/node/class_node.cpp +22 -0
- data/src/node/interpolated_regexp_node.cpp +19 -0
- data/src/node/interpolated_shell_node.cpp +25 -0
- data/src/node/interpolated_string_node.cpp +111 -0
- data/src/node/interpolated_symbol_node.cpp +25 -0
- data/src/node/match_node.cpp +14 -0
- data/src/node/module_node.cpp +21 -0
- data/src/node/multiple_assignment_node.cpp +37 -0
- data/src/node/node.cpp +10 -0
- data/src/node/node_with_args.cpp +35 -0
- data/src/node/op_assign_node.cpp +36 -0
- data/src/node/string_node.cpp +33 -0
- data/src/parser.cpp +2972 -0
- data/src/token.cpp +27 -0
- metadata +186 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: '048e048ede42652f8a81298a3310313f8bf9929ba5cd952a89d3269d8333b363'
|
|
4
|
+
data.tar.gz: 5c2388c8125b1444c454c0ee3a0d4a07f305cc3fd668d8e03dbc4404201a50f7
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 55932c3dc24ceeeab109a4a980ddf96babf276ced15dffb68d7c013ca5cc246d2551b59acc94f82c1ec5c0073859267ace3a264a3d8b9470465672ea5aebb59d
|
|
7
|
+
data.tar.gz: bcbaaad396877bcbf6453e8d55ad3f46dcb68ba09bcfe2f6cf0f1fa6d96966c646f530e3872d77736ef40b5818feed888d69fe33e62beeb95dcfd1c6f5115203
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 1.0.0 (2022-06-03)
|
|
4
|
+
|
|
5
|
+
### Summary
|
|
6
|
+
|
|
7
|
+
This is the initial public release. The 1.0 milestone was chosen as soon
|
|
8
|
+
as NatalieParser was useful for integration back with the upstream Natalie
|
|
9
|
+
compiler project, i.e. it could fully replace RubyParser as the parser
|
|
10
|
+
in use by Natalie.
|
|
11
|
+
|
|
12
|
+
That is not to say that NatalieParser is _complete_ -- it is merely _useful_.
|
|
13
|
+
|
|
14
|
+
These are the features known to still be missing in this release:
|
|
15
|
+
|
|
16
|
+
- [ ] Support different source encodings
|
|
17
|
+
- [ ] Support more of the Ruby 3.0 syntax
|
|
18
|
+
- [ ] Argument forwarding (`...`)
|
|
19
|
+
- [ ] Pattern matching
|
|
20
|
+
- [ ] Numbered block parameters (`_1`, `_2`, etc.)
|
|
21
|
+
- [ ] Non-ASCII identifiers
|
|
22
|
+
- [ ] Rational and Complex literals (`1r` and `2i`)
|
data/Dockerfile
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Image used to build and test natalie_parser in CI.
# The base image is a build argument so other Ruby versions can be tested,
# e.g. `docker build --build-arg IMAGE="ruby:2.7" .`
ARG IMAGE=ruby:3.0
FROM $IMAGE

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y -q build-essential clang
RUN gem install bundler --no-doc

ENV LC_ALL=C.UTF-8

# Absolute path so the working directory always matches the absolute COPY
# destination below, regardless of any WORKDIR set by the base image.
WORKDIR /natalie_parser

# Install gems before copying sources so this layer is cached across code changes.
COPY Gemfile /natalie_parser/
RUN bundle install

# Compiler selection; overridable with --build-arg CC=clang --build-arg CXX=clang++.
ARG CC=gcc
ENV CC=$CC
ARG CXX=g++
ENV CXX=$CXX

COPY Rakefile Rakefile
COPY ext ext
COPY lib lib
COPY src src
COPY include include
RUN rake

# Tests are copied last so editing them does not invalidate the build layer.
COPY test test
|
data/Gemfile
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# NOTE: This Gemfile is used for testing the project.
|
|
2
|
+
# These dependencies are not needed to consume the library as a C extension for MRI.
|
|
3
|
+
|
|
4
|
+
source 'https://rubygems.org'
|
|
5
|
+
|
|
6
|
+
gem 'minitest'
|
|
7
|
+
gem 'minitest-focus'
|
|
8
|
+
gem 'minitest-reporters'
|
|
9
|
+
gem 'ruby_parser'
|
|
10
|
+
gem 'rake'
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2022 Tim Morgan and contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Natalie Parser
|
|
2
|
+
|
|
3
|
+
[Build status](https://github.com/natalie-lang/natalie_parser/actions?query=workflow%3ABuild+branch%3Amaster)
|
|
4
|
+
[MIT License](https://github.com/natalie-lang/natalie_parser/blob/master/LICENSE)
|
|
5
|
+
|
|
6
|
+
This is a parser for the Ruby programming language, written in C++.
|
|
7
|
+
It was extracted from the [Natalie](https://github.com/natalie-lang/natalie) project.
|
|
8
|
+
|
|
9
|
+
You can use this library directly from a C/C++ project, or you can
|
|
10
|
+
build it as a Ruby gem and use it from Ruby itself.
|
|
11
|
+
|
|
12
|
+
We are currently targeting Ruby 3.0 syntax, but that will probably
|
|
13
|
+
change over time, depending on what things we want to support and
|
|
14
|
+
what kind of help we get from the community.
|
|
15
|
+
|
|
16
|
+
NOTE: This project is still very new and there are certainly bugs.
|
|
17
|
+
See the list below for things we already know about, but expect there
|
|
18
|
+
are more we don't know about yet. **We don't recommend you use this in
|
|
19
|
+
production applications.**
|
|
20
|
+
|
|
21
|
+
## To Do
|
|
22
|
+
|
|
23
|
+
- [x] Parse the [Natalie](https://github.com/natalie-lang/natalie) compiler and standard library
|
|
24
|
+
- [x] Pass (mostly) the [RubyParser](https://github.com/seattlerb/ruby_parser) test suite
|
|
25
|
+
- [ ] Support different source encodings
|
|
26
|
+
- [ ] Support more of the Ruby 3.0 syntax
|
|
27
|
+
- [x] "Endless" method definition (`def foo = bar`)
|
|
28
|
+
- [ ] Argument forwarding (`...`)
|
|
29
|
+
- [ ] Pattern matching
|
|
30
|
+
- [ ] Numbered block parameters (`_1`, `_2`, etc.)
|
|
31
|
+
- [ ] Non-ASCII identifiers
|
|
32
|
+
- [ ] Rational and Complex literals (`1r` and `2i`)
|
|
33
|
+
|
|
34
|
+
## Development
|
|
35
|
+
|
|
36
|
+
```sh
|
|
37
|
+
rake
|
|
38
|
+
ruby -I lib:ext -r natalie_parser -e "p NatalieParser.parse('1 + 2')"
|
|
39
|
+
# => s(:block, s(:call, s(:lit, 1), :+, s(:lit, 2)))
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Running Tests
|
|
43
|
+
|
|
44
|
+
```sh
|
|
45
|
+
rake test
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Copyright & License
|
|
49
|
+
|
|
50
|
+
Natalie is copyright 2022, Tim Morgan and contributors. Natalie is licensed
|
|
51
|
+
under the MIT License; see the `LICENSE` file in this directory for the full text.
|
|
52
|
+
|
|
53
|
+
### Note about Outside Sources
|
|
54
|
+
|
|
55
|
+
The file `test/test_ruby_parser.rb` is copyright Ryan Davis and is licensed MIT.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# Top-level developer tasks: build, clean-up, tests and tooling helpers.

task default: :build

desc 'Build Natalie Parser library and MRI C extension'
task build: %i[
  bundle_install
  build_dir
  library
  parser_c_ext
  write_compile_database
]

# Suffix of the compiled extension: 'bundle' when RUBY_PLATFORM matches darwin,
# 'so' otherwise. Also used by the extension file task further down.
so_ext = RUBY_PLATFORM =~ /darwin/ ? 'bundle' : 'so'

desc 'Build Natalie Parser library'
task library: [:build_dir, "build/libnatalie_parser.a"]

desc 'Build Natalie Parser MRI C extension'
task parser_c_ext: [:build_dir, "ext/natalie_parser/natalie_parser.#{so_ext}"]

desc 'Remove temporary files created during build'
task :clean do
  # Literal (non-glob) entries stay in the FileList even when they do not
  # exist, hence the File.exist? guard. Both object sub-directories created by
  # the build_dir task (build/lexer and build/node) are removed.
  Rake::FileList[%w[
    build/build.log
    build/*.o
    build/lexer
    build/node
    build/asan_test
    ext/natalie_parser/*.{h,log,o}
  ]].each { |path| rm_rf path if File.exist?(path) }
end

desc 'Remove all generated files'
task :clobber do
  Rake::FileList[%w[
    build
    ext/natalie_parser/*.{so,bundle,h,log,o}
  ]].each { |path| rm_rf path if File.exist?(path) }
end

task distclean: :clobber

desc 'Run the test suite'
task test: [:build, 'build/asan_test'] do
  sh 'bundle exec ruby test/all.rb'
  sh 'build/asan_test'
  sh 'bundle exec ruby test/test_ruby_parser.rb'
end

desc 'Run the test suite when changes are made (requires entr binary)'
task :watch do
  files = Rake::FileList['**/*.cpp', '**/*.hpp', '**/*.rb']
  sh "ls #{files} | entr -c -s 'rake test'"
end

desc 'Show line counts for the project'
task :cloc do
  sh 'cloc include lib src test'
end

desc 'Generate tags file for development'
task :ctags do
  sh 'ctags -R --exclude=.cquery_cache --exclude=ext --exclude=build --append=no .'
end
task tags: :ctags

desc 'Format C++ code with clang-format'
task :format do
  sh "find include -type f -name '*.hpp' -exec clang-format -i --style=file {} +"
  sh "find src -type f -name '*.cpp' -exec clang-format -i --style=file {} +"
end

desc 'Show TODO and FIXME comments in the project'
task :todo do
  # `grep -E` is the portable spelling of the obsolescent `egrep`.
  sh "grep -rE 'FIXME|TODO' src include lib"
end

desc 'Run the benchmark script'
task benchmark: :build do
  require_relative './test/benchmark'
end
|
|
80
|
+
|
|
81
|
+
# # # # Docker Tasks (used for CI) # # # #
|
|
82
|
+
|
|
83
|
+
# Extra flags for `docker run`: forward the CI variable when it is set,
# otherwise attach an interactive TTY if stdout is a terminal. Evaluates to
# nil (interpolated as an empty string) when neither applies.
DOCKER_FLAGS =
  if ENV['CI']
    "-e CI=#{ENV['CI']}"
  elsif STDOUT.isatty
    '-i -t'
  end

# Image built with the Dockerfile defaults (gcc/g++).
task(:docker_build) { sh 'docker build -t natalie-parser .' }

# Open a shell inside the default image.
task(docker_bash: :docker_build) { sh 'docker run -it --rm --entrypoint bash natalie-parser' }

# Same image, compiled with clang/clang++.
task(:docker_build_clang) do
  sh 'docker build -t natalie-parser-clang --build-arg CC=clang --build-arg CXX=clang++ .'
end

# Image based on Ruby 2.7 instead of the default base image.
task(:docker_build_ruby27) do
  sh 'docker build -t natalie-parser-ruby27 --build-arg IMAGE="ruby:2.7" .'
end

# Run the test suite under both compilers.
task docker_test: [:docker_test_gcc, :docker_test_clang]

task(docker_test_gcc: :docker_build) do
  sh "docker run #{DOCKER_FLAGS} --rm --entrypoint rake natalie-parser test"
end

task(docker_test_clang: :docker_build_clang) do
  sh "docker run #{DOCKER_FLAGS} --rm --entrypoint rake natalie-parser-clang test"
end
|
|
115
|
+
|
|
116
|
+
# # # # Build Compile Database # # # #
|
|
117
|
+
|
|
118
|
+
# When the `compiledb` tool is installed, record every line written through
# $stderr.puts so the :write_compile_database task can feed that log to
# compiledb. Otherwise the task is a no-op.
#
# The redirect order matters: `>/dev/null 2>&1` silences both stdout and stderr
# of `which`; the reverse order would leave stderr attached to the terminal.
if system('which compiledb >/dev/null 2>&1')
  $compiledb_out = []

  # Behaves like IO#puts (any number / type of arguments) and additionally
  # remembers each printed line.
  def $stderr.puts(*lines)
    super
    $compiledb_out.concat(lines.flatten.map(&:to_s))
  end

  task :write_compile_database do
    if $compiledb_out.any?
      File.write('build/build.log', $compiledb_out.join("\n"))
      sh 'compiledb < build/build.log'
    end
  end
else
  task :write_compile_database do
    # noop
  end
end
|
|
137
|
+
|
|
138
|
+
# # # # Internal Tasks and Rules # # # #
|
|
139
|
+
|
|
140
|
+
# C++ standard passed to the compiler via -std=.
STANDARD = 'c++17'
HEADERS = Rake::FileList['include/**/{*.h,*.hpp}']
SOURCES = Rake::FileList['src/**/*.{c,cpp}']
# One object per source file, mirrored from src/ into build/ (foo.cpp -> foo.cpp.o).
OBJECT_FILES = SOURCES.sub('src/', 'build/').pathmap('%p.o')

require 'tempfile'

# Create the object directories that mirror src/lexer and src/node.
task :build_dir do
  mkdir_p 'build/lexer' unless File.exist?('build/lexer')
  mkdir_p 'build/node' unless File.exist?('build/node')
end

# Compile rules. Every object depends on all headers, so touching any header
# rebuilds everything.
rule '.cpp.o' => ['src/%n'] + HEADERS do |t|
  sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
end

rule %r{lexer/.*\.cpp\.o$} => ['src/lexer/%n'] + HEADERS do |t|
  sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
end

rule %r{node/.*\.cpp\.o$} => ['src/node/%n'] + HEADERS do |t|
  sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -c -o #{t.name} #{t.source}"
end

# Objects are independent of each other, so build them in parallel.
multitask objects: OBJECT_FILES

file 'build/libnatalie_parser.a' => HEADERS + [:objects] do |t|
  sh "ar rcs #{t.name} #{OBJECT_FILES}"
end

file "ext/natalie_parser/natalie_parser.#{so_ext}" => [
  'ext/natalie_parser/natalie_parser.cpp',
  'ext/natalie_parser/mri_creator.hpp',
] + SOURCES + HEADERS do |t|
  build_dir = File.expand_path('ext/natalie_parser', __dir__)
  # Start from a clean slate: drop stale objects and the previous extension
  # binary. t.name is used so the right file is removed on every platform
  # (natalie_parser.bundle on macOS, natalie_parser.so elsewhere).
  Rake::FileList['ext/natalie_parser/*.o'].each { |path| rm path }
  rm_rf t.name
  sh <<-SH
    cd #{build_dir} && \
    ruby extconf.rb && \
    make -j
  SH
end

file 'build/fragments.hpp' => ['test/parser_test.rb', 'test/support/extract_parser_test_fragments.rb'] do
  sh 'ruby -I lib:ext test/support/extract_parser_test_fragments.rb'
end

# t.source is the first prerequisite, i.e. test/asan_test.cpp.
file 'build/asan_test' => ['test/asan_test.cpp', 'build/fragments.hpp', :library] do |t|
  sh "#{cxx} #{cxx_flags.join(' ')} -std=#{STANDARD} -I build -I include -o #{t.name} #{t.source} -L build -lnatalie_parser"
end

task :bundle_install do
  sh 'bundle check || bundle install'
end
|
|
195
|
+
|
|
196
|
+
# C compiler command (memoized).
#
# Returns ENV['CC'] when set; otherwise 'ccache cc' if a `ccache` binary is
# found on the PATH, else plain 'cc'.
def cc
  @cc ||=
    if ENV['CC']
      ENV['CC']
    # `>/dev/null 2>&1` discards both output streams of `which`; the reverse
    # order would only discard stdout.
    elsif system('which ccache >/dev/null 2>&1')
      'ccache cc'
    else
      'cc'
    end
end
|
|
206
|
+
|
|
207
|
+
# C++ compiler command (memoized).
#
# Returns ENV['CXX'] when set; otherwise 'ccache c++' if a `ccache` binary is
# found on the PATH, else plain 'c++'.
def cxx
  @cxx ||=
    if ENV['CXX']
      ENV['CXX']
    # `>/dev/null 2>&1` discards both output streams of `which`; the reverse
    # order would only discard stdout.
    elsif system('which ccache >/dev/null 2>&1')
      'ccache c++'
    else
      'c++'
    end
end
|
|
217
|
+
|
|
218
|
+
# Compiler flags as an array of strings.
#
# BUILD=release selects an optimized build (-O2); any other value selects a
# strict debug build with warnings as errors and AddressSanitizer. The include
# directories are appended as "-I <path>" entries.
def cxx_flags
  shared = %w[-fPIC -g]
  mode_specific =
    if ENV['BUILD'] == 'release'
      %w[-O2]
    else
      %w[-Wall -Wextra -Werror -fsanitize=address]
    end
  include_flags = include_paths.map { |dir| "-I #{dir}" }
  shared + mode_specific + include_flags
end
|
|
239
|
+
|
|
240
|
+
# Directories handed to the compiler with -I: the `include` directory that
# sits next to this Rakefile, as an absolute path.
def include_paths
  include_dir = File.expand_path('include', __dir__)
  [include_dir]
end
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# mkmf configuration for the natalie_parser MRI extension.
# All relative paths assume this script runs from ext/natalie_parser.
require 'mkmf'

$CXXFLAGS += ' -g -std=c++17'
$INCFLAGS += ' -I ../../include'

# Compile the library sources together with the extension glue file.
$srcs = Dir['../../src/**/*.cpp', 'natalie_parser.cpp']

# Let make locate sources that live outside the extension directory.
%w[src src/lexer src/node].each do |dir|
  $VPATH << "$(srcdir)/../../#{dir}"
end

create_header
create_makefile 'natalie_parser'
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
#include "ruby.h"
|
|
2
|
+
#include "ruby/encoding.h"
|
|
3
|
+
#include "ruby/intern.h"
|
|
4
|
+
|
|
5
|
+
#include "natalie_parser/creator.hpp"
|
|
6
|
+
#include "natalie_parser/node.hpp"
|
|
7
|
+
|
|
8
|
+
extern VALUE Sexp;
|
|
9
|
+
|
|
10
|
+
namespace NatalieParser {
|
|
11
|
+
|
|
12
|
+
class MRICreator : public Creator {
|
|
13
|
+
public:
|
|
14
|
+
MRICreator(const Node &node)
|
|
15
|
+
: Creator { node.file().static_cast_as<const String>(), node.line(), node.column() } {
|
|
16
|
+
reset_sexp();
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
MRICreator(const MRICreator &other)
|
|
20
|
+
: Creator { other.file(), other.line(), other.column() } {
|
|
21
|
+
reset_sexp();
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
virtual ~MRICreator() { }
|
|
25
|
+
|
|
26
|
+
virtual void reset_sexp() override {
|
|
27
|
+
m_sexp = rb_class_new_instance(0, nullptr, Sexp);
|
|
28
|
+
rb_ivar_set(m_sexp, rb_intern("@file"), get_file_string(file()));
|
|
29
|
+
rb_ivar_set(m_sexp, rb_intern("@line"), rb_int_new(line() + 1));
|
|
30
|
+
rb_ivar_set(m_sexp, rb_intern("@column"), rb_int_new(column() + 1));
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
virtual void set_comments(const TM::String &comments) override {
|
|
34
|
+
auto string_obj = rb_utf8_str_new(comments.c_str(), comments.length());
|
|
35
|
+
rb_ivar_set(m_sexp, rb_intern("@comments"), string_obj);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
virtual void set_type(const char *type) override {
|
|
39
|
+
rb_ary_store(m_sexp, 0, ID2SYM(rb_intern(type)));
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
virtual void append(const Node &node) override {
|
|
43
|
+
if (node.type() == Node::Type::Nil) {
|
|
44
|
+
rb_ary_push(m_sexp, Qnil);
|
|
45
|
+
return;
|
|
46
|
+
}
|
|
47
|
+
MRICreator creator { node };
|
|
48
|
+
creator.set_assignment(assignment());
|
|
49
|
+
node.transform(&creator);
|
|
50
|
+
rb_ary_push(m_sexp, creator.sexp());
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
virtual void append_array(const ArrayNode &array) override {
|
|
54
|
+
MRICreator creator { array };
|
|
55
|
+
creator.set_assignment(assignment());
|
|
56
|
+
array.ArrayNode::transform(&creator);
|
|
57
|
+
rb_ary_push(m_sexp, creator.sexp());
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
virtual void append_false() override {
|
|
61
|
+
rb_ary_push(m_sexp, Qfalse);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
virtual void append_float(double number) override {
|
|
65
|
+
rb_ary_push(m_sexp, rb_float_new(number));
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
virtual void append_integer(long long number) override {
|
|
69
|
+
rb_ary_push(m_sexp, rb_int_new(number));
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
virtual void append_integer(TM::String &number) override {
|
|
73
|
+
auto string_obj = rb_utf8_str_new(number.c_str(), number.length());
|
|
74
|
+
rb_ary_push(m_sexp, rb_Integer(string_obj));
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
virtual void append_nil() override {
|
|
78
|
+
rb_ary_push(m_sexp, Qnil);
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
virtual void append_range(long long first, long long last, bool exclude_end) override {
|
|
82
|
+
rb_ary_push(m_sexp, rb_range_new(rb_int_new(first), rb_int_new(last), exclude_end ? Qtrue : Qfalse));
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
virtual void append_regexp(TM::String &pattern, int options) override {
|
|
86
|
+
auto encoding = pattern.contains_utf8_encoded_multibyte_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
|
|
87
|
+
auto regexp = rb_enc_reg_new(pattern.c_str(), pattern.size(), encoding, options);
|
|
88
|
+
rb_ary_push(m_sexp, regexp);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
virtual void append_sexp(std::function<void(Creator *)> fn) override {
|
|
92
|
+
MRICreator creator { *this };
|
|
93
|
+
fn(&creator);
|
|
94
|
+
rb_ary_push(m_sexp, creator.sexp());
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
virtual void append_string(TM::String &string) override {
|
|
98
|
+
auto encoding = string.contains_seemingly_valid_utf8_encoded_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
|
|
99
|
+
rb_ary_push(m_sexp, rb_enc_str_new(string.c_str(), string.length(), encoding));
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
virtual void append_symbol(TM::String &name) override {
|
|
103
|
+
auto encoding = name.contains_utf8_encoded_multibyte_characters() ? rb_utf8_encoding() : rb_ascii8bit_encoding();
|
|
104
|
+
auto symbol = ID2SYM(rb_intern3(name.c_str(), name.size(), encoding));
|
|
105
|
+
rb_ary_push(m_sexp, symbol);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
virtual void append_true() override {
|
|
109
|
+
rb_ary_push(m_sexp, Qtrue);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
virtual void wrap(const char *type) override {
|
|
113
|
+
auto inner = m_sexp;
|
|
114
|
+
reset_sexp();
|
|
115
|
+
set_type(type);
|
|
116
|
+
rb_ary_push(m_sexp, inner);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
VALUE sexp() const { return m_sexp; }
|
|
120
|
+
|
|
121
|
+
private:
|
|
122
|
+
VALUE m_sexp { Qnil };
|
|
123
|
+
|
|
124
|
+
static VALUE get_file_string(SharedPtr<const String> file) {
|
|
125
|
+
auto file_string = s_file_cache.get(*file);
|
|
126
|
+
if (!file_string) {
|
|
127
|
+
file_string = rb_str_new(file->c_str(), file->length());
|
|
128
|
+
// FIXME: Seems there is no way to un-register and object. :-(
|
|
129
|
+
rb_gc_register_mark_object(file_string);
|
|
130
|
+
s_file_cache.put(*file, file_string);
|
|
131
|
+
}
|
|
132
|
+
return file_string;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// TODO: Move this to the Parser object, pass it in, clean it up when finished with it.
|
|
136
|
+
// (Otherwise we leak memory if the user parses lots of different files in a long-running process.)
|
|
137
|
+
inline static TM::Hashmap<const String, VALUE> s_file_cache { TM::HashType::TMString };
|
|
138
|
+
};
|
|
139
|
+
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#include "extconf.h"
|
|
2
|
+
#include "ruby.h"
|
|
3
|
+
#include "ruby/encoding.h"
|
|
4
|
+
#include "ruby/intern.h"
|
|
5
|
+
#include "stdio.h"
|
|
6
|
+
|
|
7
|
+
// these includes MUST come after the ones above
|
|
8
|
+
#include "mri_creator.hpp"
|
|
9
|
+
#include "natalie_parser/parser.hpp"
|
|
10
|
+
|
|
11
|
+
VALUE Parser; // Ruby class object for NatalieParser; assigned in Init_natalie_parser()
|
|
12
|
+
VALUE Sexp; // Ruby Sexp class used to build AST nodes; looked up in Init_natalie_parser()
|
|
13
|
+
|
|
14
|
+
extern "C" {
|
|
15
|
+
|
|
16
|
+
// NatalieParser#initialize(code, path = "(string)")
//
// Stores the source code in @code and the file path in @path. Accepts one or
// two arguments; anything else raises.
// NOTE(review): the arity failure is raised as SyntaxError, not ArgumentError;
// confirm whether callers rely on that before changing it.
VALUE initialize(int argc, VALUE *argv, VALUE self) {
    const bool arity_ok = argc >= 1 && argc <= 2;
    if (!arity_ok)
        rb_raise(rb_eSyntaxError,
            "wrong number of arguments (given %d, expected 1..2)", argc);

    rb_ivar_set(self, rb_intern("@code"), argv[0]);

    // Fall back to a placeholder path when none was given.
    VALUE path = (argc > 1) ? argv[1] : rb_str_new_cstr("(string)");
    rb_ivar_set(self, rb_intern("@path"), path);

    return self;
}
|
|
29
|
+
|
|
30
|
+
// Converts a parsed node (and everything below it) into a Ruby Sexp by
// running the node's transform against an MRICreator.
VALUE node_to_ruby(TM::SharedPtr<NatalieParser::Node> node) {
    const NatalieParser::Node &root = node.ref();
    NatalieParser::MRICreator sexp_builder { root };
    root.transform(&sexp_builder);
    return sexp_builder.sexp();
}
|
|
35
|
+
|
|
36
|
+
// NatalieParser#parse
//
// Parses @code (reported as coming from @path) and returns the AST as a Sexp.
// Raises SyntaxError with the parser's message when the source is invalid.
VALUE parse_on_instance(VALUE self) {
    VALUE code = rb_ivar_get(self, rb_intern("@code"));
    VALUE path = rb_ivar_get(self, rb_intern("@path"));

    // StringValueCStr raises (longjmps) for values that are not usable as C
    // strings, so run both conversions before allocating anything in C++.
    const char *code_cstr = StringValueCStr(code);
    const char *path_cstr = StringValueCStr(path);

    VALUE ast = Qnil;
    VALUE syntax_error = Qnil;
    {
        auto code_string = new TM::String { code_cstr };
        auto path_string = new TM::String { path_cstr };
        auto parser = NatalieParser::Parser { code_string, path_string };
        try {
            auto tree = parser.tree();
            ast = node_to_ruby(tree);
        } catch (NatalieParser::Parser::SyntaxError &error) {
            // Only build the Ruby exception here. Raising longjmps, and doing
            // that inside the handler would skip the C++ exception cleanup
            // and the destructors of the objects in this scope.
            syntax_error = rb_exc_new_cstr(rb_eSyntaxError, error.message());
        }
    } // parser and its data are released here, on both paths

    if (!NIL_P(syntax_error))
        rb_exc_raise(syntax_error);
    return ast;
}
|
|
50
|
+
|
|
51
|
+
// NatalieParser.parse(code, path = "(string)")
//
// Convenience class method: builds a parser instance from the given
// arguments and returns the result of parsing on it.
VALUE parse(int argc, VALUE *argv, VALUE self) {
    VALUE instance = rb_class_new_instance(argc, argv, Parser);
    VALUE ast = parse_on_instance(instance);
    return ast;
}
|
|
55
|
+
|
|
56
|
+
// Converts one lexer token into a Ruby Hash.
//
// Returns nil for the EOF token. Otherwise the hash has :type, plus :literal
// (String, Symbol, Integer or Float depending on the token type) or :options
// for a regexp end token, plus :line / :column when include_location_info is
// true. Raises SyntaxError when the token fails validation.
VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
    if (token.is_eof())
        return Qnil;

    // Build the Ruby exception inside the handler but raise it afterwards:
    // raising longjmps, which must not happen from within an active C++
    // catch block.
    VALUE syntax_error = Qnil;
    try {
        token.validate();
    } catch (NatalieParser::Parser::SyntaxError &error) {
        syntax_error = rb_exc_new_cstr(rb_eSyntaxError, error.message());
    }
    if (!NIL_P(syntax_error))
        rb_exc_raise(syntax_error);

    const char *type = token.type_value();
    if (!type) abort(); // FIXME: assert no workie?

    const VALUE literal_key = ID2SYM(rb_intern("literal"));
    auto hash = rb_hash_new();
    rb_hash_aset(hash, ID2SYM(rb_intern("type")), ID2SYM(rb_intern(type)));

    switch (token.type()) {
    // Literal exposed as a UTF-8 String.
    case NatalieParser::Token::Type::Bignum:
    case NatalieParser::Token::Type::Doc:
    case NatalieParser::Token::Type::String: {
        auto literal = token.literal_string();
        rb_hash_aset(hash, literal_key, rb_utf8_str_new(literal->c_str(), literal->length()));
        break;
    }
    // Literal exposed as a Symbol.
    case NatalieParser::Token::Type::BackRef:
    case NatalieParser::Token::Type::BareName:
    case NatalieParser::Token::Type::ClassVariable:
    case NatalieParser::Token::Type::Constant:
    case NatalieParser::Token::Type::GlobalVariable:
    case NatalieParser::Token::Type::InstanceVariable:
    case NatalieParser::Token::Type::Symbol:
    case NatalieParser::Token::Type::SymbolKey: {
        auto literal = token.literal_string();
        rb_hash_aset(hash, literal_key, ID2SYM(rb_intern3(literal->c_str(), literal->size(), rb_utf8_encoding())));
        break;
    }
    // Literal exposed as an Integer; LL2NUM keeps the full long long range.
    case NatalieParser::Token::Type::Fixnum:
    case NatalieParser::Token::Type::NthRef:
        rb_hash_aset(hash, literal_key, LL2NUM(token.get_fixnum()));
        break;
    case NatalieParser::Token::Type::Float:
        rb_hash_aset(hash, literal_key, rb_float_new(token.get_double()));
        break;
    // Regexp terminator: the literal, when present, holds the option letters.
    case NatalieParser::Token::Type::InterpolatedRegexpEnd:
        if (token.has_literal()) {
            auto options = token.literal_string();
            rb_hash_aset(hash, ID2SYM(rb_intern("options")), rb_str_new(options->c_str(), options->length()));
        }
        break;
    default:
        break;
    }

    if (include_location_info) {
        rb_hash_aset(hash, ID2SYM(rb_intern("line")), rb_int_new(token.line()));
        rb_hash_aset(hash, ID2SYM(rb_intern("column")), rb_int_new(token.column()));
    }
    return hash;
}
|
|
111
|
+
|
|
112
|
+
// NatalieParser#tokens(include_location_info)
//
// Lexes @code and returns an Array with one Hash per token (see
// token_to_ruby). Tokens that convert to nil, such as EOF, are left out.
VALUE tokens_on_instance(VALUE self, VALUE include_location_info = Qfalse) {
    VALUE code = rb_ivar_get(self, rb_intern("@code"));
    VALUE path = rb_ivar_get(self, rb_intern("@path"));

    // StringValueCStr raises (longjmps) for values that are not usable as C
    // strings, so run both conversions before allocating anything in C++.
    const char *code_cstr = StringValueCStr(code);
    const char *path_cstr = StringValueCStr(path);

    auto code_string = new TM::String { code_cstr };
    auto path_string = new TM::String { path_cstr };
    auto lexer = NatalieParser::Lexer { code_string, path_string };

    const bool with_location = RTEST(include_location_info);
    auto array = rb_ary_new();
    auto the_tokens = lexer.tokens();
    // Iterate by reference; token_to_ruby takes its own copy of each token.
    for (auto &&token : *the_tokens) {
        auto token_value = token_to_ruby(token, with_location);
        if (RTEST(token_value))
            rb_ary_push(array, token_value);
    }
    return array;
}
|
|
127
|
+
|
|
128
|
+
// NatalieParser.tokens(code, include_location_info = false)
//
// Convenience class method: builds a parser instance for `code` and returns
// its token list. Arguments beyond the second are ignored.
VALUE tokens(int argc, VALUE *argv, VALUE self) {
    // argv[0] is handed to the constructor below, so it must exist; without
    // this check a call with no arguments would read past the end of argv.
    if (argc < 1)
        rb_raise(rb_eArgError,
            "wrong number of arguments (given %d, expected 1..2)", argc);

    VALUE parser = rb_class_new_instance(1, argv, Parser);
    VALUE include_location_info = argc > 1 ? argv[1] : Qfalse;
    return tokens_on_instance(parser, include_location_info);
}
|
|
133
|
+
|
|
134
|
+
// Extension entry point, called by Ruby when natalie_parser is loaded.
//
// Looks up the existing top-level Sexp class, then defines the NatalieParser
// class with its instance methods (initialize, parse, tokens) and the
// matching class-level shortcuts (parse, tokens).
void Init_natalie_parser() {
    // Sexp has to be defined before the extension is loaded.
    // NOTE(review): presumably provided by lib/natalie_parser/sexp.rb; confirm load order.
    Sexp = rb_const_get(rb_cObject, rb_intern("Sexp"));

    Parser = rb_define_class("NatalieParser", rb_cObject);
    rb_define_method(Parser, "initialize", initialize, -1);
    rb_define_method(Parser, "parse", parse_on_instance, 0);
    rb_define_method(Parser, "tokens", tokens_on_instance, 1);
    rb_define_singleton_method(Parser, "parse", parse, -1);
    rb_define_singleton_method(Parser, "tokens", tokens, -1);
}
|
|
144
|
+
}
|