@coana-tech/cli 14.12.76 → 14.12.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.mjs +1 -1
- package/package.json +1 -1
- package/reachability-analyzers-cli.mjs +1 -2
- package/repos/coana-tech/alucard/alucard.jar +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/cli.mjs +24669 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/LICENSE.md +9 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/README.md +95 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/common.gypi +21 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/except.gypi +25 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/index.js +14 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/napi-inl.deprecated.h +186 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/napi-inl.h +6941 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/napi.h +3295 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/node_addon_api.gyp +42 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/node_api.gyp +9 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/noexcept.gypi +26 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/nothing.c +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/package-support.json +21 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/package.json +480 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/tools/README.md +73 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/tools/check-napi.js +99 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/tools/clang-format.js +71 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-addon-api/tools/conversion.js +301 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-gyp-build/LICENSE +21 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-gyp-build/README.md +58 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-gyp-build/SECURITY.md +5 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-gyp-build/bin.js +84 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-gyp-build/build-test.js +19 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-gyp-build/index.js +6 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-gyp-build/node-gyp-build.js +207 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-gyp-build/optional.js +7 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/node-gyp-build/package.json +43 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/LICENSE +21 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/README.md +128 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/binding.gyp +80 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/index.js +919 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/node_modules/.bin/node-gyp-build +16 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/node_modules/.bin/node-gyp-build-optional +16 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/node_modules/.bin/node-gyp-build-test +16 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/package.json +76 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/prebuilds/darwin-arm64/tree-sitter.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/prebuilds/darwin-x64/tree-sitter.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/prebuilds/linux-arm64/tree-sitter.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/prebuilds/linux-x64/tree-sitter.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/prebuilds/win32-arm64/tree-sitter.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/prebuilds/win32-x64/tree-sitter.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/addon_data.h +47 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/binding.cc +35 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/conversions.cc +140 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/conversions.h +22 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/language.cc +106 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/language.h +17 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/logger.cc +70 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/logger.h +19 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/lookaheaditerator.cc +122 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/lookaheaditerator.h +33 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/node.cc +1088 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/node.h +30 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/parser.cc +306 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/parser.h +35 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/query.cc +397 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/query.h +40 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/tree.cc +316 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/tree.h +45 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/tree_cursor.cc +213 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/src/tree_cursor.h +52 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/tree-sitter.d.ts +1042 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/include/tree_sitter/api.h +1318 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/alloc.c +48 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/alloc.h +41 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/array.h +291 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/atomic.h +68 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/clock.h +146 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/error_costs.h +11 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/get_changed_ranges.c +501 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/get_changed_ranges.h +36 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/host.h +21 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/language.c +223 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/language.h +297 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/length.h +52 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/lexer.c +435 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/lexer.h +49 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/lib.c +12 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/node.c +920 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/parser.c +2165 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/parser.h +266 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/point.h +62 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/portable/endian.h +206 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/query.c +4187 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/reduce_action.h +34 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/reusable_node.h +95 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/stack.c +899 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/stack.h +130 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/subtree.c +1066 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/subtree.h +399 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/tree.c +170 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/tree.h +31 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/tree_cursor.c +712 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/tree_cursor.h +48 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/ts_assert.h +11 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/ICU_SHA +1 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/LICENSE +414 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/README.md +29 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/ptypes.h +1 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/umachine.h +448 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/urename.h +1 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/utf.h +1 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/utf16.h +733 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/utf8.h +881 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode.h +83 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/wasm/stdlib-symbols.txt +24 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/wasm/stdlib.c +109 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/wasm/wasm-stdlib.h +1302 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/wasm_store.c +1859 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter/vendor/tree-sitter/lib/src/wasm_store.h +31 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/LICENSE +22 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/README.md +21 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/binding.gyp +30 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/bindings/node/binding.cc +20 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/bindings/node/binding_test.js +9 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/bindings/node/index.d.ts +28 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/bindings/node/index.js +11 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/grammar.js +1281 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/node_modules/.bin/node-gyp-build +16 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/node_modules/.bin/node-gyp-build-optional +16 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/node_modules/.bin/node-gyp-build-test +16 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/package.json +77 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/prebuilds/darwin-arm64/tree-sitter-ruby.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/prebuilds/darwin-x64/tree-sitter-ruby.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/prebuilds/linux-arm64/tree-sitter-ruby.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/prebuilds/linux-x64/tree-sitter-ruby.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/prebuilds/win32-arm64/tree-sitter-ruby.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/prebuilds/win32-x64/tree-sitter-ruby.node +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/queries/highlights.scm +154 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/queries/locals.scm +27 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/queries/tags.scm +64 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/src/grammar.json +8364 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/src/node-types.json +4108 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/src/parser.c +471247 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/src/scanner.c +1107 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/src/tree_sitter/alloc.h +54 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/src/tree_sitter/array.h +290 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/src/tree_sitter/parser.h +266 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/tree-sitter-ruby.wasm +0 -0
- package/repos/coana-tech/callgraph-reachability-analyzers/packages/cli/dist/bundle/node_modules/tree-sitter-ruby/tree-sitter.json +46 -0
- package/repos/coana-tech/goana/bin/goana-darwin-amd64.gz +0 -0
- package/repos/coana-tech/goana/bin/goana-darwin-arm64.gz +0 -0
- package/repos/coana-tech/goana/bin/goana-linux-amd64.gz +0 -0
- package/repos/coana-tech/goana/bin/goana-linux-arm64.gz +0 -0
|
@@ -0,0 +1,4187 @@
|
|
|
1
|
+
#include "tree_sitter/api.h"
|
|
2
|
+
#include "./alloc.h"
|
|
3
|
+
#include "./array.h"
|
|
4
|
+
#include "./clock.h"
|
|
5
|
+
#include "./language.h"
|
|
6
|
+
#include "./point.h"
|
|
7
|
+
#include "./tree_cursor.h"
|
|
8
|
+
#include "./unicode.h"
|
|
9
|
+
#include <wctype.h>
|
|
10
|
+
|
|
11
|
+
// #define DEBUG_ANALYZE_QUERY
|
|
12
|
+
// #define DEBUG_EXECUTE_QUERY
|
|
13
|
+
|
|
14
|
+
#define MAX_STEP_CAPTURE_COUNT 3  // Size of the `capture_ids` array in each QueryStep
|
|
15
|
+
#define MAX_NEGATED_FIELD_COUNT 8  // NOTE(review): presumably caps the negated fields per step - confirm at use sites
|
|
16
|
+
#define MAX_STATE_PREDECESSOR_COUNT 256  // NOTE(review): use not visible in this chunk - confirm at use sites
|
|
17
|
+
#define MAX_ANALYSIS_STATE_DEPTH 8  // Size of the `stack` array in each AnalysisState
|
|
18
|
+
#define MAX_ANALYSIS_ITERATION_COUNT 256  // NOTE(review): presumably bounds the pattern-analysis loop - confirm at use sites
|
|
19
|
+
|
|
20
|
+
/*
|
|
21
|
+
* Stream - A sequence of unicode characters derived from a UTF8 string.
|
|
22
|
+
* This struct is used in parsing queries from S-expressions.
|
|
23
|
+
*/
|
|
24
|
+
typedef struct {
|
|
25
|
+
const char *input;  // NOTE(review): presumably the current read position in the UTF8 string - confirm
|
|
26
|
+
const char *start;  // NOTE(review): presumably the beginning of the UTF8 string - confirm
|
|
27
|
+
const char *end;  // NOTE(review): presumably the end of the UTF8 string - confirm
|
|
28
|
+
int32_t next;  // NOTE(review): presumably the decoded unicode character at `input` - confirm
|
|
29
|
+
uint8_t next_size;  // NOTE(review): presumably the encoded size in bytes of `next` - confirm
|
|
30
|
+
} Stream;
|
|
31
|
+
|
|
32
|
+
/*
|
|
33
|
+
* QueryStep - A step in the process of matching a query. Each node within
|
|
34
|
+
* a query S-expression corresponds to one of these steps. An entire pattern
|
|
35
|
+
* is represented as a sequence of these steps. The basic properties of a
|
|
36
|
+
* node are represented by these fields:
|
|
37
|
+
* - `symbol` - The grammar symbol to match. A zero value represents the
|
|
38
|
+
* wildcard symbol, '_'.
|
|
39
|
+
* - `field` - The field name to match. A zero value means that a field name
|
|
40
|
+
* was not specified.
|
|
41
|
+
* - `capture_ids` - An array of integers representing the names of captures
|
|
42
|
+
* associated with this node in the pattern, terminated by a `NONE` value.
|
|
43
|
+
* - `depth` - The depth where this node occurs in the pattern. The root node
|
|
44
|
+
* of the pattern has depth zero.
|
|
45
|
+
* - `negated_field_list_id` - An id representing a set of fields that must
|
|
46
|
+
* not be present on a node matching this step.
|
|
47
|
+
*
|
|
48
|
+
* Steps have some additional fields in order to handle the `.` (or "anchor") operator,
|
|
49
|
+
* which forbids additional child nodes:
|
|
50
|
+
* - `is_immediate` - Indicates that the node matching this step cannot be preceded
|
|
51
|
+
* by other sibling nodes that weren't specified in the pattern.
|
|
52
|
+
* - `is_last_child` - Indicates that the node matching this step cannot have any
|
|
53
|
+
* subsequent named siblings.
|
|
54
|
+
*
|
|
55
|
+
* For simple patterns, steps are matched in sequential order. But in order to
|
|
56
|
+
* handle alternative/repeated/optional sub-patterns, query steps are not always
|
|
57
|
+
* structured as a linear sequence; they sometimes need to split and merge. This
|
|
58
|
+
* is done using the following fields:
|
|
59
|
+
* - `alternative_index` - The index of a different query step that serves as
|
|
60
|
+
* an alternative to this step. A `NONE` value represents no alternative.
|
|
61
|
+
* When a query state reaches a step with an alternative index, the state
|
|
62
|
+
* is duplicated, with one copy remaining at the original step, and one copy
|
|
63
|
+
* moving to the alternative step. The alternative may have its own alternative
|
|
64
|
+
* step, so this splitting is an iterative process.
|
|
65
|
+
* - `is_dead_end` - Indicates that this state cannot be passed directly, and
|
|
66
|
+
* exists only in order to redirect to an alternative index, with no splitting.
|
|
67
|
+
* - `is_pass_through` - Indicates that this step has no matching logic of its own,
|
|
68
|
+
* and exists only to split a state. One copy of the state advances immediately
|
|
69
|
+
* to the next step, and one moves to the alternative step.
|
|
70
|
+
* - `alternative_is_immediate` - Indicates that this step's alternative step
|
|
71
|
+
* should be treated as if `is_immediate` is true.
|
|
72
|
+
*
|
|
73
|
+
* Steps also store some derived state that summarizes how they relate to other
|
|
74
|
+
* steps within the same pattern. This is used to optimize the matching process:
|
|
75
|
+
* - `contains_captures` - Indicates that this step or one of its child steps
|
|
76
|
+
* has a non-empty `capture_ids` list.
|
|
77
|
+
* - `parent_pattern_guaranteed` - Indicates that if this step is reached, then
|
|
78
|
+
* it and all of its subsequent sibling steps within the same parent pattern
|
|
79
|
+
* are guaranteed to match.
|
|
80
|
+
* - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but
|
|
81
|
+
* for the entire top-level pattern. When iterating through a query's
|
|
82
|
+
* captures using `ts_query_cursor_next_capture`, this field is used to
|
|
83
|
+
* detect that a capture can safely be returned from a match that has not
|
|
84
|
+
* even completed yet.
|
|
85
|
+
*/
|
|
86
|
+
typedef struct {
|
|
87
|
+
TSSymbol symbol;  // Grammar symbol to match; zero represents the wildcard '_'
|
|
88
|
+
TSSymbol supertype_symbol;  // NOTE(review): presumably the supertype a matching node must belong to - confirm
|
|
89
|
+
TSFieldId field;  // Field name to match; zero means no field name was specified
|
|
90
|
+
uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT];  // Capture names attached to this node, terminated by a NONE value
|
|
91
|
+
uint16_t depth;  // Depth of this node within the pattern; the root node has depth zero
|
|
92
|
+
uint16_t alternative_index;  // Index of an alternative step; NONE means there is no alternative
|
|
93
|
+
uint16_t negated_field_list_id;  // Id of the set of fields that must not be present on a matching node
|
|
94
|
+
bool is_named: 1;  // NOTE(review): presumably restricts the match to named nodes - confirm
|
|
95
|
+
bool is_immediate: 1;  // Node cannot be preceded by siblings that the pattern did not specify
|
|
96
|
+
bool is_last_child: 1;  // Node cannot have any subsequent named siblings
|
|
97
|
+
bool is_pass_through: 1;  // No matching logic of its own; exists only to split a state
|
|
98
|
+
bool is_dead_end: 1;  // Cannot be passed directly; only redirects to the alternative, with no splitting
|
|
99
|
+
bool alternative_is_immediate: 1;  // Treat the alternative step as if its `is_immediate` were true
|
|
100
|
+
bool contains_captures: 1;  // This step or one of its child steps has a non-empty `capture_ids` list
|
|
101
|
+
bool root_pattern_guaranteed: 1;  // Like `parent_pattern_guaranteed`, but for the entire top-level pattern
|
|
102
|
+
bool parent_pattern_guaranteed: 1;  // Once reached, this step and its later siblings in the parent pattern are guaranteed to match
|
|
103
|
+
} QueryStep;
|
|
104
|
+
|
|
105
|
+
/*
|
|
106
|
+
* Slice - A slice of an external array. Within a query, capture names,
|
|
107
|
+
* literal string values, and predicate step information are stored in three
|
|
108
|
+
* contiguous arrays. Individual captures, string values, and predicates are
|
|
109
|
+
* represented as slices of these three arrays.
|
|
110
|
+
*/
|
|
111
|
+
typedef struct {
|
|
112
|
+
uint32_t offset;  // NOTE(review): presumably where the slice begins within the external array - confirm
|
|
113
|
+
uint32_t length;  // NOTE(review): presumably the number of elements covered by the slice - confirm
|
|
114
|
+
} Slice;
|
|
115
|
+
|
|
116
|
+
/*
|
|
117
|
+
* SymbolTable - a two-way mapping of strings to ids.
|
|
118
|
+
*/
|
|
119
|
+
typedef struct {
|
|
120
|
+
Array(char) characters;  // NOTE(review): presumably the bytes of all stored strings, back to back - confirm
|
|
121
|
+
Array(Slice) slices;  // NOTE(review): presumably one Slice of `characters` per string, indexed by id - confirm
|
|
122
|
+
} SymbolTable;
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* CaptureQuantifiers - a data structure holding the quantifiers of pattern captures.
|
|
126
|
+
*/
|
|
127
|
+
typedef Array(uint8_t) CaptureQuantifiers;  // One uint8_t per quantifier; NOTE(review): presumably indexed by capture id - confirm
|
|
128
|
+
|
|
129
|
+
/*
|
|
130
|
+
* PatternEntry - Information about the starting point for matching a particular
|
|
131
|
+
* pattern. These entries are stored in a 'pattern map' - a sorted array that
|
|
132
|
+
* makes it possible to efficiently lookup patterns based on the symbol for their
|
|
133
|
+
* first step. The entry consists of the following fields:
|
|
134
|
+
* - `pattern_index` - the index of the pattern within the query
|
|
135
|
+
* - `step_index` - the index of the pattern's first step in the shared `steps` array
|
|
136
|
+
* - `is_rooted` - whether or not the pattern has a single root node. This property
|
|
137
|
+
* affects decisions about whether or not to start the pattern for nodes outside
|
|
138
|
+
* of a QueryCursor's range restriction.
|
|
139
|
+
*/
|
|
140
|
+
typedef struct {
|
|
141
|
+
uint16_t step_index;  // Index of the pattern's first step in the shared `steps` array
|
|
142
|
+
uint16_t pattern_index;  // Index of the pattern within the query
|
|
143
|
+
bool is_rooted;  // Whether or not the pattern has a single root node
|
|
144
|
+
} PatternEntry;
|
|
145
|
+
|
|
146
|
+
typedef struct {
|
|
147
|
+
Slice steps;  // NOTE(review): presumably this pattern's range within the shared `steps` array - confirm
|
|
148
|
+
Slice predicate_steps;  // NOTE(review): presumably this pattern's range within the predicate step array - confirm
|
|
149
|
+
uint32_t start_byte;  // NOTE(review): presumably where the pattern begins in the query source text - confirm
|
|
150
|
+
uint32_t end_byte;  // NOTE(review): presumably where the pattern ends in the query source text - confirm
|
|
151
|
+
bool is_non_local;  // NOTE(review): meaning not documented in this chunk - confirm against the code that sets it
|
|
152
|
+
} QueryPattern;
|
|
153
|
+
|
|
154
|
+
typedef struct {
|
|
155
|
+
uint32_t byte_offset;  // NOTE(review): presumably a byte position in the query source text - confirm
|
|
156
|
+
uint16_t step_index;  // NOTE(review): presumably the step associated with that position - confirm
|
|
157
|
+
} StepOffset;
|
|
158
|
+
|
|
159
|
+
/*
|
|
160
|
+
* QueryState - The state of an in-progress match of a particular pattern
|
|
161
|
+
* in a query. While executing, a `TSQueryCursor` must keep track of a number
|
|
162
|
+
* of possible in-progress matches. Each of those possible matches is
|
|
163
|
+
* represented as one of these states. Fields:
|
|
164
|
+
* - `id` - A numeric id that is exposed to the public API. This allows the
|
|
165
|
+
* caller to remove a given match, preventing any more of its captures
|
|
166
|
+
* from being returned.
|
|
167
|
+
* - `start_depth` - The depth in the tree where the first step of the state's
|
|
168
|
+
* pattern was matched.
|
|
169
|
+
* - `pattern_index` - The pattern that the state is matching.
|
|
170
|
+
* - `consumed_capture_count` - The number of captures from this match that
|
|
171
|
+
* have already been returned.
|
|
172
|
+
* - `capture_list_id` - A numeric id that can be used to retrieve the state's
|
|
173
|
+
* list of captures from the `CaptureListPool`.
|
|
174
|
+
* - `seeking_immediate_match` - A flag that indicates that the state's next
|
|
175
|
+
* step must be matched by the very next sibling. This is used when
|
|
176
|
+
* processing repetitions.
|
|
177
|
+
* - `has_in_progress_alternatives` - A flag that indicates that there are
|
|
178
|
+
* other states that have the same captures as this state, but are at
|
|
179
|
+
* different steps in their pattern. This means that in order to obey the
|
|
180
|
+
* 'longest-match' rule, this state should not be returned as a match until
|
|
181
|
+
* it is clear that there can be no other alternative match with more captures.
|
|
182
|
+
*/
|
|
183
|
+
typedef struct {
|
|
184
|
+
uint32_t id;  // Numeric id exposed to the public API; lets the caller remove a given match
|
|
185
|
+
uint32_t capture_list_id;  // Id used to retrieve this state's captures from the CaptureListPool
|
|
186
|
+
uint16_t start_depth;  // Depth in the tree where the first step of the pattern was matched
|
|
187
|
+
uint16_t step_index;  // NOTE(review): presumably the step this state is currently at - confirm
|
|
188
|
+
uint16_t pattern_index;  // The pattern that this state is matching
|
|
189
|
+
uint16_t consumed_capture_count: 12;  // Number of captures from this match that have already been returned
|
|
190
|
+
bool seeking_immediate_match: 1;  // Next step must be matched by the very next sibling (used for repetitions)
|
|
191
|
+
bool has_in_progress_alternatives: 1;  // Other states hold the same captures at different steps, so returning this match is deferred
|
|
192
|
+
bool dead: 1;  // NOTE(review): not described in the comment above - presumably marks a state to discard; confirm
|
|
193
|
+
bool needs_parent: 1;  // NOTE(review): not described in the comment above - confirm against the matching code
|
|
194
|
+
} QueryState;
|
|
195
|
+
|
|
196
|
+
typedef Array(TSQueryCapture) CaptureList;  // A list of TSQueryCapture values; CaptureListPool below stores these lists
|
|
197
|
+
|
|
198
|
+
/*
|
|
199
|
+
* CaptureListPool - A collection of *lists* of captures. Each query state needs
|
|
200
|
+
* to maintain its own list of captures. To avoid repeated allocations, this struct
|
|
201
|
+
* maintains a fixed set of capture lists, and keeps track of which ones are
|
|
202
|
+
* currently in use by a query state.
|
|
203
|
+
*/
|
|
204
|
+
typedef struct {
|
|
205
|
+
Array(CaptureList) list;  // The capture lists managed by the pool
|
|
206
|
+
CaptureList empty_list;  // NOTE(review): presumably a shared list returned when a state has no captures - confirm
|
|
207
|
+
// The maximum number of capture lists that we are allowed to allocate. We
|
|
208
|
+
// never allow `list` to allocate more entries than this, dropping pending
|
|
209
|
+
// matches if needed to stay under the limit.
|
|
210
|
+
uint32_t max_capture_list_count;
|
|
211
|
+
// The number of capture lists allocated in `list` that are not currently in
|
|
212
|
+
// use. We reuse those existing-but-unused capture lists before trying to
|
|
213
|
+
// allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
|
|
214
|
+
// list's length to indicate that it's not in use.
|
|
215
|
+
uint32_t free_capture_list_count;
|
|
216
|
+
} CaptureListPool;
|
|
217
|
+
|
|
218
|
+
/*
|
|
219
|
+
* AnalysisState - The state needed for walking the parse table when analyzing
|
|
220
|
+
* a query pattern, to determine at which steps the pattern might fail to match.
|
|
221
|
+
*/
|
|
222
|
+
typedef struct {
|
|
223
|
+
TSStateId parse_state;
|
|
224
|
+
TSSymbol parent_symbol;
|
|
225
|
+
uint16_t child_index;
|
|
226
|
+
TSFieldId field_id: 15;
|
|
227
|
+
bool done: 1;
|
|
228
|
+
} AnalysisStateEntry;
|
|
229
|
+
|
|
230
|
+
typedef struct {
|
|
231
|
+
AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH];
|
|
232
|
+
uint16_t depth;
|
|
233
|
+
uint16_t step_index;
|
|
234
|
+
TSSymbol root_symbol;
|
|
235
|
+
} AnalysisState;
|
|
236
|
+
|
|
237
|
+
// An array of pointers to individually allocated `AnalysisState` values.
typedef Array(AnalysisState *) AnalysisStateSet;
|
|
238
|
+
|
|
239
|
+
typedef struct {
|
|
240
|
+
AnalysisStateSet states;
|
|
241
|
+
AnalysisStateSet next_states;
|
|
242
|
+
AnalysisStateSet deeper_states;
|
|
243
|
+
AnalysisStateSet state_pool;
|
|
244
|
+
Array(uint16_t) final_step_indices;
|
|
245
|
+
Array(TSSymbol) finished_parent_symbols;
|
|
246
|
+
bool did_abort;
|
|
247
|
+
} QueryAnalysis;
|
|
248
|
+
|
|
249
|
+
/*
|
|
250
|
+
* AnalysisSubgraph - A subset of the states in the parse table that are used
|
|
251
|
+
* in constructing nodes with a certain symbol. Each state is accompanied by
|
|
252
|
+
* some information about the possible node that could be produced in
|
|
253
|
+
* downstream states.
|
|
254
|
+
*/
|
|
255
|
+
typedef struct {
|
|
256
|
+
TSStateId state;
|
|
257
|
+
uint16_t production_id;
|
|
258
|
+
uint8_t child_index: 7;
|
|
259
|
+
bool done: 1;
|
|
260
|
+
} AnalysisSubgraphNode;
|
|
261
|
+
|
|
262
|
+
typedef struct {
|
|
263
|
+
TSSymbol symbol;
|
|
264
|
+
Array(TSStateId) start_states;
|
|
265
|
+
Array(AnalysisSubgraphNode) nodes;
|
|
266
|
+
} AnalysisSubgraph;
|
|
267
|
+
|
|
268
|
+
// A growable array of `AnalysisSubgraph` values.
typedef Array(AnalysisSubgraph) AnalysisSubgraphArray;
|
|
269
|
+
|
|
270
|
+
/*
|
|
271
|
+
* StatePredecessorMap - A map that stores the predecessors of each parse state.
|
|
272
|
+
* This is used during query analysis to determine which parse states can lead
|
|
273
|
+
* to which reduce actions.
|
|
274
|
+
*/
|
|
275
|
+
typedef struct {
|
|
276
|
+
TSStateId *contents;
|
|
277
|
+
} StatePredecessorMap;
|
|
278
|
+
|
|
279
|
+
/*
|
|
280
|
+
* TSQuery - A tree query, compiled from a string of S-expressions. The query
|
|
281
|
+
* itself is immutable. The mutable state used in the process of executing the
|
|
282
|
+
* query is stored in a `TSQueryCursor`.
|
|
283
|
+
*/
|
|
284
|
+
struct TSQuery {
|
|
285
|
+
SymbolTable captures;
|
|
286
|
+
SymbolTable predicate_values;
|
|
287
|
+
Array(CaptureQuantifiers) capture_quantifiers;
|
|
288
|
+
Array(QueryStep) steps;
|
|
289
|
+
Array(PatternEntry) pattern_map;
|
|
290
|
+
Array(TSQueryPredicateStep) predicate_steps;
|
|
291
|
+
Array(QueryPattern) patterns;
|
|
292
|
+
Array(StepOffset) step_offsets;
|
|
293
|
+
Array(TSFieldId) negated_fields;
|
|
294
|
+
Array(char) string_buffer;
|
|
295
|
+
Array(TSSymbol) repeat_symbols_with_rootless_patterns;
|
|
296
|
+
const TSLanguage *language;
|
|
297
|
+
uint16_t wildcard_root_pattern_count;
|
|
298
|
+
};
|
|
299
|
+
|
|
300
|
+
/*
|
|
301
|
+
* TSQueryCursor - A stateful struct used to execute a query on a tree.
|
|
302
|
+
*/
|
|
303
|
+
struct TSQueryCursor {
|
|
304
|
+
const TSQuery *query;
|
|
305
|
+
TSTreeCursor cursor;
|
|
306
|
+
Array(QueryState) states;
|
|
307
|
+
Array(QueryState) finished_states;
|
|
308
|
+
CaptureListPool capture_list_pool;
|
|
309
|
+
uint32_t depth;
|
|
310
|
+
uint32_t max_start_depth;
|
|
311
|
+
uint32_t start_byte;
|
|
312
|
+
uint32_t end_byte;
|
|
313
|
+
TSPoint start_point;
|
|
314
|
+
TSPoint end_point;
|
|
315
|
+
uint32_t next_state_id;
|
|
316
|
+
TSClock end_clock;
|
|
317
|
+
TSDuration timeout_duration;
|
|
318
|
+
unsigned operation_count;
|
|
319
|
+
bool on_visible_node;
|
|
320
|
+
bool ascending;
|
|
321
|
+
bool halted;
|
|
322
|
+
bool did_exceed_match_limit;
|
|
323
|
+
};
|
|
324
|
+
|
|
325
|
+
static const TSQueryError PARENT_DONE = -1;
|
|
326
|
+
static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX;
|
|
327
|
+
static const uint16_t NONE = UINT16_MAX;
|
|
328
|
+
static const TSSymbol WILDCARD_SYMBOL = 0;
|
|
329
|
+
static const unsigned OP_COUNT_PER_QUERY_TIMEOUT_CHECK = 100;
|
|
330
|
+
|
|
331
|
+
/**********
|
|
332
|
+
* Stream
|
|
333
|
+
**********/
|
|
334
|
+
|
|
335
|
+
// Advance to the next unicode code point in the stream.
|
|
336
|
+
static bool stream_advance(Stream *self) {
|
|
337
|
+
self->input += self->next_size;
|
|
338
|
+
if (self->input < self->end) {
|
|
339
|
+
uint32_t size = ts_decode_utf8(
|
|
340
|
+
(const uint8_t *)self->input,
|
|
341
|
+
(uint32_t)(self->end - self->input),
|
|
342
|
+
&self->next
|
|
343
|
+
);
|
|
344
|
+
if (size > 0) {
|
|
345
|
+
self->next_size = size;
|
|
346
|
+
return true;
|
|
347
|
+
}
|
|
348
|
+
} else {
|
|
349
|
+
self->next_size = 0;
|
|
350
|
+
self->next = '\0';
|
|
351
|
+
}
|
|
352
|
+
return false;
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
// Reset the stream to the given input position, represented as a pointer
|
|
356
|
+
// into the input string.
|
|
357
|
+
static void stream_reset(Stream *self, const char *input) {
|
|
358
|
+
self->input = input;
|
|
359
|
+
self->next_size = 0;
|
|
360
|
+
stream_advance(self);
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
static Stream stream_new(const char *string, uint32_t length) {
|
|
364
|
+
Stream self = {
|
|
365
|
+
.next = 0,
|
|
366
|
+
.input = string,
|
|
367
|
+
.start = string,
|
|
368
|
+
.end = string + length,
|
|
369
|
+
};
|
|
370
|
+
stream_advance(&self);
|
|
371
|
+
return self;
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
// Skip over any run of whitespace and `;` comments. A comment extends to the
// next newline or to the end of the input.
static void stream_skip_whitespace(Stream *self) {
  for (;;) {
    bool is_space = iswspace(self->next) != 0;
    if (!is_space && self->next != ';') return;

    // Consume the whitespace character, or the ';' that opens a comment.
    stream_advance(self);
    if (is_space) continue;

    // Consume the remainder of the comment line.
    while (self->next && self->next != '\n' && stream_advance(self)) {
      // nothing else to do per character
    }
  }
}
|
|
389
|
+
|
|
390
|
+
// True when the current code point is alphanumeric, '_' or '-'.
static bool stream_is_ident_start(Stream *self) {
  if (iswalnum(self->next)) return true;
  return self->next == '_' || self->next == '-';
}
|
|
393
|
+
|
|
394
|
+
// Consume the current code point unconditionally, then keep consuming while
// the next one is alphanumeric or one of '_', '-', '.', '?', '!'.
static void stream_scan_identifier(Stream *stream) {
  for (;;) {
    stream_advance(stream);
    if (iswalnum(stream->next)) continue;
    switch (stream->next) {
      case '_':
      case '-':
      case '.':
      case '?':
      case '!':
        continue;
      default:
        return;
    }
  }
}
|
|
406
|
+
|
|
407
|
+
// Distance in bytes from the start of the input to the current position.
static uint32_t stream_offset(Stream *self) {
  const char *base = self->start;
  return (uint32_t)(self->input - base);
}
|
|
410
|
+
|
|
411
|
+
/******************
|
|
412
|
+
* CaptureListPool
|
|
413
|
+
******************/
|
|
414
|
+
|
|
415
|
+
static CaptureListPool capture_list_pool_new(void) {
|
|
416
|
+
return (CaptureListPool) {
|
|
417
|
+
.list = array_new(),
|
|
418
|
+
.empty_list = array_new(),
|
|
419
|
+
.max_capture_list_count = UINT32_MAX,
|
|
420
|
+
.free_capture_list_count = 0,
|
|
421
|
+
};
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
// Mark every allocated capture list as unused, keeping the allocations.
static void capture_list_pool_reset(CaptureListPool *self) {
  uint16_t list_count = (uint16_t)self->list.size;
  for (uint16_t index = 0; index < list_count; index++) {
    CaptureList *entry = &self->list.contents[index];
    // UINT32_MAX is an invalid size that marks the list as not in use.
    entry->size = UINT32_MAX;
  }
  self->free_capture_list_count = self->list.size;
}
|
|
431
|
+
|
|
432
|
+
// Free each capture list in `list`, then `list` itself.
static void capture_list_pool_delete(CaptureListPool *self) {
  uint16_t list_count = (uint16_t)self->list.size;
  for (uint16_t index = 0; index < list_count; index++) {
    CaptureList *entry = &self->list.contents[index];
    array_delete(entry);
  }
  array_delete(&self->list);
}
|
|
438
|
+
|
|
439
|
+
static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
|
|
440
|
+
if (id >= self->list.size) return &self->empty_list;
|
|
441
|
+
return &self->list.contents[id];
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
// Mutable access to the list with the given id; the id is asserted to be in
// range.
static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
  ts_assert(id < self->list.size);
  CaptureList *entry = &self->list.contents[id];
  return entry;
}
|
|
448
|
+
|
|
449
|
+
static bool capture_list_pool_is_empty(const CaptureListPool *self) {
|
|
450
|
+
// The capture list pool is empty if all allocated lists are in use, and we
|
|
451
|
+
// have reached the maximum allowed number of allocated lists.
|
|
452
|
+
return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
// Obtain the id of a capture list that is ready for use, or NONE when the
// pool is at its maximum and nothing is free.
static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
  // Prefer recycling a list that is allocated but marked unused.
  if (self->free_capture_list_count > 0) {
    uint16_t list_count = (uint16_t)self->list.size;
    for (uint16_t index = 0; index < list_count; index++) {
      CaptureList *candidate = &self->list.contents[index];
      if (candidate->size != UINT32_MAX) continue;
      array_clear(candidate);
      self->free_capture_list_count--;
      return index;
    }
  }

  // Nothing to recycle: append a brand new list unless that would exceed
  // the requested maximum.
  uint32_t new_index = self->list.size;
  if (new_index >= self->max_capture_list_count) return NONE;

  CaptureList fresh;
  array_init(&fresh);
  array_push(&self->list, fresh);
  return new_index;
}
|
|
478
|
+
|
|
479
|
+
// Mark the list with the given id as unused. Out-of-range ids are ignored.
static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
  if (id < self->list.size) {
    self->list.contents[id].size = UINT32_MAX;
    self->free_capture_list_count++;
  }
}
|
|
484
|
+
|
|
485
|
+
/**************
|
|
486
|
+
* Quantifiers
|
|
487
|
+
**************/
|
|
488
|
+
|
|
489
|
+
// Combine two quantifiers with the "mul" table:
//   - left One       -> right
//   - either is Zero -> Zero
//   - left ZeroOrOne -> ZeroOrOne for right ZeroOrOne/One, else ZeroOrMore
//   - left ZeroOrMore -> ZeroOrMore
//   - left OneOrMore -> OneOrMore for right One/OneOrMore, else ZeroOrMore
// Values outside the enum fall through to Zero.
static TSQuantifier quantifier_mul(
  TSQuantifier left,
  TSQuantifier right
) {
  if (left == TSQuantifierOne) return right;

  switch (left) {
    case TSQuantifierZeroOrOne:
      if (right == TSQuantifierZeroOrOne || right == TSQuantifierOne) {
        return TSQuantifierZeroOrOne;
      }
      if (right == TSQuantifierZeroOrMore || right == TSQuantifierOneOrMore) {
        return TSQuantifierZeroOrMore;
      }
      break;

    case TSQuantifierZeroOrMore:
      if (
        right == TSQuantifierZeroOrOne ||
        right == TSQuantifierZeroOrMore ||
        right == TSQuantifierOne ||
        right == TSQuantifierOneOrMore
      ) {
        return TSQuantifierZeroOrMore;
      }
      break;

    case TSQuantifierOneOrMore:
      if (right == TSQuantifierZeroOrOne || right == TSQuantifierZeroOrMore) {
        return TSQuantifierZeroOrMore;
      }
      if (right == TSQuantifierOne || right == TSQuantifierOneOrMore) {
        return TSQuantifierOneOrMore;
      }
      break;

    default:
      break;
  }

  // Reached for left Zero, right Zero, or any value outside the enum.
  return TSQuantifierZero;
}
|
|
537
|
+
|
|
538
|
+
// Combine two quantifiers with the "join" table. Each `case` below spells
// out the result for one value of `left`; combinations not listed (only
// possible with values outside the enum, or Zero joined with Zero) yield
// Zero.
static TSQuantifier quantifier_join(
  TSQuantifier left,
  TSQuantifier right
) {
  switch (left) {
    case TSQuantifierZero:
      if (right == TSQuantifierZeroOrOne || right == TSQuantifierOne) {
        return TSQuantifierZeroOrOne;
      }
      if (right == TSQuantifierZeroOrMore || right == TSQuantifierOneOrMore) {
        return TSQuantifierZeroOrMore;
      }
      break;

    case TSQuantifierZeroOrOne:
      if (
        right == TSQuantifierZero ||
        right == TSQuantifierZeroOrOne ||
        right == TSQuantifierOne
      ) {
        return TSQuantifierZeroOrOne;
      }
      if (right == TSQuantifierZeroOrMore || right == TSQuantifierOneOrMore) {
        return TSQuantifierZeroOrMore;
      }
      break;

    case TSQuantifierZeroOrMore:
      return TSQuantifierZeroOrMore;

    case TSQuantifierOne:
      if (right == TSQuantifierZero || right == TSQuantifierZeroOrOne) {
        return TSQuantifierZeroOrOne;
      }
      // For the remaining valid values the result is `right` itself.
      if (
        right == TSQuantifierZeroOrMore ||
        right == TSQuantifierOne ||
        right == TSQuantifierOneOrMore
      ) {
        return right;
      }
      break;

    case TSQuantifierOneOrMore:
      if (
        right == TSQuantifierZero ||
        right == TSQuantifierZeroOrOne ||
        right == TSQuantifierZeroOrMore
      ) {
        return TSQuantifierZeroOrMore;
      }
      if (right == TSQuantifierOne || right == TSQuantifierOneOrMore) {
        return TSQuantifierOneOrMore;
      }
      break;

    default:
      break;
  }

  return TSQuantifierZero;
}
|
|
598
|
+
|
|
599
|
+
// Combine two quantifiers with the "add" table:
//   - left Zero      -> right
//   - left OneOrMore -> OneOrMore
//   - otherwise the result depends on both sides, as spelled out below.
// Values outside the enum fall through to Zero.
static TSQuantifier quantifier_add(
  TSQuantifier left,
  TSQuantifier right
) {
  if (left == TSQuantifierZero) return right;
  if (left == TSQuantifierOneOrMore) return TSQuantifierOneOrMore;

  bool right_at_least_one =
    right == TSQuantifierOne || right == TSQuantifierOneOrMore;

  switch (left) {
    case TSQuantifierZeroOrOne:
      if (right == TSQuantifierZero) return TSQuantifierZeroOrOne;
      if (right == TSQuantifierZeroOrOne || right == TSQuantifierZeroOrMore) {
        return TSQuantifierZeroOrMore;
      }
      if (right_at_least_one) return TSQuantifierOneOrMore;
      break;

    case TSQuantifierZeroOrMore:
      if (
        right == TSQuantifierZero ||
        right == TSQuantifierZeroOrOne ||
        right == TSQuantifierZeroOrMore
      ) {
        return TSQuantifierZeroOrMore;
      }
      if (right_at_least_one) return TSQuantifierOneOrMore;
      break;

    case TSQuantifierOne:
      if (right == TSQuantifierZero) return TSQuantifierOne;
      if (
        right == TSQuantifierZeroOrOne ||
        right == TSQuantifierZeroOrMore ||
        right_at_least_one
      ) {
        return TSQuantifierOneOrMore;
      }
      break;

    default:
      break;
  }

  return TSQuantifierZero;
}
|
|
647
|
+
|
|
648
|
+
// Create new capture quantifiers structure
|
|
649
|
+
static CaptureQuantifiers capture_quantifiers_new(void) {
|
|
650
|
+
return (CaptureQuantifiers) array_new();
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
// Delete capture quantifiers structure
|
|
654
|
+
static void capture_quantifiers_delete(
|
|
655
|
+
CaptureQuantifiers *self
|
|
656
|
+
) {
|
|
657
|
+
array_delete(self);
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
// Clear capture quantifiers structure
|
|
661
|
+
static void capture_quantifiers_clear(
|
|
662
|
+
CaptureQuantifiers *self
|
|
663
|
+
) {
|
|
664
|
+
array_clear(self);
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
// Replace capture quantifiers with the given quantifiers
|
|
668
|
+
static void capture_quantifiers_replace(
|
|
669
|
+
CaptureQuantifiers *self,
|
|
670
|
+
CaptureQuantifiers *quantifiers
|
|
671
|
+
) {
|
|
672
|
+
array_clear(self);
|
|
673
|
+
array_push_all(self, quantifiers);
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
// Return capture quantifier for the given capture id
|
|
677
|
+
static TSQuantifier capture_quantifier_for_id(
|
|
678
|
+
const CaptureQuantifiers *self,
|
|
679
|
+
uint16_t id
|
|
680
|
+
) {
|
|
681
|
+
return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id);
|
|
682
|
+
}
|
|
683
|
+
|
|
684
|
+
// Add the given quantifier to the current value for id
|
|
685
|
+
static void capture_quantifiers_add_for_id(
|
|
686
|
+
CaptureQuantifiers *self,
|
|
687
|
+
uint16_t id,
|
|
688
|
+
TSQuantifier quantifier
|
|
689
|
+
) {
|
|
690
|
+
if (self->size <= id) {
|
|
691
|
+
array_grow_by(self, id + 1 - self->size);
|
|
692
|
+
}
|
|
693
|
+
uint8_t *own_quantifier = array_get(self, id);
|
|
694
|
+
*own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier);
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
// Point-wise add the given quantifiers to the current values
|
|
698
|
+
static void capture_quantifiers_add_all(
|
|
699
|
+
CaptureQuantifiers *self,
|
|
700
|
+
CaptureQuantifiers *quantifiers
|
|
701
|
+
) {
|
|
702
|
+
if (self->size < quantifiers->size) {
|
|
703
|
+
array_grow_by(self, quantifiers->size - self->size);
|
|
704
|
+
}
|
|
705
|
+
for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) {
|
|
706
|
+
uint8_t *quantifier = array_get(quantifiers, id);
|
|
707
|
+
uint8_t *own_quantifier = array_get(self, id);
|
|
708
|
+
*own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
// Join the given quantifier with the current values
|
|
713
|
+
static void capture_quantifiers_mul(
|
|
714
|
+
CaptureQuantifiers *self,
|
|
715
|
+
TSQuantifier quantifier
|
|
716
|
+
) {
|
|
717
|
+
for (uint16_t id = 0; id < (uint16_t)self->size; id++) {
|
|
718
|
+
uint8_t *own_quantifier = array_get(self, id);
|
|
719
|
+
*own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier);
|
|
720
|
+
}
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
// Point-wise join the quantifiers from a list of alternatives with the current values
|
|
724
|
+
static void capture_quantifiers_join_all(
|
|
725
|
+
CaptureQuantifiers *self,
|
|
726
|
+
CaptureQuantifiers *quantifiers
|
|
727
|
+
) {
|
|
728
|
+
if (self->size < quantifiers->size) {
|
|
729
|
+
array_grow_by(self, quantifiers->size - self->size);
|
|
730
|
+
}
|
|
731
|
+
for (uint32_t id = 0; id < quantifiers->size; id++) {
|
|
732
|
+
uint8_t *quantifier = array_get(quantifiers, id);
|
|
733
|
+
uint8_t *own_quantifier = array_get(self, id);
|
|
734
|
+
*own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
|
|
735
|
+
}
|
|
736
|
+
for (uint32_t id = quantifiers->size; id < self->size; id++) {
|
|
737
|
+
uint8_t *own_quantifier = array_get(self, id);
|
|
738
|
+
*own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero);
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
/**************
|
|
743
|
+
* SymbolTable
|
|
744
|
+
**************/
|
|
745
|
+
|
|
746
|
+
static SymbolTable symbol_table_new(void) {
|
|
747
|
+
return (SymbolTable) {
|
|
748
|
+
.characters = array_new(),
|
|
749
|
+
.slices = array_new(),
|
|
750
|
+
};
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
// Free both arrays owned by the symbol table.
static void symbol_table_delete(SymbolTable *self) {
  array_delete(&self->characters);
  array_delete(&self->slices);
}
|
|
757
|
+
|
|
758
|
+
static int symbol_table_id_for_name(
|
|
759
|
+
const SymbolTable *self,
|
|
760
|
+
const char *name,
|
|
761
|
+
uint32_t length
|
|
762
|
+
) {
|
|
763
|
+
for (unsigned i = 0; i < self->slices.size; i++) {
|
|
764
|
+
Slice slice = self->slices.contents[i];
|
|
765
|
+
if (
|
|
766
|
+
slice.length == length &&
|
|
767
|
+
!strncmp(&self->characters.contents[slice.offset], name, length)
|
|
768
|
+
) return i;
|
|
769
|
+
}
|
|
770
|
+
return -1;
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
static const char *symbol_table_name_for_id(
|
|
774
|
+
const SymbolTable *self,
|
|
775
|
+
uint16_t id,
|
|
776
|
+
uint32_t *length
|
|
777
|
+
) {
|
|
778
|
+
Slice slice = self->slices.contents[id];
|
|
779
|
+
*length = slice.length;
|
|
780
|
+
return &self->characters.contents[slice.offset];
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
static uint16_t symbol_table_insert_name(
|
|
784
|
+
SymbolTable *self,
|
|
785
|
+
const char *name,
|
|
786
|
+
uint32_t length
|
|
787
|
+
) {
|
|
788
|
+
int id = symbol_table_id_for_name(self, name, length);
|
|
789
|
+
if (id >= 0) return (uint16_t)id;
|
|
790
|
+
Slice slice = {
|
|
791
|
+
.offset = self->characters.size,
|
|
792
|
+
.length = length,
|
|
793
|
+
};
|
|
794
|
+
array_grow_by(&self->characters, length + 1);
|
|
795
|
+
memcpy(&self->characters.contents[slice.offset], name, length);
|
|
796
|
+
self->characters.contents[self->characters.size - 1] = 0;
|
|
797
|
+
array_push(&self->slices, slice);
|
|
798
|
+
return self->slices.size - 1;
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
/************
|
|
802
|
+
* QueryStep
|
|
803
|
+
************/
|
|
804
|
+
|
|
805
|
+
static QueryStep query_step__new(
|
|
806
|
+
TSSymbol symbol,
|
|
807
|
+
uint16_t depth,
|
|
808
|
+
bool is_immediate
|
|
809
|
+
) {
|
|
810
|
+
QueryStep step = {
|
|
811
|
+
.symbol = symbol,
|
|
812
|
+
.depth = depth,
|
|
813
|
+
.field = 0,
|
|
814
|
+
.alternative_index = NONE,
|
|
815
|
+
.negated_field_list_id = 0,
|
|
816
|
+
.contains_captures = false,
|
|
817
|
+
.is_last_child = false,
|
|
818
|
+
.is_named = false,
|
|
819
|
+
.is_pass_through = false,
|
|
820
|
+
.is_dead_end = false,
|
|
821
|
+
.root_pattern_guaranteed = false,
|
|
822
|
+
.is_immediate = is_immediate,
|
|
823
|
+
.alternative_is_immediate = false,
|
|
824
|
+
};
|
|
825
|
+
for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
|
|
826
|
+
step.capture_ids[i] = NONE;
|
|
827
|
+
}
|
|
828
|
+
return step;
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
// Store `capture_id` in the first unused capture slot of the step. When
// every slot is already taken, the step is left unchanged.
static void query_step__add_capture(QueryStep *self, uint16_t capture_id) {
  unsigned slot = 0;
  while (slot < MAX_STEP_CAPTURE_COUNT && self->capture_ids[slot] != NONE) {
    slot++;
  }
  if (slot < MAX_STEP_CAPTURE_COUNT) {
    self->capture_ids[slot] = capture_id;
  }
}
|
|
839
|
+
|
|
840
|
+
// Remove the first occurrence of `capture_id` from the step's capture slots
// and shift the ids that follow it down by one, so the used slots stay
// contiguous.
static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
  unsigned found = 0;
  while (found < MAX_STEP_CAPTURE_COUNT && self->capture_ids[found] != capture_id) {
    found++;
  }
  if (found == MAX_STEP_CAPTURE_COUNT) return;

  self->capture_ids[found] = NONE;
  for (
    unsigned next = found + 1;
    next < MAX_STEP_CAPTURE_COUNT && self->capture_ids[next] != NONE;
    next++
  ) {
    self->capture_ids[next - 1] = self->capture_ids[next];
    self->capture_ids[next] = NONE;
  }
}
|
|
854
|
+
|
|
855
|
+
/**********************
|
|
856
|
+
* StatePredecessorMap
|
|
857
|
+
**********************/
|
|
858
|
+
|
|
859
|
+
static inline StatePredecessorMap state_predecessor_map_new(
|
|
860
|
+
const TSLanguage *language
|
|
861
|
+
) {
|
|
862
|
+
return (StatePredecessorMap) {
|
|
863
|
+
.contents = ts_calloc(
|
|
864
|
+
(size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
|
|
865
|
+
sizeof(TSStateId)
|
|
866
|
+
),
|
|
867
|
+
};
|
|
868
|
+
}
|
|
869
|
+
|
|
870
|
+
// Free the storage allocated for the map.
static inline void state_predecessor_map_delete(StatePredecessorMap *self) {
  ts_free(self->contents);
}
|
|
873
|
+
|
|
874
|
+
static inline void state_predecessor_map_add(
|
|
875
|
+
StatePredecessorMap *self,
|
|
876
|
+
TSStateId state,
|
|
877
|
+
TSStateId predecessor
|
|
878
|
+
) {
|
|
879
|
+
size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
|
|
880
|
+
TSStateId *count = &self->contents[index];
|
|
881
|
+
if (
|
|
882
|
+
*count == 0 ||
|
|
883
|
+
(*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)
|
|
884
|
+
) {
|
|
885
|
+
(*count)++;
|
|
886
|
+
self->contents[index + *count] = predecessor;
|
|
887
|
+
}
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
static inline const TSStateId *state_predecessor_map_get(
|
|
891
|
+
const StatePredecessorMap *self,
|
|
892
|
+
TSStateId state,
|
|
893
|
+
unsigned *count
|
|
894
|
+
) {
|
|
895
|
+
size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
|
|
896
|
+
*count = self->contents[index];
|
|
897
|
+
return &self->contents[index + 1];
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
/****************
|
|
901
|
+
* AnalysisState
|
|
902
|
+
****************/
|
|
903
|
+
|
|
904
|
+
static unsigned analysis_state__recursion_depth(const AnalysisState *self) {
|
|
905
|
+
unsigned result = 0;
|
|
906
|
+
for (unsigned i = 0; i < self->depth; i++) {
|
|
907
|
+
TSSymbol symbol = self->stack[i].parent_symbol;
|
|
908
|
+
for (unsigned j = 0; j < i; j++) {
|
|
909
|
+
if (self->stack[j].parent_symbol == symbol) {
|
|
910
|
+
result++;
|
|
911
|
+
break;
|
|
912
|
+
}
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
return result;
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
static inline int analysis_state__compare_position(
|
|
919
|
+
AnalysisState *const *self,
|
|
920
|
+
AnalysisState *const *other
|
|
921
|
+
) {
|
|
922
|
+
for (unsigned i = 0; i < (*self)->depth; i++) {
|
|
923
|
+
if (i >= (*other)->depth) return -1;
|
|
924
|
+
if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1;
|
|
925
|
+
if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1;
|
|
926
|
+
}
|
|
927
|
+
if ((*self)->depth < (*other)->depth) return 1;
|
|
928
|
+
if ((*self)->step_index < (*other)->step_index) return -1;
|
|
929
|
+
if ((*self)->step_index > (*other)->step_index) return 1;
|
|
930
|
+
return 0;
|
|
931
|
+
}
|
|
932
|
+
|
|
933
|
+
static inline int analysis_state__compare(
|
|
934
|
+
AnalysisState *const *self,
|
|
935
|
+
AnalysisState *const *other
|
|
936
|
+
) {
|
|
937
|
+
int result = analysis_state__compare_position(self, other);
|
|
938
|
+
if (result != 0) return result;
|
|
939
|
+
for (unsigned i = 0; i < (*self)->depth; i++) {
|
|
940
|
+
if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1;
|
|
941
|
+
if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1;
|
|
942
|
+
if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1;
|
|
943
|
+
if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1;
|
|
944
|
+
if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1;
|
|
945
|
+
if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1;
|
|
946
|
+
}
|
|
947
|
+
return 0;
|
|
948
|
+
}
|
|
949
|
+
|
|
950
|
+
// Return the topmost stack entry. For an empty stack this is the first slot
// of the stack array.
static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) {
  unsigned top_index = self->depth == 0 ? 0 : self->depth - 1;
  return &self->stack[top_index];
}
|
|
956
|
+
|
|
957
|
+
// True when any entry on the stack has `symbol` as its parent symbol.
static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) {
  unsigned level = 0;
  while (level < self->depth) {
    if (self->stack[level].parent_symbol == symbol) return true;
    level++;
  }
  return false;
}
|
|
963
|
+
|
|
964
|
+
/******************
|
|
965
|
+
* AnalysisStateSet
|
|
966
|
+
******************/
|
|
967
|
+
|
|
968
|
+
// Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by
|
|
969
|
+
// cloning one from scratch.
|
|
970
|
+
static inline AnalysisState *analysis_state_pool__clone_or_reuse(
|
|
971
|
+
AnalysisStateSet *self,
|
|
972
|
+
AnalysisState *borrowed_item
|
|
973
|
+
) {
|
|
974
|
+
AnalysisState *new_item;
|
|
975
|
+
if (self->size) {
|
|
976
|
+
new_item = array_pop(self);
|
|
977
|
+
} else {
|
|
978
|
+
new_item = ts_malloc(sizeof(AnalysisState));
|
|
979
|
+
}
|
|
980
|
+
*new_item = *borrowed_item;
|
|
981
|
+
return new_item;
|
|
982
|
+
}
|
|
983
|
+
|
|
984
|
+
// Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this
|
|
985
|
+
// set. The set does not contain duplicates, so if the item is already present, it will not be
|
|
986
|
+
// inserted, and no clone will be made.
|
|
987
|
+
//
|
|
988
|
+
// The caller retains ownership of the passed-in memory. However, the clone that is created by this
|
|
989
|
+
// function will be managed by the state set.
|
|
990
|
+
static inline void analysis_state_set__insert_sorted(
|
|
991
|
+
AnalysisStateSet *self,
|
|
992
|
+
AnalysisStateSet *pool,
|
|
993
|
+
AnalysisState *borrowed_item
|
|
994
|
+
) {
|
|
995
|
+
unsigned index, exists;
|
|
996
|
+
array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists);
|
|
997
|
+
if (!exists) {
|
|
998
|
+
AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
|
|
999
|
+
array_insert(self, index, new_item);
|
|
1000
|
+
}
|
|
1001
|
+
}
|
|
1002
|
+
|
|
1003
|
+
// Inserts a clone of the passed-in item at the end position of this list.
|
|
1004
|
+
//
|
|
1005
|
+
// IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function
|
|
1006
|
+
// `analysis_state__compare`) than largest item already in this set. If items are inserted in the
|
|
1007
|
+
// wrong order, the set will not function properly for future use.
|
|
1008
|
+
//
|
|
1009
|
+
// The caller retains ownership of the passed-in memory. However, the clone that is created by this
|
|
1010
|
+
// function will be managed by the state set.
|
|
1011
|
+
static inline void analysis_state_set__push(
|
|
1012
|
+
AnalysisStateSet *self,
|
|
1013
|
+
AnalysisStateSet *pool,
|
|
1014
|
+
AnalysisState *borrowed_item
|
|
1015
|
+
) {
|
|
1016
|
+
AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
|
|
1017
|
+
array_push(self, new_item);
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
// Removes all items from this set, returning it to an empty state.
|
|
1021
|
+
static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) {
|
|
1022
|
+
array_push_all(pool, self);
|
|
1023
|
+
array_clear(self);
|
|
1024
|
+
}
|
|
1025
|
+
|
|
1026
|
+
// Releases all memory that is managed with this state set, including any items currently present.
|
|
1027
|
+
// After calling this function, the set is no longer suitable for use.
|
|
1028
|
+
static inline void analysis_state_set__delete(AnalysisStateSet *self) {
|
|
1029
|
+
for (unsigned i = 0; i < self->size; i++) {
|
|
1030
|
+
ts_free(self->contents[i]);
|
|
1031
|
+
}
|
|
1032
|
+
array_delete(self);
|
|
1033
|
+
}
|
|
1034
|
+
|
|
1035
|
+
/****************
|
|
1036
|
+
* QueryAnalyzer
|
|
1037
|
+
****************/
|
|
1038
|
+
|
|
1039
|
+
static inline QueryAnalysis query_analysis__new(void) {
|
|
1040
|
+
return (QueryAnalysis) {
|
|
1041
|
+
.states = array_new(),
|
|
1042
|
+
.next_states = array_new(),
|
|
1043
|
+
.deeper_states = array_new(),
|
|
1044
|
+
.state_pool = array_new(),
|
|
1045
|
+
.final_step_indices = array_new(),
|
|
1046
|
+
.finished_parent_symbols = array_new(),
|
|
1047
|
+
.did_abort = false,
|
|
1048
|
+
};
|
|
1049
|
+
}
|
|
1050
|
+
|
|
1051
|
+
// Releases everything owned by a QueryAnalysis: the four state sets (items
// included) and the two plain index/symbol arrays.
static inline void query_analysis__delete(QueryAnalysis *self) {
  // Same order as the fields are declared in query_analysis__new.
  AnalysisStateSet *owned_sets[] = {
    &self->states,
    &self->next_states,
    &self->deeper_states,
    &self->state_pool,
  };
  const unsigned owned_set_count = sizeof(owned_sets) / sizeof(owned_sets[0]);
  for (unsigned k = 0; k < owned_set_count; k++) {
    analysis_state_set__delete(owned_sets[k]);
  }

  array_delete(&self->final_step_indices);
  array_delete(&self->finished_parent_symbols);
}
|
|
1059
|
+
|
|
1060
|
+
/***********************
|
|
1061
|
+
* AnalysisSubgraphNode
|
|
1062
|
+
***********************/
|
|
1063
|
+
|
|
1064
|
+
static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) {
|
|
1065
|
+
if (self->state < other->state) return -1;
|
|
1066
|
+
if (self->state > other->state) return 1;
|
|
1067
|
+
if (self->child_index < other->child_index) return -1;
|
|
1068
|
+
if (self->child_index > other->child_index) return 1;
|
|
1069
|
+
if (self->done < other->done) return -1;
|
|
1070
|
+
if (self->done > other->done) return 1;
|
|
1071
|
+
if (self->production_id < other->production_id) return -1;
|
|
1072
|
+
if (self->production_id > other->production_id) return 1;
|
|
1073
|
+
return 0;
|
|
1074
|
+
}
|
|
1075
|
+
|
|
1076
|
+
/*********
|
|
1077
|
+
* Query
|
|
1078
|
+
*********/
|
|
1079
|
+
|
|
1080
|
+
// The `pattern_map` contains a mapping from TSSymbol values to indices in the
|
|
1081
|
+
// `steps` array. For a given syntax node, the `pattern_map` makes it possible
|
|
1082
|
+
// to quickly find the starting steps of all of the patterns whose root matches
|
|
1083
|
+
// that node. Each entry has two fields: a `pattern_index`, which identifies one
|
|
1084
|
+
// of the patterns in the query, and a `step_index`, which indicates the start
|
|
1085
|
+
// offset of that pattern's steps within the `steps` array.
|
|
1086
|
+
//
|
|
1087
|
+
// The entries are sorted by the patterns' root symbols, and lookups use a
|
|
1088
|
+
// binary search. This ensures that the cost of this initial lookup step
|
|
1089
|
+
// scales logarithmically with the number of patterns in the query.
|
|
1090
|
+
//
|
|
1091
|
+
// This returns `true` if the symbol is present and `false` otherwise.
|
|
1092
|
+
// If the symbol is not present `*result` is set to the index where the
|
|
1093
|
+
// symbol should be inserted.
|
|
1094
|
+
static inline bool ts_query__pattern_map_search(
|
|
1095
|
+
const TSQuery *self,
|
|
1096
|
+
TSSymbol needle,
|
|
1097
|
+
uint32_t *result
|
|
1098
|
+
) {
|
|
1099
|
+
uint32_t base_index = self->wildcard_root_pattern_count;
|
|
1100
|
+
uint32_t size = self->pattern_map.size - base_index;
|
|
1101
|
+
if (size == 0) {
|
|
1102
|
+
*result = base_index;
|
|
1103
|
+
return false;
|
|
1104
|
+
}
|
|
1105
|
+
while (size > 1) {
|
|
1106
|
+
uint32_t half_size = size / 2;
|
|
1107
|
+
uint32_t mid_index = base_index + half_size;
|
|
1108
|
+
TSSymbol mid_symbol = self->steps.contents[
|
|
1109
|
+
self->pattern_map.contents[mid_index].step_index
|
|
1110
|
+
].symbol;
|
|
1111
|
+
if (needle > mid_symbol) base_index = mid_index;
|
|
1112
|
+
size -= half_size;
|
|
1113
|
+
}
|
|
1114
|
+
|
|
1115
|
+
TSSymbol symbol = self->steps.contents[
|
|
1116
|
+
self->pattern_map.contents[base_index].step_index
|
|
1117
|
+
].symbol;
|
|
1118
|
+
|
|
1119
|
+
if (needle > symbol) {
|
|
1120
|
+
base_index++;
|
|
1121
|
+
if (base_index < self->pattern_map.size) {
|
|
1122
|
+
symbol = self->steps.contents[
|
|
1123
|
+
self->pattern_map.contents[base_index].step_index
|
|
1124
|
+
].symbol;
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1128
|
+
*result = base_index;
|
|
1129
|
+
return needle == symbol;
|
|
1130
|
+
}
|
|
1131
|
+
|
|
1132
|
+
// Insert a new pattern's start index into the pattern map, maintaining
|
|
1133
|
+
// the pattern map's ordering invariant.
|
|
1134
|
+
static inline void ts_query__pattern_map_insert(
|
|
1135
|
+
TSQuery *self,
|
|
1136
|
+
TSSymbol symbol,
|
|
1137
|
+
PatternEntry new_entry
|
|
1138
|
+
) {
|
|
1139
|
+
uint32_t index;
|
|
1140
|
+
ts_query__pattern_map_search(self, symbol, &index);
|
|
1141
|
+
|
|
1142
|
+
// Ensure that the entries are sorted not only by symbol, but also
|
|
1143
|
+
// by pattern_index. This way, states for earlier patterns will be
|
|
1144
|
+
// initiated first, which allows the ordering of the states array
|
|
1145
|
+
// to be maintained more efficiently.
|
|
1146
|
+
while (index < self->pattern_map.size) {
|
|
1147
|
+
PatternEntry *entry = &self->pattern_map.contents[index];
|
|
1148
|
+
if (
|
|
1149
|
+
self->steps.contents[entry->step_index].symbol == symbol &&
|
|
1150
|
+
entry->pattern_index < new_entry.pattern_index
|
|
1151
|
+
) {
|
|
1152
|
+
index++;
|
|
1153
|
+
} else {
|
|
1154
|
+
break;
|
|
1155
|
+
}
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
array_insert(&self->pattern_map, index, new_entry);
|
|
1159
|
+
}
|
|
1160
|
+
|
|
1161
|
+
// Walk the subgraph for this non-terminal, tracking all of the possible
// sequences of progress within the pattern.
//
// Inputs:
//   self      - the query; its `steps` and `language` are read.
//   subgraphs - searched by `.symbol` to find the subgraph for the parent
//               symbol on top of each state's stack.
//   analysis  - the caller seeds `analysis->states` before calling.
//
// Outputs (all written through `analysis`):
//   final_step_indices      - cleared on entry, then filled with step indices
//                             recorded when a state's stack empties without
//                             finishing the pattern.
//   finished_parent_symbols - cleared on entry, then filled with the
//                             `root_symbol` of states that finished.
//   did_abort               - set to true when the iteration cap is hit or a
//                             state stack would exceed MAX_ANALYSIS_STATE_DEPTH.
//                             It is never reset to false here.
static void ts_query__perform_analysis(
  TSQuery *self,
  const AnalysisSubgraphArray *subgraphs,
  QueryAnalysis *analysis
) {
  unsigned recursion_depth_limit = 0;
  unsigned prev_final_step_count = 0;
  array_clear(&analysis->final_step_indices);
  array_clear(&analysis->finished_parent_symbols);

  for (unsigned iteration = 0;; iteration++) {
    // Hard cap on the amount of work; the result is flagged as aborted.
    if (iteration == MAX_ANALYSIS_ITERATION_COUNT) {
      analysis->did_abort = true;
      break;
    }

    #ifdef DEBUG_ANALYZE_QUERY
      printf("Iteration: %u. Final step indices:", iteration);
      for (unsigned j = 0; j < analysis->final_step_indices.size; j++) {
        printf(" %4u", analysis->final_step_indices.contents[j]);
      }
      printf("\n");
      for (unsigned j = 0; j < analysis->states.size; j++) {
        AnalysisState *state = analysis->states.contents[j];
        printf(" %3u: step: %u, stack: [", j, state->step_index);
        for (unsigned k = 0; k < state->depth; k++) {
          printf(
            " {%s, child: %u, state: %4u",
            self->language->symbol_names[state->stack[k].parent_symbol],
            state->stack[k].child_index,
            state->stack[k].parse_state
          );
          if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]);
          if (state->stack[k].done) printf(", DONE");
          printf("}");
        }
        printf(" ]\n");
      }
    #endif

    // If no further progress can be made within the current recursion depth limit, then
    // bump the depth limit by one, and continue to process the states that exceeded the
    // limit. But only allow this if progress has been made since the last time the depth
    // limit was increased.
    if (analysis->states.size == 0) {
      if (
        analysis->deeper_states.size > 0 &&
        analysis->final_step_indices.size > prev_final_step_count
      ) {
        #ifdef DEBUG_ANALYZE_QUERY
          printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1);
        #endif

        prev_final_step_count = analysis->final_step_indices.size;
        recursion_depth_limit++;
        // Swap the two sets: the deferred states become the current ones, and
        // the (empty) current set becomes the new deferred set.
        AnalysisStateSet _states = analysis->states;
        analysis->states = analysis->deeper_states;
        analysis->deeper_states = _states;
        continue;
      }

      break;
    }

    analysis_state_set__clear(&analysis->next_states, &analysis->state_pool);
    for (unsigned j = 0; j < analysis->states.size; j++) {
      AnalysisState * const state = analysis->states.contents[j];

      // For efficiency, it's important to avoid processing the same analysis state more
      // than once. To achieve this, keep the states in order of ascending position within
      // their hypothetical syntax trees. In each iteration of this loop, start by advancing
      // the states that have made the least progress. Avoid advancing states that have already
      // made more progress.
      if (analysis->next_states.size > 0) {
        int comparison = analysis_state__compare_position(
          &state,
          array_back(&analysis->next_states)
        );
        if (comparison == 0) {
          // Same position as the last queued state: carry it over unchanged.
          analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state);
          continue;
        } else if (comparison > 0) {
          #ifdef DEBUG_ANALYZE_QUERY
            printf("Terminate iteration at state %u\n", j);
          #endif
          // This state and all remaining ones are carried over to the next
          // iteration without being advanced.
          while (j < analysis->states.size) {
            analysis_state_set__push(
              &analysis->next_states,
              &analysis->state_pool,
              analysis->states.contents[j]
            );
            j++;
          }
          break;
        }
      }

      // Snapshot the top stack entry and the current query step for this state.
      const TSStateId parse_state = analysis_state__top(state)->parse_state;
      const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol;
      const TSFieldId parent_field_id = analysis_state__top(state)->field_id;
      const unsigned child_index = analysis_state__top(state)->child_index;
      const QueryStep * const step = &self->steps.contents[state->step_index];

      // Without a subgraph for the parent symbol this state cannot be advanced.
      unsigned subgraph_index, exists;
      array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
      if (!exists) continue;
      const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index];

      // Follow every possible path in the parse table, but only visit states that
      // are part of the subgraph for the current symbol.
      LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state);
      while (ts_lookahead_iterator__next(&lookahead_iterator)) {
        TSSymbol sym = lookahead_iterator.symbol;

        // Work out where this lookahead leads. Only the last listed action is
        // considered; anything other than a shift is skipped, and an `extra`
        // shift leaves both the state and the child index unchanged.
        AnalysisSubgraphNode successor = {
          .state = parse_state,
          .child_index = child_index,
        };
        if (lookahead_iterator.action_count) {
          const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1];
          if (action->type == TSParseActionTypeShift) {
            if (!action->shift.extra) {
              successor.state = action->shift.state;
              successor.child_index++;
            }
          } else {
            continue;
          }
        } else if (lookahead_iterator.next_state != 0) {
          successor.state = lookahead_iterator.next_state;
          successor.child_index++;
        } else {
          continue;
        }

        // Visit the run of subgraph nodes, starting at the search position,
        // that share the successor's state and child index.
        unsigned node_index;
        array_search_sorted_with(
          &subgraph->nodes,
          analysis_subgraph_node__compare, &successor,
          &node_index, &exists
        );
        while (node_index < subgraph->nodes.size) {
          AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++];
          if (node->state != successor.state || node->child_index != successor.child_index) break;

          // Use the subgraph to determine what alias and field will eventually be applied
          // to this child node.
          TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
          TSSymbol visible_symbol = alias
            ? alias
            : self->language->symbol_metadata[sym].visible
              ? self->language->public_symbol_map[sym]
              : 0;
          TSFieldId field_id = parent_field_id;
          if (!field_id) {
            const TSFieldMapEntry *field_map, *field_map_end;
            ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
            for (; field_map != field_map_end; field_map++) {
              if (!field_map->inherited && field_map->child_index == child_index) {
                field_id = field_map->field_id;
                break;
              }
            }
          }

          // Create a new state that has advanced past this hypothetical subtree.
          AnalysisState next_state = *state;
          AnalysisStateEntry *next_state_top = analysis_state__top(&next_state);
          next_state_top->child_index = successor.child_index;
          next_state_top->parse_state = successor.state;
          if (node->done) next_state_top->done = true;

          // Determine if this hypothetical child node would match the current step
          // of the query pattern.
          bool does_match = false;
          if (visible_symbol) {
            does_match = true;
            if (step->symbol == WILDCARD_SYMBOL) {
              if (
                step->is_named &&
                !self->language->symbol_metadata[visible_symbol].named
              ) does_match = false;
            } else if (step->symbol != visible_symbol) {
              does_match = false;
            }
            if (step->field && step->field != field_id) {
              does_match = false;
            }
            if (
              step->supertype_symbol &&
              !analysis_state__has_supertype(state, step->supertype_symbol)
            ) does_match = false;
          }

          // If this child is hidden, then descend into it and walk through its children.
          // If the top entry of the stack is at the end of its rule, then that entry can
          // be replaced. Otherwise, push a new entry onto the stack.
          else if (sym >= self->language->token_count) {
            if (!next_state_top->done) {
              if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) {
                #ifdef DEBUG_ANALYZE_QUERY
                  printf("Exceeded depth limit for state %u\n", j);
                #endif

                // The stack has no room for another entry: give up on this
                // path and flag the whole analysis as aborted.
                analysis->did_abort = true;
                continue;
              }

              next_state.depth++;
              next_state_top = analysis_state__top(&next_state);
            }

            *next_state_top = (AnalysisStateEntry) {
              .parse_state = parse_state,
              .parent_symbol = sym,
              .child_index = 0,
              .field_id = field_id,
              .done = false,
            };

            // Too deep for the current limit: park the state in `deeper_states`
            // until the limit is raised.
            if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) {
              analysis_state_set__insert_sorted(
                &analysis->deeper_states,
                &analysis->state_pool,
                &next_state
              );
              continue;
            }
          }

          // Pop from the stack when this state reached the end of its current syntax node.
          while (next_state.depth > 0 && next_state_top->done) {
            next_state.depth--;
            next_state_top = analysis_state__top(&next_state);
          }

          // If this hypothetical child did match the current step of the query pattern,
          // then advance to the next step at the current depth. This involves skipping
          // over any descendant steps of the current child.
          const QueryStep *next_step = step;
          if (does_match) {
            for (;;) {
              next_state.step_index++;
              next_step = &self->steps.contents[next_state.step_index];
              if (
                next_step->depth == PATTERN_DONE_MARKER ||
                next_step->depth <= step->depth
              ) break;
            }
          } else if (successor.state == parse_state) {
            // No match and the parse state did not change: drop this path.
            continue;
          }

          for (;;) {
            // Skip pass-through states. Although these states have alternatives, they are only
            // used to implement repetitions, and query analysis does not need to process
            // repetitions in order to determine whether steps are possible and definite.
            if (next_step->is_pass_through) {
              next_state.step_index++;
              next_step++;
              continue;
            }

            // If the pattern is finished or hypothetical parent node is complete, then
            // record that matching can terminate at this step of the pattern. Otherwise,
            // add this state to the list of states to process on the next iteration.
            if (!next_step->is_dead_end) {
              bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth;
              if (did_finish_pattern) {
                array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol);
              } else if (next_state.depth == 0) {
                array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index);
              } else {
                analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state);
              }
            }

            // If the state has advanced to a step with an alternative step, then add another state
            // at that alternative step. This process is simpler than the process of actually matching a
            // pattern during query execution, because for the purposes of query analysis, there is no
            // need to process repetitions.
            if (
              does_match &&
              next_step->alternative_index != NONE &&
              next_step->alternative_index > next_state.step_index
            ) {
              next_state.step_index = next_step->alternative_index;
              next_step = &self->steps.contents[next_state.step_index];
            } else {
              break;
            }
          }
        }
      }
    }

    // Make the freshly built set current. The old current set becomes the
    // scratch `next_states`, which is cleared at the top of the next pass.
    AnalysisStateSet _states = analysis->states;
    analysis->states = analysis->next_states;
    analysis->next_states = _states;
  }
}
|
|
1464
|
+
|
|
1465
|
+
static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
|
|
1466
|
+
Array(uint16_t) non_rooted_pattern_start_steps = array_new();
|
|
1467
|
+
for (unsigned i = 0; i < self->pattern_map.size; i++) {
|
|
1468
|
+
PatternEntry *pattern = &self->pattern_map.contents[i];
|
|
1469
|
+
if (!pattern->is_rooted) {
|
|
1470
|
+
QueryStep *step = &self->steps.contents[pattern->step_index];
|
|
1471
|
+
if (step->symbol != WILDCARD_SYMBOL) {
|
|
1472
|
+
array_push(&non_rooted_pattern_start_steps, i);
|
|
1473
|
+
}
|
|
1474
|
+
}
|
|
1475
|
+
}
|
|
1476
|
+
|
|
1477
|
+
// Walk forward through all of the steps in the query, computing some
|
|
1478
|
+
// basic information about each step. Mark all of the steps that contain
|
|
1479
|
+
// captures, and record the indices of all of the steps that have child steps.
|
|
1480
|
+
Array(uint32_t) parent_step_indices = array_new();
|
|
1481
|
+
for (unsigned i = 0; i < self->steps.size; i++) {
|
|
1482
|
+
QueryStep *step = &self->steps.contents[i];
|
|
1483
|
+
if (step->depth == PATTERN_DONE_MARKER) {
|
|
1484
|
+
step->parent_pattern_guaranteed = true;
|
|
1485
|
+
step->root_pattern_guaranteed = true;
|
|
1486
|
+
continue;
|
|
1487
|
+
}
|
|
1488
|
+
|
|
1489
|
+
bool has_children = false;
|
|
1490
|
+
bool is_wildcard = step->symbol == WILDCARD_SYMBOL;
|
|
1491
|
+
step->contains_captures = step->capture_ids[0] != NONE;
|
|
1492
|
+
for (unsigned j = i + 1; j < self->steps.size; j++) {
|
|
1493
|
+
QueryStep *next_step = &self->steps.contents[j];
|
|
1494
|
+
if (
|
|
1495
|
+
next_step->depth == PATTERN_DONE_MARKER ||
|
|
1496
|
+
next_step->depth <= step->depth
|
|
1497
|
+
) break;
|
|
1498
|
+
if (next_step->capture_ids[0] != NONE) {
|
|
1499
|
+
step->contains_captures = true;
|
|
1500
|
+
}
|
|
1501
|
+
if (!is_wildcard) {
|
|
1502
|
+
next_step->root_pattern_guaranteed = true;
|
|
1503
|
+
next_step->parent_pattern_guaranteed = true;
|
|
1504
|
+
}
|
|
1505
|
+
has_children = true;
|
|
1506
|
+
}
|
|
1507
|
+
|
|
1508
|
+
if (has_children && !is_wildcard) {
|
|
1509
|
+
array_push(&parent_step_indices, i);
|
|
1510
|
+
}
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
// For every parent symbol in the query, initialize an 'analysis subgraph'.
|
|
1514
|
+
// This subgraph lists all of the states in the parse table that are directly
|
|
1515
|
+
// involved in building subtrees for this symbol.
|
|
1516
|
+
//
|
|
1517
|
+
// In addition to the parent symbols in the query, construct subgraphs for all
|
|
1518
|
+
// of the hidden symbols in the grammar, because these might occur within
|
|
1519
|
+
// one of the parent nodes, such that their children appear to belong to the
|
|
1520
|
+
// parent.
|
|
1521
|
+
AnalysisSubgraphArray subgraphs = array_new();
|
|
1522
|
+
for (unsigned i = 0; i < parent_step_indices.size; i++) {
|
|
1523
|
+
uint32_t parent_step_index = parent_step_indices.contents[i];
|
|
1524
|
+
TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
|
|
1525
|
+
AnalysisSubgraph subgraph = { .symbol = parent_symbol };
|
|
1526
|
+
array_insert_sorted_by(&subgraphs, .symbol, subgraph);
|
|
1527
|
+
}
|
|
1528
|
+
for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) {
|
|
1529
|
+
if (!ts_language_symbol_metadata(self->language, sym).visible) {
|
|
1530
|
+
AnalysisSubgraph subgraph = { .symbol = sym };
|
|
1531
|
+
array_insert_sorted_by(&subgraphs, .symbol, subgraph);
|
|
1532
|
+
}
|
|
1533
|
+
}
|
|
1534
|
+
|
|
1535
|
+
// Scan the parse table to find the data needed to populate these subgraphs.
|
|
1536
|
+
// Collect three things during this scan:
|
|
1537
|
+
// 1) All of the parse states where one of these symbols can start.
|
|
1538
|
+
// 2) All of the parse states where one of these symbols can end, along
|
|
1539
|
+
// with information about the node that would be created.
|
|
1540
|
+
// 3) A list of predecessor states for each state.
|
|
1541
|
+
StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
|
|
1542
|
+
for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) {
|
|
1543
|
+
unsigned subgraph_index, exists;
|
|
1544
|
+
LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state);
|
|
1545
|
+
while (ts_lookahead_iterator__next(&lookahead_iterator)) {
|
|
1546
|
+
if (lookahead_iterator.action_count) {
|
|
1547
|
+
for (unsigned i = 0; i < lookahead_iterator.action_count; i++) {
|
|
1548
|
+
const TSParseAction *action = &lookahead_iterator.actions[i];
|
|
1549
|
+
if (action->type == TSParseActionTypeReduce) {
|
|
1550
|
+
const TSSymbol *aliases, *aliases_end;
|
|
1551
|
+
ts_language_aliases_for_symbol(
|
|
1552
|
+
self->language,
|
|
1553
|
+
action->reduce.symbol,
|
|
1554
|
+
&aliases,
|
|
1555
|
+
&aliases_end
|
|
1556
|
+
);
|
|
1557
|
+
for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
|
|
1558
|
+
array_search_sorted_by(
|
|
1559
|
+
&subgraphs,
|
|
1560
|
+
.symbol,
|
|
1561
|
+
*symbol,
|
|
1562
|
+
&subgraph_index,
|
|
1563
|
+
&exists
|
|
1564
|
+
);
|
|
1565
|
+
if (exists) {
|
|
1566
|
+
AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
|
|
1567
|
+
if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
|
|
1568
|
+
array_push(&subgraph->nodes, ((AnalysisSubgraphNode) {
|
|
1569
|
+
.state = state,
|
|
1570
|
+
.production_id = action->reduce.production_id,
|
|
1571
|
+
.child_index = action->reduce.child_count,
|
|
1572
|
+
.done = true,
|
|
1573
|
+
}));
|
|
1574
|
+
}
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
} else if (action->type == TSParseActionTypeShift && !action->shift.extra) {
|
|
1578
|
+
TSStateId next_state = action->shift.state;
|
|
1579
|
+
state_predecessor_map_add(&predecessor_map, next_state, state);
|
|
1580
|
+
}
|
|
1581
|
+
}
|
|
1582
|
+
} else if (lookahead_iterator.next_state != 0) {
|
|
1583
|
+
if (lookahead_iterator.next_state != state) {
|
|
1584
|
+
state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
|
|
1585
|
+
}
|
|
1586
|
+
if (ts_language_state_is_primary(self->language, state)) {
|
|
1587
|
+
const TSSymbol *aliases, *aliases_end;
|
|
1588
|
+
ts_language_aliases_for_symbol(
|
|
1589
|
+
self->language,
|
|
1590
|
+
lookahead_iterator.symbol,
|
|
1591
|
+
&aliases,
|
|
1592
|
+
&aliases_end
|
|
1593
|
+
);
|
|
1594
|
+
for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
|
|
1595
|
+
array_search_sorted_by(
|
|
1596
|
+
&subgraphs,
|
|
1597
|
+
.symbol,
|
|
1598
|
+
*symbol,
|
|
1599
|
+
&subgraph_index,
|
|
1600
|
+
&exists
|
|
1601
|
+
);
|
|
1602
|
+
if (exists) {
|
|
1603
|
+
AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
|
|
1604
|
+
if (
|
|
1605
|
+
subgraph->start_states.size == 0 ||
|
|
1606
|
+
*array_back(&subgraph->start_states) != state
|
|
1607
|
+
)
|
|
1608
|
+
array_push(&subgraph->start_states, state);
|
|
1609
|
+
}
|
|
1610
|
+
}
|
|
1611
|
+
}
|
|
1612
|
+
}
|
|
1613
|
+
}
|
|
1614
|
+
}
|
|
1615
|
+
|
|
1616
|
+
// For each subgraph, compute the preceding states by walking backward
|
|
1617
|
+
// from the end states using the predecessor map.
|
|
1618
|
+
Array(AnalysisSubgraphNode) next_nodes = array_new();
|
|
1619
|
+
for (unsigned i = 0; i < subgraphs.size; i++) {
|
|
1620
|
+
AnalysisSubgraph *subgraph = &subgraphs.contents[i];
|
|
1621
|
+
if (subgraph->nodes.size == 0) {
|
|
1622
|
+
array_delete(&subgraph->start_states);
|
|
1623
|
+
array_erase(&subgraphs, i);
|
|
1624
|
+
i--;
|
|
1625
|
+
continue;
|
|
1626
|
+
}
|
|
1627
|
+
array_assign(&next_nodes, &subgraph->nodes);
|
|
1628
|
+
while (next_nodes.size > 0) {
|
|
1629
|
+
AnalysisSubgraphNode node = array_pop(&next_nodes);
|
|
1630
|
+
if (node.child_index > 1) {
|
|
1631
|
+
unsigned predecessor_count;
|
|
1632
|
+
const TSStateId *predecessors = state_predecessor_map_get(
|
|
1633
|
+
&predecessor_map,
|
|
1634
|
+
node.state,
|
|
1635
|
+
&predecessor_count
|
|
1636
|
+
);
|
|
1637
|
+
for (unsigned j = 0; j < predecessor_count; j++) {
|
|
1638
|
+
AnalysisSubgraphNode predecessor_node = {
|
|
1639
|
+
.state = predecessors[j],
|
|
1640
|
+
.child_index = node.child_index - 1,
|
|
1641
|
+
.production_id = node.production_id,
|
|
1642
|
+
.done = false,
|
|
1643
|
+
};
|
|
1644
|
+
unsigned index, exists;
|
|
1645
|
+
array_search_sorted_with(
|
|
1646
|
+
&subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node,
|
|
1647
|
+
&index, &exists
|
|
1648
|
+
);
|
|
1649
|
+
if (!exists) {
|
|
1650
|
+
array_insert(&subgraph->nodes, index, predecessor_node);
|
|
1651
|
+
array_push(&next_nodes, predecessor_node);
|
|
1652
|
+
}
|
|
1653
|
+
}
|
|
1654
|
+
}
|
|
1655
|
+
}
|
|
1656
|
+
}
|
|
1657
|
+
|
|
1658
|
+
#ifdef DEBUG_ANALYZE_QUERY
|
|
1659
|
+
printf("\nSubgraphs:\n");
|
|
1660
|
+
for (unsigned i = 0; i < subgraphs.size; i++) {
|
|
1661
|
+
AnalysisSubgraph *subgraph = &subgraphs.contents[i];
|
|
1662
|
+
printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol));
|
|
1663
|
+
for (unsigned j = 0; j < subgraph->start_states.size; j++) {
|
|
1664
|
+
printf(
|
|
1665
|
+
" {state: %u}\n",
|
|
1666
|
+
subgraph->start_states.contents[j]
|
|
1667
|
+
);
|
|
1668
|
+
}
|
|
1669
|
+
for (unsigned j = 0; j < subgraph->nodes.size; j++) {
|
|
1670
|
+
AnalysisSubgraphNode *node = &subgraph->nodes.contents[j];
|
|
1671
|
+
printf(
|
|
1672
|
+
" {state: %u, child_index: %u, production_id: %u, done: %d}\n",
|
|
1673
|
+
node->state, node->child_index, node->production_id, node->done
|
|
1674
|
+
);
|
|
1675
|
+
}
|
|
1676
|
+
printf("\n");
|
|
1677
|
+
}
|
|
1678
|
+
#endif
|
|
1679
|
+
|
|
1680
|
+
// For each non-terminal pattern, determine if the pattern can successfully match,
|
|
1681
|
+
// and identify all of the possible children within the pattern where matching could fail.
|
|
1682
|
+
bool all_patterns_are_valid = true;
|
|
1683
|
+
QueryAnalysis analysis = query_analysis__new();
|
|
1684
|
+
for (unsigned i = 0; i < parent_step_indices.size; i++) {
|
|
1685
|
+
uint16_t parent_step_index = parent_step_indices.contents[i];
|
|
1686
|
+
uint16_t parent_depth = self->steps.contents[parent_step_index].depth;
|
|
1687
|
+
TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
|
|
1688
|
+
if (parent_symbol == ts_builtin_sym_error) continue;
|
|
1689
|
+
|
|
1690
|
+
// Find the subgraph that corresponds to this pattern's root symbol. If the pattern's
|
|
1691
|
+
// root symbol is a terminal, then return an error.
|
|
1692
|
+
unsigned subgraph_index, exists;
|
|
1693
|
+
array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
|
|
1694
|
+
if (!exists) {
|
|
1695
|
+
unsigned first_child_step_index = parent_step_index + 1;
|
|
1696
|
+
uint32_t j, child_exists;
|
|
1697
|
+
array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists);
|
|
1698
|
+
ts_assert(child_exists);
|
|
1699
|
+
*error_offset = self->step_offsets.contents[j].byte_offset;
|
|
1700
|
+
all_patterns_are_valid = false;
|
|
1701
|
+
break;
|
|
1702
|
+
}
|
|
1703
|
+
|
|
1704
|
+
// Initialize an analysis state at every parse state in the table where
|
|
1705
|
+
// this parent symbol can occur.
|
|
1706
|
+
AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
|
|
1707
|
+
analysis_state_set__clear(&analysis.states, &analysis.state_pool);
|
|
1708
|
+
analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
|
|
1709
|
+
for (unsigned j = 0; j < subgraph->start_states.size; j++) {
|
|
1710
|
+
TSStateId parse_state = subgraph->start_states.contents[j];
|
|
1711
|
+
analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
|
|
1712
|
+
.step_index = parent_step_index + 1,
|
|
1713
|
+
.stack = {
|
|
1714
|
+
[0] = {
|
|
1715
|
+
.parse_state = parse_state,
|
|
1716
|
+
.parent_symbol = parent_symbol,
|
|
1717
|
+
.child_index = 0,
|
|
1718
|
+
.field_id = 0,
|
|
1719
|
+
.done = false,
|
|
1720
|
+
},
|
|
1721
|
+
},
|
|
1722
|
+
.depth = 1,
|
|
1723
|
+
.root_symbol = parent_symbol,
|
|
1724
|
+
}));
|
|
1725
|
+
}
|
|
1726
|
+
|
|
1727
|
+
#ifdef DEBUG_ANALYZE_QUERY
|
|
1728
|
+
printf(
|
|
1729
|
+
"\nWalk states for %s:\n",
|
|
1730
|
+
ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol)
|
|
1731
|
+
);
|
|
1732
|
+
#endif
|
|
1733
|
+
|
|
1734
|
+
analysis.did_abort = false;
|
|
1735
|
+
ts_query__perform_analysis(self, &subgraphs, &analysis);
|
|
1736
|
+
|
|
1737
|
+
// If this pattern could not be fully analyzed, then every step should
|
|
1738
|
+
// be considered fallible.
|
|
1739
|
+
if (analysis.did_abort) {
|
|
1740
|
+
for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
|
|
1741
|
+
QueryStep *step = &self->steps.contents[j];
|
|
1742
|
+
if (
|
|
1743
|
+
step->depth <= parent_depth ||
|
|
1744
|
+
step->depth == PATTERN_DONE_MARKER
|
|
1745
|
+
) break;
|
|
1746
|
+
if (!step->is_dead_end) {
|
|
1747
|
+
step->parent_pattern_guaranteed = false;
|
|
1748
|
+
step->root_pattern_guaranteed = false;
|
|
1749
|
+
}
|
|
1750
|
+
}
|
|
1751
|
+
continue;
|
|
1752
|
+
}
|
|
1753
|
+
|
|
1754
|
+
// If this pattern cannot match, store the pattern index so that it can be
|
|
1755
|
+
// returned to the caller.
|
|
1756
|
+
if (analysis.finished_parent_symbols.size == 0) {
|
|
1757
|
+
ts_assert(analysis.final_step_indices.size > 0);
|
|
1758
|
+
uint16_t impossible_step_index = *array_back(&analysis.final_step_indices);
|
|
1759
|
+
uint32_t j, impossible_exists;
|
|
1760
|
+
array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists);
|
|
1761
|
+
if (j >= self->step_offsets.size) j = self->step_offsets.size - 1;
|
|
1762
|
+
*error_offset = self->step_offsets.contents[j].byte_offset;
|
|
1763
|
+
all_patterns_are_valid = false;
|
|
1764
|
+
break;
|
|
1765
|
+
}
|
|
1766
|
+
|
|
1767
|
+
// Mark as fallible any step where a match terminated.
|
|
1768
|
+
// Later, this property will be propagated to all of the step's predecessors.
|
|
1769
|
+
for (unsigned j = 0; j < analysis.final_step_indices.size; j++) {
|
|
1770
|
+
uint32_t final_step_index = analysis.final_step_indices.contents[j];
|
|
1771
|
+
QueryStep *step = &self->steps.contents[final_step_index];
|
|
1772
|
+
if (
|
|
1773
|
+
step->depth != PATTERN_DONE_MARKER &&
|
|
1774
|
+
step->depth > parent_depth &&
|
|
1775
|
+
!step->is_dead_end
|
|
1776
|
+
) {
|
|
1777
|
+
step->parent_pattern_guaranteed = false;
|
|
1778
|
+
step->root_pattern_guaranteed = false;
|
|
1779
|
+
}
|
|
1780
|
+
}
|
|
1781
|
+
}
|
|
1782
|
+
|
|
1783
|
+
// Mark as indefinite any step with captures that are used in predicates.
|
|
1784
|
+
Array(uint16_t) predicate_capture_ids = array_new();
|
|
1785
|
+
for (unsigned i = 0; i < self->patterns.size; i++) {
|
|
1786
|
+
QueryPattern *pattern = &self->patterns.contents[i];
|
|
1787
|
+
|
|
1788
|
+
// Gather all of the captures that are used in predicates for this pattern.
|
|
1789
|
+
array_clear(&predicate_capture_ids);
|
|
1790
|
+
for (
|
|
1791
|
+
unsigned start = pattern->predicate_steps.offset,
|
|
1792
|
+
end = start + pattern->predicate_steps.length,
|
|
1793
|
+
j = start; j < end; j++
|
|
1794
|
+
) {
|
|
1795
|
+
TSQueryPredicateStep *step = &self->predicate_steps.contents[j];
|
|
1796
|
+
if (step->type == TSQueryPredicateStepTypeCapture) {
|
|
1797
|
+
uint16_t value_id = step->value_id;
|
|
1798
|
+
array_insert_sorted_by(&predicate_capture_ids, , value_id);
|
|
1799
|
+
}
|
|
1800
|
+
}
|
|
1801
|
+
|
|
1802
|
+
// Find all of the steps that have these captures.
|
|
1803
|
+
for (
|
|
1804
|
+
unsigned start = pattern->steps.offset,
|
|
1805
|
+
end = start + pattern->steps.length,
|
|
1806
|
+
j = start; j < end; j++
|
|
1807
|
+
) {
|
|
1808
|
+
QueryStep *step = &self->steps.contents[j];
|
|
1809
|
+
for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
|
|
1810
|
+
uint16_t capture_id = step->capture_ids[k];
|
|
1811
|
+
if (capture_id == NONE) break;
|
|
1812
|
+
unsigned index, exists;
|
|
1813
|
+
array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists);
|
|
1814
|
+
if (exists) {
|
|
1815
|
+
step->root_pattern_guaranteed = false;
|
|
1816
|
+
break;
|
|
1817
|
+
}
|
|
1818
|
+
}
|
|
1819
|
+
}
|
|
1820
|
+
}
|
|
1821
|
+
|
|
1822
|
+
// Propagate fallibility. If a pattern is fallible at a given step, then it is
|
|
1823
|
+
// fallible at all of its preceding steps.
|
|
1824
|
+
bool done = self->steps.size == 0;
|
|
1825
|
+
while (!done) {
|
|
1826
|
+
done = true;
|
|
1827
|
+
for (unsigned i = self->steps.size - 1; i > 0; i--) {
|
|
1828
|
+
QueryStep *step = &self->steps.contents[i];
|
|
1829
|
+
if (step->depth == PATTERN_DONE_MARKER) continue;
|
|
1830
|
+
|
|
1831
|
+
// Determine if this step is definite or has definite alternatives.
|
|
1832
|
+
bool parent_pattern_guaranteed = false;
|
|
1833
|
+
for (;;) {
|
|
1834
|
+
if (step->root_pattern_guaranteed) {
|
|
1835
|
+
parent_pattern_guaranteed = true;
|
|
1836
|
+
break;
|
|
1837
|
+
}
|
|
1838
|
+
if (step->alternative_index == NONE || step->alternative_index < i) {
|
|
1839
|
+
break;
|
|
1840
|
+
}
|
|
1841
|
+
step = &self->steps.contents[step->alternative_index];
|
|
1842
|
+
}
|
|
1843
|
+
|
|
1844
|
+
// If not, mark its predecessor as indefinite.
|
|
1845
|
+
if (!parent_pattern_guaranteed) {
|
|
1846
|
+
QueryStep *prev_step = &self->steps.contents[i - 1];
|
|
1847
|
+
if (
|
|
1848
|
+
!prev_step->is_dead_end &&
|
|
1849
|
+
prev_step->depth != PATTERN_DONE_MARKER &&
|
|
1850
|
+
prev_step->root_pattern_guaranteed
|
|
1851
|
+
) {
|
|
1852
|
+
prev_step->root_pattern_guaranteed = false;
|
|
1853
|
+
done = false;
|
|
1854
|
+
}
|
|
1855
|
+
}
|
|
1856
|
+
}
|
|
1857
|
+
}
|
|
1858
|
+
|
|
1859
|
+
#ifdef DEBUG_ANALYZE_QUERY
|
|
1860
|
+
printf("Steps:\n");
|
|
1861
|
+
for (unsigned i = 0; i < self->steps.size; i++) {
|
|
1862
|
+
QueryStep *step = &self->steps.contents[i];
|
|
1863
|
+
if (step->depth == PATTERN_DONE_MARKER) {
|
|
1864
|
+
printf(" %u: DONE\n", i);
|
|
1865
|
+
} else {
|
|
1866
|
+
printf(
|
|
1867
|
+
" %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n",
|
|
1868
|
+
i,
|
|
1869
|
+
(step->symbol == WILDCARD_SYMBOL)
|
|
1870
|
+
? "ANY"
|
|
1871
|
+
: ts_language_symbol_name(self->language, step->symbol),
|
|
1872
|
+
(step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
|
|
1873
|
+
step->depth,
|
|
1874
|
+
step->parent_pattern_guaranteed,
|
|
1875
|
+
step->root_pattern_guaranteed
|
|
1876
|
+
);
|
|
1877
|
+
}
|
|
1878
|
+
}
|
|
1879
|
+
#endif
|
|
1880
|
+
|
|
1881
|
+
// Determine which repetition symbols in this language have the possibility
|
|
1882
|
+
// of matching non-rooted patterns in this query. These repetition symbols
|
|
1883
|
+
// prevent certain optimizations with range restrictions.
|
|
1884
|
+
analysis.did_abort = false;
|
|
1885
|
+
for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) {
|
|
1886
|
+
uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i];
|
|
1887
|
+
PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index];
|
|
1888
|
+
|
|
1889
|
+
analysis_state_set__clear(&analysis.states, &analysis.state_pool);
|
|
1890
|
+
analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
|
|
1891
|
+
for (unsigned j = 0; j < subgraphs.size; j++) {
|
|
1892
|
+
AnalysisSubgraph *subgraph = &subgraphs.contents[j];
|
|
1893
|
+
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol);
|
|
1894
|
+
if (metadata.visible || metadata.named) continue;
|
|
1895
|
+
|
|
1896
|
+
for (uint32_t k = 0; k < subgraph->start_states.size; k++) {
|
|
1897
|
+
TSStateId parse_state = subgraph->start_states.contents[k];
|
|
1898
|
+
analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
|
|
1899
|
+
.step_index = pattern_entry->step_index,
|
|
1900
|
+
.stack = {
|
|
1901
|
+
[0] = {
|
|
1902
|
+
.parse_state = parse_state,
|
|
1903
|
+
.parent_symbol = subgraph->symbol,
|
|
1904
|
+
.child_index = 0,
|
|
1905
|
+
.field_id = 0,
|
|
1906
|
+
.done = false,
|
|
1907
|
+
},
|
|
1908
|
+
},
|
|
1909
|
+
.root_symbol = subgraph->symbol,
|
|
1910
|
+
.depth = 1,
|
|
1911
|
+
}));
|
|
1912
|
+
}
|
|
1913
|
+
}
|
|
1914
|
+
|
|
1915
|
+
#ifdef DEBUG_ANALYZE_QUERY
|
|
1916
|
+
printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index);
|
|
1917
|
+
#endif
|
|
1918
|
+
|
|
1919
|
+
ts_query__perform_analysis(
|
|
1920
|
+
self,
|
|
1921
|
+
&subgraphs,
|
|
1922
|
+
&analysis
|
|
1923
|
+
);
|
|
1924
|
+
|
|
1925
|
+
if (analysis.finished_parent_symbols.size > 0) {
|
|
1926
|
+
self->patterns.contents[pattern_entry->pattern_index].is_non_local = true;
|
|
1927
|
+
}
|
|
1928
|
+
|
|
1929
|
+
for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) {
|
|
1930
|
+
TSSymbol symbol = analysis.finished_parent_symbols.contents[k];
|
|
1931
|
+
array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol);
|
|
1932
|
+
}
|
|
1933
|
+
}
|
|
1934
|
+
|
|
1935
|
+
#ifdef DEBUG_ANALYZE_QUERY
|
|
1936
|
+
if (self->repeat_symbols_with_rootless_patterns.size > 0) {
|
|
1937
|
+
printf("\nRepetition symbols with rootless patterns:\n");
|
|
1938
|
+
printf("aborted analysis: %d\n", analysis.did_abort);
|
|
1939
|
+
for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) {
|
|
1940
|
+
TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i];
|
|
1941
|
+
printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol));
|
|
1942
|
+
}
|
|
1943
|
+
printf("\n");
|
|
1944
|
+
}
|
|
1945
|
+
#endif
|
|
1946
|
+
|
|
1947
|
+
// Cleanup
|
|
1948
|
+
for (unsigned i = 0; i < subgraphs.size; i++) {
|
|
1949
|
+
array_delete(&subgraphs.contents[i].start_states);
|
|
1950
|
+
array_delete(&subgraphs.contents[i].nodes);
|
|
1951
|
+
}
|
|
1952
|
+
array_delete(&subgraphs);
|
|
1953
|
+
query_analysis__delete(&analysis);
|
|
1954
|
+
array_delete(&next_nodes);
|
|
1955
|
+
array_delete(&non_rooted_pattern_start_steps);
|
|
1956
|
+
array_delete(&parent_step_indices);
|
|
1957
|
+
array_delete(&predicate_capture_ids);
|
|
1958
|
+
state_predecessor_map_delete(&predecessor_map);
|
|
1959
|
+
|
|
1960
|
+
return all_patterns_are_valid;
|
|
1961
|
+
}
|
|
1962
|
+
|
|
1963
|
+
static void ts_query__add_negated_fields(
|
|
1964
|
+
TSQuery *self,
|
|
1965
|
+
uint16_t step_index,
|
|
1966
|
+
TSFieldId *field_ids,
|
|
1967
|
+
uint16_t field_count
|
|
1968
|
+
) {
|
|
1969
|
+
QueryStep *step = &self->steps.contents[step_index];
|
|
1970
|
+
|
|
1971
|
+
// The negated field array stores a list of field lists, separated by zeros.
|
|
1972
|
+
// Try to find the start index of an existing list that matches this new list.
|
|
1973
|
+
bool failed_match = false;
|
|
1974
|
+
unsigned match_count = 0;
|
|
1975
|
+
unsigned start_i = 0;
|
|
1976
|
+
for (unsigned i = 0; i < self->negated_fields.size; i++) {
|
|
1977
|
+
TSFieldId existing_field_id = self->negated_fields.contents[i];
|
|
1978
|
+
|
|
1979
|
+
// At each zero value, terminate the match attempt. If we've exactly
|
|
1980
|
+
// matched the new field list, then reuse this index. Otherwise,
|
|
1981
|
+
// start over the matching process.
|
|
1982
|
+
if (existing_field_id == 0) {
|
|
1983
|
+
if (match_count == field_count) {
|
|
1984
|
+
step->negated_field_list_id = start_i;
|
|
1985
|
+
return;
|
|
1986
|
+
} else {
|
|
1987
|
+
start_i = i + 1;
|
|
1988
|
+
match_count = 0;
|
|
1989
|
+
failed_match = false;
|
|
1990
|
+
}
|
|
1991
|
+
}
|
|
1992
|
+
|
|
1993
|
+
// If the existing list matches our new list so far, then advance
|
|
1994
|
+
// to the next element of the new list.
|
|
1995
|
+
else if (
|
|
1996
|
+
match_count < field_count &&
|
|
1997
|
+
existing_field_id == field_ids[match_count] &&
|
|
1998
|
+
!failed_match
|
|
1999
|
+
) {
|
|
2000
|
+
match_count++;
|
|
2001
|
+
}
|
|
2002
|
+
|
|
2003
|
+
// Otherwise, this existing list has failed to match.
|
|
2004
|
+
else {
|
|
2005
|
+
match_count = 0;
|
|
2006
|
+
failed_match = true;
|
|
2007
|
+
}
|
|
2008
|
+
}
|
|
2009
|
+
|
|
2010
|
+
step->negated_field_list_id = self->negated_fields.size;
|
|
2011
|
+
array_extend(&self->negated_fields, field_count, field_ids);
|
|
2012
|
+
array_push(&self->negated_fields, 0);
|
|
2013
|
+
}
|
|
2014
|
+
|
|
2015
|
+
static TSQueryError ts_query__parse_string_literal(
|
|
2016
|
+
TSQuery *self,
|
|
2017
|
+
Stream *stream
|
|
2018
|
+
) {
|
|
2019
|
+
const char *string_start = stream->input;
|
|
2020
|
+
if (stream->next != '"') return TSQueryErrorSyntax;
|
|
2021
|
+
stream_advance(stream);
|
|
2022
|
+
const char *prev_position = stream->input;
|
|
2023
|
+
|
|
2024
|
+
bool is_escaped = false;
|
|
2025
|
+
array_clear(&self->string_buffer);
|
|
2026
|
+
for (;;) {
|
|
2027
|
+
if (is_escaped) {
|
|
2028
|
+
is_escaped = false;
|
|
2029
|
+
switch (stream->next) {
|
|
2030
|
+
case 'n':
|
|
2031
|
+
array_push(&self->string_buffer, '\n');
|
|
2032
|
+
break;
|
|
2033
|
+
case 'r':
|
|
2034
|
+
array_push(&self->string_buffer, '\r');
|
|
2035
|
+
break;
|
|
2036
|
+
case 't':
|
|
2037
|
+
array_push(&self->string_buffer, '\t');
|
|
2038
|
+
break;
|
|
2039
|
+
case '0':
|
|
2040
|
+
array_push(&self->string_buffer, '\0');
|
|
2041
|
+
break;
|
|
2042
|
+
default:
|
|
2043
|
+
array_extend(&self->string_buffer, stream->next_size, stream->input);
|
|
2044
|
+
break;
|
|
2045
|
+
}
|
|
2046
|
+
prev_position = stream->input + stream->next_size;
|
|
2047
|
+
} else {
|
|
2048
|
+
if (stream->next == '\\') {
|
|
2049
|
+
array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
|
|
2050
|
+
prev_position = stream->input + 1;
|
|
2051
|
+
is_escaped = true;
|
|
2052
|
+
} else if (stream->next == '"') {
|
|
2053
|
+
array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
|
|
2054
|
+
stream_advance(stream);
|
|
2055
|
+
return TSQueryErrorNone;
|
|
2056
|
+
} else if (stream->next == '\n') {
|
|
2057
|
+
stream_reset(stream, string_start);
|
|
2058
|
+
return TSQueryErrorSyntax;
|
|
2059
|
+
}
|
|
2060
|
+
}
|
|
2061
|
+
if (!stream_advance(stream)) {
|
|
2062
|
+
stream_reset(stream, string_start);
|
|
2063
|
+
return TSQueryErrorSyntax;
|
|
2064
|
+
}
|
|
2065
|
+
}
|
|
2066
|
+
}
|
|
2067
|
+
|
|
2068
|
+
// Parse a single predicate associated with a pattern, adding it to the
|
|
2069
|
+
// query's internal `predicate_steps` array. Predicates are arbitrary
|
|
2070
|
+
// S-expressions associated with a pattern which are meant to be handled at
|
|
2071
|
+
// a higher level of abstraction, such as the Rust/JavaScript bindings. They
|
|
2072
|
+
// can contain '@'-prefixed capture names, double-quoted strings, and bare
|
|
2073
|
+
// symbols, which also represent strings.
|
|
2074
|
+
static TSQueryError ts_query__parse_predicate(
|
|
2075
|
+
TSQuery *self,
|
|
2076
|
+
Stream *stream
|
|
2077
|
+
) {
|
|
2078
|
+
if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
|
|
2079
|
+
const char *predicate_name = stream->input;
|
|
2080
|
+
stream_scan_identifier(stream);
|
|
2081
|
+
uint32_t length = (uint32_t)(stream->input - predicate_name);
|
|
2082
|
+
uint16_t id = symbol_table_insert_name(
|
|
2083
|
+
&self->predicate_values,
|
|
2084
|
+
predicate_name,
|
|
2085
|
+
length
|
|
2086
|
+
);
|
|
2087
|
+
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
|
2088
|
+
.type = TSQueryPredicateStepTypeString,
|
|
2089
|
+
.value_id = id,
|
|
2090
|
+
}));
|
|
2091
|
+
stream_skip_whitespace(stream);
|
|
2092
|
+
|
|
2093
|
+
for (;;) {
|
|
2094
|
+
if (stream->next == ')') {
|
|
2095
|
+
stream_advance(stream);
|
|
2096
|
+
stream_skip_whitespace(stream);
|
|
2097
|
+
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
|
2098
|
+
.type = TSQueryPredicateStepTypeDone,
|
|
2099
|
+
.value_id = 0,
|
|
2100
|
+
}));
|
|
2101
|
+
break;
|
|
2102
|
+
}
|
|
2103
|
+
|
|
2104
|
+
// Parse an '@'-prefixed capture name
|
|
2105
|
+
else if (stream->next == '@') {
|
|
2106
|
+
stream_advance(stream);
|
|
2107
|
+
|
|
2108
|
+
// Parse the capture name
|
|
2109
|
+
if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
|
|
2110
|
+
const char *capture_name = stream->input;
|
|
2111
|
+
stream_scan_identifier(stream);
|
|
2112
|
+
uint32_t capture_length = (uint32_t)(stream->input - capture_name);
|
|
2113
|
+
|
|
2114
|
+
// Add the capture id to the first step of the pattern
|
|
2115
|
+
int capture_id = symbol_table_id_for_name(
|
|
2116
|
+
&self->captures,
|
|
2117
|
+
capture_name,
|
|
2118
|
+
capture_length
|
|
2119
|
+
);
|
|
2120
|
+
if (capture_id == -1) {
|
|
2121
|
+
stream_reset(stream, capture_name);
|
|
2122
|
+
return TSQueryErrorCapture;
|
|
2123
|
+
}
|
|
2124
|
+
|
|
2125
|
+
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
|
2126
|
+
.type = TSQueryPredicateStepTypeCapture,
|
|
2127
|
+
.value_id = capture_id,
|
|
2128
|
+
}));
|
|
2129
|
+
}
|
|
2130
|
+
|
|
2131
|
+
// Parse a string literal
|
|
2132
|
+
else if (stream->next == '"') {
|
|
2133
|
+
TSQueryError e = ts_query__parse_string_literal(self, stream);
|
|
2134
|
+
if (e) return e;
|
|
2135
|
+
uint16_t query_id = symbol_table_insert_name(
|
|
2136
|
+
&self->predicate_values,
|
|
2137
|
+
self->string_buffer.contents,
|
|
2138
|
+
self->string_buffer.size
|
|
2139
|
+
);
|
|
2140
|
+
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
|
2141
|
+
.type = TSQueryPredicateStepTypeString,
|
|
2142
|
+
.value_id = query_id,
|
|
2143
|
+
}));
|
|
2144
|
+
}
|
|
2145
|
+
|
|
2146
|
+
// Parse a bare symbol
|
|
2147
|
+
else if (stream_is_ident_start(stream)) {
|
|
2148
|
+
const char *symbol_start = stream->input;
|
|
2149
|
+
stream_scan_identifier(stream);
|
|
2150
|
+
uint32_t symbol_length = (uint32_t)(stream->input - symbol_start);
|
|
2151
|
+
uint16_t query_id = symbol_table_insert_name(
|
|
2152
|
+
&self->predicate_values,
|
|
2153
|
+
symbol_start,
|
|
2154
|
+
symbol_length
|
|
2155
|
+
);
|
|
2156
|
+
array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
|
|
2157
|
+
.type = TSQueryPredicateStepTypeString,
|
|
2158
|
+
.value_id = query_id,
|
|
2159
|
+
}));
|
|
2160
|
+
}
|
|
2161
|
+
|
|
2162
|
+
else {
|
|
2163
|
+
return TSQueryErrorSyntax;
|
|
2164
|
+
}
|
|
2165
|
+
|
|
2166
|
+
stream_skip_whitespace(stream);
|
|
2167
|
+
}
|
|
2168
|
+
|
|
2169
|
+
return 0;
|
|
2170
|
+
}
|
|
2171
|
+
|
|
2172
|
+
// Read one S-expression pattern from the stream, and incorporate it into
|
|
2173
|
+
// the query's internal state machine representation. For nested patterns,
|
|
2174
|
+
// this function calls itself recursively.
|
|
2175
|
+
//
|
|
2176
|
+
// The caller is responsible for passing in a dedicated CaptureQuantifiers.
|
|
2177
|
+
// These should not be shared between different calls to ts_query__parse_pattern!
|
|
2178
|
+
static TSQueryError ts_query__parse_pattern(
|
|
2179
|
+
TSQuery *self,
|
|
2180
|
+
Stream *stream,
|
|
2181
|
+
uint32_t depth,
|
|
2182
|
+
bool is_immediate,
|
|
2183
|
+
CaptureQuantifiers *capture_quantifiers
|
|
2184
|
+
) {
|
|
2185
|
+
if (stream->next == 0) return TSQueryErrorSyntax;
|
|
2186
|
+
if (stream->next == ')' || stream->next == ']') return PARENT_DONE;
|
|
2187
|
+
|
|
2188
|
+
const uint32_t starting_step_index = self->steps.size;
|
|
2189
|
+
|
|
2190
|
+
// Store the byte offset of each step in the query.
|
|
2191
|
+
if (
|
|
2192
|
+
self->step_offsets.size == 0 ||
|
|
2193
|
+
array_back(&self->step_offsets)->step_index != starting_step_index
|
|
2194
|
+
) {
|
|
2195
|
+
array_push(&self->step_offsets, ((StepOffset) {
|
|
2196
|
+
.step_index = starting_step_index,
|
|
2197
|
+
.byte_offset = stream_offset(stream),
|
|
2198
|
+
}));
|
|
2199
|
+
}
|
|
2200
|
+
|
|
2201
|
+
// An open bracket is the start of an alternation.
|
|
2202
|
+
if (stream->next == '[') {
|
|
2203
|
+
stream_advance(stream);
|
|
2204
|
+
stream_skip_whitespace(stream);
|
|
2205
|
+
|
|
2206
|
+
// Parse each branch, and add a placeholder step in between the branches.
|
|
2207
|
+
Array(uint32_t) branch_step_indices = array_new();
|
|
2208
|
+
CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new();
|
|
2209
|
+
for (;;) {
|
|
2210
|
+
uint32_t start_index = self->steps.size;
|
|
2211
|
+
TSQueryError e = ts_query__parse_pattern(
|
|
2212
|
+
self,
|
|
2213
|
+
stream,
|
|
2214
|
+
depth,
|
|
2215
|
+
is_immediate,
|
|
2216
|
+
&branch_capture_quantifiers
|
|
2217
|
+
);
|
|
2218
|
+
|
|
2219
|
+
if (e == PARENT_DONE) {
|
|
2220
|
+
if (stream->next == ']' && branch_step_indices.size > 0) {
|
|
2221
|
+
stream_advance(stream);
|
|
2222
|
+
break;
|
|
2223
|
+
}
|
|
2224
|
+
e = TSQueryErrorSyntax;
|
|
2225
|
+
}
|
|
2226
|
+
if (e) {
|
|
2227
|
+
capture_quantifiers_delete(&branch_capture_quantifiers);
|
|
2228
|
+
array_delete(&branch_step_indices);
|
|
2229
|
+
return e;
|
|
2230
|
+
}
|
|
2231
|
+
|
|
2232
|
+
if (start_index == starting_step_index) {
|
|
2233
|
+
capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers);
|
|
2234
|
+
} else {
|
|
2235
|
+
capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers);
|
|
2236
|
+
}
|
|
2237
|
+
|
|
2238
|
+
array_push(&branch_step_indices, start_index);
|
|
2239
|
+
array_push(&self->steps, query_step__new(0, depth, false));
|
|
2240
|
+
capture_quantifiers_clear(&branch_capture_quantifiers);
|
|
2241
|
+
}
|
|
2242
|
+
(void)array_pop(&self->steps);
|
|
2243
|
+
|
|
2244
|
+
// For all of the branches except for the last one, add the subsequent branch as an
|
|
2245
|
+
// alternative, and link the end of the branch to the current end of the steps.
|
|
2246
|
+
for (unsigned i = 0; i < branch_step_indices.size - 1; i++) {
|
|
2247
|
+
uint32_t step_index = branch_step_indices.contents[i];
|
|
2248
|
+
uint32_t next_step_index = branch_step_indices.contents[i + 1];
|
|
2249
|
+
QueryStep *start_step = &self->steps.contents[step_index];
|
|
2250
|
+
QueryStep *end_step = &self->steps.contents[next_step_index - 1];
|
|
2251
|
+
start_step->alternative_index = next_step_index;
|
|
2252
|
+
end_step->alternative_index = self->steps.size;
|
|
2253
|
+
end_step->is_dead_end = true;
|
|
2254
|
+
}
|
|
2255
|
+
|
|
2256
|
+
capture_quantifiers_delete(&branch_capture_quantifiers);
|
|
2257
|
+
array_delete(&branch_step_indices);
|
|
2258
|
+
}
|
|
2259
|
+
|
|
2260
|
+
// An open parenthesis can be the start of three possible constructs:
|
|
2261
|
+
// * A grouped sequence
|
|
2262
|
+
// * A predicate
|
|
2263
|
+
// * A named node
|
|
2264
|
+
else if (stream->next == '(') {
|
|
2265
|
+
stream_advance(stream);
|
|
2266
|
+
stream_skip_whitespace(stream);
|
|
2267
|
+
|
|
2268
|
+
// If this parenthesis is followed by a node, then it represents a grouped sequence.
|
|
2269
|
+
if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
|
|
2270
|
+
bool child_is_immediate = is_immediate;
|
|
2271
|
+
CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
|
|
2272
|
+
for (;;) {
|
|
2273
|
+
if (stream->next == '.') {
|
|
2274
|
+
child_is_immediate = true;
|
|
2275
|
+
stream_advance(stream);
|
|
2276
|
+
stream_skip_whitespace(stream);
|
|
2277
|
+
}
|
|
2278
|
+
TSQueryError e = ts_query__parse_pattern(
|
|
2279
|
+
self,
|
|
2280
|
+
stream,
|
|
2281
|
+
depth,
|
|
2282
|
+
child_is_immediate,
|
|
2283
|
+
&child_capture_quantifiers
|
|
2284
|
+
);
|
|
2285
|
+
if (e == PARENT_DONE) {
|
|
2286
|
+
if (stream->next == ')') {
|
|
2287
|
+
stream_advance(stream);
|
|
2288
|
+
break;
|
|
2289
|
+
}
|
|
2290
|
+
e = TSQueryErrorSyntax;
|
|
2291
|
+
}
|
|
2292
|
+
if (e) {
|
|
2293
|
+
capture_quantifiers_delete(&child_capture_quantifiers);
|
|
2294
|
+
return e;
|
|
2295
|
+
}
|
|
2296
|
+
|
|
2297
|
+
capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
|
|
2298
|
+
capture_quantifiers_clear(&child_capture_quantifiers);
|
|
2299
|
+
child_is_immediate = false;
|
|
2300
|
+
}
|
|
2301
|
+
|
|
2302
|
+
capture_quantifiers_delete(&child_capture_quantifiers);
|
|
2303
|
+
}
|
|
2304
|
+
|
|
2305
|
+
// A dot/pound character indicates the start of a predicate.
|
|
2306
|
+
else if (stream->next == '.' || stream->next == '#') {
|
|
2307
|
+
stream_advance(stream);
|
|
2308
|
+
return ts_query__parse_predicate(self, stream);
|
|
2309
|
+
}
|
|
2310
|
+
|
|
2311
|
+
// Otherwise, this parenthesis is the start of a named node.
|
|
2312
|
+
else {
|
|
2313
|
+
TSSymbol symbol;
|
|
2314
|
+
|
|
2315
|
+
// Parse a normal node name
|
|
2316
|
+
if (stream_is_ident_start(stream)) {
|
|
2317
|
+
const char *node_name = stream->input;
|
|
2318
|
+
stream_scan_identifier(stream);
|
|
2319
|
+
uint32_t length = (uint32_t)(stream->input - node_name);
|
|
2320
|
+
|
|
2321
|
+
// Parse the wildcard symbol
|
|
2322
|
+
if (length == 1 && node_name[0] == '_') {
|
|
2323
|
+
symbol = WILDCARD_SYMBOL;
|
|
2324
|
+
}
|
|
2325
|
+
|
|
2326
|
+
else {
|
|
2327
|
+
symbol = ts_language_symbol_for_name(
|
|
2328
|
+
self->language,
|
|
2329
|
+
node_name,
|
|
2330
|
+
length,
|
|
2331
|
+
true
|
|
2332
|
+
);
|
|
2333
|
+
if (!symbol) {
|
|
2334
|
+
stream_reset(stream, node_name);
|
|
2335
|
+
return TSQueryErrorNodeType;
|
|
2336
|
+
}
|
|
2337
|
+
}
|
|
2338
|
+
} else {
|
|
2339
|
+
return TSQueryErrorSyntax;
|
|
2340
|
+
}
|
|
2341
|
+
|
|
2342
|
+
// Add a step for the node.
|
|
2343
|
+
array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
|
|
2344
|
+
QueryStep *step = array_back(&self->steps);
|
|
2345
|
+
if (ts_language_symbol_metadata(self->language, symbol).supertype) {
|
|
2346
|
+
step->supertype_symbol = step->symbol;
|
|
2347
|
+
step->symbol = WILDCARD_SYMBOL;
|
|
2348
|
+
}
|
|
2349
|
+
if (symbol == WILDCARD_SYMBOL) {
|
|
2350
|
+
step->is_named = true;
|
|
2351
|
+
}
|
|
2352
|
+
|
|
2353
|
+
stream_skip_whitespace(stream);
|
|
2354
|
+
|
|
2355
|
+
if (stream->next == '/') {
|
|
2356
|
+
stream_advance(stream);
|
|
2357
|
+
if (!stream_is_ident_start(stream)) {
|
|
2358
|
+
return TSQueryErrorSyntax;
|
|
2359
|
+
}
|
|
2360
|
+
|
|
2361
|
+
const char *node_name = stream->input;
|
|
2362
|
+
stream_scan_identifier(stream);
|
|
2363
|
+
uint32_t length = (uint32_t)(stream->input - node_name);
|
|
2364
|
+
|
|
2365
|
+
step->symbol = ts_language_symbol_for_name(
|
|
2366
|
+
self->language,
|
|
2367
|
+
node_name,
|
|
2368
|
+
length,
|
|
2369
|
+
true
|
|
2370
|
+
);
|
|
2371
|
+
if (!step->symbol) {
|
|
2372
|
+
stream_reset(stream, node_name);
|
|
2373
|
+
return TSQueryErrorNodeType;
|
|
2374
|
+
}
|
|
2375
|
+
|
|
2376
|
+
stream_skip_whitespace(stream);
|
|
2377
|
+
}
|
|
2378
|
+
|
|
2379
|
+
// Parse the child patterns
|
|
2380
|
+
bool child_is_immediate = false;
|
|
2381
|
+
uint16_t last_child_step_index = 0;
|
|
2382
|
+
uint16_t negated_field_count = 0;
|
|
2383
|
+
TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT];
|
|
2384
|
+
CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
|
|
2385
|
+
for (;;) {
|
|
2386
|
+
// Parse a negated field assertion
|
|
2387
|
+
if (stream->next == '!') {
|
|
2388
|
+
stream_advance(stream);
|
|
2389
|
+
stream_skip_whitespace(stream);
|
|
2390
|
+
if (!stream_is_ident_start(stream)) {
|
|
2391
|
+
capture_quantifiers_delete(&child_capture_quantifiers);
|
|
2392
|
+
return TSQueryErrorSyntax;
|
|
2393
|
+
}
|
|
2394
|
+
const char *field_name = stream->input;
|
|
2395
|
+
stream_scan_identifier(stream);
|
|
2396
|
+
uint32_t length = (uint32_t)(stream->input - field_name);
|
|
2397
|
+
stream_skip_whitespace(stream);
|
|
2398
|
+
|
|
2399
|
+
TSFieldId field_id = ts_language_field_id_for_name(
|
|
2400
|
+
self->language,
|
|
2401
|
+
field_name,
|
|
2402
|
+
length
|
|
2403
|
+
);
|
|
2404
|
+
if (!field_id) {
|
|
2405
|
+
stream->input = field_name;
|
|
2406
|
+
capture_quantifiers_delete(&child_capture_quantifiers);
|
|
2407
|
+
return TSQueryErrorField;
|
|
2408
|
+
}
|
|
2409
|
+
|
|
2410
|
+
// Keep the field ids sorted.
|
|
2411
|
+
if (negated_field_count < MAX_NEGATED_FIELD_COUNT) {
|
|
2412
|
+
negated_field_ids[negated_field_count] = field_id;
|
|
2413
|
+
negated_field_count++;
|
|
2414
|
+
}
|
|
2415
|
+
|
|
2416
|
+
continue;
|
|
2417
|
+
}
|
|
2418
|
+
|
|
2419
|
+
// Parse a sibling anchor
|
|
2420
|
+
if (stream->next == '.') {
|
|
2421
|
+
child_is_immediate = true;
|
|
2422
|
+
stream_advance(stream);
|
|
2423
|
+
stream_skip_whitespace(stream);
|
|
2424
|
+
}
|
|
2425
|
+
|
|
2426
|
+
uint16_t step_index = self->steps.size;
|
|
2427
|
+
TSQueryError e = ts_query__parse_pattern(
|
|
2428
|
+
self,
|
|
2429
|
+
stream,
|
|
2430
|
+
depth + 1,
|
|
2431
|
+
child_is_immediate,
|
|
2432
|
+
&child_capture_quantifiers
|
|
2433
|
+
);
|
|
2434
|
+
if (e == PARENT_DONE) {
|
|
2435
|
+
if (stream->next == ')') {
|
|
2436
|
+
if (child_is_immediate) {
|
|
2437
|
+
if (last_child_step_index == 0) {
|
|
2438
|
+
capture_quantifiers_delete(&child_capture_quantifiers);
|
|
2439
|
+
return TSQueryErrorSyntax;
|
|
2440
|
+
}
|
|
2441
|
+
self->steps.contents[last_child_step_index].is_last_child = true;
|
|
2442
|
+
}
|
|
2443
|
+
|
|
2444
|
+
if (negated_field_count) {
|
|
2445
|
+
ts_query__add_negated_fields(
|
|
2446
|
+
self,
|
|
2447
|
+
starting_step_index,
|
|
2448
|
+
negated_field_ids,
|
|
2449
|
+
negated_field_count
|
|
2450
|
+
);
|
|
2451
|
+
}
|
|
2452
|
+
|
|
2453
|
+
stream_advance(stream);
|
|
2454
|
+
break;
|
|
2455
|
+
}
|
|
2456
|
+
e = TSQueryErrorSyntax;
|
|
2457
|
+
}
|
|
2458
|
+
if (e) {
|
|
2459
|
+
capture_quantifiers_delete(&child_capture_quantifiers);
|
|
2460
|
+
return e;
|
|
2461
|
+
}
|
|
2462
|
+
|
|
2463
|
+
capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
|
|
2464
|
+
|
|
2465
|
+
last_child_step_index = step_index;
|
|
2466
|
+
child_is_immediate = false;
|
|
2467
|
+
capture_quantifiers_clear(&child_capture_quantifiers);
|
|
2468
|
+
}
|
|
2469
|
+
capture_quantifiers_delete(&child_capture_quantifiers);
|
|
2470
|
+
}
|
|
2471
|
+
}
|
|
2472
|
+
|
|
2473
|
+
// Parse a wildcard pattern
|
|
2474
|
+
else if (stream->next == '_') {
|
|
2475
|
+
stream_advance(stream);
|
|
2476
|
+
stream_skip_whitespace(stream);
|
|
2477
|
+
|
|
2478
|
+
// Add a step that matches any kind of node
|
|
2479
|
+
array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate));
|
|
2480
|
+
}
|
|
2481
|
+
|
|
2482
|
+
// Parse a double-quoted anonymous leaf node expression
|
|
2483
|
+
else if (stream->next == '"') {
|
|
2484
|
+
const char *string_start = stream->input;
|
|
2485
|
+
TSQueryError e = ts_query__parse_string_literal(self, stream);
|
|
2486
|
+
if (e) return e;
|
|
2487
|
+
|
|
2488
|
+
// Add a step for the node
|
|
2489
|
+
TSSymbol symbol = ts_language_symbol_for_name(
|
|
2490
|
+
self->language,
|
|
2491
|
+
self->string_buffer.contents,
|
|
2492
|
+
self->string_buffer.size,
|
|
2493
|
+
false
|
|
2494
|
+
);
|
|
2495
|
+
if (!symbol) {
|
|
2496
|
+
stream_reset(stream, string_start + 1);
|
|
2497
|
+
return TSQueryErrorNodeType;
|
|
2498
|
+
}
|
|
2499
|
+
array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
|
|
2500
|
+
}
|
|
2501
|
+
|
|
2502
|
+
// Parse a field-prefixed pattern
|
|
2503
|
+
else if (stream_is_ident_start(stream)) {
|
|
2504
|
+
// Parse the field name
|
|
2505
|
+
const char *field_name = stream->input;
|
|
2506
|
+
stream_scan_identifier(stream);
|
|
2507
|
+
uint32_t length = (uint32_t)(stream->input - field_name);
|
|
2508
|
+
stream_skip_whitespace(stream);
|
|
2509
|
+
|
|
2510
|
+
if (stream->next != ':') {
|
|
2511
|
+
stream_reset(stream, field_name);
|
|
2512
|
+
return TSQueryErrorSyntax;
|
|
2513
|
+
}
|
|
2514
|
+
stream_advance(stream);
|
|
2515
|
+
stream_skip_whitespace(stream);
|
|
2516
|
+
|
|
2517
|
+
// Parse the pattern
|
|
2518
|
+
CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new();
|
|
2519
|
+
TSQueryError e = ts_query__parse_pattern(
|
|
2520
|
+
self,
|
|
2521
|
+
stream,
|
|
2522
|
+
depth,
|
|
2523
|
+
is_immediate,
|
|
2524
|
+
&field_capture_quantifiers
|
|
2525
|
+
);
|
|
2526
|
+
if (e) {
|
|
2527
|
+
capture_quantifiers_delete(&field_capture_quantifiers);
|
|
2528
|
+
if (e == PARENT_DONE) e = TSQueryErrorSyntax;
|
|
2529
|
+
return e;
|
|
2530
|
+
}
|
|
2531
|
+
|
|
2532
|
+
// Add the field name to the first step of the pattern
|
|
2533
|
+
TSFieldId field_id = ts_language_field_id_for_name(
|
|
2534
|
+
self->language,
|
|
2535
|
+
field_name,
|
|
2536
|
+
length
|
|
2537
|
+
);
|
|
2538
|
+
if (!field_id) {
|
|
2539
|
+
stream->input = field_name;
|
|
2540
|
+
return TSQueryErrorField;
|
|
2541
|
+
}
|
|
2542
|
+
|
|
2543
|
+
uint32_t step_index = starting_step_index;
|
|
2544
|
+
QueryStep *step = &self->steps.contents[step_index];
|
|
2545
|
+
for (;;) {
|
|
2546
|
+
step->field = field_id;
|
|
2547
|
+
if (
|
|
2548
|
+
step->alternative_index != NONE &&
|
|
2549
|
+
step->alternative_index > step_index &&
|
|
2550
|
+
step->alternative_index < self->steps.size
|
|
2551
|
+
) {
|
|
2552
|
+
step_index = step->alternative_index;
|
|
2553
|
+
step = &self->steps.contents[step_index];
|
|
2554
|
+
} else {
|
|
2555
|
+
break;
|
|
2556
|
+
}
|
|
2557
|
+
}
|
|
2558
|
+
|
|
2559
|
+
capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers);
|
|
2560
|
+
capture_quantifiers_delete(&field_capture_quantifiers);
|
|
2561
|
+
}
|
|
2562
|
+
|
|
2563
|
+
else {
|
|
2564
|
+
return TSQueryErrorSyntax;
|
|
2565
|
+
}
|
|
2566
|
+
|
|
2567
|
+
stream_skip_whitespace(stream);
|
|
2568
|
+
|
|
2569
|
+
// Parse suffixes modifiers for this pattern
|
|
2570
|
+
TSQuantifier quantifier = TSQuantifierOne;
|
|
2571
|
+
for (;;) {
|
|
2572
|
+
// Parse the one-or-more operator.
|
|
2573
|
+
if (stream->next == '+') {
|
|
2574
|
+
quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier);
|
|
2575
|
+
|
|
2576
|
+
stream_advance(stream);
|
|
2577
|
+
stream_skip_whitespace(stream);
|
|
2578
|
+
|
|
2579
|
+
QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
|
|
2580
|
+
repeat_step.alternative_index = starting_step_index;
|
|
2581
|
+
repeat_step.is_pass_through = true;
|
|
2582
|
+
repeat_step.alternative_is_immediate = true;
|
|
2583
|
+
array_push(&self->steps, repeat_step);
|
|
2584
|
+
}
|
|
2585
|
+
|
|
2586
|
+
// Parse the zero-or-more repetition operator.
|
|
2587
|
+
else if (stream->next == '*') {
|
|
2588
|
+
quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier);
|
|
2589
|
+
|
|
2590
|
+
stream_advance(stream);
|
|
2591
|
+
stream_skip_whitespace(stream);
|
|
2592
|
+
|
|
2593
|
+
QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
|
|
2594
|
+
repeat_step.alternative_index = starting_step_index;
|
|
2595
|
+
repeat_step.is_pass_through = true;
|
|
2596
|
+
repeat_step.alternative_is_immediate = true;
|
|
2597
|
+
array_push(&self->steps, repeat_step);
|
|
2598
|
+
|
|
2599
|
+
// Stop when `step->alternative_index` is `NONE` or it points to
|
|
2600
|
+
// `repeat_step` or beyond. Note that having just been pushed,
|
|
2601
|
+
// `repeat_step` occupies slot `self->steps.size - 1`.
|
|
2602
|
+
QueryStep *step = &self->steps.contents[starting_step_index];
|
|
2603
|
+
while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) {
|
|
2604
|
+
step = &self->steps.contents[step->alternative_index];
|
|
2605
|
+
}
|
|
2606
|
+
step->alternative_index = self->steps.size;
|
|
2607
|
+
}
|
|
2608
|
+
|
|
2609
|
+
// Parse the optional operator.
|
|
2610
|
+
else if (stream->next == '?') {
|
|
2611
|
+
quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier);
|
|
2612
|
+
|
|
2613
|
+
stream_advance(stream);
|
|
2614
|
+
stream_skip_whitespace(stream);
|
|
2615
|
+
|
|
2616
|
+
QueryStep *step = &self->steps.contents[starting_step_index];
|
|
2617
|
+
while (step->alternative_index != NONE && step->alternative_index < self->steps.size) {
|
|
2618
|
+
step = &self->steps.contents[step->alternative_index];
|
|
2619
|
+
}
|
|
2620
|
+
step->alternative_index = self->steps.size;
|
|
2621
|
+
}
|
|
2622
|
+
|
|
2623
|
+
// Parse an '@'-prefixed capture pattern
|
|
2624
|
+
else if (stream->next == '@') {
|
|
2625
|
+
stream_advance(stream);
|
|
2626
|
+
if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
|
|
2627
|
+
const char *capture_name = stream->input;
|
|
2628
|
+
stream_scan_identifier(stream);
|
|
2629
|
+
uint32_t length = (uint32_t)(stream->input - capture_name);
|
|
2630
|
+
stream_skip_whitespace(stream);
|
|
2631
|
+
|
|
2632
|
+
// Add the capture id to the first step of the pattern
|
|
2633
|
+
uint16_t capture_id = symbol_table_insert_name(
|
|
2634
|
+
&self->captures,
|
|
2635
|
+
capture_name,
|
|
2636
|
+
length
|
|
2637
|
+
);
|
|
2638
|
+
|
|
2639
|
+
// Add the capture quantifier
|
|
2640
|
+
capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne);
|
|
2641
|
+
|
|
2642
|
+
uint32_t step_index = starting_step_index;
|
|
2643
|
+
for (;;) {
|
|
2644
|
+
QueryStep *step = &self->steps.contents[step_index];
|
|
2645
|
+
query_step__add_capture(step, capture_id);
|
|
2646
|
+
if (
|
|
2647
|
+
step->alternative_index != NONE &&
|
|
2648
|
+
step->alternative_index > step_index &&
|
|
2649
|
+
step->alternative_index < self->steps.size
|
|
2650
|
+
) {
|
|
2651
|
+
step_index = step->alternative_index;
|
|
2652
|
+
} else {
|
|
2653
|
+
break;
|
|
2654
|
+
}
|
|
2655
|
+
}
|
|
2656
|
+
}
|
|
2657
|
+
|
|
2658
|
+
// No more suffix modifiers
|
|
2659
|
+
else {
|
|
2660
|
+
break;
|
|
2661
|
+
}
|
|
2662
|
+
}
|
|
2663
|
+
|
|
2664
|
+
capture_quantifiers_mul(capture_quantifiers, quantifier);
|
|
2665
|
+
|
|
2666
|
+
return 0;
|
|
2667
|
+
}
|
|
2668
|
+
|
|
2669
|
+
TSQuery *ts_query_new(
|
|
2670
|
+
const TSLanguage *language,
|
|
2671
|
+
const char *source,
|
|
2672
|
+
uint32_t source_len,
|
|
2673
|
+
uint32_t *error_offset,
|
|
2674
|
+
TSQueryError *error_type
|
|
2675
|
+
) {
|
|
2676
|
+
if (
|
|
2677
|
+
!language ||
|
|
2678
|
+
language->version > TREE_SITTER_LANGUAGE_VERSION ||
|
|
2679
|
+
language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
|
|
2680
|
+
) {
|
|
2681
|
+
*error_type = TSQueryErrorLanguage;
|
|
2682
|
+
return NULL;
|
|
2683
|
+
}
|
|
2684
|
+
|
|
2685
|
+
TSQuery *self = ts_malloc(sizeof(TSQuery));
|
|
2686
|
+
*self = (TSQuery) {
|
|
2687
|
+
.steps = array_new(),
|
|
2688
|
+
.pattern_map = array_new(),
|
|
2689
|
+
.captures = symbol_table_new(),
|
|
2690
|
+
.capture_quantifiers = array_new(),
|
|
2691
|
+
.predicate_values = symbol_table_new(),
|
|
2692
|
+
.predicate_steps = array_new(),
|
|
2693
|
+
.patterns = array_new(),
|
|
2694
|
+
.step_offsets = array_new(),
|
|
2695
|
+
.string_buffer = array_new(),
|
|
2696
|
+
.negated_fields = array_new(),
|
|
2697
|
+
.repeat_symbols_with_rootless_patterns = array_new(),
|
|
2698
|
+
.wildcard_root_pattern_count = 0,
|
|
2699
|
+
.language = ts_language_copy(language),
|
|
2700
|
+
};
|
|
2701
|
+
|
|
2702
|
+
array_push(&self->negated_fields, 0);
|
|
2703
|
+
|
|
2704
|
+
// Parse all of the S-expressions in the given string.
|
|
2705
|
+
Stream stream = stream_new(source, source_len);
|
|
2706
|
+
stream_skip_whitespace(&stream);
|
|
2707
|
+
while (stream.input < stream.end) {
|
|
2708
|
+
uint32_t pattern_index = self->patterns.size;
|
|
2709
|
+
uint32_t start_step_index = self->steps.size;
|
|
2710
|
+
uint32_t start_predicate_step_index = self->predicate_steps.size;
|
|
2711
|
+
array_push(&self->patterns, ((QueryPattern) {
|
|
2712
|
+
.steps = (Slice) {.offset = start_step_index},
|
|
2713
|
+
.predicate_steps = (Slice) {.offset = start_predicate_step_index},
|
|
2714
|
+
.start_byte = stream_offset(&stream),
|
|
2715
|
+
.is_non_local = false,
|
|
2716
|
+
}));
|
|
2717
|
+
CaptureQuantifiers capture_quantifiers = capture_quantifiers_new();
|
|
2718
|
+
*error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers);
|
|
2719
|
+
array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
|
|
2720
|
+
|
|
2721
|
+
QueryPattern *pattern = array_back(&self->patterns);
|
|
2722
|
+
pattern->steps.length = self->steps.size - start_step_index;
|
|
2723
|
+
pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index;
|
|
2724
|
+
pattern->end_byte = stream_offset(&stream);
|
|
2725
|
+
|
|
2726
|
+
// If any pattern could not be parsed, then report the error information
|
|
2727
|
+
// and terminate.
|
|
2728
|
+
if (*error_type) {
|
|
2729
|
+
if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax;
|
|
2730
|
+
*error_offset = stream_offset(&stream);
|
|
2731
|
+
capture_quantifiers_delete(&capture_quantifiers);
|
|
2732
|
+
ts_query_delete(self);
|
|
2733
|
+
return NULL;
|
|
2734
|
+
}
|
|
2735
|
+
|
|
2736
|
+
// Maintain a list of capture quantifiers for each pattern
|
|
2737
|
+
array_push(&self->capture_quantifiers, capture_quantifiers);
|
|
2738
|
+
|
|
2739
|
+
// Maintain a map that can look up patterns for a given root symbol.
|
|
2740
|
+
uint16_t wildcard_root_alternative_index = NONE;
|
|
2741
|
+
for (;;) {
|
|
2742
|
+
QueryStep *step = &self->steps.contents[start_step_index];
|
|
2743
|
+
|
|
2744
|
+
// If a pattern has a wildcard at its root, but it has a non-wildcard child,
|
|
2745
|
+
// then optimize the matching process by skipping matching the wildcard.
|
|
2746
|
+
// Later, during the matching process, the query cursor will check that
|
|
2747
|
+
// there is a parent node, and capture it if necessary.
|
|
2748
|
+
if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) {
|
|
2749
|
+
QueryStep *second_step = &self->steps.contents[start_step_index + 1];
|
|
2750
|
+
if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1 && !second_step->is_immediate) {
|
|
2751
|
+
wildcard_root_alternative_index = step->alternative_index;
|
|
2752
|
+
start_step_index += 1;
|
|
2753
|
+
step = second_step;
|
|
2754
|
+
}
|
|
2755
|
+
}
|
|
2756
|
+
|
|
2757
|
+
// Determine whether the pattern has a single root node. This affects
|
|
2758
|
+
// decisions about whether or not to start matching the pattern when
|
|
2759
|
+
// a query cursor has a range restriction or when immediately within an
|
|
2760
|
+
// error node.
|
|
2761
|
+
uint32_t start_depth = step->depth;
|
|
2762
|
+
bool is_rooted = start_depth == 0;
|
|
2763
|
+
for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) {
|
|
2764
|
+
QueryStep *child_step = &self->steps.contents[step_index];
|
|
2765
|
+
if (child_step->is_dead_end) break;
|
|
2766
|
+
if (child_step->depth == start_depth) {
|
|
2767
|
+
is_rooted = false;
|
|
2768
|
+
break;
|
|
2769
|
+
}
|
|
2770
|
+
}
|
|
2771
|
+
|
|
2772
|
+
ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) {
|
|
2773
|
+
.step_index = start_step_index,
|
|
2774
|
+
.pattern_index = pattern_index,
|
|
2775
|
+
.is_rooted = is_rooted
|
|
2776
|
+
});
|
|
2777
|
+
if (step->symbol == WILDCARD_SYMBOL) {
|
|
2778
|
+
self->wildcard_root_pattern_count++;
|
|
2779
|
+
}
|
|
2780
|
+
|
|
2781
|
+
// If there are alternatives or options at the root of the pattern,
|
|
2782
|
+
// then add multiple entries to the pattern map.
|
|
2783
|
+
if (step->alternative_index != NONE) {
|
|
2784
|
+
start_step_index = step->alternative_index;
|
|
2785
|
+
} else if (wildcard_root_alternative_index != NONE) {
|
|
2786
|
+
start_step_index = wildcard_root_alternative_index;
|
|
2787
|
+
wildcard_root_alternative_index = NONE;
|
|
2788
|
+
} else {
|
|
2789
|
+
break;
|
|
2790
|
+
}
|
|
2791
|
+
}
|
|
2792
|
+
}
|
|
2793
|
+
|
|
2794
|
+
if (!ts_query__analyze_patterns(self, error_offset)) {
|
|
2795
|
+
*error_type = TSQueryErrorStructure;
|
|
2796
|
+
ts_query_delete(self);
|
|
2797
|
+
return NULL;
|
|
2798
|
+
}
|
|
2799
|
+
|
|
2800
|
+
array_delete(&self->string_buffer);
|
|
2801
|
+
return self;
|
|
2802
|
+
}
|
|
2803
|
+
|
|
2804
|
+
// Release a query and everything it owns. Passing NULL is a no-op.
void ts_query_delete(TSQuery *self) {
  if (!self) return;

  array_delete(&self->steps);
  array_delete(&self->pattern_map);
  array_delete(&self->predicate_steps);
  array_delete(&self->patterns);
  array_delete(&self->step_offsets);
  array_delete(&self->string_buffer);
  array_delete(&self->negated_fields);
  array_delete(&self->repeat_symbols_with_rootless_patterns);
  ts_language_delete(self->language);
  symbol_table_delete(&self->captures);
  symbol_table_delete(&self->predicate_values);

  // Each pattern owns its own quantifier list; free those before the array
  // that holds them.
  uint32_t pattern_count = self->capture_quantifiers.size;
  for (uint32_t i = 0; i < pattern_count; i++) {
    capture_quantifiers_delete(array_get(&self->capture_quantifiers, i));
  }
  array_delete(&self->capture_quantifiers);

  ts_free(self);
}
|
|
2825
|
+
|
|
2826
|
+
uint32_t ts_query_pattern_count(const TSQuery *self) {
|
|
2827
|
+
return self->patterns.size;
|
|
2828
|
+
}
|
|
2829
|
+
|
|
2830
|
+
uint32_t ts_query_capture_count(const TSQuery *self) {
|
|
2831
|
+
return self->captures.slices.size;
|
|
2832
|
+
}
|
|
2833
|
+
|
|
2834
|
+
uint32_t ts_query_string_count(const TSQuery *self) {
|
|
2835
|
+
return self->predicate_values.slices.size;
|
|
2836
|
+
}
|
|
2837
|
+
|
|
2838
|
+
const char *ts_query_capture_name_for_id(
|
|
2839
|
+
const TSQuery *self,
|
|
2840
|
+
uint32_t index,
|
|
2841
|
+
uint32_t *length
|
|
2842
|
+
) {
|
|
2843
|
+
return symbol_table_name_for_id(&self->captures, index, length);
|
|
2844
|
+
}
|
|
2845
|
+
|
|
2846
|
+
TSQuantifier ts_query_capture_quantifier_for_id(
|
|
2847
|
+
const TSQuery *self,
|
|
2848
|
+
uint32_t pattern_index,
|
|
2849
|
+
uint32_t capture_index
|
|
2850
|
+
) {
|
|
2851
|
+
CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index);
|
|
2852
|
+
return capture_quantifier_for_id(capture_quantifiers, capture_index);
|
|
2853
|
+
}
|
|
2854
|
+
|
|
2855
|
+
const char *ts_query_string_value_for_id(
|
|
2856
|
+
const TSQuery *self,
|
|
2857
|
+
uint32_t index,
|
|
2858
|
+
uint32_t *length
|
|
2859
|
+
) {
|
|
2860
|
+
return symbol_table_name_for_id(&self->predicate_values, index, length);
|
|
2861
|
+
}
|
|
2862
|
+
|
|
2863
|
+
const TSQueryPredicateStep *ts_query_predicates_for_pattern(
|
|
2864
|
+
const TSQuery *self,
|
|
2865
|
+
uint32_t pattern_index,
|
|
2866
|
+
uint32_t *step_count
|
|
2867
|
+
) {
|
|
2868
|
+
Slice slice = self->patterns.contents[pattern_index].predicate_steps;
|
|
2869
|
+
*step_count = slice.length;
|
|
2870
|
+
if (self->predicate_steps.contents == NULL) {
|
|
2871
|
+
return NULL;
|
|
2872
|
+
}
|
|
2873
|
+
return &self->predicate_steps.contents[slice.offset];
|
|
2874
|
+
}
|
|
2875
|
+
|
|
2876
|
+
uint32_t ts_query_start_byte_for_pattern(
|
|
2877
|
+
const TSQuery *self,
|
|
2878
|
+
uint32_t pattern_index
|
|
2879
|
+
) {
|
|
2880
|
+
return self->patterns.contents[pattern_index].start_byte;
|
|
2881
|
+
}
|
|
2882
|
+
|
|
2883
|
+
uint32_t ts_query_end_byte_for_pattern(
|
|
2884
|
+
const TSQuery *self,
|
|
2885
|
+
uint32_t pattern_index
|
|
2886
|
+
) {
|
|
2887
|
+
return self->patterns.contents[pattern_index].end_byte;
|
|
2888
|
+
}
|
|
2889
|
+
|
|
2890
|
+
bool ts_query_is_pattern_rooted(
|
|
2891
|
+
const TSQuery *self,
|
|
2892
|
+
uint32_t pattern_index
|
|
2893
|
+
) {
|
|
2894
|
+
for (unsigned i = 0; i < self->pattern_map.size; i++) {
|
|
2895
|
+
PatternEntry *entry = &self->pattern_map.contents[i];
|
|
2896
|
+
if (entry->pattern_index == pattern_index) {
|
|
2897
|
+
if (!entry->is_rooted) return false;
|
|
2898
|
+
}
|
|
2899
|
+
}
|
|
2900
|
+
return true;
|
|
2901
|
+
}
|
|
2902
|
+
|
|
2903
|
+
bool ts_query_is_pattern_non_local(
|
|
2904
|
+
const TSQuery *self,
|
|
2905
|
+
uint32_t pattern_index
|
|
2906
|
+
) {
|
|
2907
|
+
if (pattern_index < self->patterns.size) {
|
|
2908
|
+
return self->patterns.contents[pattern_index].is_non_local;
|
|
2909
|
+
} else {
|
|
2910
|
+
return false;
|
|
2911
|
+
}
|
|
2912
|
+
}
|
|
2913
|
+
|
|
2914
|
+
bool ts_query_is_pattern_guaranteed_at_step(
|
|
2915
|
+
const TSQuery *self,
|
|
2916
|
+
uint32_t byte_offset
|
|
2917
|
+
) {
|
|
2918
|
+
uint32_t step_index = UINT32_MAX;
|
|
2919
|
+
for (unsigned i = 0; i < self->step_offsets.size; i++) {
|
|
2920
|
+
StepOffset *step_offset = &self->step_offsets.contents[i];
|
|
2921
|
+
if (step_offset->byte_offset > byte_offset) break;
|
|
2922
|
+
step_index = step_offset->step_index;
|
|
2923
|
+
}
|
|
2924
|
+
if (step_index < self->steps.size) {
|
|
2925
|
+
return self->steps.contents[step_index].root_pattern_guaranteed;
|
|
2926
|
+
} else {
|
|
2927
|
+
return false;
|
|
2928
|
+
}
|
|
2929
|
+
}
|
|
2930
|
+
|
|
2931
|
+
bool ts_query__step_is_fallible(
|
|
2932
|
+
const TSQuery *self,
|
|
2933
|
+
uint16_t step_index
|
|
2934
|
+
) {
|
|
2935
|
+
ts_assert((uint32_t)step_index + 1 < self->steps.size);
|
|
2936
|
+
QueryStep *step = &self->steps.contents[step_index];
|
|
2937
|
+
QueryStep *next_step = &self->steps.contents[step_index + 1];
|
|
2938
|
+
return (
|
|
2939
|
+
next_step->depth != PATTERN_DONE_MARKER &&
|
|
2940
|
+
next_step->depth > step->depth &&
|
|
2941
|
+
!next_step->parent_pattern_guaranteed
|
|
2942
|
+
);
|
|
2943
|
+
}
|
|
2944
|
+
|
|
2945
|
+
void ts_query_disable_capture(
|
|
2946
|
+
TSQuery *self,
|
|
2947
|
+
const char *name,
|
|
2948
|
+
uint32_t length
|
|
2949
|
+
) {
|
|
2950
|
+
// Remove capture information for any pattern step that previously
|
|
2951
|
+
// captured with the given name.
|
|
2952
|
+
int id = symbol_table_id_for_name(&self->captures, name, length);
|
|
2953
|
+
if (id != -1) {
|
|
2954
|
+
for (unsigned i = 0; i < self->steps.size; i++) {
|
|
2955
|
+
QueryStep *step = &self->steps.contents[i];
|
|
2956
|
+
query_step__remove_capture(step, id);
|
|
2957
|
+
}
|
|
2958
|
+
}
|
|
2959
|
+
}
|
|
2960
|
+
|
|
2961
|
+
void ts_query_disable_pattern(
|
|
2962
|
+
TSQuery *self,
|
|
2963
|
+
uint32_t pattern_index
|
|
2964
|
+
) {
|
|
2965
|
+
// Remove the given pattern from the pattern map. Its steps will still
|
|
2966
|
+
// be in the `steps` array, but they will never be read.
|
|
2967
|
+
for (unsigned i = 0; i < self->pattern_map.size; i++) {
|
|
2968
|
+
PatternEntry *pattern = &self->pattern_map.contents[i];
|
|
2969
|
+
if (pattern->pattern_index == pattern_index) {
|
|
2970
|
+
array_erase(&self->pattern_map, i);
|
|
2971
|
+
i--;
|
|
2972
|
+
}
|
|
2973
|
+
}
|
|
2974
|
+
}
|
|
2975
|
+
|
|
2976
|
+
/***************
|
|
2977
|
+
* QueryCursor
|
|
2978
|
+
***************/
|
|
2979
|
+
|
|
2980
|
+
TSQueryCursor *ts_query_cursor_new(void) {
|
|
2981
|
+
TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
|
|
2982
|
+
*self = (TSQueryCursor) {
|
|
2983
|
+
.did_exceed_match_limit = false,
|
|
2984
|
+
.ascending = false,
|
|
2985
|
+
.halted = false,
|
|
2986
|
+
.states = array_new(),
|
|
2987
|
+
.finished_states = array_new(),
|
|
2988
|
+
.capture_list_pool = capture_list_pool_new(),
|
|
2989
|
+
.start_byte = 0,
|
|
2990
|
+
.end_byte = UINT32_MAX,
|
|
2991
|
+
.start_point = {0, 0},
|
|
2992
|
+
.end_point = POINT_MAX,
|
|
2993
|
+
.max_start_depth = UINT32_MAX,
|
|
2994
|
+
.timeout_duration = 0,
|
|
2995
|
+
.end_clock = clock_null(),
|
|
2996
|
+
.operation_count = 0,
|
|
2997
|
+
};
|
|
2998
|
+
array_reserve(&self->states, 8);
|
|
2999
|
+
array_reserve(&self->finished_states, 8);
|
|
3000
|
+
return self;
|
|
3001
|
+
}
|
|
3002
|
+
|
|
3003
|
+
// Release a query cursor together with its state arrays, tree cursor and
// capture-list pool. Passing NULL is a no-op, matching `ts_query_delete`.
void ts_query_cursor_delete(TSQueryCursor *self) {
  if (!self) return;
  array_delete(&self->states);
  array_delete(&self->finished_states);
  ts_tree_cursor_delete(&self->cursor);
  capture_list_pool_delete(&self->capture_list_pool);
  ts_free(self);
}
|
|
3010
|
+
|
|
3011
|
+
bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
|
|
3012
|
+
return self->did_exceed_match_limit;
|
|
3013
|
+
}
|
|
3014
|
+
|
|
3015
|
+
uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
|
|
3016
|
+
return self->capture_list_pool.max_capture_list_count;
|
|
3017
|
+
}
|
|
3018
|
+
|
|
3019
|
+
// Set the match limit by writing the capture-list pool's maximum list count.
void ts_query_cursor_set_match_limit(
  TSQueryCursor *self,
  uint32_t limit
) {
  self->capture_list_pool.max_capture_list_count = limit;
}
|
|
3022
|
+
|
|
3023
|
+
uint64_t ts_query_cursor_timeout_micros(const TSQueryCursor *self) {
|
|
3024
|
+
return duration_to_micros(self->timeout_duration);
|
|
3025
|
+
}
|
|
3026
|
+
|
|
3027
|
+
// Store the timeout, converted from microseconds to the cursor's duration
// representation.
void ts_query_cursor_set_timeout_micros(
  TSQueryCursor *self,
  uint64_t timeout_micros
) {
  self->timeout_duration = duration_from_micros(timeout_micros);
}
|
|
3030
|
+
|
|
3031
|
+
// Tracing for query execution. LOG expands to nothing (its arguments are not
// evaluated) unless DEBUG_EXECUTE_QUERY is defined, in which case it prints to
// stderr.
#ifndef DEBUG_EXECUTE_QUERY
#define LOG(...)
#else
#define LOG(...) fprintf(stderr, __VA_ARGS__)
#endif
|
|
3036
|
+
|
|
3037
|
+
void ts_query_cursor_exec(
|
|
3038
|
+
TSQueryCursor *self,
|
|
3039
|
+
const TSQuery *query,
|
|
3040
|
+
TSNode node
|
|
3041
|
+
) {
|
|
3042
|
+
if (query) {
|
|
3043
|
+
LOG("query steps:\n");
|
|
3044
|
+
for (unsigned i = 0; i < query->steps.size; i++) {
|
|
3045
|
+
QueryStep *step = &query->steps.contents[i];
|
|
3046
|
+
LOG(" %u: {", i);
|
|
3047
|
+
if (step->depth == PATTERN_DONE_MARKER) {
|
|
3048
|
+
LOG("DONE");
|
|
3049
|
+
} else if (step->is_dead_end) {
|
|
3050
|
+
LOG("dead_end");
|
|
3051
|
+
} else if (step->is_pass_through) {
|
|
3052
|
+
LOG("pass_through");
|
|
3053
|
+
} else if (step->symbol != WILDCARD_SYMBOL) {
|
|
3054
|
+
LOG("symbol: %s", query->language->symbol_names[step->symbol]);
|
|
3055
|
+
} else {
|
|
3056
|
+
LOG("symbol: *");
|
|
3057
|
+
}
|
|
3058
|
+
if (step->field) {
|
|
3059
|
+
LOG(", field: %s", query->language->field_names[step->field]);
|
|
3060
|
+
}
|
|
3061
|
+
if (step->alternative_index != NONE) {
|
|
3062
|
+
LOG(", alternative: %u", step->alternative_index);
|
|
3063
|
+
}
|
|
3064
|
+
LOG("},\n");
|
|
3065
|
+
}
|
|
3066
|
+
}
|
|
3067
|
+
|
|
3068
|
+
array_clear(&self->states);
|
|
3069
|
+
array_clear(&self->finished_states);
|
|
3070
|
+
ts_tree_cursor_reset(&self->cursor, node);
|
|
3071
|
+
capture_list_pool_reset(&self->capture_list_pool);
|
|
3072
|
+
self->on_visible_node = true;
|
|
3073
|
+
self->next_state_id = 0;
|
|
3074
|
+
self->depth = 0;
|
|
3075
|
+
self->ascending = false;
|
|
3076
|
+
self->halted = false;
|
|
3077
|
+
self->query = query;
|
|
3078
|
+
self->did_exceed_match_limit = false;
|
|
3079
|
+
self->operation_count = 0;
|
|
3080
|
+
if (self->timeout_duration) {
|
|
3081
|
+
self->end_clock = clock_after(clock_now(), self->timeout_duration);
|
|
3082
|
+
} else {
|
|
3083
|
+
self->end_clock = clock_null();
|
|
3084
|
+
}
|
|
3085
|
+
}
|
|
3086
|
+
|
|
3087
|
+
void ts_query_cursor_set_byte_range(
|
|
3088
|
+
TSQueryCursor *self,
|
|
3089
|
+
uint32_t start_byte,
|
|
3090
|
+
uint32_t end_byte
|
|
3091
|
+
) {
|
|
3092
|
+
if (end_byte == 0) {
|
|
3093
|
+
end_byte = UINT32_MAX;
|
|
3094
|
+
}
|
|
3095
|
+
self->start_byte = start_byte;
|
|
3096
|
+
self->end_byte = end_byte;
|
|
3097
|
+
}
|
|
3098
|
+
|
|
3099
|
+
void ts_query_cursor_set_point_range(
|
|
3100
|
+
TSQueryCursor *self,
|
|
3101
|
+
TSPoint start_point,
|
|
3102
|
+
TSPoint end_point
|
|
3103
|
+
) {
|
|
3104
|
+
if (end_point.row == 0 && end_point.column == 0) {
|
|
3105
|
+
end_point = POINT_MAX;
|
|
3106
|
+
}
|
|
3107
|
+
self->start_point = start_point;
|
|
3108
|
+
self->end_point = end_point;
|
|
3109
|
+
}
|
|
3110
|
+
|
|
3111
|
+
// Search through all of the in-progress states, and find the captured
|
|
3112
|
+
// node that occurs earliest in the document.
|
|
3113
|
+
static bool ts_query_cursor__first_in_progress_capture(
|
|
3114
|
+
TSQueryCursor *self,
|
|
3115
|
+
uint32_t *state_index,
|
|
3116
|
+
uint32_t *byte_offset,
|
|
3117
|
+
uint32_t *pattern_index,
|
|
3118
|
+
bool *root_pattern_guaranteed
|
|
3119
|
+
) {
|
|
3120
|
+
bool result = false;
|
|
3121
|
+
*state_index = UINT32_MAX;
|
|
3122
|
+
*byte_offset = UINT32_MAX;
|
|
3123
|
+
*pattern_index = UINT32_MAX;
|
|
3124
|
+
for (unsigned i = 0; i < self->states.size; i++) {
|
|
3125
|
+
QueryState *state = &self->states.contents[i];
|
|
3126
|
+
if (state->dead) continue;
|
|
3127
|
+
|
|
3128
|
+
const CaptureList *captures = capture_list_pool_get(
|
|
3129
|
+
&self->capture_list_pool,
|
|
3130
|
+
state->capture_list_id
|
|
3131
|
+
);
|
|
3132
|
+
if (state->consumed_capture_count >= captures->size) {
|
|
3133
|
+
continue;
|
|
3134
|
+
}
|
|
3135
|
+
|
|
3136
|
+
TSNode node = captures->contents[state->consumed_capture_count].node;
|
|
3137
|
+
if (
|
|
3138
|
+
ts_node_end_byte(node) <= self->start_byte ||
|
|
3139
|
+
point_lte(ts_node_end_point(node), self->start_point)
|
|
3140
|
+
) {
|
|
3141
|
+
state->consumed_capture_count++;
|
|
3142
|
+
i--;
|
|
3143
|
+
continue;
|
|
3144
|
+
}
|
|
3145
|
+
|
|
3146
|
+
uint32_t node_start_byte = ts_node_start_byte(node);
|
|
3147
|
+
if (
|
|
3148
|
+
!result ||
|
|
3149
|
+
node_start_byte < *byte_offset ||
|
|
3150
|
+
(node_start_byte == *byte_offset && state->pattern_index < *pattern_index)
|
|
3151
|
+
) {
|
|
3152
|
+
QueryStep *step = &self->query->steps.contents[state->step_index];
|
|
3153
|
+
if (root_pattern_guaranteed) {
|
|
3154
|
+
*root_pattern_guaranteed = step->root_pattern_guaranteed;
|
|
3155
|
+
} else if (step->root_pattern_guaranteed) {
|
|
3156
|
+
continue;
|
|
3157
|
+
}
|
|
3158
|
+
|
|
3159
|
+
result = true;
|
|
3160
|
+
*state_index = i;
|
|
3161
|
+
*byte_offset = node_start_byte;
|
|
3162
|
+
*pattern_index = state->pattern_index;
|
|
3163
|
+
}
|
|
3164
|
+
}
|
|
3165
|
+
return result;
|
|
3166
|
+
}
|
|
3167
|
+
|
|
3168
|
+
// Determine which node is first in a depth-first traversal
|
|
3169
|
+
int ts_query_cursor__compare_nodes(TSNode left, TSNode right) {
|
|
3170
|
+
if (left.id != right.id) {
|
|
3171
|
+
uint32_t left_start = ts_node_start_byte(left);
|
|
3172
|
+
uint32_t right_start = ts_node_start_byte(right);
|
|
3173
|
+
if (left_start < right_start) return -1;
|
|
3174
|
+
if (left_start > right_start) return 1;
|
|
3175
|
+
uint32_t left_node_count = ts_node_end_byte(left);
|
|
3176
|
+
uint32_t right_node_count = ts_node_end_byte(right);
|
|
3177
|
+
if (left_node_count > right_node_count) return -1;
|
|
3178
|
+
if (left_node_count < right_node_count) return 1;
|
|
3179
|
+
}
|
|
3180
|
+
return 0;
|
|
3181
|
+
}
|
|
3182
|
+
|
|
3183
|
+
// Determine if either state contains a superset of the other state's captures.
|
|
3184
|
+
void ts_query_cursor__compare_captures(
|
|
3185
|
+
TSQueryCursor *self,
|
|
3186
|
+
QueryState *left_state,
|
|
3187
|
+
QueryState *right_state,
|
|
3188
|
+
bool *left_contains_right,
|
|
3189
|
+
bool *right_contains_left
|
|
3190
|
+
) {
|
|
3191
|
+
const CaptureList *left_captures = capture_list_pool_get(
|
|
3192
|
+
&self->capture_list_pool,
|
|
3193
|
+
left_state->capture_list_id
|
|
3194
|
+
);
|
|
3195
|
+
const CaptureList *right_captures = capture_list_pool_get(
|
|
3196
|
+
&self->capture_list_pool,
|
|
3197
|
+
right_state->capture_list_id
|
|
3198
|
+
);
|
|
3199
|
+
*left_contains_right = true;
|
|
3200
|
+
*right_contains_left = true;
|
|
3201
|
+
unsigned i = 0, j = 0;
|
|
3202
|
+
for (;;) {
|
|
3203
|
+
if (i < left_captures->size) {
|
|
3204
|
+
if (j < right_captures->size) {
|
|
3205
|
+
TSQueryCapture *left = &left_captures->contents[i];
|
|
3206
|
+
TSQueryCapture *right = &right_captures->contents[j];
|
|
3207
|
+
if (left->node.id == right->node.id && left->index == right->index) {
|
|
3208
|
+
i++;
|
|
3209
|
+
j++;
|
|
3210
|
+
} else {
|
|
3211
|
+
switch (ts_query_cursor__compare_nodes(left->node, right->node)) {
|
|
3212
|
+
case -1:
|
|
3213
|
+
*right_contains_left = false;
|
|
3214
|
+
i++;
|
|
3215
|
+
break;
|
|
3216
|
+
case 1:
|
|
3217
|
+
*left_contains_right = false;
|
|
3218
|
+
j++;
|
|
3219
|
+
break;
|
|
3220
|
+
default:
|
|
3221
|
+
*right_contains_left = false;
|
|
3222
|
+
*left_contains_right = false;
|
|
3223
|
+
i++;
|
|
3224
|
+
j++;
|
|
3225
|
+
break;
|
|
3226
|
+
}
|
|
3227
|
+
}
|
|
3228
|
+
} else {
|
|
3229
|
+
*right_contains_left = false;
|
|
3230
|
+
break;
|
|
3231
|
+
}
|
|
3232
|
+
} else {
|
|
3233
|
+
if (j < right_captures->size) {
|
|
3234
|
+
*left_contains_right = false;
|
|
3235
|
+
}
|
|
3236
|
+
break;
|
|
3237
|
+
}
|
|
3238
|
+
}
|
|
3239
|
+
}
|
|
3240
|
+
|
|
3241
|
+
static void ts_query_cursor__add_state(
|
|
3242
|
+
TSQueryCursor *self,
|
|
3243
|
+
const PatternEntry *pattern
|
|
3244
|
+
) {
|
|
3245
|
+
QueryStep *step = &self->query->steps.contents[pattern->step_index];
|
|
3246
|
+
uint32_t start_depth = self->depth - step->depth;
|
|
3247
|
+
|
|
3248
|
+
// Keep the states array in ascending order of start_depth and pattern_index,
|
|
3249
|
+
// so that it can be processed more efficiently elsewhere. Usually, there is
|
|
3250
|
+
// no work to do here because of two facts:
|
|
3251
|
+
// * States with lower start_depth are naturally added first due to the
|
|
3252
|
+
// order in which nodes are visited.
|
|
3253
|
+
// * Earlier patterns are naturally added first because of the ordering of the
|
|
3254
|
+
// pattern_map data structure that's used to initiate matches.
|
|
3255
|
+
//
|
|
3256
|
+
// This loop is only needed in cases where two conditions hold:
|
|
3257
|
+
// * A pattern consists of more than one sibling node, so that its states
|
|
3258
|
+
// remain in progress after exiting the node that started the match.
|
|
3259
|
+
// * The first node in the pattern matches against multiple nodes at the
|
|
3260
|
+
// same depth.
|
|
3261
|
+
//
|
|
3262
|
+
// An example of this is the pattern '((comment)* (function))'. If multiple
|
|
3263
|
+
// `comment` nodes appear in a row, then we may initiate a new state for this
|
|
3264
|
+
// pattern while another state for the same pattern is already in progress.
|
|
3265
|
+
// If there are multiple patterns like this in a query, then this loop will
|
|
3266
|
+
// need to execute in order to keep the states ordered by pattern_index.
|
|
3267
|
+
uint32_t index = self->states.size;
|
|
3268
|
+
while (index > 0) {
|
|
3269
|
+
QueryState *prev_state = &self->states.contents[index - 1];
|
|
3270
|
+
if (prev_state->start_depth < start_depth) break;
|
|
3271
|
+
if (prev_state->start_depth == start_depth) {
|
|
3272
|
+
// Avoid inserting an unnecessary duplicate state, which would be
|
|
3273
|
+
// immediately pruned by the longest-match criteria.
|
|
3274
|
+
if (
|
|
3275
|
+
prev_state->pattern_index == pattern->pattern_index &&
|
|
3276
|
+
prev_state->step_index == pattern->step_index
|
|
3277
|
+
) return;
|
|
3278
|
+
if (prev_state->pattern_index <= pattern->pattern_index) break;
|
|
3279
|
+
}
|
|
3280
|
+
index--;
|
|
3281
|
+
}
|
|
3282
|
+
|
|
3283
|
+
LOG(
|
|
3284
|
+
" start state. pattern:%u, step:%u\n",
|
|
3285
|
+
pattern->pattern_index,
|
|
3286
|
+
pattern->step_index
|
|
3287
|
+
);
|
|
3288
|
+
array_insert(&self->states, index, ((QueryState) {
|
|
3289
|
+
.id = UINT32_MAX,
|
|
3290
|
+
.capture_list_id = NONE,
|
|
3291
|
+
.step_index = pattern->step_index,
|
|
3292
|
+
.pattern_index = pattern->pattern_index,
|
|
3293
|
+
.start_depth = start_depth,
|
|
3294
|
+
.consumed_capture_count = 0,
|
|
3295
|
+
.seeking_immediate_match = true,
|
|
3296
|
+
.has_in_progress_alternatives = false,
|
|
3297
|
+
.needs_parent = step->depth == 1,
|
|
3298
|
+
.dead = false,
|
|
3299
|
+
}));
|
|
3300
|
+
}
|
|
3301
|
+
|
|
3302
|
+
// Acquire a capture list for this state. If there are no capture lists left in the
|
|
3303
|
+
// pool, this will steal the capture list from another existing state, and mark that
|
|
3304
|
+
// other state as 'dead'.
|
|
3305
|
+
static CaptureList *ts_query_cursor__prepare_to_capture(
|
|
3306
|
+
TSQueryCursor *self,
|
|
3307
|
+
QueryState *state,
|
|
3308
|
+
unsigned state_index_to_preserve
|
|
3309
|
+
) {
|
|
3310
|
+
if (state->capture_list_id == NONE) {
|
|
3311
|
+
state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
|
|
3312
|
+
|
|
3313
|
+
// If there are no capture lists left in the pool, then terminate whichever
|
|
3314
|
+
// state has captured the earliest node in the document, and steal its
|
|
3315
|
+
// capture list.
|
|
3316
|
+
if (state->capture_list_id == NONE) {
|
|
3317
|
+
self->did_exceed_match_limit = true;
|
|
3318
|
+
uint32_t state_index, byte_offset, pattern_index;
|
|
3319
|
+
if (
|
|
3320
|
+
ts_query_cursor__first_in_progress_capture(
|
|
3321
|
+
self,
|
|
3322
|
+
&state_index,
|
|
3323
|
+
&byte_offset,
|
|
3324
|
+
&pattern_index,
|
|
3325
|
+
NULL
|
|
3326
|
+
) &&
|
|
3327
|
+
state_index != state_index_to_preserve
|
|
3328
|
+
) {
|
|
3329
|
+
LOG(
|
|
3330
|
+
" abandon state. index:%u, pattern:%u, offset:%u.\n",
|
|
3331
|
+
state_index, pattern_index, byte_offset
|
|
3332
|
+
);
|
|
3333
|
+
QueryState *other_state = &self->states.contents[state_index];
|
|
3334
|
+
state->capture_list_id = other_state->capture_list_id;
|
|
3335
|
+
other_state->capture_list_id = NONE;
|
|
3336
|
+
other_state->dead = true;
|
|
3337
|
+
CaptureList *list = capture_list_pool_get_mut(
|
|
3338
|
+
&self->capture_list_pool,
|
|
3339
|
+
state->capture_list_id
|
|
3340
|
+
);
|
|
3341
|
+
array_clear(list);
|
|
3342
|
+
return list;
|
|
3343
|
+
} else {
|
|
3344
|
+
LOG(" ran out of capture lists");
|
|
3345
|
+
return NULL;
|
|
3346
|
+
}
|
|
3347
|
+
}
|
|
3348
|
+
}
|
|
3349
|
+
return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
|
|
3350
|
+
}
|
|
3351
|
+
|
|
3352
|
+
static void ts_query_cursor__capture(
|
|
3353
|
+
TSQueryCursor *self,
|
|
3354
|
+
QueryState *state,
|
|
3355
|
+
QueryStep *step,
|
|
3356
|
+
TSNode node
|
|
3357
|
+
) {
|
|
3358
|
+
if (state->dead) return;
|
|
3359
|
+
CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
|
|
3360
|
+
if (!capture_list) {
|
|
3361
|
+
state->dead = true;
|
|
3362
|
+
return;
|
|
3363
|
+
}
|
|
3364
|
+
|
|
3365
|
+
for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
|
|
3366
|
+
uint16_t capture_id = step->capture_ids[j];
|
|
3367
|
+
if (step->capture_ids[j] == NONE) break;
|
|
3368
|
+
array_push(capture_list, ((TSQueryCapture) { node, capture_id }));
|
|
3369
|
+
LOG(
|
|
3370
|
+
" capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n",
|
|
3371
|
+
ts_node_type(node),
|
|
3372
|
+
state->pattern_index,
|
|
3373
|
+
capture_id,
|
|
3374
|
+
capture_list->size
|
|
3375
|
+
);
|
|
3376
|
+
}
|
|
3377
|
+
}
|
|
3378
|
+
|
|
3379
|
+
// Duplicate the given state and insert the newly-created state immediately after
|
|
3380
|
+
// the given state in the `states` array. Ensures that the given state reference is
|
|
3381
|
+
// still valid, even if the states array is reallocated.
|
|
3382
|
+
static QueryState *ts_query_cursor__copy_state(
|
|
3383
|
+
TSQueryCursor *self,
|
|
3384
|
+
QueryState **state_ref
|
|
3385
|
+
) {
|
|
3386
|
+
const QueryState *state = *state_ref;
|
|
3387
|
+
uint32_t state_index = (uint32_t)(state - self->states.contents);
|
|
3388
|
+
QueryState copy = *state;
|
|
3389
|
+
copy.capture_list_id = NONE;
|
|
3390
|
+
|
|
3391
|
+
// If the state has captures, copy its capture list.
|
|
3392
|
+
if (state->capture_list_id != NONE) {
|
|
3393
|
+
CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, ©, state_index);
|
|
3394
|
+
if (!new_captures) return NULL;
|
|
3395
|
+
const CaptureList *old_captures = capture_list_pool_get(
|
|
3396
|
+
&self->capture_list_pool,
|
|
3397
|
+
state->capture_list_id
|
|
3398
|
+
);
|
|
3399
|
+
array_push_all(new_captures, old_captures);
|
|
3400
|
+
}
|
|
3401
|
+
|
|
3402
|
+
array_insert(&self->states, state_index + 1, copy);
|
|
3403
|
+
*state_ref = &self->states.contents[state_index];
|
|
3404
|
+
return &self->states.contents[state_index + 1];
|
|
3405
|
+
}
|
|
3406
|
+
|
|
3407
|
+
static inline bool ts_query_cursor__should_descend(
|
|
3408
|
+
TSQueryCursor *self,
|
|
3409
|
+
bool node_intersects_range
|
|
3410
|
+
) {
|
|
3411
|
+
|
|
3412
|
+
if (node_intersects_range && self->depth < self->max_start_depth) {
|
|
3413
|
+
return true;
|
|
3414
|
+
}
|
|
3415
|
+
|
|
3416
|
+
// If there are in-progress matches whose remaining steps occur
|
|
3417
|
+
// deeper in the tree, then descend.
|
|
3418
|
+
for (unsigned i = 0; i < self->states.size; i++) {
|
|
3419
|
+
QueryState *state = &self->states.contents[i];;
|
|
3420
|
+
QueryStep *next_step = &self->query->steps.contents[state->step_index];
|
|
3421
|
+
if (
|
|
3422
|
+
next_step->depth != PATTERN_DONE_MARKER &&
|
|
3423
|
+
state->start_depth + next_step->depth > self->depth
|
|
3424
|
+
) {
|
|
3425
|
+
return true;
|
|
3426
|
+
}
|
|
3427
|
+
}
|
|
3428
|
+
|
|
3429
|
+
if (self->depth >= self->max_start_depth) {
|
|
3430
|
+
return false;
|
|
3431
|
+
}
|
|
3432
|
+
|
|
3433
|
+
// If the current node is hidden, then a non-rooted pattern might match
|
|
3434
|
+
// one if its roots inside of this node, and match another of its roots
|
|
3435
|
+
// as part of a sibling node, so we may need to descend.
|
|
3436
|
+
if (!self->on_visible_node) {
|
|
3437
|
+
// Descending into a repetition node outside of the range can be
|
|
3438
|
+
// expensive, because these nodes can have many visible children.
|
|
3439
|
+
// Avoid descending into repetition nodes unless we have already
|
|
3440
|
+
// determined that this query can match rootless patterns inside
|
|
3441
|
+
// of this type of repetition node.
|
|
3442
|
+
Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor);
|
|
3443
|
+
if (ts_subtree_is_repetition(subtree)) {
|
|
3444
|
+
bool exists;
|
|
3445
|
+
uint32_t index;
|
|
3446
|
+
array_search_sorted_by(
|
|
3447
|
+
&self->query->repeat_symbols_with_rootless_patterns,,
|
|
3448
|
+
ts_subtree_symbol(subtree),
|
|
3449
|
+
&index,
|
|
3450
|
+
&exists
|
|
3451
|
+
);
|
|
3452
|
+
return exists;
|
|
3453
|
+
}
|
|
3454
|
+
|
|
3455
|
+
return true;
|
|
3456
|
+
}
|
|
3457
|
+
|
|
3458
|
+
return false;
|
|
3459
|
+
}
|
|
3460
|
+
|
|
3461
|
+
// Walk the tree, processing patterns until at least one pattern finishes.
// If one or more patterns finish, return `true` and store their states in the
// `finished_states` array. Multiple patterns can finish on the same node. If
// there are no more matches, return `false`.
//
// When `stop_on_definite_step` is true, the function also reports a match
// (returns `true`) as soon as some state advances to a step whose
// `root_pattern_guaranteed` flag is set, even though no state was necessarily
// pushed to `finished_states`.
//
// The function returns `false` without a match in two situations: the cursor
// is halted (all remaining in-progress states are released first), or the
// periodic clock check finds that `end_clock` has passed.
static inline bool ts_query_cursor__advance(
  TSQueryCursor *self,
  bool stop_on_definite_step
) {
  bool did_match = false;
  for (;;) {
    // Once halted, drop every in-progress state and give its capture list
    // back to the pool.
    if (self->halted) {
      while (self->states.size > 0) {
        QueryState state = array_pop(&self->states);
        capture_list_pool_release(
          &self->capture_list_pool,
          state.capture_list_id
        );
      }
    }

    // The clock is only consulted on iterations where the operation counter
    // wraps back to zero, i.e. once every OP_COUNT_PER_QUERY_TIMEOUT_CHECK
    // iterations of this loop.
    if (++self->operation_count == OP_COUNT_PER_QUERY_TIMEOUT_CHECK) {
      self->operation_count = 0;
    }
    if (
      did_match ||
      self->halted ||
      (
        self->operation_count == 0 &&
        !clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)
      )
    ) {
      return did_match;
    }

    // Exit the current node.
    if (self->ascending) {
      if (self->on_visible_node) {
        LOG(
          "leave node. depth:%u, type:%s\n",
          self->depth,
          ts_node_type(ts_tree_cursor_current_node(&self->cursor))
        );

        // After leaving a node, remove any states that cannot make further progress.
        // The array is compacted in place: surviving states are shifted left by
        // the number of states removed so far, and the size is fixed up once
        // after the loop.
        uint32_t deleted_count = 0;
        for (unsigned i = 0, n = self->states.size; i < n; i++) {
          QueryState *state = &self->states.contents[i];
          QueryStep *step = &self->query->steps.contents[state->step_index];

          // If a state completed its pattern inside of this node, but was deferred from finishing
          // in order to search for longer matches, mark it as finished.
          if (
            step->depth == PATTERN_DONE_MARKER &&
            (state->start_depth > self->depth || self->depth == 0)
          ) {
            LOG(" finish pattern %u\n", state->pattern_index);
            array_push(&self->finished_states, *state);
            did_match = true;
            deleted_count++;
          }

          // If a state needed to match something within this node, then remove that state
          // as it has failed to match.
          else if (
            step->depth != PATTERN_DONE_MARKER &&
            (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth
          ) {
            LOG(
              " failed to match. pattern:%u, step:%u\n",
              state->pattern_index,
              state->step_index
            );
            capture_list_pool_release(
              &self->capture_list_pool,
              state->capture_list_id
            );
            deleted_count++;
          }

          // The state survives: move it down over the slots freed so far.
          else if (deleted_count > 0) {
            self->states.contents[i - deleted_count] = *state;
          }
        }
        self->states.size -= deleted_count;
      }

      // Leave this node by stepping to its next sibling or to its parent.
      // `depth` counts visible nodes only, so it is adjusted whenever the
      // cursor moves between a visible and a hidden node.
      switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) {
        case TreeCursorStepVisible:
          if (!self->on_visible_node) {
            self->depth++;
            self->on_visible_node = true;
          }
          self->ascending = false;
          break;
        case TreeCursorStepHidden:
          if (self->on_visible_node) {
            self->depth--;
            self->on_visible_node = false;
          }
          self->ascending = false;
          break;
        default:
          // No sibling: keep ascending. If there is no parent either, the
          // whole tree has been visited.
          if (ts_tree_cursor_goto_parent(&self->cursor)) {
            self->depth--;
          } else {
            LOG("halt at root\n");
            self->halted = true;
          }
      }
    }

    // Enter a new node.
    else {
      // Get the properties of the current node.
      TSNode node = ts_tree_cursor_current_node(&self->cursor);
      TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);

      uint32_t start_byte = ts_node_start_byte(node);
      uint32_t end_byte = ts_node_end_byte(node);
      TSPoint start_point = ts_node_start_point(node);
      TSPoint end_point = ts_node_end_point(node);
      bool is_empty = start_byte == end_byte;

      // Classify the node and its parent relative to the cursor's byte range
      // and point range. A node is outside the range as soon as either of the
      // two ranges excludes it.
      bool parent_precedes_range = !ts_node_is_null(parent_node) && (
        ts_node_end_byte(parent_node) <= self->start_byte ||
        point_lte(ts_node_end_point(parent_node), self->start_point)
      );
      bool parent_follows_range = !ts_node_is_null(parent_node) && (
        ts_node_start_byte(parent_node) >= self->end_byte ||
        point_gte(ts_node_start_point(parent_node), self->end_point)
      );
      // An empty node that ends exactly at the range start is not treated as
      // preceding the range; a non-empty one is.
      bool node_precedes_range =
        parent_precedes_range ||
        end_byte < self->start_byte ||
        point_lt(end_point, self->start_point) ||
        (!is_empty && end_byte == self->start_byte) ||
        (!is_empty && point_eq(end_point, self->start_point));

      bool node_follows_range = parent_follows_range || (
        start_byte >= self->end_byte ||
        point_gte(start_point, self->end_point)
      );
      bool parent_intersects_range = !parent_precedes_range && !parent_follows_range;
      bool node_intersects_range = !node_precedes_range && !node_follows_range;

      // Pattern matching only happens on visible nodes; hidden nodes are
      // merely traversed (see the descend logic at the bottom).
      if (self->on_visible_node) {
        TSSymbol symbol = ts_node_symbol(node);
        bool is_named = ts_node_is_named(node);
        bool has_later_siblings;
        bool has_later_named_siblings;
        bool can_have_later_siblings_with_this_field;
        TSFieldId field_id = 0;
        // `supertype_count` is passed in holding the capacity of `supertypes`
        // and is read back afterwards as the number of entries to inspect.
        TSSymbol supertypes[8] = {0};
        unsigned supertype_count = 8;
        ts_tree_cursor_current_status(
          &self->cursor,
          &field_id,
          &has_later_siblings,
          &has_later_named_siblings,
          &can_have_later_siblings_with_this_field,
          supertypes,
          &supertype_count
        );
        LOG(
          "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
          self->depth,
          ts_node_type(node),
          ts_language_field_name_for_id(self->query->language, field_id),
          ts_node_start_point(node).row,
          self->states.size,
          self->finished_states.size
        );

        bool node_is_error = symbol == ts_builtin_sym_error;
        bool parent_is_error =
          !ts_node_is_null(parent_node) &&
          ts_node_symbol(parent_node) == ts_builtin_sym_error;

        // Add new states for any patterns whose root node is a wildcard.
        // These occupy the first `wildcard_root_pattern_count` entries of the
        // pattern map.
        if (!node_is_error) {
          for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
            PatternEntry *pattern = &self->query->pattern_map.contents[i];

            // If this node matches the first step of the pattern, then add a new
            // state at the start of this pattern.
            QueryStep *step = &self->query->steps.contents[pattern->step_index];
            uint32_t start_depth = self->depth - step->depth;
            if (
              (pattern->is_rooted ?
                node_intersects_range :
                (parent_intersects_range && !parent_is_error)) &&
              (!step->field || field_id == step->field) &&
              (!step->supertype_symbol || supertype_count > 0) &&
              (start_depth <= self->max_start_depth)
            ) {
              ts_query_cursor__add_state(self, pattern);
            }
          }
        }

        // Add new states for any patterns whose root node matches this node.
        unsigned i;
        if (ts_query__pattern_map_search(self->query, symbol, &i)) {
          PatternEntry *pattern = &self->query->pattern_map.contents[i];

          QueryStep *step = &self->query->steps.contents[pattern->step_index];
          // NOTE(review): `start_depth` is computed once from the first
          // matching pattern's step and is not recomputed when `step` is
          // re-assigned inside the loop below.
          uint32_t start_depth = self->depth - step->depth;
          do {
            // If this node matches the first step of the pattern, then add a new
            // state at the start of this pattern.
            if (
              (pattern->is_rooted ?
                node_intersects_range :
                (parent_intersects_range && !parent_is_error)) &&
              (!step->field || field_id == step->field) &&
              (start_depth <= self->max_start_depth)
            ) {
              ts_query_cursor__add_state(self, pattern);
            }

            // Advance to the next pattern whose root node matches this node.
            i++;
            if (i == self->query->pattern_map.size) break;
            pattern = &self->query->pattern_map.contents[i];
            step = &self->query->steps.contents[pattern->step_index];
          } while (step->symbol == symbol);
        }

        // Update all of the in-progress states with current node.
        // `copy_count` is the number of copies of state `j` inserted directly
        // after it during this iteration; the loop increment skips over them
        // so that freshly split states are not matched against this same node.
        for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) {
          QueryState *state = &self->states.contents[j];
          QueryStep *step = &self->query->steps.contents[state->step_index];
          state->has_in_progress_alternatives = false;
          copy_count = 0;

          // Check that the node matches all of the criteria for the next
          // step of the pattern.
          if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;

          // Determine if this node matches this step of the pattern, and also
          // if this node can have later siblings that match this step of the
          // pattern.
          bool node_does_match = false;
          if (step->symbol == WILDCARD_SYMBOL) {
            node_does_match = !node_is_error && (is_named || !step->is_named);
          } else {
            node_does_match = symbol == step->symbol;
          }
          bool later_sibling_can_match = has_later_siblings;
          if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
            later_sibling_can_match = false;
          }
          if (step->is_last_child && has_later_named_siblings) {
            node_does_match = false;
          }
          if (step->supertype_symbol) {
            bool has_supertype = false;
            for (unsigned k = 0; k < supertype_count; k++) {
              if (supertypes[k] == step->supertype_symbol) {
                has_supertype = true;
                break;
              }
            }
            if (!has_supertype) node_does_match = false;
          }
          if (step->field) {
            if (step->field == field_id) {
              if (!can_have_later_siblings_with_this_field) {
                later_sibling_can_match = false;
              }
            } else {
              node_does_match = false;
            }
          }

          // The step's negated fields form a zero-terminated list inside
          // `negated_fields`; the node fails to match if it has a child for
          // any of them.
          if (step->negated_field_list_id) {
            TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id];
            for (;;) {
              TSFieldId negated_field_id = *negated_field_ids;
              if (negated_field_id) {
                negated_field_ids++;
                if (ts_node_child_by_field_id(node, negated_field_id).id) {
                  node_does_match = false;
                  break;
                }
              } else {
                break;
              }
            }
          }

          // Remove states immediately if it is ever clear that they cannot match.
          if (!node_does_match) {
            if (!later_sibling_can_match) {
              LOG(
                " discard state. pattern:%u, step:%u\n",
                state->pattern_index,
                state->step_index
              );
              capture_list_pool_release(
                &self->capture_list_pool,
                state->capture_list_id
              );
              array_erase(&self->states, j);
              // Compensate for the loop increment so that the element that
              // slid into slot `j` is visited next. `j` is unsigned: for
              // j == 0 this wraps around and the increment wraps it back.
              j--;
            }
            continue;
          }

          // Some patterns can match their root node in multiple ways, capturing different
          // children. If this pattern step could match later children within the same
          // parent, then this query state cannot simply be updated in place. It must be
          // split into two states: one that matches this node, and one which skips over
          // this node, to preserve the possibility of matching later siblings.
          if (later_sibling_can_match && (
            step->contains_captures ||
            ts_query__step_is_fallible(self->query, state->step_index)
          )) {
            // On success `state` is re-pointed at the original state, since
            // the insertion may have moved the array.
            if (ts_query_cursor__copy_state(self, &state)) {
              LOG(
                " split state for capture. pattern:%u, step:%u\n",
                state->pattern_index,
                state->step_index
              );
              copy_count++;
            }
          }

          // If this pattern started with a wildcard, such that the pattern map
          // actually points to the *second* step of the pattern, then check
          // that the node has a parent, and capture the parent node if necessary.
          if (state->needs_parent) {
            TSNode parent = ts_tree_cursor_parent_node(&self->cursor);
            if (ts_node_is_null(parent)) {
              LOG(" missing parent node\n");
              state->dead = true;
            } else {
              state->needs_parent = false;
              // Walk backwards through the steps to the nearest preceding step
              // that is at depth 0 and is neither a dead-end nor a
              // pass-through step: that is the skipped wildcard root.
              QueryStep *skipped_wildcard_step = step;
              do {
                skipped_wildcard_step--;
              } while (
                skipped_wildcard_step->is_dead_end ||
                skipped_wildcard_step->is_pass_through ||
                skipped_wildcard_step->depth > 0
              );
              if (skipped_wildcard_step->capture_ids[0] != NONE) {
                LOG(" capture wildcard parent\n");
                ts_query_cursor__capture(
                  self,
                  state,
                  skipped_wildcard_step,
                  parent
                );
              }
            }
          }

          // If the current node is captured in this pattern, add it to the capture list.
          if (step->capture_ids[0] != NONE) {
            ts_query_cursor__capture(self, state, step, node);
          }

          // The state may have been marked dead above (missing parent, or no
          // capture list available). Remove it; with the `j--`, the loop
          // increment lands past any copy made for this state.
          if (state->dead) {
            array_erase(&self->states, j);
            j--;
            continue;
          }

          // Advance this state to the next step of its pattern.
          state->step_index++;
          state->seeking_immediate_match = false;
          LOG(
            " advance state. pattern:%u, step:%u\n",
            state->pattern_index,
            state->step_index
          );

          QueryStep *next_step = &self->query->steps.contents[state->step_index];
          if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true;

          // If this state's next step has an alternative step, then copy the state in order
          // to pursue both alternatives. The alternative step itself may have an alternative,
          // so this is an iterative process.
          // `end_index` grows with every copy inserted, so the copies are
          // examined for further alternatives as well.
          unsigned end_index = j + 1;
          for (unsigned k = j; k < end_index; k++) {
            QueryState *child_state = &self->states.contents[k];
            QueryStep *child_step = &self->query->steps.contents[child_state->step_index];
            if (child_step->alternative_index != NONE) {
              // A "dead-end" step exists only to add a non-sequential jump into the step sequence,
              // via its alternative index. When a state reaches a dead-end step, it jumps straight
              // to the step's alternative.
              if (child_step->is_dead_end) {
                child_state->step_index = child_step->alternative_index;
                // Re-examine the same state at its new step.
                k--;
                continue;
              }

              // A "pass-through" step exists only to add a branch into the step sequence,
              // via its alternative_index. When a state reaches a pass-through step, it splits
              // in order to process the alternative step, and then it advances to the next step.
              if (child_step->is_pass_through) {
                child_state->step_index++;
                // Re-examine the same state at its new step once the copy
                // below has been made.
                k--;
              }

              QueryState *copy = ts_query_cursor__copy_state(self, &child_state);
              if (copy) {
                LOG(
                  " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
                  copy->pattern_index,
                  copy->step_index,
                  next_step->alternative_index,
                  next_step->alternative_is_immediate,
                  capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
                );
                end_index++;
                copy_count++;
                copy->step_index = child_step->alternative_index;
                if (child_step->alternative_is_immediate) {
                  copy->seeking_immediate_match = true;
                }
              }
            }
          }
        }

        // Second pass over all states: drop dead states, prune redundant
        // states, and move completed states to `finished_states`.
        for (unsigned j = 0; j < self->states.size; j++) {
          QueryState *state = &self->states.contents[j];
          if (state->dead) {
            array_erase(&self->states, j);
            j--;
            continue;
          }

          // Enforce the longest-match criteria. When a query pattern contains optional or
          // repeated nodes, this is necessary to avoid multiple redundant states, where
          // one state has a strict subset of another state's captures.
          bool did_remove = false;
          for (unsigned k = j + 1; k < self->states.size; k++) {
            QueryState *other_state = &self->states.contents[k];

            // Query states are kept in ascending order of start_depth and pattern_index.
            // Since the longest-match criteria is only used for deduping matches of the same
            // pattern and root node, we only need to perform pairwise comparisons within a
            // small slice of the states array.
            if (
              other_state->start_depth != state->start_depth ||
              other_state->pattern_index != state->pattern_index
            ) break;

            bool left_contains_right, right_contains_left;
            ts_query_cursor__compare_captures(
              self,
              state,
              other_state,
              &left_contains_right,
              &right_contains_left
            );
            // `state` covers `other_state`: at the same step the other state
            // is redundant and is dropped; otherwise it is remembered as an
            // in-progress alternative.
            if (left_contains_right) {
              if (state->step_index == other_state->step_index) {
                LOG(
                  " drop shorter state. pattern: %u, step_index: %u\n",
                  state->pattern_index,
                  state->step_index
                );
                capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
                array_erase(&self->states, k);
                k--;
                continue;
              }
              other_state->has_in_progress_alternatives = true;
            }
            // `other_state` covers `state`: the mirror image of the case above.
            if (right_contains_left) {
              if (state->step_index == other_state->step_index) {
                LOG(
                  " drop shorter state. pattern: %u, step_index: %u\n",
                  state->pattern_index,
                  state->step_index
                );
                capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
                array_erase(&self->states, j);
                j--;
                did_remove = true;
                break;
              }
              state->has_in_progress_alternatives = true;
            }
          }

          // If the state is at the end of its pattern, remove it from the list
          // of in-progress states and add it to the list of finished states.
          // Finishing is deferred while the state still has in-progress
          // alternatives, so that a longer match can be found first.
          if (!did_remove) {
            LOG(
              " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
              state->pattern_index,
              state->start_depth,
              state->step_index,
              capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
            );
            QueryStep *next_step = &self->query->steps.contents[state->step_index];
            if (next_step->depth == PATTERN_DONE_MARKER) {
              if (state->has_in_progress_alternatives) {
                LOG(" defer finishing pattern %u\n", state->pattern_index);
              } else {
                LOG(" finish pattern %u\n", state->pattern_index);
                array_push(&self->finished_states, *state);
                array_erase(&self->states, (uint32_t)(state - self->states.contents));
                did_match = true;
                j--;
              }
            }
          }
        }
      }

      // Step into the first child if that can be useful. `continue` restarts
      // the outer loop on the child; otherwise fall through and start leaving
      // the current node.
      if (ts_query_cursor__should_descend(self, node_intersects_range)) {
        switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) {
          case TreeCursorStepVisible:
            self->depth++;
            self->on_visible_node = true;
            continue;
          case TreeCursorStepHidden:
            self->on_visible_node = false;
            continue;
          default:
            break;
        }
      }

      self->ascending = true;
    }
  }
}
|
|
3996
|
+
|
|
3997
|
+
bool ts_query_cursor_next_match(
|
|
3998
|
+
TSQueryCursor *self,
|
|
3999
|
+
TSQueryMatch *match
|
|
4000
|
+
) {
|
|
4001
|
+
if (self->finished_states.size == 0) {
|
|
4002
|
+
if (!ts_query_cursor__advance(self, false)) {
|
|
4003
|
+
return false;
|
|
4004
|
+
}
|
|
4005
|
+
}
|
|
4006
|
+
|
|
4007
|
+
QueryState *state = &self->finished_states.contents[0];
|
|
4008
|
+
if (state->id == UINT32_MAX) state->id = self->next_state_id++;
|
|
4009
|
+
match->id = state->id;
|
|
4010
|
+
match->pattern_index = state->pattern_index;
|
|
4011
|
+
const CaptureList *captures = capture_list_pool_get(
|
|
4012
|
+
&self->capture_list_pool,
|
|
4013
|
+
state->capture_list_id
|
|
4014
|
+
);
|
|
4015
|
+
match->captures = captures->contents;
|
|
4016
|
+
match->capture_count = captures->size;
|
|
4017
|
+
capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
|
|
4018
|
+
array_erase(&self->finished_states, 0);
|
|
4019
|
+
return true;
|
|
4020
|
+
}
|
|
4021
|
+
|
|
4022
|
+
void ts_query_cursor_remove_match(
|
|
4023
|
+
TSQueryCursor *self,
|
|
4024
|
+
uint32_t match_id
|
|
4025
|
+
) {
|
|
4026
|
+
for (unsigned i = 0; i < self->finished_states.size; i++) {
|
|
4027
|
+
const QueryState *state = &self->finished_states.contents[i];
|
|
4028
|
+
if (state->id == match_id) {
|
|
4029
|
+
capture_list_pool_release(
|
|
4030
|
+
&self->capture_list_pool,
|
|
4031
|
+
state->capture_list_id
|
|
4032
|
+
);
|
|
4033
|
+
array_erase(&self->finished_states, i);
|
|
4034
|
+
return;
|
|
4035
|
+
}
|
|
4036
|
+
}
|
|
4037
|
+
|
|
4038
|
+
// Remove unfinished query states as well to prevent future
|
|
4039
|
+
// captures for a match being removed.
|
|
4040
|
+
for (unsigned i = 0; i < self->states.size; i++) {
|
|
4041
|
+
const QueryState *state = &self->states.contents[i];
|
|
4042
|
+
if (state->id == match_id) {
|
|
4043
|
+
capture_list_pool_release(
|
|
4044
|
+
&self->capture_list_pool,
|
|
4045
|
+
state->capture_list_id
|
|
4046
|
+
);
|
|
4047
|
+
array_erase(&self->states, i);
|
|
4048
|
+
return;
|
|
4049
|
+
}
|
|
4050
|
+
}
|
|
4051
|
+
}
|
|
4052
|
+
|
|
4053
|
+
bool ts_query_cursor_next_capture(
|
|
4054
|
+
TSQueryCursor *self,
|
|
4055
|
+
TSQueryMatch *match,
|
|
4056
|
+
uint32_t *capture_index
|
|
4057
|
+
) {
|
|
4058
|
+
// The goal here is to return captures in order, even though they may not
|
|
4059
|
+
// be discovered in order, because patterns can overlap. Search for matches
|
|
4060
|
+
// until there is a finished capture that is before any unfinished capture.
|
|
4061
|
+
for (;;) {
|
|
4062
|
+
// First, find the earliest capture in an unfinished match.
|
|
4063
|
+
uint32_t first_unfinished_capture_byte;
|
|
4064
|
+
uint32_t first_unfinished_pattern_index;
|
|
4065
|
+
uint32_t first_unfinished_state_index;
|
|
4066
|
+
bool first_unfinished_state_is_definite = false;
|
|
4067
|
+
bool found_unfinished_state = ts_query_cursor__first_in_progress_capture(
|
|
4068
|
+
self,
|
|
4069
|
+
&first_unfinished_state_index,
|
|
4070
|
+
&first_unfinished_capture_byte,
|
|
4071
|
+
&first_unfinished_pattern_index,
|
|
4072
|
+
&first_unfinished_state_is_definite
|
|
4073
|
+
);
|
|
4074
|
+
|
|
4075
|
+
// Then find the earliest capture in a finished match. It must occur
|
|
4076
|
+
// before the first capture in an *unfinished* match.
|
|
4077
|
+
QueryState *first_finished_state = NULL;
|
|
4078
|
+
uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
|
|
4079
|
+
uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
|
|
4080
|
+
for (unsigned i = 0; i < self->finished_states.size;) {
|
|
4081
|
+
QueryState *state = &self->finished_states.contents[i];
|
|
4082
|
+
const CaptureList *captures = capture_list_pool_get(
|
|
4083
|
+
&self->capture_list_pool,
|
|
4084
|
+
state->capture_list_id
|
|
4085
|
+
);
|
|
4086
|
+
|
|
4087
|
+
// Remove states whose captures are all consumed.
|
|
4088
|
+
if (state->consumed_capture_count >= captures->size) {
|
|
4089
|
+
capture_list_pool_release(
|
|
4090
|
+
&self->capture_list_pool,
|
|
4091
|
+
state->capture_list_id
|
|
4092
|
+
);
|
|
4093
|
+
array_erase(&self->finished_states, i);
|
|
4094
|
+
continue;
|
|
4095
|
+
}
|
|
4096
|
+
|
|
4097
|
+
TSNode node = captures->contents[state->consumed_capture_count].node;
|
|
4098
|
+
|
|
4099
|
+
bool node_precedes_range = (
|
|
4100
|
+
ts_node_end_byte(node) <= self->start_byte ||
|
|
4101
|
+
point_lte(ts_node_end_point(node), self->start_point)
|
|
4102
|
+
);
|
|
4103
|
+
bool node_follows_range = (
|
|
4104
|
+
ts_node_start_byte(node) >= self->end_byte ||
|
|
4105
|
+
point_gte(ts_node_start_point(node), self->end_point)
|
|
4106
|
+
);
|
|
4107
|
+
bool node_outside_of_range = node_precedes_range || node_follows_range;
|
|
4108
|
+
|
|
4109
|
+
// Skip captures that are outside of the cursor's range.
|
|
4110
|
+
if (node_outside_of_range) {
|
|
4111
|
+
state->consumed_capture_count++;
|
|
4112
|
+
continue;
|
|
4113
|
+
}
|
|
4114
|
+
|
|
4115
|
+
uint32_t node_start_byte = ts_node_start_byte(node);
|
|
4116
|
+
if (
|
|
4117
|
+
node_start_byte < first_finished_capture_byte ||
|
|
4118
|
+
(
|
|
4119
|
+
node_start_byte == first_finished_capture_byte &&
|
|
4120
|
+
state->pattern_index < first_finished_pattern_index
|
|
4121
|
+
)
|
|
4122
|
+
) {
|
|
4123
|
+
first_finished_state = state;
|
|
4124
|
+
first_finished_capture_byte = node_start_byte;
|
|
4125
|
+
first_finished_pattern_index = state->pattern_index;
|
|
4126
|
+
}
|
|
4127
|
+
i++;
|
|
4128
|
+
}
|
|
4129
|
+
|
|
4130
|
+
// If there is finished capture that is clearly before any unfinished
|
|
4131
|
+
// capture, then return its match, and its capture index. Internally
|
|
4132
|
+
// record the fact that the capture has been 'consumed'.
|
|
4133
|
+
QueryState *state;
|
|
4134
|
+
if (first_finished_state) {
|
|
4135
|
+
state = first_finished_state;
|
|
4136
|
+
} else if (first_unfinished_state_is_definite) {
|
|
4137
|
+
state = &self->states.contents[first_unfinished_state_index];
|
|
4138
|
+
} else {
|
|
4139
|
+
state = NULL;
|
|
4140
|
+
}
|
|
4141
|
+
|
|
4142
|
+
if (state) {
|
|
4143
|
+
if (state->id == UINT32_MAX) state->id = self->next_state_id++;
|
|
4144
|
+
match->id = state->id;
|
|
4145
|
+
match->pattern_index = state->pattern_index;
|
|
4146
|
+
const CaptureList *captures = capture_list_pool_get(
|
|
4147
|
+
&self->capture_list_pool,
|
|
4148
|
+
state->capture_list_id
|
|
4149
|
+
);
|
|
4150
|
+
match->captures = captures->contents;
|
|
4151
|
+
match->capture_count = captures->size;
|
|
4152
|
+
*capture_index = state->consumed_capture_count;
|
|
4153
|
+
state->consumed_capture_count++;
|
|
4154
|
+
return true;
|
|
4155
|
+
}
|
|
4156
|
+
|
|
4157
|
+
if (capture_list_pool_is_empty(&self->capture_list_pool) && found_unfinished_state) {
|
|
4158
|
+
LOG(
|
|
4159
|
+
" abandon state. index:%u, pattern:%u, offset:%u.\n",
|
|
4160
|
+
first_unfinished_state_index,
|
|
4161
|
+
first_unfinished_pattern_index,
|
|
4162
|
+
first_unfinished_capture_byte
|
|
4163
|
+
);
|
|
4164
|
+
capture_list_pool_release(
|
|
4165
|
+
&self->capture_list_pool,
|
|
4166
|
+
self->states.contents[first_unfinished_state_index].capture_list_id
|
|
4167
|
+
);
|
|
4168
|
+
array_erase(&self->states, first_unfinished_state_index);
|
|
4169
|
+
}
|
|
4170
|
+
|
|
4171
|
+
// If there are no finished matches that are ready to be returned, then
|
|
4172
|
+
// continue finding more matches.
|
|
4173
|
+
if (
|
|
4174
|
+
!ts_query_cursor__advance(self, true) &&
|
|
4175
|
+
self->finished_states.size == 0
|
|
4176
|
+
) return false;
|
|
4177
|
+
}
|
|
4178
|
+
}
|
|
4179
|
+
|
|
4180
|
+
void ts_query_cursor_set_max_start_depth(
|
|
4181
|
+
TSQueryCursor *self,
|
|
4182
|
+
uint32_t max_start_depth
|
|
4183
|
+
) {
|
|
4184
|
+
self->max_start_depth = max_start_depth;
|
|
4185
|
+
}
|
|
4186
|
+
|
|
4187
|
+
// Drop the LOG macro so it is no longer defined past this point.
// NOTE(review): presumably LOG is a debug-logging helper defined earlier in this file — confirm at its #define.
#undef LOG
|