@yalehwang/archguard 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. package/README.md +90 -58
  2. package/dist/cli/analyze/normalize-to-diagrams.d.ts +5 -0
  3. package/dist/cli/analyze/normalize-to-diagrams.d.ts.map +1 -0
  4. package/dist/cli/analyze/normalize-to-diagrams.js +94 -0
  5. package/dist/cli/analyze/normalize-to-diagrams.js.map +1 -0
  6. package/dist/cli/analyze/run-analysis.d.ts +20 -0
  7. package/dist/cli/analyze/run-analysis.d.ts.map +1 -0
  8. package/dist/cli/analyze/run-analysis.js +150 -0
  9. package/dist/cli/analyze/run-analysis.js.map +1 -0
  10. package/dist/cli/cache/arch-json-disk-cache.d.ts.map +1 -1
  11. package/dist/cli/cache/arch-json-disk-cache.js +2 -1
  12. package/dist/cli/cache/arch-json-disk-cache.js.map +1 -1
  13. package/dist/cli/cache/diagram-manifest.d.ts +13 -0
  14. package/dist/cli/cache/diagram-manifest.d.ts.map +1 -0
  15. package/dist/cli/cache/diagram-manifest.js +60 -0
  16. package/dist/cli/cache/diagram-manifest.js.map +1 -0
  17. package/dist/cli/cache/render-hash-cache.d.ts +13 -0
  18. package/dist/cli/cache/render-hash-cache.d.ts.map +1 -0
  19. package/dist/cli/cache/render-hash-cache.js +39 -0
  20. package/dist/cli/cache/render-hash-cache.js.map +1 -0
  21. package/dist/cli/commands/analyze.d.ts +1 -4
  22. package/dist/cli/commands/analyze.d.ts.map +1 -1
  23. package/dist/cli/commands/analyze.js +26 -143
  24. package/dist/cli/commands/analyze.js.map +1 -1
  25. package/dist/cli/commands/cache.d.ts.map +1 -1
  26. package/dist/cli/commands/cache.js +7 -0
  27. package/dist/cli/commands/cache.js.map +1 -1
  28. package/dist/cli/commands/mcp.d.ts +3 -0
  29. package/dist/cli/commands/mcp.d.ts.map +1 -0
  30. package/dist/cli/commands/mcp.js +17 -0
  31. package/dist/cli/commands/mcp.js.map +1 -0
  32. package/dist/cli/commands/query.d.ts +3 -0
  33. package/dist/cli/commands/query.d.ts.map +1 -0
  34. package/dist/cli/commands/query.js +227 -0
  35. package/dist/cli/commands/query.js.map +1 -0
  36. package/dist/cli/index.d.ts.map +1 -1
  37. package/dist/cli/index.js +4 -0
  38. package/dist/cli/index.js.map +1 -1
  39. package/dist/cli/mcp/analyze-tool.d.ts +6 -0
  40. package/dist/cli/mcp/analyze-tool.d.ts.map +1 -0
  41. package/dist/cli/mcp/analyze-tool.js +112 -0
  42. package/dist/cli/mcp/analyze-tool.js.map +1 -0
  43. package/dist/cli/mcp/mcp-server.d.ts +6 -0
  44. package/dist/cli/mcp/mcp-server.d.ts.map +1 -0
  45. package/dist/cli/mcp/mcp-server.js +165 -0
  46. package/dist/cli/mcp/mcp-server.js.map +1 -0
  47. package/dist/cli/processors/arch-json-provider.d.ts +38 -0
  48. package/dist/cli/processors/arch-json-provider.d.ts.map +1 -0
  49. package/dist/cli/processors/arch-json-provider.js +295 -0
  50. package/dist/cli/processors/arch-json-provider.js.map +1 -0
  51. package/dist/cli/processors/diagram-output-router.d.ts +26 -0
  52. package/dist/cli/processors/diagram-output-router.d.ts.map +1 -0
  53. package/dist/cli/processors/diagram-output-router.js +235 -0
  54. package/dist/cli/processors/diagram-output-router.js.map +1 -0
  55. package/dist/cli/processors/diagram-processor.d.ts +11 -22
  56. package/dist/cli/processors/diagram-processor.d.ts.map +1 -1
  57. package/dist/cli/processors/diagram-processor.js +37 -455
  58. package/dist/cli/processors/diagram-processor.js.map +1 -1
  59. package/dist/cli/progress.d.ts +22 -1
  60. package/dist/cli/progress.d.ts.map +1 -1
  61. package/dist/cli/progress.js +24 -0
  62. package/dist/cli/progress.js.map +1 -1
  63. package/dist/cli/query/arch-index-builder.d.ts +4 -0
  64. package/dist/cli/query/arch-index-builder.d.ts.map +1 -0
  65. package/dist/cli/query/arch-index-builder.js +126 -0
  66. package/dist/cli/query/arch-index-builder.js.map +1 -0
  67. package/dist/cli/query/arch-index.d.ts +17 -0
  68. package/dist/cli/query/arch-index.d.ts.map +1 -0
  69. package/dist/cli/query/arch-index.js +2 -0
  70. package/dist/cli/query/arch-index.js.map +1 -0
  71. package/dist/cli/query/engine-loader.d.ts +7 -0
  72. package/dist/cli/query/engine-loader.d.ts.map +1 -0
  73. package/dist/cli/query/engine-loader.js +98 -0
  74. package/dist/cli/query/engine-loader.js.map +1 -0
  75. package/dist/cli/query/query-artifacts.d.ts +7 -0
  76. package/dist/cli/query/query-artifacts.d.ts.map +1 -0
  77. package/dist/cli/query/query-artifacts.js +78 -0
  78. package/dist/cli/query/query-artifacts.js.map +1 -0
  79. package/dist/cli/query/query-engine.d.ts +49 -0
  80. package/dist/cli/query/query-engine.d.ts.map +1 -0
  81. package/dist/cli/query/query-engine.js +137 -0
  82. package/dist/cli/query/query-engine.js.map +1 -0
  83. package/dist/cli/query/query-manifest.d.ts +24 -0
  84. package/dist/cli/query/query-manifest.d.ts.map +1 -0
  85. package/dist/cli/query/query-manifest.js +2 -0
  86. package/dist/cli/query/query-manifest.js.map +1 -0
  87. package/dist/cli/utils/canonicalize-arch-json.d.ts +3 -0
  88. package/dist/cli/utils/canonicalize-arch-json.d.ts.map +1 -0
  89. package/dist/cli/utils/canonicalize-arch-json.js +81 -0
  90. package/dist/cli/utils/canonicalize-arch-json.js.map +1 -0
  91. package/dist/cli/utils/diagram-index-generator.d.ts.map +1 -1
  92. package/dist/cli/utils/diagram-index-generator.js +0 -5
  93. package/dist/cli/utils/diagram-index-generator.js.map +1 -1
  94. package/dist/cli/utils/project-structure-detector.d.ts.map +1 -1
  95. package/dist/cli/utils/project-structure-detector.js +0 -10
  96. package/dist/cli/utils/project-structure-detector.js.map +1 -1
  97. package/dist/mermaid/renderer.d.ts.map +1 -1
  98. package/dist/mermaid/renderer.js +34 -0
  99. package/dist/mermaid/renderer.js.map +1 -1
  100. package/dist/plugins/golang/atlas/index.d.ts.map +1 -1
  101. package/dist/plugins/golang/atlas/index.js +3 -1
  102. package/dist/plugins/golang/atlas/index.js.map +1 -1
  103. package/dist/plugins/golang/atlas/types.d.ts +1 -0
  104. package/dist/plugins/golang/atlas/types.d.ts.map +1 -1
  105. package/dist/plugins/golang/index.d.ts.map +1 -1
  106. package/dist/plugins/golang/index.js +15 -6
  107. package/dist/plugins/golang/index.js.map +1 -1
  108. package/dist/plugins/golang/source-scope.d.ts +7 -0
  109. package/dist/plugins/golang/source-scope.d.ts.map +1 -0
  110. package/dist/plugins/golang/source-scope.js +83 -0
  111. package/dist/plugins/golang/source-scope.js.map +1 -0
  112. package/node_modules/node-addon-api/LICENSE.md +9 -0
  113. package/node_modules/node-addon-api/README.md +95 -0
  114. package/node_modules/node-addon-api/common.gypi +21 -0
  115. package/node_modules/node-addon-api/except.gypi +25 -0
  116. package/node_modules/node-addon-api/index.js +14 -0
  117. package/node_modules/node-addon-api/napi-inl.deprecated.h +186 -0
  118. package/node_modules/node-addon-api/napi-inl.h +7033 -0
  119. package/node_modules/node-addon-api/napi.h +3309 -0
  120. package/node_modules/node-addon-api/node_addon_api.gyp +42 -0
  121. package/node_modules/node-addon-api/node_api.gyp +9 -0
  122. package/node_modules/node-addon-api/noexcept.gypi +26 -0
  123. package/node_modules/node-addon-api/nothing.c +0 -0
  124. package/node_modules/node-addon-api/package-support.json +21 -0
  125. package/node_modules/node-addon-api/package.json +480 -0
  126. package/node_modules/node-addon-api/tools/README.md +73 -0
  127. package/node_modules/node-addon-api/tools/check-napi.js +99 -0
  128. package/node_modules/node-addon-api/tools/clang-format.js +71 -0
  129. package/node_modules/node-addon-api/tools/conversion.js +301 -0
  130. package/node_modules/node-gyp-build/LICENSE +21 -0
  131. package/node_modules/node-gyp-build/README.md +58 -0
  132. package/node_modules/node-gyp-build/SECURITY.md +5 -0
  133. package/node_modules/node-gyp-build/bin.js +84 -0
  134. package/node_modules/node-gyp-build/build-test.js +19 -0
  135. package/node_modules/node-gyp-build/index.js +6 -0
  136. package/node_modules/node-gyp-build/node-gyp-build.js +207 -0
  137. package/node_modules/node-gyp-build/optional.js +7 -0
  138. package/node_modules/node-gyp-build/package.json +43 -0
  139. package/node_modules/tree-sitter/LICENSE +21 -0
  140. package/node_modules/tree-sitter/README.md +128 -0
  141. package/node_modules/tree-sitter/binding.gyp +80 -0
  142. package/node_modules/tree-sitter/index.js +916 -0
  143. package/node_modules/tree-sitter/package.json +76 -0
  144. package/node_modules/tree-sitter/prebuilds/linux-x64/node.napi.glibc.node +0 -0
  145. package/node_modules/tree-sitter/src/addon_data.h +47 -0
  146. package/node_modules/tree-sitter/src/binding.cc +35 -0
  147. package/node_modules/tree-sitter/src/conversions.cc +140 -0
  148. package/node_modules/tree-sitter/src/conversions.h +22 -0
  149. package/node_modules/tree-sitter/src/language.cc +106 -0
  150. package/node_modules/tree-sitter/src/language.h +17 -0
  151. package/node_modules/tree-sitter/src/logger.cc +70 -0
  152. package/node_modules/tree-sitter/src/logger.h +19 -0
  153. package/node_modules/tree-sitter/src/lookaheaditerator.cc +122 -0
  154. package/node_modules/tree-sitter/src/lookaheaditerator.h +33 -0
  155. package/node_modules/tree-sitter/src/node.cc +1088 -0
  156. package/node_modules/tree-sitter/src/node.h +30 -0
  157. package/node_modules/tree-sitter/src/parser.cc +306 -0
  158. package/node_modules/tree-sitter/src/parser.h +35 -0
  159. package/node_modules/tree-sitter/src/query.cc +397 -0
  160. package/node_modules/tree-sitter/src/query.h +40 -0
  161. package/node_modules/tree-sitter/src/tree.cc +316 -0
  162. package/node_modules/tree-sitter/src/tree.h +45 -0
  163. package/node_modules/tree-sitter/src/tree_cursor.cc +213 -0
  164. package/node_modules/tree-sitter/src/tree_cursor.h +52 -0
  165. package/node_modules/tree-sitter/tree-sitter.d.ts +1042 -0
  166. package/node_modules/tree-sitter/vendor/tree-sitter/lib/include/tree_sitter/api.h +1478 -0
  167. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/alloc.c +48 -0
  168. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/alloc.h +41 -0
  169. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/array.h +291 -0
  170. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/atomic.h +68 -0
  171. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/clock.h +146 -0
  172. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/error_costs.h +11 -0
  173. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/get_changed_ranges.c +523 -0
  174. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/get_changed_ranges.h +36 -0
  175. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/host.h +21 -0
  176. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/language.c +293 -0
  177. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/language.h +293 -0
  178. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/length.h +52 -0
  179. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/lexer.c +483 -0
  180. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/lexer.h +54 -0
  181. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/lib.c +12 -0
  182. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/node.c +875 -0
  183. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/parser.c +2293 -0
  184. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/parser.h +286 -0
  185. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/point.h +48 -0
  186. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/portable/endian.h +239 -0
  187. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/query.c +4350 -0
  188. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/reduce_action.h +34 -0
  189. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/reusable_node.h +95 -0
  190. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/stack.c +911 -0
  191. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/stack.h +133 -0
  192. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/subtree.c +1034 -0
  193. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/subtree.h +399 -0
  194. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/tree.c +170 -0
  195. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/tree.h +31 -0
  196. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/tree_cursor.c +717 -0
  197. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/tree_cursor.h +48 -0
  198. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/ts_assert.h +11 -0
  199. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/ICU_SHA +1 -0
  200. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/LICENSE +414 -0
  201. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/README.md +29 -0
  202. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/ptypes.h +1 -0
  203. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/umachine.h +448 -0
  204. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/urename.h +1 -0
  205. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/utf.h +1 -0
  206. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/utf16.h +733 -0
  207. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode/utf8.h +881 -0
  208. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/unicode.h +75 -0
  209. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/wasm/stdlib-symbols.txt +24 -0
  210. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/wasm/stdlib.c +113 -0
  211. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/wasm/wasm-stdlib.h +1314 -0
  212. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/wasm_store.c +1935 -0
  213. package/node_modules/tree-sitter/vendor/tree-sitter/lib/src/wasm_store.h +31 -0
  214. package/package.json +9 -2
  215. package/scripts/postinstall-tree-sitter.mjs +28 -0
@@ -0,0 +1,4350 @@
1
+ /*
2
+ * On NetBSD, defining standard requirements like this removes symbols
3
+ * from the namespace; however, we need non-standard symbols for
4
+ * endian.h.
5
+ */
6
+ #if defined(__NetBSD__) && defined(_POSIX_C_SOURCE)
7
+ #undef _POSIX_C_SOURCE
8
+ #endif
9
+
10
+ #include "tree_sitter/api.h"
11
+ #include "./alloc.h"
12
+ #include "./array.h"
13
+ #include "./clock.h"
14
+ #include "./language.h"
15
+ #include "./point.h"
16
+ #include "./tree_cursor.h"
17
+ #include "./unicode.h"
18
+ #include <wctype.h>
19
+
20
+ // #define DEBUG_ANALYZE_QUERY
21
+ // #define DEBUG_EXECUTE_QUERY
22
+
23
+ #define MAX_STEP_CAPTURE_COUNT 3
24
+ #define MAX_NEGATED_FIELD_COUNT 8
25
+ #define MAX_STATE_PREDECESSOR_COUNT 256
26
+ #define MAX_ANALYSIS_STATE_DEPTH 8
27
+ #define MAX_ANALYSIS_ITERATION_COUNT 256
28
+
29
+ /*
30
+ * Stream - A sequence of unicode characters derived from a UTF8 string.
31
+ * This struct is used in parsing queries from S-expressions.
32
+ */
33
+ typedef struct {
34
+ const char *input;
35
+ const char *start;
36
+ const char *end;
37
+ int32_t next;
38
+ uint8_t next_size;
39
+ } Stream;
40
+
41
+ /*
42
+ * QueryStep - A step in the process of matching a query. Each node within
43
+ * a query S-expression corresponds to one of these steps. An entire pattern
44
+ * is represented as a sequence of these steps. The basic properties of a
45
+ * node are represented by these fields:
46
+ * - `symbol` - The grammar symbol to match. A zero value represents the
47
+ * wildcard symbol, '_'.
48
+ * - `field` - The field name to match. A zero value means that a field name
49
+ * was not specified.
50
+ * - `capture_ids` - An array of integers representing the names of captures
51
+ * associated with this node in the pattern, terminated by a `NONE` value.
52
+ * - `depth` - The depth where this node occurs in the pattern. The root node
53
+ * of the pattern has depth zero.
54
+ * - `negated_field_list_id` - An id representing a set of fields that must
55
+ * not be present on a node matching this step.
56
+ *
57
+ * Steps have some additional fields in order to handle the `.` (or "anchor") operator,
58
+ * which forbids additional child nodes:
59
+ * - `is_immediate` - Indicates that the node matching this step cannot be preceded
60
+ * by other sibling nodes that weren't specified in the pattern.
61
+ * - `is_last_child` - Indicates that the node matching this step cannot have any
62
+ * subsequent named siblings.
63
+ *
64
+ * For simple patterns, steps are matched in sequential order. But in order to
65
+ * handle alternative/repeated/optional sub-patterns, query steps are not always
66
+ * structured as a linear sequence; they sometimes need to split and merge. This
67
+ * is done using the following fields:
68
+ * - `alternative_index` - The index of a different query step that serves as
69
+ * an alternative to this step. A `NONE` value represents no alternative.
70
+ * When a query state reaches a step with an alternative index, the state
71
+ * is duplicated, with one copy remaining at the original step, and one copy
72
+ * moving to the alternative step. The alternative may have its own alternative
73
+ * step, so this splitting is an iterative process.
74
+ * - `is_dead_end` - Indicates that this step cannot be passed directly, and
75
+ * exists only in order to redirect to an alternative index, with no splitting.
76
+ * - `is_pass_through` - Indicates that this step has no matching logic of its own,
77
+ * and exists only to split a state. One copy of the state advances immediately
78
+ * to the next step, and one moves to the alternative step.
79
+ * - `alternative_is_immediate` - Indicates that this step's alternative step
80
+ * should be treated as if `is_immediate` is true.
81
+ *
82
+ * Steps also store some derived state that summarizes how they relate to other
83
+ * steps within the same pattern. This is used to optimize the matching process:
84
+ * - `contains_captures` - Indicates that this step or one of its child steps
85
+ * has a non-empty `capture_ids` list.
86
+ * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then
87
+ * it and all of its subsequent sibling steps within the same parent pattern
88
+ * are guaranteed to match.
89
+ * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but
90
+ * for the entire top-level pattern. When iterating through a query's
91
+ * captures using `ts_query_cursor_next_capture`, this field is used to
92
+ * detect that a capture can safely be returned from a match that has not
93
+ * even completed yet.
94
+ */
95
+ typedef struct {
96
+ TSSymbol symbol;
97
+ TSSymbol supertype_symbol;
98
+ TSFieldId field;
99
+ uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT];
100
+ uint16_t depth;
101
+ uint16_t alternative_index;
102
+ uint16_t negated_field_list_id;
103
+ bool is_named: 1;
104
+ bool is_immediate: 1;
105
+ bool is_last_child: 1;
106
+ bool is_pass_through: 1;
107
+ bool is_dead_end: 1;
108
+ bool alternative_is_immediate: 1;
109
+ bool contains_captures: 1;
110
+ bool root_pattern_guaranteed: 1;
111
+ bool parent_pattern_guaranteed: 1;
112
+ bool is_missing: 1;
113
+ } QueryStep;
114
+
115
+ /*
116
+ * Slice - A slice of an external array. Within a query, capture names,
117
+ * literal string values, and predicate step information are stored in three
118
+ * contiguous arrays. Individual captures, string values, and predicates are
119
+ * represented as slices of these three arrays.
120
+ */
121
+ typedef struct {
122
+ uint32_t offset;
123
+ uint32_t length;
124
+ } Slice;
125
+
126
+ /*
127
+ * SymbolTable - a two-way mapping of strings to ids.
128
+ */
129
+ typedef struct {
130
+ Array(char) characters;
131
+ Array(Slice) slices;
132
+ } SymbolTable;
133
+
134
+ /**
135
+ * CaptureQuantifiers - a data structure holding the quantifiers of pattern captures.
136
+ */
137
+ typedef Array(uint8_t) CaptureQuantifiers;
138
+
139
+ /*
140
+ * PatternEntry - Information about the starting point for matching a particular
141
+ * pattern. These entries are stored in a 'pattern map' - a sorted array that
142
+ * makes it possible to efficiently lookup patterns based on the symbol for their
143
+ * first step. The entry consists of the following fields:
144
+ * - `pattern_index` - the index of the pattern within the query
145
+ * - `step_index` - the index of the pattern's first step in the shared `steps` array
146
+ * - `is_rooted` - whether or not the pattern has a single root node. This property
147
+ * affects decisions about whether or not to start the pattern for nodes outside
148
+ * of a QueryCursor's range restriction.
149
+ */
150
+ typedef struct {
151
+ uint16_t step_index;
152
+ uint16_t pattern_index;
153
+ bool is_rooted;
154
+ } PatternEntry;
155
+
156
+ typedef struct {
157
+ Slice steps;
158
+ Slice predicate_steps;
159
+ uint32_t start_byte;
160
+ uint32_t end_byte;
161
+ bool is_non_local;
162
+ } QueryPattern;
163
+
164
+ typedef struct {
165
+ uint32_t byte_offset;
166
+ uint16_t step_index;
167
+ } StepOffset;
168
+
169
+ /*
170
+ * QueryState - The state of an in-progress match of a particular pattern
171
+ * in a query. While executing, a `TSQueryCursor` must keep track of a number
172
+ * of possible in-progress matches. Each of those possible matches is
173
+ * represented as one of these states. Fields:
174
+ * - `id` - A numeric id that is exposed to the public API. This allows the
175
+ * caller to remove a given match, preventing any more of its captures
176
+ * from being returned.
177
+ * - `start_depth` - The depth in the tree where the first step of the state's
178
+ * pattern was matched.
179
+ * - `pattern_index` - The pattern that the state is matching.
180
+ * - `consumed_capture_count` - The number of captures from this match that
181
+ * have already been returned.
182
+ * - `capture_list_id` - A numeric id that can be used to retrieve the state's
183
+ * list of captures from the `CaptureListPool`.
184
+ * - `seeking_immediate_match` - A flag that indicates that the state's next
185
+ * step must be matched by the very next sibling. This is used when
186
+ * processing repetitions, or when processing a wildcard node followed by
187
+ * an anchor.
188
+ * - `has_in_progress_alternatives` - A flag that indicates that there are
189
+ * other states that have the same captures as this state, but are at
190
+ * different steps in their pattern. This means that in order to obey the
191
+ * 'longest-match' rule, this state should not be returned as a match until
192
+ * it is clear that there can be no other alternative match with more captures.
193
+ */
194
+ typedef struct {
195
+ uint32_t id;
196
+ uint32_t capture_list_id;
197
+ uint16_t start_depth;
198
+ uint16_t step_index;
199
+ uint16_t pattern_index;
200
+ uint16_t consumed_capture_count: 12;
201
+ bool seeking_immediate_match: 1;
202
+ bool has_in_progress_alternatives: 1;
203
+ bool dead: 1;
204
+ bool needs_parent: 1;
205
+ } QueryState;
206
+
207
+ typedef Array(TSQueryCapture) CaptureList;
208
+
209
+ /*
210
+ * CaptureListPool - A collection of *lists* of captures. Each query state needs
211
+ * to maintain its own list of captures. To avoid repeated allocations, this struct
212
+ * maintains a fixed set of capture lists, and keeps track of which ones are
213
+ * currently in use by a query state.
214
+ */
215
+ typedef struct {
216
+ Array(CaptureList) list;
217
+ CaptureList empty_list;
218
+ // The maximum number of capture lists that we are allowed to allocate. We
219
+ // never allow `list` to allocate more entries than this, dropping pending
220
+ // matches if needed to stay under the limit.
221
+ uint32_t max_capture_list_count;
222
+ // The number of capture lists allocated in `list` that are not currently in
223
+ // use. We reuse those existing-but-unused capture lists before trying to
224
+ // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
225
+ // list's length to indicate that it's not in use.
226
+ uint32_t free_capture_list_count;
227
+ } CaptureListPool;
228
+
229
+ /*
230
+ * AnalysisState - The state needed for walking the parse table when analyzing
231
+ * a query pattern, to determine at which steps the pattern might fail to match.
232
+ */
233
+ typedef struct {
234
+ TSStateId parse_state;
235
+ TSSymbol parent_symbol;
236
+ uint16_t child_index;
237
+ TSFieldId field_id: 15;
238
+ bool done: 1;
239
+ } AnalysisStateEntry;
240
+
241
+ typedef struct {
242
+ AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH];
243
+ uint16_t depth;
244
+ uint16_t step_index;
245
+ TSSymbol root_symbol;
246
+ } AnalysisState;
247
+
248
+ typedef Array(AnalysisState *) AnalysisStateSet;
249
+
250
+ typedef struct {
251
+ AnalysisStateSet states;
252
+ AnalysisStateSet next_states;
253
+ AnalysisStateSet deeper_states;
254
+ AnalysisStateSet state_pool;
255
+ Array(uint16_t) final_step_indices;
256
+ Array(TSSymbol) finished_parent_symbols;
257
+ bool did_abort;
258
+ } QueryAnalysis;
259
+
260
+ /*
261
+ * AnalysisSubgraph - A subset of the states in the parse table that are used
262
+ * in constructing nodes with a certain symbol. Each state is accompanied by
263
+ * some information about the possible node that could be produced in
264
+ * downstream states.
265
+ */
266
+ typedef struct {
267
+ TSStateId state;
268
+ uint16_t production_id;
269
+ uint8_t child_index: 7;
270
+ bool done: 1;
271
+ } AnalysisSubgraphNode;
272
+
273
+ typedef struct {
274
+ TSSymbol symbol;
275
+ Array(TSStateId) start_states;
276
+ Array(AnalysisSubgraphNode) nodes;
277
+ } AnalysisSubgraph;
278
+
279
+ typedef Array(AnalysisSubgraph) AnalysisSubgraphArray;
280
+
281
+ /*
282
+ * StatePredecessorMap - A map that stores the predecessors of each parse state.
283
+ * This is used during query analysis to determine which parse states can lead
284
+ * to which reduce actions.
285
+ */
286
+ typedef struct {
287
+ TSStateId *contents;
288
+ } StatePredecessorMap;
289
+
290
+ /*
291
+ * TSQuery - A tree query, compiled from a string of S-expressions. The query
292
+ * itself is immutable. The mutable state used in the process of executing the
293
+ * query is stored in a `TSQueryCursor`.
294
+ */
295
+ struct TSQuery {
296
+ SymbolTable captures;
297
+ SymbolTable predicate_values;
298
+ Array(CaptureQuantifiers) capture_quantifiers;
299
+ Array(QueryStep) steps;
300
+ Array(PatternEntry) pattern_map;
301
+ Array(TSQueryPredicateStep) predicate_steps;
302
+ Array(QueryPattern) patterns;
303
+ Array(StepOffset) step_offsets;
304
+ Array(TSFieldId) negated_fields;
305
+ Array(char) string_buffer;
306
+ Array(TSSymbol) repeat_symbols_with_rootless_patterns;
307
+ const TSLanguage *language;
308
+ uint16_t wildcard_root_pattern_count;
309
+ };
310
+
311
+ /*
312
+ * TSQueryCursor - A stateful struct used to execute a query on a tree.
313
+ */
314
+ struct TSQueryCursor {
315
+ const TSQuery *query;
316
+ TSTreeCursor cursor;
317
+ Array(QueryState) states;
318
+ Array(QueryState) finished_states;
319
+ CaptureListPool capture_list_pool;
320
+ uint32_t depth;
321
+ uint32_t max_start_depth;
322
+ uint32_t start_byte;
323
+ uint32_t end_byte;
324
+ TSPoint start_point;
325
+ TSPoint end_point;
326
+ uint32_t next_state_id;
327
+ TSClock end_clock;
328
+ TSDuration timeout_duration;
329
+ const TSQueryCursorOptions *query_options;
330
+ TSQueryCursorState query_state;
331
+ unsigned operation_count;
332
+ bool on_visible_node;
333
+ bool ascending;
334
+ bool halted;
335
+ bool did_exceed_match_limit;
336
+ };
337
+
338
+ static const TSQueryError PARENT_DONE = -1;
339
+ static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX;
340
+ static const uint16_t NONE = UINT16_MAX;
341
+ static const TSSymbol WILDCARD_SYMBOL = 0;
342
+ static const unsigned OP_COUNT_PER_QUERY_TIMEOUT_CHECK = 100;
343
+
344
+ /**********
345
+ * Stream
346
+ **********/
347
+
348
+ // Advance to the next unicode code point in the stream. Returns true only when a code point was decoded; returns false at the end of input or when decoding reports a size of zero.
349
+ static bool stream_advance(Stream *self) {
350
+ self->input += self->next_size; // step past the code point that was current until now
351
+ if (self->input < self->end) {
352
+ uint32_t size = ts_decode_utf8(
353
+ (const uint8_t *)self->input,
354
+ (uint32_t)(self->end - self->input), // number of bytes left in the input
355
+ &self->next
356
+ );
357
+ if (size > 0) { // on a zero size, fall through to `return false` and leave next_size unchanged
358
+ self->next_size = size;
359
+ return true;
360
+ }
361
+ } else { // at or past the end: expose a NUL code point that occupies zero bytes
362
+ self->next_size = 0;
363
+ self->next = '\0';
364
+ }
365
+ return false;
366
+ }
367
+
368
+ // Reset the stream to the given input position, represented as a pointer
369
+ // into the input string, and decode the code point found at that position.
370
+ static void stream_reset(Stream *self, const char *input) {
371
+ self->input = input;
372
+ self->next_size = 0; // stream_advance first adds next_size to input; zero keeps it at `input`
373
+ stream_advance(self);
374
+ }
375
+
376
+ static Stream stream_new(const char *string, uint32_t length) { // Build a stream over `length` bytes of `string` and load its first code point.
377
+ Stream self = {
378
+ .next = 0,
379
+ .input = string,
380
+ .start = string,
381
+ .end = string + length,
382
+ };
383
+ stream_advance(&self); // next_size was left zero-initialized, so this decodes at `string` itself
384
+ return self;
385
+ }
386
+
387
+ static void stream_skip_whitespace(Stream *self) { // Skip any run of whitespace and `;` comments at the current position.
388
+ for (;;) {
389
+ if (iswspace(self->next)) {
390
+ stream_advance(self);
391
+ } else if (self->next == ';') {
392
+ // skip over comments, which run from `;` up to (not including) the newline
393
+ stream_advance(self);
394
+ while (self->next && self->next != '\n') { // the newline is left for the whitespace branch on the next pass
395
+ if (!stream_advance(self)) break;
396
+ }
397
+ } else {
398
+ break;
399
+ }
400
+ }
401
+ }
402
+
403
+ static bool stream_is_ident_start(Stream *self) { // True if the current code point is alphanumeric, an underscore or a hyphen.
404
+ return iswalnum(self->next) || self->next == '_' || self->next == '-';
405
+ }
406
+
407
+ static void stream_scan_identifier(Stream *stream) { // Consume the current code point, then every following identifier character.
408
+ do { // do-while: the current code point is consumed without being checked here
409
+ stream_advance(stream);
410
+ } while (
411
+ iswalnum(stream->next) ||
412
+ stream->next == '_' ||
413
+ stream->next == '-' ||
414
+ stream->next == '.' ||
415
+ stream->next == '?' ||
416
+ stream->next == '!'
417
+ );
418
+ }
419
+
420
+ static uint32_t stream_offset(Stream *self) { // Byte distance from the start of the input to the current position.
421
+ return (uint32_t)(self->input - self->start);
422
+ }
423
+
424
+ /******************
425
+ * CaptureListPool
426
+ ******************/
427
+
428
+ static CaptureListPool capture_list_pool_new(void) { // Build a pool with no capture lists and the list limit set to UINT32_MAX.
429
+ return (CaptureListPool) {
430
+ .list = array_new(),
431
+ .empty_list = array_new(), // handed out by capture_list_pool_get for ids outside the pool
432
+ .max_capture_list_count = UINT32_MAX,
433
+ .free_capture_list_count = 0,
434
+ };
435
+ }
436
+
437
+ static void capture_list_pool_reset(CaptureListPool *self) { // Mark every list in the pool as unused without removing any of them.
438
+ for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
439
+ // This invalid size means that the list is not in use.
440
+ self->list.contents[i].size = UINT32_MAX;
441
+ }
442
+ self->free_capture_list_count = self->list.size; // every list in the pool is now free
443
+ }
444
+
445
+ static void capture_list_pool_delete(CaptureListPool *self) { // Call array_delete on each capture list, then on the array that holds them.
446
+ for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
447
+ array_delete(&self->list.contents[i]);
448
+ }
449
+ array_delete(&self->list); // NOTE(review): empty_list is not deleted here; presumably it never owns storage - confirm
450
+ }
451
+
452
+ static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { // Read-only lookup of the capture list with the given id.
453
+ if (id >= self->list.size) return &self->empty_list; // ids outside the pool resolve to the pool's empty list
454
+ return &self->list.contents[id];
455
+ }
456
+
457
+ static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { // Mutable lookup of the capture list with the given id.
458
+ ts_assert(id < self->list.size); // unlike capture_list_pool_get, there is no empty-list fallback here
459
+ return &self->list.contents[id];
460
+ }
461
+
462
+ static bool capture_list_pool_is_empty(const CaptureListPool *self) { // True when no list is free and the pool may not grow any further.
463
+ // The capture list pool is empty if all allocated lists are in use, and we
464
+ // have reached the maximum allowed number of allocated lists.
465
+ return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
466
+ }
467
+
468
+ static uint16_t capture_list_pool_acquire(CaptureListPool *self) { // Returns the id of a cleared capture list, or NONE when none can be provided.
469
+ // First see if any already allocated capture list is currently unused.
470
+ if (self->free_capture_list_count > 0) {
471
+ for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
472
+ if (self->list.contents[i].size == UINT32_MAX) {
473
+ array_clear(&self->list.contents[i]); // replaces the "unused" marker with a valid empty size
474
+ self->free_capture_list_count--;
475
+ return i;
476
+ }
477
+ }
478
+ }
479
+
480
+ // Otherwise allocate and initialize a new capture list, as long as that doesn't
481
+ // put us over the requested maximum or outside the range of ids below NONE.
482
+ uint32_t i = self->list.size;
483
+ if (i >= self->max_capture_list_count || i >= NONE) { // an index of NONE or above cannot be a valid uint16_t id
484
+ return NONE;
485
+ }
486
+ CaptureList list;
487
+ array_init(&list);
488
+ array_push(&self->list, list);
489
+ return (uint16_t)i; // lossless: i < NONE was checked above
490
+ }
491
+
492
+ static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
493
+ if (id >= self->list.size) return;
494
+ self->list.contents[id].size = UINT32_MAX;
495
+ self->free_capture_list_count++;
496
+ }
497
+
498
+ /**************
499
+ * Quantifiers
500
+ **************/
501
+
502
+ static TSQuantifier quantifier_mul(
503
+ TSQuantifier left,
504
+ TSQuantifier right
505
+ ) {
506
+ switch (left)
507
+ {
508
+ case TSQuantifierZero:
509
+ return TSQuantifierZero;
510
+ case TSQuantifierZeroOrOne:
511
+ switch (right) {
512
+ case TSQuantifierZero:
513
+ return TSQuantifierZero;
514
+ case TSQuantifierZeroOrOne:
515
+ case TSQuantifierOne:
516
+ return TSQuantifierZeroOrOne;
517
+ case TSQuantifierZeroOrMore:
518
+ case TSQuantifierOneOrMore:
519
+ return TSQuantifierZeroOrMore;
520
+ };
521
+ break;
522
+ case TSQuantifierZeroOrMore:
523
+ switch (right) {
524
+ case TSQuantifierZero:
525
+ return TSQuantifierZero;
526
+ case TSQuantifierZeroOrOne:
527
+ case TSQuantifierZeroOrMore:
528
+ case TSQuantifierOne:
529
+ case TSQuantifierOneOrMore:
530
+ return TSQuantifierZeroOrMore;
531
+ };
532
+ break;
533
+ case TSQuantifierOne:
534
+ return right;
535
+ case TSQuantifierOneOrMore:
536
+ switch (right) {
537
+ case TSQuantifierZero:
538
+ return TSQuantifierZero;
539
+ case TSQuantifierZeroOrOne:
540
+ case TSQuantifierZeroOrMore:
541
+ return TSQuantifierZeroOrMore;
542
+ case TSQuantifierOne:
543
+ case TSQuantifierOneOrMore:
544
+ return TSQuantifierOneOrMore;
545
+ };
546
+ break;
547
+ }
548
+ return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
549
+ }
550
+
551
+ static TSQuantifier quantifier_join(
552
+ TSQuantifier left,
553
+ TSQuantifier right
554
+ ) {
555
+ switch (left)
556
+ {
557
+ case TSQuantifierZero:
558
+ switch (right) {
559
+ case TSQuantifierZero:
560
+ return TSQuantifierZero;
561
+ case TSQuantifierZeroOrOne:
562
+ case TSQuantifierOne:
563
+ return TSQuantifierZeroOrOne;
564
+ case TSQuantifierZeroOrMore:
565
+ case TSQuantifierOneOrMore:
566
+ return TSQuantifierZeroOrMore;
567
+ };
568
+ break;
569
+ case TSQuantifierZeroOrOne:
570
+ switch (right) {
571
+ case TSQuantifierZero:
572
+ case TSQuantifierZeroOrOne:
573
+ case TSQuantifierOne:
574
+ return TSQuantifierZeroOrOne;
575
+ break;
576
+ case TSQuantifierZeroOrMore:
577
+ case TSQuantifierOneOrMore:
578
+ return TSQuantifierZeroOrMore;
579
+ break;
580
+ };
581
+ break;
582
+ case TSQuantifierZeroOrMore:
583
+ return TSQuantifierZeroOrMore;
584
+ case TSQuantifierOne:
585
+ switch (right) {
586
+ case TSQuantifierZero:
587
+ case TSQuantifierZeroOrOne:
588
+ return TSQuantifierZeroOrOne;
589
+ case TSQuantifierZeroOrMore:
590
+ return TSQuantifierZeroOrMore;
591
+ case TSQuantifierOne:
592
+ return TSQuantifierOne;
593
+ case TSQuantifierOneOrMore:
594
+ return TSQuantifierOneOrMore;
595
+ };
596
+ break;
597
+ case TSQuantifierOneOrMore:
598
+ switch (right) {
599
+ case TSQuantifierZero:
600
+ case TSQuantifierZeroOrOne:
601
+ case TSQuantifierZeroOrMore:
602
+ return TSQuantifierZeroOrMore;
603
+ case TSQuantifierOne:
604
+ case TSQuantifierOneOrMore:
605
+ return TSQuantifierOneOrMore;
606
+ };
607
+ break;
608
+ }
609
+ return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
610
+ }
611
+
612
+ static TSQuantifier quantifier_add(
613
+ TSQuantifier left,
614
+ TSQuantifier right
615
+ ) {
616
+ switch (left)
617
+ {
618
+ case TSQuantifierZero:
619
+ return right;
620
+ case TSQuantifierZeroOrOne:
621
+ switch (right) {
622
+ case TSQuantifierZero:
623
+ return TSQuantifierZeroOrOne;
624
+ case TSQuantifierZeroOrOne:
625
+ case TSQuantifierZeroOrMore:
626
+ return TSQuantifierZeroOrMore;
627
+ case TSQuantifierOne:
628
+ case TSQuantifierOneOrMore:
629
+ return TSQuantifierOneOrMore;
630
+ };
631
+ break;
632
+ case TSQuantifierZeroOrMore:
633
+ switch (right) {
634
+ case TSQuantifierZero:
635
+ return TSQuantifierZeroOrMore;
636
+ case TSQuantifierZeroOrOne:
637
+ case TSQuantifierZeroOrMore:
638
+ return TSQuantifierZeroOrMore;
639
+ case TSQuantifierOne:
640
+ case TSQuantifierOneOrMore:
641
+ return TSQuantifierOneOrMore;
642
+ };
643
+ break;
644
+ case TSQuantifierOne:
645
+ switch (right) {
646
+ case TSQuantifierZero:
647
+ return TSQuantifierOne;
648
+ case TSQuantifierZeroOrOne:
649
+ case TSQuantifierZeroOrMore:
650
+ case TSQuantifierOne:
651
+ case TSQuantifierOneOrMore:
652
+ return TSQuantifierOneOrMore;
653
+ };
654
+ break;
655
+ case TSQuantifierOneOrMore:
656
+ return TSQuantifierOneOrMore;
657
+ }
658
+ return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
659
+ }
660
+
661
+ // Create new capture quantifiers structure
662
+ static CaptureQuantifiers capture_quantifiers_new(void) {
663
+ return (CaptureQuantifiers) array_new();
664
+ }
665
+
666
+ // Delete capture quantifiers structure
667
+ static void capture_quantifiers_delete(
668
+ CaptureQuantifiers *self
669
+ ) {
670
+ array_delete(self);
671
+ }
672
+
673
+ // Clear capture quantifiers structure
674
+ static void capture_quantifiers_clear(
675
+ CaptureQuantifiers *self
676
+ ) {
677
+ array_clear(self);
678
+ }
679
+
680
+ // Replace capture quantifiers with the given quantifiers
681
+ static void capture_quantifiers_replace(
682
+ CaptureQuantifiers *self,
683
+ CaptureQuantifiers *quantifiers
684
+ ) {
685
+ array_clear(self);
686
+ array_push_all(self, quantifiers);
687
+ }
688
+
689
+ // Return capture quantifier for the given capture id
690
+ static TSQuantifier capture_quantifier_for_id(
691
+ const CaptureQuantifiers *self,
692
+ uint16_t id
693
+ ) {
694
+ return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id);
695
+ }
696
+
697
+ // Add the given quantifier to the current value for id
698
+ static void capture_quantifiers_add_for_id(
699
+ CaptureQuantifiers *self,
700
+ uint16_t id,
701
+ TSQuantifier quantifier
702
+ ) {
703
+ if (self->size <= id) {
704
+ array_grow_by(self, id + 1 - self->size);
705
+ }
706
+ uint8_t *own_quantifier = array_get(self, id);
707
+ *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier);
708
+ }
709
+
710
+ // Point-wise add the given quantifiers to the current values
711
+ static void capture_quantifiers_add_all(
712
+ CaptureQuantifiers *self,
713
+ CaptureQuantifiers *quantifiers
714
+ ) {
715
+ if (self->size < quantifiers->size) {
716
+ array_grow_by(self, quantifiers->size - self->size);
717
+ }
718
+ for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) {
719
+ uint8_t *quantifier = array_get(quantifiers, id);
720
+ uint8_t *own_quantifier = array_get(self, id);
721
+ *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
722
+ }
723
+ }
724
+
725
+ // Join the given quantifier with the current values
726
+ static void capture_quantifiers_mul(
727
+ CaptureQuantifiers *self,
728
+ TSQuantifier quantifier
729
+ ) {
730
+ for (uint16_t id = 0; id < (uint16_t)self->size; id++) {
731
+ uint8_t *own_quantifier = array_get(self, id);
732
+ *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier);
733
+ }
734
+ }
735
+
736
+ // Point-wise join the quantifiers from a list of alternatives with the current values
737
+ static void capture_quantifiers_join_all(
738
+ CaptureQuantifiers *self,
739
+ CaptureQuantifiers *quantifiers
740
+ ) {
741
+ if (self->size < quantifiers->size) {
742
+ array_grow_by(self, quantifiers->size - self->size);
743
+ }
744
+ for (uint32_t id = 0; id < quantifiers->size; id++) {
745
+ uint8_t *quantifier = array_get(quantifiers, id);
746
+ uint8_t *own_quantifier = array_get(self, id);
747
+ *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
748
+ }
749
+ for (uint32_t id = quantifiers->size; id < self->size; id++) {
750
+ uint8_t *own_quantifier = array_get(self, id);
751
+ *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero);
752
+ }
753
+ }
754
+
755
+ /**************
756
+ * SymbolTable
757
+ **************/
758
+
759
+ static SymbolTable symbol_table_new(void) {
760
+ return (SymbolTable) {
761
+ .characters = array_new(),
762
+ .slices = array_new(),
763
+ };
764
+ }
765
+
766
+ static void symbol_table_delete(SymbolTable *self) {
767
+ array_delete(&self->characters);
768
+ array_delete(&self->slices);
769
+ }
770
+
771
+ static int symbol_table_id_for_name(
772
+ const SymbolTable *self,
773
+ const char *name,
774
+ uint32_t length
775
+ ) {
776
+ for (unsigned i = 0; i < self->slices.size; i++) {
777
+ Slice slice = self->slices.contents[i];
778
+ if (
779
+ slice.length == length &&
780
+ !strncmp(&self->characters.contents[slice.offset], name, length)
781
+ ) return i;
782
+ }
783
+ return -1;
784
+ }
785
+
786
+ static const char *symbol_table_name_for_id(
787
+ const SymbolTable *self,
788
+ uint16_t id,
789
+ uint32_t *length
790
+ ) {
791
+ Slice slice = self->slices.contents[id];
792
+ *length = slice.length;
793
+ return &self->characters.contents[slice.offset];
794
+ }
795
+
796
+ static uint16_t symbol_table_insert_name(
797
+ SymbolTable *self,
798
+ const char *name,
799
+ uint32_t length
800
+ ) {
801
+ int id = symbol_table_id_for_name(self, name, length);
802
+ if (id >= 0) return (uint16_t)id;
803
+ Slice slice = {
804
+ .offset = self->characters.size,
805
+ .length = length,
806
+ };
807
+ array_grow_by(&self->characters, length + 1);
808
+ memcpy(&self->characters.contents[slice.offset], name, length);
809
+ self->characters.contents[self->characters.size - 1] = 0;
810
+ array_push(&self->slices, slice);
811
+ return self->slices.size - 1;
812
+ }
813
+
814
+ /************
815
+ * QueryStep
816
+ ************/
817
+
818
+ static QueryStep query_step__new(
819
+ TSSymbol symbol,
820
+ uint16_t depth,
821
+ bool is_immediate
822
+ ) {
823
+ QueryStep step = {
824
+ .symbol = symbol,
825
+ .depth = depth,
826
+ .field = 0,
827
+ .alternative_index = NONE,
828
+ .negated_field_list_id = 0,
829
+ .contains_captures = false,
830
+ .is_last_child = false,
831
+ .is_named = false,
832
+ .is_pass_through = false,
833
+ .is_dead_end = false,
834
+ .root_pattern_guaranteed = false,
835
+ .is_immediate = is_immediate,
836
+ .alternative_is_immediate = false,
837
+ };
838
+ for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
839
+ step.capture_ids[i] = NONE;
840
+ }
841
+ return step;
842
+ }
843
+
844
+ static void query_step__add_capture(QueryStep *self, uint16_t capture_id) {
845
+ for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
846
+ if (self->capture_ids[i] == NONE) {
847
+ self->capture_ids[i] = capture_id;
848
+ break;
849
+ }
850
+ }
851
+ }
852
+
853
+ static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
854
+ for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
855
+ if (self->capture_ids[i] == capture_id) {
856
+ self->capture_ids[i] = NONE;
857
+ while (i + 1 < MAX_STEP_CAPTURE_COUNT) {
858
+ if (self->capture_ids[i + 1] == NONE) break;
859
+ self->capture_ids[i] = self->capture_ids[i + 1];
860
+ self->capture_ids[i + 1] = NONE;
861
+ i++;
862
+ }
863
+ break;
864
+ }
865
+ }
866
+ }
867
+
868
+ /**********************
869
+ * StatePredecessorMap
870
+ **********************/
871
+
872
+ static inline StatePredecessorMap state_predecessor_map_new(
873
+ const TSLanguage *language
874
+ ) {
875
+ return (StatePredecessorMap) {
876
+ .contents = ts_calloc(
877
+ (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
878
+ sizeof(TSStateId)
879
+ ),
880
+ };
881
+ }
882
+
883
+ static inline void state_predecessor_map_delete(StatePredecessorMap *self) {
884
+ ts_free(self->contents);
885
+ }
886
+
887
+ static inline void state_predecessor_map_add(
888
+ StatePredecessorMap *self,
889
+ TSStateId state,
890
+ TSStateId predecessor
891
+ ) {
892
+ size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
893
+ TSStateId *count = &self->contents[index];
894
+ if (
895
+ *count == 0 ||
896
+ (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)
897
+ ) {
898
+ (*count)++;
899
+ self->contents[index + *count] = predecessor;
900
+ }
901
+ }
902
+
903
+ static inline const TSStateId *state_predecessor_map_get(
904
+ const StatePredecessorMap *self,
905
+ TSStateId state,
906
+ unsigned *count
907
+ ) {
908
+ size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
909
+ *count = self->contents[index];
910
+ return &self->contents[index + 1];
911
+ }
912
+
913
+ /****************
914
+ * AnalysisState
915
+ ****************/
916
+
917
+ static unsigned analysis_state__recursion_depth(const AnalysisState *self) {
918
+ unsigned result = 0;
919
+ for (unsigned i = 0; i < self->depth; i++) {
920
+ TSSymbol symbol = self->stack[i].parent_symbol;
921
+ for (unsigned j = 0; j < i; j++) {
922
+ if (self->stack[j].parent_symbol == symbol) {
923
+ result++;
924
+ break;
925
+ }
926
+ }
927
+ }
928
+ return result;
929
+ }
930
+
931
+ static inline int analysis_state__compare_position(
932
+ AnalysisState *const *self,
933
+ AnalysisState *const *other
934
+ ) {
935
+ for (unsigned i = 0; i < (*self)->depth; i++) {
936
+ if (i >= (*other)->depth) return -1;
937
+ if ((*self)->stack[i].child_index < (*other)->stack[i].child_index) return -1;
938
+ if ((*self)->stack[i].child_index > (*other)->stack[i].child_index) return 1;
939
+ }
940
+ if ((*self)->depth < (*other)->depth) return 1;
941
+ if ((*self)->step_index < (*other)->step_index) return -1;
942
+ if ((*self)->step_index > (*other)->step_index) return 1;
943
+ return 0;
944
+ }
945
+
946
+ static inline int analysis_state__compare(
947
+ AnalysisState *const *self,
948
+ AnalysisState *const *other
949
+ ) {
950
+ int result = analysis_state__compare_position(self, other);
951
+ if (result != 0) return result;
952
+ for (unsigned i = 0; i < (*self)->depth; i++) {
953
+ if ((*self)->stack[i].parent_symbol < (*other)->stack[i].parent_symbol) return -1;
954
+ if ((*self)->stack[i].parent_symbol > (*other)->stack[i].parent_symbol) return 1;
955
+ if ((*self)->stack[i].parse_state < (*other)->stack[i].parse_state) return -1;
956
+ if ((*self)->stack[i].parse_state > (*other)->stack[i].parse_state) return 1;
957
+ if ((*self)->stack[i].field_id < (*other)->stack[i].field_id) return -1;
958
+ if ((*self)->stack[i].field_id > (*other)->stack[i].field_id) return 1;
959
+ }
960
+ return 0;
961
+ }
962
+
963
+ static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) {
964
+ if (self->depth == 0) {
965
+ return &self->stack[0];
966
+ }
967
+ return &self->stack[self->depth - 1];
968
+ }
969
+
970
+ static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) {
971
+ for (unsigned i = 0; i < self->depth; i++) {
972
+ if (self->stack[i].parent_symbol == symbol) return true;
973
+ }
974
+ return false;
975
+ }
976
+
977
+ /******************
978
+ * AnalysisStateSet
979
+ ******************/
980
+
981
+ // Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by
982
+ // cloning one from scratch.
983
+ static inline AnalysisState *analysis_state_pool__clone_or_reuse(
984
+ AnalysisStateSet *self,
985
+ AnalysisState *borrowed_item
986
+ ) {
987
+ AnalysisState *new_item;
988
+ if (self->size) {
989
+ new_item = array_pop(self);
990
+ } else {
991
+ new_item = ts_malloc(sizeof(AnalysisState));
992
+ }
993
+ *new_item = *borrowed_item;
994
+ return new_item;
995
+ }
996
+
997
+ // Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this
998
+ // set. The set does not contain duplicates, so if the item is already present, it will not be
999
+ // inserted, and no clone will be made.
1000
+ //
1001
+ // The caller retains ownership of the passed-in memory. However, the clone that is created by this
1002
+ // function will be managed by the state set.
1003
+ static inline void analysis_state_set__insert_sorted(
1004
+ AnalysisStateSet *self,
1005
+ AnalysisStateSet *pool,
1006
+ AnalysisState *borrowed_item
1007
+ ) {
1008
+ unsigned index, exists;
1009
+ array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists);
1010
+ if (!exists) {
1011
+ AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
1012
+ array_insert(self, index, new_item);
1013
+ }
1014
+ }
1015
+
1016
+ // Inserts a clone of the passed-in item at the end position of this list.
1017
+ //
1018
+ // IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function
1019
+ // `analysis_state__compare`) than largest item already in this set. If items are inserted in the
1020
+ // wrong order, the set will not function properly for future use.
1021
+ //
1022
+ // The caller retains ownership of the passed-in memory. However, the clone that is created by this
1023
+ // function will be managed by the state set.
1024
+ static inline void analysis_state_set__push(
1025
+ AnalysisStateSet *self,
1026
+ AnalysisStateSet *pool,
1027
+ AnalysisState *borrowed_item
1028
+ ) {
1029
+ AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
1030
+ array_push(self, new_item);
1031
+ }
1032
+
1033
+ // Removes all items from this set, returning it to an empty state.
1034
+ static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) {
1035
+ array_push_all(pool, self);
1036
+ array_clear(self);
1037
+ }
1038
+
1039
+ // Releases all memory that is managed with this state set, including any items currently present.
1040
+ // After calling this function, the set is no longer suitable for use.
1041
+ static inline void analysis_state_set__delete(AnalysisStateSet *self) {
1042
+ for (unsigned i = 0; i < self->size; i++) {
1043
+ ts_free(self->contents[i]);
1044
+ }
1045
+ array_delete(self);
1046
+ }
1047
+
1048
+ /****************
1049
+ * QueryAnalyzer
1050
+ ****************/
1051
+
1052
+ static inline QueryAnalysis query_analysis__new(void) {
1053
+ return (QueryAnalysis) {
1054
+ .states = array_new(),
1055
+ .next_states = array_new(),
1056
+ .deeper_states = array_new(),
1057
+ .state_pool = array_new(),
1058
+ .final_step_indices = array_new(),
1059
+ .finished_parent_symbols = array_new(),
1060
+ .did_abort = false,
1061
+ };
1062
+ }
1063
+
1064
+ static inline void query_analysis__delete(QueryAnalysis *self) {
1065
+ analysis_state_set__delete(&self->states);
1066
+ analysis_state_set__delete(&self->next_states);
1067
+ analysis_state_set__delete(&self->deeper_states);
1068
+ analysis_state_set__delete(&self->state_pool);
1069
+ array_delete(&self->final_step_indices);
1070
+ array_delete(&self->finished_parent_symbols);
1071
+ }
1072
+
1073
+ /***********************
1074
+ * AnalysisSubgraphNode
1075
+ ***********************/
1076
+
1077
+ static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) {
1078
+ if (self->state < other->state) return -1;
1079
+ if (self->state > other->state) return 1;
1080
+ if (self->child_index < other->child_index) return -1;
1081
+ if (self->child_index > other->child_index) return 1;
1082
+ if (self->done < other->done) return -1;
1083
+ if (self->done > other->done) return 1;
1084
+ if (self->production_id < other->production_id) return -1;
1085
+ if (self->production_id > other->production_id) return 1;
1086
+ return 0;
1087
+ }
1088
+
1089
+ /*********
1090
+ * Query
1091
+ *********/
1092
+
1093
+ // The `pattern_map` contains a mapping from TSSymbol values to indices in the
1094
+ // `steps` array. For a given syntax node, the `pattern_map` makes it possible
1095
+ // to quickly find the starting steps of all of the patterns whose root matches
1096
+ // that node. Each entry has two fields: a `pattern_index`, which identifies one
1097
+ // of the patterns in the query, and a `step_index`, which indicates the start
1098
+ // offset of that pattern's steps within the `steps` array.
1099
+ //
1100
+ // The entries are sorted by the patterns' root symbols, and lookups use a
1101
+ // binary search. This ensures that the cost of this initial lookup step
1102
+ // scales logarithmically with the number of patterns in the query.
1103
+ //
1104
+ // This returns `true` if the symbol is present and `false` otherwise.
1105
+ // If the symbol is not present `*result` is set to the index where the
1106
+ // symbol should be inserted.
1107
+ static inline bool ts_query__pattern_map_search(
1108
+ const TSQuery *self,
1109
+ TSSymbol needle,
1110
+ uint32_t *result
1111
+ ) {
1112
+ uint32_t base_index = self->wildcard_root_pattern_count;
1113
+ uint32_t size = self->pattern_map.size - base_index;
1114
+ if (size == 0) {
1115
+ *result = base_index;
1116
+ return false;
1117
+ }
1118
+ while (size > 1) {
1119
+ uint32_t half_size = size / 2;
1120
+ uint32_t mid_index = base_index + half_size;
1121
+ TSSymbol mid_symbol = self->steps.contents[
1122
+ self->pattern_map.contents[mid_index].step_index
1123
+ ].symbol;
1124
+ if (needle > mid_symbol) base_index = mid_index;
1125
+ size -= half_size;
1126
+ }
1127
+
1128
+ TSSymbol symbol = self->steps.contents[
1129
+ self->pattern_map.contents[base_index].step_index
1130
+ ].symbol;
1131
+
1132
+ if (needle > symbol) {
1133
+ base_index++;
1134
+ if (base_index < self->pattern_map.size) {
1135
+ symbol = self->steps.contents[
1136
+ self->pattern_map.contents[base_index].step_index
1137
+ ].symbol;
1138
+ }
1139
+ }
1140
+
1141
+ *result = base_index;
1142
+ return needle == symbol;
1143
+ }
1144
+
1145
+ // Insert a new pattern's start index into the pattern map, maintaining
1146
+ // the pattern map's ordering invariant.
1147
+ static inline void ts_query__pattern_map_insert(
1148
+ TSQuery *self,
1149
+ TSSymbol symbol,
1150
+ PatternEntry new_entry
1151
+ ) {
1152
+ uint32_t index;
1153
+ ts_query__pattern_map_search(self, symbol, &index);
1154
+
1155
+ // Ensure that the entries are sorted not only by symbol, but also
1156
+ // by pattern_index. This way, states for earlier patterns will be
1157
+ // initiated first, which allows the ordering of the states array
1158
+ // to be maintained more efficiently.
1159
+ while (index < self->pattern_map.size) {
1160
+ PatternEntry *entry = &self->pattern_map.contents[index];
1161
+ if (
1162
+ self->steps.contents[entry->step_index].symbol == symbol &&
1163
+ entry->pattern_index < new_entry.pattern_index
1164
+ ) {
1165
+ index++;
1166
+ } else {
1167
+ break;
1168
+ }
1169
+ }
1170
+
1171
+ array_insert(&self->pattern_map, index, new_entry);
1172
+ }
1173
+
1174
+ // Walk the subgraph for this non-terminal, tracking all of the possible
1175
+ // sequences of progress within the pattern.
1176
+ static void ts_query__perform_analysis(
1177
+ TSQuery *self,
1178
+ const AnalysisSubgraphArray *subgraphs,
1179
+ QueryAnalysis *analysis
1180
+ ) {
1181
+ unsigned recursion_depth_limit = 0;
1182
+ unsigned prev_final_step_count = 0;
1183
+ array_clear(&analysis->final_step_indices);
1184
+ array_clear(&analysis->finished_parent_symbols);
1185
+
1186
+ for (unsigned iteration = 0;; iteration++) {
1187
+ if (iteration == MAX_ANALYSIS_ITERATION_COUNT) {
1188
+ analysis->did_abort = true;
1189
+ break;
1190
+ }
1191
+
1192
+ #ifdef DEBUG_ANALYZE_QUERY
1193
+ printf("Iteration: %u. Final step indices:", iteration);
1194
+ for (unsigned j = 0; j < analysis->final_step_indices.size; j++) {
1195
+ printf(" %4u", analysis->final_step_indices.contents[j]);
1196
+ }
1197
+ printf("\n");
1198
+ for (unsigned j = 0; j < analysis->states.size; j++) {
1199
+ AnalysisState *state = analysis->states.contents[j];
1200
+ printf(" %3u: step: %u, stack: [", j, state->step_index);
1201
+ for (unsigned k = 0; k < state->depth; k++) {
1202
+ printf(
1203
+ " {%s, child: %u, state: %4u",
1204
+ self->language->symbol_names[state->stack[k].parent_symbol],
1205
+ state->stack[k].child_index,
1206
+ state->stack[k].parse_state
1207
+ );
1208
+ if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]);
1209
+ if (state->stack[k].done) printf(", DONE");
1210
+ printf("}");
1211
+ }
1212
+ printf(" ]\n");
1213
+ }
1214
+ #endif
1215
+
1216
+ // If no further progress can be made within the current recursion depth limit, then
1217
+ // bump the depth limit by one, and continue to process the states the exceeded the
1218
+ // limit. But only allow this if progress has been made since the last time the depth
1219
+ // limit was increased.
1220
+ if (analysis->states.size == 0) {
1221
+ if (
1222
+ analysis->deeper_states.size > 0 &&
1223
+ analysis->final_step_indices.size > prev_final_step_count
1224
+ ) {
1225
+ #ifdef DEBUG_ANALYZE_QUERY
1226
+ printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1);
1227
+ #endif
1228
+
1229
+ prev_final_step_count = analysis->final_step_indices.size;
1230
+ recursion_depth_limit++;
1231
+ AnalysisStateSet _states = analysis->states;
1232
+ analysis->states = analysis->deeper_states;
1233
+ analysis->deeper_states = _states;
1234
+ continue;
1235
+ }
1236
+
1237
+ break;
1238
+ }
1239
+
1240
+ analysis_state_set__clear(&analysis->next_states, &analysis->state_pool);
1241
+ for (unsigned j = 0; j < analysis->states.size; j++) {
1242
+ AnalysisState * const state = analysis->states.contents[j];
1243
+
1244
+ // For efficiency, it's important to avoid processing the same analysis state more
1245
+ // than once. To achieve this, keep the states in order of ascending position within
1246
+ // their hypothetical syntax trees. In each iteration of this loop, start by advancing
1247
+ // the states that have made the least progress. Avoid advancing states that have already
1248
+ // made more progress.
1249
+ if (analysis->next_states.size > 0) {
1250
+ int comparison = analysis_state__compare_position(
1251
+ &state,
1252
+ array_back(&analysis->next_states)
1253
+ );
1254
+ if (comparison == 0) {
1255
+ analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state);
1256
+ continue;
1257
+ } else if (comparison > 0) {
1258
+ #ifdef DEBUG_ANALYZE_QUERY
1259
+ printf("Terminate iteration at state %u\n", j);
1260
+ #endif
1261
+ while (j < analysis->states.size) {
1262
+ analysis_state_set__push(
1263
+ &analysis->next_states,
1264
+ &analysis->state_pool,
1265
+ analysis->states.contents[j]
1266
+ );
1267
+ j++;
1268
+ }
1269
+ break;
1270
+ }
1271
+ }
1272
+
1273
+ const TSStateId parse_state = analysis_state__top(state)->parse_state;
1274
+ const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol;
1275
+ const TSFieldId parent_field_id = analysis_state__top(state)->field_id;
1276
+ const unsigned child_index = analysis_state__top(state)->child_index;
1277
+ const QueryStep * const step = &self->steps.contents[state->step_index];
1278
+
1279
+ unsigned subgraph_index, exists;
1280
+ array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1281
+ if (!exists) continue;
1282
+ const AnalysisSubgraph *subgraph = &subgraphs->contents[subgraph_index];
1283
+
1284
+ // Follow every possible path in the parse table, but only visit states that
1285
+ // are part of the subgraph for the current symbol.
1286
+ LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state);
1287
+ while (ts_lookahead_iterator__next(&lookahead_iterator)) {
1288
+ TSSymbol sym = lookahead_iterator.symbol;
1289
+
1290
+ AnalysisSubgraphNode successor = {
1291
+ .state = parse_state,
1292
+ .child_index = child_index,
1293
+ };
1294
+ if (lookahead_iterator.action_count) {
1295
+ const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1];
1296
+ if (action->type == TSParseActionTypeShift) {
1297
+ if (!action->shift.extra) {
1298
+ successor.state = action->shift.state;
1299
+ successor.child_index++;
1300
+ }
1301
+ } else {
1302
+ continue;
1303
+ }
1304
+ } else if (lookahead_iterator.next_state != 0) {
1305
+ successor.state = lookahead_iterator.next_state;
1306
+ successor.child_index++;
1307
+ } else {
1308
+ continue;
1309
+ }
1310
+
1311
+ unsigned node_index;
1312
+ array_search_sorted_with(
1313
+ &subgraph->nodes,
1314
+ analysis_subgraph_node__compare, &successor,
1315
+ &node_index, &exists
1316
+ );
1317
+ while (node_index < subgraph->nodes.size) {
1318
+ AnalysisSubgraphNode *node = &subgraph->nodes.contents[node_index++];
1319
+ if (node->state != successor.state || node->child_index != successor.child_index) break;
1320
+
1321
+ // Use the subgraph to determine what alias and field will eventually be applied
1322
+ // to this child node.
1323
+ TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
1324
+ TSSymbol visible_symbol = alias
1325
+ ? alias
1326
+ : self->language->symbol_metadata[sym].visible
1327
+ ? self->language->public_symbol_map[sym]
1328
+ : 0;
1329
+ TSFieldId field_id = parent_field_id;
1330
+ if (!field_id) {
1331
+ const TSFieldMapEntry *field_map, *field_map_end;
1332
+ ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
1333
+ for (; field_map != field_map_end; field_map++) {
1334
+ if (!field_map->inherited && field_map->child_index == child_index) {
1335
+ field_id = field_map->field_id;
1336
+ break;
1337
+ }
1338
+ }
1339
+ }
1340
+
1341
+ // Create a new state that has advanced past this hypothetical subtree.
1342
+ AnalysisState next_state = *state;
1343
+ AnalysisStateEntry *next_state_top = analysis_state__top(&next_state);
1344
+ next_state_top->child_index = successor.child_index;
1345
+ next_state_top->parse_state = successor.state;
1346
+ if (node->done) next_state_top->done = true;
1347
+
1348
+ // Determine if this hypothetical child node would match the current step
1349
+ // of the query pattern.
1350
+ bool does_match = false;
1351
+ if (visible_symbol) {
1352
+ does_match = true;
1353
+ if (step->symbol == WILDCARD_SYMBOL) {
1354
+ if (
1355
+ step->is_named &&
1356
+ !self->language->symbol_metadata[visible_symbol].named
1357
+ ) does_match = false;
1358
+ } else if (step->symbol != visible_symbol) {
1359
+ does_match = false;
1360
+ }
1361
+ if (step->field && step->field != field_id) {
1362
+ does_match = false;
1363
+ }
1364
+ if (
1365
+ step->supertype_symbol &&
1366
+ !analysis_state__has_supertype(state, step->supertype_symbol)
1367
+ ) does_match = false;
1368
+ }
1369
+
1370
+ // If this child is hidden, then descend into it and walk through its children.
1371
+ // If the top entry of the stack is at the end of its rule, then that entry can
1372
+ // be replaced. Otherwise, push a new entry onto the stack.
1373
+ else if (sym >= self->language->token_count) {
1374
+ if (!next_state_top->done) {
1375
+ if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) {
1376
+ #ifdef DEBUG_ANALYZE_QUERY
1377
+ printf("Exceeded depth limit for state %u\n", j);
1378
+ #endif
1379
+
1380
+ analysis->did_abort = true;
1381
+ continue;
1382
+ }
1383
+
1384
+ next_state.depth++;
1385
+ next_state_top = analysis_state__top(&next_state);
1386
+ }
1387
+
1388
+ *next_state_top = (AnalysisStateEntry) {
1389
+ .parse_state = parse_state,
1390
+ .parent_symbol = sym,
1391
+ .child_index = 0,
1392
+ .field_id = field_id,
1393
+ .done = false,
1394
+ };
1395
+
1396
+ if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) {
1397
+ analysis_state_set__insert_sorted(
1398
+ &analysis->deeper_states,
1399
+ &analysis->state_pool,
1400
+ &next_state
1401
+ );
1402
+ continue;
1403
+ }
1404
+ }
1405
+
1406
+ // Pop from the stack when this state reached the end of its current syntax node.
1407
+ while (next_state.depth > 0 && next_state_top->done) {
1408
+ next_state.depth--;
1409
+ next_state_top = analysis_state__top(&next_state);
1410
+ }
1411
+
1412
+ // If this hypothetical child did match the current step of the query pattern,
1413
+ // then advance to the next step at the current depth. This involves skipping
1414
+ // over any descendant steps of the current child.
1415
+ const QueryStep *next_step = step;
1416
+ if (does_match) {
1417
+ for (;;) {
1418
+ next_state.step_index++;
1419
+ next_step = &self->steps.contents[next_state.step_index];
1420
+ if (
1421
+ next_step->depth == PATTERN_DONE_MARKER ||
1422
+ next_step->depth <= step->depth
1423
+ ) break;
1424
+ }
1425
+ } else if (successor.state == parse_state) {
1426
+ continue;
1427
+ }
1428
+
1429
+ for (;;) {
1430
+ // Skip pass-through states. Although these states have alternatives, they are only
1431
+ // used to implement repetitions, and query analysis does not need to process
1432
+ // repetitions in order to determine whether steps are possible and definite.
1433
+ if (next_step->is_pass_through) {
1434
+ next_state.step_index++;
1435
+ next_step++;
1436
+ continue;
1437
+ }
1438
+
1439
+ // If the pattern is finished or hypothetical parent node is complete, then
1440
+ // record that matching can terminate at this step of the pattern. Otherwise,
1441
+ // add this state to the list of states to process on the next iteration.
1442
+ if (!next_step->is_dead_end) {
1443
+ bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != step->depth;
1444
+ if (did_finish_pattern) {
1445
+ array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol);
1446
+ } else if (next_state.depth == 0) {
1447
+ array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index);
1448
+ } else {
1449
+ analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state);
1450
+ }
1451
+ }
1452
+
1453
+ // If the state has advanced to a step with an alternative step, then add another state
1454
+ // at that alternative step. This process is simpler than the process of actually matching a
1455
+ // pattern during query execution, because for the purposes of query analysis, there is no
1456
+ // need to process repetitions.
1457
+ if (
1458
+ does_match &&
1459
+ next_step->alternative_index != NONE &&
1460
+ next_step->alternative_index > next_state.step_index
1461
+ ) {
1462
+ next_state.step_index = next_step->alternative_index;
1463
+ next_step = &self->steps.contents[next_state.step_index];
1464
+ } else {
1465
+ break;
1466
+ }
1467
+ }
1468
+ }
1469
+ }
1470
+ }
1471
+
1472
+ AnalysisStateSet _states = analysis->states;
1473
+ analysis->states = analysis->next_states;
1474
+ analysis->next_states = _states;
1475
+ }
1476
+ }
1477
+
1478
// Analyze every pattern of the query against the language's parse table.
//
// Side effects on `self`:
//   - each step gets `contains_captures`, `parent_pattern_guaranteed` and
//     `root_pattern_guaranteed` filled in;
//   - patterns found to be reachable from a hidden, unnamed symbol are marked
//     `is_non_local`, and those symbols are recorded in
//     `repeat_symbols_with_rootless_patterns`.
//
// Returns true when every analyzed pattern is valid. Returns false as soon as
// a parent step's symbol has no analysis subgraph, or as soon as the analysis
// of a parent step finishes without recording any finished parent symbol; in
// both cases `*error_offset` receives the byte offset looked up in
// `self->step_offsets` for the offending step. All temporary arrays are
// released before returning on either path.
+ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
1479
// Remember the pattern-map indices of the non-rooted patterns whose first
// step is not a wildcard; they are analyzed separately near the end.
+ Array(uint16_t) non_rooted_pattern_start_steps = array_new();
1480
+ for (unsigned i = 0; i < self->pattern_map.size; i++) {
1481
+ PatternEntry *pattern = &self->pattern_map.contents[i];
1482
+ if (!pattern->is_rooted) {
1483
+ QueryStep *step = &self->steps.contents[pattern->step_index];
1484
+ if (step->symbol != WILDCARD_SYMBOL) {
1485
+ array_push(&non_rooted_pattern_start_steps, i);
1486
+ }
1487
+ }
1488
+ }
1489
+
1490
+ // Walk forward through all of the steps in the query, computing some
1491
+ // basic information about each step. Mark all of the steps that contain
1492
+ // captures, and record the indices of all of the steps that have child steps.
1493
+ Array(uint32_t) parent_step_indices = array_new();
1494
+ for (unsigned i = 0; i < self->steps.size; i++) {
1495
+ QueryStep *step = &self->steps.contents[i];
1496
+ if (step->depth == PATTERN_DONE_MARKER) {
1497
+ step->parent_pattern_guaranteed = true;
1498
+ step->root_pattern_guaranteed = true;
1499
+ continue;
1500
+ }
1501
+
1502
+ bool has_children = false;
1503
+ bool is_wildcard = step->symbol == WILDCARD_SYMBOL;
1504
+ step->contains_captures = step->capture_ids[0] != NONE;
1505
// Visit the descendants of this step: every following step that is deeper
// than it, up to the next step at the same or a shallower depth.
+ for (unsigned j = i + 1; j < self->steps.size; j++) {
1506
+ QueryStep *next_step = &self->steps.contents[j];
1507
+ if (
1508
+ next_step->depth == PATTERN_DONE_MARKER ||
1509
+ next_step->depth <= step->depth
1510
+ ) break;
1511
+ if (next_step->capture_ids[0] != NONE) {
1512
+ step->contains_captures = true;
1513
+ }
1514
+ if (!is_wildcard) {
1515
+ next_step->root_pattern_guaranteed = true;
1516
+ next_step->parent_pattern_guaranteed = true;
1517
+ }
1518
+ has_children = true;
1519
+ }
1520
+
1521
+ if (has_children && !is_wildcard) {
1522
+ array_push(&parent_step_indices, i);
1523
+ }
1524
+ }
1525
+
1526
+ // For every parent symbol in the query, initialize an 'analysis subgraph'.
1527
+ // This subgraph lists all of the states in the parse table that are directly
1528
+ // involved in building subtrees for this symbol.
1529
+ //
1530
+ // In addition to the parent symbols in the query, construct subgraphs for all
1531
+ // of the hidden symbols in the grammar, because these might occur within
1532
+ // one of the parent nodes, such that their children appear to belong to the
1533
+ // parent.
1534
+ AnalysisSubgraphArray subgraphs = array_new();
1535
+ for (unsigned i = 0; i < parent_step_indices.size; i++) {
1536
+ uint32_t parent_step_index = parent_step_indices.contents[i];
1537
+ TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
1538
+ AnalysisSubgraph subgraph = { .symbol = parent_symbol };
1539
+ array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1540
+ }
1541
+ for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) {
1542
+ if (!ts_language_symbol_metadata(self->language, sym).visible) {
1543
+ AnalysisSubgraph subgraph = { .symbol = sym };
1544
+ array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1545
+ }
1546
+ }
1547
+
1548
+ // Scan the parse table to find the data needed to populate these subgraphs.
1549
+ // Collect three things during this scan:
1550
+ // 1) All of the parse states where one of these symbols can start.
1551
+ // 2) All of the parse states where one of these symbols can end, along
1552
+ // with information about the node that would be created.
1553
+ // 3) A list of predecessor states for each state.
1554
+ StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
1555
+ for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) {
1556
+ unsigned subgraph_index, exists;
1557
+ LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state);
1558
+ while (ts_lookahead_iterator__next(&lookahead_iterator)) {
1559
+ if (lookahead_iterator.action_count) {
1560
+ for (unsigned i = 0; i < lookahead_iterator.action_count; i++) {
1561
+ const TSParseAction *action = &lookahead_iterator.actions[i];
1562
+ if (action->type == TSParseActionTypeReduce) {
1563
+ const TSSymbol *aliases, *aliases_end;
1564
+ ts_language_aliases_for_symbol(
1565
+ self->language,
1566
+ action->reduce.symbol,
1567
+ &aliases,
1568
+ &aliases_end
1569
+ );
1570
+ for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1571
+ array_search_sorted_by(
1572
+ &subgraphs,
1573
+ .symbol,
1574
+ *symbol,
1575
+ &subgraph_index,
1576
+ &exists
1577
+ );
1578
+ if (exists) {
1579
+ AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1580
// Record at most one end node per state: skip the push when the most
// recently added node already belongs to this state.
+ if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
1581
+ array_push(&subgraph->nodes, ((AnalysisSubgraphNode) {
1582
+ .state = state,
1583
+ .production_id = action->reduce.production_id,
1584
+ .child_index = action->reduce.child_count,
1585
+ .done = true,
1586
+ }));
1587
+ }
1588
+ }
1589
+ }
1590
+ } else if (action->type == TSParseActionTypeShift && !action->shift.extra) {
1591
+ TSStateId next_state = action->shift.state;
1592
+ state_predecessor_map_add(&predecessor_map, next_state, state);
1593
+ }
1594
+ }
1595
+ } else if (lookahead_iterator.next_state != 0) {
1596
+ if (lookahead_iterator.next_state != state) {
1597
+ state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
1598
+ }
1599
+ if (ts_language_state_is_primary(self->language, state)) {
1600
+ const TSSymbol *aliases, *aliases_end;
1601
+ ts_language_aliases_for_symbol(
1602
+ self->language,
1603
+ lookahead_iterator.symbol,
1604
+ &aliases,
1605
+ &aliases_end
1606
+ );
1607
+ for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1608
+ array_search_sorted_by(
1609
+ &subgraphs,
1610
+ .symbol,
1611
+ *symbol,
1612
+ &subgraph_index,
1613
+ &exists
1614
+ );
1615
+ if (exists) {
1616
+ AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1617
+ if (
1618
+ subgraph->start_states.size == 0 ||
1619
+ *array_back(&subgraph->start_states) != state
1620
+ )
1621
+ array_push(&subgraph->start_states, state);
1622
+ }
1623
+ }
1624
+ }
1625
+ }
1626
+ }
1627
+ }
1628
+
1629
+ // For each subgraph, compute the preceding states by walking backward
1630
+ // from the end states using the predecessor map.
1631
+ Array(AnalysisSubgraphNode) next_nodes = array_new();
1632
+ for (unsigned i = 0; i < subgraphs.size; i++) {
1633
+ AnalysisSubgraph *subgraph = &subgraphs.contents[i];
1634
// A subgraph without any end node is dropped from the array altogether.
// NOTE(review): the `i--` assumes array_erase moves the following element
// into slot i, so that slot is examined again on the next iteration.
+ if (subgraph->nodes.size == 0) {
1635
+ array_delete(&subgraph->start_states);
1636
+ array_erase(&subgraphs, i);
1637
+ i--;
1638
+ continue;
1639
+ }
1640
+ array_assign(&next_nodes, &subgraph->nodes);
1641
+ while (next_nodes.size > 0) {
1642
+ AnalysisSubgraphNode node = array_pop(&next_nodes);
1643
+ if (node.child_index > 1) {
1644
+ unsigned predecessor_count;
1645
+ const TSStateId *predecessors = state_predecessor_map_get(
1646
+ &predecessor_map,
1647
+ node.state,
1648
+ &predecessor_count
1649
+ );
1650
+ for (unsigned j = 0; j < predecessor_count; j++) {
1651
+ AnalysisSubgraphNode predecessor_node = {
1652
+ .state = predecessors[j],
1653
+ .child_index = node.child_index - 1,
1654
+ .production_id = node.production_id,
1655
+ .done = false,
1656
+ };
1657
+ unsigned index, exists;
1658
+ array_search_sorted_with(
1659
+ &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node,
1660
+ &index, &exists
1661
+ );
1662
// Only nodes not seen before are queued, so each distinct node is
// expanded at most once.
+ if (!exists) {
1663
+ array_insert(&subgraph->nodes, index, predecessor_node);
1664
+ array_push(&next_nodes, predecessor_node);
1665
+ }
1666
+ }
1667
+ }
1668
+ }
1669
+ }
1670
+
1671
+ #ifdef DEBUG_ANALYZE_QUERY
1672
+ printf("\nSubgraphs:\n");
1673
+ for (unsigned i = 0; i < subgraphs.size; i++) {
1674
+ AnalysisSubgraph *subgraph = &subgraphs.contents[i];
1675
+ printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol));
1676
+ for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1677
+ printf(
1678
+ " {state: %u}\n",
1679
+ subgraph->start_states.contents[j]
1680
+ );
1681
+ }
1682
+ for (unsigned j = 0; j < subgraph->nodes.size; j++) {
1683
+ AnalysisSubgraphNode *node = &subgraph->nodes.contents[j];
1684
+ printf(
1685
+ " {state: %u, child_index: %u, production_id: %u, done: %d}\n",
1686
+ node->state, node->child_index, node->production_id, node->done
1687
+ );
1688
+ }
1689
+ printf("\n");
1690
+ }
1691
+ #endif
1692
+
1693
+ // For each non-terminal pattern, determine if the pattern can successfully match,
1694
+ // and identify all of the possible children within the pattern where matching could fail.
1695
+ bool all_patterns_are_valid = true;
1696
+ QueryAnalysis analysis = query_analysis__new();
1697
+ for (unsigned i = 0; i < parent_step_indices.size; i++) {
1698
+ uint16_t parent_step_index = parent_step_indices.contents[i];
1699
+ uint16_t parent_depth = self->steps.contents[parent_step_index].depth;
1700
+ TSSymbol parent_symbol = self->steps.contents[parent_step_index].symbol;
1701
// Parent steps whose symbol is the built-in error symbol are not analyzed.
+ if (parent_symbol == ts_builtin_sym_error) continue;
1702
+
1703
+ // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's
1704
+ // root symbol is a terminal, then return an error.
1705
+ unsigned subgraph_index, exists;
1706
+ array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1707
+ if (!exists) {
1708
+ unsigned first_child_step_index = parent_step_index + 1;
1709
+ uint32_t j, child_exists;
1710
+ array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists);
1711
+ ts_assert(child_exists);
1712
+ *error_offset = self->step_offsets.contents[j].byte_offset;
1713
+ all_patterns_are_valid = false;
1714
+ break;
1715
+ }
1716
+
1717
+ // Initialize an analysis state at every parse state in the table where
1718
+ // this parent symbol can occur.
1719
+ AnalysisSubgraph *subgraph = &subgraphs.contents[subgraph_index];
1720
+ analysis_state_set__clear(&analysis.states, &analysis.state_pool);
1721
+ analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
1722
+ for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1723
+ TSStateId parse_state = subgraph->start_states.contents[j];
1724
+ analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
1725
+ .step_index = parent_step_index + 1,
1726
+ .stack = {
1727
+ [0] = {
1728
+ .parse_state = parse_state,
1729
+ .parent_symbol = parent_symbol,
1730
+ .child_index = 0,
1731
+ .field_id = 0,
1732
+ .done = false,
1733
+ },
1734
+ },
1735
+ .depth = 1,
1736
+ .root_symbol = parent_symbol,
1737
+ }));
1738
+ }
1739
+
1740
+ #ifdef DEBUG_ANALYZE_QUERY
1741
// NOTE(review): this reads analysis.states.contents[0], which is only
// populated when the subgraph has at least one start state.
+ printf(
1742
+ "\nWalk states for %s:\n",
1743
+ ts_language_symbol_name(self->language, analysis.states.contents[0]->stack[0].parent_symbol)
1744
+ );
1745
+ #endif
1746
+
1747
+ analysis.did_abort = false;
1748
+ ts_query__perform_analysis(self, &subgraphs, &analysis);
1749
+
1750
+ // If this pattern could not be fully analyzed, then every step should
1751
+ // be considered fallible.
1752
+ if (analysis.did_abort) {
1753
+ for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
1754
+ QueryStep *step = &self->steps.contents[j];
1755
+ if (
1756
+ step->depth <= parent_depth ||
1757
+ step->depth == PATTERN_DONE_MARKER
1758
+ ) break;
1759
+ if (!step->is_dead_end) {
1760
+ step->parent_pattern_guaranteed = false;
1761
+ step->root_pattern_guaranteed = false;
1762
+ }
1763
+ }
1764
+ continue;
1765
+ }
1766
+
1767
+ // If this pattern cannot match, store the pattern index so that it can be
1768
+ // returned to the caller.
1769
+ if (analysis.finished_parent_symbols.size == 0) {
1770
+ ts_assert(analysis.final_step_indices.size > 0);
1771
+ uint16_t impossible_step_index = *array_back(&analysis.final_step_indices);
1772
+ uint32_t j, impossible_exists;
1773
+ array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists);
1774
// Clamp the search result so that it always indexes an existing entry.
+ if (j >= self->step_offsets.size) j = self->step_offsets.size - 1;
1775
+ *error_offset = self->step_offsets.contents[j].byte_offset;
1776
+ all_patterns_are_valid = false;
1777
+ break;
1778
+ }
1779
+
1780
+ // Mark as fallible any step where a match terminated.
1781
+ // Later, this property will be propagated to all of the step's predecessors.
1782
+ for (unsigned j = 0; j < analysis.final_step_indices.size; j++) {
1783
+ uint32_t final_step_index = analysis.final_step_indices.contents[j];
1784
+ QueryStep *step = &self->steps.contents[final_step_index];
1785
+ if (
1786
+ step->depth != PATTERN_DONE_MARKER &&
1787
+ step->depth > parent_depth &&
1788
+ !step->is_dead_end
1789
+ ) {
1790
+ step->parent_pattern_guaranteed = false;
1791
+ step->root_pattern_guaranteed = false;
1792
+ }
1793
+ }
1794
+ }
1795
+
1796
+ // Mark as indefinite any step with captures that are used in predicates.
1797
+ Array(uint16_t) predicate_capture_ids = array_new();
1798
+ for (unsigned i = 0; i < self->patterns.size; i++) {
1799
+ QueryPattern *pattern = &self->patterns.contents[i];
1800
+
1801
+ // Gather all of the captures that are used in predicates for this pattern.
1802
+ array_clear(&predicate_capture_ids);
1803
+ for (
1804
+ unsigned start = pattern->predicate_steps.offset,
1805
+ end = start + pattern->predicate_steps.length,
1806
+ j = start; j < end; j++
1807
+ ) {
1808
+ TSQueryPredicateStep *step = &self->predicate_steps.contents[j];
1809
+ if (step->type == TSQueryPredicateStepTypeCapture) {
1810
+ uint16_t value_id = step->value_id;
1811
+ array_insert_sorted_by(&predicate_capture_ids, , value_id);
1812
+ }
1813
+ }
1814
+
1815
+ // Find all of the steps that have these captures.
1816
+ for (
1817
+ unsigned start = pattern->steps.offset,
1818
+ end = start + pattern->steps.length,
1819
+ j = start; j < end; j++
1820
+ ) {
1821
+ QueryStep *step = &self->steps.contents[j];
1822
+ for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
1823
+ uint16_t capture_id = step->capture_ids[k];
1824
+ if (capture_id == NONE) break;
1825
+ unsigned index, exists;
1826
+ array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists);
1827
+ if (exists) {
1828
+ step->root_pattern_guaranteed = false;
1829
+ break;
1830
+ }
1831
+ }
1832
+ }
1833
+ }
1834
+
1835
+ // Propagate fallibility. If a pattern is fallible at a given step, then it is
1836
+ // fallible at all of its preceding steps.
1837
// The sweep below repeats until a full pass changes nothing. Step 0 is never
// visited as `i`; it is only updated as the predecessor of step 1.
+ bool done = self->steps.size == 0;
1838
+ while (!done) {
1839
+ done = true;
1840
+ for (unsigned i = self->steps.size - 1; i > 0; i--) {
1841
+ QueryStep *step = &self->steps.contents[i];
1842
+ if (step->depth == PATTERN_DONE_MARKER) continue;
1843
+
1844
+ // Determine if this step is definite or has definite alternatives.
1845
+ bool parent_pattern_guaranteed = false;
1846
+ for (;;) {
1847
+ if (step->root_pattern_guaranteed) {
1848
+ parent_pattern_guaranteed = true;
1849
+ break;
1850
+ }
1851
+ if (step->alternative_index == NONE || step->alternative_index < i) {
1852
+ break;
1853
+ }
1854
+ step = &self->steps.contents[step->alternative_index];
1855
+ }
1856
+
1857
+ // If not, mark its predecessor as indefinite.
1858
+ if (!parent_pattern_guaranteed) {
1859
+ QueryStep *prev_step = &self->steps.contents[i - 1];
1860
+ if (
1861
+ !prev_step->is_dead_end &&
1862
+ prev_step->depth != PATTERN_DONE_MARKER &&
1863
+ prev_step->root_pattern_guaranteed
1864
+ ) {
1865
+ prev_step->root_pattern_guaranteed = false;
1866
+ done = false;
1867
+ }
1868
+ }
1869
+ }
1870
+ }
1871
+
1872
+ #ifdef DEBUG_ANALYZE_QUERY
1873
+ printf("Steps:\n");
1874
+ for (unsigned i = 0; i < self->steps.size; i++) {
1875
+ QueryStep *step = &self->steps.contents[i];
1876
+ if (step->depth == PATTERN_DONE_MARKER) {
1877
+ printf(" %u: DONE\n", i);
1878
+ } else {
1879
+ printf(
1880
+ " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n",
1881
+ i,
1882
+ (step->symbol == WILDCARD_SYMBOL)
1883
+ ? "ANY"
1884
+ : ts_language_symbol_name(self->language, step->symbol),
1885
+ (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
1886
+ step->depth,
1887
+ step->parent_pattern_guaranteed,
1888
+ step->root_pattern_guaranteed
1889
+ );
1890
+ }
1891
+ }
1892
+ #endif
1893
+
1894
+ // Determine which repetition symbols in this language have the possibility
1895
+ // of matching non-rooted patterns in this query. These repetition symbols
1896
+ // prevent certain optimizations with range restrictions.
1897
+ analysis.did_abort = false;
1898
+ for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) {
1899
+ uint16_t pattern_entry_index = non_rooted_pattern_start_steps.contents[i];
1900
+ PatternEntry *pattern_entry = &self->pattern_map.contents[pattern_entry_index];
1901
+
1902
+ analysis_state_set__clear(&analysis.states, &analysis.state_pool);
1903
+ analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
1904
+ for (unsigned j = 0; j < subgraphs.size; j++) {
1905
+ AnalysisSubgraph *subgraph = &subgraphs.contents[j];
1906
+ TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol);
1907
// Seed the analysis only from symbols that are neither visible nor named.
+ if (metadata.visible || metadata.named) continue;
1908
+
1909
+ for (uint32_t k = 0; k < subgraph->start_states.size; k++) {
1910
+ TSStateId parse_state = subgraph->start_states.contents[k];
1911
+ analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
1912
+ .step_index = pattern_entry->step_index,
1913
+ .stack = {
1914
+ [0] = {
1915
+ .parse_state = parse_state,
1916
+ .parent_symbol = subgraph->symbol,
1917
+ .child_index = 0,
1918
+ .field_id = 0,
1919
+ .done = false,
1920
+ },
1921
+ },
1922
+ .root_symbol = subgraph->symbol,
1923
+ .depth = 1,
1924
+ }));
1925
+ }
1926
+ }
1927
+
1928
+ #ifdef DEBUG_ANALYZE_QUERY
1929
+ printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index);
1930
+ #endif
1931
+
1932
+ ts_query__perform_analysis(
1933
+ self,
1934
+ &subgraphs,
1935
+ &analysis
1936
+ );
1937
+
1938
+ if (analysis.finished_parent_symbols.size > 0) {
1939
+ self->patterns.contents[pattern_entry->pattern_index].is_non_local = true;
1940
+ }
1941
+
1942
+ for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) {
1943
+ TSSymbol symbol = analysis.finished_parent_symbols.contents[k];
1944
+ array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol);
1945
+ }
1946
+ }
1947
+
1948
+ #ifdef DEBUG_ANALYZE_QUERY
1949
+ if (self->repeat_symbols_with_rootless_patterns.size > 0) {
1950
+ printf("\nRepetition symbols with rootless patterns:\n");
1951
+ printf("aborted analysis: %d\n", analysis.did_abort);
1952
+ for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) {
1953
+ TSSymbol symbol = self->repeat_symbols_with_rootless_patterns.contents[i];
1954
+ printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol));
1955
+ }
1956
+ printf("\n");
1957
+ }
1958
+ #endif
1959
+
1960
+ // Cleanup
1961
+ for (unsigned i = 0; i < subgraphs.size; i++) {
1962
+ array_delete(&subgraphs.contents[i].start_states);
1963
+ array_delete(&subgraphs.contents[i].nodes);
1964
+ }
1965
+ array_delete(&subgraphs);
1966
+ query_analysis__delete(&analysis);
1967
+ array_delete(&next_nodes);
1968
+ array_delete(&non_rooted_pattern_start_steps);
1969
+ array_delete(&parent_step_indices);
1970
+ array_delete(&predicate_capture_ids);
1971
+ state_predecessor_map_delete(&predecessor_map);
1972
+
1973
+ return all_patterns_are_valid;
1974
+ }
1975
+
1976
+ static void ts_query__add_negated_fields(
1977
+ TSQuery *self,
1978
+ uint16_t step_index,
1979
+ TSFieldId *field_ids,
1980
+ uint16_t field_count
1981
+ ) {
1982
+ QueryStep *step = &self->steps.contents[step_index];
1983
+
1984
+ // The negated field array stores a list of field lists, separated by zeros.
1985
+ // Try to find the start index of an existing list that matches this new list.
1986
+ bool failed_match = false;
1987
+ unsigned match_count = 0;
1988
+ unsigned start_i = 0;
1989
+ for (unsigned i = 0; i < self->negated_fields.size; i++) {
1990
+ TSFieldId existing_field_id = self->negated_fields.contents[i];
1991
+
1992
+ // At each zero value, terminate the match attempt. If we've exactly
1993
+ // matched the new field list, then reuse this index. Otherwise,
1994
+ // start over the matching process.
1995
+ if (existing_field_id == 0) {
1996
+ if (match_count == field_count) {
1997
+ step->negated_field_list_id = start_i;
1998
+ return;
1999
+ } else {
2000
+ start_i = i + 1;
2001
+ match_count = 0;
2002
+ failed_match = false;
2003
+ }
2004
+ }
2005
+
2006
+ // If the existing list matches our new list so far, then advance
2007
+ // to the next element of the new list.
2008
+ else if (
2009
+ match_count < field_count &&
2010
+ existing_field_id == field_ids[match_count] &&
2011
+ !failed_match
2012
+ ) {
2013
+ match_count++;
2014
+ }
2015
+
2016
+ // Otherwise, this existing list has failed to match.
2017
+ else {
2018
+ match_count = 0;
2019
+ failed_match = true;
2020
+ }
2021
+ }
2022
+
2023
+ step->negated_field_list_id = self->negated_fields.size;
2024
+ array_extend(&self->negated_fields, field_count, field_ids);
2025
+ array_push(&self->negated_fields, 0);
2026
+ }
2027
+
2028
+ static TSQueryError ts_query__parse_string_literal(
2029
+ TSQuery *self,
2030
+ Stream *stream
2031
+ ) {
2032
+ const char *string_start = stream->input;
2033
+ if (stream->next != '"') return TSQueryErrorSyntax;
2034
+ stream_advance(stream);
2035
+ const char *prev_position = stream->input;
2036
+
2037
+ bool is_escaped = false;
2038
+ array_clear(&self->string_buffer);
2039
+ for (;;) {
2040
+ if (is_escaped) {
2041
+ is_escaped = false;
2042
+ switch (stream->next) {
2043
+ case 'n':
2044
+ array_push(&self->string_buffer, '\n');
2045
+ break;
2046
+ case 'r':
2047
+ array_push(&self->string_buffer, '\r');
2048
+ break;
2049
+ case 't':
2050
+ array_push(&self->string_buffer, '\t');
2051
+ break;
2052
+ case '0':
2053
+ array_push(&self->string_buffer, '\0');
2054
+ break;
2055
+ default:
2056
+ array_extend(&self->string_buffer, stream->next_size, stream->input);
2057
+ break;
2058
+ }
2059
+ prev_position = stream->input + stream->next_size;
2060
+ } else {
2061
+ if (stream->next == '\\') {
2062
+ array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
2063
+ prev_position = stream->input + 1;
2064
+ is_escaped = true;
2065
+ } else if (stream->next == '"') {
2066
+ array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
2067
+ stream_advance(stream);
2068
+ return TSQueryErrorNone;
2069
+ } else if (stream->next == '\n') {
2070
+ stream_reset(stream, string_start);
2071
+ return TSQueryErrorSyntax;
2072
+ }
2073
+ }
2074
+ if (!stream_advance(stream)) {
2075
+ stream_reset(stream, string_start);
2076
+ return TSQueryErrorSyntax;
2077
+ }
2078
+ }
2079
+ }
2080
+
2081
+ // Parse a single predicate associated with a pattern, adding it to the
2082
+ // query's internal `predicate_steps` array. Predicates are arbitrary
2083
+ // S-expressions associated with a pattern which are meant to be handled at
2084
+ // a higher level of abstraction, such as the Rust/JavaScript bindings. They
2085
+ // can contain '@'-prefixed capture names, double-quoted strings, and bare
2086
+ // symbols, which also represent strings.
2087
+ static TSQueryError ts_query__parse_predicate(
2088
+ TSQuery *self,
2089
+ Stream *stream
2090
+ ) {
2091
+ if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2092
+ const char *predicate_name = stream->input;
2093
+ stream_scan_identifier(stream);
2094
+ uint32_t length = (uint32_t)(stream->input - predicate_name);
2095
+ uint16_t id = symbol_table_insert_name(
2096
+ &self->predicate_values,
2097
+ predicate_name,
2098
+ length
2099
+ );
2100
+ array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2101
+ .type = TSQueryPredicateStepTypeString,
2102
+ .value_id = id,
2103
+ }));
2104
+ stream_skip_whitespace(stream);
2105
+
2106
+ for (;;) {
2107
+ if (stream->next == ')') {
2108
+ stream_advance(stream);
2109
+ stream_skip_whitespace(stream);
2110
+ array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2111
+ .type = TSQueryPredicateStepTypeDone,
2112
+ .value_id = 0,
2113
+ }));
2114
+ break;
2115
+ }
2116
+
2117
+ // Parse an '@'-prefixed capture name
2118
+ else if (stream->next == '@') {
2119
+ stream_advance(stream);
2120
+
2121
+ // Parse the capture name
2122
+ if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2123
+ const char *capture_name = stream->input;
2124
+ stream_scan_identifier(stream);
2125
+ uint32_t capture_length = (uint32_t)(stream->input - capture_name);
2126
+
2127
+ // Add the capture id to the first step of the pattern
2128
+ int capture_id = symbol_table_id_for_name(
2129
+ &self->captures,
2130
+ capture_name,
2131
+ capture_length
2132
+ );
2133
+ if (capture_id == -1) {
2134
+ stream_reset(stream, capture_name);
2135
+ return TSQueryErrorCapture;
2136
+ }
2137
+
2138
+ array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2139
+ .type = TSQueryPredicateStepTypeCapture,
2140
+ .value_id = capture_id,
2141
+ }));
2142
+ }
2143
+
2144
+ // Parse a string literal
2145
+ else if (stream->next == '"') {
2146
+ TSQueryError e = ts_query__parse_string_literal(self, stream);
2147
+ if (e) return e;
2148
+ uint16_t query_id = symbol_table_insert_name(
2149
+ &self->predicate_values,
2150
+ self->string_buffer.contents,
2151
+ self->string_buffer.size
2152
+ );
2153
+ array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2154
+ .type = TSQueryPredicateStepTypeString,
2155
+ .value_id = query_id,
2156
+ }));
2157
+ }
2158
+
2159
+ // Parse a bare symbol
2160
+ else if (stream_is_ident_start(stream)) {
2161
+ const char *symbol_start = stream->input;
2162
+ stream_scan_identifier(stream);
2163
+ uint32_t symbol_length = (uint32_t)(stream->input - symbol_start);
2164
+ uint16_t query_id = symbol_table_insert_name(
2165
+ &self->predicate_values,
2166
+ symbol_start,
2167
+ symbol_length
2168
+ );
2169
+ array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2170
+ .type = TSQueryPredicateStepTypeString,
2171
+ .value_id = query_id,
2172
+ }));
2173
+ }
2174
+
2175
+ else {
2176
+ return TSQueryErrorSyntax;
2177
+ }
2178
+
2179
+ stream_skip_whitespace(stream);
2180
+ }
2181
+
2182
+ return 0;
2183
+ }
2184
+
2185
+ // Read one S-expression pattern from the stream, and incorporate it into
2186
+ // the query's internal state machine representation. For nested patterns,
2187
+ // this function calls itself recursively.
2188
+ //
2189
+ // The caller is responsible for passing in a dedicated CaptureQuantifiers.
2190
+ // These should not be shared between different calls to ts_query__parse_pattern!
2191
+ static TSQueryError ts_query__parse_pattern(
2192
+ TSQuery *self,
2193
+ Stream *stream,
2194
+ uint32_t depth,
2195
+ bool is_immediate,
2196
+ CaptureQuantifiers *capture_quantifiers
2197
+ ) {
2198
+ if (stream->next == 0) return TSQueryErrorSyntax;
2199
+ if (stream->next == ')' || stream->next == ']') return PARENT_DONE;
2200
+
2201
+ const uint32_t starting_step_index = self->steps.size;
2202
+
2203
+ // Store the byte offset of each step in the query.
2204
+ if (
2205
+ self->step_offsets.size == 0 ||
2206
+ array_back(&self->step_offsets)->step_index != starting_step_index
2207
+ ) {
2208
+ array_push(&self->step_offsets, ((StepOffset) {
2209
+ .step_index = starting_step_index,
2210
+ .byte_offset = stream_offset(stream),
2211
+ }));
2212
+ }
2213
+
2214
+ // An open bracket is the start of an alternation.
2215
+ if (stream->next == '[') {
2216
+ stream_advance(stream);
2217
+ stream_skip_whitespace(stream);
2218
+
2219
+ // Parse each branch, and add a placeholder step in between the branches.
2220
+ Array(uint32_t) branch_step_indices = array_new();
2221
+ CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new();
2222
+ for (;;) {
2223
+ uint32_t start_index = self->steps.size;
2224
+ TSQueryError e = ts_query__parse_pattern(
2225
+ self,
2226
+ stream,
2227
+ depth,
2228
+ is_immediate,
2229
+ &branch_capture_quantifiers
2230
+ );
2231
+
2232
+ if (e == PARENT_DONE) {
2233
+ if (stream->next == ']' && branch_step_indices.size > 0) {
2234
+ stream_advance(stream);
2235
+ break;
2236
+ }
2237
+ e = TSQueryErrorSyntax;
2238
+ }
2239
+ if (e) {
2240
+ capture_quantifiers_delete(&branch_capture_quantifiers);
2241
+ array_delete(&branch_step_indices);
2242
+ return e;
2243
+ }
2244
+
2245
+ if (start_index == starting_step_index) {
2246
+ capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers);
2247
+ } else {
2248
+ capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers);
2249
+ }
2250
+
2251
+ array_push(&branch_step_indices, start_index);
2252
+ array_push(&self->steps, query_step__new(0, depth, false));
2253
+ capture_quantifiers_clear(&branch_capture_quantifiers);
2254
+ }
2255
+ (void)array_pop(&self->steps);
2256
+
2257
+ // For all of the branches except for the last one, add the subsequent branch as an
2258
+ // alternative, and link the end of the branch to the current end of the steps.
2259
+ for (unsigned i = 0; i < branch_step_indices.size - 1; i++) {
2260
+ uint32_t step_index = branch_step_indices.contents[i];
2261
+ uint32_t next_step_index = branch_step_indices.contents[i + 1];
2262
+ QueryStep *start_step = &self->steps.contents[step_index];
2263
+ QueryStep *end_step = &self->steps.contents[next_step_index - 1];
2264
+ start_step->alternative_index = next_step_index;
2265
+ end_step->alternative_index = self->steps.size;
2266
+ end_step->is_dead_end = true;
2267
+ }
2268
+
2269
+ capture_quantifiers_delete(&branch_capture_quantifiers);
2270
+ array_delete(&branch_step_indices);
2271
+ }
2272
+
2273
+ // An open parenthesis can be the start of three possible constructs:
2274
+ // * A grouped sequence
2275
+ // * A predicate
2276
+ // * A named node
2277
+ else if (stream->next == '(') {
2278
+ stream_advance(stream);
2279
+ stream_skip_whitespace(stream);
2280
+
2281
+ // If this parenthesis is followed by a node, then it represents a grouped sequence.
2282
+ if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
2283
+ bool child_is_immediate = is_immediate;
2284
+ CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2285
+ for (;;) {
2286
+ if (stream->next == '.') {
2287
+ child_is_immediate = true;
2288
+ stream_advance(stream);
2289
+ stream_skip_whitespace(stream);
2290
+ }
2291
+ TSQueryError e = ts_query__parse_pattern(
2292
+ self,
2293
+ stream,
2294
+ depth,
2295
+ child_is_immediate,
2296
+ &child_capture_quantifiers
2297
+ );
2298
+ if (e == PARENT_DONE) {
2299
+ if (stream->next == ')') {
2300
+ stream_advance(stream);
2301
+ break;
2302
+ }
2303
+ e = TSQueryErrorSyntax;
2304
+ }
2305
+ if (e) {
2306
+ capture_quantifiers_delete(&child_capture_quantifiers);
2307
+ return e;
2308
+ }
2309
+
2310
+ capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2311
+ capture_quantifiers_clear(&child_capture_quantifiers);
2312
+ child_is_immediate = false;
2313
+ }
2314
+
2315
+ capture_quantifiers_delete(&child_capture_quantifiers);
2316
+ }
2317
+
2318
+ // A dot/pound character indicates the start of a predicate.
2319
+ else if (stream->next == '.' || stream->next == '#') {
2320
+ stream_advance(stream);
2321
+ return ts_query__parse_predicate(self, stream);
2322
+ }
2323
+
2324
+ // Otherwise, this parenthesis is the start of a named node.
2325
+ else {
2326
+ TSSymbol symbol;
2327
+ bool is_missing = false;
2328
+ const char *node_name = stream->input;
2329
+
2330
+ // Parse a normal node name
2331
+ if (stream_is_ident_start(stream)) {
2332
+ stream_scan_identifier(stream);
2333
+ uint32_t length = (uint32_t)(stream->input - node_name);
2334
+
2335
+ // Parse the wildcard symbol
2336
+ if (length == 1 && node_name[0] == '_') {
2337
+ symbol = WILDCARD_SYMBOL;
2338
+ } else if (!strncmp(node_name, "MISSING", length)) {
2339
+ is_missing = true;
2340
+ stream_skip_whitespace(stream);
2341
+
2342
+ if (stream_is_ident_start(stream)) {
2343
+ const char *missing_node_name = stream->input;
2344
+ stream_scan_identifier(stream);
2345
+ uint32_t missing_node_length = (uint32_t)(stream->input - missing_node_name);
2346
+ symbol = ts_language_symbol_for_name(
2347
+ self->language,
2348
+ missing_node_name,
2349
+ missing_node_length,
2350
+ true
2351
+ );
2352
+ if (!symbol) {
2353
+ stream_reset(stream, missing_node_name);
2354
+ return TSQueryErrorNodeType;
2355
+ }
2356
+ }
2357
+
2358
+ else if (stream->next == '"') {
2359
+ const char *string_start = stream->input;
2360
+ TSQueryError e = ts_query__parse_string_literal(self, stream);
2361
+ if (e) return e;
2362
+
2363
+ symbol = ts_language_symbol_for_name(
2364
+ self->language,
2365
+ self->string_buffer.contents,
2366
+ self->string_buffer.size,
2367
+ false
2368
+ );
2369
+ if (!symbol) {
2370
+ stream_reset(stream, string_start + 1);
2371
+ return TSQueryErrorNodeType;
2372
+ }
2373
+ }
2374
+
2375
+ else if (stream->next == ')') {
2376
+ symbol = WILDCARD_SYMBOL;
2377
+ }
2378
+
2379
+ else {
2380
+ stream_reset(stream, stream->input);
2381
+ return TSQueryErrorSyntax;
2382
+ }
2383
+ }
2384
+
2385
+ else {
2386
+ symbol = ts_language_symbol_for_name(
2387
+ self->language,
2388
+ node_name,
2389
+ length,
2390
+ true
2391
+ );
2392
+ if (!symbol) {
2393
+ stream_reset(stream, node_name);
2394
+ return TSQueryErrorNodeType;
2395
+ }
2396
+ }
2397
+ } else {
2398
+ return TSQueryErrorSyntax;
2399
+ }
2400
+
2401
+ // Add a step for the node.
2402
+ array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2403
+ QueryStep *step = array_back(&self->steps);
2404
+ if (ts_language_symbol_metadata(self->language, symbol).supertype) {
2405
+ step->supertype_symbol = step->symbol;
2406
+ step->symbol = WILDCARD_SYMBOL;
2407
+ }
2408
+ if (is_missing) {
2409
+ step->is_missing = true;
2410
+ }
2411
+ if (symbol == WILDCARD_SYMBOL) {
2412
+ step->is_named = true;
2413
+ }
2414
+
2415
+ stream_skip_whitespace(stream);
2416
+
2417
+ if (stream->next == '/') {
2418
+ if (!step->supertype_symbol) {
2419
+ stream_reset(stream, node_name - 1); // reset to the start of the node
2420
+ return TSQueryErrorStructure;
2421
+ }
2422
+
2423
+ stream_advance(stream);
2424
+ if (!stream_is_ident_start(stream)) {
2425
+ return TSQueryErrorSyntax;
2426
+ }
2427
+
2428
+ const char *subtype_node_name = stream->input;
2429
+ stream_scan_identifier(stream);
2430
+ uint32_t length = (uint32_t)(stream->input - subtype_node_name);
2431
+
2432
+ step->symbol = ts_language_symbol_for_name(
2433
+ self->language,
2434
+ subtype_node_name,
2435
+ length,
2436
+ true
2437
+ );
2438
+ if (!step->symbol) {
2439
+ stream_reset(stream, subtype_node_name);
2440
+ return TSQueryErrorNodeType;
2441
+ }
2442
+
2443
+ // Get all the possible subtypes for the given supertype,
2444
+ // and check if the given subtype is valid.
2445
+ if (self->language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
2446
+ uint32_t subtype_length;
2447
+ const TSSymbol *subtypes = ts_language_subtypes(
2448
+ self->language,
2449
+ step->supertype_symbol,
2450
+ &subtype_length
2451
+ );
2452
+
2453
+ bool subtype_is_valid = false;
2454
+ for (uint32_t i = 0; i < subtype_length; i++) {
2455
+ if (subtypes[i] == step->symbol) {
2456
+ subtype_is_valid = true;
2457
+ break;
2458
+ }
2459
+ }
2460
+
2461
+ // This subtype is not valid for the given supertype.
2462
+ if (!subtype_is_valid) {
2463
+ stream_reset(stream, node_name - 1); // reset to the start of the node
2464
+ return TSQueryErrorStructure;
2465
+ }
2466
+ }
2467
+
2468
+ stream_skip_whitespace(stream);
2469
+ }
2470
+
2471
+ // Parse the child patterns
2472
+ bool child_is_immediate = false;
2473
+ uint16_t last_child_step_index = 0;
2474
+ uint16_t negated_field_count = 0;
2475
+ TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT];
2476
+ CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2477
+ for (;;) {
2478
+ // Parse a negated field assertion
2479
+ if (stream->next == '!') {
2480
+ stream_advance(stream);
2481
+ stream_skip_whitespace(stream);
2482
+ if (!stream_is_ident_start(stream)) {
2483
+ capture_quantifiers_delete(&child_capture_quantifiers);
2484
+ return TSQueryErrorSyntax;
2485
+ }
2486
+ const char *field_name = stream->input;
2487
+ stream_scan_identifier(stream);
2488
+ uint32_t length = (uint32_t)(stream->input - field_name);
2489
+ stream_skip_whitespace(stream);
2490
+
2491
+ TSFieldId field_id = ts_language_field_id_for_name(
2492
+ self->language,
2493
+ field_name,
2494
+ length
2495
+ );
2496
+ if (!field_id) {
2497
+ stream->input = field_name;
2498
+ capture_quantifiers_delete(&child_capture_quantifiers);
2499
+ return TSQueryErrorField;
2500
+ }
2501
+
2502
+ // Keep the field ids sorted.
2503
+ if (negated_field_count < MAX_NEGATED_FIELD_COUNT) {
2504
+ negated_field_ids[negated_field_count] = field_id;
2505
+ negated_field_count++;
2506
+ }
2507
+
2508
+ continue;
2509
+ }
2510
+
2511
+ // Parse a sibling anchor
2512
+ if (stream->next == '.') {
2513
+ child_is_immediate = true;
2514
+ stream_advance(stream);
2515
+ stream_skip_whitespace(stream);
2516
+ }
2517
+
2518
+ uint16_t step_index = self->steps.size;
2519
+ TSQueryError e = ts_query__parse_pattern(
2520
+ self,
2521
+ stream,
2522
+ depth + 1,
2523
+ child_is_immediate,
2524
+ &child_capture_quantifiers
2525
+ );
2526
+ // In the event we only parsed a predicate, meaning no new steps were added,
2527
+ // then subtract one so we're not indexing past the end of the array
2528
+ if (step_index == self->steps.size) step_index--;
2529
+ if (e == PARENT_DONE) {
2530
+ if (stream->next == ')') {
2531
+ if (child_is_immediate) {
2532
+ if (last_child_step_index == 0) {
2533
+ capture_quantifiers_delete(&child_capture_quantifiers);
2534
+ return TSQueryErrorSyntax;
2535
+ }
2536
+ // Mark this step *and* its alternatives as the last child of the parent.
2537
+ QueryStep *last_child_step = array_get(&self->steps, last_child_step_index);
2538
+ last_child_step->is_last_child = true;
2539
+ if (
2540
+ last_child_step->alternative_index != NONE &&
2541
+ last_child_step->alternative_index < self->steps.size
2542
+ ) {
2543
+ QueryStep *alternative_step = &self->steps.contents[last_child_step->alternative_index];
2544
+ alternative_step->is_last_child = true;
2545
+ while (
2546
+ alternative_step->alternative_index != NONE &&
2547
+ alternative_step->alternative_index < self->steps.size
2548
+ ) {
2549
+ alternative_step = &self->steps.contents[alternative_step->alternative_index];
2550
+ alternative_step->is_last_child = true;
2551
+ }
2552
+ }
2553
+ }
2554
+
2555
+ if (negated_field_count) {
2556
+ ts_query__add_negated_fields(
2557
+ self,
2558
+ starting_step_index,
2559
+ negated_field_ids,
2560
+ negated_field_count
2561
+ );
2562
+ }
2563
+
2564
+ stream_advance(stream);
2565
+ break;
2566
+ }
2567
+ e = TSQueryErrorSyntax;
2568
+ }
2569
+ if (e) {
2570
+ capture_quantifiers_delete(&child_capture_quantifiers);
2571
+ return e;
2572
+ }
2573
+
2574
+ capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2575
+
2576
+ last_child_step_index = step_index;
2577
+ child_is_immediate = false;
2578
+ capture_quantifiers_clear(&child_capture_quantifiers);
2579
+ }
2580
+ capture_quantifiers_delete(&child_capture_quantifiers);
2581
+ }
2582
+ }
2583
+
2584
+ // Parse a wildcard pattern
2585
+ else if (stream->next == '_') {
2586
+ stream_advance(stream);
2587
+ stream_skip_whitespace(stream);
2588
+
2589
+ // Add a step that matches any kind of node
2590
+ array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate));
2591
+ }
2592
+
2593
+ // Parse a double-quoted anonymous leaf node expression
2594
+ else if (stream->next == '"') {
2595
+ const char *string_start = stream->input;
2596
+ TSQueryError e = ts_query__parse_string_literal(self, stream);
2597
+ if (e) return e;
2598
+
2599
+ // Add a step for the node
2600
+ TSSymbol symbol = ts_language_symbol_for_name(
2601
+ self->language,
2602
+ self->string_buffer.contents,
2603
+ self->string_buffer.size,
2604
+ false
2605
+ );
2606
+ if (!symbol) {
2607
+ stream_reset(stream, string_start + 1);
2608
+ return TSQueryErrorNodeType;
2609
+ }
2610
+ array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2611
+ }
2612
+
2613
+ // Parse a field-prefixed pattern
2614
+ else if (stream_is_ident_start(stream)) {
2615
+ // Parse the field name
2616
+ const char *field_name = stream->input;
2617
+ stream_scan_identifier(stream);
2618
+ uint32_t length = (uint32_t)(stream->input - field_name);
2619
+ stream_skip_whitespace(stream);
2620
+
2621
+ if (stream->next != ':') {
2622
+ stream_reset(stream, field_name);
2623
+ return TSQueryErrorSyntax;
2624
+ }
2625
+ stream_advance(stream);
2626
+ stream_skip_whitespace(stream);
2627
+
2628
+ // Parse the pattern
2629
+ CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new();
2630
+ TSQueryError e = ts_query__parse_pattern(
2631
+ self,
2632
+ stream,
2633
+ depth,
2634
+ is_immediate,
2635
+ &field_capture_quantifiers
2636
+ );
2637
+ if (e) {
2638
+ capture_quantifiers_delete(&field_capture_quantifiers);
2639
+ if (e == PARENT_DONE) e = TSQueryErrorSyntax;
2640
+ return e;
2641
+ }
2642
+
2643
+ // Add the field name to the first step of the pattern
2644
+ TSFieldId field_id = ts_language_field_id_for_name(
2645
+ self->language,
2646
+ field_name,
2647
+ length
2648
+ );
2649
+ if (!field_id) {
2650
+ stream->input = field_name;
2651
+ return TSQueryErrorField;
2652
+ }
2653
+
2654
+ uint32_t step_index = starting_step_index;
2655
+ QueryStep *step = &self->steps.contents[step_index];
2656
+ for (;;) {
2657
+ step->field = field_id;
2658
+ if (
2659
+ step->alternative_index != NONE &&
2660
+ step->alternative_index > step_index &&
2661
+ step->alternative_index < self->steps.size
2662
+ ) {
2663
+ step_index = step->alternative_index;
2664
+ step = &self->steps.contents[step_index];
2665
+ } else {
2666
+ break;
2667
+ }
2668
+ }
2669
+
2670
+ capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers);
2671
+ capture_quantifiers_delete(&field_capture_quantifiers);
2672
+ }
2673
+
2674
+ else {
2675
+ return TSQueryErrorSyntax;
2676
+ }
2677
+
2678
+ stream_skip_whitespace(stream);
2679
+
2680
+ // Parse suffixes modifiers for this pattern
2681
+ TSQuantifier quantifier = TSQuantifierOne;
2682
+ for (;;) {
2683
+ // Parse the one-or-more operator.
2684
+ if (stream->next == '+') {
2685
+ quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier);
2686
+
2687
+ stream_advance(stream);
2688
+ stream_skip_whitespace(stream);
2689
+
2690
+ QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2691
+ repeat_step.alternative_index = starting_step_index;
2692
+ repeat_step.is_pass_through = true;
2693
+ repeat_step.alternative_is_immediate = true;
2694
+ array_push(&self->steps, repeat_step);
2695
+ }
2696
+
2697
+ // Parse the zero-or-more repetition operator.
2698
+ else if (stream->next == '*') {
2699
+ quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier);
2700
+
2701
+ stream_advance(stream);
2702
+ stream_skip_whitespace(stream);
2703
+
2704
+ QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2705
+ repeat_step.alternative_index = starting_step_index;
2706
+ repeat_step.is_pass_through = true;
2707
+ repeat_step.alternative_is_immediate = true;
2708
+ array_push(&self->steps, repeat_step);
2709
+
2710
+ // Stop when `step->alternative_index` is `NONE` or it points to
2711
+ // `repeat_step` or beyond. Note that having just been pushed,
2712
+ // `repeat_step` occupies slot `self->steps.size - 1`.
2713
+ QueryStep *step = &self->steps.contents[starting_step_index];
2714
+ while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) {
2715
+ step = &self->steps.contents[step->alternative_index];
2716
+ }
2717
+ step->alternative_index = self->steps.size;
2718
+ }
2719
+
2720
+ // Parse the optional operator.
2721
+ else if (stream->next == '?') {
2722
+ quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier);
2723
+
2724
+ stream_advance(stream);
2725
+ stream_skip_whitespace(stream);
2726
+
2727
+ QueryStep *step = &self->steps.contents[starting_step_index];
2728
+ while (step->alternative_index != NONE && step->alternative_index < self->steps.size) {
2729
+ step = &self->steps.contents[step->alternative_index];
2730
+ }
2731
+ step->alternative_index = self->steps.size;
2732
+ }
2733
+
2734
+ // Parse an '@'-prefixed capture pattern
2735
+ else if (stream->next == '@') {
2736
+ stream_advance(stream);
2737
+ if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2738
+ const char *capture_name = stream->input;
2739
+ stream_scan_identifier(stream);
2740
+ uint32_t length = (uint32_t)(stream->input - capture_name);
2741
+ stream_skip_whitespace(stream);
2742
+
2743
+ // Add the capture id to the first step of the pattern
2744
+ uint16_t capture_id = symbol_table_insert_name(
2745
+ &self->captures,
2746
+ capture_name,
2747
+ length
2748
+ );
2749
+
2750
+ // Add the capture quantifier
2751
+ capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne);
2752
+
2753
+ uint32_t step_index = starting_step_index;
2754
+ for (;;) {
2755
+ QueryStep *step = &self->steps.contents[step_index];
2756
+ query_step__add_capture(step, capture_id);
2757
+ if (
2758
+ step->alternative_index != NONE &&
2759
+ step->alternative_index > step_index &&
2760
+ step->alternative_index < self->steps.size
2761
+ ) {
2762
+ step_index = step->alternative_index;
2763
+ } else {
2764
+ break;
2765
+ }
2766
+ }
2767
+ }
2768
+
2769
+ // No more suffix modifiers
2770
+ else {
2771
+ break;
2772
+ }
2773
+ }
2774
+
2775
+ capture_quantifiers_mul(capture_quantifiers, quantifier);
2776
+
2777
+ return 0;
2778
+ }
2779
+
2780
+ TSQuery *ts_query_new(
2781
+ const TSLanguage *language,
2782
+ const char *source,
2783
+ uint32_t source_len,
2784
+ uint32_t *error_offset,
2785
+ TSQueryError *error_type
2786
+ ) {
2787
+ if (
2788
+ !language ||
2789
+ language->abi_version > TREE_SITTER_LANGUAGE_VERSION ||
2790
+ language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
2791
+ ) {
2792
+ *error_type = TSQueryErrorLanguage;
2793
+ return NULL;
2794
+ }
2795
+
2796
+ TSQuery *self = ts_malloc(sizeof(TSQuery));
2797
+ *self = (TSQuery) {
2798
+ .steps = array_new(),
2799
+ .pattern_map = array_new(),
2800
+ .captures = symbol_table_new(),
2801
+ .capture_quantifiers = array_new(),
2802
+ .predicate_values = symbol_table_new(),
2803
+ .predicate_steps = array_new(),
2804
+ .patterns = array_new(),
2805
+ .step_offsets = array_new(),
2806
+ .string_buffer = array_new(),
2807
+ .negated_fields = array_new(),
2808
+ .repeat_symbols_with_rootless_patterns = array_new(),
2809
+ .wildcard_root_pattern_count = 0,
2810
+ .language = ts_language_copy(language),
2811
+ };
2812
+
2813
+ array_push(&self->negated_fields, 0);
2814
+
2815
+ // Parse all of the S-expressions in the given string.
2816
+ Stream stream = stream_new(source, source_len);
2817
+ stream_skip_whitespace(&stream);
2818
+ while (stream.input < stream.end) {
2819
+ uint32_t pattern_index = self->patterns.size;
2820
+ uint32_t start_step_index = self->steps.size;
2821
+ uint32_t start_predicate_step_index = self->predicate_steps.size;
2822
+ array_push(&self->patterns, ((QueryPattern) {
2823
+ .steps = (Slice) {.offset = start_step_index},
2824
+ .predicate_steps = (Slice) {.offset = start_predicate_step_index},
2825
+ .start_byte = stream_offset(&stream),
2826
+ .is_non_local = false,
2827
+ }));
2828
+ CaptureQuantifiers capture_quantifiers = capture_quantifiers_new();
2829
+ *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers);
2830
+ array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
2831
+
2832
+ QueryPattern *pattern = array_back(&self->patterns);
2833
+ pattern->steps.length = self->steps.size - start_step_index;
2834
+ pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index;
2835
+ pattern->end_byte = stream_offset(&stream);
2836
+
2837
+ // If any pattern could not be parsed, then report the error information
2838
+ // and terminate.
2839
+ if (*error_type) {
2840
+ if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax;
2841
+ *error_offset = stream_offset(&stream);
2842
+ capture_quantifiers_delete(&capture_quantifiers);
2843
+ ts_query_delete(self);
2844
+ return NULL;
2845
+ }
2846
+
2847
+ // Maintain a list of capture quantifiers for each pattern
2848
+ array_push(&self->capture_quantifiers, capture_quantifiers);
2849
+
2850
+ // Maintain a map that can look up patterns for a given root symbol.
2851
+ uint16_t wildcard_root_alternative_index = NONE;
2852
+ for (;;) {
2853
+ QueryStep *step = &self->steps.contents[start_step_index];
2854
+
2855
+ // If a pattern has a wildcard at its root, but it has a non-wildcard child,
2856
+ // then optimize the matching process by skipping matching the wildcard.
2857
+ // Later, during the matching process, the query cursor will check that
2858
+ // there is a parent node, and capture it if necessary.
2859
+ if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) {
2860
+ QueryStep *second_step = &self->steps.contents[start_step_index + 1];
2861
+ if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1 && !second_step->is_immediate) {
2862
+ wildcard_root_alternative_index = step->alternative_index;
2863
+ start_step_index += 1;
2864
+ step = second_step;
2865
+ }
2866
+ }
2867
+
2868
+ // Determine whether the pattern has a single root node. This affects
2869
+ // decisions about whether or not to start matching the pattern when
2870
+ // a query cursor has a range restriction or when immediately within an
2871
+ // error node.
2872
+ uint32_t start_depth = step->depth;
2873
+ bool is_rooted = start_depth == 0;
2874
+ for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) {
2875
+ QueryStep *child_step = &self->steps.contents[step_index];
2876
+ if (child_step->is_dead_end) break;
2877
+ if (child_step->depth == start_depth) {
2878
+ is_rooted = false;
2879
+ break;
2880
+ }
2881
+ }
2882
+
2883
+ ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) {
2884
+ .step_index = start_step_index,
2885
+ .pattern_index = pattern_index,
2886
+ .is_rooted = is_rooted
2887
+ });
2888
+ if (step->symbol == WILDCARD_SYMBOL) {
2889
+ self->wildcard_root_pattern_count++;
2890
+ }
2891
+
2892
+ // If there are alternatives or options at the root of the pattern,
2893
+ // then add multiple entries to the pattern map.
2894
+ if (step->alternative_index != NONE) {
2895
+ start_step_index = step->alternative_index;
2896
+ } else if (wildcard_root_alternative_index != NONE) {
2897
+ start_step_index = wildcard_root_alternative_index;
2898
+ wildcard_root_alternative_index = NONE;
2899
+ } else {
2900
+ break;
2901
+ }
2902
+ }
2903
+ }
2904
+
2905
+ if (!ts_query__analyze_patterns(self, error_offset)) {
2906
+ *error_type = TSQueryErrorStructure;
2907
+ ts_query_delete(self);
2908
+ return NULL;
2909
+ }
2910
+
2911
+ array_delete(&self->string_buffer);
2912
+ return self;
2913
+ }
2914
+
2915
+ void ts_query_delete(TSQuery *self) {
2916
+ if (self) {
2917
+ array_delete(&self->steps);
2918
+ array_delete(&self->pattern_map);
2919
+ array_delete(&self->predicate_steps);
2920
+ array_delete(&self->patterns);
2921
+ array_delete(&self->step_offsets);
2922
+ array_delete(&self->string_buffer);
2923
+ array_delete(&self->negated_fields);
2924
+ array_delete(&self->repeat_symbols_with_rootless_patterns);
2925
+ ts_language_delete(self->language);
2926
+ symbol_table_delete(&self->captures);
2927
+ symbol_table_delete(&self->predicate_values);
2928
+ for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) {
2929
+ CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index);
2930
+ capture_quantifiers_delete(capture_quantifiers);
2931
+ }
2932
+ array_delete(&self->capture_quantifiers);
2933
+ ts_free(self);
2934
+ }
2935
+ }
2936
+
2937
+ uint32_t ts_query_pattern_count(const TSQuery *self) {
2938
+ return self->patterns.size;
2939
+ }
2940
+
2941
+ uint32_t ts_query_capture_count(const TSQuery *self) {
2942
+ return self->captures.slices.size;
2943
+ }
2944
+
2945
+ uint32_t ts_query_string_count(const TSQuery *self) {
2946
+ return self->predicate_values.slices.size;
2947
+ }
2948
+
2949
+ const char *ts_query_capture_name_for_id(
2950
+ const TSQuery *self,
2951
+ uint32_t index,
2952
+ uint32_t *length
2953
+ ) {
2954
+ return symbol_table_name_for_id(&self->captures, index, length);
2955
+ }
2956
+
2957
+ TSQuantifier ts_query_capture_quantifier_for_id(
2958
+ const TSQuery *self,
2959
+ uint32_t pattern_index,
2960
+ uint32_t capture_index
2961
+ ) {
2962
+ CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index);
2963
+ return capture_quantifier_for_id(capture_quantifiers, capture_index);
2964
+ }
2965
+
2966
+ const char *ts_query_string_value_for_id(
2967
+ const TSQuery *self,
2968
+ uint32_t index,
2969
+ uint32_t *length
2970
+ ) {
2971
+ return symbol_table_name_for_id(&self->predicate_values, index, length);
2972
+ }
2973
+
2974
+ const TSQueryPredicateStep *ts_query_predicates_for_pattern(
2975
+ const TSQuery *self,
2976
+ uint32_t pattern_index,
2977
+ uint32_t *step_count
2978
+ ) {
2979
+ Slice slice = self->patterns.contents[pattern_index].predicate_steps;
2980
+ *step_count = slice.length;
2981
+ if (slice.length == 0) return NULL;
2982
+ return &self->predicate_steps.contents[slice.offset];
2983
+ }
2984
+
2985
+ uint32_t ts_query_start_byte_for_pattern(
2986
+ const TSQuery *self,
2987
+ uint32_t pattern_index
2988
+ ) {
2989
+ return self->patterns.contents[pattern_index].start_byte;
2990
+ }
2991
+
2992
+ uint32_t ts_query_end_byte_for_pattern(
2993
+ const TSQuery *self,
2994
+ uint32_t pattern_index
2995
+ ) {
2996
+ return self->patterns.contents[pattern_index].end_byte;
2997
+ }
2998
+
2999
+ bool ts_query_is_pattern_rooted(
3000
+ const TSQuery *self,
3001
+ uint32_t pattern_index
3002
+ ) {
3003
+ for (unsigned i = 0; i < self->pattern_map.size; i++) {
3004
+ PatternEntry *entry = &self->pattern_map.contents[i];
3005
+ if (entry->pattern_index == pattern_index) {
3006
+ if (!entry->is_rooted) return false;
3007
+ }
3008
+ }
3009
+ return true;
3010
+ }
3011
+
3012
+ bool ts_query_is_pattern_non_local(
3013
+ const TSQuery *self,
3014
+ uint32_t pattern_index
3015
+ ) {
3016
+ if (pattern_index < self->patterns.size) {
3017
+ return self->patterns.contents[pattern_index].is_non_local;
3018
+ } else {
3019
+ return false;
3020
+ }
3021
+ }
3022
+
3023
+ bool ts_query_is_pattern_guaranteed_at_step(
3024
+ const TSQuery *self,
3025
+ uint32_t byte_offset
3026
+ ) {
3027
+ uint32_t step_index = UINT32_MAX;
3028
+ for (unsigned i = 0; i < self->step_offsets.size; i++) {
3029
+ StepOffset *step_offset = &self->step_offsets.contents[i];
3030
+ if (step_offset->byte_offset > byte_offset) break;
3031
+ step_index = step_offset->step_index;
3032
+ }
3033
+ if (step_index < self->steps.size) {
3034
+ return self->steps.contents[step_index].root_pattern_guaranteed;
3035
+ } else {
3036
+ return false;
3037
+ }
3038
+ }
3039
+
3040
+ bool ts_query__step_is_fallible(
3041
+ const TSQuery *self,
3042
+ uint16_t step_index
3043
+ ) {
3044
+ ts_assert((uint32_t)step_index + 1 < self->steps.size);
3045
+ QueryStep *step = &self->steps.contents[step_index];
3046
+ QueryStep *next_step = &self->steps.contents[step_index + 1];
3047
+ return (
3048
+ next_step->depth != PATTERN_DONE_MARKER &&
3049
+ next_step->depth > step->depth &&
3050
+ (!next_step->parent_pattern_guaranteed || step->symbol == WILDCARD_SYMBOL)
3051
+ );
3052
+ }
3053
+
3054
+ void ts_query_disable_capture(
3055
+ TSQuery *self,
3056
+ const char *name,
3057
+ uint32_t length
3058
+ ) {
3059
+ // Remove capture information for any pattern step that previously
3060
+ // captured with the given name.
3061
+ int id = symbol_table_id_for_name(&self->captures, name, length);
3062
+ if (id != -1) {
3063
+ for (unsigned i = 0; i < self->steps.size; i++) {
3064
+ QueryStep *step = &self->steps.contents[i];
3065
+ query_step__remove_capture(step, id);
3066
+ }
3067
+ }
3068
+ }
3069
+
3070
+ void ts_query_disable_pattern(
3071
+ TSQuery *self,
3072
+ uint32_t pattern_index
3073
+ ) {
3074
+ // Remove the given pattern from the pattern map. Its steps will still
3075
+ // be in the `steps` array, but they will never be read.
3076
+ for (unsigned i = 0; i < self->pattern_map.size; i++) {
3077
+ PatternEntry *pattern = &self->pattern_map.contents[i];
3078
+ if (pattern->pattern_index == pattern_index) {
3079
+ array_erase(&self->pattern_map, i);
3080
+ i--;
3081
+ }
3082
+ }
3083
+ }
3084
+
3085
+ /***************
3086
+ * QueryCursor
3087
+ ***************/
3088
+
3089
+ TSQueryCursor *ts_query_cursor_new(void) {
3090
+ TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
3091
+ *self = (TSQueryCursor) {
3092
+ .did_exceed_match_limit = false,
3093
+ .ascending = false,
3094
+ .halted = false,
3095
+ .states = array_new(),
3096
+ .finished_states = array_new(),
3097
+ .capture_list_pool = capture_list_pool_new(),
3098
+ .start_byte = 0,
3099
+ .end_byte = UINT32_MAX,
3100
+ .start_point = {0, 0},
3101
+ .end_point = POINT_MAX,
3102
+ .max_start_depth = UINT32_MAX,
3103
+ .timeout_duration = 0,
3104
+ .end_clock = clock_null(),
3105
+ .operation_count = 0,
3106
+ };
3107
+ array_reserve(&self->states, 8);
3108
+ array_reserve(&self->finished_states, 8);
3109
+ return self;
3110
+ }
3111
+
3112
+ void ts_query_cursor_delete(TSQueryCursor *self) {
3113
+ array_delete(&self->states);
3114
+ array_delete(&self->finished_states);
3115
+ ts_tree_cursor_delete(&self->cursor);
3116
+ capture_list_pool_delete(&self->capture_list_pool);
3117
+ ts_free(self);
3118
+ }
3119
+
3120
+ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
3121
+ return self->did_exceed_match_limit;
3122
+ }
3123
+
3124
+ uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
3125
+ return self->capture_list_pool.max_capture_list_count;
3126
+ }
3127
+
3128
+ void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
3129
+ self->capture_list_pool.max_capture_list_count = limit;
3130
+ }
3131
+
3132
+ uint64_t ts_query_cursor_timeout_micros(const TSQueryCursor *self) {
3133
+ return duration_to_micros(self->timeout_duration);
3134
+ }
3135
+
3136
+ void ts_query_cursor_set_timeout_micros(TSQueryCursor *self, uint64_t timeout_micros) {
3137
+ self->timeout_duration = duration_from_micros(timeout_micros);
3138
+ }
3139
+
3140
// Query-execution tracing. LOG writes to stderr when DEBUG_EXECUTE_QUERY is
// defined and expands to nothing otherwise.
#if defined(DEBUG_EXECUTE_QUERY)
#  define LOG(...) fprintf(stderr, __VA_ARGS__)
#else
#  define LOG(...)
#endif
3145
+
3146
+ void ts_query_cursor_exec(
3147
+ TSQueryCursor *self,
3148
+ const TSQuery *query,
3149
+ TSNode node
3150
+ ) {
3151
+ if (query) {
3152
+ LOG("query steps:\n");
3153
+ for (unsigned i = 0; i < query->steps.size; i++) {
3154
+ QueryStep *step = &query->steps.contents[i];
3155
+ LOG(" %u: {", i);
3156
+ if (step->depth == PATTERN_DONE_MARKER) {
3157
+ LOG("DONE");
3158
+ } else if (step->is_dead_end) {
3159
+ LOG("dead_end");
3160
+ } else if (step->is_pass_through) {
3161
+ LOG("pass_through");
3162
+ } else if (step->symbol != WILDCARD_SYMBOL) {
3163
+ LOG("symbol: %s", query->language->symbol_names[step->symbol]);
3164
+ } else {
3165
+ LOG("symbol: *");
3166
+ }
3167
+ if (step->field) {
3168
+ LOG(", field: %s", query->language->field_names[step->field]);
3169
+ }
3170
+ if (step->alternative_index != NONE) {
3171
+ LOG(", alternative: %u", step->alternative_index);
3172
+ }
3173
+ LOG("},\n");
3174
+ }
3175
+ }
3176
+
3177
+ array_clear(&self->states);
3178
+ array_clear(&self->finished_states);
3179
+ ts_tree_cursor_reset(&self->cursor, node);
3180
+ capture_list_pool_reset(&self->capture_list_pool);
3181
+ self->on_visible_node = true;
3182
+ self->next_state_id = 0;
3183
+ self->depth = 0;
3184
+ self->ascending = false;
3185
+ self->halted = false;
3186
+ self->query = query;
3187
+ self->did_exceed_match_limit = false;
3188
+ self->operation_count = 0;
3189
+ if (self->timeout_duration) {
3190
+ self->end_clock = clock_after(clock_now(), self->timeout_duration);
3191
+ } else {
3192
+ self->end_clock = clock_null();
3193
+ }
3194
+ self->query_options = NULL;
3195
+ self->query_state = (TSQueryCursorState) {0};
3196
+ }
3197
+
3198
+ void ts_query_cursor_exec_with_options(
3199
+ TSQueryCursor *self,
3200
+ const TSQuery *query,
3201
+ TSNode node,
3202
+ const TSQueryCursorOptions *query_options
3203
+ ) {
3204
+ ts_query_cursor_exec(self, query, node);
3205
+ if (query_options) {
3206
+ self->query_options = query_options;
3207
+ self->query_state = (TSQueryCursorState) {
3208
+ .payload = query_options->payload
3209
+ };
3210
+ }
3211
+ }
3212
+
3213
+ bool ts_query_cursor_set_byte_range(
3214
+ TSQueryCursor *self,
3215
+ uint32_t start_byte,
3216
+ uint32_t end_byte
3217
+ ) {
3218
+ if (end_byte == 0) {
3219
+ end_byte = UINT32_MAX;
3220
+ }
3221
+ if (start_byte > end_byte) {
3222
+ return false;
3223
+ }
3224
+ self->start_byte = start_byte;
3225
+ self->end_byte = end_byte;
3226
+ return true;
3227
+ }
3228
+
3229
+ bool ts_query_cursor_set_point_range(
3230
+ TSQueryCursor *self,
3231
+ TSPoint start_point,
3232
+ TSPoint end_point
3233
+ ) {
3234
+ if (end_point.row == 0 && end_point.column == 0) {
3235
+ end_point = POINT_MAX;
3236
+ }
3237
+ if (point_gt(start_point, end_point)) {
3238
+ return false;
3239
+ }
3240
+ self->start_point = start_point;
3241
+ self->end_point = end_point;
3242
+ return true;
3243
+ }
3244
+
3245
+ // Search through all of the in-progress states, and find the captured
3246
+ // node that occurs earliest in the document.
3247
+ static bool ts_query_cursor__first_in_progress_capture(
3248
+ TSQueryCursor *self,
3249
+ uint32_t *state_index,
3250
+ uint32_t *byte_offset,
3251
+ uint32_t *pattern_index,
3252
+ bool *is_definite
3253
+ ) {
3254
+ bool result = false;
3255
+ *state_index = UINT32_MAX;
3256
+ *byte_offset = UINT32_MAX;
3257
+ *pattern_index = UINT32_MAX;
3258
+ for (unsigned i = 0; i < self->states.size; i++) {
3259
+ QueryState *state = &self->states.contents[i];
3260
+ if (state->dead) continue;
3261
+
3262
+ const CaptureList *captures = capture_list_pool_get(
3263
+ &self->capture_list_pool,
3264
+ state->capture_list_id
3265
+ );
3266
+ if (state->consumed_capture_count >= captures->size) {
3267
+ continue;
3268
+ }
3269
+
3270
+ TSNode node = captures->contents[state->consumed_capture_count].node;
3271
+ if (
3272
+ ts_node_end_byte(node) <= self->start_byte ||
3273
+ point_lte(ts_node_end_point(node), self->start_point)
3274
+ ) {
3275
+ state->consumed_capture_count++;
3276
+ i--;
3277
+ continue;
3278
+ }
3279
+
3280
+ uint32_t node_start_byte = ts_node_start_byte(node);
3281
+ if (
3282
+ !result ||
3283
+ node_start_byte < *byte_offset ||
3284
+ (node_start_byte == *byte_offset && state->pattern_index < *pattern_index)
3285
+ ) {
3286
+ QueryStep *step = &self->query->steps.contents[state->step_index];
3287
+ if (is_definite) {
3288
+ // We're being a bit conservative here by asserting that the following step
3289
+ // is not immediate, because this capture might end up being discarded if the
3290
+ // following symbol in the tree isn't the required symbol for this step.
3291
+ *is_definite = step->root_pattern_guaranteed && !step->is_immediate;
3292
+ } else if (step->root_pattern_guaranteed) {
3293
+ continue;
3294
+ }
3295
+
3296
+ result = true;
3297
+ *state_index = i;
3298
+ *byte_offset = node_start_byte;
3299
+ *pattern_index = state->pattern_index;
3300
+ }
3301
+ }
3302
+ return result;
3303
+ }
3304
+
3305
+ // Determine which node is first in a depth-first traversal
3306
+ int ts_query_cursor__compare_nodes(TSNode left, TSNode right) {
3307
+ if (left.id != right.id) {
3308
+ uint32_t left_start = ts_node_start_byte(left);
3309
+ uint32_t right_start = ts_node_start_byte(right);
3310
+ if (left_start < right_start) return -1;
3311
+ if (left_start > right_start) return 1;
3312
+ uint32_t left_node_count = ts_node_end_byte(left);
3313
+ uint32_t right_node_count = ts_node_end_byte(right);
3314
+ if (left_node_count > right_node_count) return -1;
3315
+ if (left_node_count < right_node_count) return 1;
3316
+ }
3317
+ return 0;
3318
+ }
3319
+
3320
+ // Determine if either state contains a superset of the other state's captures.
3321
+ void ts_query_cursor__compare_captures(
3322
+ TSQueryCursor *self,
3323
+ QueryState *left_state,
3324
+ QueryState *right_state,
3325
+ bool *left_contains_right,
3326
+ bool *right_contains_left
3327
+ ) {
3328
+ const CaptureList *left_captures = capture_list_pool_get(
3329
+ &self->capture_list_pool,
3330
+ left_state->capture_list_id
3331
+ );
3332
+ const CaptureList *right_captures = capture_list_pool_get(
3333
+ &self->capture_list_pool,
3334
+ right_state->capture_list_id
3335
+ );
3336
+ *left_contains_right = true;
3337
+ *right_contains_left = true;
3338
+ unsigned i = 0, j = 0;
3339
+ for (;;) {
3340
+ if (i < left_captures->size) {
3341
+ if (j < right_captures->size) {
3342
+ TSQueryCapture *left = &left_captures->contents[i];
3343
+ TSQueryCapture *right = &right_captures->contents[j];
3344
+ if (left->node.id == right->node.id && left->index == right->index) {
3345
+ i++;
3346
+ j++;
3347
+ } else {
3348
+ switch (ts_query_cursor__compare_nodes(left->node, right->node)) {
3349
+ case -1:
3350
+ *right_contains_left = false;
3351
+ i++;
3352
+ break;
3353
+ case 1:
3354
+ *left_contains_right = false;
3355
+ j++;
3356
+ break;
3357
+ default:
3358
+ *right_contains_left = false;
3359
+ *left_contains_right = false;
3360
+ i++;
3361
+ j++;
3362
+ break;
3363
+ }
3364
+ }
3365
+ } else {
3366
+ *right_contains_left = false;
3367
+ break;
3368
+ }
3369
+ } else {
3370
+ if (j < right_captures->size) {
3371
+ *left_contains_right = false;
3372
+ }
3373
+ break;
3374
+ }
3375
+ }
3376
+ }
3377
+
3378
+ static void ts_query_cursor__add_state(
3379
+ TSQueryCursor *self,
3380
+ const PatternEntry *pattern
3381
+ ) {
3382
+ QueryStep *step = &self->query->steps.contents[pattern->step_index];
3383
+ uint32_t start_depth = self->depth - step->depth;
3384
+
3385
+ // Keep the states array in ascending order of start_depth and pattern_index,
3386
+ // so that it can be processed more efficiently elsewhere. Usually, there is
3387
+ // no work to do here because of two facts:
3388
+ // * States with lower start_depth are naturally added first due to the
3389
+ // order in which nodes are visited.
3390
+ // * Earlier patterns are naturally added first because of the ordering of the
3391
+ // pattern_map data structure that's used to initiate matches.
3392
+ //
3393
+ // This loop is only needed in cases where two conditions hold:
3394
+ // * A pattern consists of more than one sibling node, so that its states
3395
+ // remain in progress after exiting the node that started the match.
3396
+ // * The first node in the pattern matches against multiple nodes at the
3397
+ // same depth.
3398
+ //
3399
+ // An example of this is the pattern '((comment)* (function))'. If multiple
3400
+ // `comment` nodes appear in a row, then we may initiate a new state for this
3401
+ // pattern while another state for the same pattern is already in progress.
3402
+ // If there are multiple patterns like this in a query, then this loop will
3403
+ // need to execute in order to keep the states ordered by pattern_index.
3404
+ uint32_t index = self->states.size;
3405
+ while (index > 0) {
3406
+ QueryState *prev_state = &self->states.contents[index - 1];
3407
+ if (prev_state->start_depth < start_depth) break;
3408
+ if (prev_state->start_depth == start_depth) {
3409
+ // Avoid inserting an unnecessary duplicate state, which would be
3410
+ // immediately pruned by the longest-match criteria.
3411
+ if (
3412
+ prev_state->pattern_index == pattern->pattern_index &&
3413
+ prev_state->step_index == pattern->step_index
3414
+ ) return;
3415
+ if (prev_state->pattern_index <= pattern->pattern_index) break;
3416
+ }
3417
+ index--;
3418
+ }
3419
+
3420
+ LOG(
3421
+ " start state. pattern:%u, step:%u\n",
3422
+ pattern->pattern_index,
3423
+ pattern->step_index
3424
+ );
3425
+ array_insert(&self->states, index, ((QueryState) {
3426
+ .id = UINT32_MAX,
3427
+ .capture_list_id = NONE,
3428
+ .step_index = pattern->step_index,
3429
+ .pattern_index = pattern->pattern_index,
3430
+ .start_depth = start_depth,
3431
+ .consumed_capture_count = 0,
3432
+ .seeking_immediate_match = true,
3433
+ .has_in_progress_alternatives = false,
3434
+ .needs_parent = step->depth == 1,
3435
+ .dead = false,
3436
+ }));
3437
+ }
3438
+
3439
+ // Acquire a capture list for this state. If there are no capture lists left in the
3440
+ // pool, this will steal the capture list from another existing state, and mark that
3441
+ // other state as 'dead'.
3442
+ static CaptureList *ts_query_cursor__prepare_to_capture(
3443
+ TSQueryCursor *self,
3444
+ QueryState *state,
3445
+ unsigned state_index_to_preserve
3446
+ ) {
3447
+ if (state->capture_list_id == NONE) {
3448
+ state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
3449
+
3450
+ // If there are no capture lists left in the pool, then terminate whichever
3451
+ // state has captured the earliest node in the document, and steal its
3452
+ // capture list.
3453
+ if (state->capture_list_id == NONE) {
3454
+ self->did_exceed_match_limit = true;
3455
+ uint32_t state_index, byte_offset, pattern_index;
3456
+ if (
3457
+ ts_query_cursor__first_in_progress_capture(
3458
+ self,
3459
+ &state_index,
3460
+ &byte_offset,
3461
+ &pattern_index,
3462
+ NULL
3463
+ ) &&
3464
+ state_index != state_index_to_preserve
3465
+ ) {
3466
+ LOG(
3467
+ " abandon state. index:%u, pattern:%u, offset:%u.\n",
3468
+ state_index, pattern_index, byte_offset
3469
+ );
3470
+ QueryState *other_state = &self->states.contents[state_index];
3471
+ state->capture_list_id = other_state->capture_list_id;
3472
+ other_state->capture_list_id = NONE;
3473
+ other_state->dead = true;
3474
+ CaptureList *list = capture_list_pool_get_mut(
3475
+ &self->capture_list_pool,
3476
+ state->capture_list_id
3477
+ );
3478
+ array_clear(list);
3479
+ return list;
3480
+ } else {
3481
+ LOG(" ran out of capture lists");
3482
+ return NULL;
3483
+ }
3484
+ }
3485
+ }
3486
+ return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
3487
+ }
3488
+
3489
+ static void ts_query_cursor__capture(
3490
+ TSQueryCursor *self,
3491
+ QueryState *state,
3492
+ QueryStep *step,
3493
+ TSNode node
3494
+ ) {
3495
+ if (state->dead) return;
3496
+ CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
3497
+ if (!capture_list) {
3498
+ state->dead = true;
3499
+ return;
3500
+ }
3501
+
3502
+ for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
3503
+ uint16_t capture_id = step->capture_ids[j];
3504
+ if (step->capture_ids[j] == NONE) break;
3505
+ array_push(capture_list, ((TSQueryCapture) { node, capture_id }));
3506
+ LOG(
3507
+ " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n",
3508
+ ts_node_type(node),
3509
+ state->pattern_index,
3510
+ capture_id,
3511
+ capture_list->size
3512
+ );
3513
+ }
3514
+ }
3515
+
3516
+ // Duplicate the given state and insert the newly-created state immediately after
3517
+ // the given state in the `states` array. Ensures that the given state reference is
3518
+ // still valid, even if the states array is reallocated.
3519
+ static QueryState *ts_query_cursor__copy_state(
3520
+ TSQueryCursor *self,
3521
+ QueryState **state_ref
3522
+ ) {
3523
+ const QueryState *state = *state_ref;
3524
+ uint32_t state_index = (uint32_t)(state - self->states.contents);
3525
+ QueryState copy = *state;
3526
+ copy.capture_list_id = NONE;
3527
+
3528
+ // If the state has captures, copy its capture list.
3529
+ if (state->capture_list_id != NONE) {
3530
+ CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, &copy, state_index);
3531
+ if (!new_captures) return NULL;
3532
+ const CaptureList *old_captures = capture_list_pool_get(
3533
+ &self->capture_list_pool,
3534
+ state->capture_list_id
3535
+ );
3536
+ array_push_all(new_captures, old_captures);
3537
+ }
3538
+
3539
+ array_insert(&self->states, state_index + 1, copy);
3540
+ *state_ref = &self->states.contents[state_index];
3541
+ return &self->states.contents[state_index + 1];
3542
+ }
3543
+
3544
+ static inline bool ts_query_cursor__should_descend(
3545
+ TSQueryCursor *self,
3546
+ bool node_intersects_range
3547
+ ) {
3548
+
3549
+ if (node_intersects_range && self->depth < self->max_start_depth) {
3550
+ return true;
3551
+ }
3552
+
3553
+ // If there are in-progress matches whose remaining steps occur
3554
+ // deeper in the tree, then descend.
3555
+ for (unsigned i = 0; i < self->states.size; i++) {
3556
+ QueryState *state = &self->states.contents[i];;
3557
+ QueryStep *next_step = &self->query->steps.contents[state->step_index];
3558
+ if (
3559
+ next_step->depth != PATTERN_DONE_MARKER &&
3560
+ state->start_depth + next_step->depth > self->depth
3561
+ ) {
3562
+ return true;
3563
+ }
3564
+ }
3565
+
3566
+ if (self->depth >= self->max_start_depth) {
3567
+ return false;
3568
+ }
3569
+
3570
+ // If the current node is hidden, then a non-rooted pattern might match
3571
+ // one if its roots inside of this node, and match another of its roots
3572
+ // as part of a sibling node, so we may need to descend.
3573
+ if (!self->on_visible_node) {
3574
+ // Descending into a repetition node outside of the range can be
3575
+ // expensive, because these nodes can have many visible children.
3576
+ // Avoid descending into repetition nodes unless we have already
3577
+ // determined that this query can match rootless patterns inside
3578
+ // of this type of repetition node.
3579
+ Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor);
3580
+ if (ts_subtree_is_repetition(subtree)) {
3581
+ bool exists;
3582
+ uint32_t index;
3583
+ array_search_sorted_by(
3584
+ &self->query->repeat_symbols_with_rootless_patterns,,
3585
+ ts_subtree_symbol(subtree),
3586
+ &index,
3587
+ &exists
3588
+ );
3589
+ return exists;
3590
+ }
3591
+
3592
+ return true;
3593
+ }
3594
+
3595
+ return false;
3596
+ }
3597
+
3598
+ // Walk the tree, processing patterns until at least one pattern finishes.
3599
+ // If one or more patterns finish, return `true` and store their states in the
3600
+ // `finished_states` array. Multiple patterns can finish on the same node. If
3601
+ // there are no more matches, return `false`.
3602
+ static inline bool ts_query_cursor__advance(
3603
+ TSQueryCursor *self,
3604
+ bool stop_on_definite_step
3605
+ ) {
3606
+ bool did_match = false;
3607
+ for (;;) {
3608
+ if (self->halted) {
3609
+ while (self->states.size > 0) {
3610
+ QueryState state = array_pop(&self->states);
3611
+ capture_list_pool_release(
3612
+ &self->capture_list_pool,
3613
+ state.capture_list_id
3614
+ );
3615
+ }
3616
+ }
3617
+
3618
+ if (++self->operation_count == OP_COUNT_PER_QUERY_TIMEOUT_CHECK) {
3619
+ self->operation_count = 0;
3620
+ }
3621
+
3622
+ if (self->query_options && self->query_options->progress_callback) {
3623
+ self->query_state.current_byte_offset = ts_node_start_byte(ts_tree_cursor_current_node(&self->cursor));
3624
+ }
3625
+ if (
3626
+ did_match ||
3627
+ self->halted ||
3628
+ (
3629
+ self->operation_count == 0 &&
3630
+ (
3631
+ (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)) ||
3632
+ (self->query_options && self->query_options->progress_callback && self->query_options->progress_callback(&self->query_state))
3633
+ )
3634
+ )
3635
+ ) {
3636
+ return did_match;
3637
+ }
3638
+
3639
+ // Exit the current node.
3640
+ if (self->ascending) {
3641
+ if (self->on_visible_node) {
3642
+ LOG(
3643
+ "leave node. depth:%u, type:%s\n",
3644
+ self->depth,
3645
+ ts_node_type(ts_tree_cursor_current_node(&self->cursor))
3646
+ );
3647
+
3648
+ // After leaving a node, remove any states that cannot make further progress.
3649
+ uint32_t deleted_count = 0;
3650
+ for (unsigned i = 0, n = self->states.size; i < n; i++) {
3651
+ QueryState *state = &self->states.contents[i];
3652
+ QueryStep *step = &self->query->steps.contents[state->step_index];
3653
+
3654
+ // If a state completed its pattern inside of this node, but was deferred from finishing
3655
+ // in order to search for longer matches, mark it as finished.
3656
+ if (
3657
+ step->depth == PATTERN_DONE_MARKER &&
3658
+ (state->start_depth > self->depth || self->depth == 0)
3659
+ ) {
3660
+ LOG(" finish pattern %u\n", state->pattern_index);
3661
+ array_push(&self->finished_states, *state);
3662
+ did_match = true;
3663
+ deleted_count++;
3664
+ }
3665
+
3666
+ // If a state needed to match something within this node, then remove that state
3667
+ // as it has failed to match.
3668
+ else if (
3669
+ step->depth != PATTERN_DONE_MARKER &&
3670
+ (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth
3671
+ ) {
3672
+ LOG(
3673
+ " failed to match. pattern:%u, step:%u\n",
3674
+ state->pattern_index,
3675
+ state->step_index
3676
+ );
3677
+ capture_list_pool_release(
3678
+ &self->capture_list_pool,
3679
+ state->capture_list_id
3680
+ );
3681
+ deleted_count++;
3682
+ }
3683
+
3684
+ else if (deleted_count > 0) {
3685
+ self->states.contents[i - deleted_count] = *state;
3686
+ }
3687
+ }
3688
+ self->states.size -= deleted_count;
3689
+ }
3690
+
3691
+ // Leave this node by stepping to its next sibling or to its parent.
3692
+ switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) {
3693
+ case TreeCursorStepVisible:
3694
+ if (!self->on_visible_node) {
3695
+ self->depth++;
3696
+ self->on_visible_node = true;
3697
+ }
3698
+ self->ascending = false;
3699
+ break;
3700
+ case TreeCursorStepHidden:
3701
+ if (self->on_visible_node) {
3702
+ self->depth--;
3703
+ self->on_visible_node = false;
3704
+ }
3705
+ self->ascending = false;
3706
+ break;
3707
+ default:
3708
+ if (ts_tree_cursor_goto_parent(&self->cursor)) {
3709
+ self->depth--;
3710
+ } else {
3711
+ LOG("halt at root\n");
3712
+ self->halted = true;
3713
+ }
3714
+ }
3715
+ }
3716
+
3717
+ // Enter a new node.
3718
+ else {
3719
+ // Get the properties of the current node.
3720
+ TSNode node = ts_tree_cursor_current_node(&self->cursor);
3721
+ TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);
3722
+
3723
+ uint32_t start_byte = ts_node_start_byte(node);
3724
+ uint32_t end_byte = ts_node_end_byte(node);
3725
+ TSPoint start_point = ts_node_start_point(node);
3726
+ TSPoint end_point = ts_node_end_point(node);
3727
+ bool is_empty = start_byte == end_byte;
3728
+
3729
+ bool parent_precedes_range = !ts_node_is_null(parent_node) && (
3730
+ ts_node_end_byte(parent_node) <= self->start_byte ||
3731
+ point_lte(ts_node_end_point(parent_node), self->start_point)
3732
+ );
3733
+ bool parent_follows_range = !ts_node_is_null(parent_node) && (
3734
+ ts_node_start_byte(parent_node) >= self->end_byte ||
3735
+ point_gte(ts_node_start_point(parent_node), self->end_point)
3736
+ );
3737
+ bool node_precedes_range =
3738
+ parent_precedes_range ||
3739
+ end_byte < self->start_byte ||
3740
+ point_lt(end_point, self->start_point) ||
3741
+ (!is_empty && end_byte == self->start_byte) ||
3742
+ (!is_empty && point_eq(end_point, self->start_point));
3743
+
3744
+ bool node_follows_range = parent_follows_range || (
3745
+ start_byte >= self->end_byte ||
3746
+ point_gte(start_point, self->end_point)
3747
+ );
3748
+ bool parent_intersects_range = !parent_precedes_range && !parent_follows_range;
3749
+ bool node_intersects_range = !node_precedes_range && !node_follows_range;
3750
+
3751
+ if (self->on_visible_node) {
3752
+ TSSymbol symbol = ts_node_symbol(node);
3753
+ bool is_named = ts_node_is_named(node);
3754
+ bool is_missing = ts_node_is_missing(node);
3755
+ bool has_later_siblings;
3756
+ bool has_later_named_siblings;
3757
+ bool can_have_later_siblings_with_this_field;
3758
+ TSFieldId field_id = 0;
3759
+ TSSymbol supertypes[8] = {0};
3760
+ unsigned supertype_count = 8;
3761
+ ts_tree_cursor_current_status(
3762
+ &self->cursor,
3763
+ &field_id,
3764
+ &has_later_siblings,
3765
+ &has_later_named_siblings,
3766
+ &can_have_later_siblings_with_this_field,
3767
+ supertypes,
3768
+ &supertype_count
3769
+ );
3770
+ LOG(
3771
+ "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
3772
+ self->depth,
3773
+ ts_node_type(node),
3774
+ ts_language_field_name_for_id(self->query->language, field_id),
3775
+ ts_node_start_point(node).row,
3776
+ self->states.size,
3777
+ self->finished_states.size
3778
+ );
3779
+
3780
+ bool node_is_error = symbol == ts_builtin_sym_error;
3781
+ bool parent_is_error =
3782
+ !ts_node_is_null(parent_node) &&
3783
+ ts_node_symbol(parent_node) == ts_builtin_sym_error;
3784
+
3785
+ // Add new states for any patterns whose root node is a wildcard.
3786
+ if (!node_is_error) {
3787
+ for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
3788
+ PatternEntry *pattern = &self->query->pattern_map.contents[i];
3789
+
3790
+ // If this node matches the first step of the pattern, then add a new
3791
+ // state at the start of this pattern.
3792
+ QueryStep *step = &self->query->steps.contents[pattern->step_index];
3793
+ uint32_t start_depth = self->depth - step->depth;
3794
+ if (
3795
+ (pattern->is_rooted ?
3796
+ node_intersects_range :
3797
+ (parent_intersects_range && !parent_is_error)) &&
3798
+ (!step->field || field_id == step->field) &&
3799
+ (!step->supertype_symbol || supertype_count > 0) &&
3800
+ (start_depth <= self->max_start_depth)
3801
+ ) {
3802
+ ts_query_cursor__add_state(self, pattern);
3803
+ }
3804
+ }
3805
+ }
3806
+
3807
+ // Add new states for any patterns whose root node matches this node.
3808
+ unsigned i;
3809
+ if (ts_query__pattern_map_search(self->query, symbol, &i)) {
3810
+ PatternEntry *pattern = &self->query->pattern_map.contents[i];
3811
+
3812
+ QueryStep *step = &self->query->steps.contents[pattern->step_index];
3813
+ uint32_t start_depth = self->depth - step->depth;
3814
+ do {
3815
+ // If this node matches the first step of the pattern, then add a new
3816
+ // state at the start of this pattern.
3817
+ if (
3818
+ (pattern->is_rooted ?
3819
+ node_intersects_range :
3820
+ (parent_intersects_range && !parent_is_error)) &&
3821
+ (!step->field || field_id == step->field) &&
3822
+ (start_depth <= self->max_start_depth)
3823
+ ) {
3824
+ ts_query_cursor__add_state(self, pattern);
3825
+ }
3826
+
3827
+ // Advance to the next pattern whose root node matches this node.
3828
+ i++;
3829
+ if (i == self->query->pattern_map.size) break;
3830
+ pattern = &self->query->pattern_map.contents[i];
3831
+ step = &self->query->steps.contents[pattern->step_index];
3832
+ } while (step->symbol == symbol);
3833
+ }
3834
+
3835
+ // Update all of the in-progress states with current node.
3836
+ for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) {
3837
+ QueryState *state = &self->states.contents[j];
3838
+ QueryStep *step = &self->query->steps.contents[state->step_index];
3839
+ state->has_in_progress_alternatives = false;
3840
+ copy_count = 0;
3841
+
3842
+ // Check that the node matches all of the criteria for the next
3843
+ // step of the pattern.
3844
+ if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;
3845
+
3846
+ // Determine if this node matches this step of the pattern, and also
3847
+ // if this node can have later siblings that match this step of the
3848
+ // pattern.
3849
+ bool node_does_match = false;
3850
+ if (step->symbol == WILDCARD_SYMBOL) {
3851
+ if (step->is_missing) {
3852
+ node_does_match = is_missing;
3853
+ } else {
3854
+ node_does_match = !node_is_error && (is_named || !step->is_named);
3855
+ }
3856
+ } else {
3857
+ node_does_match = symbol == step->symbol && (!step->is_missing || is_missing);
3858
+ }
3859
+ bool later_sibling_can_match = has_later_siblings;
3860
+ if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
3861
+ later_sibling_can_match = false;
3862
+ }
3863
+ if (step->is_last_child && has_later_named_siblings) {
3864
+ node_does_match = false;
3865
+ }
3866
+ if (step->supertype_symbol) {
3867
+ bool has_supertype = false;
3868
+ for (unsigned k = 0; k < supertype_count; k++) {
3869
+ if (supertypes[k] == step->supertype_symbol) {
3870
+ has_supertype = true;
3871
+ break;
3872
+ }
3873
+ }
3874
+ if (!has_supertype) node_does_match = false;
3875
+ }
3876
+ if (step->field) {
3877
+ if (step->field == field_id) {
3878
+ if (!can_have_later_siblings_with_this_field) {
3879
+ later_sibling_can_match = false;
3880
+ }
3881
+ } else {
3882
+ node_does_match = false;
3883
+ }
3884
+ }
3885
+
3886
+ if (step->negated_field_list_id) {
3887
+ TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id];
3888
+ for (;;) {
3889
+ TSFieldId negated_field_id = *negated_field_ids;
3890
+ if (negated_field_id) {
3891
+ negated_field_ids++;
3892
+ if (ts_node_child_by_field_id(node, negated_field_id).id) {
3893
+ node_does_match = false;
3894
+ break;
3895
+ }
3896
+ } else {
3897
+ break;
3898
+ }
3899
+ }
3900
+ }
3901
+
3902
+ // Remove states immediately if it is ever clear that they cannot match.
3903
+ if (!node_does_match) {
3904
+ if (!later_sibling_can_match) {
3905
+ LOG(
3906
+ " discard state. pattern:%u, step:%u\n",
3907
+ state->pattern_index,
3908
+ state->step_index
3909
+ );
3910
+ capture_list_pool_release(
3911
+ &self->capture_list_pool,
3912
+ state->capture_list_id
3913
+ );
3914
+ array_erase(&self->states, j);
3915
+ j--;
3916
+ }
3917
+ continue;
3918
+ }
3919
+
3920
+ // Some patterns can match their root node in multiple ways, capturing different
3921
+ // children. If this pattern step could match later children within the same
3922
+ // parent, then this query state cannot simply be updated in place. It must be
3923
+ // split into two states: one that matches this node, and one which skips over
3924
+ // this node, to preserve the possibility of matching later siblings.
3925
+ if (later_sibling_can_match && (
3926
+ step->contains_captures ||
3927
+ ts_query__step_is_fallible(self->query, state->step_index)
3928
+ )) {
3929
+ if (ts_query_cursor__copy_state(self, &state)) {
3930
+ LOG(
3931
+ " split state for capture. pattern:%u, step:%u\n",
3932
+ state->pattern_index,
3933
+ state->step_index
3934
+ );
3935
+ copy_count++;
3936
+ }
3937
+ }
3938
+
3939
+ // If this pattern started with a wildcard, such that the pattern map
3940
+ // actually points to the *second* step of the pattern, then check
3941
+ // that the node has a parent, and capture the parent node if necessary.
3942
+ if (state->needs_parent) {
3943
+ TSNode parent = ts_tree_cursor_parent_node(&self->cursor);
3944
+ if (ts_node_is_null(parent)) {
3945
+ LOG(" missing parent node\n");
3946
+ state->dead = true;
3947
+ } else {
3948
+ state->needs_parent = false;
3949
+ QueryStep *skipped_wildcard_step = step;
3950
+ do {
3951
+ skipped_wildcard_step--;
3952
+ } while (
3953
+ skipped_wildcard_step->is_dead_end ||
3954
+ skipped_wildcard_step->is_pass_through ||
3955
+ skipped_wildcard_step->depth > 0
3956
+ );
3957
+ if (skipped_wildcard_step->capture_ids[0] != NONE) {
3958
+ LOG(" capture wildcard parent\n");
3959
+ ts_query_cursor__capture(
3960
+ self,
3961
+ state,
3962
+ skipped_wildcard_step,
3963
+ parent
3964
+ );
3965
+ }
3966
+ }
3967
+ }
3968
+
3969
+ // If the current node is captured in this pattern, add it to the capture list.
3970
+ if (step->capture_ids[0] != NONE) {
3971
+ ts_query_cursor__capture(self, state, step, node);
3972
+ }
3973
+
3974
+ if (state->dead) {
3975
+ array_erase(&self->states, j);
3976
+ j--;
3977
+ continue;
3978
+ }
3979
+
3980
+ // Advance this state to the next step of its pattern.
3981
+ state->step_index++;
3982
+ LOG(
3983
+ " advance state. pattern:%u, step:%u\n",
3984
+ state->pattern_index,
3985
+ state->step_index
3986
+ );
3987
+
3988
+ QueryStep *next_step = &self->query->steps.contents[state->step_index];
3989
+
3990
+ // For a given step, if the current symbol is the wildcard symbol, `_`, and it is **not**
3991
+ // named, meaning it should capture anonymous nodes, **and** the next step is immediate,
3992
+ // we reuse the `seeking_immediate_match` flag to indicate that we are looking for an
3993
+ // immediate match due to an unnamed wildcard symbol.
3994
+ //
3995
+ // The reason for this is that typically, anchors will not consider anonymous nodes,
3996
+ // but we're special casing the wildcard symbol to allow for any immediate matches,
3997
+ // regardless of whether they are named or not.
3998
+ if (step->symbol == WILDCARD_SYMBOL && !step->is_named && next_step->is_immediate) {
3999
+ state->seeking_immediate_match = true;
4000
+ } else {
4001
+ state->seeking_immediate_match = false;
4002
+ }
4003
+
4004
+ if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true;
4005
+
4006
+ // If this state's next step has an alternative step, then copy the state in order
4007
+ // to pursue both alternatives. The alternative step itself may have an alternative,
4008
+ // so this is an interactive process.
4009
+ unsigned end_index = j + 1;
4010
+ for (unsigned k = j; k < end_index; k++) {
4011
+ QueryState *child_state = &self->states.contents[k];
4012
+ QueryStep *child_step = &self->query->steps.contents[child_state->step_index];
4013
+ if (child_step->alternative_index != NONE) {
4014
+ // A "dead-end" step exists only to add a non-sequential jump into the step sequence,
4015
+ // via its alternative index. When a state reaches a dead-end step, it jumps straight
4016
+ // to the step's alternative.
4017
+ if (child_step->is_dead_end) {
4018
+ child_state->step_index = child_step->alternative_index;
4019
+ k--;
4020
+ continue;
4021
+ }
4022
+
4023
+ // A "pass-through" step exists only to add a branch into the step sequence,
4024
+ // via its alternative_index. When a state reaches a pass-through step, it splits
4025
+ // in order to process the alternative step, and then it advances to the next step.
4026
+ if (child_step->is_pass_through) {
4027
+ child_state->step_index++;
4028
+ k--;
4029
+ }
4030
+
4031
+ QueryState *copy = ts_query_cursor__copy_state(self, &child_state);
4032
+ if (copy) {
4033
+ LOG(
4034
+ " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
4035
+ copy->pattern_index,
4036
+ copy->step_index,
4037
+ next_step->alternative_index,
4038
+ next_step->alternative_is_immediate,
4039
+ capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
4040
+ );
4041
+ end_index++;
4042
+ copy_count++;
4043
+ copy->step_index = child_step->alternative_index;
4044
+ if (child_step->alternative_is_immediate) {
4045
+ copy->seeking_immediate_match = true;
4046
+ }
4047
+ }
4048
+ }
4049
+ }
4050
+ }
4051
+
4052
+ for (unsigned j = 0; j < self->states.size; j++) {
4053
+ QueryState *state = &self->states.contents[j];
4054
+ if (state->dead) {
4055
+ array_erase(&self->states, j);
4056
+ j--;
4057
+ continue;
4058
+ }
4059
+
4060
+ // Enforce the longest-match criteria. When a query pattern contains optional or
4061
+ // repeated nodes, this is necessary to avoid multiple redundant states, where
4062
+ // one state has a strict subset of another state's captures.
4063
+ bool did_remove = false;
4064
+ for (unsigned k = j + 1; k < self->states.size; k++) {
4065
+ QueryState *other_state = &self->states.contents[k];
4066
+
4067
+ // Query states are kept in ascending order of start_depth and pattern_index.
4068
+ // Since the longest-match criteria is only used for deduping matches of the same
4069
+ // pattern and root node, we only need to perform pairwise comparisons within a
4070
+ // small slice of the states array.
4071
+ if (
4072
+ other_state->start_depth != state->start_depth ||
4073
+ other_state->pattern_index != state->pattern_index
4074
+ ) break;
4075
+
4076
+ bool left_contains_right, right_contains_left;
4077
+ ts_query_cursor__compare_captures(
4078
+ self,
4079
+ state,
4080
+ other_state,
4081
+ &left_contains_right,
4082
+ &right_contains_left
4083
+ );
4084
+ if (left_contains_right) {
4085
+ if (state->step_index == other_state->step_index) {
4086
+ LOG(
4087
+ " drop shorter state. pattern: %u, step_index: %u\n",
4088
+ state->pattern_index,
4089
+ state->step_index
4090
+ );
4091
+ capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
4092
+ array_erase(&self->states, k);
4093
+ k--;
4094
+ continue;
4095
+ }
4096
+ other_state->has_in_progress_alternatives = true;
4097
+ }
4098
+ if (right_contains_left) {
4099
+ if (state->step_index == other_state->step_index) {
4100
+ LOG(
4101
+ " drop shorter state. pattern: %u, step_index: %u\n",
4102
+ state->pattern_index,
4103
+ state->step_index
4104
+ );
4105
+ capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
4106
+ array_erase(&self->states, j);
4107
+ j--;
4108
+ did_remove = true;
4109
+ break;
4110
+ }
4111
+ state->has_in_progress_alternatives = true;
4112
+ }
4113
+ }
4114
+
4115
+ // If the state is at the end of its pattern, remove it from the list
4116
+ // of in-progress states and add it to the list of finished states.
4117
+ if (!did_remove) {
4118
+ LOG(
4119
+ " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
4120
+ state->pattern_index,
4121
+ state->start_depth,
4122
+ state->step_index,
4123
+ capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
4124
+ );
4125
+ QueryStep *next_step = &self->query->steps.contents[state->step_index];
4126
+ if (next_step->depth == PATTERN_DONE_MARKER) {
4127
+ if (state->has_in_progress_alternatives) {
4128
+ LOG(" defer finishing pattern %u\n", state->pattern_index);
4129
+ } else {
4130
+ LOG(" finish pattern %u\n", state->pattern_index);
4131
+ array_push(&self->finished_states, *state);
4132
+ array_erase(&self->states, (uint32_t)(state - self->states.contents));
4133
+ did_match = true;
4134
+ j--;
4135
+ }
4136
+ }
4137
+ }
4138
+ }
4139
+ }
4140
+
4141
+ if (ts_query_cursor__should_descend(self, node_intersects_range)) {
4142
+ switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) {
4143
+ case TreeCursorStepVisible:
4144
+ self->depth++;
4145
+ self->on_visible_node = true;
4146
+ continue;
4147
+ case TreeCursorStepHidden:
4148
+ self->on_visible_node = false;
4149
+ continue;
4150
+ default:
4151
+ break;
4152
+ }
4153
+ }
4154
+
4155
+ self->ascending = true;
4156
+ }
4157
+ }
4158
+ }
4159
+
4160
+ bool ts_query_cursor_next_match(
4161
+ TSQueryCursor *self,
4162
+ TSQueryMatch *match
4163
+ ) {
4164
+ if (self->finished_states.size == 0) {
4165
+ if (!ts_query_cursor__advance(self, false)) {
4166
+ return false;
4167
+ }
4168
+ }
4169
+
4170
+ QueryState *state = &self->finished_states.contents[0];
4171
+ if (state->id == UINT32_MAX) state->id = self->next_state_id++;
4172
+ match->id = state->id;
4173
+ match->pattern_index = state->pattern_index;
4174
+ const CaptureList *captures = capture_list_pool_get(
4175
+ &self->capture_list_pool,
4176
+ state->capture_list_id
4177
+ );
4178
+ match->captures = captures->contents;
4179
+ match->capture_count = captures->size;
4180
+ capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
4181
+ array_erase(&self->finished_states, 0);
4182
+ return true;
4183
+ }
4184
+
4185
+ void ts_query_cursor_remove_match(
4186
+ TSQueryCursor *self,
4187
+ uint32_t match_id
4188
+ ) {
4189
+ for (unsigned i = 0; i < self->finished_states.size; i++) {
4190
+ const QueryState *state = &self->finished_states.contents[i];
4191
+ if (state->id == match_id) {
4192
+ capture_list_pool_release(
4193
+ &self->capture_list_pool,
4194
+ state->capture_list_id
4195
+ );
4196
+ array_erase(&self->finished_states, i);
4197
+ return;
4198
+ }
4199
+ }
4200
+
4201
+ // Remove unfinished query states as well to prevent future
4202
+ // captures for a match being removed.
4203
+ for (unsigned i = 0; i < self->states.size; i++) {
4204
+ const QueryState *state = &self->states.contents[i];
4205
+ if (state->id == match_id) {
4206
+ capture_list_pool_release(
4207
+ &self->capture_list_pool,
4208
+ state->capture_list_id
4209
+ );
4210
+ array_erase(&self->states, i);
4211
+ return;
4212
+ }
4213
+ }
4214
+ }
4215
+
4216
+ bool ts_query_cursor_next_capture(
4217
+ TSQueryCursor *self,
4218
+ TSQueryMatch *match,
4219
+ uint32_t *capture_index
4220
+ ) {
4221
+ // The goal here is to return captures in order, even though they may not
4222
+ // be discovered in order, because patterns can overlap. Search for matches
4223
+ // until there is a finished capture that is before any unfinished capture.
4224
+ for (;;) {
4225
+ // First, find the earliest capture in an unfinished match.
4226
+ uint32_t first_unfinished_capture_byte;
4227
+ uint32_t first_unfinished_pattern_index;
4228
+ uint32_t first_unfinished_state_index;
4229
+ bool first_unfinished_state_is_definite = false;
4230
+ bool found_unfinished_state = ts_query_cursor__first_in_progress_capture(
4231
+ self,
4232
+ &first_unfinished_state_index,
4233
+ &first_unfinished_capture_byte,
4234
+ &first_unfinished_pattern_index,
4235
+ &first_unfinished_state_is_definite
4236
+ );
4237
+
4238
+ // Then find the earliest capture in a finished match. It must occur
4239
+ // before the first capture in an *unfinished* match.
4240
+ QueryState *first_finished_state = NULL;
4241
+ uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
4242
+ uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
4243
+ for (unsigned i = 0; i < self->finished_states.size;) {
4244
+ QueryState *state = &self->finished_states.contents[i];
4245
+ const CaptureList *captures = capture_list_pool_get(
4246
+ &self->capture_list_pool,
4247
+ state->capture_list_id
4248
+ );
4249
+
4250
+ // Remove states whose captures are all consumed.
4251
+ if (state->consumed_capture_count >= captures->size) {
4252
+ capture_list_pool_release(
4253
+ &self->capture_list_pool,
4254
+ state->capture_list_id
4255
+ );
4256
+ array_erase(&self->finished_states, i);
4257
+ continue;
4258
+ }
4259
+
4260
+ TSNode node = captures->contents[state->consumed_capture_count].node;
4261
+
4262
+ bool node_precedes_range = (
4263
+ ts_node_end_byte(node) <= self->start_byte ||
4264
+ point_lte(ts_node_end_point(node), self->start_point)
4265
+ );
4266
+ bool node_follows_range = (
4267
+ ts_node_start_byte(node) >= self->end_byte ||
4268
+ point_gte(ts_node_start_point(node), self->end_point)
4269
+ );
4270
+ bool node_outside_of_range = node_precedes_range || node_follows_range;
4271
+
4272
+ // Skip captures that are outside of the cursor's range.
4273
+ if (node_outside_of_range) {
4274
+ state->consumed_capture_count++;
4275
+ continue;
4276
+ }
4277
+
4278
+ uint32_t node_start_byte = ts_node_start_byte(node);
4279
+ if (
4280
+ node_start_byte < first_finished_capture_byte ||
4281
+ (
4282
+ node_start_byte == first_finished_capture_byte &&
4283
+ state->pattern_index < first_finished_pattern_index
4284
+ )
4285
+ ) {
4286
+ first_finished_state = state;
4287
+ first_finished_capture_byte = node_start_byte;
4288
+ first_finished_pattern_index = state->pattern_index;
4289
+ }
4290
+ i++;
4291
+ }
4292
+
4293
+ // If there is finished capture that is clearly before any unfinished
4294
+ // capture, then return its match, and its capture index. Internally
4295
+ // record the fact that the capture has been 'consumed'.
4296
+ QueryState *state;
4297
+ if (first_finished_state) {
4298
+ state = first_finished_state;
4299
+ } else if (first_unfinished_state_is_definite) {
4300
+ state = &self->states.contents[first_unfinished_state_index];
4301
+ } else {
4302
+ state = NULL;
4303
+ }
4304
+
4305
+ if (state) {
4306
+ if (state->id == UINT32_MAX) state->id = self->next_state_id++;
4307
+ match->id = state->id;
4308
+ match->pattern_index = state->pattern_index;
4309
+ const CaptureList *captures = capture_list_pool_get(
4310
+ &self->capture_list_pool,
4311
+ state->capture_list_id
4312
+ );
4313
+ match->captures = captures->contents;
4314
+ match->capture_count = captures->size;
4315
+ *capture_index = state->consumed_capture_count;
4316
+ state->consumed_capture_count++;
4317
+ return true;
4318
+ }
4319
+
4320
+ if (capture_list_pool_is_empty(&self->capture_list_pool) && found_unfinished_state) {
4321
+ LOG(
4322
+ " abandon state. index:%u, pattern:%u, offset:%u.\n",
4323
+ first_unfinished_state_index,
4324
+ first_unfinished_pattern_index,
4325
+ first_unfinished_capture_byte
4326
+ );
4327
+ capture_list_pool_release(
4328
+ &self->capture_list_pool,
4329
+ self->states.contents[first_unfinished_state_index].capture_list_id
4330
+ );
4331
+ array_erase(&self->states, first_unfinished_state_index);
4332
+ }
4333
+
4334
+ // If there are no finished matches that are ready to be returned, then
4335
+ // continue finding more matches.
4336
+ if (
4337
+ !ts_query_cursor__advance(self, true) &&
4338
+ self->finished_states.size == 0
4339
+ ) return false;
4340
+ }
4341
+ }
4342
+
4343
+ void ts_query_cursor_set_max_start_depth(
4344
+ TSQueryCursor *self,
4345
+ uint32_t max_start_depth
4346
+ ) {
4347
+ self->max_start_depth = max_start_depth;
4348
+ }
4349
+
4350
+ #undef LOG