tree_haver 3.2.0 → 3.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +99 -1
- data/README.md +1 -1
- data/lib/tree_haver/backends/ffi.rb +8 -6
- data/lib/tree_haver/backends/java.rb +5 -3
- data/lib/tree_haver/backends/mri.rb +23 -20
- data/lib/tree_haver/backends/rust.rb +3 -4
- data/lib/tree_haver/grammar_finder.rb +4 -1
- data/lib/tree_haver/language.rb +255 -0
- data/lib/tree_haver/library_path_utils.rb +80 -0
- data/lib/tree_haver/parser.rb +352 -0
- data/lib/tree_haver/rspec/dependency_tags.rb +288 -47
- data/lib/tree_haver/version.rb +1 -1
- data/lib/tree_haver.rb +14 -553
- data.tar.gz.sig +2 -1
- metadata +7 -4
- metadata.gz.sig +0 -0
data/lib/tree_haver.rb
CHANGED
|
@@ -6,9 +6,8 @@ require "version_gem"
|
|
|
6
6
|
# Standard library
|
|
7
7
|
require "set"
|
|
8
8
|
|
|
9
|
-
# This gem
|
|
9
|
+
# This gem - only version can be required (never autoloaded)
|
|
10
10
|
require_relative "tree_haver/version"
|
|
11
|
-
require_relative "tree_haver/language_registry"
|
|
12
11
|
|
|
13
12
|
# TreeHaver is a cross-Ruby adapter for code parsing with 10 backends.
|
|
14
13
|
#
|
|
@@ -106,6 +105,10 @@ require_relative "tree_haver/language_registry"
|
|
|
106
105
|
# @see GrammarFinder For automatic grammar library discovery
|
|
107
106
|
# @see Backends For available parsing backends
|
|
108
107
|
module TreeHaver
|
|
108
|
+
# Autoload internal modules
|
|
109
|
+
autoload :LibraryPathUtils, File.join(__dir__, "tree_haver", "library_path_utils")
|
|
110
|
+
autoload :LanguageRegistry, File.join(__dir__, "tree_haver", "language_registry")
|
|
111
|
+
|
|
109
112
|
# Base error class for TreeHaver exceptions
|
|
110
113
|
# @see https://github.com/Faveod/ruby-tree-sitter/pull/83 for inherit from Exception reasoning
|
|
111
114
|
#
|
|
@@ -257,6 +260,12 @@ module TreeHaver
|
|
|
257
260
|
# Unified Tree wrapper providing consistent API across backends
|
|
258
261
|
autoload :Tree, File.join(__dir__, "tree_haver", "tree")
|
|
259
262
|
|
|
263
|
+
# Language class for loading grammar shared libraries
|
|
264
|
+
autoload :Language, File.join(__dir__, "tree_haver", "language")
|
|
265
|
+
|
|
266
|
+
# Parser class for parsing source code into syntax trees
|
|
267
|
+
autoload :Parser, File.join(__dir__, "tree_haver", "parser")
|
|
268
|
+
|
|
260
269
|
# Get the current backend selection
|
|
261
270
|
#
|
|
262
271
|
# @return [Symbol] one of :auto, :mri, :rust, :ffi, :java, or :citrus
|
|
@@ -937,557 +946,9 @@ module TreeHaver
|
|
|
937
946
|
end
|
|
938
947
|
end
|
|
939
948
|
|
|
940
|
-
#
|
|
941
|
-
#
|
|
942
|
-
#
|
|
943
|
-
# the grammar rules for parsing a specific programming language.
|
|
944
|
-
#
|
|
945
|
-
# @example Load a language from a shared library
|
|
946
|
-
# language = TreeHaver::Language.from_library(
|
|
947
|
-
# "/usr/local/lib/libtree-sitter-toml.so",
|
|
948
|
-
# symbol: "tree_sitter_toml"
|
|
949
|
-
# )
|
|
950
|
-
#
|
|
951
|
-
# @example Use a registered language
|
|
952
|
-
# TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
|
|
953
|
-
# language = TreeHaver::Language.toml
|
|
954
|
-
class Language
|
|
955
|
-
class << self
|
|
956
|
-
# Load a language grammar from a shared library (ruby_tree_sitter compatibility)
|
|
957
|
-
#
|
|
958
|
-
# This method provides API compatibility with ruby_tree_sitter which uses
|
|
959
|
-
# `Language.load(name, path)`.
|
|
960
|
-
#
|
|
961
|
-
# @param name [String] the language name (e.g., "toml")
|
|
962
|
-
# @param path [String] absolute path to the language shared library
|
|
963
|
-
# @param validate [Boolean] if true, validates the path for safety (default: true)
|
|
964
|
-
# @return [Language] loaded language handle
|
|
965
|
-
# @raise [NotAvailable] if the library cannot be loaded
|
|
966
|
-
# @raise [ArgumentError] if the path fails security validation
|
|
967
|
-
# @example
|
|
968
|
-
# language = TreeHaver::Language.load("toml", "/usr/local/lib/libtree-sitter-toml.so")
|
|
969
|
-
def load(name, path, validate: true)
|
|
970
|
-
from_library(path, symbol: "tree_sitter_#{name}", name: name, validate: validate)
|
|
971
|
-
end
|
|
972
|
-
|
|
973
|
-
# Load a language grammar from a shared library
|
|
974
|
-
#
|
|
975
|
-
# The library must export a function that returns a pointer to a TSLanguage struct.
|
|
976
|
-
# By default, TreeHaver looks for a symbol named "tree_sitter_<name>".
|
|
977
|
-
#
|
|
978
|
-
# == Security
|
|
979
|
-
#
|
|
980
|
-
# By default, paths are validated using {PathValidator} to prevent path traversal
|
|
981
|
-
# and other attacks. Set `validate: false` to skip validation (not recommended
|
|
982
|
-
# unless you've already validated the path).
|
|
983
|
-
#
|
|
984
|
-
# @param path [String] absolute path to the language shared library (.so/.dylib/.dll)
|
|
985
|
-
# @param symbol [String, nil] name of the exported function (defaults to auto-detection)
|
|
986
|
-
# @param name [String, nil] logical name for the language (used in caching)
|
|
987
|
-
# @param validate [Boolean] if true, validates path and symbol for safety (default: true)
|
|
988
|
-
# @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
|
|
989
|
-
# @return [Language] loaded language handle
|
|
990
|
-
# @raise [NotAvailable] if the library cannot be loaded or the symbol is not found
|
|
991
|
-
# @raise [ArgumentError] if path or symbol fails security validation
|
|
992
|
-
# @example
|
|
993
|
-
# language = TreeHaver::Language.from_library(
|
|
994
|
-
# "/usr/local/lib/libtree-sitter-toml.so",
|
|
995
|
-
# symbol: "tree_sitter_toml",
|
|
996
|
-
# name: "toml"
|
|
997
|
-
# )
|
|
998
|
-
# @example With explicit backend
|
|
999
|
-
# language = TreeHaver::Language.from_library(
|
|
1000
|
-
# "/usr/local/lib/libtree-sitter-toml.so",
|
|
1001
|
-
# symbol: "tree_sitter_toml",
|
|
1002
|
-
# backend: :ffi
|
|
1003
|
-
# )
|
|
1004
|
-
def from_library(path, symbol: nil, name: nil, validate: true, backend: nil)
|
|
1005
|
-
if validate
|
|
1006
|
-
unless PathValidator.safe_library_path?(path)
|
|
1007
|
-
errors = PathValidator.validation_errors(path)
|
|
1008
|
-
raise ArgumentError, "Unsafe library path: #{path.inspect}. Errors: #{errors.join("; ")}"
|
|
1009
|
-
end
|
|
1010
|
-
|
|
1011
|
-
if symbol && !PathValidator.safe_symbol_name?(symbol)
|
|
1012
|
-
raise ArgumentError, "Unsafe symbol name: #{symbol.inspect}. " \
|
|
1013
|
-
"Symbol names must be valid C identifiers."
|
|
1014
|
-
end
|
|
1015
|
-
end
|
|
1016
|
-
|
|
1017
|
-
# from_library only works with tree-sitter backends that support .so files
|
|
1018
|
-
# Pure Ruby backends (Citrus, Prism, Psych, Commonmarker, Markly) don't support from_library
|
|
1019
|
-
mod = TreeHaver.resolve_native_backend_module(backend)
|
|
1020
|
-
|
|
1021
|
-
if mod.nil?
|
|
1022
|
-
if backend
|
|
1023
|
-
raise NotAvailable, "Requested backend #{backend.inspect} is not available or does not support shared libraries"
|
|
1024
|
-
else
|
|
1025
|
-
raise NotAvailable,
|
|
1026
|
-
"No native tree-sitter backend is available for loading shared libraries. " \
|
|
1027
|
-
"Available native backends (MRI, Rust, FFI, Java) require platform-specific setup. " \
|
|
1028
|
-
"For pure-Ruby parsing, use backend-specific Language classes directly (e.g., Prism, Psych, Citrus)."
|
|
1029
|
-
end
|
|
1030
|
-
end
|
|
1031
|
-
|
|
1032
|
-
# Backend must implement .from_library; fallback to .from_path for older impls
|
|
1033
|
-
# Include effective backend AND ENV vars in cache key since they affect loading
|
|
1034
|
-
effective_b = TreeHaver.resolve_effective_backend(backend)
|
|
1035
|
-
key = [effective_b, path, symbol, name, ENV["TREE_SITTER_LANG_SYMBOL"]]
|
|
1036
|
-
LanguageRegistry.fetch(key) do
|
|
1037
|
-
if mod::Language.respond_to?(:from_library)
|
|
1038
|
-
mod::Language.from_library(path, symbol: symbol, name: name)
|
|
1039
|
-
else
|
|
1040
|
-
mod::Language.from_path(path)
|
|
1041
|
-
end
|
|
1042
|
-
end
|
|
1043
|
-
end
|
|
1044
|
-
# Alias for {from_library}
|
|
1045
|
-
# @see from_library
|
|
1046
|
-
alias_method :from_path, :from_library
|
|
1047
|
-
|
|
1048
|
-
# Dynamic helper to load a registered language by name
|
|
1049
|
-
#
|
|
1050
|
-
# After registering a language with {TreeHaver.register_language},
|
|
1051
|
-
# you can load it using a method call. The appropriate backend will be
|
|
1052
|
-
# used based on registration and current backend.
|
|
1053
|
-
#
|
|
1054
|
-
# @example With tree-sitter
|
|
1055
|
-
# TreeHaver.register_language(:toml, path: "/path/to/libtree-sitter-toml.so")
|
|
1056
|
-
# language = TreeHaver::Language.toml
|
|
1057
|
-
#
|
|
1058
|
-
# @example With both backends
|
|
1059
|
-
# TreeHaver.register_language(:toml,
|
|
1060
|
-
# path: "/path/to/libtree-sitter-toml.so", symbol: "tree_sitter_toml")
|
|
1061
|
-
# TreeHaver.register_language(:toml,
|
|
1062
|
-
# grammar_module: TomlRB::Document)
|
|
1063
|
-
# language = TreeHaver::Language.toml # Uses appropriate grammar for active backend
|
|
1064
|
-
#
|
|
1065
|
-
# @param method_name [Symbol] the registered language name
|
|
1066
|
-
# @param args [Array] positional arguments
|
|
1067
|
-
# @param kwargs [Hash] keyword arguments
|
|
1068
|
-
# @return [Language] loaded language handle
|
|
1069
|
-
# @raise [NoMethodError] if the language name is not registered
|
|
1070
|
-
def method_missing(method_name, *args, **kwargs, &block)
|
|
1071
|
-
# Resolve only if the language name was registered
|
|
1072
|
-
all_backends = TreeHaver.registered_language(method_name)
|
|
1073
|
-
return super unless all_backends
|
|
1074
|
-
|
|
1075
|
-
# Check current backend
|
|
1076
|
-
current_backend = TreeHaver.backend_module
|
|
1077
|
-
|
|
1078
|
-
# Determine which backend type to use
|
|
1079
|
-
backend_type = if current_backend == Backends::Citrus
|
|
1080
|
-
:citrus
|
|
1081
|
-
else
|
|
1082
|
-
:tree_sitter # MRI, Rust, FFI, Java all use tree-sitter
|
|
1083
|
-
end
|
|
1084
|
-
|
|
1085
|
-
# Get backend-specific registration
|
|
1086
|
-
reg = all_backends[backend_type]
|
|
1087
|
-
|
|
1088
|
-
# If Citrus backend is active
|
|
1089
|
-
if backend_type == :citrus
|
|
1090
|
-
if reg && reg[:grammar_module]
|
|
1091
|
-
return Backends::Citrus::Language.new(reg[:grammar_module])
|
|
1092
|
-
end
|
|
1093
|
-
|
|
1094
|
-
# Fall back to error if no Citrus grammar registered
|
|
1095
|
-
raise NotAvailable,
|
|
1096
|
-
"Citrus backend is active but no Citrus grammar registered for :#{method_name}. " \
|
|
1097
|
-
"Either register a Citrus grammar or use a tree-sitter backend. " \
|
|
1098
|
-
"Registered backends: #{all_backends.keys.inspect}"
|
|
1099
|
-
end
|
|
1100
|
-
|
|
1101
|
-
# For tree-sitter backends, try to load from path
|
|
1102
|
-
# If that fails, fall back to Citrus if available
|
|
1103
|
-
if reg && reg[:path]
|
|
1104
|
-
path = kwargs[:path] || args.first || reg[:path]
|
|
1105
|
-
# Symbol priority: kwargs override > registration > derive from method_name
|
|
1106
|
-
symbol = if kwargs.key?(:symbol)
|
|
1107
|
-
kwargs[:symbol]
|
|
1108
|
-
elsif reg[:symbol]
|
|
1109
|
-
reg[:symbol]
|
|
1110
|
-
else
|
|
1111
|
-
"tree_sitter_#{method_name}"
|
|
1112
|
-
end
|
|
1113
|
-
# Name priority: kwargs override > derive from symbol (strip tree_sitter_ prefix)
|
|
1114
|
-
# Using symbol-derived name ensures ruby_tree_sitter gets the correct language name
|
|
1115
|
-
# e.g., "toml" not "toml_both" when symbol is "tree_sitter_toml"
|
|
1116
|
-
name = kwargs[:name] || symbol&.sub(/\Atree_sitter_/, "")
|
|
1117
|
-
|
|
1118
|
-
begin
|
|
1119
|
-
return from_library(path, symbol: symbol, name: name)
|
|
1120
|
-
rescue NotAvailable, ArgumentError, LoadError, FFI::NotFoundError => _e
|
|
1121
|
-
# Tree-sitter failed to load - check for Citrus fallback
|
|
1122
|
-
# This handles cases where:
|
|
1123
|
-
# - The .so file doesn't exist or can't be loaded (NotAvailable, LoadError)
|
|
1124
|
-
# - FFI can't find required symbols like ts_parser_new (FFI::NotFoundError)
|
|
1125
|
-
# - Invalid arguments were provided (ArgumentError)
|
|
1126
|
-
citrus_reg = all_backends[:citrus]
|
|
1127
|
-
if citrus_reg && citrus_reg[:grammar_module]
|
|
1128
|
-
return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
|
|
1129
|
-
end
|
|
1130
|
-
# No Citrus fallback available, re-raise the original error
|
|
1131
|
-
raise
|
|
1132
|
-
end
|
|
1133
|
-
end
|
|
1134
|
-
|
|
1135
|
-
# No tree-sitter path registered - check for Citrus fallback
|
|
1136
|
-
# This enables auto-fallback when tree-sitter grammar is not installed
|
|
1137
|
-
# but a Citrus grammar (pure Ruby) is available
|
|
1138
|
-
citrus_reg = all_backends[:citrus]
|
|
1139
|
-
if citrus_reg && citrus_reg[:grammar_module]
|
|
1140
|
-
return Backends::Citrus::Language.new(citrus_reg[:grammar_module])
|
|
1141
|
-
end
|
|
1142
|
-
|
|
1143
|
-
# No appropriate registration found
|
|
1144
|
-
raise ArgumentError,
|
|
1145
|
-
"No grammar registered for :#{method_name} compatible with #{backend_type} backend. " \
|
|
1146
|
-
"Registered backends: #{all_backends.keys.inspect}"
|
|
1147
|
-
end
|
|
1148
|
-
|
|
1149
|
-
# @api private
|
|
1150
|
-
def respond_to_missing?(method_name, include_private = false)
|
|
1151
|
-
!!TreeHaver.registered_language(method_name) || super
|
|
1152
|
-
end
|
|
1153
|
-
end
|
|
1154
|
-
end
|
|
1155
|
-
|
|
1156
|
-
# Represents a tree-sitter parser instance
|
|
1157
|
-
#
|
|
1158
|
-
# A Parser is used to parse source code into a syntax tree. You must
|
|
1159
|
-
# set a language before parsing.
|
|
1160
|
-
#
|
|
1161
|
-
# == Wrapping/Unwrapping Responsibility
|
|
1162
|
-
#
|
|
1163
|
-
# TreeHaver::Parser is responsible for ALL object wrapping and unwrapping:
|
|
1164
|
-
#
|
|
1165
|
-
# **Language objects:**
|
|
1166
|
-
# - Unwraps Language wrappers before passing to backend.language=
|
|
1167
|
-
# - MRI backend receives ::TreeSitter::Language
|
|
1168
|
-
# - Rust backend receives String (language name)
|
|
1169
|
-
# - FFI backend receives wrapped Language (needs to_ptr)
|
|
1170
|
-
#
|
|
1171
|
-
# **Tree objects:**
|
|
1172
|
-
# - parse() receives raw source, backend returns raw tree, Parser wraps it
|
|
1173
|
-
# - parse_string() unwraps old_tree before passing to backend, wraps returned tree
|
|
1174
|
-
# - Backends always work with raw backend trees, never TreeHaver::Tree
|
|
1175
|
-
#
|
|
1176
|
-
# **Node objects:**
|
|
1177
|
-
# - Backends return raw nodes, TreeHaver::Tree and TreeHaver::Node wrap them
|
|
1178
|
-
#
|
|
1179
|
-
# This design ensures:
|
|
1180
|
-
# - Principle of Least Surprise: wrapping happens at boundaries, consistently
|
|
1181
|
-
# - Backends are simple: they don't need to know about TreeHaver wrappers
|
|
1182
|
-
# - Single Responsibility: wrapping logic is only in TreeHaver::Parser
|
|
1183
|
-
#
|
|
1184
|
-
# @example Basic parsing
|
|
1185
|
-
# parser = TreeHaver::Parser.new
|
|
1186
|
-
# parser.language = TreeHaver::Language.toml
|
|
1187
|
-
# tree = parser.parse("[package]\nname = \"foo\"")
|
|
1188
|
-
class Parser
|
|
1189
|
-
# Create a new parser instance
|
|
1190
|
-
#
|
|
1191
|
-
# @param backend [Symbol, String, nil] optional backend to use (overrides context/global)
|
|
1192
|
-
# @raise [NotAvailable] if no backend is available or requested backend is unavailable
|
|
1193
|
-
# @example Default (uses context/global)
|
|
1194
|
-
# parser = TreeHaver::Parser.new
|
|
1195
|
-
# @example Explicit backend
|
|
1196
|
-
# parser = TreeHaver::Parser.new(backend: :ffi)
|
|
1197
|
-
def initialize(backend: nil)
|
|
1198
|
-
# Convert string backend names to symbols for consistency
|
|
1199
|
-
backend = backend.to_sym if backend.is_a?(String)
|
|
1200
|
-
|
|
1201
|
-
mod = TreeHaver.resolve_backend_module(backend)
|
|
1202
|
-
|
|
1203
|
-
if mod.nil?
|
|
1204
|
-
if backend
|
|
1205
|
-
raise NotAvailable, "Requested backend #{backend.inspect} is not available"
|
|
1206
|
-
else
|
|
1207
|
-
raise NotAvailable, "No TreeHaver backend is available"
|
|
1208
|
-
end
|
|
1209
|
-
end
|
|
1210
|
-
|
|
1211
|
-
# Try to create the parser, with fallback to Citrus if tree-sitter fails
|
|
1212
|
-
# This enables auto-fallback when tree-sitter runtime isn't available
|
|
1213
|
-
begin
|
|
1214
|
-
@impl = mod::Parser.new
|
|
1215
|
-
@explicit_backend = backend # Remember for introspection (always a Symbol or nil)
|
|
1216
|
-
rescue NoMethodError, FFI::NotFoundError, LoadError => e
|
|
1217
|
-
# Tree-sitter backend failed (likely missing runtime library)
|
|
1218
|
-
# Try Citrus as fallback if we weren't explicitly asked for a specific backend
|
|
1219
|
-
if backend.nil? || backend == :auto
|
|
1220
|
-
if Backends::Citrus.available?
|
|
1221
|
-
@impl = Backends::Citrus::Parser.new
|
|
1222
|
-
@explicit_backend = :citrus
|
|
1223
|
-
else
|
|
1224
|
-
# No fallback available, re-raise original error
|
|
1225
|
-
raise NotAvailable, "Tree-sitter backend failed: #{e.message}. " \
|
|
1226
|
-
"Citrus fallback not available. Install tree-sitter runtime or citrus gem."
|
|
1227
|
-
end
|
|
1228
|
-
else
|
|
1229
|
-
# Explicit backend was requested, don't fallback
|
|
1230
|
-
raise
|
|
1231
|
-
end
|
|
1232
|
-
end
|
|
1233
|
-
end
|
|
1234
|
-
|
|
1235
|
-
# Get the backend this parser is using (for introspection)
|
|
1236
|
-
#
|
|
1237
|
-
# Returns the actual backend in use, resolving :auto to the concrete backend.
|
|
1238
|
-
#
|
|
1239
|
-
# @return [Symbol] the backend name (:mri, :rust, :ffi, :java, or :citrus)
|
|
1240
|
-
def backend
|
|
1241
|
-
if @explicit_backend && @explicit_backend != :auto
|
|
1242
|
-
@explicit_backend
|
|
1243
|
-
else
|
|
1244
|
-
# Determine actual backend from the implementation class
|
|
1245
|
-
case @impl.class.name
|
|
1246
|
-
when /MRI/
|
|
1247
|
-
:mri
|
|
1248
|
-
when /Rust/
|
|
1249
|
-
:rust
|
|
1250
|
-
when /FFI/
|
|
1251
|
-
:ffi
|
|
1252
|
-
when /Java/
|
|
1253
|
-
:java
|
|
1254
|
-
when /Citrus/
|
|
1255
|
-
:citrus
|
|
1256
|
-
else
|
|
1257
|
-
# Fallback to effective_backend if we can't determine from class name
|
|
1258
|
-
TreeHaver.effective_backend
|
|
1259
|
-
end
|
|
1260
|
-
end
|
|
1261
|
-
end
|
|
1262
|
-
|
|
1263
|
-
# Set the language grammar for this parser
|
|
1264
|
-
#
|
|
1265
|
-
# @param lang [Language] the language to use for parsing
|
|
1266
|
-
# @return [Language] the language that was set
|
|
1267
|
-
# @example
|
|
1268
|
-
# parser.language = TreeHaver::Language.from_library("/path/to/grammar.so")
|
|
1269
|
-
def language=(lang)
|
|
1270
|
-
# Check if this is a Citrus language - if so, we need a Citrus parser
|
|
1271
|
-
# This enables automatic backend switching when tree-sitter fails and
|
|
1272
|
-
# falls back to Citrus
|
|
1273
|
-
if lang.is_a?(Backends::Citrus::Language)
|
|
1274
|
-
unless @impl.is_a?(Backends::Citrus::Parser)
|
|
1275
|
-
# Switch to Citrus parser to match the Citrus language
|
|
1276
|
-
@impl = Backends::Citrus::Parser.new
|
|
1277
|
-
@explicit_backend = :citrus
|
|
1278
|
-
end
|
|
1279
|
-
end
|
|
1280
|
-
|
|
1281
|
-
# Unwrap the language before passing to backend
|
|
1282
|
-
# Backends receive raw language objects, never TreeHaver wrappers
|
|
1283
|
-
inner_lang = unwrap_language(lang)
|
|
1284
|
-
@impl.language = inner_lang
|
|
1285
|
-
# Return the original (possibly wrapped) language for consistency
|
|
1286
|
-
lang # rubocop:disable Lint/Void (intentional return value)
|
|
1287
|
-
end
|
|
1288
|
-
|
|
1289
|
-
private
|
|
1290
|
-
|
|
1291
|
-
# Unwrap a language object to extract the raw backend language
|
|
1292
|
-
#
|
|
1293
|
-
# This method is smart about backend compatibility:
|
|
1294
|
-
# 1. If language has a backend attribute, checks if it matches current backend
|
|
1295
|
-
# 2. If mismatch detected, attempts to reload language for correct backend
|
|
1296
|
-
# 3. If reload successful, uses new language; otherwise continues with original
|
|
1297
|
-
# 4. Unwraps the language wrapper to get raw backend object
|
|
1298
|
-
#
|
|
1299
|
-
# @param lang [Object] wrapped or raw language object
|
|
1300
|
-
# @return [Object] raw backend language object appropriate for current backend
|
|
1301
|
-
# @api private
|
|
1302
|
-
def unwrap_language(lang)
|
|
1303
|
-
# Check if this is a TreeHaver language wrapper with backend info
|
|
1304
|
-
if lang.respond_to?(:backend)
|
|
1305
|
-
# Verify backend compatibility FIRST
|
|
1306
|
-
# This prevents passing languages from wrong backends to native code
|
|
1307
|
-
# Exception: :auto backend is permissive - accepts any language
|
|
1308
|
-
current_backend = backend
|
|
1309
|
-
|
|
1310
|
-
if lang.backend != current_backend && current_backend != :auto
|
|
1311
|
-
# Backend mismatch! Try to reload for correct backend
|
|
1312
|
-
reloaded = try_reload_language_for_backend(lang, current_backend)
|
|
1313
|
-
if reloaded
|
|
1314
|
-
lang = reloaded
|
|
1315
|
-
else
|
|
1316
|
-
# Couldn't reload - this is an error
|
|
1317
|
-
raise TreeHaver::Error,
|
|
1318
|
-
"Language backend mismatch: language is for #{lang.backend}, parser is #{current_backend}. " \
|
|
1319
|
-
"Cannot reload language for correct backend. " \
|
|
1320
|
-
"Create a new language with TreeHaver::Language.from_library when backend is #{current_backend}."
|
|
1321
|
-
end
|
|
1322
|
-
end
|
|
1323
|
-
|
|
1324
|
-
# Get the current parser's language (if set)
|
|
1325
|
-
current_lang = @impl.respond_to?(:language) ? @impl.language : nil
|
|
1326
|
-
|
|
1327
|
-
# Language mismatch detected! The parser might have a different language set
|
|
1328
|
-
# Compare the actual language objects using Comparable
|
|
1329
|
-
if current_lang && lang != current_lang
|
|
1330
|
-
# Different language being set (e.g., switching from TOML to JSON)
|
|
1331
|
-
# This is fine, just informational
|
|
1332
|
-
end
|
|
1333
|
-
end
|
|
1334
|
-
|
|
1335
|
-
# Unwrap based on backend type
|
|
1336
|
-
# All TreeHaver Language wrappers have the backend attribute
|
|
1337
|
-
unless lang.respond_to?(:backend)
|
|
1338
|
-
# This shouldn't happen - all our wrappers have backend attribute
|
|
1339
|
-
# If we get here, it's likely a raw backend object that was passed directly
|
|
1340
|
-
raise TreeHaver::Error,
|
|
1341
|
-
"Expected TreeHaver Language wrapper with backend attribute, got #{lang.class}. " \
|
|
1342
|
-
"Use TreeHaver::Language.from_library to create language objects."
|
|
1343
|
-
end
|
|
1344
|
-
|
|
1345
|
-
case lang.backend
|
|
1346
|
-
when :mri
|
|
1347
|
-
return lang.to_language if lang.respond_to?(:to_language)
|
|
1348
|
-
return lang.inner_language if lang.respond_to?(:inner_language)
|
|
1349
|
-
when :rust
|
|
1350
|
-
return lang.name if lang.respond_to?(:name)
|
|
1351
|
-
when :ffi
|
|
1352
|
-
return lang # FFI needs wrapper for to_ptr
|
|
1353
|
-
when :java
|
|
1354
|
-
return lang.impl if lang.respond_to?(:impl)
|
|
1355
|
-
when :citrus
|
|
1356
|
-
return lang.grammar_module if lang.respond_to?(:grammar_module)
|
|
1357
|
-
when :prism
|
|
1358
|
-
return lang # Prism backend expects the Language wrapper
|
|
1359
|
-
when :psych
|
|
1360
|
-
return lang # Psych backend expects the Language wrapper
|
|
1361
|
-
when :commonmarker
|
|
1362
|
-
return lang # Commonmarker backend expects the Language wrapper
|
|
1363
|
-
when :markly
|
|
1364
|
-
return lang # Markly backend expects the Language wrapper
|
|
1365
|
-
else
|
|
1366
|
-
# Unknown backend (e.g., test backend)
|
|
1367
|
-
# Try generic unwrapping methods for flexibility in testing
|
|
1368
|
-
return lang.to_language if lang.respond_to?(:to_language)
|
|
1369
|
-
return lang.inner_language if lang.respond_to?(:inner_language)
|
|
1370
|
-
return lang.impl if lang.respond_to?(:impl)
|
|
1371
|
-
return lang.grammar_module if lang.respond_to?(:grammar_module)
|
|
1372
|
-
return lang.name if lang.respond_to?(:name)
|
|
1373
|
-
|
|
1374
|
-
# If nothing works, pass through as-is
|
|
1375
|
-
# This allows test languages to be passed directly
|
|
1376
|
-
return lang
|
|
1377
|
-
end
|
|
1378
|
-
|
|
1379
|
-
# Shouldn't reach here, but just in case
|
|
1380
|
-
lang
|
|
1381
|
-
end
|
|
1382
|
-
|
|
1383
|
-
# Try to reload a language for the current backend
|
|
1384
|
-
#
|
|
1385
|
-
# This handles the case where a language was loaded for one backend,
|
|
1386
|
-
# but is now being used with a different backend (e.g., after backend switch).
|
|
1387
|
-
#
|
|
1388
|
-
# @param lang [Object] language object with metadata
|
|
1389
|
-
# @param target_backend [Symbol] backend to reload for
|
|
1390
|
-
# @return [Object, nil] reloaded language or nil if reload not possible
|
|
1391
|
-
# @api private
|
|
1392
|
-
def try_reload_language_for_backend(lang, target_backend)
|
|
1393
|
-
# Can't reload without path information
|
|
1394
|
-
return unless lang.respond_to?(:path) || lang.respond_to?(:grammar_module)
|
|
1395
|
-
|
|
1396
|
-
# For tree-sitter backends, reload from path
|
|
1397
|
-
if lang.respond_to?(:path) && lang.path
|
|
1398
|
-
begin
|
|
1399
|
-
# Use Language.from_library which respects current backend
|
|
1400
|
-
return Language.from_library(
|
|
1401
|
-
lang.path,
|
|
1402
|
-
symbol: lang.respond_to?(:symbol) ? lang.symbol : nil,
|
|
1403
|
-
name: lang.respond_to?(:name) ? lang.name : nil,
|
|
1404
|
-
)
|
|
1405
|
-
rescue => e
|
|
1406
|
-
# Reload failed, continue with original
|
|
1407
|
-
warn("TreeHaver: Failed to reload language for backend #{target_backend}: #{e.message}") if $VERBOSE
|
|
1408
|
-
return
|
|
1409
|
-
end
|
|
1410
|
-
end
|
|
1411
|
-
|
|
1412
|
-
# For Citrus, can't really reload as it's just a module reference
|
|
1413
|
-
nil
|
|
1414
|
-
end
|
|
1415
|
-
|
|
1416
|
-
public
|
|
1417
|
-
|
|
1418
|
-
# Parse source code into a syntax tree
|
|
1419
|
-
#
|
|
1420
|
-
# @param source [String] the source code to parse (should be UTF-8)
|
|
1421
|
-
# @return [Tree] the parsed syntax tree
|
|
1422
|
-
# @example
|
|
1423
|
-
# tree = parser.parse("x = 1")
|
|
1424
|
-
# puts tree.root_node.type
|
|
1425
|
-
def parse(source)
|
|
1426
|
-
tree_impl = @impl.parse(source)
|
|
1427
|
-
# Wrap backend tree with source so Node#text works
|
|
1428
|
-
Tree.new(tree_impl, source: source)
|
|
1429
|
-
end
|
|
1430
|
-
|
|
1431
|
-
# Parse source code into a syntax tree (with optional incremental parsing)
|
|
1432
|
-
#
|
|
1433
|
-
# This method provides API compatibility with ruby_tree_sitter which uses
|
|
1434
|
-
# `parse_string(old_tree, source)`.
|
|
1435
|
-
#
|
|
1436
|
-
# == Incremental Parsing
|
|
1437
|
-
#
|
|
1438
|
-
# tree-sitter supports **incremental parsing** where you can pass a previously
|
|
1439
|
-
# parsed tree along with edit information to efficiently re-parse only the
|
|
1440
|
-
# changed portions of source code. This is a major performance optimization
|
|
1441
|
-
# for editors and IDEs that need to re-parse on every keystroke.
|
|
1442
|
-
#
|
|
1443
|
-
# The workflow for incremental parsing is:
|
|
1444
|
-
# 1. Parse the initial source: `tree = parser.parse_string(nil, source)`
|
|
1445
|
-
# 2. User edits the source (e.g., inserts a character)
|
|
1446
|
-
# 3. Call `tree.edit(...)` to update the tree's position data
|
|
1447
|
-
# 4. Re-parse with the old tree: `new_tree = parser.parse_string(tree, new_source)`
|
|
1448
|
-
# 5. tree-sitter reuses unchanged nodes, only re-parsing affected regions
|
|
1449
|
-
#
|
|
1450
|
-
# TreeHaver passes through to the underlying backend if it supports incremental
|
|
1451
|
-
# parsing (MRI and Rust backends do). Check `TreeHaver.capabilities[:incremental]`
|
|
1452
|
-
# to see if the current backend supports it.
|
|
1453
|
-
#
|
|
1454
|
-
# @param old_tree [Tree, nil] previously parsed tree for incremental parsing, or nil for fresh parse
|
|
1455
|
-
# @param source [String] the source code to parse (should be UTF-8)
|
|
1456
|
-
# @return [Tree] the parsed syntax tree
|
|
1457
|
-
# @see https://tree-sitter.github.io/tree-sitter/using-parsers#editing tree-sitter incremental parsing docs
|
|
1458
|
-
# @see Tree#edit For marking edits before incremental re-parsing
|
|
1459
|
-
# @example First parse (no old tree)
|
|
1460
|
-
# tree = parser.parse_string(nil, "x = 1")
|
|
1461
|
-
# @example Incremental parse
|
|
1462
|
-
# tree.edit(start_byte: 4, old_end_byte: 5, new_end_byte: 6, ...)
|
|
1463
|
-
# new_tree = parser.parse_string(tree, "x = 42")
|
|
1464
|
-
def parse_string(old_tree, source)
|
|
1465
|
-
# Pass through to backend if it supports incremental parsing
|
|
1466
|
-
if old_tree && @impl.respond_to?(:parse_string)
|
|
1467
|
-
# Extract the underlying implementation from our Tree wrapper
|
|
1468
|
-
old_impl = if old_tree.respond_to?(:inner_tree)
|
|
1469
|
-
old_tree.inner_tree
|
|
1470
|
-
elsif old_tree.respond_to?(:instance_variable_get)
|
|
1471
|
-
# Fallback for compatibility
|
|
1472
|
-
old_tree.instance_variable_get(:@inner_tree) || old_tree.instance_variable_get(:@impl) || old_tree
|
|
1473
|
-
else
|
|
1474
|
-
old_tree
|
|
1475
|
-
end
|
|
1476
|
-
tree_impl = @impl.parse_string(old_impl, source)
|
|
1477
|
-
# Wrap backend tree with source so Node#text works
|
|
1478
|
-
Tree.new(tree_impl, source: source)
|
|
1479
|
-
elsif @impl.respond_to?(:parse_string)
|
|
1480
|
-
tree_impl = @impl.parse_string(nil, source)
|
|
1481
|
-
# Wrap backend tree with source so Node#text works
|
|
1482
|
-
Tree.new(tree_impl, source: source)
|
|
1483
|
-
else
|
|
1484
|
-
# Fallback for backends that don't support parse_string
|
|
1485
|
-
parse(source)
|
|
1486
|
-
end
|
|
1487
|
-
end
|
|
1488
|
-
end
|
|
1489
|
-
|
|
1490
|
-
# Tree and Node classes have been moved to separate files:
|
|
949
|
+
# Language and Parser classes have been moved to separate files:
|
|
950
|
+
# - tree_haver/language.rb: TreeHaver::Language - loads grammar shared libraries
|
|
951
|
+
# - tree_haver/parser.rb: TreeHaver::Parser - parses source code into syntax trees
|
|
1491
952
|
# - tree_haver/tree.rb: TreeHaver::Tree - unified wrapper providing consistent API
|
|
1492
953
|
# - tree_haver/node.rb: TreeHaver::Node - unified wrapper providing consistent API
|
|
1493
954
|
#
|
data.tar.gz.sig
CHANGED
|
@@ -1 +1,2 @@
|
|
|
1
|
-
|
|
1
|
+
$���w]�
|
|
2
|
+
tn���"y�'��Č2+���D_ �Sߪ�b��a��,T�ƫ�XJ��J[��Э��39��0�W�w��g��02�ݯ}:x8�JIdž����ȣ���>Z�w7�U"ي�c��ڱ���@G�g#��u��^O�R���[/���g���x
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tree_haver
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.2.0
|
|
4
|
+
version: 3.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Peter H. Boling
|
|
@@ -272,8 +272,11 @@ files:
|
|
|
272
272
|
- lib/tree_haver/citrus_grammar_finder.rb
|
|
273
273
|
- lib/tree_haver/compat.rb
|
|
274
274
|
- lib/tree_haver/grammar_finder.rb
|
|
275
|
+
- lib/tree_haver/language.rb
|
|
275
276
|
- lib/tree_haver/language_registry.rb
|
|
277
|
+
- lib/tree_haver/library_path_utils.rb
|
|
276
278
|
- lib/tree_haver/node.rb
|
|
279
|
+
- lib/tree_haver/parser.rb
|
|
277
280
|
- lib/tree_haver/path_validator.rb
|
|
278
281
|
- lib/tree_haver/point.rb
|
|
279
282
|
- lib/tree_haver/rspec.rb
|
|
@@ -289,10 +292,10 @@ licenses:
|
|
|
289
292
|
- MIT
|
|
290
293
|
metadata:
|
|
291
294
|
homepage_uri: https://tree-haver.galtzo.com/
|
|
292
|
-
source_code_uri: https://github.com/kettle-rb/tree_haver/tree/v3.2.0
|
|
293
|
-
changelog_uri: https://github.com/kettle-rb/tree_haver/blob/v3.2.0/CHANGELOG.md
|
|
295
|
+
source_code_uri: https://github.com/kettle-rb/tree_haver/tree/v3.2.2
|
|
296
|
+
changelog_uri: https://github.com/kettle-rb/tree_haver/blob/v3.2.2/CHANGELOG.md
|
|
294
297
|
bug_tracker_uri: https://github.com/kettle-rb/tree_haver/issues
|
|
295
|
-
documentation_uri: https://www.rubydoc.info/gems/tree_haver/3.2.0
|
|
298
|
+
documentation_uri: https://www.rubydoc.info/gems/tree_haver/3.2.2
|
|
296
299
|
funding_uri: https://github.com/sponsors/pboling
|
|
297
300
|
wiki_uri: https://github.com/kettle-rb/tree_haver/wiki
|
|
298
301
|
news_uri: https://www.railsbling.com/tags/tree_haver
|
metadata.gz.sig
CHANGED
|
Binary file
|