tree_sitter_language_pack 1.8.0.pre.rc.44-aarch64-linux → 1.9.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3cba8a04a9c6f609e0709b1b4d20bb0685e630c575bd802caa518ceba8512e80
4
- data.tar.gz: e9e1d028ecc3476e8dc52315d08e69281cb99b16ffe63e1e325d23761b29a499
3
+ metadata.gz: eca50798376373f2efbc9e5106deefcd3549c2d5bc34691717b4d02eaad20bee
4
+ data.tar.gz: 91179dbd223f0e63402ee29bea77d8221aec757becfb682bafafc1feb729ca75
5
5
  SHA512:
6
- metadata.gz: 0c3bd1bcfb0b989f6f72dc75b9b831f0b464d75ffdca8511fd8e184933ac8ffbe0f0e494184133d823c278bd8f736635fe107b0ef2d037596dac37f058f1f34c
7
- data.tar.gz: ce4e0e5f3c4421b7581412aa0f2516e17ad57a581dec4ee51d0b63b483036dd0657d82e6b948e1fddc6c1cb4a7cedd1b743ce0d9af99def0fa9fa43cb284f703
6
+ metadata.gz: 53a0ef69cff675b01462a4e59a17f66b453c2ab80ec8f42920b765cdf4b0b814178aff4e2053095a5724aaf04d72fd0eccd444a354b834babe6f5a01fde61db9
7
+ data.tar.gz: 8912aac5e70f0da34f0ae6af85013f7c2ee9928791b7de9bb9b087c5d794b464079711481bc70eaff3fcaf12559e70a58441ef455845234085d3e1157144e374
data/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright 2025-2026 Na'aman Hirschfeld
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,148 @@
1
+ # Ruby
2
+
3
+ <div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0">
4
+ <a href="https://github.com/kreuzberg-dev/alef">
5
+ <img src="https://img.shields.io/badge/Bindings-alef%20%D7%90-007ec6" alt="Bindings" />
6
+ </a>
7
+ <!-- Language Bindings -->
8
+ <a href="https://crates.io/crates/tree-sitter-language-pack">
9
+ <img src="https://img.shields.io/crates/v/tree-sitter-language-pack?label=Rust&color=007ec6" alt="Rust" />
10
+ </a>
11
+ <a href="https://pypi.org/project/tree-sitter-language-pack/">
12
+ <img src="https://img.shields.io/pypi/v/tree-sitter-language-pack?label=Python&color=007ec6" alt="Python" />
13
+ </a>
14
+ <a href="https://www.npmjs.com/package/@kreuzberg/tree-sitter-language-pack">
15
+ <img
16
+ src="https://img.shields.io/npm/v/@kreuzberg/tree-sitter-language-pack?label=Node.js&color=007ec6"
17
+ alt="Node.js"
18
+ />
19
+ </a>
20
+ <a href="https://www.npmjs.com/package/@kreuzberg/tree-sitter-language-pack-wasm">
21
+ <img
22
+ src="https://img.shields.io/npm/v/@kreuzberg/tree-sitter-language-pack-wasm?label=WASM&color=007ec6"
23
+ alt="WASM"
24
+ />
25
+ </a>
26
+ <a href="https://central.sonatype.com/artifact/dev.kreuzberg.treesitterlanguagepack/tree-sitter-language-pack">
27
+ <img
28
+ src="https://img.shields.io/maven-central/v/dev.kreuzberg.treesitterlanguagepack/tree-sitter-language-pack?label=Java&color=007ec6"
29
+ alt="Java"
30
+ />
31
+ </a>
32
+ <a href="https://pkg.go.dev/github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go">
33
+ <img
34
+ src="https://img.shields.io/github/v/tag/kreuzberg-dev/tree-sitter-language-pack?label=Go&color=007ec6"
35
+ alt="Go"
36
+ />
37
+ </a>
38
+ <a href="https://www.nuget.org/packages/TreeSitterLanguagePack/">
39
+ <img src="https://img.shields.io/nuget/v/TreeSitterLanguagePack?label=C%23&color=007ec6" alt="C#" />
40
+ </a>
41
+ <a href="https://packagist.org/packages/kreuzberg-dev/tree-sitter-language-pack">
42
+ <img
43
+ src="https://img.shields.io/packagist/v/kreuzberg-dev/tree-sitter-language-pack?label=PHP&color=007ec6"
44
+ alt="PHP"
45
+ />
46
+ </a>
47
+ <a href="https://rubygems.org/gems/tree_sitter_language_pack">
48
+ <img src="https://img.shields.io/gem/v/tree_sitter_language_pack?label=Ruby&color=007ec6" alt="Ruby" />
49
+ </a>
50
+ <a href="https://hex.pm/packages/tree_sitter_language_pack">
51
+ <img src="https://img.shields.io/hexpm/v/tree_sitter_language_pack?label=Elixir&color=007ec6" alt="Elixir" />
52
+ </a>
53
+ <a href="https://pub.dev/packages/tree_sitter_language_pack">
54
+ <img src="https://img.shields.io/pub/v/tree_sitter_language_pack?label=Dart&color=007ec6" alt="Dart" />
55
+ </a>
56
+ <a href="https://central.sonatype.com/artifact/dev.kreuzberg.tslp.android/tree-sitter-language-pack-android">
57
+ <img
58
+ src="https://img.shields.io/maven-central/v/dev.kreuzberg.tslp.android/tree-sitter-language-pack-android?label=Kotlin&color=007ec6"
59
+ alt="Kotlin"
60
+ />
61
+ </a>
62
+ <a href="https://github.com/kreuzberg-dev/tree-sitter-language-pack/tree/main/packages/swift">
63
+ <img src="https://img.shields.io/badge/Swift-SPM-007ec6" alt="Swift" />
64
+ </a>
65
+ <a href="https://github.com/kreuzberg-dev/tree-sitter-language-pack/tree/main/packages/zig">
66
+ <img src="https://img.shields.io/badge/Zig-package-007ec6" alt="Zig" />
67
+ </a>
68
+ <a href="https://github.com/kreuzberg-dev/tree-sitter-language-pack/releases">
69
+ <img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C FFI" />
70
+ </a>
71
+
72
+ <!-- Project Info -->
73
+ <a href="https://github.com/kreuzberg-dev/tree-sitter-language-pack/blob/main/LICENSE">
74
+ <img src="https://img.shields.io/badge/License-MIT-007ec6" alt="License" />
75
+ </a>
76
+ <a href="https://docs.tree-sitter-language-pack.kreuzberg.dev">
77
+ <img src="https://img.shields.io/badge/Docs-tree--sitter--language--pack-007ec6" alt="Documentation" />
78
+ </a>
79
+ </div>
80
+
81
+ <div align="center" style="margin: 24px 0 0">
82
+ <a href="https://kreuzberg.dev">
83
+ <img alt="tree-sitter-language-pack" src="https://github.com/user-attachments/assets/478a83da-237b-446b-b3a8-e564c13e00a8" />
84
+ </a>
85
+ </div>
86
+
87
+ <div align="center" style="display: flex; flex-wrap: wrap; gap: 12px; justify-content: center; margin: 28px 0 24px">
88
+ <a href="https://discord.gg/xt9WY3GnKR">
89
+ <img
90
+ height="22"
91
+ src="https://img.shields.io/badge/Discord-Chat-007ec6?logo=discord&logoColor=white"
92
+ alt="Join Discord"
93
+ />
94
+ </a>
95
+ </div>
96
+
97
+ Pre-compiled tree-sitter grammars for 306 programming languages with Ruby bindings via Magnus.
98
+
99
+ ## What This Package Provides
100
+
101
+ - **Parser access** — load a tree-sitter language parser by name without wiring individual grammar crates or packages.
102
+ - **Code intelligence primitives** — parse trees, functions, classes, imports, exports, symbols, docstrings, diagnostics, and syntax-aware chunks.
103
+ - **Shared cache model** — parsers are fetched and cached once, then reused by every call in the process.
104
+ - **Same catalog as every binding** — Rust, Python, Node.js, Go, Java, PHP, Ruby, .NET, Elixir, WASM, Dart, Kotlin Android, Swift, Zig, and C FFI use the same grammar set.
105
+ - **Ruby package** — Magnus-backed parser access with Ruby objects.
106
+
107
+ ## Installation
108
+
109
+ ```bash
110
+ gem install tree_sitter_language_pack
111
+ ```
112
+
113
+ ## Quick Start
114
+
115
+ See the [language guide](https://docs.tree-sitter-language-pack.kreuzberg.dev) for Ruby-specific usage.
116
+
117
+ ## Features
118
+
119
+ - **300+ languages** — pre-compiled tree-sitter grammars covering every major programming language and many minor ones.
120
+ - **On-demand download + cache** — parsers fetched at first use; subsequent runs hit the local cache.
121
+ - **Code intelligence** — extract functions, classes, imports, exports, symbols, docstrings, and diagnostics with one API.
122
+ - **Syntax-aware chunking** — semantic chunks for RAG/LLM pipelines.
123
+ - **Polyglot bindings** — Rust core with native bindings for Python, TypeScript, Go, Java, C#, Ruby, PHP, Elixir, WebAssembly, Dart, Kotlin (Android), Swift, Zig, and C FFI via [alef](https://github.com/kreuzberg-dev/alef).
124
+
125
+ ## Documentation
126
+
127
+ - **[Documentation](https://docs.tree-sitter-language-pack.kreuzberg.dev)** — full docs and API reference.
128
+ - **[GitHub Repository](https://github.com/kreuzberg-dev/tree-sitter-language-pack)** — source, issues, and discussions.
129
+
130
+ ## Part of Kreuzberg.dev
131
+
132
+ - [Kreuzberg](https://github.com/kreuzberg-dev/kreuzberg) — document intelligence: text, tables, metadata from 90+ formats with optional OCR.
133
+ - [Kreuzberg Cloud](https://github.com/kreuzberg-dev/kreuzberg-cloud) — managed extraction API with SDKs, dashboards, and observability.
134
+ - [kreuzcrawl](https://github.com/kreuzberg-dev/kreuzcrawl) — web crawling and scraping with HTML→Markdown and headless-Chrome fallback.
135
+ - [html-to-markdown](https://github.com/kreuzberg-dev/html-to-markdown) — fast, lossless HTML→Markdown engine.
136
+ - [liter-llm](https://github.com/kreuzberg-dev/liter-llm) — universal LLM API client with native bindings for 14 languages and 143 providers.
137
+ - [alef](https://github.com/kreuzberg-dev/alef) — the polyglot binding generator that produces this README and all per-language bindings.
138
+ - [Discord](https://discord.gg/xt9WY3GnKR) — community, roadmap, announcements.
139
+
140
+ ## Contributing
141
+
142
+ Contributions are welcome! See [CONTRIBUTING.md](https://github.com/kreuzberg-dev/tree-sitter-language-pack/blob/main/CONTRIBUTING.md) for guidelines.
143
+
144
+ Join our [Discord community](https://discord.gg/xt9WY3GnKR) for questions and discussion.
145
+
146
+ ## License
147
+
148
+ MIT — see [LICENSE](https://github.com/kreuzberg-dev/tree-sitter-language-pack/blob/main/LICENSE) for details.
data/Steepfile CHANGED
@@ -1,6 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  target :lib do
4
- signature 'sig'
5
- check 'lib'
4
+ signature "sig"
5
+ check "lib"
6
+ # The generated `lib/tree_sitter_language_pack/native.rb` carries inline Sorbet
7
+ # `sig { ... }` blocks on tagged-enum variant Data classes. Sorbet's runtime
8
+ # provides those via `extend T::Sig`, but Steep does not understand the
9
+ # extension (it relies on RBS, not Sorbet sigs) and reports
10
+ # "Type `self` does not have method `sig`" on every block. RBS coverage
11
+ # for the same surface lives in `sig/types.rbs`, so we steer Steep to the
12
+ # RBS file by ignoring the .rb.
13
+ ignore "lib/tree_sitter_language_pack/native.rb"
6
14
  end
@@ -1,46 +1,9 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:f1cee06ff8900ef2d958db4c1c0a5cb8d233243dfa3834f678c826f070dc817d
2
+ # alef:hash:4fd28d28be5d5c08a42b1adaeb8565a890c281118692f20fb4e08cfac6ad7fcf
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
- # Issues & docs: https://github.com/kreuzberg-dev/alef
6
5
  # frozen_string_literal: true
7
6
 
8
- require 'json'
9
- require 'ts_pack_core_rb'
10
-
11
- # Add accessor methods to Hash-based internally-tagged enum instances
12
- class Hash
13
- # Support internally-tagged enum accessors like format.excel, format.email, etc.
14
- # Also support direct field access like format.sheet_count
15
- # rubocop:disable Metrics/CyclomaticComplexity
16
- def method_missing(method_name, *args, &)
17
- # Try symbol key first (how Magnus converts JSON keys)
18
- return self[method_name] if key?(method_name)
19
-
20
- # Try string key
21
- return self[method_name.to_s] if key?(method_name.to_s)
22
-
23
- # Check if this hash has a 'format_type' field (indicating an internally-tagged enum)
24
- format_type = self[:format_type] || self['format_type']
25
- return super unless format_type
26
-
27
- # If the method name matches the format_type (snake_case), extract and return the variant's wrapped data
28
- # Internally-tagged enums store variant data in the '_0' field (from alef's struct variant conversion)
29
- # This allows format.excel to return the ExcelMetadata hash with sheet_count, sheet_names, etc.
30
- snake_case_method = method_name.to_s.downcase
31
- return self[:_0] || self['_0'] || self if snake_case_method == format_type.to_s.downcase
32
-
33
- super
34
- end
35
- # rubocop:enable Metrics/CyclomaticComplexity
36
-
37
- def respond_to_missing?(method_name, include_private = false)
38
- return true if key?(method_name) || key?(method_name.to_s)
39
-
40
- format_type = self[:format_type] || self['format_type']
41
- return false unless format_type
42
-
43
- snake_case_method = method_name.to_s.downcase
44
- snake_case_method == format_type.to_s.downcase || super
45
- end
46
- end
7
+ require "json"
8
+ require "sorbet-runtime"
9
+ require "ts_pack_core_rb"
@@ -1,10 +1,10 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:e0342931f53b51d8116ea5c8a064e3acc1783cc8acd0d0fe337866407e99f370
2
+ # alef:hash:4fd28d28be5d5c08a42b1adaeb8565a890c281118692f20fb4e08cfac6ad7fcf
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
- # Issues & docs: https://github.com/kreuzberg-dev/alef
6
5
  # frozen_string_literal: true
7
6
 
8
7
  module TreeSitterLanguagePack
9
- VERSION = '1.8.0.pre.rc.44'
8
+ ## The version string for this package.
9
+ VERSION = "1.9.0"
10
10
  end
@@ -1,13 +1,18 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:d6b009b67f6b2b81396e42b3a3a93d5a0ad50e680621fbda45fed6329de157df
2
+ # alef:hash:4fd28d28be5d5c08a42b1adaeb8565a890c281118692f20fb4e08cfac6ad7fcf
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
- # Issues & docs: https://github.com/kreuzberg-dev/alef
6
5
  # frozen_string_literal: true
7
6
 
8
- require_relative 'tree_sitter_language_pack/version'
9
- require_relative 'tree_sitter_language_pack/native'
7
+ require_relative "tree_sitter_language_pack/version"
8
+ require_relative "tree_sitter_language_pack/native"
10
9
 
10
+ # Top-level namespace for the TreeSitterLanguagePack Ruby binding.
11
+ #
12
+ # All type and function symbols are re-exported from the native extension
13
+ # loaded via `require_relative "tree_sitter_language_pack/native"`. See
14
+ # {file:README.md} for usage examples and the upstream documentation
15
+ # for the full API reference.
11
16
  module TreeSitterLanguagePack
12
17
  # Re-export all types and functions from native extension
13
18
  end
Binary file
data/sig/types.rbs CHANGED
@@ -1,19 +1,34 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:3cbc9b7f5a084fe4fe656040ca379c3bbdd7a966ad4403cba58d0411a75cce07
2
+ # alef:hash:4fd28d28be5d5c08a42b1adaeb8565a890c281118692f20fb4e08cfac6ad7fcf
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
- # Issues & docs: https://github.com/kreuzberg-dev/alef
6
5
 
7
6
  module TreeSitterLanguagePack
8
7
 
9
8
  VERSION: String
10
9
 
11
- class Span
12
- # Byte and line/column range in source code.
13
- #
14
- # Represents both byte offsets (for slicing) and human-readable line/column
15
- # positions (for display and diagnostics).
10
+ type json_value = Hash[String, untyped] | Array[untyped] | String | Integer | Float | bool | nil
11
+
12
+ class DataAttribute
13
+ attr_accessor name: String?
14
+ attr_accessor value: String?
15
+ attr_accessor span: Span?
16
+
17
+ def initialize: (?name: String, ?value: String, ?span: Span) -> void
18
+ end
19
+
20
+ class DataNode
21
+ attr_accessor kind: DataNodeKind?
22
+ attr_accessor key: String?
23
+ attr_accessor value: String?
24
+ attr_accessor attributes: Array[DataAttribute]?
25
+ attr_accessor children: Array[DataNode]?
26
+ attr_accessor span: Span?
16
27
 
28
+ def initialize: (?kind: DataNodeKind, ?key: String, ?value: String, ?attributes: Array[DataAttribute], ?children: Array[DataNode], ?span: Span) -> void
29
+ end
30
+
31
+ class Span
17
32
  attr_accessor start_byte: Integer?
18
33
  attr_accessor end_byte: Integer?
19
34
  attr_accessor start_line: Integer?
@@ -21,29 +36,10 @@ module TreeSitterLanguagePack
21
36
  attr_accessor end_line: Integer?
22
37
  attr_accessor end_column: Integer?
23
38
 
24
- def initialize: (start_byte: Integer, end_byte: Integer, start_line: Integer, start_column: Integer, end_line: Integer, end_column: Integer) -> void
39
+ def initialize: (?start_byte: Integer, ?end_byte: Integer, ?start_line: Integer, ?start_column: Integer, ?end_line: Integer, ?end_column: Integer) -> void
25
40
  end
26
41
 
27
42
  class ProcessResult
28
- # Complete analysis result from processing a source file.
29
- #
30
- # Contains metrics, structural analysis, imports/exports, comments,
31
- # docstrings, symbols, diagnostics, and optionally chunked code segments.
32
- # Fields are populated based on the `ProcessConfig` flags.
33
- #
34
- # # Fields
35
- #
36
- # - `language` - The language used for parsing
37
- # - `metrics` - Always computed: line counts, byte sizes, error counts
38
- # - `structure` - Functions, classes, structs (when `config.structure = true`)
39
- # - `imports` - Import statements (when `config.imports = true`)
40
- # - `exports` - Export statements (when `config.exports = true`)
41
- # - `comments` - Comments (when `config.comments = true`)
42
- # - `docstrings` - Docstrings (when `config.docstrings = true`)
43
- # - `symbols` - Symbol definitions (when `config.symbols = true`)
44
- # - `diagnostics` - Parse errors (when `config.diagnostics = true`)
45
- # - `chunks` - Chunked code segments (when `config.chunk_max_size` is set)
46
-
47
43
  attr_accessor language: String?
48
44
  attr_accessor metrics: FileMetrics?
49
45
  attr_accessor structure: Array[StructureItem]?
@@ -54,13 +50,12 @@ module TreeSitterLanguagePack
54
50
  attr_accessor symbols: Array[SymbolInfo]?
55
51
  attr_accessor diagnostics: Array[Diagnostic]?
56
52
  attr_accessor chunks: Array[CodeChunk]?
53
+ attr_accessor data: DataNode?
57
54
 
58
- def initialize: (language: String, metrics: FileMetrics, structure: Array[StructureItem], imports: Array[ImportInfo], exports: Array[ExportInfo], comments: Array[CommentInfo], docstrings: Array[DocstringInfo], symbols: Array[SymbolInfo], diagnostics: Array[Diagnostic], chunks: Array[CodeChunk]) -> void
55
+ def initialize: (?language: String, ?metrics: FileMetrics, ?structure: Array[StructureItem], ?imports: Array[ImportInfo], ?exports: Array[ExportInfo], ?comments: Array[CommentInfo], ?docstrings: Array[DocstringInfo], ?symbols: Array[SymbolInfo], ?diagnostics: Array[Diagnostic], ?chunks: Array[CodeChunk], ?data: DataNode) -> void
59
56
  end
60
57
 
61
58
  class FileMetrics
62
- # Aggregate metrics for a source file.
63
-
64
59
  attr_accessor total_lines: Integer?
65
60
  attr_accessor code_lines: Integer?
66
61
  attr_accessor comment_lines: Integer?
@@ -70,12 +65,10 @@ module TreeSitterLanguagePack
70
65
  attr_accessor error_count: Integer?
71
66
  attr_accessor max_depth: Integer?
72
67
 
73
- def initialize: (total_lines: Integer, code_lines: Integer, comment_lines: Integer, blank_lines: Integer, total_bytes: Integer, node_count: Integer, error_count: Integer, max_depth: Integer) -> void
68
+ def initialize: (?total_lines: Integer, ?code_lines: Integer, ?comment_lines: Integer, ?blank_lines: Integer, ?total_bytes: Integer, ?node_count: Integer, ?error_count: Integer, ?max_depth: Integer) -> void
74
69
  end
75
70
 
76
71
  class StructureItem
77
- # A structural item (function, class, struct, etc.) in source code.
78
-
79
72
  attr_accessor kind: StructureKind?
80
73
  attr_accessor name: String?
81
74
  attr_accessor visibility: String?
@@ -86,89 +79,73 @@ module TreeSitterLanguagePack
86
79
  attr_accessor signature: String?
87
80
  attr_accessor body_span: Span?
88
81
 
89
- def initialize: (kind: StructureKind, ?name: String, ?visibility: String, span: Span, children: Array[StructureItem], decorators: Array[String], ?doc_comment: String, ?signature: String, ?body_span: Span) -> void
82
+ def initialize: (?kind: StructureKind, ?name: String, ?visibility: String, ?span: Span, ?children: Array[StructureItem], ?decorators: Array[String], ?doc_comment: String, ?signature: String, ?body_span: Span) -> void
90
83
  end
91
84
 
92
85
  class CommentInfo
93
- # A comment extracted from source code.
94
-
95
86
  attr_accessor text: String?
96
87
  attr_accessor kind: CommentKind?
97
88
  attr_accessor span: Span?
98
89
  attr_accessor associated_node: String?
99
90
 
100
- def initialize: (text: String, kind: CommentKind, span: Span, ?associated_node: String) -> void
91
+ def initialize: (?text: String, ?kind: CommentKind, ?span: Span, ?associated_node: String) -> void
101
92
  end
102
93
 
103
94
  class DocstringInfo
104
- # A docstring extracted from source code.
105
-
106
95
  attr_accessor text: String?
107
96
  attr_accessor format: DocstringFormat?
108
97
  attr_accessor span: Span?
109
98
  attr_accessor associated_item: String?
110
99
  attr_accessor parsed_sections: Array[DocSection]?
111
100
 
112
- def initialize: (text: String, format: DocstringFormat, span: Span, ?associated_item: String, parsed_sections: Array[DocSection]) -> void
101
+ def initialize: (?text: String, ?format: DocstringFormat, ?span: Span, ?associated_item: String, ?parsed_sections: Array[DocSection]) -> void
113
102
  end
114
103
 
115
104
  class DocSection
116
- # A section within a docstring (e.g., Args, Returns, Raises).
117
-
118
105
  attr_accessor kind: String?
119
106
  attr_accessor name: String?
120
107
  attr_accessor description: String?
121
108
 
122
- def initialize: (kind: String, ?name: String, description: String) -> void
109
+ def initialize: (?kind: String, ?name: String, ?description: String) -> void
123
110
  end
124
111
 
125
112
  class ImportInfo
126
- # An import statement extracted from source code.
127
-
128
113
  attr_accessor source: String?
129
114
  attr_accessor items: Array[String]?
130
115
  attr_accessor alias: String?
131
116
  attr_accessor is_wildcard: bool?
132
117
  attr_accessor span: Span?
133
118
 
134
- def initialize: (source: String, items: Array[String], ?alias: String, is_wildcard: bool, span: Span) -> void
119
+ def initialize: (?source: String, ?items: Array[String], ?alias: String, ?is_wildcard: bool, ?span: Span) -> void
135
120
  end
136
121
 
137
122
  class ExportInfo
138
- # An export statement extracted from source code.
139
-
140
123
  attr_accessor name: String?
141
124
  attr_accessor kind: ExportKind?
142
125
  attr_accessor span: Span?
143
126
 
144
- def initialize: (name: String, kind: ExportKind, span: Span) -> void
127
+ def initialize: (?name: String, ?kind: ExportKind, ?span: Span) -> void
145
128
  end
146
129
 
147
130
  class SymbolInfo
148
- # A symbol (variable, function, type, etc.) extracted from source code.
149
-
150
131
  attr_accessor name: String?
151
132
  attr_accessor kind: SymbolKind?
152
133
  attr_accessor span: Span?
153
134
  attr_accessor type_annotation: String?
154
135
  attr_accessor doc: String?
155
136
 
156
- def initialize: (name: String, kind: SymbolKind, span: Span, ?type_annotation: String, ?doc: String) -> void
137
+ def initialize: (?name: String, ?kind: SymbolKind, ?span: Span, ?type_annotation: String, ?doc: String) -> void
157
138
  end
158
139
 
159
140
  class Diagnostic
160
- # A diagnostic (syntax error, missing node, etc.) from parsing.
161
-
162
141
  attr_accessor message: String?
163
142
  attr_accessor severity: DiagnosticSeverity?
164
143
  attr_accessor span: Span?
165
144
 
166
- def initialize: (message: String, severity: DiagnosticSeverity, span: Span) -> void
145
+ def initialize: (?message: String, ?severity: DiagnosticSeverity, ?span: Span) -> void
167
146
  end
168
147
 
169
148
  class CodeChunk
170
- # A chunk of source code with rich metadata.
171
-
172
149
  attr_accessor content: String?
173
150
  attr_accessor start_byte: Integer?
174
151
  attr_accessor end_byte: Integer?
@@ -176,12 +153,10 @@ module TreeSitterLanguagePack
176
153
  attr_accessor end_line: Integer?
177
154
  attr_accessor metadata: ChunkContext?
178
155
 
179
- def initialize: (content: String, start_byte: Integer, end_byte: Integer, start_line: Integer, end_line: Integer, metadata: ChunkContext) -> void
156
+ def initialize: (?content: String, ?start_byte: Integer, ?end_byte: Integer, ?start_line: Integer, ?end_line: Integer, ?metadata: ChunkContext) -> void
180
157
  end
181
158
 
182
159
  class ChunkContext
183
- # Metadata for a single chunk of source code.
184
-
185
160
  attr_accessor language: String?
186
161
  attr_accessor chunk_index: Integer?
187
162
  attr_accessor total_chunks: Integer?
@@ -192,57 +167,78 @@ module TreeSitterLanguagePack
192
167
  attr_accessor docstrings: Array[DocstringInfo]?
193
168
  attr_accessor has_error_nodes: bool?
194
169
 
195
- def initialize: (language: String, chunk_index: Integer, total_chunks: Integer, node_types: Array[String], context_path: Array[String], symbols_defined: Array[String], comments: Array[CommentInfo], docstrings: Array[DocstringInfo], has_error_nodes: bool) -> void
170
+ def initialize: (?language: String, ?chunk_index: Integer, ?total_chunks: Integer, ?node_types: Array[String], ?context_path: Array[String], ?symbols_defined: Array[String], ?comments: Array[CommentInfo], ?docstrings: Array[DocstringInfo], ?has_error_nodes: bool) -> void
196
171
  end
197
172
 
198
173
  class PackConfig
199
- # Configuration for the tree-sitter language pack.
200
- #
201
- # Controls cache directory and which languages to pre-download.
202
- # Can be loaded from a TOML file, constructed programmatically,
203
- # or passed as a dict/object from language bindings.
204
- #
205
- # # Example
206
- #
207
- # ```no_run
208
- # use tree_sitter_language_pack::PackConfig;
209
- #
210
- # let config = PackConfig {
211
- # cache_dir: None,
212
- # languages: Some(vec!["python".to_string(), "rust".to_string()]),
213
- # groups: None,
214
- # };
215
- # ```
216
-
217
174
  attr_accessor cache_dir: String?
218
175
  attr_accessor languages: Array[String]?
219
176
  attr_accessor groups: Array[String]?
220
177
 
221
178
  def initialize: (?cache_dir: String, ?languages: Array[String], ?groups: Array[String]) -> void
222
- def self.from_toml_file: (String path) -> PackConfig
223
- def self.discover: () -> PackConfig?
224
179
  end
225
180
 
226
- class ProcessConfig
227
- # Configuration for the `process()` function.
228
- #
229
- # Controls which analysis features are enabled and whether chunking is performed.
230
- #
231
- # # Examples
232
- #
233
- # ```
234
- # use tree_sitter_language_pack::ProcessConfig;
235
- #
236
- # // Defaults: structure + imports + exports enabled
237
- # let config = ProcessConfig::new("python");
238
- #
239
- # // With chunking
240
- # let config = ProcessConfig::new("python").with_chunking(1000);
241
- #
242
- # // Everything enabled
243
- # let config = ProcessConfig::new("python").all();
244
- # ```
181
+ class Point
182
+ attr_reader row: Integer
183
+ attr_reader column: Integer
184
+
185
+ def initialize: (row: Integer, column: Integer) -> void
186
+ end
187
+
188
+ class ByteRange
189
+ attr_reader start: Integer
190
+ attr_reader end: Integer
191
+
192
+ def initialize: (start: Integer, end: Integer) -> void
193
+ end
194
+
195
+ class Parser
196
+ def set_language: (String name) -> void
197
+ def parse: (String source) -> Tree?
198
+ def parse_bytes: (String source) -> Tree?
199
+ def reset: () -> void
200
+ def self.new: () -> Parser
201
+ def self.default: () -> Parser
202
+ end
245
203
 
204
+ class Tree
205
+ def root_node: () -> Node
206
+ def walk: () -> TreeCursor
207
+ end
208
+
209
+ class Node
210
+ def clone: () -> Node
211
+ def kind: () -> String
212
+ def kind_id: () -> Integer
213
+ def start_byte: () -> Integer
214
+ def end_byte: () -> Integer
215
+ def byte_range: () -> ByteRange
216
+ def start_position: () -> Point
217
+ def end_position: () -> Point
218
+ def is_named: () -> bool
219
+ def is_error: () -> bool
220
+ def is_missing: () -> bool
221
+ def is_extra: () -> bool
222
+ def has_error: () -> bool
223
+ def parent: () -> Node?
224
+ def child: (Integer index) -> Node?
225
+ def child_count: () -> Integer
226
+ def named_child: (Integer index) -> Node?
227
+ def named_child_count: () -> Integer
228
+ def child_by_field_name: (String name) -> Node?
229
+ def to_sexp: () -> String
230
+ def walk: () -> TreeCursor
231
+ end
232
+
233
+ class TreeCursor
234
+ def node: () -> Node
235
+ def goto_first_child: () -> bool
236
+ def goto_parent: () -> bool
237
+ def goto_next_sibling: () -> bool
238
+ def field_name: () -> String?
239
+ end
240
+
241
+ class ProcessConfig
246
242
  attr_accessor language: String?
247
243
  attr_accessor structure: bool?
248
244
  attr_accessor imports: bool?
@@ -252,140 +248,60 @@ module TreeSitterLanguagePack
252
248
  attr_accessor symbols: bool?
253
249
  attr_accessor diagnostics: bool?
254
250
  attr_accessor chunk_max_size: Integer?
251
+ attr_accessor data_extraction: bool?
255
252
 
256
- def initialize: (language: String, structure: bool, imports: bool, exports: bool, comments: bool, docstrings: bool, symbols: bool, diagnostics: bool, ?chunk_max_size: Integer) -> void
253
+ def initialize: (?language: String, ?structure: bool, ?imports: bool, ?exports: bool, ?comments: bool, ?docstrings: bool, ?symbols: bool, ?diagnostics: bool, ?chunk_max_size: Integer, ?data_extraction: bool) -> void
257
254
  def with_chunking: (Integer max_size) -> ProcessConfig
258
255
  def all: () -> ProcessConfig
259
256
  def minimal: () -> ProcessConfig
257
+ def with_data_extraction: (bool enabled) -> ProcessConfig
260
258
  def self.default: () -> ProcessConfig
261
259
  end
262
260
 
263
261
  class LanguageRegistry
264
- # Thread-safe registry of tree-sitter language parsers.
265
- #
266
- # Manages both statically compiled and dynamically loaded language grammars.
267
- # Use [`LanguageRegistry::new()`] for the default registry, or access the
268
- # global instance via the module-level convenience functions
269
- # (`get_language`, `available_languages`, etc.).
270
- #
271
- # # Example
272
- #
273
- # ```no_run
274
- # use tree_sitter_language_pack::{LanguageRegistry, ProcessConfig};
275
- #
276
- # let registry = LanguageRegistry::new();
277
- # let langs = registry.available_languages();
278
- # println!("Available: {:?}", langs);
279
- #
280
- # let config = ProcessConfig::new("python").all();
281
- # let result = registry.process("def hello(): pass", &config).unwrap();
282
- # println!("Structure: {:?}", result.structure);
283
- # ```
284
-
285
- def add_extra_libs_dir: (String dir) -> void
286
262
  def get_language: (String name) -> Language
287
263
  def available_languages: () -> Array[String]
264
+ def has_parser: (String name) -> bool
288
265
  def has_language: (String name) -> bool
289
266
  def language_count: () -> Integer
290
267
  def process: (String source, ProcessConfig config) -> ProcessResult
291
- def self.with_libs_dir: (String libs_dir) -> LanguageRegistry
268
+ def self.new: () -> LanguageRegistry
292
269
  def self.default: () -> LanguageRegistry
293
270
  end
294
271
 
295
- class ParserManifest
296
- # Manifest describing available parser downloads for a specific version.
297
-
298
- attr_reader version: String
299
- attr_reader platforms: Hash[String, PlatformBundle]
300
- attr_reader languages: Hash[String, LanguageInfo]
301
- attr_reader groups: Hash[String, Array[String]]
302
-
303
- def initialize: (version: String, platforms: Hash[String, PlatformBundle], languages: Hash[String, LanguageInfo], groups: Hash[String, Array[String]]) -> void
304
- end
305
-
306
- class PlatformBundle
307
- attr_reader url: String
308
- attr_reader sha256: String
309
- attr_reader size: Integer
310
-
311
- def initialize: (url: String, sha256: String, size: Integer) -> void
312
- end
313
-
314
- class LanguageInfo
315
- attr_reader group: String
316
- attr_reader size: Integer
317
-
318
- def initialize: (group: String, size: Integer) -> void
319
- end
320
-
321
272
  class DownloadManager
322
- # Manages downloading and caching of pre-built parser shared libraries.
323
-
324
- def cache_dir: () -> String
325
273
  def installed_languages: () -> Array[String]
326
- def ensure_languages: (Array[String] names) -> void
327
- def ensure_group: (String group) -> void
328
- def lib_path: (String name) -> String
329
- def fetch_manifest: () -> ParserManifest
330
274
  def download_all_best_effort: () -> Integer
331
275
  def clean_cache: () -> void
332
276
  def self.new: (String version) -> DownloadManager
333
- def self.with_cache_dir: (String version, String cache_dir) -> DownloadManager
334
- def self.default_cache_dir: (String version) -> String
335
277
  end
336
278
 
337
279
  class Language
338
280
  end
339
281
 
340
- class Parser
341
- end
342
-
343
- class Tree
282
+ class DataNodeKind
283
+ type value = :key_value | :element | :sequence
344
284
  end
345
285
 
346
286
  class StructureKind
347
- # The kind of structural item found in source code.
348
- #
349
- # Categorizes top-level and nested declarations such as functions, classes,
350
- # structs, enums, traits, and more. Use [`Other`](StructureKind::Other) for
351
- # language-specific constructs that do not fit a standard category.
352
287
  end
353
288
 
354
289
  class CommentKind
355
- # The kind of a comment found in source code.
356
- #
357
- # Distinguishes between single-line comments, block (multi-line) comments,
358
- # and documentation comments.
359
- type instance = :line | :block | :doc
290
+ type value = :line | :block | :doc
360
291
  end
361
292
 
362
293
  class DocstringFormat
363
- # The format of a docstring extracted from source code.
364
- #
365
- # Identifies the docstring convention used, which varies by language
366
- # (e.g., Python triple-quoted strings, JSDoc, Rustdoc `///` comments).
367
294
  end
368
295
 
369
296
  class ExportKind
370
- # The kind of an export statement found in source code.
371
- #
372
- # Covers named exports, default exports, and re-exports from other modules.
373
- type instance = :named | :default | :re_export
297
+ type value = :named | :default | :re_export
374
298
  end
375
299
 
376
300
  class SymbolKind
377
- # The kind of a symbol definition found in source code.
378
- #
379
- # Categorizes symbol definitions such as variables, constants, functions,
380
- # classes, types, interfaces, enums, and modules.
381
301
  end
382
302
 
383
303
  class DiagnosticSeverity
384
- # Severity level of a diagnostic produced during parsing.
385
- #
386
- # Used to classify parse errors, warnings, and informational messages
387
- # found in the syntax tree.
388
- type instance = :error | :warning | :info
304
+ type value = :error | :warning | :info
389
305
  end
390
306
 
391
307
  def self.detect_language_from_extension: (String ext) -> String?
@@ -400,6 +316,8 @@ module TreeSitterLanguagePack
400
316
 
401
317
  def self.get_locals_query: (String language) -> String?
402
318
 
319
+ def self.get_tags_query: (String language) -> String?
320
+
403
321
  def self.get_language: (String name) -> Language
404
322
 
405
323
  def self.get_parser: (String name) -> Parser
@@ -422,6 +340,8 @@ module TreeSitterLanguagePack
422
340
 
423
341
  def self.download_all: () -> Integer
424
342
 
343
+ def self.download_group: (String name) -> Integer
344
+
425
345
  def self.manifest_languages: () -> Array[String]
426
346
 
427
347
  def self.downloaded_languages: () -> Array[String]
metadata CHANGED
@@ -1,19 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_sitter_language_pack
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0.pre.rc.44
4
+ version: 1.9.0
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Kreuzberg Team
8
8
  bindir: bin
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
11
- dependencies: []
12
- description: Pre-compiled tree-sitter grammars for 305 programming languages
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: sorbet-runtime
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '0.5'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '0.5'
26
+ description: Pre-compiled tree-sitter grammars for 306 programming languages
13
27
  executables: []
14
28
  extensions: []
15
29
  extra_rdoc_files: []
16
30
  files:
31
+ - LICENSE
32
+ - README.md
17
33
  - Steepfile
18
34
  - lib/tree_sitter_language_pack.rb
19
35
  - lib/tree_sitter_language_pack/native.rb
@@ -24,7 +40,7 @@ homepage: https://github.com/kreuzberg-dev/tree-sitter-language-pack
24
40
  licenses:
25
41
  - MIT
26
42
  metadata:
27
- keywords: tree-sitter,parser,syntax,language-pack
43
+ keywords: language-pack,parser,syntax,tree-sitter
28
44
  rubygems_mfa_required: 'true'
29
45
  rdoc_options: []
30
46
  require_paths:
@@ -34,6 +50,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
34
50
  - - ">="
35
51
  - !ruby/object:Gem::Version
36
52
  version: 3.2.0
53
+ - - "<"
54
+ - !ruby/object:Gem::Version
55
+ version: '4.0'
37
56
  required_rubygems_version: !ruby/object:Gem::Requirement
38
57
  requirements:
39
58
  - - ">="
@@ -42,5 +61,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
61
  requirements: []
43
62
  rubygems_version: 3.6.9
44
63
  specification_version: 4
45
- summary: Pre-compiled tree-sitter grammars for 305 programming languages
64
+ summary: Pre-compiled tree-sitter grammars for 306 programming languages
46
65
  test_files: []