tree_sitter_language_pack 1.8.1-aarch64-linux → 1.9.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9ea99ac7e66e345d26a2cae0c427c90ef70d74d855736eafdbb24e2de7f9c8d7
4
- data.tar.gz: 6cf990d5188accccf34b62b01378bf868a58305f621628fb17d53ef11e90d870
3
+ metadata.gz: eca50798376373f2efbc9e5106deefcd3549c2d5bc34691717b4d02eaad20bee
4
+ data.tar.gz: 91179dbd223f0e63402ee29bea77d8221aec757becfb682bafafc1feb729ca75
5
5
  SHA512:
6
- metadata.gz: 41f784914f5baea3d7b9a90b9b1f7c5c22ce7d855220650b50f818b69445a69488ac6313a8c612ac84cf868e19335eb66605b99d143f29c9e21036332164c3f6
7
- data.tar.gz: db82e8818527c0bde37490ba829e0685a843bbd06a7d5a6b352a7db185a8940d6d5ed64efd642beec7f9ed577d2826ac4fc5acaee25f971789e518db2112e303
6
+ metadata.gz: 53a0ef69cff675b01462a4e59a17f66b453c2ab80ec8f42920b765cdf4b0b814178aff4e2053095a5724aaf04d72fd0eccd444a354b834babe6f5a01fde61db9
7
+ data.tar.gz: 8912aac5e70f0da34f0ae6af85013f7c2ee9928791b7de9bb9b087c5d794b464079711481bc70eaff3fcaf12559e70a58441ef455845234085d3e1157144e374
data/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright 2025-2026 Na'aman Hirschfeld
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,148 @@
1
+ # Ruby
2
+
3
+ <div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0">
4
+ <a href="https://github.com/kreuzberg-dev/alef">
5
+ <img src="https://img.shields.io/badge/Bindings-alef%20%D7%90-007ec6" alt="Bindings" />
6
+ </a>
7
+ <!-- Language Bindings -->
8
+ <a href="https://crates.io/crates/tree-sitter-language-pack">
9
+ <img src="https://img.shields.io/crates/v/tree-sitter-language-pack?label=Rust&color=007ec6" alt="Rust" />
10
+ </a>
11
+ <a href="https://pypi.org/project/tree-sitter-language-pack/">
12
+ <img src="https://img.shields.io/pypi/v/tree-sitter-language-pack?label=Python&color=007ec6" alt="Python" />
13
+ </a>
14
+ <a href="https://www.npmjs.com/package/@kreuzberg/tree-sitter-language-pack">
15
+ <img
16
+ src="https://img.shields.io/npm/v/@kreuzberg/tree-sitter-language-pack?label=Node.js&color=007ec6"
17
+ alt="Node.js"
18
+ />
19
+ </a>
20
+ <a href="https://www.npmjs.com/package/@kreuzberg/tree-sitter-language-pack-wasm">
21
+ <img
22
+ src="https://img.shields.io/npm/v/@kreuzberg/tree-sitter-language-pack-wasm?label=WASM&color=007ec6"
23
+ alt="WASM"
24
+ />
25
+ </a>
26
+ <a href="https://central.sonatype.com/artifact/dev.kreuzberg.treesitterlanguagepack/tree-sitter-language-pack">
27
+ <img
28
+ src="https://img.shields.io/maven-central/v/dev.kreuzberg.treesitterlanguagepack/tree-sitter-language-pack?label=Java&color=007ec6"
29
+ alt="Java"
30
+ />
31
+ </a>
32
+ <a href="https://pkg.go.dev/github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go">
33
+ <img
34
+ src="https://img.shields.io/github/v/tag/kreuzberg-dev/tree-sitter-language-pack?label=Go&color=007ec6"
35
+ alt="Go"
36
+ />
37
+ </a>
38
+ <a href="https://www.nuget.org/packages/TreeSitterLanguagePack/">
39
+ <img src="https://img.shields.io/nuget/v/TreeSitterLanguagePack?label=C%23&color=007ec6" alt="C#" />
40
+ </a>
41
+ <a href="https://packagist.org/packages/kreuzberg-dev/tree-sitter-language-pack">
42
+ <img
43
+ src="https://img.shields.io/packagist/v/kreuzberg-dev/tree-sitter-language-pack?label=PHP&color=007ec6"
44
+ alt="PHP"
45
+ />
46
+ </a>
47
+ <a href="https://rubygems.org/gems/tree_sitter_language_pack">
48
+ <img src="https://img.shields.io/gem/v/tree_sitter_language_pack?label=Ruby&color=007ec6" alt="Ruby" />
49
+ </a>
50
+ <a href="https://hex.pm/packages/tree_sitter_language_pack">
51
+ <img src="https://img.shields.io/hexpm/v/tree_sitter_language_pack?label=Elixir&color=007ec6" alt="Elixir" />
52
+ </a>
53
+ <a href="https://pub.dev/packages/tree_sitter_language_pack">
54
+ <img src="https://img.shields.io/pub/v/tree_sitter_language_pack?label=Dart&color=007ec6" alt="Dart" />
55
+ </a>
56
+ <a href="https://central.sonatype.com/artifact/dev.kreuzberg.tslp.android/tree-sitter-language-pack-android">
57
+ <img
58
+ src="https://img.shields.io/maven-central/v/dev.kreuzberg.tslp.android/tree-sitter-language-pack-android?label=Kotlin&color=007ec6"
59
+ alt="Kotlin"
60
+ />
61
+ </a>
62
+ <a href="https://github.com/kreuzberg-dev/tree-sitter-language-pack/tree/main/packages/swift">
63
+ <img src="https://img.shields.io/badge/Swift-SPM-007ec6" alt="Swift" />
64
+ </a>
65
+ <a href="https://github.com/kreuzberg-dev/tree-sitter-language-pack/tree/main/packages/zig">
66
+ <img src="https://img.shields.io/badge/Zig-package-007ec6" alt="Zig" />
67
+ </a>
68
+ <a href="https://github.com/kreuzberg-dev/tree-sitter-language-pack/releases">
69
+ <img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C FFI" />
70
+ </a>
71
+
72
+ <!-- Project Info -->
73
+ <a href="https://github.com/kreuzberg-dev/tree-sitter-language-pack/blob/main/LICENSE">
74
+ <img src="https://img.shields.io/badge/License-MIT-007ec6" alt="License" />
75
+ </a>
76
+ <a href="https://docs.tree-sitter-language-pack.kreuzberg.dev">
77
+ <img src="https://img.shields.io/badge/Docs-tree--sitter--language--pack-007ec6" alt="Documentation" />
78
+ </a>
79
+ </div>
80
+
81
+ <div align="center" style="margin: 24px 0 0">
82
+ <a href="https://kreuzberg.dev">
83
+ <img alt="tree-sitter-language-pack" src="https://github.com/user-attachments/assets/478a83da-237b-446b-b3a8-e564c13e00a8" />
84
+ </a>
85
+ </div>
86
+
87
+ <div align="center" style="display: flex; flex-wrap: wrap; gap: 12px; justify-content: center; margin: 28px 0 24px">
88
+ <a href="https://discord.gg/xt9WY3GnKR">
89
+ <img
90
+ height="22"
91
+ src="https://img.shields.io/badge/Discord-Chat-007ec6?logo=discord&logoColor=white"
92
+ alt="Join Discord"
93
+ />
94
+ </a>
95
+ </div>
96
+
97
+ Pre-compiled tree-sitter grammars for 306 programming languages with Ruby bindings via Magnus.
98
+
99
+ ## What This Package Provides
100
+
101
+ - **Parser access** — load a tree-sitter language parser by name without wiring individual grammar crates or packages.
102
+ - **Code intelligence primitives** — parse trees, functions, classes, imports, exports, symbols, docstrings, diagnostics, and syntax-aware chunks.
103
+ - **Shared cache model** — parsers are fetched and cached once, then reused by every call in the process.
104
+ - **Same catalog as every binding** — Rust, Python, Node.js, Go, Java, PHP, Ruby, .NET, Elixir, WASM, Dart, Kotlin Android, Swift, Zig, and C FFI use the same grammar set.
105
+ - **Ruby package** — Magnus-backed parser access with Ruby objects.
106
+
107
+ ## Installation
108
+
109
+ ```bash
110
+ gem install tree_sitter_language_pack
111
+ ```
112
+
113
+ ## Quick Start
114
+
115
+ See the [language guide](https://docs.tree-sitter-language-pack.kreuzberg.dev) for Ruby-specific usage.
116
+
117
+ ## Features
118
+
119
+ - **300+ languages** — pre-compiled tree-sitter grammars covering every major programming language and many minor ones.
120
+ - **On-demand download + cache** — parsers fetched at first use; subsequent runs hit the local cache.
121
+ - **Code intelligence** — extract functions, classes, imports, exports, symbols, docstrings, and diagnostics with one API.
122
+ - **Syntax-aware chunking** — semantic chunks for RAG/LLM pipelines.
123
+ - **Polyglot bindings** — Rust core with native bindings for Python, TypeScript, Go, Java, C#, Ruby, PHP, Elixir, and WebAssembly via [alef](https://github.com/kreuzberg-dev/alef).
124
+
125
+ ## Documentation
126
+
127
+ - **[Documentation](https://docs.tree-sitter-language-pack.kreuzberg.dev)** — Full docs and API reference
128
+ - **[GitHub Repository](https://github.com/kreuzberg-dev/tree-sitter-language-pack)** — Source, issues, and discussions
129
+
130
+ ## Part of Kreuzberg.dev
131
+
132
+ - [Kreuzberg](https://github.com/kreuzberg-dev/kreuzberg) — document intelligence: text, tables, metadata from 90+ formats with optional OCR.
133
+ - [Kreuzberg Cloud](https://github.com/kreuzberg-dev/kreuzberg-cloud) — managed extraction API with SDKs, dashboards, and observability.
134
+ - [kreuzcrawl](https://github.com/kreuzberg-dev/kreuzcrawl) — web crawling and scraping with HTML→Markdown and headless-Chrome fallback.
135
+ - [html-to-markdown](https://github.com/kreuzberg-dev/html-to-markdown) — fast, lossless HTML→Markdown engine.
136
+ - [liter-llm](https://github.com/kreuzberg-dev/liter-llm) — universal LLM API client with native bindings for 14 languages and 143 providers.
137
+ - [alef](https://github.com/kreuzberg-dev/alef) — the polyglot binding generator that produces this README and all per-language bindings.
138
+ - [Discord](https://discord.gg/xt9WY3GnKR) — community, roadmap, announcements.
139
+
140
+ ## Contributing
141
+
142
+ Contributions are welcome! See [CONTRIBUTING.md](https://github.com/kreuzberg-dev/tree-sitter-language-pack/blob/main/CONTRIBUTING.md) for guidelines.
143
+
144
+ Join our [Discord community](https://discord.gg/xt9WY3GnKR) for questions and discussion.
145
+
146
+ ## License
147
+
148
+ MIT — see [LICENSE](https://github.com/kreuzberg-dev/tree-sitter-language-pack/blob/main/LICENSE) for details.
data/Steepfile CHANGED
@@ -1,6 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  target :lib do
4
- signature 'sig'
5
- check 'lib'
4
+ signature "sig"
5
+ check "lib"
6
+ # The generated `lib/tree_sitter_language_pack/native.rb` carries inline Sorbet
7
+ # `sig { ... }` blocks on tagged-enum variant Data classes. Sorbet's runtime
8
+ # provides those via `extend T::Sig`, but Steep does not understand the
9
+ # extension (it relies on RBS, not Sorbet sigs) and reports
10
+ # "Type `self` does not have method `sig`" on every block. RBS coverage
11
+ # for the same surface lives in `sig/types.rbs`, so we steer Steep to the
12
+ # RBS file by ignoring the .rb.
13
+ ignore "lib/tree_sitter_language_pack/native.rb"
6
14
  end
@@ -1,46 +1,9 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:8b6fde19410058828311dde84042fe181eb3e1d609f3f53d64b4f7127db9e6af
2
+ # alef:hash:4fd28d28be5d5c08a42b1adaeb8565a890c281118692f20fb4e08cfac6ad7fcf
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
- # Issues & docs: https://github.com/kreuzberg-dev/alef
6
5
  # frozen_string_literal: true
7
6
 
8
- require 'json'
9
- require 'ts_pack_core_rb'
10
-
11
- # Add accessor methods to Hash-based internally-tagged enum instances
12
- class Hash
13
- # Support internally-tagged enum accessors like format.excel, format.email, etc.
14
- # Also support direct field access like format.sheet_count
15
- # rubocop:disable Metrics/CyclomaticComplexity
16
- def method_missing(method_name, *args, &)
17
- # Try symbol key first (how Magnus converts JSON keys)
18
- return self[method_name] if key?(method_name)
19
-
20
- # Try string key
21
- return self[method_name.to_s] if key?(method_name.to_s)
22
-
23
- # Check if this hash has a 'format_type' field (indicating an internally-tagged enum)
24
- format_type = self[:format_type] || self['format_type']
25
- return super unless format_type
26
-
27
- # If the method name matches the format_type (snake_case), extract and return the variant's wrapped data
28
- # Internally-tagged enums store variant data in the '_0' field (from alef's struct variant conversion)
29
- # This allows format.excel to return the ExcelMetadata hash with sheet_count, sheet_names, etc.
30
- snake_case_method = method_name.to_s.downcase
31
- return self[:_0] || self['_0'] || self if snake_case_method == format_type.to_s.downcase
32
-
33
- super
34
- end
35
- # rubocop:enable Metrics/CyclomaticComplexity
36
-
37
- def respond_to_missing?(method_name, include_private = false)
38
- return true if key?(method_name) || key?(method_name.to_s)
39
-
40
- format_type = self[:format_type] || self['format_type']
41
- return false unless format_type
42
-
43
- snake_case_method = method_name.to_s.downcase
44
- snake_case_method == format_type.to_s.downcase || super
45
- end
46
- end
7
+ require "json"
8
+ require "sorbet-runtime"
9
+ require "ts_pack_core_rb"
@@ -1,10 +1,10 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:d113f70858d98c7a984fcf099853b9c37b45fde94c1174988819b8516cc5d8fc
2
+ # alef:hash:4fd28d28be5d5c08a42b1adaeb8565a890c281118692f20fb4e08cfac6ad7fcf
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
- # Issues & docs: https://github.com/kreuzberg-dev/alef
6
5
  # frozen_string_literal: true
7
6
 
8
7
  module TreeSitterLanguagePack
9
- VERSION = '1.8.1'
8
+ ## The version string for this package.
9
+ VERSION = "1.9.0"
10
10
  end
@@ -1,13 +1,18 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:efdf5956fee9ce2f8507e58e07da47d0d28648f54484df7d7611d6a9272c9792
2
+ # alef:hash:4fd28d28be5d5c08a42b1adaeb8565a890c281118692f20fb4e08cfac6ad7fcf
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
- # Issues & docs: https://github.com/kreuzberg-dev/alef
6
5
  # frozen_string_literal: true
7
6
 
8
- require_relative 'tree_sitter_language_pack/version'
9
- require_relative 'tree_sitter_language_pack/native'
7
+ require_relative "tree_sitter_language_pack/version"
8
+ require_relative "tree_sitter_language_pack/native"
10
9
 
10
+ # Top-level namespace for the TreeSitterLanguagePack Ruby binding.
11
+ #
12
+ # All type and function symbols are re-exported from the native extension
13
+ # loaded via `require_relative "tree_sitter_language_pack/native"`. See
14
+ # {file:README.md} for usage examples and the upstream documentation
15
+ # for the full API reference.
11
16
  module TreeSitterLanguagePack
12
17
  # Re-export all types and functions from native extension
13
18
  end
Binary file
data/sig/types.rbs CHANGED
@@ -1,19 +1,34 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:82d69edc97fafaeafa8b66758fcd6fb121764169c5322d85640bb7e73f8c6817
2
+ # alef:hash:4fd28d28be5d5c08a42b1adaeb8565a890c281118692f20fb4e08cfac6ad7fcf
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
- # Issues & docs: https://github.com/kreuzberg-dev/alef
6
5
 
7
6
  module TreeSitterLanguagePack
8
7
 
9
8
  VERSION: String
10
9
 
11
- class Span
12
- # Byte and line/column range in source code.
13
- #
14
- # Represents both byte offsets (for slicing) and human-readable line/column
15
- # positions (for display and diagnostics).
10
+ type json_value = Hash[String, untyped] | Array[untyped] | String | Integer | Float | bool | nil
11
+
12
+ class DataAttribute
13
+ attr_accessor name: String?
14
+ attr_accessor value: String?
15
+ attr_accessor span: Span?
16
+
17
+ def initialize: (?name: String, ?value: String, ?span: Span) -> void
18
+ end
16
19
 
20
+ class DataNode
21
+ attr_accessor kind: DataNodeKind?
22
+ attr_accessor key: String?
23
+ attr_accessor value: String?
24
+ attr_accessor attributes: Array[DataAttribute]?
25
+ attr_accessor children: Array[DataNode]?
26
+ attr_accessor span: Span?
27
+
28
+ def initialize: (?kind: DataNodeKind, ?key: String, ?value: String, ?attributes: Array[DataAttribute], ?children: Array[DataNode], ?span: Span) -> void
29
+ end
30
+
31
+ class Span
17
32
  attr_accessor start_byte: Integer?
18
33
  attr_accessor end_byte: Integer?
19
34
  attr_accessor start_line: Integer?
@@ -21,29 +36,10 @@ module TreeSitterLanguagePack
21
36
  attr_accessor end_line: Integer?
22
37
  attr_accessor end_column: Integer?
23
38
 
24
- def initialize: (start_byte: Integer, end_byte: Integer, start_line: Integer, start_column: Integer, end_line: Integer, end_column: Integer) -> void
39
+ def initialize: (?start_byte: Integer, ?end_byte: Integer, ?start_line: Integer, ?start_column: Integer, ?end_line: Integer, ?end_column: Integer) -> void
25
40
  end
26
41
 
27
42
  class ProcessResult
28
- # Complete analysis result from processing a source file.
29
- #
30
- # Contains metrics, structural analysis, imports/exports, comments,
31
- # docstrings, symbols, diagnostics, and optionally chunked code segments.
32
- # Fields are populated based on the `ProcessConfig` flags.
33
- #
34
- # # Fields
35
- #
36
- # - `language` - The language used for parsing
37
- # - `metrics` - Always computed: line counts, byte sizes, error counts
38
- # - `structure` - Functions, classes, structs (when `config.structure = true`)
39
- # - `imports` - Import statements (when `config.imports = true`)
40
- # - `exports` - Export statements (when `config.exports = true`)
41
- # - `comments` - Comments (when `config.comments = true`)
42
- # - `docstrings` - Docstrings (when `config.docstrings = true`)
43
- # - `symbols` - Symbol definitions (when `config.symbols = true`)
44
- # - `diagnostics` - Parse errors (when `config.diagnostics = true`)
45
- # - `chunks` - Chunked code segments (when `config.chunk_max_size` is set)
46
-
47
43
  attr_accessor language: String?
48
44
  attr_accessor metrics: FileMetrics?
49
45
  attr_accessor structure: Array[StructureItem]?
@@ -54,13 +50,12 @@ module TreeSitterLanguagePack
54
50
  attr_accessor symbols: Array[SymbolInfo]?
55
51
  attr_accessor diagnostics: Array[Diagnostic]?
56
52
  attr_accessor chunks: Array[CodeChunk]?
53
+ attr_accessor data: DataNode?
57
54
 
58
- def initialize: (language: String, metrics: FileMetrics, structure: Array[StructureItem], imports: Array[ImportInfo], exports: Array[ExportInfo], comments: Array[CommentInfo], docstrings: Array[DocstringInfo], symbols: Array[SymbolInfo], diagnostics: Array[Diagnostic], chunks: Array[CodeChunk]) -> void
55
+ def initialize: (?language: String, ?metrics: FileMetrics, ?structure: Array[StructureItem], ?imports: Array[ImportInfo], ?exports: Array[ExportInfo], ?comments: Array[CommentInfo], ?docstrings: Array[DocstringInfo], ?symbols: Array[SymbolInfo], ?diagnostics: Array[Diagnostic], ?chunks: Array[CodeChunk], ?data: DataNode) -> void
59
56
  end
60
57
 
61
58
  class FileMetrics
62
- # Aggregate metrics for a source file.
63
-
64
59
  attr_accessor total_lines: Integer?
65
60
  attr_accessor code_lines: Integer?
66
61
  attr_accessor comment_lines: Integer?
@@ -70,12 +65,10 @@ module TreeSitterLanguagePack
70
65
  attr_accessor error_count: Integer?
71
66
  attr_accessor max_depth: Integer?
72
67
 
73
- def initialize: (total_lines: Integer, code_lines: Integer, comment_lines: Integer, blank_lines: Integer, total_bytes: Integer, node_count: Integer, error_count: Integer, max_depth: Integer) -> void
68
+ def initialize: (?total_lines: Integer, ?code_lines: Integer, ?comment_lines: Integer, ?blank_lines: Integer, ?total_bytes: Integer, ?node_count: Integer, ?error_count: Integer, ?max_depth: Integer) -> void
74
69
  end
75
70
 
76
71
  class StructureItem
77
- # A structural item (function, class, struct, etc.) in source code.
78
-
79
72
  attr_accessor kind: StructureKind?
80
73
  attr_accessor name: String?
81
74
  attr_accessor visibility: String?
@@ -86,89 +79,73 @@ module TreeSitterLanguagePack
86
79
  attr_accessor signature: String?
87
80
  attr_accessor body_span: Span?
88
81
 
89
- def initialize: (kind: StructureKind, ?name: String, ?visibility: String, span: Span, children: Array[StructureItem], decorators: Array[String], ?doc_comment: String, ?signature: String, ?body_span: Span) -> void
82
+ def initialize: (?kind: StructureKind, ?name: String, ?visibility: String, ?span: Span, ?children: Array[StructureItem], ?decorators: Array[String], ?doc_comment: String, ?signature: String, ?body_span: Span) -> void
90
83
  end
91
84
 
92
85
  class CommentInfo
93
- # A comment extracted from source code.
94
-
95
86
  attr_accessor text: String?
96
87
  attr_accessor kind: CommentKind?
97
88
  attr_accessor span: Span?
98
89
  attr_accessor associated_node: String?
99
90
 
100
- def initialize: (text: String, kind: CommentKind, span: Span, ?associated_node: String) -> void
91
+ def initialize: (?text: String, ?kind: CommentKind, ?span: Span, ?associated_node: String) -> void
101
92
  end
102
93
 
103
94
  class DocstringInfo
104
- # A docstring extracted from source code.
105
-
106
95
  attr_accessor text: String?
107
96
  attr_accessor format: DocstringFormat?
108
97
  attr_accessor span: Span?
109
98
  attr_accessor associated_item: String?
110
99
  attr_accessor parsed_sections: Array[DocSection]?
111
100
 
112
- def initialize: (text: String, format: DocstringFormat, span: Span, ?associated_item: String, parsed_sections: Array[DocSection]) -> void
101
+ def initialize: (?text: String, ?format: DocstringFormat, ?span: Span, ?associated_item: String, ?parsed_sections: Array[DocSection]) -> void
113
102
  end
114
103
 
115
104
  class DocSection
116
- # A section within a docstring (e.g., Args, Returns, Raises).
117
-
118
105
  attr_accessor kind: String?
119
106
  attr_accessor name: String?
120
107
  attr_accessor description: String?
121
108
 
122
- def initialize: (kind: String, ?name: String, description: String) -> void
109
+ def initialize: (?kind: String, ?name: String, ?description: String) -> void
123
110
  end
124
111
 
125
112
  class ImportInfo
126
- # An import statement extracted from source code.
127
-
128
113
  attr_accessor source: String?
129
114
  attr_accessor items: Array[String]?
130
115
  attr_accessor alias: String?
131
116
  attr_accessor is_wildcard: bool?
132
117
  attr_accessor span: Span?
133
118
 
134
- def initialize: (source: String, items: Array[String], ?alias: String, is_wildcard: bool, span: Span) -> void
119
+ def initialize: (?source: String, ?items: Array[String], ?alias: String, ?is_wildcard: bool, ?span: Span) -> void
135
120
  end
136
121
 
137
122
  class ExportInfo
138
- # An export statement extracted from source code.
139
-
140
123
  attr_accessor name: String?
141
124
  attr_accessor kind: ExportKind?
142
125
  attr_accessor span: Span?
143
126
 
144
- def initialize: (name: String, kind: ExportKind, span: Span) -> void
127
+ def initialize: (?name: String, ?kind: ExportKind, ?span: Span) -> void
145
128
  end
146
129
 
147
130
  class SymbolInfo
148
- # A symbol (variable, function, type, etc.) extracted from source code.
149
-
150
131
  attr_accessor name: String?
151
132
  attr_accessor kind: SymbolKind?
152
133
  attr_accessor span: Span?
153
134
  attr_accessor type_annotation: String?
154
135
  attr_accessor doc: String?
155
136
 
156
- def initialize: (name: String, kind: SymbolKind, span: Span, ?type_annotation: String, ?doc: String) -> void
137
+ def initialize: (?name: String, ?kind: SymbolKind, ?span: Span, ?type_annotation: String, ?doc: String) -> void
157
138
  end
158
139
 
159
140
  class Diagnostic
160
- # A diagnostic (syntax error, missing node, etc.) from parsing.
161
-
162
141
  attr_accessor message: String?
163
142
  attr_accessor severity: DiagnosticSeverity?
164
143
  attr_accessor span: Span?
165
144
 
166
- def initialize: (message: String, severity: DiagnosticSeverity, span: Span) -> void
145
+ def initialize: (?message: String, ?severity: DiagnosticSeverity, ?span: Span) -> void
167
146
  end
168
147
 
169
148
  class CodeChunk
170
- # A chunk of source code with rich metadata.
171
-
172
149
  attr_accessor content: String?
173
150
  attr_accessor start_byte: Integer?
174
151
  attr_accessor end_byte: Integer?
@@ -176,12 +153,10 @@ module TreeSitterLanguagePack
176
153
  attr_accessor end_line: Integer?
177
154
  attr_accessor metadata: ChunkContext?
178
155
 
179
- def initialize: (content: String, start_byte: Integer, end_byte: Integer, start_line: Integer, end_line: Integer, metadata: ChunkContext) -> void
156
+ def initialize: (?content: String, ?start_byte: Integer, ?end_byte: Integer, ?start_line: Integer, ?end_line: Integer, ?metadata: ChunkContext) -> void
180
157
  end
181
158
 
182
159
  class ChunkContext
183
- # Metadata for a single chunk of source code.
184
-
185
160
  attr_accessor language: String?
186
161
  attr_accessor chunk_index: Integer?
187
162
  attr_accessor total_chunks: Integer?
@@ -192,50 +167,25 @@ module TreeSitterLanguagePack
192
167
  attr_accessor docstrings: Array[DocstringInfo]?
193
168
  attr_accessor has_error_nodes: bool?
194
169
 
195
- def initialize: (language: String, chunk_index: Integer, total_chunks: Integer, node_types: Array[String], context_path: Array[String], symbols_defined: Array[String], comments: Array[CommentInfo], docstrings: Array[DocstringInfo], has_error_nodes: bool) -> void
170
+ def initialize: (?language: String, ?chunk_index: Integer, ?total_chunks: Integer, ?node_types: Array[String], ?context_path: Array[String], ?symbols_defined: Array[String], ?comments: Array[CommentInfo], ?docstrings: Array[DocstringInfo], ?has_error_nodes: bool) -> void
196
171
  end
197
172
 
198
173
  class PackConfig
199
- # Configuration for the tree-sitter language pack.
200
- #
201
- # Controls cache directory and which languages to pre-download.
202
- # Can be loaded from a TOML file, constructed programmatically,
203
- # or passed as a dict/object from language bindings.
204
- #
205
- # # Example
206
- #
207
- # ```no_run
208
- # use tree_sitter_language_pack::PackConfig;
209
- #
210
- # let config = PackConfig {
211
- # cache_dir: None,
212
- # languages: Some(vec!["python".to_string(), "rust".to_string()]),
213
- # groups: None,
214
- # };
215
- # ```
216
-
217
174
  attr_accessor cache_dir: String?
218
175
  attr_accessor languages: Array[String]?
219
176
  attr_accessor groups: Array[String]?
220
177
 
221
178
  def initialize: (?cache_dir: String, ?languages: Array[String], ?groups: Array[String]) -> void
222
- def self.from_toml_file: (String path) -> PackConfig
223
- def self.discover: () -> PackConfig?
224
179
  end
225
180
 
226
181
  class Point
227
- # A source position — row + column, zero-indexed.
228
-
229
182
  attr_reader row: Integer
230
183
  attr_reader column: Integer
231
184
 
232
185
  def initialize: (row: Integer, column: Integer) -> void
233
- def self.from: (Point p) -> Point
234
186
  end
235
187
 
236
188
  class ByteRange
237
- # A byte range — start (inclusive) to end (exclusive).
238
-
239
189
  attr_reader start: Integer
240
190
  attr_reader end: Integer
241
191
 
@@ -243,40 +193,20 @@ module TreeSitterLanguagePack
243
193
  end
244
194
 
245
195
  class Parser
246
- # A tree-sitter parser configured for one language at a time.
247
- #
248
- # # Example
249
- #
250
- # ```no_run
251
- # use tree_sitter_language_pack::Parser;
252
- #
253
- # let mut parser = Parser::new();
254
- # parser.set_language("python")?;
255
- # let tree = parser.parse("def hello(): pass").expect("parse failed");
256
- # assert_eq!(tree.root_node().kind(), "module");
257
- # # Ok::<(), tree_sitter_language_pack::Error>(())
258
- # ```
259
-
260
196
  def set_language: (String name) -> void
261
197
  def parse: (String source) -> Tree?
262
198
  def parse_bytes: (String source) -> Tree?
263
199
  def reset: () -> void
200
+ def self.new: () -> Parser
264
201
  def self.default: () -> Parser
265
202
  end
266
203
 
267
204
  class Tree
268
- # A parsed syntax tree. Cheap to clone (refcount bump).
269
-
270
205
  def root_node: () -> Node
271
206
  def walk: () -> TreeCursor
272
207
  end
273
208
 
274
209
  class Node
275
- # A single syntax node within a [`Tree`].
276
- #
277
- # Nodes hold a strong reference to their parent tree so they remain valid
278
- # regardless of how the tree is moved or stored at the FFI boundary.
279
-
280
210
  def clone: () -> Node
281
211
  def kind: () -> String
282
212
  def kind_id: () -> Integer
@@ -301,8 +231,6 @@ module TreeSitterLanguagePack
301
231
  end
302
232
 
303
233
  class TreeCursor
304
- # A cursor for traversing a [`Tree`].
305
-
306
234
  def node: () -> Node
307
235
  def goto_first_child: () -> bool
308
236
  def goto_parent: () -> bool
@@ -311,25 +239,6 @@ module TreeSitterLanguagePack
311
239
  end
312
240
 
313
241
  class ProcessConfig
314
- # Configuration for the `process()` function.
315
- #
316
- # Controls which analysis features are enabled and whether chunking is performed.
317
- #
318
- # # Examples
319
- #
320
- # ```
321
- # use tree_sitter_language_pack::ProcessConfig;
322
- #
323
- # // Defaults: structure + imports + exports enabled
324
- # let config = ProcessConfig::new("python");
325
- #
326
- # // With chunking
327
- # let config = ProcessConfig::new("python").with_chunking(1000);
328
- #
329
- # // Everything enabled
330
- # let config = ProcessConfig::new("python").all();
331
- # ```
332
-
333
242
  attr_accessor language: String?
334
243
  attr_accessor structure: bool?
335
244
  attr_accessor imports: bool?
@@ -339,133 +248,59 @@ module TreeSitterLanguagePack
339
248
  attr_accessor symbols: bool?
340
249
  attr_accessor diagnostics: bool?
341
250
  attr_accessor chunk_max_size: Integer?
251
+ attr_accessor data_extraction: bool?
342
252
 
343
- def initialize: (language: String, structure: bool, imports: bool, exports: bool, comments: bool, docstrings: bool, symbols: bool, diagnostics: bool, ?chunk_max_size: Integer) -> void
253
+ def initialize: (?language: String, ?structure: bool, ?imports: bool, ?exports: bool, ?comments: bool, ?docstrings: bool, ?symbols: bool, ?diagnostics: bool, ?chunk_max_size: Integer, ?data_extraction: bool) -> void
344
254
  def with_chunking: (Integer max_size) -> ProcessConfig
345
255
  def all: () -> ProcessConfig
346
256
  def minimal: () -> ProcessConfig
257
+ def with_data_extraction: (bool enabled) -> ProcessConfig
347
258
  def self.default: () -> ProcessConfig
348
259
  end
349
260
 
350
261
  class LanguageRegistry
351
- # Thread-safe registry of tree-sitter language parsers.
352
- #
353
- # Manages both statically compiled and dynamically loaded language grammars.
354
- # Use [`LanguageRegistry::new()`] for the default registry, or access the
355
- # global instance via the module-level convenience functions
356
- # (`get_language`, `available_languages`, etc.).
357
- #
358
- # # Example
359
- #
360
- # ```no_run
361
- # use tree_sitter_language_pack::{LanguageRegistry, ProcessConfig};
362
- #
363
- # let registry = LanguageRegistry::new();
364
- # let langs = registry.available_languages();
365
- # println!("Available: {:?}", langs);
366
- #
367
- # let config = ProcessConfig::new("python").all();
368
- # let result = registry.process("def hello(): pass", &config).unwrap();
369
- # println!("Structure: {:?}", result.structure);
370
- # ```
371
-
372
- def add_extra_libs_dir: (String dir) -> void
373
262
  def get_language: (String name) -> Language
374
263
  def available_languages: () -> Array[String]
264
+ def has_parser: (String name) -> bool
375
265
  def has_language: (String name) -> bool
376
266
  def language_count: () -> Integer
377
267
  def process: (String source, ProcessConfig config) -> ProcessResult
378
- def self.with_libs_dir: (String libs_dir) -> LanguageRegistry
268
+ def self.new: () -> LanguageRegistry
379
269
  def self.default: () -> LanguageRegistry
380
270
  end
381
271
 
382
- class ParserManifest
383
- # Manifest describing available parser downloads for a specific version.
384
-
385
- attr_reader version: String
386
- attr_reader platforms: Hash[String, PlatformBundle]
387
- attr_reader languages: Hash[String, LanguageInfo]
388
- attr_reader groups: Hash[String, Array[String]]
389
-
390
- def initialize: (version: String, platforms: Hash[String, PlatformBundle], languages: Hash[String, LanguageInfo], groups: Hash[String, Array[String]]) -> void
391
- end
392
-
393
- class PlatformBundle
394
- attr_reader url: String
395
- attr_reader sha256: String
396
- attr_reader size: Integer
397
-
398
- def initialize: (url: String, sha256: String, size: Integer) -> void
399
- end
400
-
401
- class LanguageInfo
402
- attr_reader group: String
403
- attr_reader size: Integer
404
-
405
- def initialize: (group: String, size: Integer) -> void
406
- end
407
-
408
272
  class DownloadManager
409
- # Manages downloading and caching of pre-built parser shared libraries.
410
-
411
- def cache_dir: () -> String
412
273
  def installed_languages: () -> Array[String]
413
- def ensure_languages: (Array[String] names) -> void
414
- def ensure_group: (String group) -> void
415
- def lib_path: (String name) -> String
416
- def fetch_manifest: () -> ParserManifest
417
274
  def download_all_best_effort: () -> Integer
418
275
  def clean_cache: () -> void
419
276
  def self.new: (String version) -> DownloadManager
420
- def self.with_cache_dir: (String version, String cache_dir) -> DownloadManager
421
- def self.default_cache_dir: (String version) -> String
422
277
  end
423
278
 
424
279
  class Language
425
280
  end
426
281
 
282
+ class DataNodeKind
283
+ type value = :key_value | :element | :sequence
284
+ end
285
+
427
286
  class StructureKind
428
- # The kind of structural item found in source code.
429
- #
430
- # Categorizes top-level and nested declarations such as functions, classes,
431
- # structs, enums, traits, and more. Use [`Other`](StructureKind::Other) for
432
- # language-specific constructs that do not fit a standard category.
433
287
  end
434
288
 
435
289
  class CommentKind
436
- # The kind of a comment found in source code.
437
- #
438
- # Distinguishes between single-line comments, block (multi-line) comments,
439
- # and documentation comments.
440
290
  type value = :line | :block | :doc
441
291
  end
442
292
 
443
293
  class DocstringFormat
444
- # The format of a docstring extracted from source code.
445
- #
446
- # Identifies the docstring convention used, which varies by language
447
- # (e.g., Python triple-quoted strings, JSDoc, Rustdoc `///` comments).
448
294
  end
449
295
 
450
296
  class ExportKind
451
- # The kind of an export statement found in source code.
452
- #
453
- # Covers named exports, default exports, and re-exports from other modules.
454
297
  type value = :named | :default | :re_export
455
298
  end
456
299
 
457
300
  class SymbolKind
458
- # The kind of a symbol definition found in source code.
459
- #
460
- # Categorizes symbol definitions such as variables, constants, functions,
461
- # classes, types, interfaces, enums, and modules.
462
301
  end
463
302
 
464
303
  class DiagnosticSeverity
465
- # Severity level of a diagnostic produced during parsing.
466
- #
467
- # Used to classify parse errors, warnings, and informational messages
468
- # found in the syntax tree.
469
304
  type value = :error | :warning | :info
470
305
  end
471
306
 
@@ -481,6 +316,8 @@ module TreeSitterLanguagePack
481
316
 
482
317
  def self.get_locals_query: (String language) -> String?
483
318
 
319
+ def self.get_tags_query: (String language) -> String?
320
+
484
321
  def self.get_language: (String name) -> Language
485
322
 
486
323
  def self.get_parser: (String name) -> Parser
@@ -503,6 +340,8 @@ module TreeSitterLanguagePack
503
340
 
504
341
  def self.download_all: () -> Integer
505
342
 
343
+ def self.download_group: (String name) -> Integer
344
+
506
345
  def self.manifest_languages: () -> Array[String]
507
346
 
508
347
  def self.downloaded_languages: () -> Array[String]
metadata CHANGED
@@ -1,19 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_sitter_language_pack
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.1
4
+ version: 1.9.0
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Kreuzberg Team
8
8
  bindir: bin
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
11
- dependencies: []
12
- description: Pre-compiled tree-sitter grammars for 305 programming languages
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: sorbet-runtime
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '0.5'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '0.5'
26
+ description: Pre-compiled tree-sitter grammars for 306 programming languages
13
27
  executables: []
14
28
  extensions: []
15
29
  extra_rdoc_files: []
16
30
  files:
31
+ - LICENSE
32
+ - README.md
17
33
  - Steepfile
18
34
  - lib/tree_sitter_language_pack.rb
19
35
  - lib/tree_sitter_language_pack/native.rb
@@ -24,7 +40,7 @@ homepage: https://github.com/kreuzberg-dev/tree-sitter-language-pack
24
40
  licenses:
25
41
  - MIT
26
42
  metadata:
27
- keywords: tree-sitter,parser,syntax,language-pack
43
+ keywords: language-pack,parser,syntax,tree-sitter
28
44
  rubygems_mfa_required: 'true'
29
45
  rdoc_options: []
30
46
  require_paths:
@@ -34,6 +50,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
34
50
  - - ">="
35
51
  - !ruby/object:Gem::Version
36
52
  version: 3.2.0
53
+ - - "<"
54
+ - !ruby/object:Gem::Version
55
+ version: '4.0'
37
56
  required_rubygems_version: !ruby/object:Gem::Requirement
38
57
  requirements:
39
58
  - - ">="
@@ -42,5 +61,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
61
  requirements: []
43
62
  rubygems_version: 3.6.9
44
63
  specification_version: 4
45
- summary: Pre-compiled tree-sitter grammars for 305 programming languages
64
+ summary: Pre-compiled tree-sitter grammars for 306 programming languages
46
65
  test_files: []