@rejot-dev/tree-sitter-thalo 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 - present ReJot Nederland B.V., and individual contributors.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,377 @@
1
+ # @rejot-dev/tree-sitter-thalo
2
+
3
+ A Tree-Sitter grammar for parsing **Thalo** entries used in the Knowledge Center.
4
+
5
+ ## Overview
6
+
7
+ Thalo is a syntax for recording structured knowledge entries including lore, opinions, references,
8
+ and journal entries. It also supports a meta-layer for defining entity schemas.
9
+
10
+ ## Markdown Integration
11
+
12
+ Thalo is designed to coexist with markdown. You can embed thalo code blocks inside markdown files
13
+ using fenced code blocks with the `thalo` language identifier:
14
+
15
+ ````markdown
16
+ # My Document
17
+
18
+ Some markdown content here.
19
+
20
+ ```thalo
21
+ 2026-01-05T18:00Z create lore "An insight" #example
22
+ type: "insight"
23
+ subject: ^self
24
+
25
+ # Summary
26
+ This thalo entry lives inside a markdown file.
27
+ ```
28
+
29
+ More markdown content.
30
+ ````
31
+
32
+ When using the `@rejot-dev/thalo-prettier` plugin, Prettier automatically formats Thalo code blocks
33
+ embedded in markdown files. This enables documentation files to include properly formatted Thalo
34
+ examples.
35
+
36
+ ## Instance Entries
37
+
38
+ Create or update instances of entities (lore, opinion, reference, journal):
39
+
40
+ ```
41
+ {timestamp} {directive} {entity} "Title" [^link-id] [#tags...]
42
+ {key}: {value}
43
+ ...
44
+
45
+ {content}
46
+ ```
47
+
48
+ ### Example
49
+
50
+ ```thalo
51
+ 2026-01-05T18:11Z create lore "Custom event streaming system" ^event-streaming #architecture #distributed
52
+ type: "fact"
53
+ subject: ^acme-corp
54
+ date: 2018 ~ 2022
55
+
56
+ # Summary
57
+ The company built a custom event streaming system on top of Postgres before Kafka became widely
58
+ adopted.
59
+ ```
60
+
61
+ ### Header Line
62
+
63
+ | Element | Pattern | Required | Example |
64
+ | --------- | ----------------------------------------- | -------- | ------------------ |
65
+ | Timestamp | `YYYY-MM-DDTHH:MM` | Yes | `2026-01-05T18:11` |
66
+ | Directive | `create` or `update` | Yes | `create` |
67
+ | Entity | `lore`, `opinion`, `reference`, `journal` | Yes | `lore` |
68
+ | Title | Quoted string | Yes | `"My title"` |
69
+ | Link | `^` + identifier | No | `^my-linked-entry` |
70
+ | Tags | `#` + identifier | No | `#architecture` |
71
+
72
+ ### Metadata
73
+
74
+ Indented key-value pairs (2-space indent):
75
+
76
+ ```
77
+ key: "value"
78
+ ref-type: "article"
79
+ related: ^other-entry
80
+ source: "Technical documentation"
81
+ updated: 2026-01-05T18:11
82
+ ```
83
+
84
+ - **Keys**: lowercase, may contain hyphens/underscores
85
+ - **Values**: quoted strings, links (`^id`), timestamps, date ranges, or queries (no plain text)
86
+
87
+ ### Content
88
+
89
+ Indented content after a blank line separator:
90
+
91
+ ```
92
+ # Section Header
93
+ Regular paragraph text continues here
94
+ across multiple lines.
95
+
96
+ # Another Section
97
+ More content with markdown-style headers.
98
+ ```
99
+
100
+ - Content lines must be indented (2 spaces)
101
+ - Markdown headers (`#`, `##`, etc.) are recognized
102
+ - Blank lines within content are preserved
103
+
104
+ ## Schema Entries
105
+
106
+ Define or alter entity schemas using `define-entity` and `alter-entity` directives:
107
+
108
+ ```
109
+ {timestamp} define-entity {entity-name} "Description" [#tags...]
110
+ # Metadata
111
+ {field-name}?: {type} [= {default}] [; "description"]
112
+ ...
113
+ # Sections
114
+ {SectionName}? [; "description"]
115
+ ...
116
+ ```
117
+
118
+ ### Example
119
+
120
+ ```thalo
121
+ 2026-01-05T18:12Z define-entity reference "Collected resources"
122
+ # Metadata
123
+ url?: string ; "the url to the resource"
124
+ ref-type: "article" | "video" | "tweet"
125
+ author?: string | link
126
+ status?: "unread" | "read" = "unread"
127
+ related?: link[]
128
+
129
+ # Sections
130
+ Summary ; "Brief summary of the content"
131
+ KeyTakeaways?
132
+ ```
133
+
134
+ ### alter-entity
135
+
136
+ Modify existing entity schemas by adding or removing fields/sections:
137
+
138
+ ```thalo
139
+ 2026-01-10T14:00Z alter-entity reference "Add published field, remove legacy"
140
+ # Metadata
141
+ published: datetime ; "publication date"
142
+ # Remove Metadata
143
+ legacy-field ; "deprecated in favor of new-field"
144
+
145
+ # Sections
146
+ NewSection? ; "added section"
147
+
148
+ # Remove Sections
149
+ OldSection ; "no longer needed"
150
+ ```
151
+
152
+ ### Type System (Schema Definitions)
153
+
154
+ | Type | Example | Description |
155
+ | ------------ | -------------------- | ------------------------ |
156
+ | `string` | `name: string` | Free-form text |
157
+ | `date` | `published: date` | Single date |
158
+ | `date-range` | `period: date-range` | Date range (2022 ~ 2024) |
159
+ | `link` | `related: link` | Reference to entry |
160
+ | Literal | `status: "read"` | Exact string value |
161
+ | Union | `type: "a" \| "b"` | One of multiple types |
162
+ | Array | `tags: string[]` | Array of type |
163
+ | Default | `status?: "a" = "a"` | Default value |
164
+
165
+ ## Typed Metadata Values
166
+
167
+ The grammar parses metadata values into typed AST nodes, enabling downstream validation without
168
+ regex-based parsing. All values must be explicitly typed (no plain/unquoted values).
169
+
170
+ ### Links
171
+
172
+ Single link references:
173
+
174
+ ```text
175
+ subject: ^self
176
+ supersedes: ^previous-opinion
177
+ ```
178
+
179
+ **AST node**: `link`
180
+
181
+ ### Quoted Values
182
+
183
+ Values in double quotes (required for all string values including literal types):
184
+
185
+ ```text
186
+ type: "fact"
187
+ confidence: "high"
188
+ description: "A longer text value"
189
+ ```
190
+
191
+ **AST node**: `quoted_value`
192
+
193
+ ### Datetime Values
194
+
195
+ Date or datetime values (date with optional time):
196
+
197
+ ```text
198
+ published: 2026-01-07
199
+ updated: 2026-01-07T12:00
200
+ created: 2026-01-05T18:11
201
+ ```
202
+
203
+ **AST node**: `datetime_value`
204
+
205
+ ### Date Ranges
206
+
207
+ Date ranges with the `~` separator:
208
+
209
+ ```text
210
+ date: 2022 ~ 2024
211
+ period: 2022-05 ~ 2024-12-31
212
+ ```
213
+
214
+ **AST node**: `date_range`
215
+
216
+ ### Query Expressions
217
+
218
+ Source queries for synthesis entries:
219
+
220
+ ```text
221
+ sources: lore where subject = ^self and #career
222
+ sources: lore where type = "fact"
223
+ ```
224
+
225
+ **AST structure**: `query` → `query_conditions` → `query_condition`
226
+
227
+ Query conditions support:
228
+
229
+ - **Field conditions**: `field = "quoted value"` or `field = ^link`
230
+ - **Tag conditions**: `#tag`
231
+ - **Link conditions**: `^link-id`
232
+
233
+ ### Arrays (Unified)
234
+
235
+ Comma-separated lists of any value type (links, quoted values, timestamps, date ranges, or queries):
236
+
237
+ ```text
238
+ related: ^ref1, ^ref2, ^ref3
239
+ authors: "Jane Doe", ^john-ref, "Alice Smith"
240
+ periods: 2020 ~ 2022, 2023 ~ 2024
241
+ sources: lore where #career, journal where #reflection
242
+ ```
243
+
244
+ **AST structure**: `value_array` → `(link | quoted_value | datetime_value | date_range | query)*`
245
+
246
+ ### Field Syntax
247
+
248
+ - **Required by default**: Fields without `?` are required
249
+ - **Optional marker**: `?` after field name makes it optional
250
+ - **Description**: `; "text"` adds documentation
251
+
252
+ ### Section Names
253
+
254
+ - Must start with uppercase letter (PascalCase)
255
+ - Examples: `Summary`, `KeyTakeaways`, `Reasoning`
256
+
257
+ ## AST Structure
258
+
259
+ ### Instance Entry
260
+
261
+ ```
262
+ source_file
263
+ └── entry
264
+ └── instance_entry
265
+ ├── instance_header
266
+ │ ├── timestamp
267
+ │ ├── instance_directive
268
+ │ ├── entity
269
+ │ ├── title
270
+ │ ├── link?
271
+ │ └── tag*
272
+ ├── metadata*
273
+ │ ├── key
274
+ │ └── value
275
+ └── content?
276
+ ├── markdown_header*
277
+ └── content_line*
278
+ ```
279
+
280
+ ### Schema Entry
281
+
282
+ ```
283
+ source_file
284
+ └── entry
285
+ └── schema_entry
286
+ ├── schema_header
287
+ │ ├── timestamp
288
+ │ ├── schema_directive
289
+ │ ├── identifier
290
+ │ ├── title
291
+ │ ├── link?
292
+ │ └── tag*
293
+ ├── metadata_block?
294
+ │ └── field_definition*
295
+ │ ├── field_name
296
+ │ ├── optional_marker?
297
+ │ ├── type_expression
298
+ │ ├── default_value?
299
+ │ └── description?
300
+ ├── sections_block?
301
+ │ └── section_definition*
302
+ │ ├── section_name
303
+ │ ├── optional_marker?
304
+ │ └── description?
305
+ ├── remove_metadata_block?
306
+ │ └── field_removal*
307
+ └── remove_sections_block?
308
+ └── section_removal*
309
+ ```
310
+
311
+ ## Usage
312
+
313
+ ```bash
314
+ # Generate parser
315
+ pnpm exec tree-sitter generate
316
+
317
+ # Run tests
318
+ pnpm exec tree-sitter test
319
+
320
+ # Parse a file
321
+ pnpm exec tree-sitter parse path/to/file.thalo
322
+ ```
323
+
324
+ ## Limitations
325
+
326
+ ### General
327
+
328
+ - Titles cannot contain unescaped quotes
329
+ - Content text starting with `#` is always parsed as a markdown header
330
+ - Only 2-space indentation is supported
331
+ - Section names must be PascalCase
332
+ - Field names must be lowercase (kebab-case or camelCase)
333
+
334
+ ### Typed Value Parsing
335
+
336
+ - **All string values must be quoted**: There are no plain/unquoted values. Literal types like
337
+ `"fact"` require quotes.
338
+
339
+ ```text
340
+ # Correct:
341
+ type: "fact"
342
+ description: "Some text"
343
+ ```
344
+
345
+ - **No inline comments in values**: Comments (`//`) after metadata values break parsing. Use a
346
+ separate comment line instead.
347
+
348
+ ```text
349
+ # Wrong - causes parse error:
350
+ type: "fact" // this breaks
351
+
352
+ # Correct:
353
+ // Note about type
354
+ type: "fact"
355
+ ```
356
+
357
+ - **Single dates must be quoted**: The grammar only recognizes date ranges (`YYYY ~ YYYY`). Single
358
+ dates should be quoted strings.
359
+
360
+ ```text
361
+ # Correct:
362
+ published: "2024-05-11"
363
+ period: 2022 ~ 2024
364
+ ```
365
+
366
+ - **Query `where` clause is required**: Queries must include a `where` clause.
367
+
368
+ ```text
369
+ # Correct:
370
+ sources: lore where #career
371
+
372
+ # Wrong - not a valid query:
373
+ sources: lore
374
+ ```
375
+
376
+ - **Empty values cause parse errors**: Metadata must have a value; use optional fields and omit the
377
+ field entirely instead.
package/binding.gyp ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "targets": [
3
+ {
4
+ "target_name": "tree_sitter_thalo_binding",
5
+ "dependencies": [
6
+ "<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
7
+ ],
8
+ "include_dirs": [
9
+ "src",
10
+ ],
11
+ "sources": [
12
+ "bindings/node/binding.cc",
13
+ "src/parser.c",
14
+ ],
15
+ "variables": {
16
+ "has_scanner": "<!(node -p \"fs.existsSync('src/scanner.c')\")"
17
+ },
18
+ "conditions": [
19
+ ["has_scanner=='true'", {
20
+ "sources+": ["src/scanner.c"],
21
+ }],
22
+ ["OS!='win'", {
23
+ "cflags_c": [
24
+ "-std=c11",
25
+ ],
26
+ }, { # OS == "win"
27
+ "cflags_c": [
28
+ "/std:c11",
29
+ "/utf-8",
30
+ ],
31
+ }],
32
+ ],
33
+ }
34
+ ]
35
+ }
@@ -0,0 +1,19 @@
1
+ #include <napi.h>
2
+
3
+ typedef struct TSLanguage TSLanguage;
4
+
5
+ extern "C" TSLanguage *tree_sitter_thalo();
6
+
7
+ // "tree-sitter", "language" hashed with BLAKE2
8
+ const napi_type_tag LANGUAGE_TYPE_TAG = {
9
+ 0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
10
+ };
11
+
12
+ Napi::Object Init(Napi::Env env, Napi::Object exports) { // Module initializer: publishes the grammar as `exports.language`.
13
+ Napi::External<TSLanguage> wrapped = Napi::External<TSLanguage>::New(env, tree_sitter_thalo()); // wrap the pointer returned by tree_sitter_thalo()
14
+ wrapped.TypeTag(&LANGUAGE_TYPE_TAG); // tag the external with LANGUAGE_TYPE_TAG
15
+ exports.Set("language", wrapped);
16
+ return exports;
17
+ }
18
+
19
+ NODE_API_MODULE(tree_sitter_thalo_binding, Init)
@@ -0,0 +1,11 @@
1
+ import assert from "node:assert";
2
+ import { test } from "node:test";
3
+ import Parser from "tree-sitter";
4
+
5
+ test("can load grammar", () => {
6
+ const parser = new Parser();
7
+ assert.doesNotReject(async () => {
8
+ const { default: language } = await import("./index.js");
9
+ parser.setLanguage(language);
10
+ });
11
+ });
@@ -0,0 +1,60 @@
1
+ type BaseNode = { // Fields shared by both NodeInfo variants and by the entries of ChildNode.types.
2
+ type: string; // NOTE(review): presumably the node type name as listed in node-types.json — confirm
3
+ named: boolean; // NOTE(review): presumably distinguishes named nodes from anonymous tokens — confirm
4
+ };
5
+
6
+ type ChildNode = { // Shape of each NodeInfo.fields value and of each NodeInfo.children element.
7
+ multiple: boolean; // NOTE(review): presumably whether more than one child may occupy this slot — confirm
8
+ required: boolean; // NOTE(review): presumably whether at least one child must be present — confirm
9
+ types: BaseNode[]; // node descriptors associated with this slot
10
+ };
11
+
12
+ type NodeInfo = // Element type of `binding.nodeTypeInfo`; one of two shapes, both extending BaseNode.
13
+ | (BaseNode & {
14
+ subtypes: BaseNode[]; // first shape: carries only a list of subtypes
15
+ })
16
+ | (BaseNode & {
17
+ fields: { [name: string]: ChildNode }; // second shape: child slots keyed by name
18
+ children: ChildNode[]; // second shape: child slots held in a list rather than under a name
19
+ });
20
+
21
+ /**
22
+ * The tree-sitter language object for this grammar.
23
+ *
24
+ * @see {@linkcode https://tree-sitter.github.io/node-tree-sitter/interfaces/Parser.Language.html Parser.Language}
25
+ *
26
+ * @example
27
+ * import Parser from "tree-sitter";
28
+ * import Thalo from "@rejot-dev/tree-sitter-thalo";
29
+ *
30
+ * const parser = new Parser();
31
+ * parser.setLanguage(Thalo);
32
+ */
33
+ declare const binding: {
34
+ /**
35
+ * The inner language object.
36
+ * @private
37
+ */
38
+ language: unknown;
39
+
40
+ /**
41
+ * The content of the `node-types.json` file for this grammar.
42
+ *
43
+ * @see {@linkplain https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types Static Node Types}
44
+ */
45
+ nodeTypeInfo: NodeInfo[];
46
+
47
+ /** The syntax highlighting query for this grammar. */
48
+ HIGHLIGHTS_QUERY?: string;
49
+
50
+ /** The language injection query for this grammar. */
51
+ INJECTIONS_QUERY?: string;
52
+
53
+ /** The local variable query for this grammar. */
54
+ LOCALS_QUERY?: string;
55
+
56
+ /** The symbol tagging query for this grammar. */
57
+ TAGS_QUERY?: string;
58
+ };
59
+
60
+ export default binding;
@@ -0,0 +1,38 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { fileURLToPath } from "node:url";
3
+
4
+ const root = fileURLToPath(new URL("../..", import.meta.url)); // filesystem path two directory levels above this module
5
+
6
+ const binding = // the native addon object; loaded one way under Bun and another way everywhere else
7
+ typeof process.versions.bun === "string"
8
+ ? // Support `bun build --compile` by being statically analyzable enough to find the .node file at build-time
9
+ await import(`${root}/prebuilds/${process.platform}-${process.arch}/tree-sitter-thalo.node`)
10
+ : (await import("node-gyp-build")).default(root); // otherwise hand `root` to node-gyp-build's default export and use what it returns
11
+
12
+ try {
13
+ const nodeTypes = await import(`${root}/src/node-types.json`, { with: { type: "json" } });
14
+ binding.nodeTypeInfo = nodeTypes.default;
15
+ } catch {}
16
+
17
+ const queries = [ // [property name to define on `binding`, path of the query file] pairs
18
+ ["HIGHLIGHTS_QUERY", `${root}/queries/highlights.scm`],
19
+ ["INJECTIONS_QUERY", `${root}/queries/injections.scm`],
20
+ ["LOCALS_QUERY", `${root}/queries/locals.scm`],
21
+ ["TAGS_QUERY", `${root}/queries/tags.scm`],
22
+ ];
23
+
24
+ for (const [prop, path] of queries) { // install one lazy accessor per query; no file is read until the property is first accessed
25
+ Object.defineProperty(binding, prop, {
26
+ configurable: true, // configurable so the getter below can delete this accessor
27
+ enumerable: true,
28
+ get() {
29
+ delete binding[prop]; // drop the accessor first so the assignment below creates a plain data property
30
+ try {
31
+ binding[prop] = readFileSync(path, "utf8");
32
+ } catch {} // unreadable file: the property is simply left unset
33
+ return binding[prop]; // the file contents, or undefined when the read failed
34
+ },
35
+ });
36
+ }
37
+
38
+ export default binding;