@rejot-dev/tree-sitter-thalo 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/grammar.js ADDED
@@ -0,0 +1,303 @@
1
+ export default grammar({
2
+ name: "thalo",
3
+
4
+ extras: (_) => [" "],
5
+
6
+ externals: ($) => [
7
+ $["_indent"], // Newline followed by indentation (1+ spaces or tab)
8
+ $["_content_blank"], // Blank line within content blocks
9
+ $["error_sentinel"], // Detects error recovery mode
10
+ ],
11
+
12
+ // Resolve ambiguity: a link right after the directive may be the entry's argument or the first trailing link
13
+ conflicts: ($) => [[$.data_entry]],
14
+
15
+ rules: {
16
+ source_file: ($) => repeat(choice($.entry, $.comment, $._nl)),
17
+
18
+ // Comment token (the text of a comment)
19
+ comment: (_) => token(seq("//", /[^\r\n]*/)),
20
+
21
+ // Indented comment line (within entries). Uses prec(2), the same as metadata, so both are tried via the choice() in data_entry
22
+ comment_line: ($) => prec(2, seq($["_indent"], $.comment)),
23
+
24
+ // =========================================================================
25
+ // Unified entry structure
26
+ // Entry types are differentiated by the directive keyword in the header.
27
+ // Schema entries (define-entity, alter-entity) have schema blocks.
28
+ // Data entries (create, update, define-synthesis, actualize-synthesis) have metadata/content.
29
+ // =========================================================================
30
+
31
+ entry: ($) => choice($.schema_entry, $.data_entry),
32
+
33
+ // Schema entries: define-entity or alter-entity with optional schema blocks
34
+ schema_entry: ($) =>
35
+ seq(
36
+ field("timestamp", $.timestamp),
37
+ field("directive", $.schema_directive),
38
+ field("argument", $.identifier),
39
+ field("title", $.title),
40
+ repeat(choice($.link, $.tag)),
41
+ repeat($._schema_block),
42
+ ),
43
+
44
+ schema_directive: (_) => choice("define-entity", "alter-entity"),
45
+
46
+ // Data entries: create, update, define-synthesis, or actualize-synthesis, with optional metadata and optional content
47
+ data_entry: ($) =>
48
+ seq(
49
+ field("timestamp", $.timestamp), // every entry header starts with a timestamp
50
+ field("directive", $.data_directive), // one of the data_directive keywords
51
+ field("argument", optional(choice($.identifier, $.link))), // optional: an identifier or a ^link
52
+ field("title", optional($.title)), // optional quoted title
53
+ repeat(choice($.link, $.tag)), // trailing ^links and #tags, before any indented line
54
+ repeat(choice($.metadata, $.comment_line)), // indented key: value lines mixed with indented comments
55
+ optional($.content), // optional body; the content rule requires it to open with a markdown header
56
+ ),
57
+
58
+ data_directive: (_) => choice("create", "update", "define-synthesis", "actualize-synthesis"),
59
+
60
+ // Identifier for entity names (lore, opinion, etc.) and custom entity definitions
61
+ identifier: (_) => token(/[a-z][a-zA-Z0-9\-_]*/),
62
+
63
+ // prec(2) to prefer metadata over content_line when we see key:value
64
+ metadata: ($) => prec(2, seq($["_indent"], field("key", $.key), ":", field("value", $.value))),
65
+
66
+ // =========================================================================
67
+ // Schema blocks (# Metadata, # Sections, # Remove Metadata, # Remove Sections)
68
+ // =========================================================================
69
+
70
+ _schema_block: ($) =>
71
+ choice($.metadata_block, $.sections_block, $.remove_metadata_block, $.remove_sections_block),
72
+
73
+ metadata_block: ($) => prec(2, seq($._metadata_header, repeat1($.field_definition))),
74
+ sections_block: ($) => prec(2, seq($._sections_header, $._section_lines)),
75
+ remove_metadata_block: ($) => prec(2, seq($._remove_metadata_header, repeat1($.field_removal))),
76
+ remove_sections_block: ($) =>
77
+ prec(2, seq($._remove_sections_header, repeat1($.section_removal))),
78
+
79
+ _section_lines: ($) => prec.right(repeat1($.section_definition)),
80
+
81
+ // Block headers: newline + optional blank lines + indent + "# BlockName"
82
+ // Note: do NOT consume trailing spaces; they are ignored via `extras`.
83
+ _metadata_header: (_) => token(/\r?\n(?:[ \t]*\r?\n)*(?:\t|[ \t][ \t])+# Metadata/),
84
+ _sections_header: (_) => token(/\r?\n(?:[ \t]*\r?\n)*(?:\t|[ \t][ \t])+# Sections/),
85
+ _remove_metadata_header: (_) =>
86
+ token(/\r?\n(?:[ \t]*\r?\n)*(?:\t|[ \t][ \t])+# Remove Metadata/),
87
+ _remove_sections_header: (_) =>
88
+ token(/\r?\n(?:[ \t]*\r?\n)*(?:\t|[ \t][ \t])+# Remove Sections/),
89
+
90
+ // =========================================================================
91
+ // Field definitions (for schema metadata blocks)
92
+ // =========================================================================
93
+
94
+ field_definition: ($) =>
95
+ seq(
96
+ $._field_line_start, // newline + indent + field name; shows up as field_name in the tree
97
+ optional($.optional_marker), // optional "?" directly in the definition
98
+ ":", // separates the name from its type
99
+ field("type", $.type_expression), // union type or a single type term
100
+ optional(seq("=", field("default", $.default_value))), // optional "= <default_value>"
101
+ optional(seq(";", field("description", $.description))), // optional "; <double-quoted description>"
102
+ ),
103
+
104
+ // Newline + indent + field name (aliased to field_name in AST)
105
+ _field_line_start: ($) => alias($._field_name_token, $["field_name"]),
106
+ _field_name_token: (_) => token(/\r?\n(?:\t|[ \t][ \t])+[a-z][a-zA-Z0-9\-_]*/),
107
+
108
+ optional_marker: (_) => "?",
109
+ description: (_) => token(/"[^"]*"/),
110
+
111
+ // =========================================================================
112
+ // Section definitions (for schema sections blocks)
113
+ // =========================================================================
114
+
115
+ // Section names start uppercase, may contain spaces: "Key Takeaways"
116
+ section_definition: ($) =>
117
+ seq(
118
+ $._section_line_start,
119
+ optional($.optional_marker),
120
+ optional(seq(";", field("description", $.description))),
121
+ ),
122
+
123
+ // Newline + indent + section name (aliased to section_name in AST)
124
+ _section_line_start: ($) => alias($._section_name_token, $["section_name"]),
125
+ _section_name_token: (_) => token(/\r?\n(?:\t|[ \t][ \t])+[A-Z][a-zA-Z0-9]*( +[a-zA-Z0-9]+)*/),
126
+
127
+ // =========================================================================
128
+ // Removals (for alter-entity)
129
+ // =========================================================================
130
+
131
+ field_removal: ($) =>
132
+ seq($._field_line_start, optional(seq(";", field("reason", $.description)))),
133
+ section_removal: ($) =>
134
+ seq($._section_line_start, optional(seq(";", field("reason", $.description)))),
135
+
136
+ // =========================================================================
137
+ // Type expressions (for field definitions)
138
+ // =========================================================================
139
+
140
+ type_expression: ($) => choice($.union_type, $._type_term),
141
+ union_type: ($) => prec.left(1, seq($._type_term, repeat1(seq("|", $._type_term)))),
142
+ _type_term: ($) => choice($.array_type, $.primitive_type, $.literal_type, $.unknown_type),
143
+ array_type: ($) => seq($._array_element, token.immediate("[]")),
144
+ _array_element: ($) => choice($.primitive_type, $.literal_type, $.paren_type, $.unknown_type),
145
+ paren_type: ($) => seq("(", $.type_expression, ")"),
146
+ primitive_type: (_) => choice("string", "datetime", "daterange", "link", "number"),
147
+ literal_type: (_) => token(/"[^"]*"/),
148
+ // Fallback for unrecognized type identifiers (e.g., "date-time" typo)
149
+ // Tree-sitter prefers exact matches (primitive_type) over regex patterns
150
+ unknown_type: (_) => token(/[a-z][a-zA-Z0-9\-_]*/),
151
+ default_value: ($) => choice($.quoted_value, $.link, $.datetime_value, $.number_value),
152
+
153
+ // =========================================================================
154
+ // Content (markdown body for instance entries)
155
+ // =========================================================================
156
+
157
+ // Content must start with a markdown header (# Section Name)
158
+ content: ($) =>
159
+ prec.right( // right-associative: keep absorbing following lines into this content block
160
+ seq(
161
+ repeat($["_content_blank"]), // blank lines may precede the first header
162
+ $.markdown_header, // the first non-blank content line has to be a header
163
+ repeat(choice($.markdown_header, $.content_line, $.comment_line, $["_content_blank"])), // then any mix of headers, text, comments and blanks
164
+ ),
165
+ ),
166
+
167
+ // prec(2) for headers vs prec(1) for content lines (headers win when line starts with #)
168
+ markdown_header: ($) => prec.right(2, seq($["_indent"], $.md_indicator, $.md_heading_text)),
169
+ content_line: ($) => prec.right(1, seq($["_indent"], $.content_text)),
170
+
171
+ md_indicator: (_) => token.immediate(/#+/),
172
+ md_heading_text: (_) => token.immediate(/ [^\r\n]+/),
173
+ // Must not start with # (would be header) or // (would be comment)
174
+ content_text: (_) => token.immediate(/[^#/\r\n][^\r\n]*|\/[^/\r\n][^\r\n]*/),
175
+
176
+ // =========================================================================
177
+ // Common tokens
178
+ // =========================================================================
179
+
180
+ _nl: (_) => /\r?\n/,
181
+
182
+ // Timestamp is decomposed into date, T separator, time, and optional timezone parts.
183
+ // Every part after the date is a separate token.immediate() token, so no whitespace may appear inside a timestamp.
184
+ // Missing timezone is validated in builder.ts and produces a specific error.
185
+ timestamp: ($) =>
186
+ seq(
187
+ field("date", $.timestamp_date), // YYYY-MM-DD shaped digits (plain token, may follow whitespace)
188
+ $.timestamp_t, // literal "T", must directly follow the date
189
+ field("time", $.timestamp_time), // HH:MM shaped digits, immediate
190
+ field("tz", optional($.timestamp_tz)), // "Z" or a +/-HH:MM offset; optional at grammar level
191
+ ),
192
+ timestamp_date: (_) => token(/[12]\d{3}-[01]\d-[0-3]\d/),
193
+ timestamp_t: (_) => token.immediate("T"),
194
+ timestamp_time: (_) => token.immediate(/[0-2]\d:[0-5]\d/),
195
+ timestamp_tz: (_) => token.immediate(/Z|[+-][0-2]\d:[0-5]\d/),
196
+ // Allow unclosed quotes to terminate at newline for error recovery
197
+ title: (_) => token(/"[^"\r\n]*"?/),
198
+ link: (_) => token(/\^[A-Za-z0-9\-_/.:]*[A-Za-z0-9]/),
199
+ tag: (_) => token(/#[A-Za-z0-9\-_/.]+/),
200
+ key: (_) => token(/[a-z][a-zA-Z0-9\-_]*/),
201
+
202
+ // =========================================================================
203
+ // Typed metadata values
204
+ // =========================================================================
205
+
206
+ // Value parsing uses typed tokens. All values must be explicitly typed:
207
+ // - Links: ^identifier
208
+ // - Quoted strings: "text" (required for literal types like "fact")
209
+ // - Datetime: YYYY-MM-DD or YYYY-MM-DDTHH:MM (date with optional time)
210
+ // - Daterange: YYYY ~ YYYY, YYYY ~, YYYY-MM, YYYY Q1, etc. (not a bare YYYY, which lexes as a number)
211
+ // - Numbers: 123, -45.67
212
+ // - Queries: entity where conditions
213
+ // - Arrays: comma-separated values of any type
214
+ value: ($) =>
215
+ choice(
216
+ prec.dynamic(6, $.value_array), // Comma-separated values (2+ elements)
217
+ prec.dynamic(5, $.daterange), // Dateranges with ~, Q, or implicit period
218
+ prec.dynamic(4, $.datetime_value), // YYYY-MM-DD or YYYY-MM-DDTHH:MM
219
+ prec.dynamic(3, $.query), // entity where conditions
220
+ prec.dynamic(3, $.link), // ^identifier
221
+ prec.dynamic(3, $.quoted_value), // "quoted text"
222
+ prec.dynamic(2, $.number_value), // 123, -45.67
223
+ ),
224
+
225
+ // Quoted string as a value (required for literal types)
226
+ quoted_value: (_) => token(/"[^"]*"/),
227
+
228
+ // Number value: integer or float (e.g., 123, -45.67)
229
+ number_value: (_) => token(/-?\d+(\.\d+)?/),
230
+
231
+ // Datetime value: date with optional time (split into tokens)
232
+ // YYYY-MM-DD, YYYY-MM-DDTHH:MM, or YYYY-MM-DDTHH:MM followed by Z or a +/-HH:MM offset
233
+ datetime_value: ($) =>
234
+ seq(
235
+ field("date", $.datetime_date),
236
+ optional(
237
+ seq($.datetime_t, field("time", $.datetime_time), optional(field("tz", $.datetime_tz))),
238
+ ),
239
+ ),
240
+ datetime_date: (_) => token(/[12]\d{3}-[01]\d-[0-3]\d/),
241
+ datetime_t: (_) => token.immediate("T"),
242
+ datetime_time: (_) => token.immediate(/[0-2]\d:[0-5]\d/),
243
+ datetime_tz: (_) => token.immediate(/Z|[+-][0-2]\d:[0-5]\d/),
244
+
245
+ // Daterange: date period or range
246
+ // Formats:
247
+ // a. Single period: YYYY-MM, YYYY Q1 (implicit start~end - YYYY alone conflicts with number)
248
+ // b. Two full dates: YYYY-MM-DD ~ YYYY-MM-DD
249
+ // c. Two partial: YYYY ~ YYYY, YYYY-MM ~ YYYY-MM
250
+ // d. Open-ended: YYYY ~, YYYY-MM ~ (start till now)
251
+ // Note: Bare YYYY is ambiguous with number type, so use "YYYY ~" for year periods
252
+ daterange: (_) =>
253
+ choice(
254
+ // Quarter: YYYY Q1-Q4
255
+ token(/[12]\d{3} +Q[1-4]/),
256
+ // Range with ~: partial ~ optional(partial) - includes open-ended "YYYY ~"
257
+ token(/[12]\d{3}(-[01]\d(-[0-3]\d)?)? *~( *[12]\d{3}(-[01]\d(-[0-3]\d)?)?)?/),
258
+ // Implicit month: YYYY-MM (hyphen disambiguates from number)
259
+ token(/[12]\d{3}-[01]\d/),
260
+ ),
261
+
262
+ // Unified array: comma-separated values of any type
263
+ value_array: ($) =>
264
+ prec.right(seq($._value_array_element, repeat1(seq(",", $._value_array_element)))),
265
+
266
+ _value_array_element: ($) =>
267
+ choice($.link, $.quoted_value, $.datetime_value, $.daterange, $.query, $.number_value),
268
+
269
+ // =========================================================================
270
+ // Query expressions (for sources metadata)
271
+ // =========================================================================
272
+
273
+ // Single query: entity where conditions
274
+ query: ($) =>
275
+ seq(field("entity", $.query_entity), "where", field("conditions", $.query_conditions)),
276
+
277
+ // Query entity (lore, journal, opinion, reference)
278
+ query_entity: (_) => token(/[a-z][a-zA-Z0-9\-_]*/),
279
+
280
+ // Conditions joined by "and"
281
+ query_conditions: ($) => seq($.query_condition, repeat(seq("and", $.query_condition))),
282
+
283
+ // Individual condition types
284
+ query_condition: ($) =>
285
+ choice(
286
+ $.field_condition, // field = value
287
+ $.tag_condition, // #tag
288
+ $.link_condition, // ^link
289
+ ),
290
+
291
+ field_condition: ($) =>
292
+ seq(field("field", $.condition_field), "=", field("value", $._condition_value)),
293
+
294
+ // Condition field name
295
+ condition_field: (_) => token(/[a-z][a-zA-Z0-9\-_]*/),
296
+
297
+ // Condition values: links or quoted strings only (no plain values)
298
+ _condition_value: ($) => choice($.link, $.quoted_value),
299
+
300
+ tag_condition: ($) => $.tag,
301
+ link_condition: ($) => $.link,
302
+ },
303
+ });
package/package.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "@rejot-dev/tree-sitter-thalo",
3
+ "version": "0.0.0",
4
+ "type": "module",
5
+ "repository": {
6
+ "type": "git",
7
+ "url": "https://github.com/rejot-dev/thalo.git",
8
+ "directory": "packages/grammar"
9
+ },
10
+ "license": "MIT",
11
+ "main": "bindings/node/index.js",
12
+ "types": "bindings/node/index.d.ts",
13
+ "exports": {
14
+ ".": {
15
+ "types": "./bindings/node/index.d.ts",
16
+ "default": "./bindings/node/index.js"
17
+ },
18
+ "./tree-sitter-thalo.wasm": "./tree-sitter-thalo.wasm"
19
+ },
20
+ "files": [
21
+ "grammar.js",
22
+ "binding.gyp",
23
+ "prebuilds/**",
24
+ "bindings/node/*",
25
+ "queries/*",
26
+ "src/**",
27
+ "tree-sitter-thalo.wasm"
28
+ ],
29
+ "devDependencies": {
30
+ "@types/node": "^24",
31
+ "node-gyp": "^10.2.0",
32
+ "tree-sitter-cli": "^0.26.3",
33
+ "node-addon-api": "^8.5.0",
34
+ "node-gyp-build": "^4.8.4",
35
+ "@rejot-private/typescript-config": "0.0.1"
36
+ },
37
+ "scripts": {
38
+ "build": "tree-sitter generate && node scripts/check-rebuild.mjs --fix",
39
+ "build:wasm": "tree-sitter build --wasm --output tree-sitter-thalo.wasm",
40
+ "build:native": "pnpm exec node-gyp rebuild",
41
+ "check:native": "node scripts/check-rebuild.mjs",
42
+ "test": "tree-sitter test",
43
+ "types:check": "tsc --noEmit"
44
+ }
45
+ }
@@ -0,0 +1,71 @@
1
+ ; Timestamps - displayed as special constants
2
+ (timestamp) @number
3
+
4
+ ; Directives - keywords for entry types
5
+ (data_directive) @keyword
6
+ (schema_directive) @keyword
7
+
8
+ ; Entity identifiers (the argument of schema entries and, when not a link, of data entries)
9
+ (identifier) @type
10
+
11
+ ; Title strings
12
+ (title) @string
13
+
14
+ ; Links (^reference-id)
15
+ (link) @tag
16
+
17
+ ; Tags (#category)
18
+ (tag) @attribute
19
+
20
+ ; Metadata key-value pairs
21
+ (metadata
22
+ (key) @property
23
+ (value) @string)
24
+
25
+ ; Field definitions in schema
26
+ (field_definition
27
+ (field_name) @property
28
+ (optional_marker)? @punctuation.special
29
+ (type_expression) @type
30
+ (default_value)? @string
31
+ (description)? @comment)
32
+
33
+ ; Section definitions in schema
34
+ ; Using @variable instead of @label since @label isn't in most themes
35
+ (section_definition
36
+ (section_name) @variable
37
+ (optional_marker)? @punctuation.special
38
+ (description)? @comment)
39
+
40
+ ; Type expressions
41
+ (primitive_type) @type.builtin
42
+ (literal_type) @string
43
+ (array_type) @type
44
+
45
+ ; Punctuation
46
+ ":" @punctuation.delimiter
47
+ ";" @punctuation.delimiter
48
+ "=" @operator
49
+ "|" @operator
50
+ "[]" @punctuation.bracket
51
+
52
+ ; Markdown headers in content
53
+ (markdown_header) @markup.heading
54
+
55
+ ; Content lines (regular text)
56
+ (content_line) @text
57
+
58
+ ; Field/section removals
59
+ (field_removal
60
+ (field_name) @property
61
+ (description)? @comment)
62
+
63
+ (section_removal
64
+ (section_name) @variable
65
+ (description)? @comment)
66
+
67
+ ; Description strings (in schema definitions)
68
+ (description) @comment
69
+
70
+ ; Comments
71
+ (comment) @comment