@felixtensor/tree-sitter-mlir 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/grammar.js CHANGED
@@ -5,20 +5,45 @@ export default grammar({
5
5
  name: "mlir",
6
6
  extras: ($) => [/[\s\x00]/, $.comment],
7
7
  conflicts: ($) => [
8
+ // Core MLIR overlaps: shaped dimensions, aliases, pretty dialect payloads,
9
+ // value/type lists, and affine syntax share prefixes by design.
8
10
  [$._static_dim_list, $._static_dim_list],
9
- [$.dictionary_attribute, $.region],
10
- [$.custom_op_name, $.attribute_entry],
11
11
  [$.type_alias, $.dialect_namespace],
12
12
  [$.dialect_namespace, $.attribute_alias],
13
13
  [$.pretty_dialect_item],
14
- [$.array_literal, $._custom_body_element_base],
15
- [$._custom_body_element_base, $.tensor_type],
16
- [$._custom_body_element_base, $._custom_body_arrow],
17
- [$._generic_custom_operation_with_location_attr_dict, $.custom_op_name],
18
- [$._custom_body_dict_key, $.attribute_entry],
19
14
  [$._value_use_list, $._value_use_and_type],
20
15
  [$._type_list_no_parens, $._type_or_func_type],
21
16
  [$._type_list_parens, $._multi_dim_affine_expr_parens],
17
+
18
+ // Custom operation fallback overlaps: loose body syntax must preserve
19
+ // dialect keywords, dictionary-looking payloads, and loc-sensitive forms
20
+ // without enumerating every upstream dialect operation.
21
+ [$.custom_op_name, $.attribute_entry],
22
+ [$.array_literal, $._custom_body_array_keyword],
23
+ [$._custom_body_tensor_keyword, $.tensor_type],
24
+ [$._generic_custom_operation_with_location_attr_dict, $.custom_op_name],
25
+ [$._custom_body_dict_key, $.attribute_entry],
26
+ ],
27
+ inline: ($) => [
28
+ $._tier1_custom_operation,
29
+ $._tier2_custom_operation,
30
+ $._custom_body_reference_element,
31
+ $._custom_body_type_element,
32
+ $._custom_body_attribute_or_braced_element,
33
+ $._custom_body_dialect_marker,
34
+ $._custom_body_group,
35
+ $._custom_body_atom,
36
+ $._custom_body_literal_element,
37
+ $._custom_body_reserved_keyword,
38
+ $._custom_body_affine_keyword,
39
+ $._custom_body_brace_payload,
40
+ $._custom_body_punctuation,
41
+ $._custom_body_separator_punctuation,
42
+ $._custom_body_operator_punctuation,
43
+ $._pretty_dialect_structural_content,
44
+ $._pretty_dialect_value_content,
45
+ $._pretty_dialect_keyword_content,
46
+ $._pretty_dialect_punctuation_content,
22
47
  ],
23
48
 
24
49
  // Token-level precedence constants (higher wins the token race):
@@ -65,7 +90,7 @@ export default grammar({
65
90
  // decimal-literal ::= digit+
66
91
  // hexadecimal-literal ::= `0x` hex_digit+
67
92
  // float-literal ::= [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
68
- // string-literal ::= `"` [^"\n\f\v\r]* `"`
93
+ // string-literal ::= `"` (char | escape-sequence | invalid-escape)* `"`
69
94
  // =========================================================================
70
95
  _digit: ($) => /[0-9]/,
71
96
  integer_literal: ($) => choice($._decimal_literal, $._hexadecimal_literal),
@@ -284,10 +309,17 @@ export default grammar({
284
309
  // Tier 2: _generic_custom_operation — all other dialect.op_name patterns
285
310
  // =========================================================================
286
311
  custom_operation: ($) =>
312
+ choice($._tier1_custom_operation, $._tier2_custom_operation),
313
+
314
+ _tier1_custom_operation: ($) =>
287
315
  choice(
288
316
  prec(2, $.func_operation),
289
317
  prec(2, $.module_operation),
290
318
  prec(2, $._affine_for_operation),
319
+ ),
320
+
321
+ _tier2_custom_operation: ($) =>
322
+ choice(
291
323
  $._pdl_interp_record_match_operation,
292
324
  $._generic_custom_operation_with_location_attr_dict,
293
325
  $._generic_custom_operation,
@@ -361,6 +393,8 @@ export default grammar({
361
393
  ),
362
394
  ),
363
395
 
396
+ // These stay as specialized operation forms because `loc(...)` can be
397
+ // either custom body syntax or the operation-level trailing location.
364
398
  _pdl_interp_record_match_operation: ($) =>
365
399
  prec.dynamic(
366
400
  -1,
@@ -449,50 +483,116 @@ export default grammar({
449
483
  _custom_body_element: ($) =>
450
484
  choice(
451
485
  $._custom_body_element_base,
486
+ // Kept out of _custom_body_element_base because nested delimiter bodies
487
+ // should still treat `>` as an angle-group boundary, not a loose marker.
452
488
  $._custom_body_successor_marker, // >^bb1 (WasmSSA if continuation)
453
489
  ),
454
490
 
455
491
  _custom_body_element_base: ($) =>
492
+ choice(
493
+ $._custom_body_reference_element,
494
+ $._custom_body_type_element,
495
+ $._custom_body_attribute_or_braced_element,
496
+ $._custom_body_dialect_marker,
497
+ $._custom_body_group,
498
+ $._custom_body_atom,
499
+ $._custom_body_punctuation,
500
+ ),
501
+
502
+ _custom_body_reference_element: ($) =>
456
503
  choice(
457
504
  $.value_use, // %foo, %0
458
505
  $.symbol_ref_id, // @sym, @"string"
459
506
  $.successor, // ^bb0, ^bb0(%arg : type)
460
- $._custom_body_complex_label, // complex: %value (IRDL operand label)
461
- prec(2, $.type), // !type, i32, memref<...>, etc.
507
+ ),
508
+
509
+ _custom_body_type_element: ($) =>
510
+ prec(2, $.type), // !type, i32, memref<...>, etc.
511
+
512
+ // Attribute includes dictionary_attribute, so keep it adjacent to the
513
+ // custom-body `{...}` payloads while preserving the public wrapper.
514
+ _custom_body_attribute_or_braced_element: ($) =>
515
+ choice(
462
516
  $.attribute, // #attr, {dict}, affine_map<...>
517
+ $._custom_body_brace_payload,
518
+ ),
519
+
520
+ _custom_body_brace_payload: ($) =>
521
+ choice(
463
522
  $._custom_body_tuple_group, // {(%v), (%w)}
464
523
  $.region, // { ... } (regions with operations)
465
524
  $._custom_body_value_group, // {%v : type, ...}
466
525
  $._custom_body_ssa_dict, // {"attr" = %value, ...} / options with SSA values
526
+ ),
527
+
528
+ _custom_body_dialect_marker: ($) =>
529
+ choice(
530
+ $._custom_body_arrow, // <- (OpenMP loop transform mapped-from marker)
531
+ $._custom_body_complex_label, // complex: %value (IRDL operand label)
467
532
  $._custom_body_module_symbol_arg, // module(@sym) kernel attr
468
533
  $._custom_body_sparse_operand, // sparse(%idx : type)
534
+ ),
535
+
536
+ _custom_body_group: ($) =>
537
+ choice(
469
538
  $._custom_body_paren, // ( ... )
470
539
  $._custom_body_bracket, // [ ... ]
471
540
  $._custom_body_angle_group, // < ... >
541
+ ),
542
+
543
+ _custom_body_atom: ($) =>
544
+ choice(
545
+ $._custom_body_literal_element,
546
+ $._custom_body_reserved_keyword,
547
+ $.bare_id, // keywords: to, from, step, ins, outs, etc.
548
+ ),
549
+
550
+ _custom_body_literal_element: ($) =>
551
+ choice(
472
552
  $.variadic, // custom assembly ellipsis marker
473
553
  $._literal, // 42, 3.14, "string", true, dense<...>
474
- "array", // property names may collide with array<...>
554
+ ),
555
+
556
+ _custom_body_reserved_keyword: ($) =>
557
+ choice(
558
+ $._custom_body_array_keyword, // property names may collide with array<...>
475
559
  "vector", // OpenACC keyword may collide with vector<...>
476
- "tensor", // AMDGPU/NVGPU keyword may collide with tensor<...>
477
- "ceildiv",
478
- "floordiv",
479
- "mod", // inline affine keywords
480
- $.bare_id, // keywords: to, from, step, ins, outs, etc.
481
- $._custom_body_arrow, // <- (mapped-from, e.g. omp.fuse <- (...))
560
+ $._custom_body_tensor_keyword, // AMDGPU/NVGPU keyword may collide with tensor<...>
561
+ $._custom_body_affine_keyword, // inline affine keywords
562
+ ),
563
+
564
+ _custom_body_array_keyword: ($) => "array",
565
+ _custom_body_tensor_keyword: ($) => "tensor",
566
+ _custom_body_affine_keyword: ($) =>
567
+ choice("ceildiv", "floordiv", "mod"),
568
+
569
+ _custom_body_punctuation: ($) =>
570
+ choice(
571
+ $._custom_body_separator_punctuation,
572
+ $._custom_body_operator_punctuation,
573
+ ),
574
+
575
+ _custom_body_separator_punctuation: ($) =>
576
+ choice(
482
577
  ",",
483
578
  "=",
484
579
  ":",
485
580
  "->",
581
+ ),
582
+
583
+ _custom_body_operator_punctuation: ($) =>
584
+ choice(
486
585
  "*",
487
586
  "?",
488
587
  $.dimension_separator,
489
588
  "+",
490
- "-",
589
+ $._custom_body_minus_punctuation,
491
590
  "/",
492
591
  "&",
493
592
  "|",
494
593
  "~",
495
594
  ),
595
+ _custom_body_minus_punctuation: ($) => "-",
496
596
 
497
597
  _custom_body_paren: ($) =>
498
598
  seq("(", repeat($._nested_custom_body_element), ")"),
@@ -562,7 +662,7 @@ export default grammar({
562
662
  // mis-lex negative payloads like `#smt.bv<-1>` as `<-` `1`. Keeping the
563
663
  // tokens separate leaves the lexer unchanged; GLR distinguishes the arrow
564
664
  // from `_custom_body_angle_group` (which requires a closing '>').
565
- _custom_body_arrow: ($) => seq("<", "-"),
665
+ _custom_body_arrow: ($) => prec(1, seq("<", "-")),
566
666
  // Only nested groups accept `trailing_location` as a body element.
567
667
  // At the top level it is omitted on purpose so the operation rule
568
668
  // captures a trailing `loc(...)` as the operation's location instead of
@@ -608,7 +708,8 @@ export default grammar({
608
708
  // region ::= `{` entry-block? block* `}`
609
709
  // entry-block ::= operation+
610
710
  // =========================================================================
611
- region: ($) => seq("{", optional($.entry_block), repeat($.block), "}"),
711
+ region: ($) =>
712
+ prec(1, seq("{", optional($.entry_block), repeat($.block), "}")),
612
713
  entry_block: ($) => repeat1($.operation),
613
714
 
614
715
  // =========================================================================
@@ -666,37 +767,48 @@ export default grammar({
666
767
  _pretty_dialect_item_contents: ($) =>
667
768
  prec.left(
668
769
  choice(
669
- $.pretty_dialect_item_body,
670
- $._pretty_dialect_bang_body_token,
671
- $._pretty_dialect_body_attribute,
672
- $.dialect_dim_list,
673
- $.type,
674
- prec(2, $.attribute),
675
- $._literal,
676
- $._dense_keyword,
677
- $._sparse_keyword,
678
- "array",
679
- "vector",
680
- "tensor",
681
- "opaque",
682
- $.bare_id,
683
- ",",
684
- ":",
685
- "=",
686
- "->",
687
- "(",
688
- ")",
689
- "[",
690
- "]",
691
- "{",
692
- "}",
693
- "*",
694
- "?",
695
- "@",
696
- "#",
770
+ $._pretty_dialect_structural_content,
771
+ $._pretty_dialect_value_content,
772
+ $._pretty_dialect_keyword_content,
773
+ $._pretty_dialect_punctuation_content,
697
774
  token(prec(-1, /[^<>]/)),
698
775
  ),
699
776
  ),
777
+ _pretty_dialect_structural_content: ($) =>
778
+ choice(
779
+ $.pretty_dialect_item_body,
780
+ $._pretty_dialect_bang_body_token,
781
+ $._pretty_dialect_body_attribute,
782
+ ),
783
+ _pretty_dialect_value_content: ($) =>
784
+ choice($.dialect_dim_list, $.type, prec(2, $.attribute), $._literal),
785
+ _pretty_dialect_keyword_content: ($) =>
786
+ choice(
787
+ $._dense_keyword,
788
+ $._sparse_keyword,
789
+ "array",
790
+ "vector",
791
+ "tensor",
792
+ "opaque",
793
+ $.bare_id,
794
+ ),
795
+ _pretty_dialect_punctuation_content: ($) =>
796
+ choice(
797
+ ",",
798
+ ":",
799
+ "=",
800
+ "->",
801
+ "(",
802
+ ")",
803
+ "[",
804
+ "]",
805
+ "{",
806
+ "}",
807
+ "*",
808
+ "?",
809
+ "@",
810
+ "#",
811
+ ),
700
812
  _pretty_dialect_bang_body_token: ($) =>
701
813
  token(prec(1, seq("!", /[^a-zA-Z_<>]/))),
702
814
  _pretty_dialect_body_attribute: ($) =>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@felixtensor/tree-sitter-mlir",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "description": "MLIR grammar for tree-sitter",
5
5
  "type": "module",
6
6
  "repository": {
@@ -36,8 +36,8 @@
36
36
  "node-gyp-build": "^4.8.4"
37
37
  },
38
38
  "devDependencies": {
39
+ "node-gyp": "^12.4.0",
39
40
  "prebuildify": "^6.0.1",
40
- "tree-sitter": "^0.25.0",
41
41
  "tree-sitter-cli": "^0.26.9"
42
42
  },
43
43
  "peerDependencies": {
@@ -11,11 +11,17 @@
11
11
  (func_operation name: _ @function.builtin)
12
12
  (module_operation name: _ @function.builtin)
13
13
  (func_operation ["private" "public" "attributes"] @keyword)
14
+ (function_specifier) @keyword
14
15
  (module_operation "attributes" @keyword)
15
16
 
16
17
  ;; Dialect operations (e.g., arith.addi)
17
18
  (custom_op_name) @function.builtin
18
- (generic_operation (string_literal) @function.builtin)
19
+ (custom_operation ["array" "sparse" "tensor" "vector"] @keyword)
20
+ (custom_operation ["+" "-" "*" "/" "&" "|" "~"] @operator)
21
+ (custom_operation "?" @punctuation.special)
22
+ (custom_operation "loc" @keyword)
23
+ (custom_operation "module(" @keyword)
24
+ (custom_operation ">" @punctuation.bracket)
19
25
 
20
26
  ;; Symbols (@name)
21
27
  (symbol_ref_id) @string.special.symbol
@@ -42,20 +48,48 @@
42
48
  (dim_list "x" @punctuation.delimiter)
43
49
  (dimension_separator) @punctuation.delimiter
44
50
  (vector_dim_list "x" @punctuation.delimiter)
51
+ (dim_list ["?" "*"] @punctuation.special)
52
+ (dialect_dim_list ["?" "*"] @punctuation.special)
45
53
 
46
54
  [(attribute_alias) (attribute_alias_def) (dialect_attribute) (builtin_attribute) (dictionary_attribute)] @attribute
47
55
 
48
56
  ;; Specific attribute content
57
+ (properties ["<{" "}>"] @punctuation.bracket)
58
+ (affine_map "affine_map" @keyword)
59
+ (affine_set "affine_set" @keyword)
49
60
  (affine_map ["max" "min" "symbol"] @keyword)
50
61
  (affine_set ["max" "min" "symbol"] @keyword)
62
+ (affine_map
63
+ ["dense" "sparse" "compressed" "singleton" "loose_compressed" "n_out_of_m"]
64
+ @keyword)
65
+ (affine_set
66
+ ["dense" "sparse" "compressed" "singleton" "loose_compressed" "n_out_of_m"]
67
+ @keyword)
68
+ (affine_map ["+" "-" "*" "==" ">=" "<="] @operator)
69
+ (affine_set ["+" "-" "*" "==" ">=" "<="] @operator)
70
+ (strided_layout "strided" @keyword)
51
71
  (strided_layout "offset" @keyword)
52
- ["ceildiv" "floordiv" "mod"] @keyword.operator
72
+ (strided_layout ["?" "*"] @punctuation.special)
73
+ (distinct_attribute "distinct" @keyword)
74
+ (dense_resource_literal "dense_resource" @keyword)
75
+ ["ceildiv" "floordiv" "mod"] @operator
76
+ (pretty_dialect_item_body
77
+ ["array" "dense" "opaque" "sparse" "tensor" "vector"] @keyword)
78
+ (pretty_dialect_item_body ["?" "*"] @punctuation.special)
53
79
 
54
80
  ;; ── Literals ────────────────────────────────────────────────────────────────
55
81
  [(integer_literal) (float_literal) (complex_literal)] @number
56
82
  (bool_literal) @boolean
57
- [(tensor_literal) (dense_resource_literal) (array_literal) (unit_literal) (uninitialized_literal)] @constant.builtin
83
+ [(tensor_literal) (array_literal) (unit_literal) (uninitialized_literal)] @constant.builtin
84
+ (tensor_literal ["dense" "sparse"] @keyword)
85
+ (array_literal "array" @keyword)
58
86
  (string_literal) @string
87
+ (generic_operation (string_literal) @function.builtin)
88
+
89
+ ;; Escape sequences inside strings (\n, \t, \", \\, \HH) overlay on @string;
90
+ ;; malformed escapes are flagged distinctly rather than silently colored.
91
+ (escape_sequence) @string.escape
92
+ (invalid_escape) @error
59
93
 
60
94
  ;; ── SSA Variables (%name) ───────────────────────────────────────────────────
61
95
  ;; General uses and results (catch-all, overridden by more specific rules below)
@@ -67,17 +101,31 @@
67
101
  (block_arg_list (value_use) @variable.parameter)
68
102
 
69
103
  ;; ── Control Flow ────────────────────────────────────────────────────────────
70
- (caret_id) @label
104
+ (caret_id) @tag
71
105
  (trailing_location "loc" @keyword)
106
+ (callsite_location ["callsite" "at"] @keyword)
107
+ (fused_location "fused" @keyword)
108
+ (location "to" @keyword)
109
+ (unknown_location) @constant.builtin
72
110
  (variadic) @punctuation.special
73
111
 
112
+ ;; ── External Resource Blocks ───────────────────────────────────────────────
113
+ (external_resources ["{-#" "#-}"] @punctuation.bracket)
114
+
74
115
  ;; ── Punctuation ─────────────────────────────────────────────────────────────
75
116
  ["(" ")" "{" "}" "[" "]" "<" ">"] @punctuation.bracket
76
117
  ["," ":"] @punctuation.delimiter
77
118
  ["=" "->" "::"] @operator
78
119
 
79
- ;; Catch-all for bare keywords in Op bodies (ins, outs, etc.)
80
- (bare_id) @keyword
120
+ ;; Catch-all for bare keywords in custom operation bodies (ins, outs, etc.).
121
+ ;; Keep this scoped to direct custom_operation children so attribute keys,
122
+ ;; affine dimensions, and pretty dialect payload identifiers do not all become
123
+ ;; keywords.
124
+ (custom_operation (bare_id) @keyword)
125
+
126
+ ;; Dense resource handles override the bare_id catch-all above.
127
+ (dense_resource_literal (bare_id) @constant.builtin)
81
128
 
82
129
  ;; Dictionary attribute keys override the bare_id catch-all above
83
130
  (attribute_entry (bare_id) @attribute)
131
+ (attribute_entry (string_literal) @attribute)