herb 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. checksums.yaml +5 -5
  2. data/{LICENSE → LICENSE.txt} +4 -3
  3. data/Makefile +121 -0
  4. data/README.md +102 -107
  5. data/Rakefile +184 -0
  6. data/exe/herb +5 -0
  7. data/ext/herb/error_helpers.c +302 -0
  8. data/ext/herb/error_helpers.h +15 -0
  9. data/ext/herb/extconf.rb +75 -0
  10. data/ext/herb/extension.c +110 -0
  11. data/ext/herb/extension.h +6 -0
  12. data/ext/herb/extension_helpers.c +117 -0
  13. data/ext/herb/extension_helpers.h +24 -0
  14. data/ext/herb/nodes.c +936 -0
  15. data/ext/herb/nodes.h +12 -0
  16. data/herb.gemspec +49 -0
  17. data/lib/herb/ast/node.rb +61 -0
  18. data/lib/herb/ast/nodes.rb +1542 -0
  19. data/lib/herb/ast.rb +6 -0
  20. data/lib/herb/cli.rb +164 -0
  21. data/lib/herb/errors.rb +352 -0
  22. data/lib/herb/lex_result.rb +20 -0
  23. data/lib/herb/libherb/array.rb +48 -0
  24. data/lib/herb/libherb/ast_node.rb +47 -0
  25. data/lib/herb/libherb/buffer.rb +53 -0
  26. data/lib/herb/libherb/extract_result.rb +17 -0
  27. data/lib/herb/libherb/lex_result.rb +29 -0
  28. data/lib/herb/libherb/libherb.rb +49 -0
  29. data/lib/herb/libherb/parse_result.rb +17 -0
  30. data/lib/herb/libherb/token.rb +43 -0
  31. data/lib/herb/libherb.rb +32 -0
  32. data/lib/herb/location.rb +42 -0
  33. data/lib/herb/parse_result.rb +26 -0
  34. data/lib/herb/position.rb +36 -0
  35. data/lib/herb/project.rb +361 -0
  36. data/lib/herb/range.rb +40 -0
  37. data/lib/herb/result.rb +21 -0
  38. data/lib/herb/token.rb +43 -0
  39. data/lib/herb/token_list.rb +11 -0
  40. data/lib/herb/version.rb +5 -0
  41. data/lib/herb.rb +21 -68
  42. data/src/analyze.c +989 -0
  43. data/src/analyze_helpers.c +241 -0
  44. data/src/analyzed_ruby.c +35 -0
  45. data/src/array.c +137 -0
  46. data/src/ast_node.c +81 -0
  47. data/src/ast_nodes.c +866 -0
  48. data/src/ast_pretty_print.c +588 -0
  49. data/src/buffer.c +199 -0
  50. data/src/errors.c +740 -0
  51. data/src/extract.c +110 -0
  52. data/src/herb.c +103 -0
  53. data/src/html_util.c +143 -0
  54. data/src/include/analyze.h +36 -0
  55. data/src/include/analyze_helpers.h +43 -0
  56. data/src/include/analyzed_ruby.h +33 -0
  57. data/src/include/array.h +33 -0
  58. data/src/include/ast_node.h +35 -0
  59. data/src/include/ast_nodes.h +303 -0
  60. data/src/include/ast_pretty_print.h +17 -0
  61. data/src/include/buffer.h +36 -0
  62. data/src/include/errors.h +125 -0
  63. data/src/include/extract.h +20 -0
  64. data/src/include/herb.h +32 -0
  65. data/src/include/html_util.h +13 -0
  66. data/src/include/io.h +9 -0
  67. data/src/include/json.h +28 -0
  68. data/src/include/lexer.h +13 -0
  69. data/src/include/lexer_peek_helpers.h +23 -0
  70. data/src/include/lexer_struct.h +32 -0
  71. data/src/include/location.h +25 -0
  72. data/src/include/macros.h +10 -0
  73. data/src/include/memory.h +12 -0
  74. data/src/include/parser.h +22 -0
  75. data/src/include/parser_helpers.h +33 -0
  76. data/src/include/position.h +22 -0
  77. data/src/include/pretty_print.h +53 -0
  78. data/src/include/prism_helpers.h +18 -0
  79. data/src/include/range.h +23 -0
  80. data/src/include/ruby_parser.h +6 -0
  81. data/src/include/token.h +25 -0
  82. data/src/include/token_matchers.h +21 -0
  83. data/src/include/token_struct.h +51 -0
  84. data/src/include/util.h +25 -0
  85. data/src/include/version.h +6 -0
  86. data/src/include/visitor.h +11 -0
  87. data/src/io.c +30 -0
  88. data/src/json.c +205 -0
  89. data/src/lexer.c +284 -0
  90. data/src/lexer_peek_helpers.c +59 -0
  91. data/src/location.c +41 -0
  92. data/src/main.c +162 -0
  93. data/src/memory.c +53 -0
  94. data/src/parser.c +704 -0
  95. data/src/parser_helpers.c +161 -0
  96. data/src/position.c +33 -0
  97. data/src/pretty_print.c +242 -0
  98. data/src/prism_helpers.c +50 -0
  99. data/src/range.c +38 -0
  100. data/src/ruby_parser.c +47 -0
  101. data/src/token.c +194 -0
  102. data/src/token_matchers.c +32 -0
  103. data/src/util.c +128 -0
  104. data/src/visitor.c +321 -0
  105. metadata +126 -82
  106. data/test/helper.rb +0 -7
  107. data/test/helpers_test.rb +0 -25
  108. data/test/parsing_test.rb +0 -110
@@ -0,0 +1,241 @@
1
+ #include <prism.h>
2
+ #include <stdbool.h>
3
+ #include <string.h>
4
+
5
+ #include "include/analyzed_ruby.h"
6
+
7
+ bool has_if_node(analyzed_ruby_T* analyzed) {
8
+ return analyzed->has_if_node;
9
+ }
10
+
11
+ bool has_elsif_node(analyzed_ruby_T* analyzed) {
12
+ return analyzed->has_elsif_node;
13
+ }
14
+
15
+ bool has_else_node(analyzed_ruby_T* analyzed) {
16
+ return analyzed->has_else_node;
17
+ }
18
+
19
+ bool has_end(analyzed_ruby_T* analyzed) {
20
+ return analyzed->has_end;
21
+ }
22
+
23
+ bool has_block_node(analyzed_ruby_T* analyzed) {
24
+ return analyzed->has_block_node;
25
+ }
26
+
27
+ bool has_block_closing(analyzed_ruby_T* analyzed) {
28
+ return analyzed->has_block_closing;
29
+ }
30
+
31
+ bool has_case_node(analyzed_ruby_T* analyzed) {
32
+ return analyzed->has_case_node;
33
+ }
34
+
35
+ bool has_when_node(analyzed_ruby_T* analyzed) {
36
+ return analyzed->has_when_node;
37
+ }
38
+
39
+ bool has_for_node(analyzed_ruby_T* analyzed) {
40
+ return analyzed->has_for_node;
41
+ }
42
+
43
+ bool has_while_node(analyzed_ruby_T* analyzed) {
44
+ return analyzed->has_while_node;
45
+ }
46
+
47
+ bool has_until_node(analyzed_ruby_T* analyzed) {
48
+ return analyzed->has_until_node;
49
+ }
50
+
51
+ bool has_begin_node(analyzed_ruby_T* analyzed) {
52
+ return analyzed->has_begin_node;
53
+ }
54
+
55
+ bool has_rescue_node(analyzed_ruby_T* analyzed) {
56
+ return analyzed->has_rescue_node;
57
+ }
58
+
59
+ bool has_ensure_node(analyzed_ruby_T* analyzed) {
60
+ return analyzed->has_ensure_node;
61
+ }
62
+
63
+ bool has_unless_node(analyzed_ruby_T* analyzed) {
64
+ return analyzed->has_unless_node;
65
+ }
66
+
67
+ bool has_error_message(analyzed_ruby_T* anlayzed, const char* message) {
68
+ for (const pm_diagnostic_t* error = (const pm_diagnostic_t*) anlayzed->parser.error_list.head; error != NULL;
69
+ error = (const pm_diagnostic_t*) error->node.next) {
70
+ if (strcmp(error->message, message) == 0) { return true; }
71
+ }
72
+
73
+ return false;
74
+ }
75
+
76
+ bool search_if_nodes(const pm_node_t* node, void* data) {
77
+ analyzed_ruby_T* analyzed = (analyzed_ruby_T*) data;
78
+
79
+ if (node->type == PM_IF_NODE) {
80
+ analyzed->has_if_node = true;
81
+ return true;
82
+ } else {
83
+ pm_visit_child_nodes(node, search_if_nodes, analyzed);
84
+ }
85
+
86
+ return false;
87
+ }
88
+
89
+ bool search_block_nodes(const pm_node_t* node, void* data) {
90
+ analyzed_ruby_T* analyzed = (analyzed_ruby_T*) data;
91
+
92
+ if (node->type == PM_BLOCK_NODE) {
93
+ analyzed->has_block_node = true;
94
+ return true;
95
+ } else {
96
+ pm_visit_child_nodes(node, search_block_nodes, analyzed);
97
+ }
98
+
99
+ return false;
100
+ }
101
+
102
+ bool search_case_nodes(const pm_node_t* node, void* data) {
103
+ analyzed_ruby_T* analyzed = (analyzed_ruby_T*) data;
104
+
105
+ if (node->type == PM_CASE_MATCH_NODE) {
106
+ analyzed->has_case_node = true;
107
+ return true;
108
+ } else {
109
+ pm_visit_child_nodes(node, search_case_nodes, analyzed);
110
+ }
111
+
112
+ return false;
113
+ }
114
+
115
+ bool search_while_nodes(const pm_node_t* node, void* data) {
116
+ analyzed_ruby_T* analyzed = (analyzed_ruby_T*) data;
117
+
118
+ if (node->type == PM_WHILE_NODE) {
119
+ analyzed->has_while_node = true;
120
+ return true;
121
+ } else {
122
+ pm_visit_child_nodes(node, search_while_nodes, analyzed);
123
+ }
124
+
125
+ return false;
126
+ }
127
+
128
+ bool search_for_nodes(const pm_node_t* node, void* data) {
129
+ analyzed_ruby_T* analyzed = (analyzed_ruby_T*) data;
130
+
131
+ if (node->type == PM_FOR_NODE) {
132
+ analyzed->has_for_node = true;
133
+ return true;
134
+ } else {
135
+ pm_visit_child_nodes(node, search_for_nodes, analyzed);
136
+ }
137
+
138
+ return false;
139
+ }
140
+
141
+ bool search_until_nodes(const pm_node_t* node, void* data) {
142
+ analyzed_ruby_T* analyzed = (analyzed_ruby_T*) data;
143
+
144
+ if (node->type == PM_UNTIL_NODE) {
145
+ analyzed->has_until_node = true;
146
+ return true;
147
+ } else {
148
+ pm_visit_child_nodes(node, search_until_nodes, analyzed);
149
+ }
150
+
151
+ return false;
152
+ }
153
+
154
+ bool search_begin_nodes(const pm_node_t* node, void* data) {
155
+ analyzed_ruby_T* analyzed = (analyzed_ruby_T*) data;
156
+
157
+ if (node->type == PM_BEGIN_NODE) {
158
+ analyzed->has_begin_node = true;
159
+ return true;
160
+ } else {
161
+ pm_visit_child_nodes(node, search_begin_nodes, analyzed);
162
+ }
163
+
164
+ return false;
165
+ }
166
+
167
+ bool search_unless_nodes(const pm_node_t* node, void* data) {
168
+ analyzed_ruby_T* analyzed = (analyzed_ruby_T*) data;
169
+
170
+ if (node->type == PM_UNLESS_NODE) {
171
+ analyzed->has_unless_node = true;
172
+ return true;
173
+ } else {
174
+ pm_visit_child_nodes(node, search_unless_nodes, analyzed);
175
+ }
176
+
177
+ return false;
178
+ }
179
+
180
+ bool search_elsif_nodes(analyzed_ruby_T* analyzed) {
181
+ if (has_error_message(analyzed, "unexpected 'elsif', ignoring it")) {
182
+ analyzed->has_elsif_node = true;
183
+ return true;
184
+ }
185
+
186
+ return false;
187
+ }
188
+
189
+ bool search_else_nodes(analyzed_ruby_T* analyzed) {
190
+ if (has_error_message(analyzed, "unexpected 'else', ignoring it")) {
191
+ analyzed->has_else_node = true;
192
+ return true;
193
+ }
194
+
195
+ return false;
196
+ }
197
+
198
+ bool search_end_nodes(analyzed_ruby_T* analyzed) {
199
+ if (has_error_message(analyzed, "unexpected 'end', ignoring it")) {
200
+ analyzed->has_end = true;
201
+ return true;
202
+ }
203
+
204
+ return false;
205
+ }
206
+
207
+ bool search_block_closing_nodes(analyzed_ruby_T* analyzed) {
208
+ if (has_error_message(analyzed, "unexpected '}', ignoring it")) {
209
+ analyzed->has_block_closing = true;
210
+ return true;
211
+ }
212
+
213
+ return false;
214
+ }
215
+
216
+ bool search_when_nodes(analyzed_ruby_T* analyzed) {
217
+ if (has_error_message(analyzed, "unexpected 'when', ignoring it")) {
218
+ analyzed->has_when_node = true;
219
+ return true;
220
+ }
221
+
222
+ return false;
223
+ }
224
+
225
+ bool search_rescue_nodes(analyzed_ruby_T* analyzed) {
226
+ if (has_error_message(analyzed, "unexpected 'rescue', ignoring it")) {
227
+ analyzed->has_rescue_node = true;
228
+ return true;
229
+ }
230
+
231
+ return false;
232
+ }
233
+
234
+ bool search_ensure_nodes(analyzed_ruby_T* analyzed) {
235
+ if (has_error_message(analyzed, "unexpected 'ensure', ignoring it")) {
236
+ analyzed->has_ensure_node = true;
237
+ return true;
238
+ }
239
+
240
+ return false;
241
+ }
@@ -0,0 +1,35 @@
1
+ #include "include/analyzed_ruby.h"
2
+
3
+ #include <prism.h>
4
+ #include <string.h>
5
+
6
+ analyzed_ruby_T* init_analyzed_ruby(char* source) {
7
+ analyzed_ruby_T* analyzed = malloc(sizeof(analyzed_ruby_T));
8
+
9
+ pm_parser_init(&analyzed->parser, (const uint8_t*) source, strlen(source), NULL);
10
+
11
+ analyzed->root = pm_parse(&analyzed->parser);
12
+ analyzed->valid = (analyzed->parser.error_list.size == 0);
13
+ analyzed->parsed = true;
14
+ analyzed->has_if_node = false;
15
+ analyzed->has_elsif_node = false;
16
+ analyzed->has_else_node = false;
17
+ analyzed->has_end = false;
18
+ analyzed->has_block_node = false;
19
+ analyzed->has_block_closing = false;
20
+ analyzed->has_case_node = false;
21
+ analyzed->has_when_node = false;
22
+ analyzed->has_for_node = false;
23
+ analyzed->has_while_node = false;
24
+ analyzed->has_until_node = false;
25
+ analyzed->has_begin_node = false;
26
+ analyzed->has_rescue_node = false;
27
+ analyzed->has_ensure_node = false;
28
+ analyzed->has_unless_node = false;
29
+
30
+ return analyzed;
31
+ }
32
+
33
+ void free_analyzed_ruby(analyzed_ruby_T* analyzed) {
34
+ // TODO
35
+ }
data/src/array.c ADDED
@@ -0,0 +1,137 @@
1
+ #include <stdint.h>
2
+ #include <stdio.h>
3
+
4
+ #include "include/array.h"
5
+ #include "include/macros.h"
6
+ #include "include/memory.h"
7
+
8
+ size_t array_sizeof(void) {
9
+ return sizeof(array_T);
10
+ }
11
+
12
+ array_T* array_init(const size_t capacity) {
13
+ array_T* array = safe_malloc(array_sizeof());
14
+
15
+ array->size = 0;
16
+ array->capacity = capacity;
17
+ array->items = nullable_safe_malloc(sizeof(void*) * capacity);
18
+
19
+ if (!array->items) {
20
+ free(array);
21
+ return NULL;
22
+ }
23
+
24
+ return array;
25
+ }
26
+
27
+ void array_append(array_T* array, void* item) {
28
+ if (array->size >= array->capacity) {
29
+ size_t new_capacity;
30
+
31
+ if (array->capacity == 0) {
32
+ new_capacity = 1;
33
+ } else if (array->capacity > SIZE_MAX / (2 * sizeof(void*))) {
34
+ fprintf(stderr, "Warning: Approaching array size limits, using conservative growth.\n");
35
+ new_capacity = array->capacity + 1024 / sizeof(void*);
36
+
37
+ if (new_capacity < array->capacity) { new_capacity = SIZE_MAX / sizeof(void*); }
38
+ } else {
39
+ new_capacity = array->capacity * 2;
40
+ }
41
+
42
+ if (new_capacity > SIZE_MAX / sizeof(void*)) {
43
+ fprintf(stderr, "Error: Array allocation would exceed system limits.\n");
44
+ return;
45
+ }
46
+
47
+ size_t new_size_bytes = new_capacity * sizeof(void*);
48
+ void* new_items = safe_realloc(array->items, new_size_bytes);
49
+
50
+ if (unlikely(new_items == NULL)) { return; }
51
+
52
+ array->items = (void**) new_items;
53
+ array->capacity = new_capacity;
54
+ }
55
+
56
+ array->items[array->size] = item;
57
+ array->size++;
58
+ }
59
+
60
+ void* array_get(const array_T* array, const size_t index) {
61
+ if (index >= array->size) { return NULL; }
62
+
63
+ return array->items[index];
64
+ }
65
+
66
+ void* array_first(array_T* array) {
67
+ if (!array || array->size == 0) { return NULL; }
68
+ return array->items[0];
69
+ }
70
+
71
+ void* array_last(array_T* array) {
72
+ if (!array || array->size == 0) { return NULL; }
73
+ return array->items[array->size - 1];
74
+ }
75
+
76
+ void array_set(const array_T* array, const size_t index, void* item) {
77
+ if (index >= array->size) { return; }
78
+
79
+ array->items[index] = item;
80
+ }
81
+
82
+ void array_remove(array_T* array, const size_t index) {
83
+ if (index >= array->size) { return; }
84
+
85
+ for (size_t i = index; i < array->size - 1; i++) {
86
+ array->items[i] = array->items[i + 1];
87
+ }
88
+
89
+ array->size--;
90
+ }
91
+
92
+ size_t array_index_of(array_T* array, void* item) {
93
+ for (size_t i = 0; i < array->size; i++) {
94
+ if (array->items[i] == item) { return i; }
95
+ }
96
+
97
+ return SIZE_MAX;
98
+ }
99
+
100
+ void array_remove_item(array_T* array, void* item) {
101
+ size_t index = array_index_of(array, item);
102
+
103
+ if (index != SIZE_MAX) { array_remove(array, index); }
104
+ }
105
+
106
+ // Alias for array_append
107
+ void array_push(array_T* array, void* item) {
108
+ array_append(array, item);
109
+ }
110
+
111
+ void* array_pop(array_T* array) {
112
+ if (!array || array->size == 0) { return NULL; }
113
+
114
+ void* last_item = array_last(array);
115
+ array->size--;
116
+
117
+ return last_item;
118
+ }
119
+
120
+ size_t array_size(const array_T* array) {
121
+ if (array == NULL) { return 0; }
122
+
123
+ return array->size;
124
+ }
125
+
126
+ size_t array_capacity(const array_T* array) {
127
+ return array->capacity;
128
+ }
129
+
130
+ void array_free(array_T** array) {
131
+ if (!array || !*array) { return; }
132
+
133
+ free((*array)->items);
134
+ free(*array);
135
+
136
+ *array = NULL;
137
+ }
data/src/ast_node.c ADDED
@@ -0,0 +1,81 @@
1
+ #include "include/ast_node.h"
2
+ #include "include/ast_nodes.h"
3
+ #include "include/errors.h"
4
+ #include "include/token.h"
5
+ #include "include/util.h"
6
+
7
+ #include <prism.h>
8
+ #include <stdio.h>
9
+ #include <stdlib.h>
10
+
11
+ size_t ast_node_sizeof(void) {
12
+ return sizeof(struct AST_NODE_STRUCT);
13
+ }
14
+
15
+ void ast_node_init(AST_NODE_T* node, const ast_node_type_T type, position_T* start, position_T* end, array_T* errors) {
16
+ if (!node) { return; }
17
+
18
+ node->type = type;
19
+ node->location = location_init(position_copy(start), position_copy(end));
20
+
21
+ if (errors == NULL) {
22
+ node->errors = array_init(8);
23
+ } else {
24
+ node->errors = errors;
25
+ }
26
+ }
27
+
28
+ AST_LITERAL_NODE_T* ast_literal_node_init_from_token(const token_T* token) {
29
+ AST_LITERAL_NODE_T* literal = malloc(sizeof(AST_LITERAL_NODE_T));
30
+
31
+ ast_node_init(&literal->base, AST_LITERAL_NODE, token->location->start, token->location->end, NULL);
32
+
33
+ literal->content = herb_strdup(token->value);
34
+
35
+ return literal;
36
+ }
37
+
38
+ ast_node_type_T ast_node_type(const AST_NODE_T* node) {
39
+ return node->type;
40
+ }
41
+
42
+ size_t ast_node_errors_count(const AST_NODE_T* node) {
43
+ return array_size(node->errors);
44
+ }
45
+
46
+ array_T* ast_node_errors(const AST_NODE_T* node) {
47
+ return node->errors;
48
+ }
49
+
50
+ void ast_node_append_error(const AST_NODE_T* node, ERROR_T* error) {
51
+ array_append(node->errors, error);
52
+ }
53
+
54
+ void ast_node_set_start(AST_NODE_T* node, position_T* position) {
55
+ if (node->location->start != NULL) { position_free(node->location->start); }
56
+
57
+ node->location->start = position_copy(position);
58
+ }
59
+
60
+ void ast_node_set_end(AST_NODE_T* node, position_T* position) {
61
+ if (node->location->end != NULL) { position_free(node->location->end); }
62
+
63
+ node->location->end = position_copy(position);
64
+ }
65
+
66
+ void ast_node_set_start_from_token(AST_NODE_T* node, const token_T* token) {
67
+ ast_node_set_start(node, token->location->start);
68
+ }
69
+
70
+ void ast_node_set_end_from_token(AST_NODE_T* node, const token_T* token) {
71
+ ast_node_set_end(node, token->location->end);
72
+ }
73
+
74
+ void ast_node_set_positions_from_token(AST_NODE_T* node, const token_T* token) {
75
+ ast_node_set_start_from_token(node, token);
76
+ ast_node_set_end_from_token(node, token);
77
+ }
78
+
79
+ bool ast_node_is(const AST_NODE_T* node, const ast_node_type_T type) {
80
+ return node->type == type;
81
+ }