biosyntax 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.md +674 -0
- data/README.md +154 -0
- data/ext/biosyntax/biosyntax.c +620 -0
- data/ext/biosyntax/biosyntax.h +413 -0
- data/ext/biosyntax/biosyntax_ext.c +380 -0
- data/ext/biosyntax/extconf.rb +13 -0
- data/lib/biosyntax/version.rb +5 -0
- data/lib/biosyntax.rb +538 -0
- metadata +47 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Ruby native extension for libbiosyntax.
|
|
3
|
+
* SPDX-License-Identifier: GPL-3.0-only
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
#include "ruby.h"
|
|
7
|
+
#include "ruby/encoding.h"
|
|
8
|
+
#include "biosyntax.h"
|
|
9
|
+
|
|
10
|
+
#include <limits.h>
|
|
11
|
+
#include <stdint.h>
|
|
12
|
+
#include <stdlib.h>
|
|
13
|
+
#include <string.h>
|
|
14
|
+
|
|
15
|
+
/* Ruby constants created or looked up by this extension. */
static VALUE mBioSyntax;   /* BioSyntax module */
static VALUE mNative;      /* BioSyntax::Native module */
static VALUE cNativeState; /* BioSyntax::Native::State class */
static VALUE cSpan;        /* BioSyntax::Span, resolved lazily by span_class() */
|
|
19
|
+
|
|
20
|
+
typedef struct {
|
|
21
|
+
biosyn_state_t *ptr;
|
|
22
|
+
} rb_biosyn_state_t;
|
|
23
|
+
|
|
24
|
+
typedef struct {
|
|
25
|
+
biosyn_span_t local[64];
|
|
26
|
+
biosyn_span_t *spans;
|
|
27
|
+
uint64_t count;
|
|
28
|
+
int heap_allocated;
|
|
29
|
+
} span_buffer_t;
|
|
30
|
+
|
|
31
|
+
typedef struct {
|
|
32
|
+
biosyn_span_t *spans;
|
|
33
|
+
uint64_t count;
|
|
34
|
+
} span_array_args_t;
|
|
35
|
+
|
|
36
|
+
/* Wrap the native parser state in a Ruby typed data object. */
|
|
37
|
+
static void state_free(void *data) {
|
|
38
|
+
rb_biosyn_state_t *wrap = (rb_biosyn_state_t *)data;
|
|
39
|
+
if (!wrap)
|
|
40
|
+
return;
|
|
41
|
+
|
|
42
|
+
if (wrap->ptr) {
|
|
43
|
+
biosyn_state_free(wrap->ptr);
|
|
44
|
+
wrap->ptr = NULL;
|
|
45
|
+
}
|
|
46
|
+
xfree(wrap);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
static size_t state_memsize(const void *data) {
|
|
50
|
+
const rb_biosyn_state_t *wrap = (const rb_biosyn_state_t *)data;
|
|
51
|
+
return wrap ? sizeof(*wrap) + (wrap->ptr ? sizeof(*wrap->ptr) : 0) : 0;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
static const rb_data_type_t state_type = {"BioSyntax::Native::State",
|
|
55
|
+
{0, state_free, state_memsize, 0},
|
|
56
|
+
0,
|
|
57
|
+
0,
|
|
58
|
+
RUBY_TYPED_FREE_IMMEDIATELY};
|
|
59
|
+
|
|
60
|
+
/*
 * Unwrap `self` and return its wrapper struct.
 * Raises RuntimeError when the wrapper or its native state is missing.
 */
static rb_biosyn_state_t *get_state(VALUE self) {
  rb_biosyn_state_t *holder = NULL;

  TypedData_Get_Struct(self, rb_biosyn_state_t, &state_type, holder);

  if (holder == NULL || holder->ptr == NULL) {
    rb_raise(rb_eRuntimeError, "uninitialized BioSyntax native state");
  }

  return holder;
}
|
|
68
|
+
|
|
69
|
+
/*
 * Allocator for BioSyntax::Native::State.
 *
 * Returns a new typed-data object whose wrapper has no native state yet
 * (ptr == NULL); #initialize attaches one.
 *
 * TypedData_Make_Struct creates the wrapper and the Ruby object together, so
 * no separately allocated wrapper is left dangling if creating the object
 * raises.
 */
static VALUE state_alloc(VALUE klass) {
  rb_biosyn_state_t *holder = NULL;
  VALUE obj = TypedData_Make_Struct(klass, rb_biosyn_state_t, &state_type, holder);

  /* Make_Struct zero-fills the wrapper; keep the invariant explicit anyway. */
  holder->ptr = NULL;
  return obj;
}
|
|
74
|
+
|
|
75
|
+
/*
 * Convert a Ruby integer into a libbiosyntax format id.
 * Raises ArgumentError for BIOSYN_FORMAT_UNKNOWN and for ids at or beyond
 * biosyn_format_count().
 */
static biosyn_format_t format_id_from_value(VALUE value) {
  unsigned int raw = NUM2UINT(value);
  int supported = (raw != BIOSYN_FORMAT_UNKNOWN) && (raw < biosyn_format_count());

  if (!supported) {
    rb_raise(rb_eArgError, "unsupported libbiosyntax format id: %u", raw);
  }

  return (biosyn_format_t)raw;
}
|
|
82
|
+
|
|
83
|
+
/*
 * State#initialize(format_id)
 *
 * Validates the format id, discards any native state already attached to
 * this object, and creates a fresh one for the requested format.
 * Raises ArgumentError for an unsupported id and NoMemoryError when the
 * native state cannot be created.
 */
static VALUE state_initialize(VALUE self, VALUE format_id_value) {
  biosyn_format_t requested = format_id_from_value(format_id_value);
  rb_biosyn_state_t *holder = NULL;

  TypedData_Get_Struct(self, rb_biosyn_state_t, &state_type, holder);

  /* initialize can run again on a live object; release the old state first. */
  if (holder->ptr != NULL) {
    biosyn_state_free(holder->ptr);
    holder->ptr = NULL;
  }

  holder->ptr = biosyn_state_new(requested);
  if (holder->ptr == NULL) {
    rb_raise(rb_eNoMemError, "could not allocate libbiosyntax state");
  }

  return self;
}
|
|
100
|
+
|
|
101
|
+
/*
 * State#reset([format_id])
 *
 * Re-initializes the native state in place. With no argument, or with nil,
 * the current format is kept; otherwise the given id is validated and used.
 * Raises ArgumentError when more than one argument is passed.
 */
static VALUE state_reset(int argc, VALUE *argv, VALUE self) {
  rb_biosyn_state_t *holder = get_state(self);
  biosyn_format_t target = holder->ptr->format;
  int has_override;

  if (argc > 1) {
    rb_error_arity(argc, 0, 1);
  }

  has_override = (argc == 1) && !NIL_P(argv[0]);
  if (has_override) {
    target = format_id_from_value(argv[0]);
  }

  biosyn_state_init(holder->ptr, target);
  return self;
}
|
|
115
|
+
|
|
116
|
+
/* State#line_no: the native state's line_no field as a Ruby Integer. */
static VALUE state_line_no(VALUE self) {
  rb_biosyn_state_t *holder = get_state(self);
  unsigned long long current = (unsigned long long)holder->ptr->line_no;

  return ULL2NUM(current);
}
|
|
120
|
+
|
|
121
|
+
/* State#format_id: the native state's format field as a Ruby Integer. */
static VALUE state_format_id(VALUE self) {
  rb_biosyn_state_t *holder = get_state(self);
  biosyn_format_t current = holder->ptr->format;

  return UINT2NUM(current);
}
|
|
125
|
+
|
|
126
|
+
/* Collect spans for one line. Most lines fit in span_buffer_t.local; larger
|
|
127
|
+
* results fall back to a heap allocation released through rb_ensure.
|
|
128
|
+
*/
|
|
129
|
+
static void highlight_into_buffer(biosyn_state_t *state, const char *ptr, uint64_t len,
|
|
130
|
+
span_buffer_t *buf) {
|
|
131
|
+
uint64_t needed;
|
|
132
|
+
|
|
133
|
+
buf->spans = NULL;
|
|
134
|
+
buf->count = 0;
|
|
135
|
+
buf->heap_allocated = 0;
|
|
136
|
+
|
|
137
|
+
needed = biosyn_highlight_next_line(state, ptr, len, buf->local, 64);
|
|
138
|
+
if (needed <= 64) {
|
|
139
|
+
if (needed == 0)
|
|
140
|
+
return;
|
|
141
|
+
|
|
142
|
+
buf->spans = buf->local;
|
|
143
|
+
buf->count = needed;
|
|
144
|
+
return;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
if (needed > (uint64_t)(SIZE_MAX / sizeof(biosyn_span_t))) {
|
|
148
|
+
rb_raise(rb_eNoMemError, "too many highlight spans");
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
buf->spans = ALLOC_N(biosyn_span_t, (size_t)needed);
|
|
152
|
+
buf->heap_allocated = 1;
|
|
153
|
+
buf->count = biosyn_highlight_next_line(state, ptr, len, buf->spans, needed);
|
|
154
|
+
|
|
155
|
+
if (buf->count > needed) {
|
|
156
|
+
xfree(buf->spans);
|
|
157
|
+
buf->spans = NULL;
|
|
158
|
+
buf->count = 0;
|
|
159
|
+
rb_raise(rb_eRuntimeError, "libbiosyntax span count changed while highlighting");
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
/*
 * Ensure handler: releases the heap block of a span_buffer_t, if it has one,
 * and resets the buffer to its empty state. `arg` is a span_buffer_t pointer
 * smuggled through a VALUE; a NULL pointer is tolerated. Always returns nil.
 */
static VALUE free_span_buffer(VALUE arg) {
  span_buffer_t *buf = (span_buffer_t *)arg;

  if (buf == NULL) {
    return Qnil;
  }

  /* Only heap blocks are freed; `local` lives inside the buffer itself. */
  if (buf->heap_allocated && buf->spans != NULL) {
    xfree(buf->spans);
  }

  buf->spans = NULL;
  buf->count = 0;
  buf->heap_allocated = 0;
  return Qnil;
}
|
|
175
|
+
|
|
176
|
+
static VALUE span_class(void) {
|
|
177
|
+
if (NIL_P(cSpan)) {
|
|
178
|
+
cSpan = rb_const_get(mBioSyntax, rb_intern("Span"));
|
|
179
|
+
rb_gc_register_mark_object(cSpan);
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
return cSpan;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
/* Convert native byte spans into BioSyntax::Span objects. */
|
|
186
|
+
static VALUE build_span_array(VALUE arg) {
|
|
187
|
+
span_array_args_t *args = (span_array_args_t *)arg;
|
|
188
|
+
VALUE klass = span_class();
|
|
189
|
+
VALUE ary;
|
|
190
|
+
uint64_t i;
|
|
191
|
+
|
|
192
|
+
if (args->count > (uint64_t)LONG_MAX) {
|
|
193
|
+
rb_raise(rb_eRuntimeError, "too many highlight spans for Ruby Array");
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
ary = rb_ary_new_capa((long)args->count);
|
|
197
|
+
for (i = 0; i < args->count; i++) {
|
|
198
|
+
VALUE argv[3];
|
|
199
|
+
argv[0] = ULL2NUM((unsigned long long)args->spans[i].start);
|
|
200
|
+
argv[1] = ULL2NUM((unsigned long long)args->spans[i].length);
|
|
201
|
+
argv[2] = UINT2NUM(args->spans[i].class_id);
|
|
202
|
+
rb_ary_push(ary, rb_class_new_instance(3, argv, klass));
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
return ary;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/*
 * Protected body of State#highlight: exposes the filled span buffer as a
 * span_array_args_t and converts it to a Ruby Array.
 */
static VALUE state_highlight_body(VALUE arg) {
  span_buffer_t *filled = (span_buffer_t *)arg;
  span_array_args_t view;

  view.count = filled->count;
  view.spans = filled->spans;
  return build_span_array((VALUE)&view);
}
|
|
215
|
+
|
|
216
|
+
/*
 * State#highlight(line) -> Array of BioSyntax::Span
 *
 * Highlights one line with the native state and returns its spans.
 * `line_value` is coerced with StringValue. Raises TypeError when it is not
 * string-like, ArgumentError for a negative length, and whatever
 * highlight_into_buffer / build_span_array raise. Any heap block obtained
 * for the spans is released by free_span_buffer via rb_ensure.
 */
static VALUE state_highlight(VALUE self, VALUE line_value) {
  rb_biosyn_state_t *wrap = get_state(self);
  span_buffer_t buf;
  VALUE line = StringValue(line_value);
  long len = RSTRING_LEN(line);

  if (len < 0) {
    rb_raise(rb_eArgError, "line is too large");
  }

  highlight_into_buffer(wrap->ptr, RSTRING_PTR(line), (uint64_t)len, &buf);

  /* highlight_into_buffer may allocate (and so trigger GC) while it still
   * reads the raw character pointer; keep `line` visibly alive until that
   * pointer is no longer used. */
  RB_GC_GUARD(line);

  return rb_ensure(state_highlight_body, (VALUE)&buf, free_span_buffer, (VALUE)&buf);
}
|
|
229
|
+
|
|
230
|
+
typedef struct {
|
|
231
|
+
VALUE line;
|
|
232
|
+
span_buffer_t *buf;
|
|
233
|
+
} colorize_args_t;
|
|
234
|
+
|
|
235
|
+
/* Render ANSI output after spans have already been calculated. */
|
|
236
|
+
static VALUE state_colorize_body(VALUE arg) {
|
|
237
|
+
colorize_args_t *args = (colorize_args_t *)arg;
|
|
238
|
+
VALUE line = args->line;
|
|
239
|
+
const char *ptr = RSTRING_PTR(line);
|
|
240
|
+
uint64_t len = (uint64_t)RSTRING_LEN(line);
|
|
241
|
+
uint64_t needed;
|
|
242
|
+
VALUE out;
|
|
243
|
+
|
|
244
|
+
if (args->buf->count == 0) {
|
|
245
|
+
return rb_str_dup(line);
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
needed = biosyn_render_ansi_line(ptr, len, args->buf->spans, args->buf->count, NULL, 0);
|
|
249
|
+
if (needed > (uint64_t)(LONG_MAX - 1)) {
|
|
250
|
+
rb_raise(rb_eNoMemError, "ANSI output is too large");
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
out = rb_str_new(NULL, (long)needed + 1);
|
|
254
|
+
biosyn_render_ansi_line(ptr, len, args->buf->spans, args->buf->count, RSTRING_PTR(out),
|
|
255
|
+
needed + 1);
|
|
256
|
+
rb_str_set_len(out, (long)needed);
|
|
257
|
+
rb_enc_copy(out, line);
|
|
258
|
+
return out;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
/*
 * State#colorize(line) -> String
 *
 * Highlights one line with the native state and returns it rendered with
 * ANSI escape sequences. `line_value` is coerced with StringValue; a negative
 * length raises ArgumentError. Any heap block obtained for the spans is
 * released by free_span_buffer via rb_ensure.
 */
static VALUE state_colorize(VALUE self, VALUE line_value) {
  rb_biosyn_state_t *holder = get_state(self);
  span_buffer_t spans;
  colorize_args_t body_args;
  VALUE text = StringValue(line_value);
  long text_len = RSTRING_LEN(text);

  if (text_len < 0) {
    rb_raise(rb_eArgError, "line is too large");
  }

  highlight_into_buffer(holder->ptr, RSTRING_PTR(text), (uint64_t)text_len, &spans);

  body_args.buf = &spans;
  body_args.line = text;
  return rb_ensure(state_colorize_body, (VALUE)&body_args, free_span_buffer, (VALUE)&spans);
}
|
|
277
|
+
|
|
278
|
+
/* Native.libbiosyntax_version: the string returned by biosyn_version(). */
static VALUE native_libbiosyntax_version(VALUE self) {
  const char *version = biosyn_version();

  (void)self;
  return rb_str_new_cstr(version);
}
|
|
282
|
+
|
|
283
|
+
/* Native.abi_version: the value of biosyn_abi_version() as a Ruby Integer. */
static VALUE native_abi_version(VALUE self) {
  unsigned int abi = biosyn_abi_version();

  (void)self;
  return UINT2NUM(abi);
}
|
|
287
|
+
|
|
288
|
+
/*
 * Native.format_id_from_name(name) -> Integer
 *
 * Coerces `name_value` to a String, passes it to biosyn_format_from_name as
 * a C string, and returns the resulting format id.
 */
static VALUE native_format_id_from_name(VALUE self, VALUE name_value) {
  VALUE name = StringValue(name_value);
  const char *c_name;
  biosyn_format_t found;

  (void)self;

  c_name = StringValueCStr(name);
  found = biosyn_format_from_name(c_name);
  return UINT2NUM(found);
}
|
|
296
|
+
|
|
297
|
+
/*
 * Native.guess_format_id(path) -> Integer
 *
 * Coerces `path_value` to a String, passes it to
 * biosyn_guess_format_from_path as a C string, and returns the resulting
 * format id.
 */
static VALUE native_guess_format_id(VALUE self, VALUE path_value) {
  VALUE path = StringValue(path_value);
  const char *c_path;
  biosyn_format_t guessed;

  (void)self;

  c_path = StringValueCStr(path);
  guessed = biosyn_guess_format_from_path(c_path);
  return UINT2NUM(guessed);
}
|
|
305
|
+
|
|
306
|
+
/* Wrap a C string in a Ruby String, mapping a NULL pointer to nil. */
static VALUE str_or_nil(const char *s) {
  if (s == NULL) {
    return Qnil;
  }
  return rb_str_new_cstr(s);
}
|
|
307
|
+
|
|
308
|
+
/* Format and kind metadata are generated from libbiosyntax at load time, so the
|
|
309
|
+
* Ruby layer does not need to maintain parallel tables.
|
|
310
|
+
*/
|
|
311
|
+
static VALUE native_formats_raw(VALUE self) {
|
|
312
|
+
VALUE ary = rb_ary_new();
|
|
313
|
+
uint32_t count = biosyn_format_count();
|
|
314
|
+
uint32_t i;
|
|
315
|
+
(void)self;
|
|
316
|
+
|
|
317
|
+
for (i = 0; i < count; i++) {
|
|
318
|
+
biosyn_format_info_t info;
|
|
319
|
+
if (biosyn_format_info((biosyn_format_t)i, &info)) {
|
|
320
|
+
VALUE h = rb_hash_new();
|
|
321
|
+
rb_hash_aset(h, ID2SYM(rb_intern("id")), UINT2NUM(i));
|
|
322
|
+
rb_hash_aset(h, ID2SYM(rb_intern("name")), str_or_nil(info.name));
|
|
323
|
+
rb_hash_aset(h, ID2SYM(rb_intern("description")), str_or_nil(info.description));
|
|
324
|
+
rb_hash_aset(h, ID2SYM(rb_intern("stateful")), info.stateful ? Qtrue : Qfalse);
|
|
325
|
+
rb_ary_push(ary, rb_obj_freeze(h));
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
return rb_obj_freeze(ary);
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
/* Return raw token kind metadata for Ruby-side value objects. */
|
|
333
|
+
static VALUE native_kinds_raw(VALUE self) {
|
|
334
|
+
VALUE ary = rb_ary_new();
|
|
335
|
+
uint32_t count = biosyn_class_count();
|
|
336
|
+
uint32_t i;
|
|
337
|
+
(void)self;
|
|
338
|
+
|
|
339
|
+
for (i = 0; i < count; i++) {
|
|
340
|
+
biosyn_class_info_t info;
|
|
341
|
+
if (biosyn_class_info((biosyn_class_t)i, &info)) {
|
|
342
|
+
VALUE h = rb_hash_new();
|
|
343
|
+
rb_hash_aset(h, ID2SYM(rb_intern("id")), UINT2NUM(i));
|
|
344
|
+
rb_hash_aset(h, ID2SYM(rb_intern("name")), str_or_nil(info.name));
|
|
345
|
+
rb_hash_aset(h, ID2SYM(rb_intern("scope")), str_or_nil(info.scope));
|
|
346
|
+
rb_hash_aset(h, ID2SYM(rb_intern("foreground")), str_or_nil(info.foreground));
|
|
347
|
+
rb_hash_aset(h, ID2SYM(rb_intern("background")), str_or_nil(info.background));
|
|
348
|
+
rb_hash_aset(h, ID2SYM(rb_intern("font_style")), str_or_nil(info.font_style));
|
|
349
|
+
rb_hash_aset(h, ID2SYM(rb_intern("ansi_sgr")), str_or_nil(info.ansi_sgr));
|
|
350
|
+
rb_ary_push(ary, rb_obj_freeze(h));
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
return rb_obj_freeze(ary);
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
void Init_biosyntax_ext(void) {
|
|
358
|
+
mBioSyntax = rb_define_module("BioSyntax");
|
|
359
|
+
cSpan = Qnil;
|
|
360
|
+
|
|
361
|
+
/* BioSyntax::Native is intentionally an internal bridge. Public Ruby APIs
|
|
362
|
+
* are defined in lib/biosyntax.rb.
|
|
363
|
+
*/
|
|
364
|
+
mNative = rb_define_module_under(mBioSyntax, "Native");
|
|
365
|
+
rb_define_singleton_method(mNative, "libbiosyntax_version", native_libbiosyntax_version, 0);
|
|
366
|
+
rb_define_singleton_method(mNative, "abi_version", native_abi_version, 0);
|
|
367
|
+
rb_define_singleton_method(mNative, "formats_raw", native_formats_raw, 0);
|
|
368
|
+
rb_define_singleton_method(mNative, "kinds_raw", native_kinds_raw, 0);
|
|
369
|
+
rb_define_singleton_method(mNative, "format_id_from_name", native_format_id_from_name, 1);
|
|
370
|
+
rb_define_singleton_method(mNative, "guess_format_id", native_guess_format_id, 1);
|
|
371
|
+
|
|
372
|
+
cNativeState = rb_define_class_under(mNative, "State", rb_cObject);
|
|
373
|
+
rb_define_alloc_func(cNativeState, state_alloc);
|
|
374
|
+
rb_define_method(cNativeState, "initialize", state_initialize, 1);
|
|
375
|
+
rb_define_method(cNativeState, "reset", state_reset, -1);
|
|
376
|
+
rb_define_method(cNativeState, "line_no", state_line_no, 0);
|
|
377
|
+
rb_define_method(cNativeState, "format_id", state_format_id, 0);
|
|
378
|
+
rb_define_method(cNativeState, "highlight", state_highlight, 1);
|
|
379
|
+
rb_define_method(cNativeState, "colorize", state_colorize, 1);
|
|
380
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Build configuration for the biosyntax native extension.
require 'mkmf'
require 'rbconfig'

# The C sources include <stdint.h>; refuse to build without it.
have_header('stdint.h') || abort('missing stdint.h')

# NOTE(review): presumably selects static linkage declarations in biosyntax.h -- confirm.
$defs << '-DBIOSYN_STATIC'

# Request C99 unless the configured compiler command is cl / cl.exe.
compiler = RbConfig::CONFIG['CC'].to_s
uses_cl = !(compiler =~ /\bcl(\.exe)?\b/i).nil?
$CFLAGS << ' -std=c99' unless uses_cl

# Objects linked into the extension: the Ruby binding and the library source.
$objs = %w[biosyntax_ext biosyntax].map { |base| "#{base}.#{$OBJEXT}" }

create_makefile('biosyntax/biosyntax_ext')
|