node-marshal 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2296 +1,2356 @@
1
- /*
2
- * This file contains implementation of classes for Ruby nodes
3
- * marshalization (i.e. loading and saving them from disk)
4
- *
5
- * (C) 2015-2016 Alexey Voskov
6
- * License: BSD-2-Clause
7
- */
8
- #define __STDC_FORMAT_MACROS
9
- #include <stdio.h>
10
- #include <stdlib.h>
11
- #include <inttypes.h>
12
- #include <ruby.h>
13
- #include <ruby/version.h>
14
-
15
- /*
16
- * Some global variables
17
- */
18
- static VALUE cNodeObjAddresses, cNodeInfo;
19
-
20
- /*
21
- * Part 1. .H files: nodedump functions + parts of Ruby internals
22
- */
23
- #include "nodedump.h"
24
-
25
#ifdef WITH_CUSTOM_RB_GLOBAL_ENTRY
/*
 * Custom (and slow) implementation of rb_global_entry internal API for Ruby 2.3
 * (original rb_global_entry API was opened before Ruby 2.3).
 *
 * It uses a hack with the node creation: an expression containing only the
 * name of the global variable is compiled, and the global entry address is
 * taken from the u3 "leaf" of the resulting NODE_GVAR.
 *
 * Inputs
 *   id -- ID of the global variable
 * Returns
 *   address of the global entry, or NULL if the ID has no name or the
 *   compiled tree does not look like NODE_SCOPE -> [NODE_PRELUDE ->] NODE_GVAR
 */
static struct rb_global_entry *rb_global_entry(ID id)
{
	NODE *node, *gvar_node;
	VALUE name = rb_id2str(id);

	/* An ID without a name cannot be compiled as an expression */
	if (TYPE(name) != T_STRING)
	{
		return NULL;
	}
	/* a) Step 1: create node from the expression consisting only from
	   our global variable. The line argument is a plain C int, so it is
	   passed directly (NUM2INT expects a Ruby VALUE, not a C integer). */
	node = rb_compile_string("<compiled>", name, 1);
	if (node == NULL || nd_type(node) != NODE_SCOPE)
	{
		return NULL;
	}
	/* b) Trace the node to the NODE_GVAR */
	gvar_node = node->u2.node;
	if (gvar_node != NULL && nd_type(gvar_node) == NODE_PRELUDE) /* Present only in 2.3 */
	{
		gvar_node = gvar_node->u2.node;
	}
	if (gvar_node == NULL || nd_type(gvar_node) != NODE_GVAR) /* Error: no GVAR found */
	{
		return NULL;
	}
	/* c) Return the global entry address kept in the NODE_GVAR */
	return gvar_node->u3.entry;
}
#endif
57
-
58
-
59
- /*
60
- * Part 2. Information about the nodes
61
- *
62
- */
63
-
64
- // Pre-2.0 Ruby versions don't use this version
65
- #if RUBY_API_VERSION_MAJOR == 2
66
- #define USE_RB_ARGS_INFO 1
67
- #endif
68
-
69
- #if RUBY_API_VERSION_MAJOR == 1
70
- #define RESET_GC_FLAGS 1
71
- #endif
72
-
73
-
74
- // Some generic utilities
75
- int is_value_in_heap(VALUE val)
76
- {
77
- if (val == Qfalse || val == Qtrue ||
78
- val == Qnil || val == Qundef ||
79
- (val & FIXNUM_FLAG)
80
- #ifdef FLONUM_MASK
81
- || ((val & FLONUM_MASK) == FLONUM_FLAG) // This memory trick with floats is present only in 2.x
82
- #endif
83
- )
84
- {
85
- return 0;
86
- }
87
- else
88
- return 1;
89
- }
90
-
91
-
92
- /*
93
- * Converts Ruby string with hexadecimal number
94
- * to the Ruby VALUE
95
- */
96
- VALUE str_to_value(VALUE str)
97
- {
98
- intptr_t ans = (intptr_t) Qnil;
99
- sscanf(RSTRING_PTR(str), "%"PRIxPTR, &ans);
100
- return (VALUE) ans;
101
- }
102
-
103
-
104
- /*
105
- * Converts Ruby VALUE (i.e. machine address) to the
106
- * hexadecimal Ruby string
107
- */
108
- VALUE value_to_str(VALUE val)
109
- {
110
- char str[16];
111
- sprintf(str, "%" PRIxPTR, (intptr_t) val);
112
- return rb_str_new2(str);
113
- }
114
-
115
- /*
116
- * Converts VALUE to the sequence of bytes using big-endian
117
- * standard. Returns number of non-zero bytes
118
- *
119
- * Inputs
120
- * val -- input value
121
- * buf -- pointer to the output buffer
122
- * Returns
123
- * number of written bytes
124
- */
125
- int value_to_bin(VALUE val, unsigned char *buf)
126
- {
127
- int i, len = 0;
128
- unsigned char byte;
129
- for (i = sizeof(VALUE) - 1; i >= 0; i--)
130
- {
131
- byte = (unsigned char) ((val >> (i * 8)) & 0xFF);
132
- if (len > 0 || byte != 0)
133
- {
134
- *buf++ = byte;
135
- len++;
136
- }
137
- }
138
- return len;
139
- }
140
-
141
- /*
142
- * Converts sequence of bytes (big-endian standard) to the VALUE.
143
- *
144
- * Inputs
145
- * buf -- poiner to the input buffer
146
- * len -- number of bytes
147
- * Returns
148
- * VALUE
149
- */
150
- VALUE bin_to_value(unsigned char *buf, int len)
151
- {
152
- VALUE val = (VALUE) 0;
153
- int i;
154
- for (i = len - 1; i >= 0; i--)
155
- val |= ((VALUE) *buf++) << (i * 8);
156
- return val;
157
- }
158
-
159
/* Number of node type slots in nodes_ctbl */
#define NODES_CTBL_SIZE 256
/* Children types table: for the node type nt the entries nt * 3, nt * 3 + 1
   and nt * 3 + 2 keep the NT_... types of its u1, u2 and u3 children */
static int nodes_ctbl[NODES_CTBL_SIZE * 3];
161
-
162
-
163
- /*
164
- * Part 3. Functions for node marshalization
165
- */
166
-
167
- /*
168
- * Keeps the information about node elements position
169
- * in the memory and its IDs/ordinals for export to the file
170
- */
171
- typedef struct {
172
- VALUE vals; // values: key=>val Hash
173
- VALUE ids; // identifiers: key=>id Hash
174
- VALUE pos; // free identifier
175
- } LeafTableInfo;
176
-
177
- void LeafTableInfo_init(LeafTableInfo *lti)
178
- {
179
- lti->vals = rb_hash_new();
180
- lti->ids = rb_hash_new();
181
- lti->pos = 0;
182
- }
183
-
184
- void LeafTableInfo_mark(LeafTableInfo *lti)
185
- {
186
- rb_gc_mark(lti->vals);
187
- rb_gc_mark(lti->ids);
188
- }
189
-
190
-
191
- int LeafTableInfo_addEntry(LeafTableInfo *lti, VALUE key, VALUE value)
192
- {
193
- VALUE v_id = rb_hash_aref(lti->ids, key);
194
- if (v_id == Qnil)
195
- {
196
- int id = lti->pos++;
197
- rb_hash_aset(lti->vals, key, value);
198
- rb_hash_aset(lti->ids, key, INT2FIX(id));
199
- return id;
200
- }
201
- else
202
- {
203
- return FIX2INT(v_id);
204
- }
205
- }
206
-
207
- /*
208
- * Adds Ruby ID data type as the entry to the LeafTableInfo struct.
209
- * Main features:
210
- * 1) ID will be converted to Fixnum
211
- * 2) If ID can be converted to string by rb_id2str it will be saved as
212
- String object. Otherwise it will be converted to Fixnum.
213
- */
214
- int LeafTableInfo_addIDEntry(LeafTableInfo *lti, ID id)
215
- {
216
- VALUE r_idval = rb_id2str(id);
217
- if (TYPE(r_idval) != T_STRING)
218
- {
219
- r_idval = INT2FIX(id);
220
- }
221
- return LeafTableInfo_addEntry(lti, INT2FIX(id), r_idval);
222
- }
223
-
224
- VALUE LeafTableInfo_getLeavesTable(LeafTableInfo *lti)
225
- {
226
- VALUE key, keys = rb_funcall(lti->vals, rb_intern("keys"), 0);
227
- unsigned int i;
228
- VALUE val;
229
- for (i = 0; i < lti->pos; i++)
230
- {
231
- key = RARRAY_PTR(keys)[i];
232
- val = rb_hash_aref(lti->vals, key);
233
- rb_ary_store(keys, i, val);
234
- }
235
- return keys;
236
- }
237
-
238
- int LeafTableInfo_keyToID(LeafTableInfo *lti, VALUE key)
239
- {
240
- VALUE id = rb_hash_aref(lti->ids, key);
241
- return (id == Qnil) ? -1 : FIX2INT(id);
242
- }
243
-
244
- VALUE LeafTableInfo_keyToValue(LeafTableInfo *lti, VALUE key)
245
- {
246
- return rb_hash_aref(lti->vals, key);
247
- }
248
-
249
- /* The structure keeps information about the node
250
- that is required for its dumping to the file
251
- (mainly hashes with relocatable identifiers) */
252
- typedef struct {
253
- LeafTableInfo syms; // Node symbols
254
- LeafTableInfo lits; // Node literals
255
- LeafTableInfo idtabs; // Table of identifiers
256
- #ifdef USE_RB_ARGS_INFO
257
- LeafTableInfo args; // Table of arguments
258
- #endif
259
- LeafTableInfo gentries; // Global variables table
260
- LeafTableInfo nodes; // Table of nodes
261
- LeafTableInfo pnodes; // Table of parent nodes
262
- } NODEInfo;
263
-
264
- void NODEInfo_init(NODEInfo *info)
265
- {
266
- LeafTableInfo_init(&(info->syms));
267
- LeafTableInfo_init(&(info->lits));
268
- LeafTableInfo_init(&(info->idtabs));
269
- #ifdef USE_RB_ARGS_INFO
270
- LeafTableInfo_init(&(info->args));
271
- #endif
272
- LeafTableInfo_init(&(info->gentries));
273
- LeafTableInfo_init(&(info->nodes));
274
- LeafTableInfo_init(&(info->pnodes));
275
- }
276
-
277
- void NODEInfo_mark(NODEInfo *info)
278
- {
279
- LeafTableInfo_mark(&(info->syms));
280
- LeafTableInfo_mark(&(info->lits));
281
- LeafTableInfo_mark(&(info->idtabs));
282
- #ifdef USE_RB_ARGS_INFO
283
- LeafTableInfo_mark(&(info->args));
284
- #endif
285
- LeafTableInfo_mark(&(info->gentries));
286
- LeafTableInfo_mark(&(info->nodes));
287
- LeafTableInfo_mark(&(info->pnodes));
288
- }
289
-
290
- void NODEInfo_free(NODEInfo *info)
291
- {
292
- xfree(info);
293
- }
294
-
295
- LeafTableInfo *NODEInfo_getTableByID(NODEInfo *info, int id)
296
- {
297
- switch (id)
298
- {
299
- case NT_ID:
300
- return &info->syms;
301
- case NT_VALUE:
302
- return &info->lits;
303
- case NT_IDTABLE:
304
- return &info->idtabs;
305
- #ifdef USE_RB_ARGS_INFO
306
- case NT_ARGS:
307
- return &info->args;
308
- #endif
309
- case NT_ENTRY:
310
- return &info->gentries;
311
- case NT_NODE:
312
- return &info->nodes;
313
- default:
314
- return NULL;
315
- }
316
- }
317
-
318
- /*
319
- * Converts node value to the binary data
320
- * Input parameters:
321
- * info -- current NODEInfo structure
322
- * node -- parent node (that contains the value)
323
- * ptr -- pointer to the output memory buffer
324
- * type -- type of the entry (NT_...)
325
- * value -- node->u?.value VALUE
326
- * child_id -- child node number (1,2,3)
327
- * Returns:
328
- * Byte that contains the next information
329
- * a) upper half-byte: VL_... data type (for node loader)
330
- * b) lower half-byte: number of bytes written to the buffer
331
- */
332
- #define DUMP_RAW_VALUE(vl_ans, vl) (vl_ans | (value_to_bin(vl, (unsigned char *) ptr) << 4))
333
- static int dump_node_value(NODEInfo *info, char *ptr, NODE *node, int type, VALUE value, int child_id)
334
- {
335
- if (type == NT_NULL || type == NT_LONG)
336
- {
337
- return DUMP_RAW_VALUE(VL_RAW, value);
338
- }
339
- else if (type == NT_NODE)
340
- {
341
- if (value == 0)
342
- { // Variant a: empty node
343
- return DUMP_RAW_VALUE(VL_RAW, value);
344
- }
345
- else if (nd_type(node) == NODE_ATTRASGN && value == 1 && child_id == 1)
346
- { // Special case: "self"
347
- return DUMP_RAW_VALUE(VL_RAW, value);
348
- }
349
- else if (TYPE(value) != T_NODE)
350
- {
351
- rb_raise(rb_eArgError, "dump_node_value, parent node %s (ADR 0x%s): child node %d (ADR 0x%s): is not a node\n"
352
- " Type: %s (%d), Value: %s",
353
- ruby_node_name(nd_type(node)), RSTRING_PTR(value_to_str((VALUE) node)),
354
- child_id, RSTRING_PTR(value_to_str(value)),
355
- RSTRING_PTR(rb_funcall(rb_funcall(value, rb_intern("class"), 0), rb_intern("to_s"), 0)),
356
- TYPE(value),
357
- RSTRING_PTR(rb_funcall(value, rb_intern("to_s"), 0)) );
358
- }
359
- else
360
- { // Variant b: not empty node
361
- VALUE id = LeafTableInfo_keyToID(&info->nodes, value_to_str(value));
362
- if (id == (VALUE) -1)
363
- {
364
- rb_raise(rb_eArgError, "dump_node_value, parent node %s (ADR 0x%s): child node %d (ADR 0x%s) not found",
365
- ruby_node_name(nd_type(node)), RSTRING_PTR(value_to_str((VALUE) node)),
366
- child_id, RSTRING_PTR(value_to_str(value)));
367
- return VL_RAW;
368
- }
369
- else
370
- {
371
- return DUMP_RAW_VALUE(VL_NODE, id);
372
- }
373
- return VL_NODE;
374
- }
375
- }
376
- else if (type == NT_VALUE)
377
- {
378
- if (!is_value_in_heap(value))
379
- { // a) value that is inside VALUE
380
- return DUMP_RAW_VALUE(VL_RAW, value);
381
- }
382
- else
383
- { // b) value that requires reference to literals table
384
- VALUE id = LeafTableInfo_keyToID(&info->lits, value_to_str(value));
385
- if (id == (VALUE) -1)
386
- rb_raise(rb_eArgError, "Cannot find literal");
387
- else
388
- return DUMP_RAW_VALUE(VL_LIT, id);
389
- }
390
- }
391
- else if (type == NT_ID)
392
- {
393
- ID sym = (VALUE) value; // We are working with RAW data from RAM!
394
- VALUE id = LeafTableInfo_keyToID(&info->syms, INT2FIX(sym));
395
- if (id == (VALUE) -1)
396
- {
397
- rb_raise(rb_eArgError, "Cannot find symbol ID %d (%s) (parent node %s, line %d)",
398
- (int) sym, RSTRING_PTR(rb_id2str(ID2SYM(sym))),
399
- ruby_node_name(nd_type(node)), nd_line(node));
400
- return VL_RAW;
401
- }
402
- else
403
- {
404
- return DUMP_RAW_VALUE(VL_ID, id);
405
- }
406
- }
407
- else if (type == NT_ENTRY || type == NT_ARGS || type == NT_IDTABLE)
408
- {
409
- VALUE key = value_to_str(value);
410
- LeafTableInfo *lti = NODEInfo_getTableByID(info, type);
411
- VALUE id = LeafTableInfo_keyToID(lti, key);
412
- if (id == (VALUE) -1)
413
- {
414
- rb_raise(rb_eArgError, "Cannot find some entry");
415
- return VL_RAW;
416
- }
417
- else
418
- {
419
- switch(type)
420
- {
421
- case NT_ENTRY: return DUMP_RAW_VALUE(VL_GVAR, id);
422
- case NT_IDTABLE: return DUMP_RAW_VALUE(VL_IDTABLE, id);
423
- case NT_ARGS: return DUMP_RAW_VALUE(VL_ARGS, id);
424
- default: rb_raise(rb_eArgError, "Internal error");
425
- }
426
- }
427
- }
428
- else
429
- {
430
- rb_raise(rb_eArgError, "Unknown child node type %d", type);
431
- }
432
- }
433
-
434
- /*
435
- * Converts information about nodes to the binary string.
436
- * It uses dump_node_value function for the low-level conversion
437
- * of node "leaves" to the actual binary data.
438
- *
439
- * See load_nodes_from_str for the descrpition of the binary string format.
440
- */
441
- static VALUE dump_nodes(NODEInfo *info)
442
- {
443
- int node_size = sizeof(int) + sizeof(VALUE) * 4;
444
- int i, nt, flags_len;
445
- NODE *node;
446
- char *bin, *ptr, *rtypes;
447
- VALUE nodes_ary = rb_funcall(info->nodes.vals, rb_intern("keys"), 0);
448
- VALUE nodes_bin = rb_str_new(NULL, RARRAY_LEN(nodes_ary) * node_size);
449
- VALUE ut[3];
450
- bin = RSTRING_PTR(nodes_bin);
451
-
452
- for (i = 0, ptr = bin; i < RARRAY_LEN(nodes_ary); i++)
453
- {
454
- node = RNODE(str_to_value(RARRAY_PTR(nodes_ary)[i]));
455
- nt = nd_type(node);
456
- rtypes = (char *) ptr; ptr += sizeof(int);
457
- flags_len = value_to_bin(node->flags >> 5, (unsigned char *) ptr); ptr += flags_len;
458
-
459
- ut[0] = nodes_ctbl[nt * 3];
460
- ut[1] = nodes_ctbl[nt * 3 + 1];
461
- ut[2] = nodes_ctbl[nt * 3 + 2];
462
- if (nt == NODE_OP_ASGN2 && LeafTableInfo_keyToID(&info->syms, INT2FIX(node->u1.value)) != -1)
463
- {
464
- ut[0] = NT_ID; ut[1] = NT_ID; ut[2] = NT_ID;
465
- }
466
-
467
- if (nt == NODE_ARGS_AUX)
468
- {
469
- ut[0] = NT_ID; ut[1] = NT_LONG; ut[2] = NT_NODE;
470
- if (LeafTableInfo_keyToID(&info->syms, INT2FIX(node->u2.value)) != -1)
471
- {
472
- ut[1] = NT_ID;
473
- }
474
- else
475
- {
476
- ut[1] = NT_LONG;
477
- }
478
- if (node->u1.value == 0) ut[0] = NT_NULL;
479
- if (node->u2.value == 0) ut[1] = NT_NULL;
480
- if (node->u3.value == 0) ut[2] = NT_NULL;
481
- }
482
-
483
- if (nt = NODE_ARRAY)
484
- {
485
- /* Special undocumented cases:
486
- * 1) the second child of the second element of an array
487
- * contains reference to the last element (NT_NODE) not
488
- * length (NT_LONG)
489
- * 2) NODE_HASH: every second element in NODE_ARRAY chain
490
- * contains pointers to NODES (instead of lengths)
491
- * 3) NODE_DSTR: first node in NODE_ARRAY chain contains
492
- * pointer to NODE (instead of lengths) */
493
- NODE *pnode1, *pnode2;
494
- pnode1 = (NODE *) str_to_value(LeafTableInfo_keyToValue(&info->pnodes, value_to_str((VALUE) node)));
495
- if (pnode1 != NULL && nd_type(pnode1) == NODE_ARRAY &&
496
- (NODE *) pnode1->u3.value == node)
497
- {
498
- int nt2;
499
- pnode2 = (NODE *) str_to_value(LeafTableInfo_keyToValue(&info->pnodes, value_to_str((VALUE) pnode1)));
500
- nt2 = nd_type(pnode2);
501
- if ( (nt2 != NODE_ARRAY && nt2 != NODE_DSTR) ||
502
- (NODE *) pnode2->u1.value == pnode1 )
503
- {
504
- ut[1] = NT_NODE;
505
- }
506
- else if (pnode1->u2.value == 2 && node == (NODE *) node->u2.value)
507
- {
508
- ut[1] = NT_NODE;
509
- }
510
- }
511
- else if (pnode1 != NULL && nd_type(pnode1) == NODE_DSTR)
512
- {
513
- ut[1] = NT_NODE;
514
- }
515
- }
516
-
517
- rtypes[0] = dump_node_value(info, ptr, node, ut[0], node->u1.value, 1);
518
- ptr += (rtypes[0] & 0xF0) >> 4;
519
- rtypes[1] = dump_node_value(info, ptr, node, ut[1], node->u2.value, 2);
520
- ptr += (rtypes[1] & 0xF0) >> 4;
521
- rtypes[2] = dump_node_value(info, ptr, node, ut[2], node->u3.value, 3);
522
- ptr += (rtypes[2] & 0xF0) >> 4;
523
- rtypes[3] = flags_len;
524
- }
525
- rb_str_resize(nodes_bin, (int) (ptr - bin) + 1);
526
- return nodes_bin;
527
- }
528
-
529
-
530
- /*
531
- * Transforms preprocessed node to Ruby hash that can be used
532
- * to load the node from disk.
533
- *
534
- * See m_nodedump_to_hash function for output hash format details
535
- */
536
- VALUE NODEInfo_toHash(NODEInfo *info)
537
- {
538
- VALUE ans = rb_hash_new();
539
- VALUE idtbl, idtabs = LeafTableInfo_getLeavesTable(&info->idtabs);
540
- VALUE syms = LeafTableInfo_getLeavesTable(&info->syms);
541
- VALUE args;
542
- int i, j, id;
543
- // Add some signatures
544
- rb_hash_aset(ans, ID2SYM(rb_intern("MAGIC")), rb_str_new2(NODEMARSHAL_MAGIC));
545
- rb_hash_aset(ans, ID2SYM(rb_intern("RUBY_PLATFORM")),
546
- rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM")));
547
- rb_hash_aset(ans, ID2SYM(rb_intern("RUBY_VERSION")),
548
- rb_const_get(rb_cObject, rb_intern("RUBY_VERSION")));
549
- // Write literals, symbols and global_entries arrays: they don't need to be corrected
550
- rb_hash_aset(ans, ID2SYM(rb_intern("literals")), LeafTableInfo_getLeavesTable(&info->lits));
551
- rb_hash_aset(ans, ID2SYM(rb_intern("symbols")), syms);
552
- rb_hash_aset(ans, ID2SYM(rb_intern("global_entries")), LeafTableInfo_getLeavesTable(&info->gentries));
553
- // Replace RAM IDs to disk IDs in id_tables
554
- for (i = 0; i < RARRAY_LEN(idtabs); i++)
555
- {
556
- idtbl = RARRAY_PTR(idtabs)[i];
557
- for (j = 0; j < RARRAY_LEN(idtbl); j++)
558
- {
559
- id = LeafTableInfo_keyToID(&info->syms, RARRAY_PTR(idtbl)[j]);
560
-
561
- if (id == -1)
562
- {
563
- ID sym = FIX2INT(RARRAY_PTR(idtbl)[j]);
564
- rb_raise(rb_eArgError, "Cannot find the symbol ID %d", (int) sym);
565
- }
566
- else
567
- {
568
- rb_ary_store(idtbl, j, INT2FIX(id));
569
- }
570
-
571
- }
572
- }
573
- rb_hash_aset(ans, ID2SYM(rb_intern("id_tables")), idtabs);
574
- // Replace RAM IDs to disk IDs in args tables
575
- #ifdef USE_RB_ARGS_INFO
576
- args = LeafTableInfo_getLeavesTable(&info->args);
577
- for (i = 0; i < RARRAY_LEN(args); i++)
578
- {
579
- VALUE args_entry = RARRAY_PTR(args)[i];
580
- VALUE *eptr = RARRAY_PTR(args_entry);
581
- int args_vals[5] = {0, 1, 7, 8, 9};
582
- int args_ids[3] = {4, 5, 6};
583
- if (RARRAY_LEN(args_entry) != 10)
584
- rb_raise(rb_eArgError, "Corrupted args entry");
585
- // Pointer to nodes to be replaced:
586
- // a) VALUES
587
- // (0) pre_init, (1) post_init,
588
- // (7) kw_args, (8) kw_rest_arg, (9) opt_args
589
- for (j = 0; j < 5; j++)
590
- {
591
- int ind = args_vals[j];
592
- VALUE key = eptr[ind];
593
- if (!strcmp(RSTRING_PTR(key), "0"))
594
- eptr[ind] = INT2FIX(-1);
595
- else
596
- {
597
- eptr[ind] = INT2FIX(LeafTableInfo_keyToID(&info->nodes, key));
598
- if (FIX2INT(eptr[ind]) == -1)
599
- rb_raise(rb_eArgError, "Unknown NODE in args tables");
600
- }
601
- }
602
- // b) IDs (symbols)
603
- // (4) first_post_arg (5) rest_arg (6) block_arg
604
- for (j = 0; j < 3; j++)
605
- {
606
- int ind = args_ids[j];
607
- VALUE key = eptr[ind];
608
- if (FIX2INT(key) != 0)
609
- {
610
- eptr[ind] = INT2FIX(LeafTableInfo_keyToID(&info->syms, key));
611
- if (FIX2INT(eptr[ind]) == -1)
612
- rb_raise(rb_eArgError, "Unknown symbolic ID in args tables");
613
- }
614
- else
615
- eptr[ind] = INT2FIX(-1);
616
- }
617
- }
618
- #else
619
- args = rb_ary_new();
620
- #endif
621
-
622
- rb_hash_aset(ans, ID2SYM(rb_intern("args")), args);
623
- // Special case: NODES. Nodes are kept as binary string
624
- rb_hash_aset(ans, ID2SYM(rb_intern("nodes")), dump_nodes(info));
625
- return ans;
626
- }
627
-
628
-
629
- static void NODEInfo_addValue(NODEInfo *info, VALUE value)
630
- {
631
- if (is_value_in_heap(value))
632
- {
633
- VALUE lkey = value_to_str(value);
634
- LeafTableInfo_addEntry(&info->lits, lkey, value);
635
- }
636
- }
637
-
638
- /*
639
- * Adds the information about Ruby NODE to the NODEInfo struct.
640
- * It keeps the addresses of the node and its parents
641
- */
642
- static void NODEInfo_addNode(NODEInfo *info, NODE *node, NODE *pnode)
643
- {
644
- VALUE node_adr = value_to_str((VALUE) node);
645
- VALUE pnode_adr = value_to_str((VALUE) pnode);
646
- LeafTableInfo_addEntry(&info->nodes, node_adr, node_adr);
647
- LeafTableInfo_addEntry(&info->pnodes, node_adr, pnode_adr);
648
- }
649
-
650
- /*
651
- * Returns ID of the node using its address (VALUE)
652
- * It is used during the process of dumping Ruby AST to disk
653
- * for replacing of memory addresses into ordinals
654
- */
655
- static int NODEInfo_nodeAdrToID(NODEInfo *info, VALUE adr)
656
- {
657
- return LeafTableInfo_keyToID(&info->nodes, adr);
658
- }
659
-
660
- /*
661
- * Function counts number of nodes and fills NODEInfo struct
662
- * that is neccessary for the node saving to the HDD
663
- */
664
- static int count_num_of_nodes(NODE *node, NODE *parent, NODEInfo *info)
665
- {
666
- int ut[3], num, offset;
667
- if (node == 0)
668
- {
669
- return 0;
670
- }
671
- else if (TYPE((VALUE) node) != T_NODE)
672
- {
673
- rb_raise(rb_eArgError, "count_num_of_nodes: parent node %s: child node (ADR 0x%s) is not a node; Type: %d",
674
- ruby_node_name(nd_type(parent)), RSTRING_PTR(value_to_str((VALUE) node)), TYPE((VALUE) node));
675
- return 0;
676
- }
677
- else
678
- {
679
- offset = nd_type(node) * 3;
680
- ut[0] = nodes_ctbl[offset++];
681
- ut[1] = nodes_ctbl[offset++];
682
- ut[2] = nodes_ctbl[offset];
683
-
684
- if (nd_type(node) == NODE_OP_ASGN2 && nd_type(parent) == NODE_OP_ASGN2)
685
- {
686
- ut[0] = NT_ID;
687
- ut[1] = NT_ID;
688
- ut[2] = NT_ID;
689
- }
690
-
691
- /* Some Ruby 1.9.3 style function arguments (without rb_args_info) */
692
- if (nd_type(node) == NODE_ARGS_AUX)
693
- {
694
- ut[0] = NT_ID;
695
- ut[1] = (nd_type(parent) == NODE_ARGS_AUX) ? NT_LONG : NT_ID;
696
- ut[2] = NT_NODE;
697
-
698
- if (node->u1.value == 0) ut[0] = NT_NULL;
699
- if (node->u2.value == 0) ut[1] = NT_NULL;
700
- if (node->u3.value == 0) ut[2] = NT_NULL;
701
- }
702
- /* Some Ruby 1.9.3-specific code for NODE_ATTRASGN */
703
- if (nd_type(node) == NODE_ATTRASGN)
704
- {
705
- if (node->u1.value == 1) ut[0] = NT_LONG;
706
- }
707
- /* Check if there is information about child nodes types */
708
- if (ut[0] == NT_UNKNOWN || ut[1] == NT_UNKNOWN || ut[2] == NT_UNKNOWN)
709
- {
710
- rb_raise(rb_eArgError, "Cannot interpret node %d (%s)", nd_type(node), ruby_node_name(nd_type(node)));
711
- }
712
- /* Save the ID of the node */
713
- num = 1;
714
- NODEInfo_addNode(info, node, parent);
715
- /* Analyze node childs */
716
- /* a) child 1 */
717
- if (ut[0] == NT_NODE)
718
- {
719
- num += count_num_of_nodes(node->u1.node, node, info);
720
- }
721
- else if (ut[0] == NT_ID)
722
- {
723
- LeafTableInfo_addIDEntry(&info->syms, node->u1.id);
724
- }
725
- else if (ut[0] == NT_VALUE)
726
- {
727
- if (TYPE(node->u1.value) == T_NODE)
728
- rb_raise(rb_eArgError, "NODE instead of VALUE in child 1 of node %s", ruby_node_name(nd_type(node)));
729
- NODEInfo_addValue(info, node->u1.value);
730
- }
731
- else if (ut[0] == NT_IDTABLE)
732
- {
733
- VALUE tkey = value_to_str(node->u1.value);
734
- VALUE idtbl_ary = rb_ary_new();
735
- ID *idtbl = (ID *) node->u1.value;
736
- int i, size = (node->u1.value) ? *idtbl++ : 0;
737
- for (i = 0; i < size; i++)
738
- {
739
- ID sym = *idtbl++;
740
- rb_ary_push(idtbl_ary, INT2FIX(sym));
741
- LeafTableInfo_addIDEntry(&info->syms, sym);
742
- }
743
- LeafTableInfo_addEntry(&info->idtabs, tkey, idtbl_ary);
744
- }
745
- else if (ut[0] != NT_LONG && ut[0] != NT_NULL)
746
- {
747
- rb_raise(rb_eArgError, "1!");
748
- }
749
- /* b) child 2 */
750
- if (ut[1] == NT_NODE)
751
- {
752
- num += count_num_of_nodes(node->u2.node, node, info);
753
- }
754
- else if (ut[1] == NT_ID)
755
- {
756
- LeafTableInfo_addIDEntry(&info->syms, node->u2.id);
757
- }
758
- else if (ut[1] == NT_VALUE)
759
- {
760
- if (TYPE(node->u2.value) == T_NODE)
761
- rb_raise(rb_eArgError, "NODE instead of VALUE in child 2 of node %s", ruby_node_name(nd_type(node)));
762
- NODEInfo_addValue(info, node->u2.value);
763
- }
764
- else if (ut[1] != NT_LONG && ut[1] != NT_NULL)
765
- {
766
- rb_raise(rb_eArgError, "2!");
767
- }
768
-
769
- /* c) child 3 */
770
- if (ut[2] == NT_NODE)
771
- {
772
- num += count_num_of_nodes(node->u3.node, node, info);
773
- }
774
- else if (ut[2] == NT_ID)
775
- {
776
- LeafTableInfo_addIDEntry(&info->syms, node->u3.id);
777
- }
778
- else if (ut[2] == NT_ARGS)
779
- {
780
- #ifdef USE_RB_ARGS_INFO
781
- VALUE varg = Qtrue;
782
- struct rb_args_info *ainfo;
783
- ID asym;
784
- ainfo = node->u3.args;
785
- // Save child nodes
786
- num += count_num_of_nodes(ainfo->pre_init, node, info);
787
- num += count_num_of_nodes(ainfo->post_init, node, info);
788
- num += count_num_of_nodes(ainfo->kw_args, node, info);
789
- num += count_num_of_nodes(ainfo->kw_rest_arg, node, info);
790
- num += count_num_of_nodes(ainfo->opt_args, node, info);
791
- // Save rb_args_info structure content
792
- varg = rb_ary_new();
793
- rb_ary_push(varg, value_to_str((VALUE) ainfo->pre_init));
794
- rb_ary_push(varg, value_to_str((VALUE) ainfo->post_init));
795
- rb_ary_push(varg, INT2FIX(ainfo->pre_args_num));
796
- rb_ary_push(varg, INT2FIX(ainfo->post_args_num));
797
-
798
- asym = ainfo->first_post_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
799
- if (asym != 0)
800
- LeafTableInfo_addIDEntry(&info->syms, asym);
801
-
802
- asym = ainfo->rest_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
803
- if (asym != 0)
804
- LeafTableInfo_addIDEntry(&info->syms, asym);
805
-
806
- asym = ainfo->block_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
807
- if (asym != 0)
808
- LeafTableInfo_addIDEntry(&info->syms, asym);
809
- rb_ary_push(varg, value_to_str((VALUE) ainfo->kw_args));
810
- rb_ary_push(varg, value_to_str((VALUE) ainfo->kw_rest_arg));
811
- rb_ary_push(varg, value_to_str((VALUE) ainfo->opt_args));
812
-
813
- LeafTableInfo_addEntry(&info->args, value_to_str((VALUE) ainfo), varg);
814
- #else
815
- rb_raise(rb_eArgError, "NT_ARGS entry without USE_RB_ARGS_INFO");
816
- #endif
817
- }
818
- else if (ut[2] == NT_ENTRY)
819
- {
820
- ID gsym = node->u3.entry->id;
821
- // Save symbol to the symbol table
822
- int newid = LeafTableInfo_addIDEntry(&info->syms, gsym);
823
- LeafTableInfo_addEntry(&info->gentries, value_to_str(node->u3.value), INT2FIX(newid));
824
- }
825
- else if (ut[2] != NT_LONG && ut[2] != NT_NULL)
826
- {
827
- rb_raise(rb_eArgError, "Invalid child node 3 of node %s: TYPE %d, VALUE %"PRIxPTR,
828
- ruby_node_name(nd_type(node)), ut[2], (uintptr_t) (node->u3.value));
829
- }
830
-
831
- return num;
832
- }
833
- }
834
-
835
-
836
-
837
- //-------------------------------------------------------------------------
838
-
839
- /*
840
- * Part 4. Functions for loading marshalled nodes
841
- */
842
- typedef struct {
843
- ID *syms_adr; // Table of symbols
844
- int syms_len;
845
-
846
- VALUE *lits_adr; // Table of literals
847
- int lits_len;
848
-
849
- ID **idtbls_adr; // Table of symbols tables
850
- int idtbls_len;
851
-
852
- struct rb_global_entry **gvars_adr; // Table of global variables entries
853
- int gvars_len;
854
-
855
- NODE **nodes_adr; // Table of nodes
856
- int nodes_len;
857
- #ifdef USE_RB_ARGS_INFO
858
- struct rb_args_info **args_adr; // Table of code blocks arguments
859
- int args_len;
860
- #endif
861
- } NODEObjAddresses;
862
-
863
-
864
- void NODEObjAddresses_free(NODEObjAddresses *obj)
865
- {
866
- xfree(obj->syms_adr);
867
- xfree(obj->idtbls_adr);
868
- xfree(obj->gvars_adr);
869
- xfree(obj->nodes_adr);
870
- #ifdef USE_RB_ARGS_INFO
871
- xfree(obj->args_adr);
872
- #endif
873
- xfree(obj);
874
- }
875
-
876
-
877
-
878
- void rbstr_printf(VALUE str, const char *fmt, ...)
879
- {
880
- char buf[1024];
881
- va_list ptr;
882
-
883
- va_start(ptr, fmt);
884
- vsprintf(buf, fmt, ptr);
885
- rb_str_append(str, rb_str_new2(buf));
886
- va_end(ptr);
887
- }
888
-
889
- #define PRINT_NODE_TAB for (j = 0; j < tab; j++) rbstr_printf(str, " ");
890
- /*
891
- * Recursively transforms node into Ruby string
892
- * str -- output Ruby string
893
- * node -- input Ruby NODE
894
- * tab -- number of tabulations during print
895
- * show_offsets -- 0/1 show/hide addresses and symbol IDs
896
- */
897
- static void print_node(VALUE str, NODE *node, int tab, int show_offsets)
898
- {
899
- int i, j, type, ut[3];
900
- VALUE uref[3];
901
-
902
- PRINT_NODE_TAB
903
- if (node == NULL)
904
- {
905
- rbstr_printf(str, "(NULL)\n");
906
- return;
907
- }
908
- type = nd_type(node);
909
-
910
- if (show_offsets)
911
- {
912
- rbstr_printf(str, "@ %s | %16"PRIxPTR " | %16"PRIxPTR " %16"PRIxPTR " %16"PRIxPTR " (line %d)\n",
913
- ruby_node_name(type),
914
- (intptr_t) node,
915
- (intptr_t) node->u1.value, (intptr_t) node->u2.value, (intptr_t) node->u3.value,
916
- nd_line(node));
917
- }
918
- else
919
- {
920
- rbstr_printf(str, "@ %s (line %d)\n", ruby_node_name(type), nd_line(node));
921
- }
922
-
923
- ut[0] = nodes_ctbl[type * 3];
924
- ut[1] = nodes_ctbl[type * 3 + 1];
925
- ut[2] = nodes_ctbl[type * 3 + 2];
926
-
927
- uref[0] = node->u1.value;
928
- uref[1] = node->u2.value;
929
- uref[2] = node->u3.value;
930
-
931
- for (i = 0; i < 3; i++)
932
- {
933
-
934
- if (ut[i] == NT_NODE)
935
- {
936
- if (nd_type(node) != NODE_OP_ASGN2 || i != 2)
937
- print_node(str, RNODE(uref[i]), tab + 1, show_offsets);
938
- else
939
- {
940
- if (ut[i] != 0 && TYPE(ut[i]) != T_NODE)
941
- rb_raise(rb_eArgError, "print_node: broken node 0x%s", RSTRING_PTR(value_to_str(ut[i])));
942
- PRINT_NODE_TAB; rbstr_printf(str, " ");
943
- rbstr_printf(str, "%"PRIxPTR " %"PRIxPTR " %"PRIxPTR"\n",
944
- (intptr_t) RNODE(uref[i])->u1.value,
945
- (intptr_t) RNODE(uref[i])->u2.value,
946
- (intptr_t) RNODE(uref[i])->u3.value);
947
- }
948
- }
949
- else if (ut[i] == NT_VALUE)
950
- {
951
- char *class_name = RSTRING_PTR(rb_funcall(rb_funcall(uref[i], rb_intern("class"), 0), rb_intern("to_s"), 0));
952
- PRINT_NODE_TAB; rbstr_printf(str, " ");
953
- if (show_offsets)
954
- {
955
- rbstr_printf(str, ">| ADR: %"PRIxPTR"; CLASS: %s (TYPE %d); VALUE: %s\n",
956
- (intptr_t) uref[i],
957
- class_name, TYPE(uref[i]),
958
- RSTRING_PTR(rb_funcall(uref[i], rb_intern("to_s"), 0)));
959
- }
960
- else
961
- {
962
- rbstr_printf(str, ">| CLASS: %s (TYPE %d); VALUE: %s\n",
963
- class_name, TYPE(uref[i]),
964
- RSTRING_PTR(rb_funcall(uref[i], rb_intern("to_s"), 0)));
965
- }
966
- }
967
- else if (ut[i] == NT_ID)
968
- {
969
- const char *str_null = "<NULL>", *str_intern = "<NONAME>";
970
- const char *str_sym;
971
- PRINT_NODE_TAB; rbstr_printf(str, " ");
972
-
973
- if (uref[i] == 0)
974
- str_sym = str_null;
975
- else
976
- {
977
- VALUE rbstr_sym = rb_id2str(uref[i]);
978
- if (TYPE(rbstr_sym) == T_STRING)
979
- str_sym = RSTRING_PTR(rb_id2str(uref[i]));
980
- else
981
- str_sym = str_intern;
982
- }
983
-
984
- if (show_offsets)
985
- rbstr_printf(str, ">| ID: %d; SYMBOL: :%s\n", (ID) uref[i], str_sym);
986
- else
987
- rbstr_printf(str, ">| SYMBOL: :%s\n", str_sym);
988
- }
989
- else if (ut[i] == NT_LONG)
990
- {
991
- PRINT_NODE_TAB; rbstr_printf(str, " ");
992
- rbstr_printf(str, ">| %"PRIxPTR "\n", (intptr_t) uref[i]);
993
- }
994
- else if (ut[i] == NT_NULL)
995
- {
996
- PRINT_NODE_TAB; rbstr_printf(str, " ");
997
- rbstr_printf(str, ">| (NULL)\n");
998
- }
999
- else if (ut[i] == NT_ARGS)
1000
- {
1001
- PRINT_NODE_TAB; rbstr_printf(str, " ");
1002
- rbstr_printf(str, ">| ARGS\n");
1003
- }
1004
- else if (ut[i] == NT_IDTABLE)
1005
- {
1006
- PRINT_NODE_TAB; rbstr_printf(str, " ");
1007
- rbstr_printf(str, ">| IDTABLE\n");
1008
- }
1009
- else if (ut[i] == NT_ENTRY)
1010
- {
1011
- struct rb_global_entry *gentry;
1012
- gentry = (struct rb_global_entry *) uref[i];
1013
- PRINT_NODE_TAB; rbstr_printf(str, " ");
1014
- rbstr_printf(str, ">| [GLOBAL ENTRY PTR=0x%"PRIxPTR" ID=%X]\n", (uintptr_t) gentry->var, gentry->id);
1015
- }
1016
- else
1017
- {
1018
- PRINT_NODE_TAB; rbstr_printf(str, " ");
1019
- rbstr_printf(str, ">| [UNKNOWN]\n");
1020
- }
1021
- }
1022
- }
1023
-
1024
-
1025
-
1026
- void resolve_syms_ords(VALUE data, NODEObjAddresses *relocs)
1027
- {
1028
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("symbols")));
1029
- int i;
1030
- if (tbl_val == Qnil)
1031
- {
1032
- rb_raise(rb_eArgError, "Cannot find symbols table");
1033
- }
1034
- if (TYPE(tbl_val) != T_ARRAY)
1035
- {
1036
- rb_raise(rb_eArgError, "Symbols table is not an array");
1037
- }
1038
- relocs->syms_len = RARRAY_LEN(tbl_val);
1039
- relocs->syms_adr = ALLOC_N(ID, relocs->syms_len);
1040
- for (i = 0; i < relocs->syms_len; i++)
1041
- {
1042
- VALUE r_sym = RARRAY_PTR(tbl_val)[i];
1043
- if (TYPE(r_sym) == T_STRING)
1044
- { /* Created symbol will be immune to garbage collector */
1045
- relocs->syms_adr[i] = rb_intern(RSTRING_PTR(r_sym));
1046
- }
1047
- else if (TYPE(r_sym) == T_FIXNUM)
1048
- {
1049
- relocs->syms_adr[i] = (ID) FIX2INT(r_sym);
1050
- }
1051
- else
1052
- {
1053
- rb_raise(rb_eArgError, "Symbols table is corrupted");
1054
- }
1055
- }
1056
- }
1057
-
1058
- void resolve_lits_ords(VALUE data, NODEObjAddresses *relocs)
1059
- {
1060
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("literals")));
1061
- int i;
1062
- if (tbl_val == Qnil)
1063
- {
1064
- rb_raise(rb_eArgError, "Cannot find literals table");
1065
- }
1066
- if (TYPE(tbl_val) != T_ARRAY)
1067
- {
1068
- rb_raise(rb_eArgError, "Literals table is not an array");
1069
- }
1070
- relocs->lits_adr = RARRAY_PTR(tbl_val);
1071
- relocs->lits_len = RARRAY_LEN(tbl_val);
1072
- /* Mark all symbols as "immortal" (i.e. not collectable
1073
- by Ruby GC): some of them can be used in the syntax tree!
1074
- See the presentation of Narihiro Nakamura, author of
1075
- symbol GC in Ruby 2.x for details
1076
- http://www.slideshare.net/authorNari/symbol-gc */
1077
- for (i = 0; i < relocs->lits_len; i++)
1078
- {
1079
- if (TYPE(relocs->lits_adr[i]) == T_SYMBOL)
1080
- {
1081
- SYM2ID(relocs->lits_adr[i]);
1082
- }
1083
- }
1084
- }
1085
-
1086
- void resolve_gvars_ords(VALUE data, NODEObjAddresses *relocs)
1087
- {
1088
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("global_entries")));
1089
- int i;
1090
-
1091
- if (tbl_val == Qnil)
1092
- {
1093
- rb_raise(rb_eArgError, "Cannot find global entries table");
1094
- }
1095
- if (TYPE(tbl_val) != T_ARRAY)
1096
- {
1097
- rb_raise(rb_eArgError, "Global entries table should be an array");
1098
- }
1099
- relocs->gvars_len = RARRAY_LEN(tbl_val);
1100
- relocs->gvars_adr = ALLOC_N(struct rb_global_entry *, relocs->gvars_len);
1101
- for (i = 0; i < relocs->gvars_len; i++)
1102
- {
1103
- int ind = FIX2INT(RARRAY_PTR(tbl_val)[i]);
1104
- ID sym = relocs->syms_adr[ind];
1105
- relocs->gvars_adr[i] = rb_global_entry(sym);
1106
- }
1107
- }
1108
-
1109
-
1110
- void resolve_idtbls_ords(VALUE data, NODEObjAddresses *relocs)
1111
- {
1112
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("id_tables")));
1113
- int i, j, idnum;
1114
-
1115
- if (tbl_val == Qnil)
1116
- {
1117
- rb_raise(rb_eArgError, "Cannot find id_tables entries");
1118
- }
1119
- relocs->idtbls_len = RARRAY_LEN(tbl_val);
1120
- relocs->idtbls_adr = ALLOC_N(ID *, relocs->idtbls_len);
1121
- for (i = 0; i < relocs->idtbls_len; i++)
1122
- {
1123
- VALUE idtbl = RARRAY_PTR(tbl_val)[i];
1124
- idnum = RARRAY_LEN(idtbl);
1125
- if (idnum == 0)
1126
- { // Empty table: NULL pointer in the address table
1127
- relocs->idtbls_adr[i] = NULL;
1128
- }
1129
- else
1130
- { // Filled table: pointer to dynamic memory
1131
- relocs->idtbls_adr[i] = ALLOC_N(ID, idnum + 1);
1132
- relocs->idtbls_adr[i][0] = idnum;
1133
- for (j = 0; j < idnum; j++)
1134
- {
1135
- int ind = FIX2INT(RARRAY_PTR(idtbl)[j]);
1136
- relocs->idtbls_adr[i][j+1] = relocs->syms_adr[ind];
1137
- }
1138
- }
1139
- }
1140
- }
1141
-
1142
- void resolve_nodes_ords(VALUE data, int num_of_nodes, NODEObjAddresses *relocs)
1143
- {
1144
- int i;
1145
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("nodes")));
1146
- if (tbl_val == Qnil)
1147
- {
1148
- rb_raise(rb_eArgError, "Cannot find nodes entries");
1149
- }
1150
- if (TYPE(tbl_val) != T_STRING)
1151
- {
1152
- rb_raise(rb_eArgError, "Nodes description must be a string");
1153
- }
1154
- relocs->nodes_adr = ALLOC_N(NODE *, num_of_nodes);
1155
- relocs->nodes_len = num_of_nodes;
1156
- for (i = 0; i < num_of_nodes; i++)
1157
- {
1158
- relocs->nodes_adr[i] = (NODE *) NEW_NODE((enum node_type) 0, 0, 0, 0);
1159
- }
1160
- }
1161
-
1162
#ifdef USE_RB_ARGS_INFO
/*
 * Converts a node ordinal taken from an args entry to the node address.
 * Ordinal -1 means "no node" and gives NULL; anything outside of
 * -1..nodes_len-1 raises ArgumentError.
 */
static NODE *nm_args_node_by_ord(VALUE r_ord, NODEObjAddresses *relocs)
{
    long ord = NUM2LONG(r_ord);
    if (ord < -1 || ord >= relocs->nodes_len)
        rb_raise(rb_eArgError, "Invalid node ordinal %d", (int) ord);
    return (ord == -1) ? NULL : relocs->nodes_adr[ord];
}

/*
 * Converts a symbol ordinal taken from an args entry to the symbol ID.
 * Ordinal -1 means "no symbol" and gives 0; anything outside of
 * -1..syms_len-1 raises ArgumentError. field_no only tags the error
 * message (1 -- first_post_arg, 2 -- rest_arg, 3 -- block_arg).
 */
static ID nm_args_sym_by_ord(VALUE r_ord, int field_no, NODEObjAddresses *relocs)
{
    long ord = NUM2LONG(r_ord);
    if (ord < -1 || ord >= relocs->syms_len)
        rb_raise(rb_eArgError, "%d- Invalid symbol ID ordinal %d", field_no, (int) ord);
    return (ord == -1) ? 0 : relocs->syms_adr[ord];
}

/*
 * Builds the table of rb_args_info structures (relocs->args_adr,
 * relocs->args_len) from the :args array of the unpacked dump.
 *
 * Every entry is an array of 10 integers:
 *   [0] pre_init       node ordinal
 *   [1] post_init      node ordinal
 *   [2] pre_args_num   plain number
 *   [3] post_args_num  plain number
 *   [4] first_post_arg symbol ordinal
 *   [5] rest_arg       symbol ordinal
 *   [6] block_arg      symbol ordinal
 *   [7] kw_args        node ordinal
 *   [8] kw_rest_arg    node ordinal
 *   [9] opt_args       node ordinal
 * Node ordinals are resolved through relocs->nodes_adr and symbol ordinals
 * through relocs->syms_adr, so both tables must be ready before the call.
 *
 * Raises ArgumentError if the table is missing, is not an Array, has a
 * malformed entry or contains an ordinal that is out of range.
 */
void resolve_args_ords(VALUE data, NODEObjAddresses *relocs)
{
    int i;
    VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("args")));

    if (tbl_val == Qnil)
    {
        rb_raise(rb_eArgError, "Cannot find args entries table");
    }
    if (TYPE(tbl_val) != T_ARRAY)
    {
        rb_raise(rb_eArgError, "args description must be an array");
    }
    relocs->args_len = RARRAY_LEN(tbl_val);
    relocs->args_adr = ALLOC_N(struct rb_args_info *, relocs->args_len);
    /* Start from all-NULL slots: an exception raised in the middle of the
       loop must not leave uninitialized pointers behind.
       NOTE(review): presumably NODEObjAddresses_free walks this table --
       confirm against its implementation. */
    MEMZERO(relocs->args_adr, struct rb_args_info *, relocs->args_len);
    for (i = 0; i < relocs->args_len; i++)
    {
        VALUE ainfo_val = rb_ary_entry(tbl_val, i);
        struct rb_args_info *ainfo;

        /* Validate the entry BEFORE touching its storage or allocating */
        if (TYPE(ainfo_val) != T_ARRAY || RARRAY_LEN(ainfo_val) != 10)
        {
            rb_raise(rb_eArgError, "args entry %d is corrupted", i);
        }
        ainfo = ALLOC(struct rb_args_info);
        relocs->args_adr[i] = ainfo;

        /* Plain numbers: no ordinal resolving */
        ainfo->pre_args_num = NUM2INT(rb_ary_entry(ainfo_val, 2));
        ainfo->post_args_num = NUM2INT(rb_ary_entry(ainfo_val, 3));
        /* Resolve nodes */
        ainfo->pre_init = nm_args_node_by_ord(rb_ary_entry(ainfo_val, 0), relocs);
        ainfo->post_init = nm_args_node_by_ord(rb_ary_entry(ainfo_val, 1), relocs);
        ainfo->kw_args = nm_args_node_by_ord(rb_ary_entry(ainfo_val, 7), relocs);
        ainfo->kw_rest_arg = nm_args_node_by_ord(rb_ary_entry(ainfo_val, 8), relocs);
        ainfo->opt_args = nm_args_node_by_ord(rb_ary_entry(ainfo_val, 9), relocs);
        /* Resolve symbolic ordinals */
        ainfo->first_post_arg = nm_args_sym_by_ord(rb_ary_entry(ainfo_val, 4), 1, relocs);
        ainfo->rest_arg = nm_args_sym_by_ord(rb_ary_entry(ainfo_val, 5), 2, relocs);
        ainfo->block_arg = nm_args_sym_by_ord(rb_ary_entry(ainfo_val, 6), 3, relocs);
    }
}
#endif
1247
-
1248
- /*
1249
- * Transforms binary data with nodes descriptions into Ruby AST (i.e.
1250
- * ternary tree of nodes). Each node is represented in the next binary format:
1251
- *
1252
- * [4 bytes -- pointers info] [node flags] [child ORD1] [child ORD2] [child ORD3]
1253
- *
1254
- * Pointers info:
1255
- * BYTE -- child 1 info (bits 7..4 -- ordinal type, bits 3..0 -- ordinal size, bytes)
1256
- * BYTE -- child 2 info
1257
- * BYTE -- child 3 info
1258
- * BYTE -- node flags length, bytes
1259
- * Node flags:
1260
- * node->flags field packed by bin_to_value function
1261
- * child ORDi Ordinal of ith node child packed by bin_to_value_function
1262
- * (it will be transformed to the real address in memory, i.e. pointer
1263
- * or symbol ID during data loading)
1264
- */
1265
- void load_nodes_from_str(VALUE data, NODEObjAddresses *relocs)
1266
- {
1267
- int i, j;
1268
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("nodes")));
1269
- unsigned char *bin = (unsigned char *) RSTRING_PTR(tbl_val);
1270
- NODE *node = NULL;
1271
- for (i = 0; i < relocs->nodes_len; i++)
1272
- {
1273
- int rtypes[4];
1274
- VALUE u[3], flags;
1275
- // Read data structure info
1276
- for (j = 0; j < 4; j++)
1277
- rtypes[j] = *bin++;
1278
- flags = bin_to_value(bin, rtypes[3]); bin += rtypes[3];
1279
- for (j = 0; j < 3; j++)
1280
- {
1281
- int val_len = (rtypes[j] & 0xF0) >> 4;
1282
- u[j] = bin_to_value(bin, val_len);
1283
- bin += val_len;
1284
- rtypes[j] &= 0x0F;
1285
-
1286
- }
1287
- if ((char *)bin - RSTRING_PTR(tbl_val) > RSTRING_LEN(tbl_val))
1288
- rb_raise(rb_eArgError, "Nodes binary dump is too short");
1289
- // Resolving all addresses
1290
- for (j = 0; j < 3; j++)
1291
- {
1292
- switch(rtypes[j])
1293
- {
1294
- case VL_RAW: // Do nothing: it is raw data
1295
- break;
1296
- case VL_NODE:
1297
- if (u[j] >= (unsigned int) relocs->nodes_len)
1298
- rb_raise(rb_eArgError, "Cannot resolve VL_NODE entry %d", (int) u[j]);
1299
- u[j] = (VALUE) relocs->nodes_adr[u[j]];
1300
- if (TYPE(u[j]) != T_NODE)
1301
- rb_raise(rb_eArgError, "load_nodes_from_str: nodes memory corrupted");
1302
- break;
1303
- case VL_ID:
1304
- if (u[j] >= (unsigned int) relocs->syms_len)
1305
- rb_raise(rb_eArgError, "Cannot resolve VL_ID entry %d", (int) u[j]);
1306
- u[j] = relocs->syms_adr[u[j]];
1307
- break;
1308
- case VL_GVAR:
1309
- if (u[j] >= (unsigned int) relocs->gvars_len)
1310
- rb_raise(rb_eArgError, "Cannot resolve VL_GVAR entry %d", (int) u[j]);
1311
- u[j] = (VALUE) relocs->gvars_adr[u[j]];
1312
- break;
1313
- case VL_IDTABLE:
1314
- if (u[j] >= (unsigned int) relocs->idtbls_len)
1315
- rb_raise(rb_eArgError, "Cannot resolve VL_IDTABLE entry %d", (int) u[j]);
1316
- u[j] = (VALUE) relocs->idtbls_adr[u[j]];
1317
- break;
1318
- #ifdef USE_RB_ARGS_INFO
1319
- case VL_ARGS:
1320
- if (u[j] >= (unsigned int) relocs->args_len)
1321
- rb_raise(rb_eArgError, "Cannot resolve VL_ARGS entry %d", (int) u[j]);
1322
- u[j] = (VALUE) relocs->args_adr[u[j]];
1323
- break;
1324
- #endif
1325
- case VL_LIT:
1326
- if (u[j] >= (unsigned int) relocs->lits_len)
1327
- rb_raise(rb_eArgError, "Cannot resolve VL_LIT entry %d", (int) u[j]);
1328
- u[j] = (VALUE) relocs->lits_adr[u[j]];
1329
- break;
1330
- default:
1331
- rb_raise(rb_eArgError, "Unknown RTYPE %d", rtypes[j]);
1332
- }
1333
- }
1334
-
1335
- // Fill classic node structure
1336
- node = relocs->nodes_adr[i];
1337
- #ifdef RESET_GC_FLAGS
1338
- flags = flags & (~0x3); // Ruby 1.9.x -- specific thing
1339
- #endif
1340
- node->flags = (flags << 5) | T_NODE;
1341
- node->nd_reserved = 0;
1342
- node->u1.value = u[0];
1343
- node->u2.value = u[1];
1344
- node->u3.value = u[2];
1345
- }
1346
- }
1347
-
1348
- /*
1349
- * Returns the value of string hash field using symbolic key
1350
- */
1351
- static VALUE get_hash_strfield(VALUE hash, const char *idtxt)
1352
- {
1353
- VALUE str = rb_hash_aref(hash, ID2SYM(rb_intern(idtxt)));
1354
- if (TYPE(str) != T_STRING)
1355
- {
1356
- rb_raise(rb_eArgError, "Hash field %s is not a string", idtxt);
1357
- return Qnil;
1358
- }
1359
- else
1360
- {
1361
- return str;
1362
- }
1363
- }
1364
-
1365
- /*
1366
- * Check validity of node hash representation signatures ("magic" values)
1367
- */
1368
- static VALUE check_hash_magic(VALUE data)
1369
- {
1370
- VALUE val, refval;
1371
- // MAGIC signature must be valid
1372
- val = get_hash_strfield(data, "MAGIC");
1373
- if (strcmp(NODEMARSHAL_MAGIC, RSTRING_PTR(val)))
1374
- rb_raise(rb_eArgError, "Bad value of MAGIC signature");
1375
- // RUBY_PLATFORM signature must match the current platform
1376
- val = get_hash_strfield(data, "RUBY_PLATFORM");
1377
- refval = rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM"));
1378
- if (strcmp(RSTRING_PTR(refval), RSTRING_PTR(val)))
1379
- rb_raise(rb_eArgError, "Incompatible RUBY_PLATFORM value %s", RSTRING_PTR(val));
1380
- // RUBY_VERSION signature must match the used Ruby interpreter
1381
- val = get_hash_strfield(data, "RUBY_VERSION");
1382
- refval = rb_const_get(rb_cObject, rb_intern("RUBY_VERSION"));
1383
- if (strcmp(RSTRING_PTR(refval), RSTRING_PTR(val)))
1384
- rb_raise(rb_eArgError, "Incompatible RUBY_VERSION value %s", RSTRING_PTR(val));
1385
- return Qtrue;
1386
- }
1387
-
1388
- /*
1389
- * Part 5. C-to-Ruby interface
1390
- *
1391
- */
1392
-
1393
- /*
1394
- * Restore Ruby node from the binary blob (dump)
1395
- */
1396
- static VALUE m_nodedump_from_memory(VALUE self, VALUE dump)
1397
- {
1398
- VALUE cMarshal, data, val, val_relocs;
1399
- VALUE gc_was_disabled;
1400
- int num_of_nodes;
1401
- NODEObjAddresses *relocs;
1402
- /* DISABLE GARBAGE COLLECTOR (required for stable loading
1403
- of large node trees */
1404
- gc_was_disabled = rb_gc_disable();
1405
- /* Wrap struct for relocations */
1406
- val_relocs = Data_Make_Struct(cNodeObjAddresses, NODEObjAddresses,
1407
- NULL, NODEObjAddresses_free, relocs); // This data envelope cannot exist without NODE
1408
- /* Load and unpack our dump */
1409
- cMarshal = rb_const_get(rb_cObject, rb_intern("Marshal"));
1410
- data = rb_funcall(cMarshal, rb_intern("load"), 1, dump);
1411
- if (TYPE(data) != T_HASH)
1412
- {
1413
- rb_raise(rb_eArgError, "Input dump is corrupted");
1414
- }
1415
- val = rb_hash_aref(data, ID2SYM(rb_intern("num_of_nodes")));
1416
- if (val == Qnil)
1417
- {
1418
- rb_raise(rb_eArgError, "num_of_nodes not found");
1419
- }
1420
- else
1421
- {
1422
- num_of_nodes = FIX2INT(val);
1423
- }
1424
- /* Check "magic" signature and platform identifiers */
1425
- check_hash_magic(data);
1426
- /* Get the information about the source file that was compiled to the node */
1427
- // a) node name
1428
- val = rb_hash_aref(data, ID2SYM(rb_intern("nodename")));
1429
- if (val == Qnil || TYPE(val) == T_STRING)
1430
- rb_iv_set(self, "@nodename", val);
1431
- else
1432
- rb_raise(rb_eArgError, "nodename value is corrupted");
1433
- // b) file name
1434
- val = rb_hash_aref(data, ID2SYM(rb_intern("filename")));
1435
- if (val == Qnil || TYPE(val) == T_STRING)
1436
- rb_iv_set(self, "@filename", val);
1437
- else
1438
- rb_raise(rb_eArgError, "filename value is corrupted");
1439
- // c) file path
1440
- val = rb_hash_aref(data, ID2SYM(rb_intern("filepath")));
1441
- if (val == Qnil || TYPE(val) == T_STRING)
1442
- rb_iv_set(self, "@filepath", val);
1443
- else
1444
- rb_raise(rb_eArgError, "filepath value is corrupted");
1445
- /* Load all required data */
1446
- resolve_syms_ords(data, relocs); // Symbols
1447
- resolve_lits_ords(data, relocs); // Literals
1448
- resolve_gvars_ords(data, relocs); // Global entries (with symbol ID resolving)
1449
- resolve_idtbls_ords(data, relocs); // Identifiers tables (with symbol ID resolving)
1450
- resolve_nodes_ords(data, num_of_nodes, relocs); // Allocate memory for all nodes
1451
- #ifdef USE_RB_ARGS_INFO
1452
- resolve_args_ords(data, relocs); // Load args entries with symbols ID and nodes resolving
1453
- #endif
1454
- load_nodes_from_str(data, relocs);
1455
- /* Save the loaded node tree and collect garbage */
1456
- rb_iv_set(self, "@node", (VALUE) relocs->nodes_adr[0]);
1457
- rb_iv_set(self, "@num_of_nodes", INT2FIX(num_of_nodes));
1458
- rb_iv_set(self, "@obj_addresses", val_relocs);
1459
- if (gc_was_disabled == Qfalse)
1460
- {
1461
- rb_gc_enable();
1462
- rb_gc_start();
1463
- }
1464
- return self;
1465
- }
1466
-
1467
-
1468
- /*
1469
- * call-seq:
1470
- * obj.symbols
1471
- *
1472
- * Return array with the list of symbols
1473
- */
1474
- static VALUE m_nodedump_symbols(VALUE self)
1475
- {
1476
- int i;
1477
- VALUE val_relocs, val_nodeinfo, syms;
1478
- // Variant 1: node loaded from file
1479
- val_relocs = rb_iv_get(self, "@obj_addresses");
1480
- if (val_relocs != Qnil)
1481
- {
1482
- NODEObjAddresses *relocs;
1483
- Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
1484
- syms = rb_ary_new();
1485
- for (i = 0; i < relocs->syms_len; i++)
1486
- rb_ary_push(syms, ID2SYM(relocs->syms_adr[i]));
1487
- return syms;
1488
- }
1489
- // Variant 2: node saved to file (parsed from memory)
1490
- val_nodeinfo = rb_iv_get(self, "@nodeinfo");
1491
- if (val_nodeinfo != Qnil)
1492
- {
1493
- NODEInfo *ninfo;
1494
- VALUE *ary;
1495
- Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
1496
- syms = rb_funcall(ninfo->syms.vals, rb_intern("values"), 0);
1497
- ary = RARRAY_PTR(syms);
1498
- for (i = 0; i < RARRAY_LEN(syms); i++)
1499
- {
1500
- ary[i] = rb_funcall(ary[i], rb_intern("to_sym"), 0);
1501
- }
1502
- return syms;
1503
- }
1504
- rb_raise(rb_eArgError, "Symbol information not initialized. Run to_hash before reading.");
1505
- }
1506
-
1507
- /*
1508
- * call-seq:
1509
- * obj.change_symbol(old_sym, new_sym)
1510
- *
1511
- * Replace one symbol by another (to be used for code obfuscation)
1512
- * - +old_sym+ -- String that contains symbol name to be replaced
1513
- * - +new_sym+ -- String that contains new name of the symbol
1514
- */
1515
- static VALUE m_nodedump_change_symbol(VALUE self, VALUE old_sym, VALUE new_sym)
1516
- {
1517
- VALUE val_nodehash = rb_iv_get(self, "@nodehash");
1518
- VALUE syms, key;
1519
- // Check if node is position-independent
1520
- // (i.e. with initialized NODEInfo structure that contains
1521
- // relocations for symbols)
1522
- if (val_nodehash == Qnil)
1523
- rb_raise(rb_eArgError, "This node is not preparsed into Hash");
1524
- // Check data types of the input array
1525
- if (TYPE(old_sym) != T_STRING)
1526
- {
1527
- rb_raise(rb_eArgError, "old_sym argument must be a string");
1528
- }
1529
- if (TYPE(new_sym) != T_STRING)
1530
- {
1531
- rb_raise(rb_eArgError, "new_sym argument must be a string");
1532
- }
1533
- // Get the symbol table from the Hash
1534
- syms = rb_hash_aref(val_nodehash, ID2SYM(rb_intern("symbols")));
1535
- if (syms == Qnil)
1536
- rb_raise(rb_eArgError, "Preparsed hash has no :symbols field");
1537
- // Check if new_sym is present in the symbol table
1538
- key = rb_funcall(syms, rb_intern("find_index"), 1, new_sym);
1539
- if (key != Qnil)
1540
- {
1541
- rb_raise(rb_eArgError, "new_sym value must be absent in table of symbols");
1542
- }
1543
- // Change the symbol in the preparsed Hash
1544
- key = rb_funcall(syms, rb_intern("find_index"), 1, old_sym);
1545
- if (key == Qnil)
1546
- return Qnil;
1547
- RARRAY_PTR(syms)[FIX2INT(key)] = new_sym;
1548
- return self;
1549
- }
1550
-
1551
- /*
1552
- * Return array with the list of literals
1553
- */
1554
- static VALUE m_nodedump_literals(VALUE self)
1555
- {
1556
- int i;
1557
- VALUE val_relocs, val_nodeinfo, lits;
1558
- // Variant 1: node loaded from file. It uses NODEObjAddresses struct
1559
- // with the results of Ruby NODE structure parsing.
1560
- val_relocs = rb_iv_get(self, "@obj_addresses");
1561
- if (val_relocs != Qnil)
1562
- {
1563
- NODEObjAddresses *relocs;
1564
-
1565
- Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
1566
- lits = rb_ary_new();
1567
- for (i = 0; i < relocs->lits_len; i++)
1568
- {
1569
- VALUE val = relocs->lits_adr[i];
1570
- int t = TYPE(val);
1571
- if (t != T_SYMBOL && t != T_FLOAT && t != T_FIXNUM)
1572
- val = rb_funcall(val, rb_intern("dup"), 0);
1573
- rb_ary_push(lits, val);
1574
- }
1575
- return lits;
1576
- }
1577
- // Variant 2: node saved to file (parsed from memory). It uses
1578
- // NODEInfo struct that is initialized during node dump parsing.
1579
- val_nodeinfo = rb_iv_get(self, "@nodeinfo");
1580
- if (val_nodeinfo != Qnil)
1581
- {
1582
- NODEInfo *ninfo;
1583
- VALUE *ary;
1584
- Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
1585
- lits = rb_funcall(ninfo->lits.vals, rb_intern("values"), 0);
1586
- ary = RARRAY_PTR(lits);
1587
- for (i = 0; i < RARRAY_LEN(lits); i++)
1588
- {
1589
- int t = TYPE(ary[i]);
1590
- if (t != T_SYMBOL && t != T_FLOAT && t != T_FIXNUM)
1591
- ary[i] = rb_funcall(ary[i], rb_intern("dup"), 0);
1592
- }
1593
- return lits;
1594
- }
1595
- rb_raise(rb_eArgError, "Literals information not initialized. Run to_hash before reading.");
1596
- }
1597
-
1598
- /*
1599
- * Update the array with the list of literals
1600
- * (to be used for code obfuscation)
1601
- * Warning! This function is a stub!
1602
- */
1603
- static VALUE m_nodedump_change_literal(VALUE self, VALUE old_lit, VALUE new_lit)
1604
- {
1605
- /* TO BE IMPLEMENTED */
1606
- return self;
1607
- }
1608
-
1609
-
1610
- /*
1611
- * call-seq:
1612
- * obj.compile
1613
- *
1614
- * Creates the RubyVM::InstructionSequence object from the node
1615
- */
1616
- static VALUE m_nodedump_compile(VALUE self)
1617
- {
1618
- NODE *node = RNODE(rb_iv_get(self, "@node"));
1619
- VALUE nodename = rb_iv_get(self, "@nodename");
1620
- VALUE filename = rb_iv_get(self, "@filename");
1621
- VALUE filepath = rb_iv_get(self, "@filepath");
1622
- #ifndef WITH_RB_ISEQW_NEW
1623
- /* For Pre-2.3 */
1624
- return rb_iseq_new_top(node, nodename, filename, filepath, Qfalse);
1625
- #else
1626
- /* For Ruby 2.3 */
1627
- return rb_iseqw_new(rb_iseq_new_top(node, nodename, filename, filepath, Qfalse));
1628
- #endif
1629
- }
1630
-
1631
- /*
1632
- * Parses Ruby file with the source code and saves the node
1633
- */
1634
- static VALUE m_nodedump_from_source(VALUE self, VALUE file)
1635
- {
1636
- VALUE line = INT2FIX(1), f, node, filepath, gc_was_disabled;
1637
- const char *fname;
1638
-
1639
- gc_was_disabled = rb_gc_disable();
1640
- rb_secure(1);
1641
- FilePathValue(file);
1642
- fname = StringValueCStr(file);
1643
- /* Remember information about the file */
1644
- rb_iv_set(self, "@nodename", rb_str_new2("<main>"));
1645
- rb_iv_set(self, "@filename", file);
1646
- filepath = rb_funcall(rb_cFile, rb_intern("realpath"), 1, file); // Envelope for rb_realpath_internal
1647
- rb_iv_set(self, "@filepath", filepath);
1648
- /* Create node from the source */
1649
- f = rb_file_open_str(file, "r");
1650
- node = (VALUE) rb_compile_file(fname, f, NUM2INT(line));
1651
- rb_iv_set(self, "@node", node);
1652
- if ((void *) node == NULL)
1653
- {
1654
- rb_raise(rb_eArgError, "Error during string parsing");
1655
- }
1656
- if (gc_was_disabled == Qfalse)
1657
- {
1658
- rb_gc_enable();
1659
- }
1660
- return self;
1661
- }
1662
-
1663
- /*
1664
- * Parses Ruby string with the source code and saves the node
1665
- */
1666
- static VALUE m_nodedump_from_string(VALUE self, VALUE str)
1667
- {
1668
- VALUE line = INT2FIX(1), node, gc_was_disabled;
1669
- const char *fname = "STRING";
1670
- Check_Type(str, T_STRING);
1671
- gc_was_disabled = rb_gc_disable();
1672
- rb_secure(1);
1673
- /* Create empty information about the file */
1674
- rb_iv_set(self, "@nodename", rb_str_new2("<main>"));
1675
- if (RUBY_API_VERSION_MAJOR == 1)
1676
- { /* For Ruby 1.9.x */
1677
- rb_iv_set(self, "@filename", Qnil);
1678
- rb_iv_set(self, "@filepath", Qnil);
1679
- }
1680
- else
1681
- { /* For Ruby 2.x */
1682
- rb_iv_set(self, "@filename", rb_str_new2("<compiled>"));
1683
- rb_iv_set(self, "@filepath", rb_str_new2("<compiled>"));
1684
- }
1685
- /* Create node from the string */
1686
- node = (VALUE) rb_compile_string(fname, str, NUM2INT(line));
1687
- rb_iv_set(self, "@node", node);
1688
- if (gc_was_disabled == Qfalse)
1689
- {
1690
- rb_gc_enable();
1691
- rb_gc_start();
1692
- }
1693
- if ((void *) node == NULL)
1694
- {
1695
- rb_raise(rb_eArgError, "Error during string parsing");
1696
- }
1697
- return self;
1698
- }
1699
-
1700
- /*
1701
- * call-seq:
1702
- * obj.new(:srcfile, filename) # Will load source file from the disk
1703
- * obj.new(:binfile, filename) # Will load file with node binary dump from the disk
1704
- * obj.new(:srcmemory, srcstr) # Will load source code from the string
1705
- * obj.new(:binmemory, binstr) # Will load node binary dump from the string
1706
- *
1707
- * Creates NodeMarshal class example from the source code or dumped
1708
- * syntax tree (NODEs), i.e. preparsed and packed source code. Created
1709
- * object can be used either for code execution or for saving it
1710
- * in the preparsed form (useful for code obfuscation/protection)
1711
- */
1712
- static VALUE m_nodedump_init(VALUE self, VALUE source, VALUE info)
1713
- {
1714
- ID id_usr;
1715
- rb_iv_set(self, "@show_offsets", Qfalse);
1716
- Check_Type(source, T_SYMBOL);
1717
- id_usr = SYM2ID(source);
1718
- if (id_usr == rb_intern("srcfile"))
1719
- {
1720
- return m_nodedump_from_source(self, info);
1721
- }
1722
- else if (id_usr == rb_intern("srcmemory"))
1723
- {
1724
- return m_nodedump_from_string(self, info);
1725
- }
1726
- else if (id_usr == rb_intern("binmemory"))
1727
- {
1728
- return m_nodedump_from_memory(self, info);
1729
- }
1730
- else if (id_usr == rb_intern("binfile"))
1731
- {
1732
- VALUE cFile = rb_const_get(rb_cObject, rb_intern("File"));
1733
- VALUE bin = rb_funcall(cFile, rb_intern("binread"), 1, info);
1734
- return m_nodedump_from_memory(self, bin);
1735
- }
1736
- else
1737
- {
1738
- rb_raise(rb_eArgError, "Invalid source type (it must be :srcfile, :srcmemory, :binmemory of :binfile)");
1739
- }
1740
- return Qnil;
1741
- }
1742
-
1743
- /*
1744
- * call-seq:
1745
- * obj.dump_tree
1746
- *
1747
- * Transforms Ruby syntax tree (NODE) to the String using
1748
- * +rb_parser_dump_tree+ function from +node.c+ (see Ruby source code).
1749
- */
1750
- static VALUE m_nodedump_parser_dump_tree(VALUE self)
1751
- {
1752
- NODE *node = RNODE(rb_iv_get(self, "@node"));
1753
- return rb_parser_dump_tree(node, 0);
1754
- }
1755
-
1756
- /*
1757
- * call-seq:
1758
- * obj.dump_tree_short
1759
- *
1760
- * Transforms Ruby syntax tree (NODE) to the String using custom function
1761
- * instead of +rb_parser_dump_tree+ function.
1762
- *
1763
- * See also #show_offsets, #show_offsets=
1764
- */
1765
- static VALUE m_nodedump_dump_tree_short(VALUE self)
1766
- {
1767
- VALUE str = rb_str_new2(""); // Output string
1768
- NODE *node = RNODE(rb_iv_get(self, "@node"));
1769
- int show_offsets = (rb_iv_get(self, "@show_offsets") == Qtrue) ? 1 : 0;
1770
- print_node(str, node, 0, show_offsets);
1771
- return str;
1772
- }
1773
-
1774
- /*
1775
- * call-seq:
1776
- * obj.show_offsets
1777
- *
1778
- * Returns show_offsets property (used by NodeMarshal#dump_tree_short)
1779
- * It can be either true or false
1780
- */
1781
- static VALUE m_nodedump_show_offsets(VALUE self)
1782
- {
1783
- return rb_iv_get(self, "@show_offsets");
1784
- }
1785
-
1786
- /*
1787
- * call-seq:
1788
- * obj.show_offsets=
1789
- *
1790
- * Sets show_offsets property (used by NodeMarshal#dump_tree_short)
1791
- * It can be either true or false
1792
- */
1793
- static VALUE m_nodedump_set_show_offsets(VALUE self, VALUE value)
1794
- {
1795
- if (value != Qtrue && value != Qfalse)
1796
- {
1797
- rb_raise(rb_eArgError, "show_offsets property must be either true or false");
1798
- }
1799
- return rb_iv_set(self, "@show_offsets", value);
1800
- }
1801
-
1802
-
1803
- /*
1804
- * call-seq:
1805
- * obj.to_hash
1806
- *
1807
- * Converts NodeMarshal class example to the hash that contains full
1808
- * and independent from data structures memory addresses information.
1809
- * Format of the obtained hash depends on used platform (especially
1810
- * size of the pointer) and Ruby version.
1811
- *
1812
- * <b>Format of the hash</b>
1813
- *
1814
- * <i>Part 1: Signatures</i>
1815
- *
1816
- * - <tt>MAGIC</tt> -- NODEMARSHAL11
1817
- * - <tt>RUBY_PLATFORM</tt> -- saved <tt>RUBY_PLATFORM</tt> constant value
1818
- * - <tt>RUBY_VERSION</tt> -- saved <tt>RUBY_VERSION</tt> constant value
1819
- *
1820
- * <i>Part 2: Program loadable elements.</i>
1821
- *
1822
- * All loadable elements are arrays. Index of the array element means
1823
- * its identifier that is used in the node tree.
1824
- *
1825
- * - <tt>literals</tt> -- program literals (strings, ranges etc.)
1826
- * - <tt>symbols</tt> -- program symbols (values have either String or Fixnum
1827
- * data type; numbers are used for symbols that cannot be represented as strings)
1828
- * - <tt>global_entries</tt> -- global variables information
1829
- * - <tt>id_tables</tt> -- array of arrays. Each array contains symbols IDs
1830
- * - <tt>args</tt> -- information about code block argument(s)
1831
- *
1832
- * <i>Part 3: Nodes information</i>
1833
- * - <tt>nodes</tt> -- string that contains binary encoded information
1834
- * about the nodes
1835
- * - <tt>num_of_nodes</tt> -- number of nodes in the <tt>nodes</tt> field
1836
- * - <tt>nodename</tt> -- name of the node (usually "<main>")
1837
- * - <tt>filename</tt> -- name (without path) of .rb file used for the node generation
1838
- * - <tt>filepath</tt> -- name (with full path) of .rb file used for the node generation
1839
- */
1840
- static VALUE m_nodedump_to_hash(VALUE self)
1841
- {
1842
- NODE *node = RNODE(rb_iv_get(self, "@node"));
1843
- NODEInfo *info;
1844
- VALUE ans, num, val_info, gc_was_disabled;
1845
- // DISABLE GARBAGE COLLECTOR (important for dumping)
1846
- gc_was_disabled = rb_gc_disable();
1847
- // Convert the node to the form with relocs (i.e. the information about node)
1848
- // if such form is not present
1849
- val_info = rb_iv_get(self, "@nodeinfo");
1850
- if (val_info == Qnil)
1851
- {
1852
- val_info = Data_Make_Struct(cNodeInfo, NODEInfo,
1853
- NODEInfo_mark, NODEInfo_free, info); // This data envelope cannot exist without NODE
1854
- NODEInfo_init(info);
1855
- rb_iv_set(self, "@nodeinfo", val_info);
1856
- num = INT2FIX(count_num_of_nodes(node, node, info));
1857
- rb_iv_set(self, "@nodeinfo_num_of_nodes", num);
1858
- // Convert node to NODEInfo structure
1859
- ans = NODEInfo_toHash(info);
1860
- rb_hash_aset(ans, ID2SYM(rb_intern("num_of_nodes")), num);
1861
- rb_hash_aset(ans, ID2SYM(rb_intern("nodename")), rb_iv_get(self, "@nodename"));
1862
- rb_hash_aset(ans, ID2SYM(rb_intern("filename")), rb_iv_get(self, "@filename"));
1863
- rb_hash_aset(ans, ID2SYM(rb_intern("filepath")), rb_iv_get(self, "@filepath"));
1864
- rb_iv_set(self, "@nodehash", ans);
1865
- }
1866
- else
1867
- {
1868
- ans = rb_iv_get(self, "@nodehash");
1869
- }
1870
- // ENABLE GARBAGE COLLECTOR (important for dumping)
1871
- if (gc_was_disabled == Qfalse)
1872
- {
1873
- rb_gc_enable();
1874
- }
1875
- return ans;
1876
- }
1877
-
1878
-
1879
- VALUE m_node_to_ary(NODE *node)
1880
- {
1881
- int i, type, ut[3];
1882
- VALUE uref[3];
1883
- VALUE entry = rb_ary_new();
1884
- /* Special case: NULL node */
1885
- if (node == NULL)
1886
- {
1887
- return Qnil;
1888
- }
1889
- /* Save node name */
1890
- type = nd_type(node);
1891
- rb_ary_push(entry, ID2SYM(rb_intern(ruby_node_name(type))));
1892
-
1893
- ut[0] = nodes_ctbl[type * 3];
1894
- ut[1] = nodes_ctbl[type * 3 + 1];
1895
- ut[2] = nodes_ctbl[type * 3 + 2];
1896
-
1897
- uref[0] = node->u1.value;
1898
- uref[1] = node->u2.value;
1899
- uref[2] = node->u3.value;
1900
-
1901
-
1902
- for (i = 0; i < 3; i++)
1903
- {
1904
- if (ut[i] == NT_NODE)
1905
- {
1906
- if (nd_type(node) != NODE_OP_ASGN2 || i != 2)
1907
- {
1908
- rb_ary_push(entry, m_node_to_ary(RNODE(uref[i])));
1909
- }
1910
- else
1911
- {
1912
- VALUE child = rb_ary_new();
1913
- if (ut[i] != 0 && TYPE(ut[i]) != T_NODE)
1914
- rb_raise(rb_eArgError, "print_node: broken node 0x%s", RSTRING_PTR(value_to_str(ut[i])));
1915
- rb_ary_push(child, ID2SYM(rb_intern("NODE_OP_ASGN2")));
1916
- rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u1.value));
1917
- rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u2.value));
1918
- rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u3.value));
1919
- rb_ary_push(entry, child);
1920
- }
1921
- }
1922
- else if (ut[i] == NT_VALUE)
1923
- {
1924
- rb_ary_push(entry, uref[i]);
1925
- }
1926
- else if (ut[i] == NT_ID)
1927
- {
1928
- rb_ary_push(entry, ID2SYM( (ID) uref[i]));
1929
- }
1930
- else if (ut[i] == NT_LONG)
1931
- {
1932
- rb_ary_push(entry, LONG2NUM( (intptr_t) uref[i]));
1933
- }
1934
- else if (ut[i] == NT_NULL)
1935
- {
1936
- rb_ary_push(entry, Qnil);
1937
- }
1938
- else if (ut[i] == NT_ARGS)
1939
- {
1940
- VALUE rargs = rb_hash_new();
1941
- VALUE rargs_env = rb_ary_new();
1942
- #ifdef USE_RB_ARGS_INFO
1943
- ID id;
1944
- struct rb_args_info *args = (void *) uref[i];
1945
-
1946
- rb_hash_aset(rargs, ID2SYM(rb_intern("pre_init")), m_node_to_ary(args->pre_init));
1947
- rb_hash_aset(rargs, ID2SYM(rb_intern("post_init")), m_node_to_ary(args->post_init));
1948
-
1949
- id = args->first_post_arg;
1950
- rb_hash_aset(rargs, ID2SYM(rb_intern("first_post_arg")), (id) ? ID2SYM(id) : Qnil);
1951
- id = args->rest_arg;
1952
- rb_hash_aset(rargs, ID2SYM(rb_intern("rest_arg")), (id) ? ID2SYM(id) : Qnil);
1953
- id = args->block_arg;
1954
- rb_hash_aset(rargs, ID2SYM(rb_intern("block_arg")), (id) ? ID2SYM(id) : Qnil);
1955
-
1956
- rb_hash_aset(rargs, ID2SYM(rb_intern("kw_args")), m_node_to_ary(args->kw_args));
1957
- rb_hash_aset(rargs, ID2SYM(rb_intern("kw_rest_arg")), m_node_to_ary(args->kw_rest_arg));
1958
- rb_hash_aset(rargs, ID2SYM(rb_intern("opt_args")), m_node_to_ary(args->opt_args));
1959
- #endif
1960
- rb_ary_push(rargs_env, ID2SYM(rb_intern("ARGS")));
1961
- rb_ary_push(rargs_env, rargs);
1962
- rb_ary_push(entry, rargs_env);
1963
- }
1964
- else if (ut[i] == NT_IDTABLE)
1965
- {
1966
- VALUE ridtbl = rb_ary_new();
1967
- VALUE idtbl_ary = rb_ary_new();
1968
- int j, len;
1969
-
1970
- ID *idtbl = (ID *) uref[i];
1971
- len = (uref[i]) ? *idtbl++ : 0;
1972
- for (j = 0; j < len; j++)
1973
- {
1974
- ID sym = *idtbl++;
1975
- VALUE val = ID2SYM(sym);
1976
- rb_ary_push(idtbl_ary, val);
1977
- }
1978
- rb_ary_push(ridtbl, ID2SYM(rb_intern("IDTABLE")));
1979
- rb_ary_push(ridtbl, idtbl_ary);
1980
- rb_ary_push(entry, ridtbl);
1981
- }
1982
- else if (ut[i] == NT_ENTRY)
1983
- {
1984
- struct rb_global_entry *gentry;
1985
- gentry = (struct rb_global_entry *) uref[i];
1986
- rb_ary_push(entry, ID2SYM(gentry->id));
1987
- }
1988
- else
1989
- {
1990
- rb_ary_push(entry, ID2SYM(rb_intern("UNKNOWN")));
1991
- }
1992
- }
1993
- return entry;
1994
- }
1995
-
1996
- /*
1997
- * call-seq:
1998
- * obj.to_a
1999
- *
2000
- * Converts node to the array (mainly to allow exploration of AST
2001
- * by the user). It shows information about rb_args_info and
2002
- * ID *tbl that are not displayed by NodeMarshal#dump_tree and
2003
- * NodeMarshal#dump_tree_short.
2004
- */
2005
- static VALUE m_nodedump_to_a(VALUE self)
2006
- {
2007
- NODE *node = RNODE(rb_iv_get(self, "@node"));
2008
- VALUE gc_was_disabled = rb_gc_disable();
2009
- VALUE ary = m_node_to_ary(node);
2010
- if (gc_was_disabled == Qfalse)
2011
- {
2012
- rb_gc_enable();
2013
- }
2014
- return ary;
2015
- }
2016
-
2017
-
2018
- /*
2019
- * call-seq:
2020
- * obj.to_bin
2021
- *
2022
- * Converts NodeMarshal class example to the binary string that
2023
- * can be saved to the file and used for loading the node from the file.
2024
- * Format of the obtained binary dump depends on used platform (especially
2025
- * size of the pointer) and Ruby version.
2026
- */
2027
- static VALUE m_nodedump_to_bin(VALUE self)
2028
- {
2029
- VALUE hash = m_nodedump_to_hash(self);
2030
- VALUE cMarshal = rb_const_get(rb_cObject, rb_intern("Marshal"));
2031
- return rb_funcall(cMarshal, rb_intern("dump"), 1, hash);
2032
- }
2033
-
2034
- /*
2035
- * Gives the information about the node
2036
- */
2037
- static VALUE m_nodedump_inspect(VALUE self)
2038
- {
2039
- static char str[1024], buf[512];
2040
- VALUE num_of_nodes, nodename, filepath, filename;
2041
- VALUE val_obj_addresses, val_nodeinfo;
2042
- // Get generic information about node
2043
- num_of_nodes = rb_iv_get(self, "@num_of_nodes");
2044
- nodename = rb_iv_get(self, "@nodename");
2045
- filepath = rb_iv_get(self, "@filepath");
2046
- filename = rb_iv_get(self, "@filename");
2047
- // Generate string with generic information about node
2048
- sprintf(str,
2049
- "----- NodeMarshal:0x%"PRIxPTR"\n"
2050
- " num_of_nodes: %d\n nodename: %s\n filepath: %s\n filename: %s\n",
2051
- (uintptr_t) (self),
2052
- (num_of_nodes == Qnil) ? -1 : FIX2INT(num_of_nodes),
2053
- (nodename == Qnil) ? "nil" : RSTRING_PTR(nodename),
2054
- (filepath == Qnil) ? "nil" : RSTRING_PTR(filepath),
2055
- (filename == Qnil) ? "nil" : RSTRING_PTR(filename)
2056
- );
2057
- // Check if the information about node struct is available
2058
- val_nodeinfo = rb_iv_get(self, "@nodeinfo");
2059
- val_obj_addresses = rb_iv_get(self, "@obj_addresses");
2060
- if (val_nodeinfo == Qnil && val_obj_addresses == Qnil)
2061
- {
2062
- m_nodedump_to_hash(self);
2063
- val_nodeinfo = rb_iv_get(self, "@nodeinfo");
2064
- }
2065
- // Information about preparsed node
2066
- // a) NODEInfo struct
2067
- if (val_nodeinfo == Qnil)
2068
- {
2069
- sprintf(buf, " NODEInfo struct is empty\n");
2070
- }
2071
- else
2072
- {
2073
- NODEInfo *ninfo;
2074
- Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
2075
- sprintf(buf,
2076
- " NODEInfo struct:\n"
2077
- " syms hash len (Symbols): %d\n"
2078
- " lits hash len (Literals): %d\n"
2079
- " idtabs hash len (ID tables): %d\n"
2080
- " gentries hash len (Global vars): %d\n"
2081
- " nodes hash len (Nodes): %d\n"
2082
- " pnodes hash len (Parent nodes): %d\n"
2083
- #ifdef USE_RB_ARGS_INFO
2084
- " args hash len (args info): %d\n"
2085
- #endif
2086
- ,
2087
- FIX2INT(rb_funcall(ninfo->syms.vals, rb_intern("length"), 0)),
2088
- FIX2INT(rb_funcall(ninfo->lits.vals, rb_intern("length"), 0)),
2089
- FIX2INT(rb_funcall(ninfo->idtabs.vals, rb_intern("length"), 0)),
2090
- FIX2INT(rb_funcall(ninfo->gentries.vals, rb_intern("length"), 0)),
2091
- FIX2INT(rb_funcall(ninfo->nodes.vals, rb_intern("length"), 0)),
2092
- FIX2INT(rb_funcall(ninfo->pnodes.vals, rb_intern("length"), 0))
2093
- #ifdef USE_RB_ARGS_INFO
2094
- ,
2095
- FIX2INT(rb_funcall(ninfo->args.vals, rb_intern("length"), 0))
2096
- #endif
2097
- );
2098
- }
2099
- strcat(str, buf);
2100
- // b) NODEObjAddresses struct
2101
- if (val_obj_addresses == Qnil)
2102
- {
2103
- sprintf(buf, " NODEObjAddresses struct is empty\n");
2104
- }
2105
- else
2106
- {
2107
- NODEObjAddresses *objadr;
2108
- Data_Get_Struct(val_obj_addresses, NODEObjAddresses, objadr);
2109
- sprintf(buf,
2110
- " NODEObjAddresses struct:\n"
2111
- " syms_len (Num of symbols): %d\n"
2112
- " lits_len (Num of literals): %d\n"
2113
- " idtbls_len (Num of ID tables): %d\n"
2114
- " gvars_len (Num of global vars): %d\n"
2115
- " nodes_len (Num of nodes): %d\n"
2116
- #ifdef USE_RB_ARGS_INFO
2117
- " args_len: (Num of args info): %d\n"
2118
- #endif
2119
- , objadr->syms_len, objadr->lits_len,
2120
- objadr->idtbls_len, objadr->gvars_len,
2121
- objadr->nodes_len
2122
- #ifdef USE_RB_ARGS_INFO
2123
- , objadr->args_len
2124
- #endif
2125
- );
2126
- }
2127
- strcat(str, buf);
2128
- strcat(str, "------------------\n");
2129
- // Generate output string
2130
- return rb_str_new2(str);
2131
- }
2132
-
2133
- /*
2134
- * Returns node name (usually <main>)
2135
- */
2136
- static VALUE m_nodedump_nodename(VALUE self)
2137
- {
2138
- return rb_funcall(rb_iv_get(self, "@nodename"), rb_intern("dup"), 0);
2139
- }
2140
-
2141
- /*
2142
- * Returns name of file that was used for node generation and will be used
2143
- * by YARV (or nil/<compiled> if a string of code was used)
2144
- */
2145
- static VALUE m_nodedump_filename(VALUE self)
2146
- {
2147
- return rb_funcall(rb_iv_get(self, "@filename"), rb_intern("dup"), 0);
2148
- }
2149
-
2150
- /*
2151
- * Sets name of file that was used for node generation and will be used
2152
- * by YARV (or nil/<compiled> if a string of code was used)
2153
- */
2154
- static VALUE m_nodedump_set_filename(VALUE self, VALUE val)
2155
- {
2156
- if (val != Qnil)
2157
- {
2158
- Check_Type(val, T_STRING);
2159
- rb_iv_set(self, "@filename", rb_funcall(val, rb_intern("dup"), 0));
2160
- }
2161
- else
2162
- {
2163
- rb_iv_set(self, "@filename", Qnil);
2164
- }
2165
- return self;
2166
- }
2167
-
2168
- /*
2169
- * Returns path of file that was used for node generation and will be used
2170
- * by YARV (or nil/<compiled> if a string of code was used)
2171
- */
2172
- static VALUE m_nodedump_filepath(VALUE self)
2173
- {
2174
- return rb_funcall(rb_iv_get(self, "@filepath"), rb_intern("dup"), 0);
2175
- }
2176
-
2177
- /*
2178
- * call-seq:
2179
- * obj.filepath=value
2180
- *
2181
- * Sets the path of file that was used for node generation and will
2182
- * be used by YARV (or nil/<compiled> if a string of code was used)
2183
- */
2184
- static VALUE m_nodedump_set_filepath(VALUE self, VALUE val)
2185
- {
2186
- if (val != Qnil)
2187
- {
2188
- Check_Type(val, T_STRING);
2189
- rb_iv_set(self, "@filepath", rb_funcall(val, rb_intern("dup"), 0));
2190
- }
2191
- else
2192
- {
2193
- rb_iv_set(self, "@filepath", Qnil);
2194
- }
2195
- return self;
2196
- }
2197
-
2198
- /*
2199
- * call-seq:
2200
- * NodeMarshal.base85r_encode(input) -> output
2201
- *
2202
- * Encode arbitrary binary string to the ASCII string
2203
- * using modified version of BASE85 (useful for obfuscation
2204
- * of .rb source files)
2205
- */
2206
- static VALUE m_base85r_encode(VALUE obj, VALUE input)
2207
- {
2208
- return base85r_encode(input);
2209
- }
2210
-
2211
- /*
2212
- * call-seq:
2213
- * NodeMarshal.base85r_decode(input) -> output
2214
- *
2215
- * Decode ASCII string in the modified BASE85 format
2216
- * to the binary string (useful for obfuscation of .rb
2217
- * source files)
2218
- */
2219
- static VALUE m_base85r_decode(VALUE obj, VALUE input)
2220
- {
2221
- return base85r_decode(input);
2222
- }
2223
-
2224
- /* call-seq:
2225
- * obj.to_text
2226
- *
2227
- * Converts NodeMarshal class example to the text string (modified Base85 encoding) that
2228
- * can be saved to the file and used for loading the node from the file.
2229
- * Format of the obtained binary dump depends on used platform (especially
2230
- * size of the pointer) and Ruby version.
2231
- */
2232
- static VALUE m_nodedump_to_text(VALUE self)
2233
- {
2234
- VALUE bin = m_nodedump_to_bin(self);
2235
- return base85r_encode(bin);
2236
- }
2237
-
2238
- /*
2239
- * Returns node object
2240
- */
2241
- static VALUE m_nodedump_node(VALUE self)
2242
- {
2243
- return rb_iv_get(self, "@node");
2244
- }
2245
-
2246
- /*
2247
- * This class can load and save Ruby code in the form of the
2248
- * platform-dependent syntax tree (made of NODEs). Such function
2249
- * allows to hide the source code from users. Main features:
2250
- *
2251
- * - Irreversible transformation of Ruby source code to the syntax tree
2252
- * - Representation of syntax tree in binary form dependent from the platform and Ruby version
2253
- * - Simple options for node inspection
2254
- * - Ruby 1.9.3, 2.2.x and 2.3.x support
2255
- * - Subroutines for custom code obfuscation
2256
- */
2257
- void Init_nodemarshal()
2258
- {
2259
- static VALUE cNodeMarshal;
2260
- init_nodes_table(nodes_ctbl, NODES_CTBL_SIZE);
2261
- base85r_init_tables();
2262
-
2263
- cNodeMarshal = rb_define_class("NodeMarshal", rb_cObject);
2264
- rb_define_singleton_method(cNodeMarshal, "base85r_encode", RUBY_METHOD_FUNC(m_base85r_encode), 1);
2265
- rb_define_singleton_method(cNodeMarshal, "base85r_decode", RUBY_METHOD_FUNC(m_base85r_decode), 1);
2266
-
2267
- rb_define_method(cNodeMarshal, "initialize", RUBY_METHOD_FUNC(m_nodedump_init), 2);
2268
- rb_define_method(cNodeMarshal, "to_hash", RUBY_METHOD_FUNC(m_nodedump_to_hash), 0);
2269
- rb_define_method(cNodeMarshal, "to_h", RUBY_METHOD_FUNC(m_nodedump_to_hash), 0);
2270
- rb_define_method(cNodeMarshal, "to_bin", RUBY_METHOD_FUNC(m_nodedump_to_bin), 0);
2271
- rb_define_method(cNodeMarshal, "to_text", RUBY_METHOD_FUNC(m_nodedump_to_text), 0);
2272
- rb_define_method(cNodeMarshal, "to_a", RUBY_METHOD_FUNC(m_nodedump_to_a), 0);
2273
- rb_define_method(cNodeMarshal, "to_ary", RUBY_METHOD_FUNC(m_nodedump_to_a), 0);
2274
- rb_define_method(cNodeMarshal, "dump_tree", RUBY_METHOD_FUNC(m_nodedump_parser_dump_tree), 0);
2275
- rb_define_method(cNodeMarshal, "dump_tree_short", RUBY_METHOD_FUNC(m_nodedump_dump_tree_short), 0);
2276
- rb_define_method(cNodeMarshal, "compile", RUBY_METHOD_FUNC(m_nodedump_compile), 0);
2277
- rb_define_method(cNodeMarshal, "show_offsets", RUBY_METHOD_FUNC(m_nodedump_show_offsets), 0);
2278
- rb_define_method(cNodeMarshal, "show_offsets=", RUBY_METHOD_FUNC(m_nodedump_set_show_offsets), 1);
2279
- // Methods for working with the information about the node
2280
- // a) literals, symbols, generic information
2281
- rb_define_method(cNodeMarshal, "symbols", RUBY_METHOD_FUNC(m_nodedump_symbols), 0);
2282
- rb_define_method(cNodeMarshal, "change_symbol", RUBY_METHOD_FUNC(m_nodedump_change_symbol), 2);
2283
- rb_define_method(cNodeMarshal, "literals", RUBY_METHOD_FUNC(m_nodedump_literals), 0);
2284
- rb_define_method(cNodeMarshal, "change_literal", RUBY_METHOD_FUNC(m_nodedump_change_literal), 2);
2285
- rb_define_method(cNodeMarshal, "inspect", RUBY_METHOD_FUNC(m_nodedump_inspect), 0);
2286
- rb_define_method(cNodeMarshal, "node", RUBY_METHOD_FUNC(m_nodedump_node), 0);
2287
- // b) node and file names
2288
- rb_define_method(cNodeMarshal, "nodename", RUBY_METHOD_FUNC(m_nodedump_nodename), 0);
2289
- rb_define_method(cNodeMarshal, "filename", RUBY_METHOD_FUNC(m_nodedump_filename), 0);
2290
- rb_define_method(cNodeMarshal, "filename=", RUBY_METHOD_FUNC(m_nodedump_set_filename), 1);
2291
- rb_define_method(cNodeMarshal, "filepath", RUBY_METHOD_FUNC(m_nodedump_filepath), 0);
2292
- rb_define_method(cNodeMarshal, "filepath=", RUBY_METHOD_FUNC(m_nodedump_set_filepath), 1);
2293
- // C structure wrappers
2294
- cNodeObjAddresses = rb_define_class("NodeObjAddresses", rb_cObject);
2295
- cNodeInfo = rb_define_class("NodeInfo", rb_cObject);
2296
- }
1
+ /*
2
+ * This file contains implementation of classes for Ruby nodes
3
+ * marshalization (i.e. loading and saving them from disk)
4
+ *
5
+ * (C) 2015-2017 Alexey Voskov
6
+ * License: BSD-2-Clause
7
+ */
8
+ #define __STDC_FORMAT_MACROS
9
+ #include <stdio.h>
10
+ #include <stdlib.h>
11
+ #include <inttypes.h>
12
+ #include <ruby.h>
13
+ #include <ruby/version.h>
14
+
15
+ /*
16
+ * Some global variables
17
+ */
18
+ static VALUE cNodeObjAddresses, cNodeInfo;
19
+
20
+ /*
21
+ * Part 1. .H files: nodedump functions + parts of Ruby internals
22
+ */
23
+ #include "nodedump.h"
24
+
25
#ifdef WITH_CUSTOM_RB_GLOBAL_ENTRY
/* Custom (and slow) implementation of rb_global_entry internal API for Ruby 2.3
   (original rb_global_entry API was opened before Ruby 2.3)
   It uses a hack with the node creation. The main idea of the hack is
   to create a node from the expression containing only a name of the global variable
   and extract global entry address from NODE_GVAR u3 "leaf".

   Returns NULL if the compiled tree does not have the expected
   NODE_SCOPE -> [NODE_PRELUDE ->] NODE_GVAR shape. */
static struct rb_global_entry *rb_global_entry(ID id)
{
	NODE *node, *gvar_node;
	/* a) Step 1: create node from the expression consisting only from
	   our global variable.
	   NOTE(review): the line number is passed as a plain C int; the former
	   NUM2INT(1) decoded the raw word 1 as Fixnum 0. Presumably line 1
	   was intended -- the value is not used for the entry lookup. */
	node = rb_compile_string("<compiled>", rb_id2str(id), 1);
	if (node == NULL || nd_type(node) != NODE_SCOPE)
	{
		return NULL;
	}
	/* b) Trace the node to the NODE_GVAR */
	gvar_node = node->u2.node;
	if (gvar_node != NULL && nd_type(gvar_node) == NODE_PRELUDE) /* Present only in 2.3 */
	{
		gvar_node = gvar_node->u2.node;
	}
	if (gvar_node == NULL || nd_type(gvar_node) != NODE_GVAR) /* Error: no GVAR found */
	{
		return NULL;
	}
	/* c) Return the address of the global entry */
	return gvar_node->u3.entry;
}
#endif
57
+
58
+
59
+ /*
60
+ * Part 2. Information about the nodes
61
+ *
62
+ */
63
+
64
/* Version-dependent features.
   USE_RB_ARGS_INFO enables the code that works with struct rb_args_info
   and the "args" tables; it is switched on for Ruby 2.x only. */
#if RUBY_API_VERSION_MAJOR == 2
#define USE_RB_ARGS_INFO 1
#endif

/* RESET_GC_FLAGS is switched on for Ruby 1.x only */
#if RUBY_API_VERSION_MAJOR == 1
#define RESET_GC_FLAGS 1
#endif
72
+
73
+
74
+ // Some generic utilities
75
+ int is_value_in_heap(VALUE val)
76
+ {
77
+ if (val == Qfalse || val == Qtrue ||
78
+ val == Qnil || val == Qundef ||
79
+ (val & FIXNUM_FLAG)
80
+ #ifdef FLONUM_MASK
81
+ || ((val & FLONUM_MASK) == FLONUM_FLAG) // This memory trick with floats is present only in 2.x
82
+ #endif
83
+ )
84
+ {
85
+ return 0;
86
+ }
87
+ else
88
+ return 1;
89
+ }
90
+
91
+
92
+ /*
93
+ * Converts Ruby string with hexadecimal number
94
+ * to the Ruby VALUE
95
+ */
96
+ VALUE str_to_value(VALUE str)
97
+ {
98
+ intptr_t ans = (intptr_t) Qnil;
99
+ sscanf(RSTRING_PTR(str), "%"PRIxPTR, &ans);
100
+ return (VALUE) ans;
101
+ }
102
+
103
+
104
+ /*
105
+ * Converts Ruby VALUE (i.e. machine address) to the
106
+ * hexadecimal Ruby string
107
+ */
108
+ VALUE value_to_str(VALUE val)
109
+ {
110
+ char str[16];
111
+ sprintf(str, "%" PRIxPTR, (intptr_t) val);
112
+ return rb_str_new2(str);
113
+ }
114
+
115
+ /*
116
+ * Converts VALUE to the sequence of bytes using big-endian
117
+ * standard. Returns number of non-zero bytes
118
+ *
119
+ * Inputs
120
+ * val -- input value
121
+ * buf -- pointer to the output buffer
122
+ * Returns
123
+ * number of written bytes
124
+ */
125
+ int value_to_bin(VALUE val, unsigned char *buf)
126
+ {
127
+ int i, len = 0;
128
+ unsigned char byte;
129
+ for (i = sizeof(VALUE) - 1; i >= 0; i--)
130
+ {
131
+ byte = (unsigned char) ((val >> (i * 8)) & 0xFF);
132
+ if (len > 0 || byte != 0)
133
+ {
134
+ *buf++ = byte;
135
+ len++;
136
+ }
137
+ }
138
+ return len;
139
+ }
140
+
141
+ /*
142
+ * Converts sequence of bytes (big-endian standard) to the VALUE.
143
+ *
144
+ * Inputs
145
+ * buf -- poiner to the input buffer
146
+ * len -- number of bytes
147
+ * Returns
148
+ * VALUE
149
+ */
150
+ VALUE bin_to_value(unsigned char *buf, int len)
151
+ {
152
+ VALUE val = (VALUE) 0;
153
+ int i;
154
+ for (i = len - 1; i >= 0; i--)
155
+ val |= ((VALUE) *buf++) << (i * 8);
156
+ return val;
157
+ }
158
+
159
/* Table of node children types (NT_... constants).
   Three consecutive elements are kept for each node type: the element
   [type * 3 + k] describes the child number k (k = 0, 1, 2) */
#define NODES_CTBL_SIZE 256
static int nodes_ctbl[NODES_CTBL_SIZE * 3];
161
+
162
+
163
+ /*
164
+ * Part 3. Functions for node marshalization
165
+ */
166
+
167
+ /*
168
+ * Keeps the information about node elements position
169
+ * in the memory and its IDs/ordinals for export to the file
170
+ */
171
+ typedef struct {
172
+ VALUE vals; // values: key=>val Hash
173
+ VALUE ids; // identifiers: key=>id Hash
174
+ VALUE pos; // free identifier
175
+ } LeafTableInfo;
176
+
177
+ void LeafTableInfo_init(LeafTableInfo *lti)
178
+ {
179
+ lti->vals = rb_hash_new();
180
+ lti->ids = rb_hash_new();
181
+ lti->pos = 0;
182
+ }
183
+
184
+ void LeafTableInfo_mark(LeafTableInfo *lti)
185
+ {
186
+ rb_gc_mark(lti->vals);
187
+ rb_gc_mark(lti->ids);
188
+ }
189
+
190
+
191
+ int LeafTableInfo_addEntry(LeafTableInfo *lti, VALUE key, VALUE value)
192
+ {
193
+ VALUE v_id = rb_hash_aref(lti->ids, key);
194
+ if (v_id == Qnil)
195
+ {
196
+ int id = lti->pos++;
197
+ rb_hash_aset(lti->vals, key, value);
198
+ rb_hash_aset(lti->ids, key, INT2FIX(id));
199
+ return id;
200
+ }
201
+ else
202
+ {
203
+ return FIX2INT(v_id);
204
+ }
205
+ }
206
+
207
+ /*
208
+ * Adds Ruby ID data type as the entry to the LeafTableInfo struct.
209
+ * Main features:
210
+ * 1) ID will be converted to Fixnum
211
+ * 2) If ID can be converted to string by rb_id2str it will be saved as
212
+ String object. Otherwise it will be converted to Fixnum.
213
+ */
214
+ int LeafTableInfo_addIDEntry(LeafTableInfo *lti, ID id)
215
+ {
216
+ VALUE r_idval = rb_id2str(id);
217
+ if (TYPE(r_idval) != T_STRING)
218
+ {
219
+ r_idval = INT2FIX(id);
220
+ }
221
+ return LeafTableInfo_addEntry(lti, INT2FIX(id), r_idval);
222
+ }
223
+
224
+ VALUE LeafTableInfo_getLeavesTable(LeafTableInfo *lti)
225
+ {
226
+ VALUE key, keys = rb_funcall(lti->vals, rb_intern("keys"), 0);
227
+ unsigned int i;
228
+ VALUE val;
229
+ for (i = 0; i < lti->pos; i++)
230
+ {
231
+ key = RARRAY_PTR(keys)[i];
232
+ val = rb_hash_aref(lti->vals, key);
233
+ rb_ary_store(keys, i, val);
234
+ }
235
+ return keys;
236
+ }
237
+
238
+ int LeafTableInfo_keyToID(LeafTableInfo *lti, VALUE key)
239
+ {
240
+ VALUE id = rb_hash_aref(lti->ids, key);
241
+ return (id == Qnil) ? -1 : FIX2INT(id);
242
+ }
243
+
244
+ VALUE LeafTableInfo_keyToValue(LeafTableInfo *lti, VALUE key)
245
+ {
246
+ return rb_hash_aref(lti->vals, key);
247
+ }
248
+
249
+ /* The structure keeps information about the node
250
+ that is required for its dumping to the file
251
+ (mainly hashes with relocatable identifiers) */
252
+ typedef struct {
253
+ LeafTableInfo syms; // Node symbols
254
+ LeafTableInfo lits; // Node literals
255
+ LeafTableInfo idtabs; // Table of identifiers
256
+ #ifdef USE_RB_ARGS_INFO
257
+ LeafTableInfo args; // Table of arguments
258
+ #endif
259
+ LeafTableInfo gentries; // Global variables table
260
+ LeafTableInfo nodes; // Table of nodes
261
+ LeafTableInfo pnodes; // Table of parent nodes
262
+ } NODEInfo;
263
+
264
+ void NODEInfo_init(NODEInfo *info)
265
+ {
266
+ LeafTableInfo_init(&(info->syms));
267
+ LeafTableInfo_init(&(info->lits));
268
+ LeafTableInfo_init(&(info->idtabs));
269
+ #ifdef USE_RB_ARGS_INFO
270
+ LeafTableInfo_init(&(info->args));
271
+ #endif
272
+ LeafTableInfo_init(&(info->gentries));
273
+ LeafTableInfo_init(&(info->nodes));
274
+ LeafTableInfo_init(&(info->pnodes));
275
+ }
276
+
277
+ void NODEInfo_mark(NODEInfo *info)
278
+ {
279
+ LeafTableInfo_mark(&(info->syms));
280
+ LeafTableInfo_mark(&(info->lits));
281
+ LeafTableInfo_mark(&(info->idtabs));
282
+ #ifdef USE_RB_ARGS_INFO
283
+ LeafTableInfo_mark(&(info->args));
284
+ #endif
285
+ LeafTableInfo_mark(&(info->gentries));
286
+ LeafTableInfo_mark(&(info->nodes));
287
+ LeafTableInfo_mark(&(info->pnodes));
288
+ }
289
+
290
+ void NODEInfo_free(NODEInfo *info)
291
+ {
292
+ xfree(info);
293
+ }
294
+
295
+ LeafTableInfo *NODEInfo_getTableByID(NODEInfo *info, int id)
296
+ {
297
+ switch (id)
298
+ {
299
+ case NT_ID:
300
+ return &info->syms;
301
+ case NT_VALUE:
302
+ return &info->lits;
303
+ case NT_IDTABLE:
304
+ return &info->idtabs;
305
+ #ifdef USE_RB_ARGS_INFO
306
+ case NT_ARGS:
307
+ return &info->args;
308
+ #endif
309
+ case NT_ENTRY:
310
+ return &info->gentries;
311
+ case NT_NODE:
312
+ return &info->nodes;
313
+ default:
314
+ return NULL;
315
+ }
316
+ }
317
+
318
+ /*
319
+ * Converts node value to the binary data
320
+ * Input parameters:
321
+ * info -- current NODEInfo structure
322
+ * node -- parent node (that contains the value)
323
+ * ptr -- pointer to the output memory buffer
324
+ * type -- type of the entry (NT_...)
325
+ * value -- node->u?.value VALUE
326
+ * child_id -- child node number (1,2,3)
327
+ * Returns:
328
+ * Byte that contains the next information
329
+ * a) upper half-byte: VL_... data type (for node loader)
330
+ * b) lower half-byte: number of bytes written to the buffer
331
+ */
332
+ #define DUMP_RAW_VALUE(vl_ans, vl) (vl_ans | (value_to_bin(vl, (unsigned char *) ptr) << 4))
333
+ static int dump_node_value(NODEInfo *info, char *ptr, NODE *node, int type, VALUE value, int child_id)
334
+ {
335
+ if (type == NT_NULL || type == NT_LONG)
336
+ {
337
+ return DUMP_RAW_VALUE(VL_RAW, value);
338
+ }
339
+ else if (type == NT_NODE)
340
+ {
341
+ if (value == 0)
342
+ { // Variant a: empty node
343
+ return DUMP_RAW_VALUE(VL_RAW, value);
344
+ }
345
+ else if (nd_type(node) == NODE_ATTRASGN && value == 1 && child_id == 1)
346
+ { // Special case: "self"
347
+ return DUMP_RAW_VALUE(VL_RAW, value);
348
+ }
349
+ else if (TYPE(value) != T_NODE)
350
+ {
351
+ rb_raise(rb_eArgError, "dump_node_value, parent node %s (ADR 0x%s): child node %d (ADR 0x%s): is not a node\n"
352
+ " Type: %s (%d), Value: %s",
353
+ ruby_node_name(nd_type(node)), RSTRING_PTR(value_to_str((VALUE) node)),
354
+ child_id, RSTRING_PTR(value_to_str(value)),
355
+ RSTRING_PTR(rb_funcall(rb_funcall(value, rb_intern("class"), 0), rb_intern("to_s"), 0)),
356
+ TYPE(value),
357
+ RSTRING_PTR(rb_funcall(value, rb_intern("to_s"), 0)) );
358
+ }
359
+ else
360
+ { // Variant b: not empty node
361
+ VALUE id = LeafTableInfo_keyToID(&info->nodes, value_to_str(value));
362
+ if (id == (VALUE) -1)
363
+ {
364
+ rb_raise(rb_eArgError, "dump_node_value, parent node %s (ADR 0x%s): child node %d (ADR 0x%s) not found",
365
+ ruby_node_name(nd_type(node)), RSTRING_PTR(value_to_str((VALUE) node)),
366
+ child_id, RSTRING_PTR(value_to_str(value)));
367
+ return VL_RAW;
368
+ }
369
+ else
370
+ {
371
+ return DUMP_RAW_VALUE(VL_NODE, id);
372
+ }
373
+ return VL_NODE;
374
+ }
375
+ }
376
+ else if (type == NT_VALUE)
377
+ {
378
+ if (!is_value_in_heap(value))
379
+ { // a) value that is inside VALUE
380
+ return DUMP_RAW_VALUE(VL_RAW, value);
381
+ }
382
+ else
383
+ { // b) value that requires reference to literals table
384
+ VALUE id = LeafTableInfo_keyToID(&info->lits, value_to_str(value));
385
+ if (id == (VALUE) -1)
386
+ rb_raise(rb_eArgError, "Cannot find literal");
387
+ else
388
+ return DUMP_RAW_VALUE(VL_LIT, id);
389
+ }
390
+ }
391
+ else if (type == NT_ID)
392
+ {
393
+ ID sym = (VALUE) value; // We are working with RAW data from RAM!
394
+ VALUE id = LeafTableInfo_keyToID(&info->syms, INT2FIX(sym));
395
+ if (id == (VALUE) -1)
396
+ {
397
+ rb_raise(rb_eArgError, "Cannot find symbol ID %d (%s) (parent node %s, line %d)",
398
+ (int) sym, RSTRING_PTR(rb_id2str(ID2SYM(sym))),
399
+ ruby_node_name(nd_type(node)), nd_line(node));
400
+ return VL_RAW;
401
+ }
402
+ else
403
+ {
404
+ return DUMP_RAW_VALUE(VL_ID, id);
405
+ }
406
+ }
407
+ else if (type == NT_ENTRY || type == NT_ARGS || type == NT_IDTABLE)
408
+ {
409
+ VALUE key = value_to_str(value);
410
+ LeafTableInfo *lti = NODEInfo_getTableByID(info, type);
411
+ VALUE id = LeafTableInfo_keyToID(lti, key);
412
+ if (id == (VALUE) -1)
413
+ {
414
+ rb_raise(rb_eArgError, "Cannot find some entry");
415
+ return VL_RAW;
416
+ }
417
+ else
418
+ {
419
+ switch(type)
420
+ {
421
+ case NT_ENTRY: return DUMP_RAW_VALUE(VL_GVAR, id);
422
+ case NT_IDTABLE: return DUMP_RAW_VALUE(VL_IDTABLE, id);
423
+ case NT_ARGS: return DUMP_RAW_VALUE(VL_ARGS, id);
424
+ default: rb_raise(rb_eArgError, "Internal error");
425
+ }
426
+ }
427
+ }
428
+ else
429
+ {
430
+ rb_raise(rb_eArgError, "Unknown child node type %d", type);
431
+ }
432
+ }
433
+
434
+ /*
435
+ * Converts information about nodes to the binary string.
436
+ * It uses dump_node_value function for the low-level conversion
437
+ * of node "leaves" to the actual binary data.
438
+ *
439
+ * See load_nodes_from_str for the descrpition of the binary string format.
440
+ */
441
+ static VALUE dump_nodes(NODEInfo *info)
442
+ {
443
+ int node_size = sizeof(int) + sizeof(VALUE) * 4;
444
+ int i, nt, flags_len;
445
+ NODE *node;
446
+ char *bin, *ptr, *rtypes;
447
+ VALUE nodes_ary = rb_funcall(info->nodes.vals, rb_intern("keys"), 0);
448
+ VALUE nodes_bin = rb_str_new(NULL, RARRAY_LEN(nodes_ary) * node_size);
449
+ VALUE ut[3];
450
+ bin = RSTRING_PTR(nodes_bin);
451
+
452
+ for (i = 0, ptr = bin; i < RARRAY_LEN(nodes_ary); i++)
453
+ {
454
+ node = RNODE(str_to_value(RARRAY_PTR(nodes_ary)[i]));
455
+ nt = nd_type(node);
456
+ rtypes = (char *) ptr; ptr += sizeof(int);
457
+ flags_len = value_to_bin(node->flags >> 5, (unsigned char *) ptr); ptr += flags_len;
458
+
459
+ ut[0] = nodes_ctbl[nt * 3];
460
+ ut[1] = nodes_ctbl[nt * 3 + 1];
461
+ ut[2] = nodes_ctbl[nt * 3 + 2];
462
+ if ((nt == NODE_LASGN || nt == NODE_DASGN_CURR) && (void *) node->u2.value == (void *) -1) {
463
+ ut[1] = NT_LONG;
464
+ }
465
+ if (nt == NODE_OP_ASGN2 && LeafTableInfo_keyToID(&info->syms, INT2FIX(node->u1.value)) != -1)
466
+ {
467
+ ut[0] = NT_ID; ut[1] = NT_ID; ut[2] = NT_ID;
468
+ }
469
+
470
+ if (nt == NODE_ARGS_AUX)
471
+ {
472
+ ut[0] = NT_ID; ut[1] = NT_LONG; ut[2] = NT_NODE;
473
+ if (LeafTableInfo_keyToID(&info->syms, INT2FIX(node->u2.value)) != -1)
474
+ {
475
+ ut[1] = NT_ID;
476
+ }
477
+ else
478
+ {
479
+ ut[1] = NT_LONG;
480
+ }
481
+ if (node->u1.value == 0) ut[0] = NT_NULL;
482
+ if (node->u2.value == 0) ut[1] = NT_NULL;
483
+ if (node->u3.value == 0) ut[2] = NT_NULL;
484
+ }
485
+
486
+ if (nt = NODE_ARRAY)
487
+ {
488
+ /* Special undocumented cases:
489
+ * 1) the second child of the second element of an array
490
+ * contains reference to the last element (NT_NODE) not
491
+ * length (NT_LONG)
492
+ * 2) NODE_HASH: every second element in NODE_ARRAY chain
493
+ * contains pointers to NODES (instead of lengths)
494
+ * 3) NODE_DSTR: first node in NODE_ARRAY chain contains
495
+ * pointer to NODE (instead of lengths) */
496
+ NODE *pnode1, *pnode2;
497
+ pnode1 = (NODE *) str_to_value(LeafTableInfo_keyToValue(&info->pnodes, value_to_str((VALUE) node)));
498
+ if (pnode1 != NULL && nd_type(pnode1) == NODE_ARRAY &&
499
+ (NODE *) pnode1->u3.value == node)
500
+ {
501
+ int nt2;
502
+ pnode2 = (NODE *) str_to_value(LeafTableInfo_keyToValue(&info->pnodes, value_to_str((VALUE) pnode1)));
503
+ nt2 = nd_type(pnode2);
504
+ if ( (nt2 != NODE_ARRAY && nt2 != NODE_DSTR) ||
505
+ (NODE *) pnode2->u1.value == pnode1 )
506
+ {
507
+ ut[1] = NT_NODE;
508
+ }
509
+ else if (pnode1->u2.value == 2 && node == (NODE *) node->u2.value)
510
+ {
511
+ ut[1] = NT_NODE;
512
+ }
513
+ }
514
+ else if (pnode1 != NULL && nd_type(pnode1) == NODE_DSTR)
515
+ {
516
+ ut[1] = NT_NODE;
517
+ }
518
+ }
519
+
520
+ rtypes[0] = dump_node_value(info, ptr, node, ut[0], node->u1.value, 1);
521
+ ptr += (rtypes[0] & 0xF0) >> 4;
522
+ rtypes[1] = dump_node_value(info, ptr, node, ut[1], node->u2.value, 2);
523
+ ptr += (rtypes[1] & 0xF0) >> 4;
524
+ rtypes[2] = dump_node_value(info, ptr, node, ut[2], node->u3.value, 3);
525
+ ptr += (rtypes[2] & 0xF0) >> 4;
526
+ rtypes[3] = flags_len;
527
+ }
528
+ rb_str_resize(nodes_bin, (int) (ptr - bin) + 1);
529
+ return nodes_bin;
530
+ }
531
+
532
+
533
+ /*
534
+ * Transforms preprocessed node to Ruby hash that can be used
535
+ * to load the node from disk.
536
+ *
537
+ * See m_nodedump_to_hash function for output hash format details
538
+ */
539
+ VALUE NODEInfo_toHash(NODEInfo *info)
540
+ {
541
+ VALUE ans = rb_hash_new();
542
+ VALUE idtbl, idtabs = LeafTableInfo_getLeavesTable(&info->idtabs);
543
+ VALUE syms = LeafTableInfo_getLeavesTable(&info->syms);
544
+ VALUE args;
545
+ int i, j, id;
546
+ // Add some signatures
547
+ rb_hash_aset(ans, ID2SYM(rb_intern("MAGIC")), rb_str_new2(NODEMARSHAL_MAGIC));
548
+ rb_hash_aset(ans, ID2SYM(rb_intern("RUBY_PLATFORM")),
549
+ rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM")));
550
+ rb_hash_aset(ans, ID2SYM(rb_intern("RUBY_VERSION")),
551
+ rb_const_get(rb_cObject, rb_intern("RUBY_VERSION")));
552
+ // Write literals, symbols and global_entries arrays: they don't need to be corrected
553
+ rb_hash_aset(ans, ID2SYM(rb_intern("literals")), LeafTableInfo_getLeavesTable(&info->lits));
554
+ rb_hash_aset(ans, ID2SYM(rb_intern("symbols")), syms);
555
+ rb_hash_aset(ans, ID2SYM(rb_intern("global_entries")), LeafTableInfo_getLeavesTable(&info->gentries));
556
+ // Replace RAM IDs to disk IDs in id_tables
557
+ for (i = 0; i < RARRAY_LEN(idtabs); i++)
558
+ {
559
+ idtbl = RARRAY_PTR(idtabs)[i];
560
+ for (j = 0; j < RARRAY_LEN(idtbl); j++)
561
+ {
562
+ id = LeafTableInfo_keyToID(&info->syms, RARRAY_PTR(idtbl)[j]);
563
+
564
+ if (id == -1)
565
+ {
566
+ ID sym = FIX2INT(RARRAY_PTR(idtbl)[j]);
567
+ rb_raise(rb_eArgError, "Cannot find the symbol ID %d", (int) sym);
568
+ }
569
+ else
570
+ {
571
+ rb_ary_store(idtbl, j, INT2FIX(id));
572
+ }
573
+
574
+ }
575
+ }
576
+ rb_hash_aset(ans, ID2SYM(rb_intern("id_tables")), idtabs);
577
+ // Replace RAM IDs to disk IDs in args tables
578
+ #ifdef USE_RB_ARGS_INFO
579
+ args = LeafTableInfo_getLeavesTable(&info->args);
580
+ for (i = 0; i < RARRAY_LEN(args); i++)
581
+ {
582
+ VALUE args_entry = RARRAY_PTR(args)[i];
583
+ VALUE *eptr = RARRAY_PTR(args_entry);
584
+ int args_vals[5] = {0, 1, 7, 8, 9};
585
+ int args_ids[3] = {4, 5, 6};
586
+ if (RARRAY_LEN(args_entry) != 10)
587
+ rb_raise(rb_eArgError, "Corrupted args entry");
588
+ // Pointer to nodes to be replaced:
589
+ // a) VALUES
590
+ // (0) pre_init, (1) post_init,
591
+ // (7) kw_args, (8) kw_rest_arg, (9) opt_args
592
+ for (j = 0; j < 5; j++)
593
+ {
594
+ int ind = args_vals[j];
595
+ VALUE key = eptr[ind];
596
+ if (!strcmp(RSTRING_PTR(key), "0"))
597
+ eptr[ind] = INT2FIX(-1);
598
+ else
599
+ {
600
+ eptr[ind] = INT2FIX(LeafTableInfo_keyToID(&info->nodes, key));
601
+ if (FIX2INT(eptr[ind]) == -1)
602
+ rb_raise(rb_eArgError, "Unknown NODE in args tables");
603
+ }
604
+ }
605
+ // b) IDs (symbols)
606
+ // (4) first_post_arg (5) rest_arg (6) block_arg
607
+ for (j = 0; j < 3; j++)
608
+ {
609
+ int ind = args_ids[j];
610
+ VALUE key = eptr[ind];
611
+ if (FIX2INT(key) != 0)
612
+ {
613
+ eptr[ind] = INT2FIX(LeafTableInfo_keyToID(&info->syms, key));
614
+ if (FIX2INT(eptr[ind]) == -1)
615
+ rb_raise(rb_eArgError, "Unknown symbolic ID in args tables");
616
+ }
617
+ else
618
+ eptr[ind] = INT2FIX(-1);
619
+ }
620
+ }
621
+ #else
622
+ args = rb_ary_new();
623
+ #endif
624
+
625
+ rb_hash_aset(ans, ID2SYM(rb_intern("args")), args);
626
+ // Special case: NODES. Nodes are kept as binary string
627
+ rb_hash_aset(ans, ID2SYM(rb_intern("nodes")), dump_nodes(info));
628
+ return ans;
629
+ }
630
+
631
+
632
+ static void NODEInfo_addValue(NODEInfo *info, VALUE value)
633
+ {
634
+ if (is_value_in_heap(value))
635
+ {
636
+ VALUE lkey = value_to_str(value);
637
+ LeafTableInfo_addEntry(&info->lits, lkey, value);
638
+ }
639
+ }
640
+
641
+ /*
642
+ * Adds the information about Ruby NODE to the NODEInfo struct.
643
+ * It keeps the addresses of the node and its parents
644
+ */
645
+ static void NODEInfo_addNode(NODEInfo *info, NODE *node, NODE *pnode)
646
+ {
647
+ VALUE node_adr = value_to_str((VALUE) node);
648
+ VALUE pnode_adr = value_to_str((VALUE) pnode);
649
+ LeafTableInfo_addEntry(&info->nodes, node_adr, node_adr);
650
+ LeafTableInfo_addEntry(&info->pnodes, node_adr, pnode_adr);
651
+ }
652
+
653
+ /*
654
+ * Returns ID of the node using its address (VALUE)
655
+ * It is used during the process of dumping Ruby AST to disk
656
+ * for replacing of memory addresses into ordinals
657
+ */
658
+ static int NODEInfo_nodeAdrToID(NODEInfo *info, VALUE adr)
659
+ {
660
+ return LeafTableInfo_keyToID(&info->nodes, adr);
661
+ }
662
+
663
+ /*
664
+ * Function counts number of nodes and fills NODEInfo struct
665
+ * that is neccessary for the node saving to the HDD
666
+ */
667
+ static int count_num_of_nodes(NODE *node, NODE *parent, NODEInfo *info)
668
+ {
669
+ int ut[3], num, offset;
670
+ if (node == 0)
671
+ {
672
+ return 0;
673
+ }
674
+ else if (TYPE((VALUE) node) != T_NODE)
675
+ {
676
+ rb_raise(rb_eArgError, "count_num_of_nodes: parent node %s: child node (ADR 0x%s) is not a node; Type: %d (%s)",
677
+ ruby_node_name(nd_type(parent)), RSTRING_PTR(value_to_str((VALUE) node)), TYPE((VALUE) node),
678
+ RSTRING_PTR(rb_funcall(rb_funcall((VALUE) node, rb_intern("class"), 0), rb_intern("to_s"), 0))
679
+ );
680
+ return 0;
681
+ }
682
+ else
683
+ {
684
+ offset = nd_type(node) * 3;
685
+ ut[0] = nodes_ctbl[offset++];
686
+ ut[1] = nodes_ctbl[offset++];
687
+ ut[2] = nodes_ctbl[offset];
688
+
689
+ /* Special case: part of NODE_KW_ARG syntax in Ruby 2.x, e.g. def func(foo:, bar: 'default) */
690
+ if ((nd_type(node) == NODE_LASGN || nd_type(node) == NODE_DASGN_CURR) && (void *) node->u2.value == (void *) -1) {
691
+ ut[1] = NT_LONG; /* To keep -1 correctly */
692
+ }
693
+
694
+ /* Some another special cases */
695
+ if (nd_type(node) == NODE_OP_ASGN2 && nd_type(parent) == NODE_OP_ASGN2)
696
+ {
697
+ ut[0] = NT_ID;
698
+ ut[1] = NT_ID;
699
+ ut[2] = NT_ID;
700
+ }
701
+
702
+ /* Some Ruby 1.9.3 style function arguments (without rb_args_info) */
703
+ if (nd_type(node) == NODE_ARGS_AUX)
704
+ {
705
+ ut[0] = NT_ID;
706
+ ut[1] = (nd_type(parent) == NODE_ARGS_AUX) ? NT_LONG : NT_ID;
707
+ ut[2] = NT_NODE;
708
+
709
+ if (node->u1.value == 0) ut[0] = NT_NULL;
710
+ if (node->u2.value == 0) ut[1] = NT_NULL;
711
+ if (node->u3.value == 0) ut[2] = NT_NULL;
712
+ }
713
+ /* Some Ruby 1.9.3-specific code for NODE_ATTRASGN */
714
+ if (nd_type(node) == NODE_ATTRASGN)
715
+ {
716
+ if (node->u1.value == 1) ut[0] = NT_LONG;
717
+ }
718
+ /* Check if there is information about child nodes types */
719
+ if (ut[0] == NT_UNKNOWN || ut[1] == NT_UNKNOWN || ut[2] == NT_UNKNOWN)
720
+ {
721
+ rb_raise(rb_eArgError, "Cannot interpret node %d (%s)", nd_type(node), ruby_node_name(nd_type(node)));
722
+ }
723
+ /* Save the ID of the node */
724
+ num = 1;
725
+ NODEInfo_addNode(info, node, parent);
726
+ /* Analyze node childs */
727
+ /* a) child 1 */
728
+ if (ut[0] == NT_NODE)
729
+ {
730
+ num += count_num_of_nodes(node->u1.node, node, info);
731
+ }
732
+ else if (ut[0] == NT_ID)
733
+ {
734
+ LeafTableInfo_addIDEntry(&info->syms, node->u1.id);
735
+ }
736
+ else if (ut[0] == NT_VALUE)
737
+ {
738
+ if (TYPE(node->u1.value) == T_NODE)
739
+ rb_raise(rb_eArgError, "NODE instead of VALUE in child 1 of node %s", ruby_node_name(nd_type(node)));
740
+ NODEInfo_addValue(info, node->u1.value);
741
+ }
742
+ else if (ut[0] == NT_IDTABLE)
743
+ {
744
+ VALUE tkey = value_to_str(node->u1.value);
745
+ VALUE idtbl_ary = rb_ary_new();
746
+ ID *idtbl = (ID *) node->u1.value;
747
+ int i, size = (node->u1.value) ? *idtbl++ : 0;
748
+ for (i = 0; i < size; i++)
749
+ {
750
+ ID sym = *idtbl++;
751
+ rb_ary_push(idtbl_ary, INT2FIX(sym));
752
+ LeafTableInfo_addIDEntry(&info->syms, sym);
753
+ }
754
+ LeafTableInfo_addEntry(&info->idtabs, tkey, idtbl_ary);
755
+ }
756
+ else if (ut[0] != NT_LONG && ut[0] != NT_NULL)
757
+ {
758
+ rb_raise(rb_eArgError, "1!");
759
+ }
760
+ /* b) child 2 */
761
+ if (ut[1] == NT_NODE)
762
+ {
763
+ num += count_num_of_nodes(node->u2.node, node, info);
764
+ }
765
+ else if (ut[1] == NT_ID)
766
+ {
767
+ LeafTableInfo_addIDEntry(&info->syms, node->u2.id);
768
+ }
769
+ else if (ut[1] == NT_VALUE)
770
+ {
771
+ if (TYPE(node->u2.value) == T_NODE)
772
+ rb_raise(rb_eArgError, "NODE instead of VALUE in child 2 of node %s", ruby_node_name(nd_type(node)));
773
+ NODEInfo_addValue(info, node->u2.value);
774
+ }
775
+ else if (ut[1] != NT_LONG && ut[1] != NT_NULL)
776
+ {
777
+ rb_raise(rb_eArgError, "2!");
778
+ }
779
+
780
+ /* c) child 3 */
781
+ if (ut[2] == NT_NODE)
782
+ {
783
+ num += count_num_of_nodes(node->u3.node, node, info);
784
+ }
785
+ else if (ut[2] == NT_ID)
786
+ {
787
+ LeafTableInfo_addIDEntry(&info->syms, node->u3.id);
788
+ }
789
+ else if (ut[2] == NT_ARGS)
790
+ {
791
+ #ifdef USE_RB_ARGS_INFO
792
+ VALUE varg = Qtrue;
793
+ struct rb_args_info *ainfo;
794
+ ID asym;
795
+ ainfo = node->u3.args;
796
+ // Save child nodes
797
+ num += count_num_of_nodes(ainfo->pre_init, node, info);
798
+ num += count_num_of_nodes(ainfo->post_init, node, info);
799
+ num += count_num_of_nodes(ainfo->kw_args, node, info);
800
+ num += count_num_of_nodes(ainfo->kw_rest_arg, node, info);
801
+ num += count_num_of_nodes(ainfo->opt_args, node, info);
802
+ // Save rb_args_info structure content
803
+ varg = rb_ary_new();
804
+ rb_ary_push(varg, value_to_str((VALUE) ainfo->pre_init));
805
+ rb_ary_push(varg, value_to_str((VALUE) ainfo->post_init));
806
+ rb_ary_push(varg, INT2FIX(ainfo->pre_args_num));
807
+ rb_ary_push(varg, INT2FIX(ainfo->post_args_num));
808
+
809
+ asym = ainfo->first_post_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
810
+ if (asym != 0)
811
+ LeafTableInfo_addIDEntry(&info->syms, asym);
812
+
813
+ asym = ainfo->rest_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
814
+ if (asym != 0)
815
+ LeafTableInfo_addIDEntry(&info->syms, asym);
816
+
817
+ asym = ainfo->block_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
818
+ if (asym != 0)
819
+ LeafTableInfo_addIDEntry(&info->syms, asym);
820
+ rb_ary_push(varg, value_to_str((VALUE) ainfo->kw_args));
821
+ rb_ary_push(varg, value_to_str((VALUE) ainfo->kw_rest_arg));
822
+ rb_ary_push(varg, value_to_str((VALUE) ainfo->opt_args));
823
+
824
+ LeafTableInfo_addEntry(&info->args, value_to_str((VALUE) ainfo), varg);
825
+ #else
826
+ rb_raise(rb_eArgError, "NT_ARGS entry without USE_RB_ARGS_INFO");
827
+ #endif
828
+ }
829
+ else if (ut[2] == NT_ENTRY)
830
+ {
831
+ ID gsym = node->u3.entry->id;
832
+ // Save symbol to the symbol table
833
+ int newid = LeafTableInfo_addIDEntry(&info->syms, gsym);
834
+ LeafTableInfo_addEntry(&info->gentries, value_to_str(node->u3.value), INT2FIX(newid));
835
+ }
836
+ else if (ut[2] != NT_LONG && ut[2] != NT_NULL)
837
+ {
838
+ rb_raise(rb_eArgError, "Invalid child node 3 of node %s: TYPE %d, VALUE %"PRIxPTR,
839
+ ruby_node_name(nd_type(node)), ut[2], (uintptr_t) (node->u3.value));
840
+ }
841
+
842
+ return num;
843
+ }
844
+ }
845
+
846
+
847
+
848
+ //-------------------------------------------------------------------------
849
+
850
+ /*
851
+ * Part 4. Functions for loading marshalled nodes
852
+ */
853
+ typedef struct {
854
+ ID *syms_adr; // Table of symbols
855
+ int syms_len;
856
+
857
+ VALUE *lits_adr; // Table of literals
858
+ int lits_len;
859
+
860
+ ID **idtbls_adr; // Table of symbols tables
861
+ int idtbls_len;
862
+
863
+ struct rb_global_entry **gvars_adr; // Table of global variables entries
864
+ int gvars_len;
865
+
866
+ NODE **nodes_adr; // Table of nodes
867
+ int nodes_len;
868
+ #ifdef USE_RB_ARGS_INFO
869
+ struct rb_args_info **args_adr; // Table of code blocks arguments
870
+ int args_len;
871
+ #endif
872
+ } NODEObjAddresses;
873
+
874
+
875
+ void NODEObjAddresses_free(NODEObjAddresses *obj)
876
+ {
877
+ xfree(obj->syms_adr);
878
+ xfree(obj->idtbls_adr);
879
+ xfree(obj->gvars_adr);
880
+ xfree(obj->nodes_adr);
881
+ #ifdef USE_RB_ARGS_INFO
882
+ xfree(obj->args_adr);
883
+ #endif
884
+ xfree(obj);
885
+ }
886
+
887
+
888
+
889
+ void rbstr_printf(VALUE str, const char *fmt, ...)
890
+ {
891
+ char buf[1024];
892
+ va_list ptr;
893
+
894
+ va_start(ptr, fmt);
895
+ vsprintf(buf, fmt, ptr);
896
+ rb_str_append(str, rb_str_new2(buf));
897
+ va_end(ptr);
898
+ }
899
+
900
+ const char *symid_to_cstr(ID symid)
901
+ {
902
+ const char *str_null = "<NULL>", *str_intern = "<NONAME>";
903
+ const char *str_sym;
904
+
905
+ if (symid == 0)
906
+ str_sym = str_null;
907
+ else
908
+ {
909
+ VALUE rbstr_sym = rb_id2str(symid);
910
+ if (TYPE(rbstr_sym) == T_STRING)
911
+ str_sym = RSTRING_PTR(rb_id2str(symid));
912
+ else
913
+ str_sym = str_intern;
914
+ }
915
+ return str_sym;
916
+ }
917
+
918
+ #define PRINT_NODE_TAB for (j = 0; j < tab; j++) rbstr_printf(str, " ");
919
+ /*
920
+ * Recursively transforms node into Ruby string
921
+ * str -- output Ruby string
922
+ * node -- input Ruby NODE
923
+ * tab -- number of tabulations during print
924
+ * show_offsets -- 0/1 show/hide addresses and symbol IDs
925
+ */
926
+ static void print_node(VALUE str, NODE *node, int tab, int show_offsets)
927
+ {
928
+ int i, j, type, ut[3];
929
+ VALUE uref[3];
930
+
931
+ PRINT_NODE_TAB
932
+ if (node == NULL)
933
+ {
934
+ rbstr_printf(str, "(NULL)\n");
935
+ return;
936
+ }
937
+ type = nd_type(node);
938
+
939
+ if (show_offsets)
940
+ {
941
+ rbstr_printf(str, "@ %s | %16"PRIxPTR " | %16"PRIxPTR " %16"PRIxPTR " %16"PRIxPTR " (line %d)\n",
942
+ ruby_node_name(type),
943
+ (intptr_t) node,
944
+ (intptr_t) node->u1.value, (intptr_t) node->u2.value, (intptr_t) node->u3.value,
945
+ nd_line(node));
946
+ }
947
+ else
948
+ {
949
+ rbstr_printf(str, "@ %s (line %d)\n", ruby_node_name(type), nd_line(node));
950
+ }
951
+
952
+ ut[0] = nodes_ctbl[type * 3];
953
+ ut[1] = nodes_ctbl[type * 3 + 1];
954
+ ut[2] = nodes_ctbl[type * 3 + 2];
955
+
956
+ uref[0] = node->u1.value;
957
+ uref[1] = node->u2.value;
958
+ uref[2] = node->u3.value;
959
+
960
+ if ((type == NODE_LASGN || type == NODE_DASGN_CURR) && (void *) node->u2.value == (void *) -1)
961
+ {
962
+ ut[1] = NT_LONG;
963
+ }
964
+
965
+ for (i = 0; i < 3; i++)
966
+ {
967
+
968
+ if (ut[i] == NT_NODE)
969
+ {
970
+ if (nd_type(node) != NODE_OP_ASGN2 || i != 2)
971
+ print_node(str, RNODE(uref[i]), tab + 1, show_offsets);
972
+ else
973
+ {
974
+ if (ut[i] != 0 && TYPE(ut[i]) != T_NODE)
975
+ rb_raise(rb_eArgError, "print_node: broken node 0x%s", RSTRING_PTR(value_to_str(ut[i])));
976
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
977
+ rbstr_printf(str, "%"PRIxPTR " %"PRIxPTR " %"PRIxPTR"\n",
978
+ (intptr_t) RNODE(uref[i])->u1.value,
979
+ (intptr_t) RNODE(uref[i])->u2.value,
980
+ (intptr_t) RNODE(uref[i])->u3.value);
981
+ }
982
+ }
983
+ else if (ut[i] == NT_VALUE)
984
+ {
985
+ char *class_name = RSTRING_PTR(rb_funcall(rb_funcall(uref[i], rb_intern("class"), 0), rb_intern("to_s"), 0));
986
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
987
+ if (show_offsets)
988
+ {
989
+ rbstr_printf(str, ">| ADR: %"PRIxPTR"; CLASS: %s (TYPE %d); VALUE: %s\n",
990
+ (intptr_t) uref[i],
991
+ class_name, TYPE(uref[i]),
992
+ RSTRING_PTR(rb_funcall(uref[i], rb_intern("to_s"), 0)));
993
+ }
994
+ else
995
+ {
996
+ rbstr_printf(str, ">| CLASS: %s (TYPE %d); VALUE: %s\n",
997
+ class_name, TYPE(uref[i]),
998
+ RSTRING_PTR(rb_funcall(uref[i], rb_intern("to_s"), 0)));
999
+ }
1000
+ }
1001
+ else if (ut[i] == NT_ID)
1002
+ {
1003
+ const char *str_sym = symid_to_cstr(uref[i]);
1004
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1005
+ if (show_offsets)
1006
+ rbstr_printf(str, ">| ID: %d; SYMBOL: :%s\n", (ID) uref[i], str_sym);
1007
+ else
1008
+ rbstr_printf(str, ">| SYMBOL: :%s\n", str_sym);
1009
+ }
1010
+ else if (ut[i] == NT_LONG)
1011
+ {
1012
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1013
+ rbstr_printf(str, ">| %"PRIxPTR "\n", (intptr_t) uref[i]);
1014
+ }
1015
+ else if (ut[i] == NT_NULL)
1016
+ {
1017
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1018
+ rbstr_printf(str, ">| (NULL)\n");
1019
+ }
1020
+ else if (ut[i] == NT_ARGS)
1021
+ {
1022
+ #ifdef USE_RB_ARGS_INFO
1023
+ struct rb_args_info *ainfo;
1024
+ #endif
1025
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1026
+ rbstr_printf(str, ">| ARGS\n");
1027
+ #ifdef USE_RB_ARGS_INFO
1028
+ ainfo = node->u3.args;
1029
+ /* Print generic info about the structure */
1030
+ PRINT_NODE_TAB; rbstr_printf(str, " PRE_INIT: %16" PRIxPTR "\n", ainfo->pre_init);
1031
+ PRINT_NODE_TAB; rbstr_printf(str, " POST_INIT: %16" PRIxPTR "\n", ainfo->post_init);
1032
+ PRINT_NODE_TAB; rbstr_printf(str, " KW_ARGS: %16" PRIxPTR "\n", ainfo->kw_args);
1033
+ PRINT_NODE_TAB; rbstr_printf(str, " KW_REST_ARG: %16" PRIxPTR "\n", ainfo->kw_rest_arg);
1034
+ PRINT_NODE_TAB; rbstr_printf(str, " OPT_ARGS: %16" PRIxPTR "\n", ainfo->opt_args);
1035
+ PRINT_NODE_TAB; rbstr_printf(str, " pre_args_num: %d\n", ainfo->pre_args_num);
1036
+ PRINT_NODE_TAB; rbstr_printf(str, " post_args_num: %d\n", ainfo->post_args_num);
1037
+ /* Print information about symbols */
1038
+ if (show_offsets)
1039
+ {
1040
+ PRINT_NODE_TAB; rbstr_printf(str, " first_post_arg: %s (ID %X)\n",
1041
+ symid_to_cstr(ainfo->first_post_arg), ainfo->first_post_arg);
1042
+ PRINT_NODE_TAB; rbstr_printf(str, " rest_arg: %s (ID %X)\n",
1043
+ symid_to_cstr(ainfo->rest_arg), ainfo->rest_arg);
1044
+ PRINT_NODE_TAB; rbstr_printf(str, " block_arg: %s (ID %X)\n",
1045
+ symid_to_cstr(ainfo->block_arg), ainfo->block_arg);
1046
+ }
1047
+ else
1048
+ {
1049
+ PRINT_NODE_TAB; rbstr_printf(str, " first_post_arg: %s\n",
1050
+ symid_to_cstr(ainfo->first_post_arg));
1051
+ PRINT_NODE_TAB; rbstr_printf(str, " rest_arg: %s\n",
1052
+ symid_to_cstr(ainfo->rest_arg));
1053
+ PRINT_NODE_TAB; rbstr_printf(str, " block_arg: %s\n",
1054
+ symid_to_cstr(ainfo->block_arg));
1055
+ }
1056
+ /* Print information about child nodes */
1057
+ print_node(str, RNODE(ainfo->pre_init), tab + 2, show_offsets);
1058
+ print_node(str, RNODE(ainfo->post_init), tab + 2, show_offsets);
1059
+ print_node(str, RNODE(ainfo->kw_args), tab + 2, show_offsets);
1060
+ print_node(str, RNODE(ainfo->kw_rest_arg), tab + 2, show_offsets);
1061
+ print_node(str, RNODE(ainfo->opt_args), tab + 2, show_offsets);
1062
+ #endif
1063
+ }
1064
+ else if (ut[i] == NT_IDTABLE)
1065
+ {
1066
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1067
+ rbstr_printf(str, ">| IDTABLE\n");
1068
+ }
1069
+ else if (ut[i] == NT_ENTRY)
1070
+ {
1071
+ struct rb_global_entry *gentry;
1072
+ gentry = (struct rb_global_entry *) uref[i];
1073
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1074
+ rbstr_printf(str, ">| [GLOBAL ENTRY PTR=0x%"PRIxPTR" ID=%X]\n", (uintptr_t) gentry->var, gentry->id);
1075
+ }
1076
+ else
1077
+ {
1078
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1079
+ rbstr_printf(str, ">| [UNKNOWN]\n");
1080
+ }
1081
+ }
1082
+ }
1083
+
1084
+
1085
+
1086
+ void resolve_syms_ords(VALUE data, NODEObjAddresses *relocs)
1087
+ {
1088
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("symbols")));
1089
+ int i;
1090
+ if (tbl_val == Qnil)
1091
+ {
1092
+ rb_raise(rb_eArgError, "Cannot find symbols table");
1093
+ }
1094
+ if (TYPE(tbl_val) != T_ARRAY)
1095
+ {
1096
+ rb_raise(rb_eArgError, "Symbols table is not an array");
1097
+ }
1098
+ relocs->syms_len = RARRAY_LEN(tbl_val);
1099
+ relocs->syms_adr = ALLOC_N(ID, relocs->syms_len);
1100
+ for (i = 0; i < relocs->syms_len; i++)
1101
+ {
1102
+ VALUE r_sym = RARRAY_PTR(tbl_val)[i];
1103
+ if (TYPE(r_sym) == T_STRING)
1104
+ { /* Created symbol will be immune to garbage collector */
1105
+ relocs->syms_adr[i] = rb_intern(RSTRING_PTR(r_sym));
1106
+ }
1107
+ else if (TYPE(r_sym) == T_FIXNUM)
1108
+ {
1109
+ relocs->syms_adr[i] = (ID) FIX2INT(r_sym);
1110
+ }
1111
+ else
1112
+ {
1113
+ rb_raise(rb_eArgError, "Symbols table is corrupted");
1114
+ }
1115
+ }
1116
+ }
1117
+
1118
+ void resolve_lits_ords(VALUE data, NODEObjAddresses *relocs)
1119
+ {
1120
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("literals")));
1121
+ int i;
1122
+ if (tbl_val == Qnil)
1123
+ {
1124
+ rb_raise(rb_eArgError, "Cannot find literals table");
1125
+ }
1126
+ if (TYPE(tbl_val) != T_ARRAY)
1127
+ {
1128
+ rb_raise(rb_eArgError, "Literals table is not an array");
1129
+ }
1130
+ relocs->lits_adr = RARRAY_PTR(tbl_val);
1131
+ relocs->lits_len = RARRAY_LEN(tbl_val);
1132
+ /* Mark all symbols as "immortal" (i.e. not collectable
1133
+ by Ruby GC): some of them can be used in the syntax tree!
1134
+ See the presentation of Narihiro Nakamura, author of
1135
+ symbol GC in Ruby 2.x for details
1136
+ http://www.slideshare.net/authorNari/symbol-gc */
1137
+ for (i = 0; i < relocs->lits_len; i++)
1138
+ {
1139
+ if (TYPE(relocs->lits_adr[i]) == T_SYMBOL)
1140
+ {
1141
+ SYM2ID(relocs->lits_adr[i]);
1142
+ }
1143
+ }
1144
+ }
1145
+
1146
+ void resolve_gvars_ords(VALUE data, NODEObjAddresses *relocs)
1147
+ {
1148
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("global_entries")));
1149
+ int i;
1150
+
1151
+ if (tbl_val == Qnil)
1152
+ {
1153
+ rb_raise(rb_eArgError, "Cannot find global entries table");
1154
+ }
1155
+ if (TYPE(tbl_val) != T_ARRAY)
1156
+ {
1157
+ rb_raise(rb_eArgError, "Global entries table should be an array");
1158
+ }
1159
+ relocs->gvars_len = RARRAY_LEN(tbl_val);
1160
+ relocs->gvars_adr = ALLOC_N(struct rb_global_entry *, relocs->gvars_len);
1161
+ for (i = 0; i < relocs->gvars_len; i++)
1162
+ {
1163
+ int ind = FIX2INT(RARRAY_PTR(tbl_val)[i]);
1164
+ ID sym = relocs->syms_adr[ind];
1165
+ relocs->gvars_adr[i] = rb_global_entry(sym);
1166
+ }
1167
+ }
1168
+
1169
+
1170
+ void resolve_idtbls_ords(VALUE data, NODEObjAddresses *relocs)
1171
+ {
1172
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("id_tables")));
1173
+ int i, j, idnum;
1174
+
1175
+ if (tbl_val == Qnil)
1176
+ {
1177
+ rb_raise(rb_eArgError, "Cannot find id_tables entries");
1178
+ }
1179
+ relocs->idtbls_len = RARRAY_LEN(tbl_val);
1180
+ relocs->idtbls_adr = ALLOC_N(ID *, relocs->idtbls_len);
1181
+ for (i = 0; i < relocs->idtbls_len; i++)
1182
+ {
1183
+ VALUE idtbl = RARRAY_PTR(tbl_val)[i];
1184
+ idnum = RARRAY_LEN(idtbl);
1185
+ if (idnum == 0)
1186
+ { // Empty table: NULL pointer in the address table
1187
+ relocs->idtbls_adr[i] = NULL;
1188
+ }
1189
+ else
1190
+ { // Filled table: pointer to dynamic memory
1191
+ relocs->idtbls_adr[i] = ALLOC_N(ID, idnum + 1);
1192
+ relocs->idtbls_adr[i][0] = idnum;
1193
+ for (j = 0; j < idnum; j++)
1194
+ {
1195
+ int ind = FIX2INT(RARRAY_PTR(idtbl)[j]);
1196
+ relocs->idtbls_adr[i][j+1] = relocs->syms_adr[ind];
1197
+ }
1198
+ }
1199
+ }
1200
+ }
1201
+
1202
+ void resolve_nodes_ords(VALUE data, int num_of_nodes, NODEObjAddresses *relocs)
1203
+ {
1204
+ int i;
1205
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("nodes")));
1206
+ if (tbl_val == Qnil)
1207
+ {
1208
+ rb_raise(rb_eArgError, "Cannot find nodes entries");
1209
+ }
1210
+ if (TYPE(tbl_val) != T_STRING)
1211
+ {
1212
+ rb_raise(rb_eArgError, "Nodes description must be a string");
1213
+ }
1214
+ relocs->nodes_adr = ALLOC_N(NODE *, num_of_nodes);
1215
+ relocs->nodes_len = num_of_nodes;
1216
+ for (i = 0; i < num_of_nodes; i++)
1217
+ {
1218
+ relocs->nodes_adr[i] = (NODE *) NEW_NODE((enum node_type) 0, 0, 0, 0);
1219
+ }
1220
+ }
1221
+
1222
#ifdef USE_RB_ARGS_INFO
/*
 * Converts the node ordinal (Fixnum) to the node address.
 * The ordinal -1 means the absent node (NULL).
 */
static NODE *args_node_from_ord(VALUE ord_val, NODEObjAddresses *relocs)
{
    long ord = FIX2LONG(ord_val);
    if (ord < -1 || ord >= relocs->nodes_len)
        rb_raise(rb_eArgError, "Invalid node ordinal %d", (int) ord);
    return (ord == -1) ? NULL : relocs->nodes_adr[ord];
}

/*
 * Converts the symbol ordinal (Fixnum) to the symbol ID.
 * The ordinal -1 means the absent symbol (zero ID).
 * field_no is used only as a prefix of the error message.
 */
static ID args_sym_from_ord(VALUE ord_val, NODEObjAddresses *relocs, int field_no)
{
    int ord = FIX2INT(ord_val);
    if (ord < -1 || ord >= relocs->syms_len)
        rb_raise(rb_eArgError, "%d- Invalid symbol ID ordinal %d", field_no, ord);
    return (ord == -1) ? 0 : relocs->syms_adr[ord];
}

/*
 * Loads the table of rb_args_info structures from the :args field of the
 * hash and fills relocs->args_adr / relocs->args_len. Node and symbol
 * ordinals are resolved by means of relocs->nodes_adr and relocs->syms_adr,
 * so these tables must be prepared before the call.
 *
 * Each entry of the table must be an array of 10 Fixnums:
 *   (0) pre_init, (1) post_init       -- node ordinals
 *   (2) pre_args_num, (3) post_args_num -- numbers (kept as is)
 *   (4) first_post_arg, (5) rest_arg, (6) block_arg -- symbol ordinals
 *   (7) kw_args, (8) kw_rest_arg, (9) opt_args      -- node ordinals
 *
 * Raises ArgumentError if the table or one of its entries is corrupted.
 */
void resolve_args_ords(VALUE data, NODEObjAddresses *relocs)
{
    int i;
    VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("args")));

    if (tbl_val == Qnil)
    {
        rb_raise(rb_eArgError, "Cannot find args entries table");
    }
    if (TYPE(tbl_val) != T_ARRAY)
    {
        rb_raise(rb_eArgError, "args description must be an array");
    }
    relocs->args_len = RARRAY_LEN(tbl_val);
    relocs->args_adr = ALLOC_N(struct rb_args_info *, relocs->args_len);
    for (i = 0; i < relocs->args_len; i++)
    {
        int k, pre_args_num, post_args_num;
        VALUE ainfo_val, *aiptr;
        NODE *pre_init, *post_init, *kw_args, *kw_rest_arg, *opt_args;
        ID first_post_arg, rest_arg, block_arg;
        struct rb_args_info *ainfo;

        /* The entry is validated before any access to its elements */
        ainfo_val = RARRAY_PTR(tbl_val)[i];
        if (TYPE(ainfo_val) != T_ARRAY || RARRAY_LEN(ainfo_val) != 10)
        {
            rb_raise(rb_eArgError, "args entry %d is corrupted", i);
        }
        aiptr = RARRAY_PTR(ainfo_val);
        for (k = 0; k < 10; k++)
        {
            if (TYPE(aiptr[k]) != T_FIXNUM)
                rb_raise(rb_eArgError, "args entry %d is corrupted", i);
        }
        /* Resolve nodes */
        pre_init = args_node_from_ord(aiptr[0], relocs);
        post_init = args_node_from_ord(aiptr[1], relocs);
        kw_args = args_node_from_ord(aiptr[7], relocs);
        kw_rest_arg = args_node_from_ord(aiptr[8], relocs);
        opt_args = args_node_from_ord(aiptr[9], relocs);
        /* Values that don't require ordinal resolving */
        pre_args_num = FIX2INT(aiptr[2]);
        post_args_num = FIX2INT(aiptr[3]);
        /* Resolve symbolic ordinals */
        first_post_arg = args_sym_from_ord(aiptr[4], relocs, 1);
        rest_arg = args_sym_from_ord(aiptr[5], relocs, 2);
        block_arg = args_sym_from_ord(aiptr[6], relocs, 3);

        /* The structure is allocated only when all values are known:
           the errors above don't leave a half-filled structure */
        ainfo = ALLOC(struct rb_args_info);
        ainfo->pre_init = pre_init;
        ainfo->post_init = post_init;
        ainfo->pre_args_num = pre_args_num;
        ainfo->post_args_num = post_args_num;
        ainfo->first_post_arg = first_post_arg;
        ainfo->rest_arg = rest_arg;
        ainfo->block_arg = block_arg;
        ainfo->kw_args = kw_args;
        ainfo->kw_rest_arg = kw_rest_arg;
        ainfo->opt_args = opt_args;
        relocs->args_adr[i] = ainfo;
    }
}
#endif
1307
+
1308
+ /*
1309
+ * Transforms binary data with nodes descriptions into Ruby AST (i.e.
1310
+ * ternary tree of nodes). Each node is represented in the next binary format:
1311
+ *
1312
+ * [4 bytes -- pointers info] [node flags] [child ORD1] [child ORD2] [child ORD3]
1313
+ *
1314
+ * Pointers info:
1315
+ * BYTE -- child 1 info (bits 7..4 -- ordinal type, bits 3..0 -- ordinal size, bytes)
1316
+ * BYTE -- child 2 info
1317
+ * BYTE -- child 3 info
1318
+ * BYTE -- node flags length, bytes
1319
+ * Node flags:
1320
+ * node->flags field packed by bin_to_value function
1321
+ * child ORDi Ordinal of ith node child packed by bin_to_value_function
1322
+ * (it will be transformed to the real address in memory, i.e. pointer
1323
+ * or symbol ID during data loading)
1324
+ */
1325
+ void load_nodes_from_str(VALUE data, NODEObjAddresses *relocs)
1326
+ {
1327
+ int i, j;
1328
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("nodes")));
1329
+ unsigned char *bin = (unsigned char *) RSTRING_PTR(tbl_val);
1330
+ NODE *node = NULL;
1331
+ for (i = 0; i < relocs->nodes_len; i++)
1332
+ {
1333
+ int rtypes[4];
1334
+ VALUE u[3], flags;
1335
+ // Read data structure info
1336
+ for (j = 0; j < 4; j++)
1337
+ rtypes[j] = *bin++;
1338
+ flags = bin_to_value(bin, rtypes[3]); bin += rtypes[3];
1339
+ for (j = 0; j < 3; j++)
1340
+ {
1341
+ int val_len = (rtypes[j] & 0xF0) >> 4;
1342
+ u[j] = bin_to_value(bin, val_len);
1343
+ bin += val_len;
1344
+ rtypes[j] &= 0x0F;
1345
+
1346
+ }
1347
+ if ((char *)bin - RSTRING_PTR(tbl_val) > RSTRING_LEN(tbl_val))
1348
+ rb_raise(rb_eArgError, "Nodes binary dump is too short");
1349
+ // Resolving all addresses
1350
+ for (j = 0; j < 3; j++)
1351
+ {
1352
+ switch(rtypes[j])
1353
+ {
1354
+ case VL_RAW: // Do nothing: it is raw data
1355
+ break;
1356
+ case VL_NODE:
1357
+ if (u[j] >= (unsigned int) relocs->nodes_len)
1358
+ rb_raise(rb_eArgError, "Cannot resolve VL_NODE entry %d", (int) u[j]);
1359
+ u[j] = (VALUE) relocs->nodes_adr[u[j]];
1360
+ if (TYPE(u[j]) != T_NODE)
1361
+ rb_raise(rb_eArgError, "load_nodes_from_str: nodes memory corrupted");
1362
+ break;
1363
+ case VL_ID:
1364
+ if (u[j] >= (unsigned int) relocs->syms_len)
1365
+ rb_raise(rb_eArgError, "Cannot resolve VL_ID entry %d", (int) u[j]);
1366
+ u[j] = relocs->syms_adr[u[j]];
1367
+ break;
1368
+ case VL_GVAR:
1369
+ if (u[j] >= (unsigned int) relocs->gvars_len)
1370
+ rb_raise(rb_eArgError, "Cannot resolve VL_GVAR entry %d", (int) u[j]);
1371
+ u[j] = (VALUE) relocs->gvars_adr[u[j]];
1372
+ break;
1373
+ case VL_IDTABLE:
1374
+ if (u[j] >= (unsigned int) relocs->idtbls_len)
1375
+ rb_raise(rb_eArgError, "Cannot resolve VL_IDTABLE entry %d", (int) u[j]);
1376
+ u[j] = (VALUE) relocs->idtbls_adr[u[j]];
1377
+ break;
1378
+ #ifdef USE_RB_ARGS_INFO
1379
+ case VL_ARGS:
1380
+ if (u[j] >= (unsigned int) relocs->args_len)
1381
+ rb_raise(rb_eArgError, "Cannot resolve VL_ARGS entry %d", (int) u[j]);
1382
+ u[j] = (VALUE) relocs->args_adr[u[j]];
1383
+ break;
1384
+ #endif
1385
+ case VL_LIT:
1386
+ if (u[j] >= (unsigned int) relocs->lits_len)
1387
+ rb_raise(rb_eArgError, "Cannot resolve VL_LIT entry %d", (int) u[j]);
1388
+ u[j] = (VALUE) relocs->lits_adr[u[j]];
1389
+ break;
1390
+ default:
1391
+ rb_raise(rb_eArgError, "Unknown RTYPE %d", rtypes[j]);
1392
+ }
1393
+ }
1394
+
1395
+ // Fill classic node structure
1396
+ node = relocs->nodes_adr[i];
1397
+ #ifdef RESET_GC_FLAGS
1398
+ flags = flags & (~0x3); // Ruby 1.9.x -- specific thing
1399
+ #endif
1400
+ node->flags = (flags << 5) | T_NODE;
1401
+ node->nd_reserved = 0;
1402
+ node->u1.value = u[0];
1403
+ node->u2.value = u[1];
1404
+ node->u3.value = u[2];
1405
+ }
1406
+ }
1407
+
1408
+ /*
1409
+ * Returns the value of string hash field using symbolic key
1410
+ */
1411
+ static VALUE get_hash_strfield(VALUE hash, const char *idtxt)
1412
+ {
1413
+ VALUE str = rb_hash_aref(hash, ID2SYM(rb_intern(idtxt)));
1414
+ if (TYPE(str) != T_STRING)
1415
+ {
1416
+ rb_raise(rb_eArgError, "Hash field %s is not a string", idtxt);
1417
+ return Qnil;
1418
+ }
1419
+ else
1420
+ {
1421
+ return str;
1422
+ }
1423
+ }
1424
+
1425
+ /*
1426
+ * Check validity of node hash representation signatures ("magic" values)
1427
+ */
1428
+ static VALUE check_hash_magic(VALUE data)
1429
+ {
1430
+ VALUE val, refval;
1431
+ // MAGIC signature must be valid
1432
+ val = get_hash_strfield(data, "MAGIC");
1433
+ if (strcmp(NODEMARSHAL_MAGIC, RSTRING_PTR(val)))
1434
+ rb_raise(rb_eArgError, "Bad value of MAGIC signature");
1435
+ // RUBY_PLATFORM signature must match the current platform
1436
+ val = get_hash_strfield(data, "RUBY_PLATFORM");
1437
+ refval = rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM"));
1438
+ if (strcmp(RSTRING_PTR(refval), RSTRING_PTR(val)))
1439
+ rb_raise(rb_eArgError, "Incompatible RUBY_PLATFORM value %s", RSTRING_PTR(val));
1440
+ // RUBY_VERSION signature must match the used Ruby interpreter
1441
+ val = get_hash_strfield(data, "RUBY_VERSION");
1442
+ refval = rb_const_get(rb_cObject, rb_intern("RUBY_VERSION"));
1443
+ if (strcmp(RSTRING_PTR(refval), RSTRING_PTR(val)))
1444
+ rb_raise(rb_eArgError, "Incompatible RUBY_VERSION value %s", RSTRING_PTR(val));
1445
+ return Qtrue;
1446
+ }
1447
+
1448
+ /*
1449
+ * Part 5. C-to-Ruby interface
1450
+ *
1451
+ */
1452
+
1453
+ /*
1454
+ * Restore Ruby node from the binary blob (dump)
1455
+ */
1456
+ static VALUE m_nodedump_from_memory(VALUE self, VALUE dump)
1457
+ {
1458
+ VALUE cMarshal, data, val, val_relocs;
1459
+ VALUE gc_was_disabled;
1460
+ int num_of_nodes;
1461
+ NODEObjAddresses *relocs;
1462
+ /* DISABLE GARBAGE COLLECTOR (required for stable loading
1463
+ of large node trees */
1464
+ gc_was_disabled = rb_gc_disable();
1465
+ /* Wrap struct for relocations */
1466
+ val_relocs = Data_Make_Struct(cNodeObjAddresses, NODEObjAddresses,
1467
+ NULL, NODEObjAddresses_free, relocs); // This data envelope cannot exist without NODE
1468
+ /* Load and unpack our dump */
1469
+ cMarshal = rb_const_get(rb_cObject, rb_intern("Marshal"));
1470
+ data = rb_funcall(cMarshal, rb_intern("load"), 1, dump);
1471
+ if (TYPE(data) != T_HASH)
1472
+ {
1473
+ rb_raise(rb_eArgError, "Input dump is corrupted");
1474
+ }
1475
+ val = rb_hash_aref(data, ID2SYM(rb_intern("num_of_nodes")));
1476
+ if (val == Qnil)
1477
+ {
1478
+ rb_raise(rb_eArgError, "num_of_nodes not found");
1479
+ }
1480
+ else
1481
+ {
1482
+ num_of_nodes = FIX2INT(val);
1483
+ }
1484
+ /* Check "magic" signature and platform identifiers */
1485
+ check_hash_magic(data);
1486
+ /* Get the information about the source file that was compiled to the node */
1487
+ // a) node name
1488
+ val = rb_hash_aref(data, ID2SYM(rb_intern("nodename")));
1489
+ if (val == Qnil || TYPE(val) == T_STRING)
1490
+ rb_iv_set(self, "@nodename", val);
1491
+ else
1492
+ rb_raise(rb_eArgError, "nodename value is corrupted");
1493
+ // b) file name
1494
+ val = rb_hash_aref(data, ID2SYM(rb_intern("filename")));
1495
+ if (val == Qnil || TYPE(val) == T_STRING)
1496
+ rb_iv_set(self, "@filename", val);
1497
+ else
1498
+ rb_raise(rb_eArgError, "filename value is corrupted");
1499
+ // c) file path
1500
+ val = rb_hash_aref(data, ID2SYM(rb_intern("filepath")));
1501
+ if (val == Qnil || TYPE(val) == T_STRING)
1502
+ rb_iv_set(self, "@filepath", val);
1503
+ else
1504
+ rb_raise(rb_eArgError, "filepath value is corrupted");
1505
+ /* Load all required data */
1506
+ resolve_syms_ords(data, relocs); // Symbols
1507
+ resolve_lits_ords(data, relocs); // Literals
1508
+ resolve_gvars_ords(data, relocs); // Global entries (with symbol ID resolving)
1509
+ resolve_idtbls_ords(data, relocs); // Identifiers tables (with symbol ID resolving)
1510
+ resolve_nodes_ords(data, num_of_nodes, relocs); // Allocate memory for all nodes
1511
+ #ifdef USE_RB_ARGS_INFO
1512
+ resolve_args_ords(data, relocs); // Load args entries with symbols ID and nodes resolving
1513
+ #endif
1514
+ load_nodes_from_str(data, relocs);
1515
+ /* Save the loaded node tree and collect garbage */
1516
+ rb_iv_set(self, "@node", (VALUE) relocs->nodes_adr[0]);
1517
+ rb_iv_set(self, "@num_of_nodes", INT2FIX(num_of_nodes));
1518
+ rb_iv_set(self, "@obj_addresses", val_relocs);
1519
+ if (gc_was_disabled == Qfalse)
1520
+ {
1521
+ rb_gc_enable();
1522
+ rb_gc_start();
1523
+ }
1524
+ return self;
1525
+ }
1526
+
1527
+
1528
+ /*
1529
+ * call-seq:
1530
+ * obj.symbols
1531
+ *
1532
+ * Return array with the list of symbols
1533
+ */
1534
+ static VALUE m_nodedump_symbols(VALUE self)
1535
+ {
1536
+ int i;
1537
+ VALUE val_relocs, val_nodeinfo, syms;
1538
+ // Variant 1: node loaded from file
1539
+ val_relocs = rb_iv_get(self, "@obj_addresses");
1540
+ if (val_relocs != Qnil)
1541
+ {
1542
+ NODEObjAddresses *relocs;
1543
+ Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
1544
+ syms = rb_ary_new();
1545
+ for (i = 0; i < relocs->syms_len; i++)
1546
+ rb_ary_push(syms, ID2SYM(relocs->syms_adr[i]));
1547
+ return syms;
1548
+ }
1549
+ // Variant 2: node saved to file (parsed from memory)
1550
+ val_nodeinfo = rb_iv_get(self, "@nodeinfo");
1551
+ if (val_nodeinfo != Qnil)
1552
+ {
1553
+ NODEInfo *ninfo;
1554
+ VALUE *ary;
1555
+ Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
1556
+ syms = rb_funcall(ninfo->syms.vals, rb_intern("values"), 0);
1557
+ ary = RARRAY_PTR(syms);
1558
+ for (i = 0; i < RARRAY_LEN(syms); i++)
1559
+ {
1560
+ ary[i] = rb_funcall(ary[i], rb_intern("to_sym"), 0);
1561
+ }
1562
+ return syms;
1563
+ }
1564
+ rb_raise(rb_eArgError, "Symbol information not initialized. Run to_hash before reading.");
1565
+ }
1566
+
1567
+ /*
1568
+ * call-seq:
1569
+ * obj.change_symbol(old_sym, new_sym)
1570
+ *
1571
+ * Replace one symbol by another (to be used for code obfuscation)
1572
+ * - +old_sym+ -- String that contains symbol name to be replaced
1573
+ * - +new_sym+ -- String that contains new name of the symbol
1574
+ */
1575
+ static VALUE m_nodedump_change_symbol(VALUE self, VALUE old_sym, VALUE new_sym)
1576
+ {
1577
+ VALUE val_nodehash = rb_iv_get(self, "@nodehash");
1578
+ VALUE syms, key;
1579
+ // Check if node is position-independent
1580
+ // (i.e. with initialized NODEInfo structure that contains
1581
+ // relocations for symbols)
1582
+ if (val_nodehash == Qnil)
1583
+ rb_raise(rb_eArgError, "This node is not preparsed into Hash");
1584
+ // Check data types of the input array
1585
+ if (TYPE(old_sym) != T_STRING)
1586
+ {
1587
+ rb_raise(rb_eArgError, "old_sym argument must be a string");
1588
+ }
1589
+ if (TYPE(new_sym) != T_STRING)
1590
+ {
1591
+ rb_raise(rb_eArgError, "new_sym argument must be a string");
1592
+ }
1593
+ // Get the symbol table from the Hash
1594
+ syms = rb_hash_aref(val_nodehash, ID2SYM(rb_intern("symbols")));
1595
+ if (syms == Qnil)
1596
+ rb_raise(rb_eArgError, "Preparsed hash has no :symbols field");
1597
+ // Check if new_sym is present in the symbol table
1598
+ key = rb_funcall(syms, rb_intern("find_index"), 1, new_sym);
1599
+ if (key != Qnil)
1600
+ {
1601
+ rb_raise(rb_eArgError, "new_sym value must be absent in table of symbols");
1602
+ }
1603
+ // Change the symbol in the preparsed Hash
1604
+ key = rb_funcall(syms, rb_intern("find_index"), 1, old_sym);
1605
+ if (key == Qnil)
1606
+ return Qnil;
1607
+ RARRAY_PTR(syms)[FIX2INT(key)] = new_sym;
1608
+ return self;
1609
+ }
1610
+
1611
+ /*
1612
+ * Return array with the list of literals
1613
+ */
1614
+ static VALUE m_nodedump_literals(VALUE self)
1615
+ {
1616
+ int i;
1617
+ VALUE val_relocs, val_nodeinfo, lits;
1618
+ // Variant 1: node loaded from file. It uses NODEObjAddresses struct
1619
+ // with the results of Ruby NODE structure parsing.
1620
+ val_relocs = rb_iv_get(self, "@obj_addresses");
1621
+ if (val_relocs != Qnil)
1622
+ {
1623
+ NODEObjAddresses *relocs;
1624
+
1625
+ Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
1626
+ lits = rb_ary_new();
1627
+ for (i = 0; i < relocs->lits_len; i++)
1628
+ {
1629
+ VALUE val = relocs->lits_adr[i];
1630
+ int t = TYPE(val);
1631
+ if (t != T_SYMBOL && t != T_FLOAT && t != T_FIXNUM)
1632
+ val = rb_funcall(val, rb_intern("dup"), 0);
1633
+ rb_ary_push(lits, val);
1634
+ }
1635
+ return lits;
1636
+ }
1637
+ // Variant 2: node saved to file (parsed from memory). It uses
1638
+ // NODEInfo struct that is initialized during node dump parsing.
1639
+ val_nodeinfo = rb_iv_get(self, "@nodeinfo");
1640
+ if (val_nodeinfo != Qnil)
1641
+ {
1642
+ NODEInfo *ninfo;
1643
+ VALUE *ary;
1644
+ Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
1645
+ lits = rb_funcall(ninfo->lits.vals, rb_intern("values"), 0);
1646
+ ary = RARRAY_PTR(lits);
1647
+ for (i = 0; i < RARRAY_LEN(lits); i++)
1648
+ {
1649
+ int t = TYPE(ary[i]);
1650
+ if (t != T_SYMBOL && t != T_FLOAT && t != T_FIXNUM)
1651
+ ary[i] = rb_funcall(ary[i], rb_intern("dup"), 0);
1652
+ }
1653
+ return lits;
1654
+ }
1655
+ rb_raise(rb_eArgError, "Literals information not initialized. Run to_hash before reading.");
1656
+ }
1657
+
1658
+ /*
1659
+ * Update the array with the list of literals
1660
+ * (to be used for code obfuscation)
1661
+ * Warning! This function is a stub!
1662
+ */
1663
+ static VALUE m_nodedump_change_literal(VALUE self, VALUE old_lit, VALUE new_lit)
1664
+ {
1665
+ /* TO BE IMPLEMENTED */
1666
+ return self;
1667
+ }
1668
+
1669
+
1670
+ /*
1671
+ * call-seq:
1672
+ * obj.compile
1673
+ *
1674
+ * Creates the RubyVM::InstructionSequence object from the node
1675
+ */
1676
+ static VALUE m_nodedump_compile(VALUE self)
1677
+ {
1678
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
1679
+ VALUE nodename = rb_iv_get(self, "@nodename");
1680
+ VALUE filename = rb_iv_get(self, "@filename");
1681
+ VALUE filepath = rb_iv_get(self, "@filepath");
1682
+ #ifndef WITH_RB_ISEQW_NEW
1683
+ /* For Pre-2.3 */
1684
+ return rb_iseq_new_top(node, nodename, filename, filepath, Qfalse);
1685
+ #else
1686
+ /* For Ruby 2.3 */
1687
+ return rb_iseqw_new(rb_iseq_new_top(node, nodename, filename, filepath, Qfalse));
1688
+ #endif
1689
+ }
1690
+
1691
+ /*
1692
+ * Parses Ruby file with the source code and saves the node
1693
+ */
1694
+ static VALUE m_nodedump_from_source(VALUE self, VALUE file)
1695
+ {
1696
+ VALUE line = INT2FIX(1), f, node, filepath, gc_was_disabled;
1697
+ const char *fname;
1698
+
1699
+ gc_was_disabled = rb_gc_disable();
1700
+ rb_secure(1);
1701
+ FilePathValue(file);
1702
+ fname = StringValueCStr(file);
1703
+ /* Remember information about the file */
1704
+ rb_iv_set(self, "@nodename", rb_str_new2("<main>"));
1705
+ rb_iv_set(self, "@filename", file);
1706
+ filepath = rb_funcall(rb_cFile, rb_intern("realpath"), 1, file); // Envelope for rb_realpath_internal
1707
+ rb_iv_set(self, "@filepath", filepath);
1708
+ /* Create node from the source */
1709
+ f = rb_file_open_str(file, "r");
1710
+ node = (VALUE) rb_compile_file(fname, f, NUM2INT(line));
1711
+ rb_iv_set(self, "@node", node);
1712
+ if ((void *) node == NULL)
1713
+ {
1714
+ rb_raise(rb_eArgError, "Error during string parsing");
1715
+ }
1716
+ if (gc_was_disabled == Qfalse)
1717
+ {
1718
+ rb_gc_enable();
1719
+ }
1720
+ return self;
1721
+ }
1722
+
1723
+ /*
1724
+ * Parses Ruby string with the source code and saves the node
1725
+ */
1726
+ static VALUE m_nodedump_from_string(VALUE self, VALUE str)
1727
+ {
1728
+ VALUE line = INT2FIX(1), node, gc_was_disabled;
1729
+ const char *fname = "STRING";
1730
+ Check_Type(str, T_STRING);
1731
+ gc_was_disabled = rb_gc_disable();
1732
+ rb_secure(1);
1733
+ /* Create empty information about the file */
1734
+ rb_iv_set(self, "@nodename", rb_str_new2("<main>"));
1735
+ if (RUBY_API_VERSION_MAJOR == 1)
1736
+ { /* For Ruby 1.9.x */
1737
+ rb_iv_set(self, "@filename", Qnil);
1738
+ rb_iv_set(self, "@filepath", Qnil);
1739
+ }
1740
+ else
1741
+ { /* For Ruby 2.x */
1742
+ rb_iv_set(self, "@filename", rb_str_new2("<compiled>"));
1743
+ rb_iv_set(self, "@filepath", rb_str_new2("<compiled>"));
1744
+ }
1745
+ /* Create node from the string */
1746
+ node = (VALUE) rb_compile_string(fname, str, NUM2INT(line));
1747
+ rb_iv_set(self, "@node", node);
1748
+ if (gc_was_disabled == Qfalse)
1749
+ {
1750
+ rb_gc_enable();
1751
+ rb_gc_start();
1752
+ }
1753
+ if ((void *) node == NULL)
1754
+ {
1755
+ rb_raise(rb_eArgError, "Error during string parsing");
1756
+ }
1757
+ return self;
1758
+ }
1759
+
1760
+ /*
1761
+ * call-seq:
1762
+ * obj.new(:srcfile, filename) # Will load source file from the disk
1763
+ * obj.new(:binfile, filename) # Will load file with node binary dump from the disk
1764
+ * obj.new(:srcmemory, srcstr) # Will load source code from the string
1765
+ * obj.new(:binmemory, binstr) # Will load node binary dump from the string
1766
+ *
1767
+ * Creates NodeMarshal class example from the source code or dumped
1768
+ * syntax tree (NODEs), i.e. preparsed and packed source code. Created
1769
+ * object can be used either for code execution or for saving it
1770
+ * in the preparsed form (useful for code obfuscation/protection)
1771
+ */
1772
+ static VALUE m_nodedump_init(VALUE self, VALUE source, VALUE info)
1773
+ {
1774
+ ID id_usr;
1775
+ rb_iv_set(self, "@show_offsets", Qfalse);
1776
+ Check_Type(source, T_SYMBOL);
1777
+ id_usr = SYM2ID(source);
1778
+ if (id_usr == rb_intern("srcfile"))
1779
+ {
1780
+ return m_nodedump_from_source(self, info);
1781
+ }
1782
+ else if (id_usr == rb_intern("srcmemory"))
1783
+ {
1784
+ return m_nodedump_from_string(self, info);
1785
+ }
1786
+ else if (id_usr == rb_intern("binmemory"))
1787
+ {
1788
+ return m_nodedump_from_memory(self, info);
1789
+ }
1790
+ else if (id_usr == rb_intern("binfile"))
1791
+ {
1792
+ VALUE cFile = rb_const_get(rb_cObject, rb_intern("File"));
1793
+ VALUE bin = rb_funcall(cFile, rb_intern("binread"), 1, info);
1794
+ return m_nodedump_from_memory(self, bin);
1795
+ }
1796
+ else
1797
+ {
1798
+ rb_raise(rb_eArgError, "Invalid source type (it must be :srcfile, :srcmemory, :binmemory of :binfile)");
1799
+ }
1800
+ return Qnil;
1801
+ }
1802
+
1803
+ /*
1804
+ * call-seq:
1805
+ * obj.dump_tree
1806
+ *
1807
+ * Transforms Ruby syntax tree (NODE) to the String using
1808
+ * +rb_parser_dump_tree+ function from +node.c+ (see Ruby source code).
1809
+ */
1810
+ static VALUE m_nodedump_parser_dump_tree(VALUE self)
1811
+ {
1812
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
1813
+ return rb_parser_dump_tree(node, 0);
1814
+ }
1815
+
1816
+ /*
1817
+ * call-seq:
1818
+ * obj.dump_tree_short
1819
+ *
1820
+ * Transforms Ruby syntax tree (NODE) to the String using custom function
1821
+ * instead of +rb_parser_dump_tree+ function.
1822
+ *
1823
+ * See also #show_offsets, #show_offsets=
1824
+ */
1825
+ static VALUE m_nodedump_dump_tree_short(VALUE self)
1826
+ {
1827
+ VALUE str = rb_str_new2(""); // Output string
1828
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
1829
+ int show_offsets = (rb_iv_get(self, "@show_offsets") == Qtrue) ? 1 : 0;
1830
+ print_node(str, node, 0, show_offsets);
1831
+ return str;
1832
+ }
1833
+
1834
+ /*
1835
+ * call-seq:
1836
+ * obj.show_offsets
1837
+ *
1838
+ * Returns show_offsets property (used by NodeMarshal#dump_tree_short)
1839
+ * It can be either true or false
1840
+ */
1841
+ static VALUE m_nodedump_show_offsets(VALUE self)
1842
+ {
1843
+ return rb_iv_get(self, "@show_offsets");
1844
+ }
1845
+
1846
+ /*
1847
+ * call-seq:
1848
+ * obj.show_offsets=
1849
+ *
1850
+ * Sets show_offsets property (used by NodeMarshal#dump_tree_short)
1851
+ * It can be either true or false
1852
+ */
1853
+ static VALUE m_nodedump_set_show_offsets(VALUE self, VALUE value)
1854
+ {
1855
+ if (value != Qtrue && value != Qfalse)
1856
+ {
1857
+ rb_raise(rb_eArgError, "show_offsets property must be either true or false");
1858
+ }
1859
+ return rb_iv_set(self, "@show_offsets", value);
1860
+ }
1861
+
1862
+
1863
+ /*
1864
+ * call-seq:
1865
+ * obj.to_hash
1866
+ *
1867
+ * Converts NodeMarshal class example to the hash that contains full
1868
+ * and independent from data structures memory addresses information.
1869
+ * Format of the obtained hash depends on used platform (especially
1870
+ * size of the pointer) and Ruby version.
1871
+ *
1872
+ * <b>Format of the hash</b>
1873
+ *
1874
+ * <i>Part 1: Signatures</i>
1875
+ *
1876
+ * - <tt>MAGIC</tt> -- NODEMARSHAL11
1877
+ * - <tt>RUBY_PLATFORM</tt> -- saved <tt>RUBY_PLATFORM</tt> constant value
1878
+ * - <tt>RUBY_VERSION</tt> -- saved <tt>RUBY_VERSION</tt> constant value
1879
+ *
1880
+ * <i>Part 2: Program loadable elements.</i>
1881
+ *
1882
+ * All loadable elements are arrays. Index of the array element means
1883
+ * its identifier that is used in the node tree.
1884
+ *
1885
+ * - <tt>literals</tt> -- program literals (strings, ranges etc.)
1886
+ * - <tt>symbols</tt> -- program symbols (values have either String or Fixnum
1887
+ * data type; numbers are used for symbols that cannot be represented as strings)
1888
+ * - <tt>global_entries</tt> -- global variables information
1889
+ * - <tt>id_tables</tt> -- array of arrays. Each array contains symbols IDs
1890
+ * - <tt>args</tt> -- information about code block argument(s)
1891
+ *
1892
+ * <i>Part 3: Nodes information</i>
1893
+ * - <tt>nodes</tt> -- string that contains binary encoded information
1894
+ * about the nodes
1895
+ * - <tt>num_of_nodes</tt> -- number of nodes in the <tt>nodes</tt> field
1896
+ * - <tt>nodename</tt> -- name of the node (usually "<main>")
1897
+ * - <tt>filename</tt> -- name (without path) of .rb file used for the node generation
1898
+ * - <tt>filepath</tt> -- name (with full path) of .rb file used for the node generation
1899
+ */
1900
+ static VALUE m_nodedump_to_hash(VALUE self)
1901
+ {
1902
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
1903
+ NODEInfo *info;
1904
+ VALUE ans, num, val_info, gc_was_disabled;
1905
+ // DISABLE GARBAGE COLLECTOR (important for dumping)
1906
+ gc_was_disabled = rb_gc_disable();
1907
+ // Convert the node to the form with relocs (i.e. the information about node)
1908
+ // if such form is not present
1909
+ val_info = rb_iv_get(self, "@nodeinfo");
1910
+ if (val_info == Qnil)
1911
+ {
1912
+ val_info = Data_Make_Struct(cNodeInfo, NODEInfo,
1913
+ NODEInfo_mark, NODEInfo_free, info); // This data envelope cannot exist without NODE
1914
+ NODEInfo_init(info);
1915
+ rb_iv_set(self, "@nodeinfo", val_info);
1916
+ num = INT2FIX(count_num_of_nodes(node, node, info));
1917
+ rb_iv_set(self, "@nodeinfo_num_of_nodes", num);
1918
+ // Convert node to NODEInfo structure
1919
+ ans = NODEInfo_toHash(info);
1920
+ rb_hash_aset(ans, ID2SYM(rb_intern("num_of_nodes")), num);
1921
+ rb_hash_aset(ans, ID2SYM(rb_intern("nodename")), rb_iv_get(self, "@nodename"));
1922
+ rb_hash_aset(ans, ID2SYM(rb_intern("filename")), rb_iv_get(self, "@filename"));
1923
+ rb_hash_aset(ans, ID2SYM(rb_intern("filepath")), rb_iv_get(self, "@filepath"));
1924
+ rb_iv_set(self, "@nodehash", ans);
1925
+ }
1926
+ else
1927
+ {
1928
+ ans = rb_iv_get(self, "@nodehash");
1929
+ }
1930
+ // ENABLE GARBAGE COLLECTOR (important for dumping)
1931
+ if (gc_was_disabled == Qfalse)
1932
+ {
1933
+ rb_gc_enable();
1934
+ }
1935
+ return ans;
1936
+ }
1937
+
1938
+
1939
+ VALUE m_node_to_ary(NODE *node)
1940
+ {
1941
+ int i, type, ut[3];
1942
+ VALUE uref[3];
1943
+ VALUE entry = rb_ary_new();
1944
+ /* Special case: NULL node */
1945
+ if (node == NULL)
1946
+ {
1947
+ return Qnil;
1948
+ }
1949
+ /* Save node name */
1950
+ type = nd_type(node);
1951
+ rb_ary_push(entry, ID2SYM(rb_intern(ruby_node_name(type))));
1952
+
1953
+ ut[0] = nodes_ctbl[type * 3];
1954
+ ut[1] = nodes_ctbl[type * 3 + 1];
1955
+ ut[2] = nodes_ctbl[type * 3 + 2];
1956
+
1957
+ uref[0] = node->u1.value;
1958
+ uref[1] = node->u2.value;
1959
+ uref[2] = node->u3.value;
1960
+
1961
+
1962
+ for (i = 0; i < 3; i++)
1963
+ {
1964
+ if (ut[i] == NT_NODE)
1965
+ {
1966
+ if (nd_type(node) != NODE_OP_ASGN2 || i != 2)
1967
+ {
1968
+ rb_ary_push(entry, m_node_to_ary(RNODE(uref[i])));
1969
+ }
1970
+ else
1971
+ {
1972
+ VALUE child = rb_ary_new();
1973
+ if (ut[i] != 0 && TYPE(ut[i]) != T_NODE)
1974
+ rb_raise(rb_eArgError, "print_node: broken node 0x%s", RSTRING_PTR(value_to_str(ut[i])));
1975
+ rb_ary_push(child, ID2SYM(rb_intern("NODE_OP_ASGN2")));
1976
+ rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u1.value));
1977
+ rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u2.value));
1978
+ rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u3.value));
1979
+ rb_ary_push(entry, child);
1980
+ }
1981
+ }
1982
+ else if (ut[i] == NT_VALUE)
1983
+ {
1984
+ rb_ary_push(entry, uref[i]);
1985
+ }
1986
+ else if (ut[i] == NT_ID)
1987
+ {
1988
+ rb_ary_push(entry, ID2SYM( (ID) uref[i]));
1989
+ }
1990
+ else if (ut[i] == NT_LONG)
1991
+ {
1992
+ rb_ary_push(entry, LONG2NUM( (intptr_t) uref[i]));
1993
+ }
1994
+ else if (ut[i] == NT_NULL)
1995
+ {
1996
+ rb_ary_push(entry, Qnil);
1997
+ }
1998
+ else if (ut[i] == NT_ARGS)
1999
+ {
2000
+ VALUE rargs = rb_hash_new();
2001
+ VALUE rargs_env = rb_ary_new();
2002
+ #ifdef USE_RB_ARGS_INFO
2003
+ ID id;
2004
+ struct rb_args_info *args = (void *) uref[i];
2005
+
2006
+ rb_hash_aset(rargs, ID2SYM(rb_intern("pre_init")), m_node_to_ary(args->pre_init));
2007
+ rb_hash_aset(rargs, ID2SYM(rb_intern("post_init")), m_node_to_ary(args->post_init));
2008
+
2009
+ id = args->first_post_arg;
2010
+ rb_hash_aset(rargs, ID2SYM(rb_intern("first_post_arg")), (id) ? ID2SYM(id) : Qnil);
2011
+ id = args->rest_arg;
2012
+ rb_hash_aset(rargs, ID2SYM(rb_intern("rest_arg")), (id) ? ID2SYM(id) : Qnil);
2013
+ id = args->block_arg;
2014
+ rb_hash_aset(rargs, ID2SYM(rb_intern("block_arg")), (id) ? ID2SYM(id) : Qnil);
2015
+
2016
+ rb_hash_aset(rargs, ID2SYM(rb_intern("kw_args")), m_node_to_ary(args->kw_args));
2017
+ rb_hash_aset(rargs, ID2SYM(rb_intern("kw_rest_arg")), m_node_to_ary(args->kw_rest_arg));
2018
+ rb_hash_aset(rargs, ID2SYM(rb_intern("opt_args")), m_node_to_ary(args->opt_args));
2019
+ #endif
2020
+ rb_ary_push(rargs_env, ID2SYM(rb_intern("ARGS")));
2021
+ rb_ary_push(rargs_env, rargs);
2022
+ rb_ary_push(entry, rargs_env);
2023
+ }
2024
+ else if (ut[i] == NT_IDTABLE)
2025
+ {
2026
+ VALUE ridtbl = rb_ary_new();
2027
+ VALUE idtbl_ary = rb_ary_new();
2028
+ int j, len;
2029
+
2030
+ ID *idtbl = (ID *) uref[i];
2031
+ len = (uref[i]) ? *idtbl++ : 0;
2032
+ for (j = 0; j < len; j++)
2033
+ {
2034
+ ID sym = *idtbl++;
2035
+ VALUE val = ID2SYM(sym);
2036
+ rb_ary_push(idtbl_ary, val);
2037
+ }
2038
+ rb_ary_push(ridtbl, ID2SYM(rb_intern("IDTABLE")));
2039
+ rb_ary_push(ridtbl, idtbl_ary);
2040
+ rb_ary_push(entry, ridtbl);
2041
+ }
2042
+ else if (ut[i] == NT_ENTRY)
2043
+ {
2044
+ struct rb_global_entry *gentry;
2045
+ gentry = (struct rb_global_entry *) uref[i];
2046
+ rb_ary_push(entry, ID2SYM(gentry->id));
2047
+ }
2048
+ else
2049
+ {
2050
+ rb_ary_push(entry, ID2SYM(rb_intern("UNKNOWN")));
2051
+ }
2052
+ }
2053
+ return entry;
2054
+ }
2055
+
2056
+ /*
2057
+ * call-seq:
2058
+ * obj.to_a
2059
+ *
2060
+ * Converts node to the array (mainly to allow exploration of AST
2061
+ * by the user). It shows information about rb_args_info and
2062
+ * ID *tbl that are not displayed by NodeMarshal#dump_tree and
2063
+ * NodeMarshal#dump_tree_short.
2064
+ */
2065
+ static VALUE m_nodedump_to_a(VALUE self)
2066
+ {
2067
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
2068
+ VALUE gc_was_disabled = rb_gc_disable();
2069
+ VALUE ary = m_node_to_ary(node);
2070
+ if (gc_was_disabled == Qfalse)
2071
+ {
2072
+ rb_gc_enable();
2073
+ }
2074
+ return ary;
2075
+ }
2076
+
2077
+
2078
+ /*
2079
+ * call-seq:
2080
+ * obj.to_bin
2081
+ *
2082
+ * Converts NodeMarshal class example to the binary string that
2083
+ * can be saved to the file and used for loading the node from the file.
2084
+ * Format of the obtained binary dump depends on used platform (especially
2085
+ * size of the pointer) and Ruby version.
2086
+ */
2087
+ static VALUE m_nodedump_to_bin(VALUE self)
2088
+ {
2089
+ VALUE hash = m_nodedump_to_hash(self);
2090
+ VALUE cMarshal = rb_const_get(rb_cObject, rb_intern("Marshal"));
2091
+ return rb_funcall(cMarshal, rb_intern("dump"), 1, hash);
2092
+ }
2093
+
2094
+ /*
2095
+ * Gives the information about the node
2096
+ */
2097
+ static VALUE m_nodedump_inspect(VALUE self)
2098
+ {
2099
+ static char str[1024], buf[512];
2100
+ VALUE num_of_nodes, nodename, filepath, filename;
2101
+ VALUE val_obj_addresses, val_nodeinfo;
2102
+ // Get generic information about node
2103
+ num_of_nodes = rb_iv_get(self, "@num_of_nodes");
2104
+ nodename = rb_iv_get(self, "@nodename");
2105
+ filepath = rb_iv_get(self, "@filepath");
2106
+ filename = rb_iv_get(self, "@filename");
2107
+ // Generate string with generic information about node
2108
+ sprintf(str,
2109
+ "----- NodeMarshal:0x%"PRIxPTR"\n"
2110
+ " num_of_nodes: %d\n nodename: %s\n filepath: %s\n filename: %s\n",
2111
+ (uintptr_t) (self),
2112
+ (num_of_nodes == Qnil) ? -1 : FIX2INT(num_of_nodes),
2113
+ (nodename == Qnil) ? "nil" : RSTRING_PTR(nodename),
2114
+ (filepath == Qnil) ? "nil" : RSTRING_PTR(filepath),
2115
+ (filename == Qnil) ? "nil" : RSTRING_PTR(filename)
2116
+ );
2117
+ // Check if the information about node struct is available
2118
+ val_nodeinfo = rb_iv_get(self, "@nodeinfo");
2119
+ val_obj_addresses = rb_iv_get(self, "@obj_addresses");
2120
+ if (val_nodeinfo == Qnil && val_obj_addresses == Qnil)
2121
+ {
2122
+ m_nodedump_to_hash(self);
2123
+ val_nodeinfo = rb_iv_get(self, "@nodeinfo");
2124
+ }
2125
+ // Information about preparsed node
2126
+ // a) NODEInfo struct
2127
+ if (val_nodeinfo == Qnil)
2128
+ {
2129
+ sprintf(buf, " NODEInfo struct is empty\n");
2130
+ }
2131
+ else
2132
+ {
2133
+ NODEInfo *ninfo;
2134
+ Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
2135
+ sprintf(buf,
2136
+ " NODEInfo struct:\n"
2137
+ " syms hash len (Symbols): %d\n"
2138
+ " lits hash len (Literals): %d\n"
2139
+ " idtabs hash len (ID tables): %d\n"
2140
+ " gentries hash len (Global vars): %d\n"
2141
+ " nodes hash len (Nodes): %d\n"
2142
+ " pnodes hash len (Parent nodes): %d\n"
2143
+ #ifdef USE_RB_ARGS_INFO
2144
+ " args hash len (args info): %d\n"
2145
+ #endif
2146
+ ,
2147
+ FIX2INT(rb_funcall(ninfo->syms.vals, rb_intern("length"), 0)),
2148
+ FIX2INT(rb_funcall(ninfo->lits.vals, rb_intern("length"), 0)),
2149
+ FIX2INT(rb_funcall(ninfo->idtabs.vals, rb_intern("length"), 0)),
2150
+ FIX2INT(rb_funcall(ninfo->gentries.vals, rb_intern("length"), 0)),
2151
+ FIX2INT(rb_funcall(ninfo->nodes.vals, rb_intern("length"), 0)),
2152
+ FIX2INT(rb_funcall(ninfo->pnodes.vals, rb_intern("length"), 0))
2153
+ #ifdef USE_RB_ARGS_INFO
2154
+ ,
2155
+ FIX2INT(rb_funcall(ninfo->args.vals, rb_intern("length"), 0))
2156
+ #endif
2157
+ );
2158
+ }
2159
+ strcat(str, buf);
2160
+ // b) NODEObjAddresses struct
2161
+ if (val_obj_addresses == Qnil)
2162
+ {
2163
+ sprintf(buf, " NODEObjAddresses struct is empty\n");
2164
+ }
2165
+ else
2166
+ {
2167
+ NODEObjAddresses *objadr;
2168
+ Data_Get_Struct(val_obj_addresses, NODEObjAddresses, objadr);
2169
+ sprintf(buf,
2170
+ " NODEObjAddresses struct:\n"
2171
+ " syms_len (Num of symbols): %d\n"
2172
+ " lits_len (Num of literals): %d\n"
2173
+ " idtbls_len (Num of ID tables): %d\n"
2174
+ " gvars_len (Num of global vars): %d\n"
2175
+ " nodes_len (Num of nodes): %d\n"
2176
+ #ifdef USE_RB_ARGS_INFO
2177
+ " args_len: (Num of args info): %d\n"
2178
+ #endif
2179
+ , objadr->syms_len, objadr->lits_len,
2180
+ objadr->idtbls_len, objadr->gvars_len,
2181
+ objadr->nodes_len
2182
+ #ifdef USE_RB_ARGS_INFO
2183
+ , objadr->args_len
2184
+ #endif
2185
+ );
2186
+ }
2187
+ strcat(str, buf);
2188
+ strcat(str, "------------------\n");
2189
+ // Generate output string
2190
+ return rb_str_new2(str);
2191
+ }
2192
+
2193
+ /*
2194
+ * Returns node name (usually <main>)
2195
+ */
2196
+ static VALUE m_nodedump_nodename(VALUE self)
2197
+ {
2198
+ return rb_funcall(rb_iv_get(self, "@nodename"), rb_intern("dup"), 0);
2199
+ }
2200
+
2201
+ /*
2202
+ * Returns name of file that was used for node generation and will be used
2203
+ * by YARV (or nil/<compiled> if a string of code was used)
2204
+ */
2205
+ static VALUE m_nodedump_filename(VALUE self)
2206
+ {
2207
+ return rb_funcall(rb_iv_get(self, "@filename"), rb_intern("dup"), 0);
2208
+ }
2209
+
2210
+ /*
2211
+ * Sets name of file that was used for node generation and will be used
2212
+ * by YARV (or nil/<compiled> if a string of code was used)
2213
+ */
2214
+ static VALUE m_nodedump_set_filename(VALUE self, VALUE val)
2215
+ {
2216
+ if (val != Qnil)
2217
+ {
2218
+ Check_Type(val, T_STRING);
2219
+ rb_iv_set(self, "@filename", rb_funcall(val, rb_intern("dup"), 0));
2220
+ }
2221
+ else
2222
+ {
2223
+ rb_iv_set(self, "@filename", Qnil);
2224
+ }
2225
+ return self;
2226
+ }
2227
+
2228
+ /*
2229
+ * Returns path of file that was used for node generation and will be used
2230
+ * by YARV (or nil/<compiled> if a string of code was used)
2231
+ */
2232
+ static VALUE m_nodedump_filepath(VALUE self)
2233
+ {
2234
+ return rb_funcall(rb_iv_get(self, "@filepath"), rb_intern("dup"), 0);
2235
+ }
2236
+
2237
+ /*
2238
+ * call-seq:
2239
+ * obj.filepath=value
2240
+ *
2241
+ * Sets the path of file that was used for node generation and will
2242
+ * be used by YARV (or nil/<compiled> if a string of code was used)
2243
+ */
2244
+ static VALUE m_nodedump_set_filepath(VALUE self, VALUE val)
2245
+ {
2246
+ if (val != Qnil)
2247
+ {
2248
+ Check_Type(val, T_STRING);
2249
+ rb_iv_set(self, "@filepath", rb_funcall(val, rb_intern("dup"), 0));
2250
+ }
2251
+ else
2252
+ {
2253
+ rb_iv_set(self, "@filepath", Qnil);
2254
+ }
2255
+ return self;
2256
+ }
2257
+
2258
+ /*
2259
+ * call-seq:
2260
+ * NodeMarshal.base85r_encode(input) -> output
2261
+ *
2262
+ * Encode arbitrary binary string to the ASCII string
2263
+ * using modified version of BASE85 (useful for obfuscation
2264
+ * of .rb source files)
2265
+ */
2266
+ static VALUE m_base85r_encode(VALUE obj, VALUE input)
2267
+ {
2268
+ return base85r_encode(input);
2269
+ }
2270
+
2271
+ /*
2272
+ * call-seq:
2273
+ * NodeMarshal.base85r_decode(input) -> output
2274
+ *
2275
+ * Decode ASCII string in the modified BASE85 format
2276
+ * to the binary string (useful for obfuscation of .rb
2277
+ * source files)
2278
+ */
2279
+ static VALUE m_base85r_decode(VALUE obj, VALUE input)
2280
+ {
2281
+ return base85r_decode(input);
2282
+ }
2283
+
2284
+ /* call-seq:
2285
+ * obj.to_text
2286
+ *
2287
+ * Converts NodeMarshal class example to the text string (modified Base85 encoding) that
2288
+ * can be saved to the file and used for loading the node from the file.
2289
+ * Format of the obtained binary dump depends on used platform (especially
2290
+ * size of the pointer) and Ruby version.
2291
+ */
2292
+ static VALUE m_nodedump_to_text(VALUE self)
2293
+ {
2294
+ VALUE bin = m_nodedump_to_bin(self);
2295
+ return base85r_encode(bin);
2296
+ }
2297
+
2298
+ /*
2299
+ * Returns node object
2300
+ */
2301
+ static VALUE m_nodedump_node(VALUE self)
2302
+ {
2303
+ return rb_iv_get(self, "@node");
2304
+ }
2305
+
2306
+ /*
2307
+ * This class can load and save Ruby code in the form of the
2308
+ * platform-dependent syntax tree (made of NODEs). Such function
2309
+ * allows to hide the source code from users. Main features:
2310
+ *
2311
+ * - Irreversible transformation of Ruby source code to the syntax tree
2312
+ * - Representation of syntax tree in binary form dependent from the platform and Ruby version
2313
+ * - Simple options for node inspection
2314
+ * - Ruby 1.9.3, 2.2.x and 2.3.x support
2315
+ * - Subroutines for custom code obfuscation
2316
+ */
2317
+ void Init_nodemarshal()
2318
+ {
2319
+ static VALUE cNodeMarshal;
2320
+ init_nodes_table(nodes_ctbl, NODES_CTBL_SIZE);
2321
+ base85r_init_tables();
2322
+
2323
+ cNodeMarshal = rb_define_class("NodeMarshal", rb_cObject);
2324
+ rb_define_singleton_method(cNodeMarshal, "base85r_encode", RUBY_METHOD_FUNC(m_base85r_encode), 1);
2325
+ rb_define_singleton_method(cNodeMarshal, "base85r_decode", RUBY_METHOD_FUNC(m_base85r_decode), 1);
2326
+
2327
+ rb_define_method(cNodeMarshal, "initialize", RUBY_METHOD_FUNC(m_nodedump_init), 2);
2328
+ rb_define_method(cNodeMarshal, "to_hash", RUBY_METHOD_FUNC(m_nodedump_to_hash), 0);
2329
+ rb_define_method(cNodeMarshal, "to_h", RUBY_METHOD_FUNC(m_nodedump_to_hash), 0);
2330
+ rb_define_method(cNodeMarshal, "to_bin", RUBY_METHOD_FUNC(m_nodedump_to_bin), 0);
2331
+ rb_define_method(cNodeMarshal, "to_text", RUBY_METHOD_FUNC(m_nodedump_to_text), 0);
2332
+ rb_define_method(cNodeMarshal, "to_a", RUBY_METHOD_FUNC(m_nodedump_to_a), 0);
2333
+ rb_define_method(cNodeMarshal, "to_ary", RUBY_METHOD_FUNC(m_nodedump_to_a), 0);
2334
+ rb_define_method(cNodeMarshal, "dump_tree", RUBY_METHOD_FUNC(m_nodedump_parser_dump_tree), 0);
2335
+ rb_define_method(cNodeMarshal, "dump_tree_short", RUBY_METHOD_FUNC(m_nodedump_dump_tree_short), 0);
2336
+ rb_define_method(cNodeMarshal, "compile", RUBY_METHOD_FUNC(m_nodedump_compile), 0);
2337
+ rb_define_method(cNodeMarshal, "show_offsets", RUBY_METHOD_FUNC(m_nodedump_show_offsets), 0);
2338
+ rb_define_method(cNodeMarshal, "show_offsets=", RUBY_METHOD_FUNC(m_nodedump_set_show_offsets), 1);
2339
+ // Methods for working with the information about the node
2340
+ // a) literals, symbols, generic information
2341
+ rb_define_method(cNodeMarshal, "symbols", RUBY_METHOD_FUNC(m_nodedump_symbols), 0);
2342
+ rb_define_method(cNodeMarshal, "change_symbol", RUBY_METHOD_FUNC(m_nodedump_change_symbol), 2);
2343
+ rb_define_method(cNodeMarshal, "literals", RUBY_METHOD_FUNC(m_nodedump_literals), 0);
2344
+ rb_define_method(cNodeMarshal, "change_literal", RUBY_METHOD_FUNC(m_nodedump_change_literal), 2);
2345
+ rb_define_method(cNodeMarshal, "inspect", RUBY_METHOD_FUNC(m_nodedump_inspect), 0);
2346
+ rb_define_method(cNodeMarshal, "node", RUBY_METHOD_FUNC(m_nodedump_node), 0);
2347
+ // b) node and file names
2348
+ rb_define_method(cNodeMarshal, "nodename", RUBY_METHOD_FUNC(m_nodedump_nodename), 0);
2349
+ rb_define_method(cNodeMarshal, "filename", RUBY_METHOD_FUNC(m_nodedump_filename), 0);
2350
+ rb_define_method(cNodeMarshal, "filename=", RUBY_METHOD_FUNC(m_nodedump_set_filename), 1);
2351
+ rb_define_method(cNodeMarshal, "filepath", RUBY_METHOD_FUNC(m_nodedump_filepath), 0);
2352
+ rb_define_method(cNodeMarshal, "filepath=", RUBY_METHOD_FUNC(m_nodedump_set_filepath), 1);
2353
+ // C structure wrappers
2354
+ cNodeObjAddresses = rb_define_class("NodeObjAddresses", rb_cObject);
2355
+ cNodeInfo = rb_define_class("NodeInfo", rb_cObject);
2356
+ }