node-marshal 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,2296 +1,2356 @@
1
- /*
2
- * This file contains implementation of classes for Ruby nodes
3
- * marshalization (i.e. loading and saving them from disk)
4
- *
5
- * (C) 2015-2016 Alexey Voskov
6
- * License: BSD-2-Clause
7
- */
8
- #define __STDC_FORMAT_MACROS
9
- #include <stdio.h>
10
- #include <stdlib.h>
11
- #include <inttypes.h>
12
- #include <ruby.h>
13
- #include <ruby/version.h>
14
-
15
- /*
16
- * Some global variables
17
- */
18
- static VALUE cNodeObjAddresses, cNodeInfo;
19
-
20
- /*
21
- * Part 1. .H files: nodedump functions + parts of Ruby internals
22
- */
23
- #include "nodedump.h"
24
-
25
- #ifdef WITH_CUSTOM_RB_GLOBAL_ENTRY
26
- /* Custom (and slow) implementation of rb_global_entry internal API for Ruby 2.3
27
- (original rb_global_entry API was opened before Ruby 2.3)
28
- It uses a hack with the node creation. The main idea of the hack is
29
- to create a node from the expression containing only a name of the global variable
30
- and extract global entry address from NODE_GVAR u3 "leaf" */
31
- static struct rb_global_entry *rb_global_entry(ID id)
32
- {
33
- NODE *node, *gvar_node;
34
- struct rb_global_entry *gentry;
35
- /* a) Step 1: create node from the expression consisting only from
36
- our global variable */
37
- node = rb_compile_string("<compiled>", rb_id2str(id), NUM2INT(1));
38
- if (nd_type(node) != NODE_SCOPE)
39
- {
40
- return NULL;
41
- }
42
- /* b) Trace the node to the NODE_GVAR */
43
- gvar_node = node->u2.node;
44
- if (nd_type(gvar_node) == NODE_PRELUDE) /* Present only in 2.3 */
45
- {
46
- gvar_node = gvar_node->u2.node;
47
- }
48
- if (nd_type(gvar_node) != NODE_GVAR) /* Error: no GVAR found */
49
- {
50
- return NULL;
51
- }
52
- /* c) Get the global entry address and return its address */
53
- gentry = gvar_node->u3.entry;
54
- return gentry;
55
- }
56
- #endif
57
-
58
-
59
- /*
60
- * Part 2. Information about the nodes
61
- *
62
- */
63
-
64
- // rb_args_info is used only with Ruby 2.x (pre-2.0 Ruby versions don't use it)
65
- #if RUBY_API_VERSION_MAJOR == 2
66
- #define USE_RB_ARGS_INFO 1
67
- #endif
68
-
69
- #if RUBY_API_VERSION_MAJOR == 1
70
- #define RESET_GC_FLAGS 1
71
- #endif
72
-
73
-
74
- // Some generic utilities
75
- int is_value_in_heap(VALUE val)
76
- {
77
- if (val == Qfalse || val == Qtrue ||
78
- val == Qnil || val == Qundef ||
79
- (val & FIXNUM_FLAG)
80
- #ifdef FLONUM_MASK
81
- || ((val & FLONUM_MASK) == FLONUM_FLAG) // This memory trick with floats is present only in 2.x
82
- #endif
83
- )
84
- {
85
- return 0;
86
- }
87
- else
88
- return 1;
89
- }
90
-
91
-
92
- /*
93
- * Converts Ruby string with hexadecimal number
94
- * to the Ruby VALUE
95
- */
96
- VALUE str_to_value(VALUE str)
97
- {
98
- intptr_t ans = (intptr_t) Qnil;
99
- sscanf(RSTRING_PTR(str), "%"PRIxPTR, &ans);
100
- return (VALUE) ans;
101
- }
102
-
103
-
104
- /*
105
- * Converts Ruby VALUE (i.e. machine address) to the
106
- * hexadecimal Ruby string
107
- */
108
- VALUE value_to_str(VALUE val)
109
- {
110
- char str[16];
111
- sprintf(str, "%" PRIxPTR, (intptr_t) val);
112
- return rb_str_new2(str);
113
- }
114
-
115
- /*
116
- * Converts VALUE to the sequence of bytes using big-endian
117
- * standard. Returns number of non-zero bytes
118
- *
119
- * Inputs
120
- * val -- input value
121
- * buf -- pointer to the output buffer
122
- * Returns
123
- * number of written bytes
124
- */
125
- int value_to_bin(VALUE val, unsigned char *buf)
126
- {
127
- int i, len = 0;
128
- unsigned char byte;
129
- for (i = sizeof(VALUE) - 1; i >= 0; i--)
130
- {
131
- byte = (unsigned char) ((val >> (i * 8)) & 0xFF);
132
- if (len > 0 || byte != 0)
133
- {
134
- *buf++ = byte;
135
- len++;
136
- }
137
- }
138
- return len;
139
- }
140
-
141
- /*
142
- * Converts sequence of bytes (big-endian standard) to the VALUE.
143
- *
144
- * Inputs
145
- * buf -- poiner to the input buffer
146
- * len -- number of bytes
147
- * Returns
148
- * VALUE
149
- */
150
- VALUE bin_to_value(unsigned char *buf, int len)
151
- {
152
- VALUE val = (VALUE) 0;
153
- int i;
154
- for (i = len - 1; i >= 0; i--)
155
- val |= ((VALUE) *buf++) << (i * 8);
156
- return val;
157
- }
158
-
159
- #define NODES_CTBL_SIZE 256
160
- static int nodes_ctbl[NODES_CTBL_SIZE * 3];
161
-
162
-
163
- /*
164
- * Part 3. Functions for node marshalization
165
- */
166
-
167
- /*
168
- * Keeps the information about node elements position
169
- * in the memory and its IDs/ordinals for export to the file
170
- */
171
- typedef struct {
172
- VALUE vals; // values: key=>val Hash
173
- VALUE ids; // identifiers: key=>id Hash
174
- VALUE pos; // free identifier
175
- } LeafTableInfo;
176
-
177
- void LeafTableInfo_init(LeafTableInfo *lti)
178
- {
179
- lti->vals = rb_hash_new();
180
- lti->ids = rb_hash_new();
181
- lti->pos = 0;
182
- }
183
-
184
- void LeafTableInfo_mark(LeafTableInfo *lti)
185
- {
186
- rb_gc_mark(lti->vals);
187
- rb_gc_mark(lti->ids);
188
- }
189
-
190
-
191
- int LeafTableInfo_addEntry(LeafTableInfo *lti, VALUE key, VALUE value)
192
- {
193
- VALUE v_id = rb_hash_aref(lti->ids, key);
194
- if (v_id == Qnil)
195
- {
196
- int id = lti->pos++;
197
- rb_hash_aset(lti->vals, key, value);
198
- rb_hash_aset(lti->ids, key, INT2FIX(id));
199
- return id;
200
- }
201
- else
202
- {
203
- return FIX2INT(v_id);
204
- }
205
- }
206
-
207
- /*
208
- * Adds Ruby ID data type as the entry to the LeafTableInfo struct.
209
- * Main features:
210
- * 1) ID will be converted to Fixnum
211
- * 2) If ID can be converted to string by rb_id2str it will be saved as
212
- String object. Otherwise it will be converted to Fixnum.
213
- */
214
- int LeafTableInfo_addIDEntry(LeafTableInfo *lti, ID id)
215
- {
216
- VALUE r_idval = rb_id2str(id);
217
- if (TYPE(r_idval) != T_STRING)
218
- {
219
- r_idval = INT2FIX(id);
220
- }
221
- return LeafTableInfo_addEntry(lti, INT2FIX(id), r_idval);
222
- }
223
-
224
- VALUE LeafTableInfo_getLeavesTable(LeafTableInfo *lti)
225
- {
226
- VALUE key, keys = rb_funcall(lti->vals, rb_intern("keys"), 0);
227
- unsigned int i;
228
- VALUE val;
229
- for (i = 0; i < lti->pos; i++)
230
- {
231
- key = RARRAY_PTR(keys)[i];
232
- val = rb_hash_aref(lti->vals, key);
233
- rb_ary_store(keys, i, val);
234
- }
235
- return keys;
236
- }
237
-
238
- int LeafTableInfo_keyToID(LeafTableInfo *lti, VALUE key)
239
- {
240
- VALUE id = rb_hash_aref(lti->ids, key);
241
- return (id == Qnil) ? -1 : FIX2INT(id);
242
- }
243
-
244
- VALUE LeafTableInfo_keyToValue(LeafTableInfo *lti, VALUE key)
245
- {
246
- return rb_hash_aref(lti->vals, key);
247
- }
248
-
249
- /* The structure keeps information about the node
250
- that is required for its dumping to the file
251
- (mainly hashes with relocatable identifiers) */
252
- typedef struct {
253
- LeafTableInfo syms; // Node symbols
254
- LeafTableInfo lits; // Node literals
255
- LeafTableInfo idtabs; // Table of identifiers
256
- #ifdef USE_RB_ARGS_INFO
257
- LeafTableInfo args; // Table of arguments
258
- #endif
259
- LeafTableInfo gentries; // Global variables table
260
- LeafTableInfo nodes; // Table of nodes
261
- LeafTableInfo pnodes; // Table of parent nodes
262
- } NODEInfo;
263
-
264
- void NODEInfo_init(NODEInfo *info)
265
- {
266
- LeafTableInfo_init(&(info->syms));
267
- LeafTableInfo_init(&(info->lits));
268
- LeafTableInfo_init(&(info->idtabs));
269
- #ifdef USE_RB_ARGS_INFO
270
- LeafTableInfo_init(&(info->args));
271
- #endif
272
- LeafTableInfo_init(&(info->gentries));
273
- LeafTableInfo_init(&(info->nodes));
274
- LeafTableInfo_init(&(info->pnodes));
275
- }
276
-
277
- void NODEInfo_mark(NODEInfo *info)
278
- {
279
- LeafTableInfo_mark(&(info->syms));
280
- LeafTableInfo_mark(&(info->lits));
281
- LeafTableInfo_mark(&(info->idtabs));
282
- #ifdef USE_RB_ARGS_INFO
283
- LeafTableInfo_mark(&(info->args));
284
- #endif
285
- LeafTableInfo_mark(&(info->gentries));
286
- LeafTableInfo_mark(&(info->nodes));
287
- LeafTableInfo_mark(&(info->pnodes));
288
- }
289
-
290
- void NODEInfo_free(NODEInfo *info)
291
- {
292
- xfree(info);
293
- }
294
-
295
- LeafTableInfo *NODEInfo_getTableByID(NODEInfo *info, int id)
296
- {
297
- switch (id)
298
- {
299
- case NT_ID:
300
- return &info->syms;
301
- case NT_VALUE:
302
- return &info->lits;
303
- case NT_IDTABLE:
304
- return &info->idtabs;
305
- #ifdef USE_RB_ARGS_INFO
306
- case NT_ARGS:
307
- return &info->args;
308
- #endif
309
- case NT_ENTRY:
310
- return &info->gentries;
311
- case NT_NODE:
312
- return &info->nodes;
313
- default:
314
- return NULL;
315
- }
316
- }
317
-
318
- /*
319
- * Converts node value to the binary data
320
- * Input parameters:
321
- * info -- current NODEInfo structure
322
- * node -- parent node (that contains the value)
323
- * ptr -- pointer to the output memory buffer
324
- * type -- type of the entry (NT_...)
325
- * value -- node->u?.value VALUE
326
- * child_id -- child node number (1,2,3)
327
- * Returns:
328
- * Byte that contains the next information
329
- * a) upper half-byte: VL_... data type (for node loader)
330
- * b) lower half-byte: number of bytes written to the buffer
331
- */
332
- #define DUMP_RAW_VALUE(vl_ans, vl) (vl_ans | (value_to_bin(vl, (unsigned char *) ptr) << 4))
333
- static int dump_node_value(NODEInfo *info, char *ptr, NODE *node, int type, VALUE value, int child_id)
334
- {
335
- if (type == NT_NULL || type == NT_LONG)
336
- {
337
- return DUMP_RAW_VALUE(VL_RAW, value);
338
- }
339
- else if (type == NT_NODE)
340
- {
341
- if (value == 0)
342
- { // Variant a: empty node
343
- return DUMP_RAW_VALUE(VL_RAW, value);
344
- }
345
- else if (nd_type(node) == NODE_ATTRASGN && value == 1 && child_id == 1)
346
- { // Special case: "self"
347
- return DUMP_RAW_VALUE(VL_RAW, value);
348
- }
349
- else if (TYPE(value) != T_NODE)
350
- {
351
- rb_raise(rb_eArgError, "dump_node_value, parent node %s (ADR 0x%s): child node %d (ADR 0x%s): is not a node\n"
352
- " Type: %s (%d), Value: %s",
353
- ruby_node_name(nd_type(node)), RSTRING_PTR(value_to_str((VALUE) node)),
354
- child_id, RSTRING_PTR(value_to_str(value)),
355
- RSTRING_PTR(rb_funcall(rb_funcall(value, rb_intern("class"), 0), rb_intern("to_s"), 0)),
356
- TYPE(value),
357
- RSTRING_PTR(rb_funcall(value, rb_intern("to_s"), 0)) );
358
- }
359
- else
360
- { // Variant b: not empty node
361
- VALUE id = LeafTableInfo_keyToID(&info->nodes, value_to_str(value));
362
- if (id == (VALUE) -1)
363
- {
364
- rb_raise(rb_eArgError, "dump_node_value, parent node %s (ADR 0x%s): child node %d (ADR 0x%s) not found",
365
- ruby_node_name(nd_type(node)), RSTRING_PTR(value_to_str((VALUE) node)),
366
- child_id, RSTRING_PTR(value_to_str(value)));
367
- return VL_RAW;
368
- }
369
- else
370
- {
371
- return DUMP_RAW_VALUE(VL_NODE, id);
372
- }
373
- return VL_NODE;
374
- }
375
- }
376
- else if (type == NT_VALUE)
377
- {
378
- if (!is_value_in_heap(value))
379
- { // a) value that is inside VALUE
380
- return DUMP_RAW_VALUE(VL_RAW, value);
381
- }
382
- else
383
- { // b) value that requires reference to literals table
384
- VALUE id = LeafTableInfo_keyToID(&info->lits, value_to_str(value));
385
- if (id == (VALUE) -1)
386
- rb_raise(rb_eArgError, "Cannot find literal");
387
- else
388
- return DUMP_RAW_VALUE(VL_LIT, id);
389
- }
390
- }
391
- else if (type == NT_ID)
392
- {
393
- ID sym = (VALUE) value; // We are working with RAW data from RAM!
394
- VALUE id = LeafTableInfo_keyToID(&info->syms, INT2FIX(sym));
395
- if (id == (VALUE) -1)
396
- {
397
- rb_raise(rb_eArgError, "Cannot find symbol ID %d (%s) (parent node %s, line %d)",
398
- (int) sym, RSTRING_PTR(rb_id2str(ID2SYM(sym))),
399
- ruby_node_name(nd_type(node)), nd_line(node));
400
- return VL_RAW;
401
- }
402
- else
403
- {
404
- return DUMP_RAW_VALUE(VL_ID, id);
405
- }
406
- }
407
- else if (type == NT_ENTRY || type == NT_ARGS || type == NT_IDTABLE)
408
- {
409
- VALUE key = value_to_str(value);
410
- LeafTableInfo *lti = NODEInfo_getTableByID(info, type);
411
- VALUE id = LeafTableInfo_keyToID(lti, key);
412
- if (id == (VALUE) -1)
413
- {
414
- rb_raise(rb_eArgError, "Cannot find some entry");
415
- return VL_RAW;
416
- }
417
- else
418
- {
419
- switch(type)
420
- {
421
- case NT_ENTRY: return DUMP_RAW_VALUE(VL_GVAR, id);
422
- case NT_IDTABLE: return DUMP_RAW_VALUE(VL_IDTABLE, id);
423
- case NT_ARGS: return DUMP_RAW_VALUE(VL_ARGS, id);
424
- default: rb_raise(rb_eArgError, "Internal error");
425
- }
426
- }
427
- }
428
- else
429
- {
430
- rb_raise(rb_eArgError, "Unknown child node type %d", type);
431
- }
432
- }
433
-
434
- /*
435
- * Converts information about nodes to the binary string.
436
- * It uses dump_node_value function for the low-level conversion
437
- * of node "leaves" to the actual binary data.
438
- *
439
- * See load_nodes_from_str for the descrpition of the binary string format.
440
- */
441
- static VALUE dump_nodes(NODEInfo *info)
442
- {
443
- int node_size = sizeof(int) + sizeof(VALUE) * 4;
444
- int i, nt, flags_len;
445
- NODE *node;
446
- char *bin, *ptr, *rtypes;
447
- VALUE nodes_ary = rb_funcall(info->nodes.vals, rb_intern("keys"), 0);
448
- VALUE nodes_bin = rb_str_new(NULL, RARRAY_LEN(nodes_ary) * node_size);
449
- VALUE ut[3];
450
- bin = RSTRING_PTR(nodes_bin);
451
-
452
- for (i = 0, ptr = bin; i < RARRAY_LEN(nodes_ary); i++)
453
- {
454
- node = RNODE(str_to_value(RARRAY_PTR(nodes_ary)[i]));
455
- nt = nd_type(node);
456
- rtypes = (char *) ptr; ptr += sizeof(int);
457
- flags_len = value_to_bin(node->flags >> 5, (unsigned char *) ptr); ptr += flags_len;
458
-
459
- ut[0] = nodes_ctbl[nt * 3];
460
- ut[1] = nodes_ctbl[nt * 3 + 1];
461
- ut[2] = nodes_ctbl[nt * 3 + 2];
462
- if (nt == NODE_OP_ASGN2 && LeafTableInfo_keyToID(&info->syms, INT2FIX(node->u1.value)) != -1)
463
- {
464
- ut[0] = NT_ID; ut[1] = NT_ID; ut[2] = NT_ID;
465
- }
466
-
467
- if (nt == NODE_ARGS_AUX)
468
- {
469
- ut[0] = NT_ID; ut[1] = NT_LONG; ut[2] = NT_NODE;
470
- if (LeafTableInfo_keyToID(&info->syms, INT2FIX(node->u2.value)) != -1)
471
- {
472
- ut[1] = NT_ID;
473
- }
474
- else
475
- {
476
- ut[1] = NT_LONG;
477
- }
478
- if (node->u1.value == 0) ut[0] = NT_NULL;
479
- if (node->u2.value == 0) ut[1] = NT_NULL;
480
- if (node->u3.value == 0) ut[2] = NT_NULL;
481
- }
482
-
483
- if (nt = NODE_ARRAY)
484
- {
485
- /* Special undocumented cases:
486
- * 1) the second child of the second element of an array
487
- * contains reference to the last element (NT_NODE) not
488
- * length (NT_LONG)
489
- * 2) NODE_HASH: every second element in NODE_ARRAY chain
490
- * contains pointers to NODES (instead of lengths)
491
- * 3) NODE_DSTR: first node in NODE_ARRAY chain contains
492
- * pointer to NODE (instead of lengths) */
493
- NODE *pnode1, *pnode2;
494
- pnode1 = (NODE *) str_to_value(LeafTableInfo_keyToValue(&info->pnodes, value_to_str((VALUE) node)));
495
- if (pnode1 != NULL && nd_type(pnode1) == NODE_ARRAY &&
496
- (NODE *) pnode1->u3.value == node)
497
- {
498
- int nt2;
499
- pnode2 = (NODE *) str_to_value(LeafTableInfo_keyToValue(&info->pnodes, value_to_str((VALUE) pnode1)));
500
- nt2 = nd_type(pnode2);
501
- if ( (nt2 != NODE_ARRAY && nt2 != NODE_DSTR) ||
502
- (NODE *) pnode2->u1.value == pnode1 )
503
- {
504
- ut[1] = NT_NODE;
505
- }
506
- else if (pnode1->u2.value == 2 && node == (NODE *) node->u2.value)
507
- {
508
- ut[1] = NT_NODE;
509
- }
510
- }
511
- else if (pnode1 != NULL && nd_type(pnode1) == NODE_DSTR)
512
- {
513
- ut[1] = NT_NODE;
514
- }
515
- }
516
-
517
- rtypes[0] = dump_node_value(info, ptr, node, ut[0], node->u1.value, 1);
518
- ptr += (rtypes[0] & 0xF0) >> 4;
519
- rtypes[1] = dump_node_value(info, ptr, node, ut[1], node->u2.value, 2);
520
- ptr += (rtypes[1] & 0xF0) >> 4;
521
- rtypes[2] = dump_node_value(info, ptr, node, ut[2], node->u3.value, 3);
522
- ptr += (rtypes[2] & 0xF0) >> 4;
523
- rtypes[3] = flags_len;
524
- }
525
- rb_str_resize(nodes_bin, (int) (ptr - bin) + 1);
526
- return nodes_bin;
527
- }
528
-
529
-
530
- /*
531
- * Transforms preprocessed node to Ruby hash that can be used
532
- * to load the node from disk.
533
- *
534
- * See m_nodedump_to_hash function for output hash format details
535
- */
536
- VALUE NODEInfo_toHash(NODEInfo *info)
537
- {
538
- VALUE ans = rb_hash_new();
539
- VALUE idtbl, idtabs = LeafTableInfo_getLeavesTable(&info->idtabs);
540
- VALUE syms = LeafTableInfo_getLeavesTable(&info->syms);
541
- VALUE args;
542
- int i, j, id;
543
- // Add some signatures
544
- rb_hash_aset(ans, ID2SYM(rb_intern("MAGIC")), rb_str_new2(NODEMARSHAL_MAGIC));
545
- rb_hash_aset(ans, ID2SYM(rb_intern("RUBY_PLATFORM")),
546
- rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM")));
547
- rb_hash_aset(ans, ID2SYM(rb_intern("RUBY_VERSION")),
548
- rb_const_get(rb_cObject, rb_intern("RUBY_VERSION")));
549
- // Write literals, symbols and global_entries arrays: they don't need to be corrected
550
- rb_hash_aset(ans, ID2SYM(rb_intern("literals")), LeafTableInfo_getLeavesTable(&info->lits));
551
- rb_hash_aset(ans, ID2SYM(rb_intern("symbols")), syms);
552
- rb_hash_aset(ans, ID2SYM(rb_intern("global_entries")), LeafTableInfo_getLeavesTable(&info->gentries));
553
- // Replace RAM IDs to disk IDs in id_tables
554
- for (i = 0; i < RARRAY_LEN(idtabs); i++)
555
- {
556
- idtbl = RARRAY_PTR(idtabs)[i];
557
- for (j = 0; j < RARRAY_LEN(idtbl); j++)
558
- {
559
- id = LeafTableInfo_keyToID(&info->syms, RARRAY_PTR(idtbl)[j]);
560
-
561
- if (id == -1)
562
- {
563
- ID sym = FIX2INT(RARRAY_PTR(idtbl)[j]);
564
- rb_raise(rb_eArgError, "Cannot find the symbol ID %d", (int) sym);
565
- }
566
- else
567
- {
568
- rb_ary_store(idtbl, j, INT2FIX(id));
569
- }
570
-
571
- }
572
- }
573
- rb_hash_aset(ans, ID2SYM(rb_intern("id_tables")), idtabs);
574
- // Replace RAM IDs to disk IDs in args tables
575
- #ifdef USE_RB_ARGS_INFO
576
- args = LeafTableInfo_getLeavesTable(&info->args);
577
- for (i = 0; i < RARRAY_LEN(args); i++)
578
- {
579
- VALUE args_entry = RARRAY_PTR(args)[i];
580
- VALUE *eptr = RARRAY_PTR(args_entry);
581
- int args_vals[5] = {0, 1, 7, 8, 9};
582
- int args_ids[3] = {4, 5, 6};
583
- if (RARRAY_LEN(args_entry) != 10)
584
- rb_raise(rb_eArgError, "Corrupted args entry");
585
- // Pointer to nodes to be replaced:
586
- // a) VALUES
587
- // (0) pre_init, (1) post_init,
588
- // (7) kw_args, (8) kw_rest_arg, (9) opt_args
589
- for (j = 0; j < 5; j++)
590
- {
591
- int ind = args_vals[j];
592
- VALUE key = eptr[ind];
593
- if (!strcmp(RSTRING_PTR(key), "0"))
594
- eptr[ind] = INT2FIX(-1);
595
- else
596
- {
597
- eptr[ind] = INT2FIX(LeafTableInfo_keyToID(&info->nodes, key));
598
- if (FIX2INT(eptr[ind]) == -1)
599
- rb_raise(rb_eArgError, "Unknown NODE in args tables");
600
- }
601
- }
602
- // b) IDs (symbols)
603
- // (4) first_post_arg (5) rest_arg (6) block_arg
604
- for (j = 0; j < 3; j++)
605
- {
606
- int ind = args_ids[j];
607
- VALUE key = eptr[ind];
608
- if (FIX2INT(key) != 0)
609
- {
610
- eptr[ind] = INT2FIX(LeafTableInfo_keyToID(&info->syms, key));
611
- if (FIX2INT(eptr[ind]) == -1)
612
- rb_raise(rb_eArgError, "Unknown symbolic ID in args tables");
613
- }
614
- else
615
- eptr[ind] = INT2FIX(-1);
616
- }
617
- }
618
- #else
619
- args = rb_ary_new();
620
- #endif
621
-
622
- rb_hash_aset(ans, ID2SYM(rb_intern("args")), args);
623
- // Special case: NODES. Nodes are kept as binary string
624
- rb_hash_aset(ans, ID2SYM(rb_intern("nodes")), dump_nodes(info));
625
- return ans;
626
- }
627
-
628
-
629
- static void NODEInfo_addValue(NODEInfo *info, VALUE value)
630
- {
631
- if (is_value_in_heap(value))
632
- {
633
- VALUE lkey = value_to_str(value);
634
- LeafTableInfo_addEntry(&info->lits, lkey, value);
635
- }
636
- }
637
-
638
- /*
639
- * Adds the information about Ruby NODE to the NODEInfo struct.
640
- * It keeps the addresses of the node and its parents
641
- */
642
- static void NODEInfo_addNode(NODEInfo *info, NODE *node, NODE *pnode)
643
- {
644
- VALUE node_adr = value_to_str((VALUE) node);
645
- VALUE pnode_adr = value_to_str((VALUE) pnode);
646
- LeafTableInfo_addEntry(&info->nodes, node_adr, node_adr);
647
- LeafTableInfo_addEntry(&info->pnodes, node_adr, pnode_adr);
648
- }
649
-
650
- /*
651
- * Returns ID of the node using its address (VALUE)
652
- * It is used during the process of dumping Ruby AST to disk
653
- * for replacing of memory addresses into ordinals
654
- */
655
- static int NODEInfo_nodeAdrToID(NODEInfo *info, VALUE adr)
656
- {
657
- return LeafTableInfo_keyToID(&info->nodes, adr);
658
- }
659
-
660
- /*
661
- * Function counts number of nodes and fills NODEInfo struct
662
- * that is neccessary for the node saving to the HDD
663
- */
664
- static int count_num_of_nodes(NODE *node, NODE *parent, NODEInfo *info)
665
- {
666
- int ut[3], num, offset;
667
- if (node == 0)
668
- {
669
- return 0;
670
- }
671
- else if (TYPE((VALUE) node) != T_NODE)
672
- {
673
- rb_raise(rb_eArgError, "count_num_of_nodes: parent node %s: child node (ADR 0x%s) is not a node; Type: %d",
674
- ruby_node_name(nd_type(parent)), RSTRING_PTR(value_to_str((VALUE) node)), TYPE((VALUE) node));
675
- return 0;
676
- }
677
- else
678
- {
679
- offset = nd_type(node) * 3;
680
- ut[0] = nodes_ctbl[offset++];
681
- ut[1] = nodes_ctbl[offset++];
682
- ut[2] = nodes_ctbl[offset];
683
-
684
- if (nd_type(node) == NODE_OP_ASGN2 && nd_type(parent) == NODE_OP_ASGN2)
685
- {
686
- ut[0] = NT_ID;
687
- ut[1] = NT_ID;
688
- ut[2] = NT_ID;
689
- }
690
-
691
- /* Some Ruby 1.9.3 style function arguments (without rb_args_info) */
692
- if (nd_type(node) == NODE_ARGS_AUX)
693
- {
694
- ut[0] = NT_ID;
695
- ut[1] = (nd_type(parent) == NODE_ARGS_AUX) ? NT_LONG : NT_ID;
696
- ut[2] = NT_NODE;
697
-
698
- if (node->u1.value == 0) ut[0] = NT_NULL;
699
- if (node->u2.value == 0) ut[1] = NT_NULL;
700
- if (node->u3.value == 0) ut[2] = NT_NULL;
701
- }
702
- /* Some Ruby 1.9.3-specific code for NODE_ATTRASGN */
703
- if (nd_type(node) == NODE_ATTRASGN)
704
- {
705
- if (node->u1.value == 1) ut[0] = NT_LONG;
706
- }
707
- /* Check if there is information about child nodes types */
708
- if (ut[0] == NT_UNKNOWN || ut[1] == NT_UNKNOWN || ut[2] == NT_UNKNOWN)
709
- {
710
- rb_raise(rb_eArgError, "Cannot interpret node %d (%s)", nd_type(node), ruby_node_name(nd_type(node)));
711
- }
712
- /* Save the ID of the node */
713
- num = 1;
714
- NODEInfo_addNode(info, node, parent);
715
- /* Analyze node childs */
716
- /* a) child 1 */
717
- if (ut[0] == NT_NODE)
718
- {
719
- num += count_num_of_nodes(node->u1.node, node, info);
720
- }
721
- else if (ut[0] == NT_ID)
722
- {
723
- LeafTableInfo_addIDEntry(&info->syms, node->u1.id);
724
- }
725
- else if (ut[0] == NT_VALUE)
726
- {
727
- if (TYPE(node->u1.value) == T_NODE)
728
- rb_raise(rb_eArgError, "NODE instead of VALUE in child 1 of node %s", ruby_node_name(nd_type(node)));
729
- NODEInfo_addValue(info, node->u1.value);
730
- }
731
- else if (ut[0] == NT_IDTABLE)
732
- {
733
- VALUE tkey = value_to_str(node->u1.value);
734
- VALUE idtbl_ary = rb_ary_new();
735
- ID *idtbl = (ID *) node->u1.value;
736
- int i, size = (node->u1.value) ? *idtbl++ : 0;
737
- for (i = 0; i < size; i++)
738
- {
739
- ID sym = *idtbl++;
740
- rb_ary_push(idtbl_ary, INT2FIX(sym));
741
- LeafTableInfo_addIDEntry(&info->syms, sym);
742
- }
743
- LeafTableInfo_addEntry(&info->idtabs, tkey, idtbl_ary);
744
- }
745
- else if (ut[0] != NT_LONG && ut[0] != NT_NULL)
746
- {
747
- rb_raise(rb_eArgError, "1!");
748
- }
749
- /* b) child 2 */
750
- if (ut[1] == NT_NODE)
751
- {
752
- num += count_num_of_nodes(node->u2.node, node, info);
753
- }
754
- else if (ut[1] == NT_ID)
755
- {
756
- LeafTableInfo_addIDEntry(&info->syms, node->u2.id);
757
- }
758
- else if (ut[1] == NT_VALUE)
759
- {
760
- if (TYPE(node->u2.value) == T_NODE)
761
- rb_raise(rb_eArgError, "NODE instead of VALUE in child 2 of node %s", ruby_node_name(nd_type(node)));
762
- NODEInfo_addValue(info, node->u2.value);
763
- }
764
- else if (ut[1] != NT_LONG && ut[1] != NT_NULL)
765
- {
766
- rb_raise(rb_eArgError, "2!");
767
- }
768
-
769
- /* c) child 3 */
770
- if (ut[2] == NT_NODE)
771
- {
772
- num += count_num_of_nodes(node->u3.node, node, info);
773
- }
774
- else if (ut[2] == NT_ID)
775
- {
776
- LeafTableInfo_addIDEntry(&info->syms, node->u3.id);
777
- }
778
- else if (ut[2] == NT_ARGS)
779
- {
780
- #ifdef USE_RB_ARGS_INFO
781
- VALUE varg = Qtrue;
782
- struct rb_args_info *ainfo;
783
- ID asym;
784
- ainfo = node->u3.args;
785
- // Save child nodes
786
- num += count_num_of_nodes(ainfo->pre_init, node, info);
787
- num += count_num_of_nodes(ainfo->post_init, node, info);
788
- num += count_num_of_nodes(ainfo->kw_args, node, info);
789
- num += count_num_of_nodes(ainfo->kw_rest_arg, node, info);
790
- num += count_num_of_nodes(ainfo->opt_args, node, info);
791
- // Save rb_args_info structure content
792
- varg = rb_ary_new();
793
- rb_ary_push(varg, value_to_str((VALUE) ainfo->pre_init));
794
- rb_ary_push(varg, value_to_str((VALUE) ainfo->post_init));
795
- rb_ary_push(varg, INT2FIX(ainfo->pre_args_num));
796
- rb_ary_push(varg, INT2FIX(ainfo->post_args_num));
797
-
798
- asym = ainfo->first_post_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
799
- if (asym != 0)
800
- LeafTableInfo_addIDEntry(&info->syms, asym);
801
-
802
- asym = ainfo->rest_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
803
- if (asym != 0)
804
- LeafTableInfo_addIDEntry(&info->syms, asym);
805
-
806
- asym = ainfo->block_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
807
- if (asym != 0)
808
- LeafTableInfo_addIDEntry(&info->syms, asym);
809
- rb_ary_push(varg, value_to_str((VALUE) ainfo->kw_args));
810
- rb_ary_push(varg, value_to_str((VALUE) ainfo->kw_rest_arg));
811
- rb_ary_push(varg, value_to_str((VALUE) ainfo->opt_args));
812
-
813
- LeafTableInfo_addEntry(&info->args, value_to_str((VALUE) ainfo), varg);
814
- #else
815
- rb_raise(rb_eArgError, "NT_ARGS entry without USE_RB_ARGS_INFO");
816
- #endif
817
- }
818
- else if (ut[2] == NT_ENTRY)
819
- {
820
- ID gsym = node->u3.entry->id;
821
- // Save symbol to the symbol table
822
- int newid = LeafTableInfo_addIDEntry(&info->syms, gsym);
823
- LeafTableInfo_addEntry(&info->gentries, value_to_str(node->u3.value), INT2FIX(newid));
824
- }
825
- else if (ut[2] != NT_LONG && ut[2] != NT_NULL)
826
- {
827
- rb_raise(rb_eArgError, "Invalid child node 3 of node %s: TYPE %d, VALUE %"PRIxPTR,
828
- ruby_node_name(nd_type(node)), ut[2], (uintptr_t) (node->u3.value));
829
- }
830
-
831
- return num;
832
- }
833
- }
834
-
835
-
836
-
837
- //-------------------------------------------------------------------------
838
-
839
- /*
840
- * Part 4. Functions for loading marshalled nodes
841
- */
842
- typedef struct {
843
- ID *syms_adr; // Table of symbols
844
- int syms_len;
845
-
846
- VALUE *lits_adr; // Table of literals
847
- int lits_len;
848
-
849
- ID **idtbls_adr; // Table of symbols tables
850
- int idtbls_len;
851
-
852
- struct rb_global_entry **gvars_adr; // Table of global variables entries
853
- int gvars_len;
854
-
855
- NODE **nodes_adr; // Table of nodes
856
- int nodes_len;
857
- #ifdef USE_RB_ARGS_INFO
858
- struct rb_args_info **args_adr; // Table of code blocks arguments
859
- int args_len;
860
- #endif
861
- } NODEObjAddresses;
862
-
863
-
864
- void NODEObjAddresses_free(NODEObjAddresses *obj)
865
- {
866
- xfree(obj->syms_adr);
867
- xfree(obj->idtbls_adr);
868
- xfree(obj->gvars_adr);
869
- xfree(obj->nodes_adr);
870
- #ifdef USE_RB_ARGS_INFO
871
- xfree(obj->args_adr);
872
- #endif
873
- xfree(obj);
874
- }
875
-
876
-
877
-
878
- void rbstr_printf(VALUE str, const char *fmt, ...)
879
- {
880
- char buf[1024];
881
- va_list ptr;
882
-
883
- va_start(ptr, fmt);
884
- vsprintf(buf, fmt, ptr);
885
- rb_str_append(str, rb_str_new2(buf));
886
- va_end(ptr);
887
- }
888
-
889
- #define PRINT_NODE_TAB for (j = 0; j < tab; j++) rbstr_printf(str, " ");
890
- /*
891
- * Recursively transforms node into Ruby string
892
- * str -- output Ruby string
893
- * node -- input Ruby NODE
894
- * tab -- number of tabulations during print
895
- * show_offsets -- 0/1 show/hide addresses and symbol IDs
896
- */
897
- static void print_node(VALUE str, NODE *node, int tab, int show_offsets)
898
- {
899
- int i, j, type, ut[3];
900
- VALUE uref[3];
901
-
902
- PRINT_NODE_TAB
903
- if (node == NULL)
904
- {
905
- rbstr_printf(str, "(NULL)\n");
906
- return;
907
- }
908
- type = nd_type(node);
909
-
910
- if (show_offsets)
911
- {
912
- rbstr_printf(str, "@ %s | %16"PRIxPTR " | %16"PRIxPTR " %16"PRIxPTR " %16"PRIxPTR " (line %d)\n",
913
- ruby_node_name(type),
914
- (intptr_t) node,
915
- (intptr_t) node->u1.value, (intptr_t) node->u2.value, (intptr_t) node->u3.value,
916
- nd_line(node));
917
- }
918
- else
919
- {
920
- rbstr_printf(str, "@ %s (line %d)\n", ruby_node_name(type), nd_line(node));
921
- }
922
-
923
- ut[0] = nodes_ctbl[type * 3];
924
- ut[1] = nodes_ctbl[type * 3 + 1];
925
- ut[2] = nodes_ctbl[type * 3 + 2];
926
-
927
- uref[0] = node->u1.value;
928
- uref[1] = node->u2.value;
929
- uref[2] = node->u3.value;
930
-
931
- for (i = 0; i < 3; i++)
932
- {
933
-
934
- if (ut[i] == NT_NODE)
935
- {
936
- if (nd_type(node) != NODE_OP_ASGN2 || i != 2)
937
- print_node(str, RNODE(uref[i]), tab + 1, show_offsets);
938
- else
939
- {
940
- if (ut[i] != 0 && TYPE(ut[i]) != T_NODE)
941
- rb_raise(rb_eArgError, "print_node: broken node 0x%s", RSTRING_PTR(value_to_str(ut[i])));
942
- PRINT_NODE_TAB; rbstr_printf(str, " ");
943
- rbstr_printf(str, "%"PRIxPTR " %"PRIxPTR " %"PRIxPTR"\n",
944
- (intptr_t) RNODE(uref[i])->u1.value,
945
- (intptr_t) RNODE(uref[i])->u2.value,
946
- (intptr_t) RNODE(uref[i])->u3.value);
947
- }
948
- }
949
- else if (ut[i] == NT_VALUE)
950
- {
951
- char *class_name = RSTRING_PTR(rb_funcall(rb_funcall(uref[i], rb_intern("class"), 0), rb_intern("to_s"), 0));
952
- PRINT_NODE_TAB; rbstr_printf(str, " ");
953
- if (show_offsets)
954
- {
955
- rbstr_printf(str, ">| ADR: %"PRIxPTR"; CLASS: %s (TYPE %d); VALUE: %s\n",
956
- (intptr_t) uref[i],
957
- class_name, TYPE(uref[i]),
958
- RSTRING_PTR(rb_funcall(uref[i], rb_intern("to_s"), 0)));
959
- }
960
- else
961
- {
962
- rbstr_printf(str, ">| CLASS: %s (TYPE %d); VALUE: %s\n",
963
- class_name, TYPE(uref[i]),
964
- RSTRING_PTR(rb_funcall(uref[i], rb_intern("to_s"), 0)));
965
- }
966
- }
967
- else if (ut[i] == NT_ID)
968
- {
969
- const char *str_null = "<NULL>", *str_intern = "<NONAME>";
970
- const char *str_sym;
971
- PRINT_NODE_TAB; rbstr_printf(str, " ");
972
-
973
- if (uref[i] == 0)
974
- str_sym = str_null;
975
- else
976
- {
977
- VALUE rbstr_sym = rb_id2str(uref[i]);
978
- if (TYPE(rbstr_sym) == T_STRING)
979
- str_sym = RSTRING_PTR(rb_id2str(uref[i]));
980
- else
981
- str_sym = str_intern;
982
- }
983
-
984
- if (show_offsets)
985
- rbstr_printf(str, ">| ID: %d; SYMBOL: :%s\n", (ID) uref[i], str_sym);
986
- else
987
- rbstr_printf(str, ">| SYMBOL: :%s\n", str_sym);
988
- }
989
- else if (ut[i] == NT_LONG)
990
- {
991
- PRINT_NODE_TAB; rbstr_printf(str, " ");
992
- rbstr_printf(str, ">| %"PRIxPTR "\n", (intptr_t) uref[i]);
993
- }
994
- else if (ut[i] == NT_NULL)
995
- {
996
- PRINT_NODE_TAB; rbstr_printf(str, " ");
997
- rbstr_printf(str, ">| (NULL)\n");
998
- }
999
- else if (ut[i] == NT_ARGS)
1000
- {
1001
- PRINT_NODE_TAB; rbstr_printf(str, " ");
1002
- rbstr_printf(str, ">| ARGS\n");
1003
- }
1004
- else if (ut[i] == NT_IDTABLE)
1005
- {
1006
- PRINT_NODE_TAB; rbstr_printf(str, " ");
1007
- rbstr_printf(str, ">| IDTABLE\n");
1008
- }
1009
- else if (ut[i] == NT_ENTRY)
1010
- {
1011
- struct rb_global_entry *gentry;
1012
- gentry = (struct rb_global_entry *) uref[i];
1013
- PRINT_NODE_TAB; rbstr_printf(str, " ");
1014
- rbstr_printf(str, ">| [GLOBAL ENTRY PTR=0x%"PRIxPTR" ID=%X]\n", (uintptr_t) gentry->var, gentry->id);
1015
- }
1016
- else
1017
- {
1018
- PRINT_NODE_TAB; rbstr_printf(str, " ");
1019
- rbstr_printf(str, ">| [UNKNOWN]\n");
1020
- }
1021
- }
1022
- }
1023
-
1024
-
1025
-
1026
- void resolve_syms_ords(VALUE data, NODEObjAddresses *relocs)
1027
- {
1028
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("symbols")));
1029
- int i;
1030
- if (tbl_val == Qnil)
1031
- {
1032
- rb_raise(rb_eArgError, "Cannot find symbols table");
1033
- }
1034
- if (TYPE(tbl_val) != T_ARRAY)
1035
- {
1036
- rb_raise(rb_eArgError, "Symbols table is not an array");
1037
- }
1038
- relocs->syms_len = RARRAY_LEN(tbl_val);
1039
- relocs->syms_adr = ALLOC_N(ID, relocs->syms_len);
1040
- for (i = 0; i < relocs->syms_len; i++)
1041
- {
1042
- VALUE r_sym = RARRAY_PTR(tbl_val)[i];
1043
- if (TYPE(r_sym) == T_STRING)
1044
- { /* Created symbol will be immune to garbage collector */
1045
- relocs->syms_adr[i] = rb_intern(RSTRING_PTR(r_sym));
1046
- }
1047
- else if (TYPE(r_sym) == T_FIXNUM)
1048
- {
1049
- relocs->syms_adr[i] = (ID) FIX2INT(r_sym);
1050
- }
1051
- else
1052
- {
1053
- rb_raise(rb_eArgError, "Symbols table is corrupted");
1054
- }
1055
- }
1056
- }
1057
-
1058
- void resolve_lits_ords(VALUE data, NODEObjAddresses *relocs)
1059
- {
1060
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("literals")));
1061
- int i;
1062
- if (tbl_val == Qnil)
1063
- {
1064
- rb_raise(rb_eArgError, "Cannot find literals table");
1065
- }
1066
- if (TYPE(tbl_val) != T_ARRAY)
1067
- {
1068
- rb_raise(rb_eArgError, "Literals table is not an array");
1069
- }
1070
- relocs->lits_adr = RARRAY_PTR(tbl_val);
1071
- relocs->lits_len = RARRAY_LEN(tbl_val);
1072
- /* Mark all symbols as "immortal" (i.e. not collectable
1073
- by Ruby GC): some of them can be used in the syntax tree!
1074
- See the presentation of Narihiro Nakamura, author of
1075
- symbol GC in Ruby 2.x for details
1076
- http://www.slideshare.net/authorNari/symbol-gc */
1077
- for (i = 0; i < relocs->lits_len; i++)
1078
- {
1079
- if (TYPE(relocs->lits_adr[i]) == T_SYMBOL)
1080
- {
1081
- SYM2ID(relocs->lits_adr[i]);
1082
- }
1083
- }
1084
- }
1085
-
1086
- void resolve_gvars_ords(VALUE data, NODEObjAddresses *relocs)
1087
- {
1088
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("global_entries")));
1089
- int i;
1090
-
1091
- if (tbl_val == Qnil)
1092
- {
1093
- rb_raise(rb_eArgError, "Cannot find global entries table");
1094
- }
1095
- if (TYPE(tbl_val) != T_ARRAY)
1096
- {
1097
- rb_raise(rb_eArgError, "Global entries table should be an array");
1098
- }
1099
- relocs->gvars_len = RARRAY_LEN(tbl_val);
1100
- relocs->gvars_adr = ALLOC_N(struct rb_global_entry *, relocs->gvars_len);
1101
- for (i = 0; i < relocs->gvars_len; i++)
1102
- {
1103
- int ind = FIX2INT(RARRAY_PTR(tbl_val)[i]);
1104
- ID sym = relocs->syms_adr[ind];
1105
- relocs->gvars_adr[i] = rb_global_entry(sym);
1106
- }
1107
- }
1108
-
1109
-
1110
- void resolve_idtbls_ords(VALUE data, NODEObjAddresses *relocs)
1111
- {
1112
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("id_tables")));
1113
- int i, j, idnum;
1114
-
1115
- if (tbl_val == Qnil)
1116
- {
1117
- rb_raise(rb_eArgError, "Cannot find id_tables entries");
1118
- }
1119
- relocs->idtbls_len = RARRAY_LEN(tbl_val);
1120
- relocs->idtbls_adr = ALLOC_N(ID *, relocs->idtbls_len);
1121
- for (i = 0; i < relocs->idtbls_len; i++)
1122
- {
1123
- VALUE idtbl = RARRAY_PTR(tbl_val)[i];
1124
- idnum = RARRAY_LEN(idtbl);
1125
- if (idnum == 0)
1126
- { // Empty table: NULL pointer in the address table
1127
- relocs->idtbls_adr[i] = NULL;
1128
- }
1129
- else
1130
- { // Filled table: pointer to dynamic memory
1131
- relocs->idtbls_adr[i] = ALLOC_N(ID, idnum + 1);
1132
- relocs->idtbls_adr[i][0] = idnum;
1133
- for (j = 0; j < idnum; j++)
1134
- {
1135
- int ind = FIX2INT(RARRAY_PTR(idtbl)[j]);
1136
- relocs->idtbls_adr[i][j+1] = relocs->syms_adr[ind];
1137
- }
1138
- }
1139
- }
1140
- }
1141
-
1142
- void resolve_nodes_ords(VALUE data, int num_of_nodes, NODEObjAddresses *relocs)
1143
- {
1144
- int i;
1145
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("nodes")));
1146
- if (tbl_val == Qnil)
1147
- {
1148
- rb_raise(rb_eArgError, "Cannot find nodes entries");
1149
- }
1150
- if (TYPE(tbl_val) != T_STRING)
1151
- {
1152
- rb_raise(rb_eArgError, "Nodes description must be a string");
1153
- }
1154
- relocs->nodes_adr = ALLOC_N(NODE *, num_of_nodes);
1155
- relocs->nodes_len = num_of_nodes;
1156
- for (i = 0; i < num_of_nodes; i++)
1157
- {
1158
- relocs->nodes_adr[i] = (NODE *) NEW_NODE((enum node_type) 0, 0, 0, 0);
1159
- }
1160
- }
1161
-
1162
#ifdef USE_RB_ARGS_INFO
/*
 * Converts the ordinal of a node (Fixnum) to the address of the node.
 * Ordinal -1 means "no node" and gives NULL. Raises ArgumentError if
 * the ordinal is outside of the -1 .. nodes_len - 1 range.
 */
static NODE *resolve_args_node_ord(VALUE ord_val, NODEObjAddresses *relocs)
{
	long ord = FIX2LONG(ord_val);
	if (ord < -1 || ord >= relocs->nodes_len)
		rb_raise(rb_eArgError, "Invalid node ordinal %ld", ord);
	return (ord == -1) ? NULL : relocs->nodes_adr[ord];
}

/*
 * Converts the ordinal of a symbol (Fixnum) to the symbol ID.
 * Ordinal -1 means "no symbol" and gives 0. field_no is the number
 * shown at the beginning of the error message (1 -- first_post_arg,
 * 2 -- rest_arg, 3 -- block_arg). Raises ArgumentError if the ordinal
 * is outside of the -1 .. syms_len - 1 range.
 */
static ID resolve_args_sym_ord(VALUE ord_val, int field_no, NODEObjAddresses *relocs)
{
	int ord = FIX2INT(ord_val);
	if (ord < -1 || ord >= relocs->syms_len)
		rb_raise(rb_eArgError, "%d- Invalid symbol ID ordinal %d", field_no, ord);
	return (ord == -1) ? 0 : relocs->syms_adr[ord];
}

/*
 * Builds the table of rb_args_info structures (relocs->args_adr,
 * relocs->args_len) from the :args array of the unpacked dump.
 * Each element of the array must be an array of 10 Fixnums:
 *
 *   [0] pre_init       -- node ordinal
 *   [1] post_init      -- node ordinal
 *   [2] pre_args_num   -- plain number
 *   [3] post_args_num  -- plain number
 *   [4] first_post_arg -- symbol ordinal
 *   [5] rest_arg       -- symbol ordinal
 *   [6] block_arg      -- symbol ordinal
 *   [7] kw_args        -- node ordinal
 *   [8] kw_rest_arg    -- node ordinal
 *   [9] opt_args       -- node ordinal
 *
 * The function reads relocs->nodes_adr and relocs->syms_adr, i.e. nodes
 * and symbols tables must be prepared before the call.
 *
 * Raises ArgumentError if the table or one of its entries is corrupted.
 */
void resolve_args_ords(VALUE data, NODEObjAddresses *relocs)
{
	int i;
	VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("args")));

	if (tbl_val == Qnil)
	{
		rb_raise(rb_eArgError, "Cannot find args entries table");
	}
	if (TYPE(tbl_val) != T_ARRAY)
	{
		rb_raise(rb_eArgError, "args description must be an array");
	}
	relocs->args_len = RARRAY_LEN(tbl_val);
	relocs->args_adr = ALLOC_N(struct rb_args_info *, relocs->args_len);
	// All slots are NULL until they are loaded: if an exception interrupts
	// the loop below, no slot contains an uninitialized pointer
	MEMZERO(relocs->args_adr, struct rb_args_info *, relocs->args_len);
	for (i = 0; i < relocs->args_len; i++)
	{
		struct rb_args_info loaded, *ainfo;
		VALUE ainfo_val = rb_ary_entry(tbl_val, i);

		// The entry must be validated BEFORE its elements are accessed
		if (TYPE(ainfo_val) != T_ARRAY || RARRAY_LEN(ainfo_val) != 10)
		{
			rb_raise(rb_eArgError, "args entry %d is corrupted", i);
		}
		// Resolve nodes
		loaded.pre_init    = resolve_args_node_ord(rb_ary_entry(ainfo_val, 0), relocs);
		loaded.post_init   = resolve_args_node_ord(rb_ary_entry(ainfo_val, 1), relocs);
		loaded.kw_args     = resolve_args_node_ord(rb_ary_entry(ainfo_val, 7), relocs);
		loaded.kw_rest_arg = resolve_args_node_ord(rb_ary_entry(ainfo_val, 8), relocs);
		loaded.opt_args    = resolve_args_node_ord(rb_ary_entry(ainfo_val, 9), relocs);
		// Values that don't require ordinal resolving
		loaded.pre_args_num  = FIX2INT(rb_ary_entry(ainfo_val, 2));
		loaded.post_args_num = FIX2INT(rb_ary_entry(ainfo_val, 3));
		// Resolve symbolic ordinals
		loaded.first_post_arg = resolve_args_sym_ord(rb_ary_entry(ainfo_val, 4), 1, relocs);
		loaded.rest_arg       = resolve_args_sym_ord(rb_ary_entry(ainfo_val, 5), 2, relocs);
		loaded.block_arg      = resolve_args_sym_ord(rb_ary_entry(ainfo_val, 6), 3, relocs);
		// Memory is allocated only for the completely validated entry
		ainfo = ALLOC(struct rb_args_info);
		*ainfo = loaded;
		relocs->args_adr[i] = ainfo;
	}
}
#endif
1247
-
1248
- /*
1249
- * Transforms binary data with nodes descriptions into Ruby AST (i.e.
1250
- * ternary tree of nodes). Each node is represented in the next binary format:
1251
- *
1252
- * [4 bytes -- pointers info] [node flags] [child ORD1] [child ORD2] [child ORD3]
1253
- *
1254
- * Pointers info:
1255
- * BYTE -- child 1 info (bits 7..4 -- ordinal type, bits 3..0 -- ordinal size, bytes)
1256
- * BYTE -- child 2 info
1257
- * BYTE -- child 3 info
1258
- * BYTE -- node flags length, bytes
1259
- * Node flags:
1260
- * node->flags field packed by bin_to_value function
1261
- * child ORDi Ordinal of ith node child packed by bin_to_value_function
1262
- * (it will be transformed to the real address in memory, i.e. pointer
1263
- * or symbol ID during data loading)
1264
- */
1265
- void load_nodes_from_str(VALUE data, NODEObjAddresses *relocs)
1266
- {
1267
- int i, j;
1268
- VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("nodes")));
1269
- unsigned char *bin = (unsigned char *) RSTRING_PTR(tbl_val);
1270
- NODE *node = NULL;
1271
- for (i = 0; i < relocs->nodes_len; i++)
1272
- {
1273
- int rtypes[4];
1274
- VALUE u[3], flags;
1275
- // Read data structure info
1276
- for (j = 0; j < 4; j++)
1277
- rtypes[j] = *bin++;
1278
- flags = bin_to_value(bin, rtypes[3]); bin += rtypes[3];
1279
- for (j = 0; j < 3; j++)
1280
- {
1281
- int val_len = (rtypes[j] & 0xF0) >> 4;
1282
- u[j] = bin_to_value(bin, val_len);
1283
- bin += val_len;
1284
- rtypes[j] &= 0x0F;
1285
-
1286
- }
1287
- if ((char *)bin - RSTRING_PTR(tbl_val) > RSTRING_LEN(tbl_val))
1288
- rb_raise(rb_eArgError, "Nodes binary dump is too short");
1289
- // Resolving all addresses
1290
- for (j = 0; j < 3; j++)
1291
- {
1292
- switch(rtypes[j])
1293
- {
1294
- case VL_RAW: // Do nothing: it is raw data
1295
- break;
1296
- case VL_NODE:
1297
- if (u[j] >= (unsigned int) relocs->nodes_len)
1298
- rb_raise(rb_eArgError, "Cannot resolve VL_NODE entry %d", (int) u[j]);
1299
- u[j] = (VALUE) relocs->nodes_adr[u[j]];
1300
- if (TYPE(u[j]) != T_NODE)
1301
- rb_raise(rb_eArgError, "load_nodes_from_str: nodes memory corrupted");
1302
- break;
1303
- case VL_ID:
1304
- if (u[j] >= (unsigned int) relocs->syms_len)
1305
- rb_raise(rb_eArgError, "Cannot resolve VL_ID entry %d", (int) u[j]);
1306
- u[j] = relocs->syms_adr[u[j]];
1307
- break;
1308
- case VL_GVAR:
1309
- if (u[j] >= (unsigned int) relocs->gvars_len)
1310
- rb_raise(rb_eArgError, "Cannot resolve VL_GVAR entry %d", (int) u[j]);
1311
- u[j] = (VALUE) relocs->gvars_adr[u[j]];
1312
- break;
1313
- case VL_IDTABLE:
1314
- if (u[j] >= (unsigned int) relocs->idtbls_len)
1315
- rb_raise(rb_eArgError, "Cannot resolve VL_IDTABLE entry %d", (int) u[j]);
1316
- u[j] = (VALUE) relocs->idtbls_adr[u[j]];
1317
- break;
1318
- #ifdef USE_RB_ARGS_INFO
1319
- case VL_ARGS:
1320
- if (u[j] >= (unsigned int) relocs->args_len)
1321
- rb_raise(rb_eArgError, "Cannot resolve VL_ARGS entry %d", (int) u[j]);
1322
- u[j] = (VALUE) relocs->args_adr[u[j]];
1323
- break;
1324
- #endif
1325
- case VL_LIT:
1326
- if (u[j] >= (unsigned int) relocs->lits_len)
1327
- rb_raise(rb_eArgError, "Cannot resolve VL_LIT entry %d", (int) u[j]);
1328
- u[j] = (VALUE) relocs->lits_adr[u[j]];
1329
- break;
1330
- default:
1331
- rb_raise(rb_eArgError, "Unknown RTYPE %d", rtypes[j]);
1332
- }
1333
- }
1334
-
1335
- // Fill classic node structure
1336
- node = relocs->nodes_adr[i];
1337
- #ifdef RESET_GC_FLAGS
1338
- flags = flags & (~0x3); // Ruby 1.9.x -- specific thing
1339
- #endif
1340
- node->flags = (flags << 5) | T_NODE;
1341
- node->nd_reserved = 0;
1342
- node->u1.value = u[0];
1343
- node->u2.value = u[1];
1344
- node->u3.value = u[2];
1345
- }
1346
- }
1347
-
1348
- /*
1349
- * Returns the value of string hash field using symbolic key
1350
- */
1351
- static VALUE get_hash_strfield(VALUE hash, const char *idtxt)
1352
- {
1353
- VALUE str = rb_hash_aref(hash, ID2SYM(rb_intern(idtxt)));
1354
- if (TYPE(str) != T_STRING)
1355
- {
1356
- rb_raise(rb_eArgError, "Hash field %s is not a string", idtxt);
1357
- return Qnil;
1358
- }
1359
- else
1360
- {
1361
- return str;
1362
- }
1363
- }
1364
-
1365
- /*
1366
- * Check validity of node hash representation signatures ("magic" values)
1367
- */
1368
- static VALUE check_hash_magic(VALUE data)
1369
- {
1370
- VALUE val, refval;
1371
- // MAGIC signature must be valid
1372
- val = get_hash_strfield(data, "MAGIC");
1373
- if (strcmp(NODEMARSHAL_MAGIC, RSTRING_PTR(val)))
1374
- rb_raise(rb_eArgError, "Bad value of MAGIC signature");
1375
- // RUBY_PLATFORM signature must match the current platform
1376
- val = get_hash_strfield(data, "RUBY_PLATFORM");
1377
- refval = rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM"));
1378
- if (strcmp(RSTRING_PTR(refval), RSTRING_PTR(val)))
1379
- rb_raise(rb_eArgError, "Incompatible RUBY_PLATFORM value %s", RSTRING_PTR(val));
1380
- // RUBY_VERSION signature must match the used Ruby interpreter
1381
- val = get_hash_strfield(data, "RUBY_VERSION");
1382
- refval = rb_const_get(rb_cObject, rb_intern("RUBY_VERSION"));
1383
- if (strcmp(RSTRING_PTR(refval), RSTRING_PTR(val)))
1384
- rb_raise(rb_eArgError, "Incompatible RUBY_VERSION value %s", RSTRING_PTR(val));
1385
- return Qtrue;
1386
- }
1387
-
1388
- /*
1389
- * Part 5. C-to-Ruby interface
1390
- *
1391
- */
1392
-
1393
- /*
1394
- * Restore Ruby node from the binary blob (dump)
1395
- */
1396
- static VALUE m_nodedump_from_memory(VALUE self, VALUE dump)
1397
- {
1398
- VALUE cMarshal, data, val, val_relocs;
1399
- VALUE gc_was_disabled;
1400
- int num_of_nodes;
1401
- NODEObjAddresses *relocs;
1402
- /* DISABLE GARBAGE COLLECTOR (required for stable loading
1403
- of large node trees */
1404
- gc_was_disabled = rb_gc_disable();
1405
- /* Wrap struct for relocations */
1406
- val_relocs = Data_Make_Struct(cNodeObjAddresses, NODEObjAddresses,
1407
- NULL, NODEObjAddresses_free, relocs); // This data envelope cannot exist without NODE
1408
- /* Load and unpack our dump */
1409
- cMarshal = rb_const_get(rb_cObject, rb_intern("Marshal"));
1410
- data = rb_funcall(cMarshal, rb_intern("load"), 1, dump);
1411
- if (TYPE(data) != T_HASH)
1412
- {
1413
- rb_raise(rb_eArgError, "Input dump is corrupted");
1414
- }
1415
- val = rb_hash_aref(data, ID2SYM(rb_intern("num_of_nodes")));
1416
- if (val == Qnil)
1417
- {
1418
- rb_raise(rb_eArgError, "num_of_nodes not found");
1419
- }
1420
- else
1421
- {
1422
- num_of_nodes = FIX2INT(val);
1423
- }
1424
- /* Check "magic" signature and platform identifiers */
1425
- check_hash_magic(data);
1426
- /* Get the information about the source file that was compiled to the node */
1427
- // a) node name
1428
- val = rb_hash_aref(data, ID2SYM(rb_intern("nodename")));
1429
- if (val == Qnil || TYPE(val) == T_STRING)
1430
- rb_iv_set(self, "@nodename", val);
1431
- else
1432
- rb_raise(rb_eArgError, "nodename value is corrupted");
1433
- // b) file name
1434
- val = rb_hash_aref(data, ID2SYM(rb_intern("filename")));
1435
- if (val == Qnil || TYPE(val) == T_STRING)
1436
- rb_iv_set(self, "@filename", val);
1437
- else
1438
- rb_raise(rb_eArgError, "filename value is corrupted");
1439
- // c) file path
1440
- val = rb_hash_aref(data, ID2SYM(rb_intern("filepath")));
1441
- if (val == Qnil || TYPE(val) == T_STRING)
1442
- rb_iv_set(self, "@filepath", val);
1443
- else
1444
- rb_raise(rb_eArgError, "filepath value is corrupted");
1445
- /* Load all required data */
1446
- resolve_syms_ords(data, relocs); // Symbols
1447
- resolve_lits_ords(data, relocs); // Literals
1448
- resolve_gvars_ords(data, relocs); // Global entries (with symbol ID resolving)
1449
- resolve_idtbls_ords(data, relocs); // Identifiers tables (with symbol ID resolving)
1450
- resolve_nodes_ords(data, num_of_nodes, relocs); // Allocate memory for all nodes
1451
- #ifdef USE_RB_ARGS_INFO
1452
- resolve_args_ords(data, relocs); // Load args entries with symbols ID and nodes resolving
1453
- #endif
1454
- load_nodes_from_str(data, relocs);
1455
- /* Save the loaded node tree and collect garbage */
1456
- rb_iv_set(self, "@node", (VALUE) relocs->nodes_adr[0]);
1457
- rb_iv_set(self, "@num_of_nodes", INT2FIX(num_of_nodes));
1458
- rb_iv_set(self, "@obj_addresses", val_relocs);
1459
- if (gc_was_disabled == Qfalse)
1460
- {
1461
- rb_gc_enable();
1462
- rb_gc_start();
1463
- }
1464
- return self;
1465
- }
1466
-
1467
-
1468
- /*
1469
- * call-seq:
1470
- * obj.symbols
1471
- *
1472
- * Return array with the list of symbols
1473
- */
1474
- static VALUE m_nodedump_symbols(VALUE self)
1475
- {
1476
- int i;
1477
- VALUE val_relocs, val_nodeinfo, syms;
1478
- // Variant 1: node loaded from file
1479
- val_relocs = rb_iv_get(self, "@obj_addresses");
1480
- if (val_relocs != Qnil)
1481
- {
1482
- NODEObjAddresses *relocs;
1483
- Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
1484
- syms = rb_ary_new();
1485
- for (i = 0; i < relocs->syms_len; i++)
1486
- rb_ary_push(syms, ID2SYM(relocs->syms_adr[i]));
1487
- return syms;
1488
- }
1489
- // Variant 2: node saved to file (parsed from memory)
1490
- val_nodeinfo = rb_iv_get(self, "@nodeinfo");
1491
- if (val_nodeinfo != Qnil)
1492
- {
1493
- NODEInfo *ninfo;
1494
- VALUE *ary;
1495
- Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
1496
- syms = rb_funcall(ninfo->syms.vals, rb_intern("values"), 0);
1497
- ary = RARRAY_PTR(syms);
1498
- for (i = 0; i < RARRAY_LEN(syms); i++)
1499
- {
1500
- ary[i] = rb_funcall(ary[i], rb_intern("to_sym"), 0);
1501
- }
1502
- return syms;
1503
- }
1504
- rb_raise(rb_eArgError, "Symbol information not initialized. Run to_hash before reading.");
1505
- }
1506
-
1507
- /*
1508
- * call-seq:
1509
- * obj.change_symbol(old_sym, new_sym)
1510
- *
1511
- * Replace one symbol by another (to be used for code obfuscation)
1512
- * - +old_sym+ -- String that contains symbol name to be replaced
1513
- * - +new_sym+ -- String that contains new name of the symbol
1514
- */
1515
- static VALUE m_nodedump_change_symbol(VALUE self, VALUE old_sym, VALUE new_sym)
1516
- {
1517
- VALUE val_nodehash = rb_iv_get(self, "@nodehash");
1518
- VALUE syms, key;
1519
- // Check if node is position-independent
1520
- // (i.e. with initialized NODEInfo structure that contains
1521
- // relocations for symbols)
1522
- if (val_nodehash == Qnil)
1523
- rb_raise(rb_eArgError, "This node is not preparsed into Hash");
1524
- // Check data types of the input array
1525
- if (TYPE(old_sym) != T_STRING)
1526
- {
1527
- rb_raise(rb_eArgError, "old_sym argument must be a string");
1528
- }
1529
- if (TYPE(new_sym) != T_STRING)
1530
- {
1531
- rb_raise(rb_eArgError, "new_sym argument must be a string");
1532
- }
1533
- // Get the symbol table from the Hash
1534
- syms = rb_hash_aref(val_nodehash, ID2SYM(rb_intern("symbols")));
1535
- if (syms == Qnil)
1536
- rb_raise(rb_eArgError, "Preparsed hash has no :symbols field");
1537
- // Check if new_sym is present in the symbol table
1538
- key = rb_funcall(syms, rb_intern("find_index"), 1, new_sym);
1539
- if (key != Qnil)
1540
- {
1541
- rb_raise(rb_eArgError, "new_sym value must be absent in table of symbols");
1542
- }
1543
- // Change the symbol in the preparsed Hash
1544
- key = rb_funcall(syms, rb_intern("find_index"), 1, old_sym);
1545
- if (key == Qnil)
1546
- return Qnil;
1547
- RARRAY_PTR(syms)[FIX2INT(key)] = new_sym;
1548
- return self;
1549
- }
1550
-
1551
- /*
1552
- * Return array with the list of literals
1553
- */
1554
- static VALUE m_nodedump_literals(VALUE self)
1555
- {
1556
- int i;
1557
- VALUE val_relocs, val_nodeinfo, lits;
1558
- // Variant 1: node loaded from file. It uses NODEObjAddresses struct
1559
- // with the results of Ruby NODE structure parsing.
1560
- val_relocs = rb_iv_get(self, "@obj_addresses");
1561
- if (val_relocs != Qnil)
1562
- {
1563
- NODEObjAddresses *relocs;
1564
-
1565
- Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
1566
- lits = rb_ary_new();
1567
- for (i = 0; i < relocs->lits_len; i++)
1568
- {
1569
- VALUE val = relocs->lits_adr[i];
1570
- int t = TYPE(val);
1571
- if (t != T_SYMBOL && t != T_FLOAT && t != T_FIXNUM)
1572
- val = rb_funcall(val, rb_intern("dup"), 0);
1573
- rb_ary_push(lits, val);
1574
- }
1575
- return lits;
1576
- }
1577
- // Variant 2: node saved to file (parsed from memory). It uses
1578
- // NODEInfo struct that is initialized during node dump parsing.
1579
- val_nodeinfo = rb_iv_get(self, "@nodeinfo");
1580
- if (val_nodeinfo != Qnil)
1581
- {
1582
- NODEInfo *ninfo;
1583
- VALUE *ary;
1584
- Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
1585
- lits = rb_funcall(ninfo->lits.vals, rb_intern("values"), 0);
1586
- ary = RARRAY_PTR(lits);
1587
- for (i = 0; i < RARRAY_LEN(lits); i++)
1588
- {
1589
- int t = TYPE(ary[i]);
1590
- if (t != T_SYMBOL && t != T_FLOAT && t != T_FIXNUM)
1591
- ary[i] = rb_funcall(ary[i], rb_intern("dup"), 0);
1592
- }
1593
- return lits;
1594
- }
1595
- rb_raise(rb_eArgError, "Literals information not initialized. Run to_hash before reading.");
1596
- }
1597
-
1598
- /*
1599
- * Update the array with the list of literals
1600
- * (to be used for code obfuscation)
1601
- * Warning! This function is a stub!
1602
- */
1603
- static VALUE m_nodedump_change_literal(VALUE self, VALUE old_lit, VALUE new_lit)
1604
- {
1605
- /* TO BE IMPLEMENTED */
1606
- return self;
1607
- }
1608
-
1609
-
1610
- /*
1611
- * call-seq:
1612
- * obj.compile
1613
- *
1614
- * Creates the RubyVM::InstructionSequence object from the node
1615
- */
1616
- static VALUE m_nodedump_compile(VALUE self)
1617
- {
1618
- NODE *node = RNODE(rb_iv_get(self, "@node"));
1619
- VALUE nodename = rb_iv_get(self, "@nodename");
1620
- VALUE filename = rb_iv_get(self, "@filename");
1621
- VALUE filepath = rb_iv_get(self, "@filepath");
1622
- #ifndef WITH_RB_ISEQW_NEW
1623
- /* For Pre-2.3 */
1624
- return rb_iseq_new_top(node, nodename, filename, filepath, Qfalse);
1625
- #else
1626
- /* For Ruby 2.3 */
1627
- return rb_iseqw_new(rb_iseq_new_top(node, nodename, filename, filepath, Qfalse));
1628
- #endif
1629
- }
1630
-
1631
- /*
1632
- * Parses Ruby file with the source code and saves the node
1633
- */
1634
- static VALUE m_nodedump_from_source(VALUE self, VALUE file)
1635
- {
1636
- VALUE line = INT2FIX(1), f, node, filepath, gc_was_disabled;
1637
- const char *fname;
1638
-
1639
- gc_was_disabled = rb_gc_disable();
1640
- rb_secure(1);
1641
- FilePathValue(file);
1642
- fname = StringValueCStr(file);
1643
- /* Remember information about the file */
1644
- rb_iv_set(self, "@nodename", rb_str_new2("<main>"));
1645
- rb_iv_set(self, "@filename", file);
1646
- filepath = rb_funcall(rb_cFile, rb_intern("realpath"), 1, file); // Envelope for rb_realpath_internal
1647
- rb_iv_set(self, "@filepath", filepath);
1648
- /* Create node from the source */
1649
- f = rb_file_open_str(file, "r");
1650
- node = (VALUE) rb_compile_file(fname, f, NUM2INT(line));
1651
- rb_iv_set(self, "@node", node);
1652
- if ((void *) node == NULL)
1653
- {
1654
- rb_raise(rb_eArgError, "Error during string parsing");
1655
- }
1656
- if (gc_was_disabled == Qfalse)
1657
- {
1658
- rb_gc_enable();
1659
- }
1660
- return self;
1661
- }
1662
-
1663
- /*
1664
- * Parses Ruby string with the source code and saves the node
1665
- */
1666
- static VALUE m_nodedump_from_string(VALUE self, VALUE str)
1667
- {
1668
- VALUE line = INT2FIX(1), node, gc_was_disabled;
1669
- const char *fname = "STRING";
1670
- Check_Type(str, T_STRING);
1671
- gc_was_disabled = rb_gc_disable();
1672
- rb_secure(1);
1673
- /* Create empty information about the file */
1674
- rb_iv_set(self, "@nodename", rb_str_new2("<main>"));
1675
- if (RUBY_API_VERSION_MAJOR == 1)
1676
- { /* For Ruby 1.9.x */
1677
- rb_iv_set(self, "@filename", Qnil);
1678
- rb_iv_set(self, "@filepath", Qnil);
1679
- }
1680
- else
1681
- { /* For Ruby 2.x */
1682
- rb_iv_set(self, "@filename", rb_str_new2("<compiled>"));
1683
- rb_iv_set(self, "@filepath", rb_str_new2("<compiled>"));
1684
- }
1685
- /* Create node from the string */
1686
- node = (VALUE) rb_compile_string(fname, str, NUM2INT(line));
1687
- rb_iv_set(self, "@node", node);
1688
- if (gc_was_disabled == Qfalse)
1689
- {
1690
- rb_gc_enable();
1691
- rb_gc_start();
1692
- }
1693
- if ((void *) node == NULL)
1694
- {
1695
- rb_raise(rb_eArgError, "Error during string parsing");
1696
- }
1697
- return self;
1698
- }
1699
-
1700
- /*
1701
- * call-seq:
1702
- * obj.new(:srcfile, filename) # Will load source file from the disk
1703
- * obj.new(:binfile, filename) # Will load file with node binary dump from the disk
1704
- * obj.new(:srcmemory, srcstr) # Will load source code from the string
1705
- * obj.new(:binmemory, binstr) # Will load node binary dump from the string
1706
- *
1707
- * Creates NodeMarshal class example from the source code or dumped
1708
- * syntax tree (NODEs), i.e. preparsed and packed source code. Created
1709
- * object can be used either for code execution or for saving it
1710
- * in the preparsed form (useful for code obfuscation/protection)
1711
- */
1712
- static VALUE m_nodedump_init(VALUE self, VALUE source, VALUE info)
1713
- {
1714
- ID id_usr;
1715
- rb_iv_set(self, "@show_offsets", Qfalse);
1716
- Check_Type(source, T_SYMBOL);
1717
- id_usr = SYM2ID(source);
1718
- if (id_usr == rb_intern("srcfile"))
1719
- {
1720
- return m_nodedump_from_source(self, info);
1721
- }
1722
- else if (id_usr == rb_intern("srcmemory"))
1723
- {
1724
- return m_nodedump_from_string(self, info);
1725
- }
1726
- else if (id_usr == rb_intern("binmemory"))
1727
- {
1728
- return m_nodedump_from_memory(self, info);
1729
- }
1730
- else if (id_usr == rb_intern("binfile"))
1731
- {
1732
- VALUE cFile = rb_const_get(rb_cObject, rb_intern("File"));
1733
- VALUE bin = rb_funcall(cFile, rb_intern("binread"), 1, info);
1734
- return m_nodedump_from_memory(self, bin);
1735
- }
1736
- else
1737
- {
1738
- rb_raise(rb_eArgError, "Invalid source type (it must be :srcfile, :srcmemory, :binmemory of :binfile)");
1739
- }
1740
- return Qnil;
1741
- }
1742
-
1743
- /*
1744
- * call-seq:
1745
- * obj.dump_tree
1746
- *
1747
- * Transforms Ruby syntax tree (NODE) to the String using
1748
- * +rb_parser_dump_tree+ function from +node.c+ (see Ruby source code).
1749
- */
1750
- static VALUE m_nodedump_parser_dump_tree(VALUE self)
1751
- {
1752
- NODE *node = RNODE(rb_iv_get(self, "@node"));
1753
- return rb_parser_dump_tree(node, 0);
1754
- }
1755
-
1756
- /*
1757
- * call-seq:
1758
- * obj.dump_tree_short
1759
- *
1760
- * Transforms Ruby syntax tree (NODE) to the String using custom function
1761
- * instead of +rb_parser_dump_tree+ function.
1762
- *
1763
- * See also #show_offsets, #show_offsets=
1764
- */
1765
- static VALUE m_nodedump_dump_tree_short(VALUE self)
1766
- {
1767
- VALUE str = rb_str_new2(""); // Output string
1768
- NODE *node = RNODE(rb_iv_get(self, "@node"));
1769
- int show_offsets = (rb_iv_get(self, "@show_offsets") == Qtrue) ? 1 : 0;
1770
- print_node(str, node, 0, show_offsets);
1771
- return str;
1772
- }
1773
-
1774
- /*
1775
- * call-seq:
1776
- * obj.show_offsets
1777
- *
1778
- * Returns show_offsets property (used by NodeMarshal#dump_tree_short)
1779
- * It can be either true or false
1780
- */
1781
- static VALUE m_nodedump_show_offsets(VALUE self)
1782
- {
1783
- return rb_iv_get(self, "@show_offsets");
1784
- }
1785
-
1786
- /*
1787
- * call-seq:
1788
- * obj.show_offsets=
1789
- *
1790
- * Sets show_offsets property (used by NodeMarshal#dump_tree_short)
1791
- * It can be either true or false
1792
- */
1793
- static VALUE m_nodedump_set_show_offsets(VALUE self, VALUE value)
1794
- {
1795
- if (value != Qtrue && value != Qfalse)
1796
- {
1797
- rb_raise(rb_eArgError, "show_offsets property must be either true or false");
1798
- }
1799
- return rb_iv_set(self, "@show_offsets", value);
1800
- }
1801
-
1802
-
1803
- /*
1804
- * call-seq:
1805
- * obj.to_hash
1806
- *
1807
- * Converts NodeMarshal class example to the hash that contains full
1808
- * and independent from data structures memory addresses information.
1809
- * Format of the obtained hash depends on used platform (especially
1810
- * size of the pointer) and Ruby version.
1811
- *
1812
- * <b>Format of the hash</b>
1813
- *
1814
- * <i>Part 1: Signatures</i>
1815
- *
1816
- * - <tt>MAGIC</tt> -- NODEMARSHAL11
1817
- * - <tt>RUBY_PLATFORM</tt> -- saved <tt>RUBY_PLATFORM</tt> constant value
1818
- * - <tt>RUBY_VERSION</tt> -- saved <tt>RUBY_VERSION</tt> constant value
1819
- *
1820
- * <i>Part 2: Program loadable elements.</i>
1821
- *
1822
- * All loadable elements are arrays. Index of the array element means
1823
- * its identifier that is used in the node tree.
1824
- *
1825
- * - <tt>literals</tt> -- program literals (strings, ranges etc.)
1826
- * - <tt>symbols</tt> -- program symbols (values have either String or Fixnum
1827
- * data type; numbers are used for symbols that cannot be represented as strings)
1828
- * - <tt>global_entries</tt> -- global variables information
1829
- * - <tt>id_tables</tt> -- array of arrays. Each array contains symbols IDs
1830
- * - <tt>args</tt> -- information about code block argument(s)
1831
- *
1832
- * <i>Part 3: Nodes information</i>
1833
- * - <tt>nodes</tt> -- string that contains binary encoded information
1834
- * about the nodes
1835
- * - <tt>num_of_nodes</tt> -- number of nodes in the <tt>nodes</tt> field
1836
- * - <tt>nodename</tt> -- name of the node (usually "<main>")
1837
- * - <tt>filename</tt> -- name (without path) of .rb file used for the node generation
1838
- * - <tt>filepath</tt> -- name (with full path) of .rb file used for the node generation
1839
- */
1840
- static VALUE m_nodedump_to_hash(VALUE self)
1841
- {
1842
- NODE *node = RNODE(rb_iv_get(self, "@node"));
1843
- NODEInfo *info;
1844
- VALUE ans, num, val_info, gc_was_disabled;
1845
- // DISABLE GARBAGE COLLECTOR (important for dumping)
1846
- gc_was_disabled = rb_gc_disable();
1847
- // Convert the node to the form with relocs (i.e. the information about node)
1848
- // if such form is not present
1849
- val_info = rb_iv_get(self, "@nodeinfo");
1850
- if (val_info == Qnil)
1851
- {
1852
- val_info = Data_Make_Struct(cNodeInfo, NODEInfo,
1853
- NODEInfo_mark, NODEInfo_free, info); // This data envelope cannot exist without NODE
1854
- NODEInfo_init(info);
1855
- rb_iv_set(self, "@nodeinfo", val_info);
1856
- num = INT2FIX(count_num_of_nodes(node, node, info));
1857
- rb_iv_set(self, "@nodeinfo_num_of_nodes", num);
1858
- // Convert node to NODEInfo structure
1859
- ans = NODEInfo_toHash(info);
1860
- rb_hash_aset(ans, ID2SYM(rb_intern("num_of_nodes")), num);
1861
- rb_hash_aset(ans, ID2SYM(rb_intern("nodename")), rb_iv_get(self, "@nodename"));
1862
- rb_hash_aset(ans, ID2SYM(rb_intern("filename")), rb_iv_get(self, "@filename"));
1863
- rb_hash_aset(ans, ID2SYM(rb_intern("filepath")), rb_iv_get(self, "@filepath"));
1864
- rb_iv_set(self, "@nodehash", ans);
1865
- }
1866
- else
1867
- {
1868
- ans = rb_iv_get(self, "@nodehash");
1869
- }
1870
- // ENABLE GARBAGE COLLECTOR (important for dumping)
1871
- if (gc_was_disabled == Qfalse)
1872
- {
1873
- rb_gc_enable();
1874
- }
1875
- return ans;
1876
- }
1877
-
1878
-
1879
- VALUE m_node_to_ary(NODE *node)
1880
- {
1881
- int i, type, ut[3];
1882
- VALUE uref[3];
1883
- VALUE entry = rb_ary_new();
1884
- /* Special case: NULL node */
1885
- if (node == NULL)
1886
- {
1887
- return Qnil;
1888
- }
1889
- /* Save node name */
1890
- type = nd_type(node);
1891
- rb_ary_push(entry, ID2SYM(rb_intern(ruby_node_name(type))));
1892
-
1893
- ut[0] = nodes_ctbl[type * 3];
1894
- ut[1] = nodes_ctbl[type * 3 + 1];
1895
- ut[2] = nodes_ctbl[type * 3 + 2];
1896
-
1897
- uref[0] = node->u1.value;
1898
- uref[1] = node->u2.value;
1899
- uref[2] = node->u3.value;
1900
-
1901
-
1902
- for (i = 0; i < 3; i++)
1903
- {
1904
- if (ut[i] == NT_NODE)
1905
- {
1906
- if (nd_type(node) != NODE_OP_ASGN2 || i != 2)
1907
- {
1908
- rb_ary_push(entry, m_node_to_ary(RNODE(uref[i])));
1909
- }
1910
- else
1911
- {
1912
- VALUE child = rb_ary_new();
1913
- if (ut[i] != 0 && TYPE(ut[i]) != T_NODE)
1914
- rb_raise(rb_eArgError, "print_node: broken node 0x%s", RSTRING_PTR(value_to_str(ut[i])));
1915
- rb_ary_push(child, ID2SYM(rb_intern("NODE_OP_ASGN2")));
1916
- rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u1.value));
1917
- rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u2.value));
1918
- rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u3.value));
1919
- rb_ary_push(entry, child);
1920
- }
1921
- }
1922
- else if (ut[i] == NT_VALUE)
1923
- {
1924
- rb_ary_push(entry, uref[i]);
1925
- }
1926
- else if (ut[i] == NT_ID)
1927
- {
1928
- rb_ary_push(entry, ID2SYM( (ID) uref[i]));
1929
- }
1930
- else if (ut[i] == NT_LONG)
1931
- {
1932
- rb_ary_push(entry, LONG2NUM( (intptr_t) uref[i]));
1933
- }
1934
- else if (ut[i] == NT_NULL)
1935
- {
1936
- rb_ary_push(entry, Qnil);
1937
- }
1938
- else if (ut[i] == NT_ARGS)
1939
- {
1940
- VALUE rargs = rb_hash_new();
1941
- VALUE rargs_env = rb_ary_new();
1942
- #ifdef USE_RB_ARGS_INFO
1943
- ID id;
1944
- struct rb_args_info *args = (void *) uref[i];
1945
-
1946
- rb_hash_aset(rargs, ID2SYM(rb_intern("pre_init")), m_node_to_ary(args->pre_init));
1947
- rb_hash_aset(rargs, ID2SYM(rb_intern("post_init")), m_node_to_ary(args->post_init));
1948
-
1949
- id = args->first_post_arg;
1950
- rb_hash_aset(rargs, ID2SYM(rb_intern("first_post_arg")), (id) ? ID2SYM(id) : Qnil);
1951
- id = args->rest_arg;
1952
- rb_hash_aset(rargs, ID2SYM(rb_intern("rest_arg")), (id) ? ID2SYM(id) : Qnil);
1953
- id = args->block_arg;
1954
- rb_hash_aset(rargs, ID2SYM(rb_intern("block_arg")), (id) ? ID2SYM(id) : Qnil);
1955
-
1956
- rb_hash_aset(rargs, ID2SYM(rb_intern("kw_args")), m_node_to_ary(args->kw_args));
1957
- rb_hash_aset(rargs, ID2SYM(rb_intern("kw_rest_arg")), m_node_to_ary(args->kw_rest_arg));
1958
- rb_hash_aset(rargs, ID2SYM(rb_intern("opt_args")), m_node_to_ary(args->opt_args));
1959
- #endif
1960
- rb_ary_push(rargs_env, ID2SYM(rb_intern("ARGS")));
1961
- rb_ary_push(rargs_env, rargs);
1962
- rb_ary_push(entry, rargs_env);
1963
- }
1964
- else if (ut[i] == NT_IDTABLE)
1965
- {
1966
- VALUE ridtbl = rb_ary_new();
1967
- VALUE idtbl_ary = rb_ary_new();
1968
- int j, len;
1969
-
1970
- ID *idtbl = (ID *) uref[i];
1971
- len = (uref[i]) ? *idtbl++ : 0;
1972
- for (j = 0; j < len; j++)
1973
- {
1974
- ID sym = *idtbl++;
1975
- VALUE val = ID2SYM(sym);
1976
- rb_ary_push(idtbl_ary, val);
1977
- }
1978
- rb_ary_push(ridtbl, ID2SYM(rb_intern("IDTABLE")));
1979
- rb_ary_push(ridtbl, idtbl_ary);
1980
- rb_ary_push(entry, ridtbl);
1981
- }
1982
- else if (ut[i] == NT_ENTRY)
1983
- {
1984
- struct rb_global_entry *gentry;
1985
- gentry = (struct rb_global_entry *) uref[i];
1986
- rb_ary_push(entry, ID2SYM(gentry->id));
1987
- }
1988
- else
1989
- {
1990
- rb_ary_push(entry, ID2SYM(rb_intern("UNKNOWN")));
1991
- }
1992
- }
1993
- return entry;
1994
- }
1995
-
1996
- /*
1997
- * call-seq:
1998
- * obj.to_a
1999
- *
2000
- * Converts node to the array (mainly to allow exploration of AST
2001
- * by the user). It shows information about rb_args_info and
2002
- * ID *tbl that are not displayed by NodeMarshal#dump_tree and
2003
- * NodeMarshal#dump_tree_short.
2004
- */
2005
- static VALUE m_nodedump_to_a(VALUE self)
2006
- {
2007
- NODE *node = RNODE(rb_iv_get(self, "@node"));
2008
- VALUE gc_was_disabled = rb_gc_disable();
2009
- VALUE ary = m_node_to_ary(node);
2010
- if (gc_was_disabled == Qfalse)
2011
- {
2012
- rb_gc_enable();
2013
- }
2014
- return ary;
2015
- }
2016
-
2017
-
2018
- /*
2019
- * call-seq:
2020
- * obj.to_bin
2021
- *
2022
- * Converts NodeMarshal class instance to the binary string that
2023
- * can be saved to the file and used for loading the node from the file.
2024
- * Format of the obtained binary dump depends on used platform (especially
2025
- * size of the pointer) and Ruby version.
2026
- */
2027
- static VALUE m_nodedump_to_bin(VALUE self)
2028
- {
2029
- VALUE hash = m_nodedump_to_hash(self);
2030
- VALUE cMarshal = rb_const_get(rb_cObject, rb_intern("Marshal"));
2031
- return rb_funcall(cMarshal, rb_intern("dump"), 1, hash);
2032
- }
2033
-
2034
- /*
2035
- * Gives the information about the node
2036
- */
2037
- static VALUE m_nodedump_inspect(VALUE self)
2038
- {
2039
- static char str[1024], buf[512];
2040
- VALUE num_of_nodes, nodename, filepath, filename;
2041
- VALUE val_obj_addresses, val_nodeinfo;
2042
- // Get generic information about node
2043
- num_of_nodes = rb_iv_get(self, "@num_of_nodes");
2044
- nodename = rb_iv_get(self, "@nodename");
2045
- filepath = rb_iv_get(self, "@filepath");
2046
- filename = rb_iv_get(self, "@filename");
2047
- // Generate string with generic information about node
2048
- sprintf(str,
2049
- "----- NodeMarshal:0x%"PRIxPTR"\n"
2050
- " num_of_nodes: %d\n nodename: %s\n filepath: %s\n filename: %s\n",
2051
- (uintptr_t) (self),
2052
- (num_of_nodes == Qnil) ? -1 : FIX2INT(num_of_nodes),
2053
- (nodename == Qnil) ? "nil" : RSTRING_PTR(nodename),
2054
- (filepath == Qnil) ? "nil" : RSTRING_PTR(filepath),
2055
- (filename == Qnil) ? "nil" : RSTRING_PTR(filename)
2056
- );
2057
- // Check if the information about node struct is available
2058
- val_nodeinfo = rb_iv_get(self, "@nodeinfo");
2059
- val_obj_addresses = rb_iv_get(self, "@obj_addresses");
2060
- if (val_nodeinfo == Qnil && val_obj_addresses == Qnil)
2061
- {
2062
- m_nodedump_to_hash(self);
2063
- val_nodeinfo = rb_iv_get(self, "@nodeinfo");
2064
- }
2065
- // Information about preparsed node
2066
- // a) NODEInfo struct
2067
- if (val_nodeinfo == Qnil)
2068
- {
2069
- sprintf(buf, " NODEInfo struct is empty\n");
2070
- }
2071
- else
2072
- {
2073
- NODEInfo *ninfo;
2074
- Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
2075
- sprintf(buf,
2076
- " NODEInfo struct:\n"
2077
- " syms hash len (Symbols): %d\n"
2078
- " lits hash len (Literals): %d\n"
2079
- " idtabs hash len (ID tables): %d\n"
2080
- " gentries hash len (Global vars): %d\n"
2081
- " nodes hash len (Nodes): %d\n"
2082
- " pnodes hash len (Parent nodes): %d\n"
2083
- #ifdef USE_RB_ARGS_INFO
2084
- " args hash len (args info): %d\n"
2085
- #endif
2086
- ,
2087
- FIX2INT(rb_funcall(ninfo->syms.vals, rb_intern("length"), 0)),
2088
- FIX2INT(rb_funcall(ninfo->lits.vals, rb_intern("length"), 0)),
2089
- FIX2INT(rb_funcall(ninfo->idtabs.vals, rb_intern("length"), 0)),
2090
- FIX2INT(rb_funcall(ninfo->gentries.vals, rb_intern("length"), 0)),
2091
- FIX2INT(rb_funcall(ninfo->nodes.vals, rb_intern("length"), 0)),
2092
- FIX2INT(rb_funcall(ninfo->pnodes.vals, rb_intern("length"), 0))
2093
- #ifdef USE_RB_ARGS_INFO
2094
- ,
2095
- FIX2INT(rb_funcall(ninfo->args.vals, rb_intern("length"), 0))
2096
- #endif
2097
- );
2098
- }
2099
- strcat(str, buf);
2100
- // b) NODEObjAddresses struct
2101
- if (val_obj_addresses == Qnil)
2102
- {
2103
- sprintf(buf, " NODEObjAddresses struct is empty\n");
2104
- }
2105
- else
2106
- {
2107
- NODEObjAddresses *objadr;
2108
- Data_Get_Struct(val_obj_addresses, NODEObjAddresses, objadr);
2109
- sprintf(buf,
2110
- " NODEObjAddresses struct:\n"
2111
- " syms_len (Num of symbols): %d\n"
2112
- " lits_len (Num of literals): %d\n"
2113
- " idtbls_len (Num of ID tables): %d\n"
2114
- " gvars_len (Num of global vars): %d\n"
2115
- " nodes_len (Num of nodes): %d\n"
2116
- #ifdef USE_RB_ARGS_INFO
2117
- " args_len: (Num of args info): %d\n"
2118
- #endif
2119
- , objadr->syms_len, objadr->lits_len,
2120
- objadr->idtbls_len, objadr->gvars_len,
2121
- objadr->nodes_len
2122
- #ifdef USE_RB_ARGS_INFO
2123
- , objadr->args_len
2124
- #endif
2125
- );
2126
- }
2127
- strcat(str, buf);
2128
- strcat(str, "------------------\n");
2129
- // Generate output string
2130
- return rb_str_new2(str);
2131
- }
2132
-
2133
- /*
2134
- * Returns node name (usually <main>)
2135
- */
2136
- static VALUE m_nodedump_nodename(VALUE self)
2137
- {
2138
- return rb_funcall(rb_iv_get(self, "@nodename"), rb_intern("dup"), 0);
2139
- }
2140
-
2141
- /*
2142
- * Returns name of file that was used for node generation and will be used
2143
- * by YARV (or nil/<compiled> if a string of code was used)
2144
- */
2145
- static VALUE m_nodedump_filename(VALUE self)
2146
- {
2147
- return rb_funcall(rb_iv_get(self, "@filename"), rb_intern("dup"), 0);
2148
- }
2149
-
2150
- /*
2151
- * Sets name of file that was used for node generation and will be used
2152
- * by YARV (or nil/<compiled> if a string of code was used)
2153
- */
2154
- static VALUE m_nodedump_set_filename(VALUE self, VALUE val)
2155
- {
2156
- if (val != Qnil)
2157
- {
2158
- Check_Type(val, T_STRING);
2159
- rb_iv_set(self, "@filename", rb_funcall(val, rb_intern("dup"), 0));
2160
- }
2161
- else
2162
- {
2163
- rb_iv_set(self, "@filename", Qnil);
2164
- }
2165
- return self;
2166
- }
2167
-
2168
- /*
2169
- * Returns path of file that was used for node generation and will be used
2170
- * by YARV (or nil/<compiled> if a string of code was used)
2171
- */
2172
- static VALUE m_nodedump_filepath(VALUE self)
2173
- {
2174
- return rb_funcall(rb_iv_get(self, "@filepath"), rb_intern("dup"), 0);
2175
- }
2176
-
2177
- /*
2178
- * call-seq:
2179
- * obj.filepath=value
2180
- *
2181
- * Sets the path of file that was used for node generation and will
2182
- * be used by YARV (or nil/<compiled> if a string of code was used)
2183
- */
2184
- static VALUE m_nodedump_set_filepath(VALUE self, VALUE val)
2185
- {
2186
- if (val != Qnil)
2187
- {
2188
- Check_Type(val, T_STRING);
2189
- rb_iv_set(self, "@filepath", rb_funcall(val, rb_intern("dup"), 0));
2190
- }
2191
- else
2192
- {
2193
- rb_iv_set(self, "@filepath", Qnil);
2194
- }
2195
- return self;
2196
- }
2197
-
2198
- /*
2199
- * call-seq:
2200
- * NodeMarshal.base85r_encode(input) -> output
2201
- *
2202
- * Encode arbitrary binary string to the ASCII string
2203
- * using modified version of BASE85 (useful for obfuscation
2204
- * of .rb source files)
2205
- */
2206
- static VALUE m_base85r_encode(VALUE obj, VALUE input)
2207
- {
2208
- return base85r_encode(input);
2209
- }
2210
-
2211
- /*
2212
- * call-seq:
2213
- * NodeMarshal.base85r_decode(input) -> output
2214
- *
2215
- * Decode ASCII string in the modified BASE85 format
2216
- * to the binary string (useful for obfuscation of .rb
2217
- * source files)
2218
- */
2219
- static VALUE m_base85r_decode(VALUE obj, VALUE input)
2220
- {
2221
- return base85r_decode(input);
2222
- }
2223
-
2224
- /* call-seq:
2225
- * obj.to_text
2226
- *
2227
- * Converts NodeMarshal class instance to the text string (modified Base85 encoding) that
2228
- * can be saved to the file and used for loading the node from the file.
2229
- * Format of the obtained binary dump depends on used platform (especially
2230
- * size of the pointer) and Ruby version.
2231
- */
2232
- static VALUE m_nodedump_to_text(VALUE self)
2233
- {
2234
- VALUE bin = m_nodedump_to_bin(self);
2235
- return base85r_encode(bin);
2236
- }
2237
-
2238
- /*
2239
- * Returns node object
2240
- */
2241
- static VALUE m_nodedump_node(VALUE self)
2242
- {
2243
- return rb_iv_get(self, "@node");
2244
- }
2245
-
2246
- /*
2247
- * This class can load and save Ruby code in the form of the
2248
- * platform-dependent syntax tree (made of NODEs). Such function
2249
- * allows to hide the source code from users. Main features:
2250
- *
2251
- * - Irreversible transformation of Ruby source code to the syntax tree
2252
- * - Representation of syntax tree in binary form dependent from the platform and Ruby version
2253
- * - Simple options for node inspection
2254
- * - Ruby 1.9.3, 2.2.x and 2.3.x support
2255
- * - Subroutines for custom code obfuscation
2256
- */
2257
- void Init_nodemarshal()
2258
- {
2259
- static VALUE cNodeMarshal;
2260
- init_nodes_table(nodes_ctbl, NODES_CTBL_SIZE);
2261
- base85r_init_tables();
2262
-
2263
- cNodeMarshal = rb_define_class("NodeMarshal", rb_cObject);
2264
- rb_define_singleton_method(cNodeMarshal, "base85r_encode", RUBY_METHOD_FUNC(m_base85r_encode), 1);
2265
- rb_define_singleton_method(cNodeMarshal, "base85r_decode", RUBY_METHOD_FUNC(m_base85r_decode), 1);
2266
-
2267
- rb_define_method(cNodeMarshal, "initialize", RUBY_METHOD_FUNC(m_nodedump_init), 2);
2268
- rb_define_method(cNodeMarshal, "to_hash", RUBY_METHOD_FUNC(m_nodedump_to_hash), 0);
2269
- rb_define_method(cNodeMarshal, "to_h", RUBY_METHOD_FUNC(m_nodedump_to_hash), 0);
2270
- rb_define_method(cNodeMarshal, "to_bin", RUBY_METHOD_FUNC(m_nodedump_to_bin), 0);
2271
- rb_define_method(cNodeMarshal, "to_text", RUBY_METHOD_FUNC(m_nodedump_to_text), 0);
2272
- rb_define_method(cNodeMarshal, "to_a", RUBY_METHOD_FUNC(m_nodedump_to_a), 0);
2273
- rb_define_method(cNodeMarshal, "to_ary", RUBY_METHOD_FUNC(m_nodedump_to_a), 0);
2274
- rb_define_method(cNodeMarshal, "dump_tree", RUBY_METHOD_FUNC(m_nodedump_parser_dump_tree), 0);
2275
- rb_define_method(cNodeMarshal, "dump_tree_short", RUBY_METHOD_FUNC(m_nodedump_dump_tree_short), 0);
2276
- rb_define_method(cNodeMarshal, "compile", RUBY_METHOD_FUNC(m_nodedump_compile), 0);
2277
- rb_define_method(cNodeMarshal, "show_offsets", RUBY_METHOD_FUNC(m_nodedump_show_offsets), 0);
2278
- rb_define_method(cNodeMarshal, "show_offsets=", RUBY_METHOD_FUNC(m_nodedump_set_show_offsets), 1);
2279
- // Methods for working with the information about the node
2280
- // a) literals, symbols, generic information
2281
- rb_define_method(cNodeMarshal, "symbols", RUBY_METHOD_FUNC(m_nodedump_symbols), 0);
2282
- rb_define_method(cNodeMarshal, "change_symbol", RUBY_METHOD_FUNC(m_nodedump_change_symbol), 2);
2283
- rb_define_method(cNodeMarshal, "literals", RUBY_METHOD_FUNC(m_nodedump_literals), 0);
2284
- rb_define_method(cNodeMarshal, "change_literal", RUBY_METHOD_FUNC(m_nodedump_change_literal), 2);
2285
- rb_define_method(cNodeMarshal, "inspect", RUBY_METHOD_FUNC(m_nodedump_inspect), 0);
2286
- rb_define_method(cNodeMarshal, "node", RUBY_METHOD_FUNC(m_nodedump_node), 0);
2287
- // b) node and file names
2288
- rb_define_method(cNodeMarshal, "nodename", RUBY_METHOD_FUNC(m_nodedump_nodename), 0);
2289
- rb_define_method(cNodeMarshal, "filename", RUBY_METHOD_FUNC(m_nodedump_filename), 0);
2290
- rb_define_method(cNodeMarshal, "filename=", RUBY_METHOD_FUNC(m_nodedump_set_filename), 1);
2291
- rb_define_method(cNodeMarshal, "filepath", RUBY_METHOD_FUNC(m_nodedump_filepath), 0);
2292
- rb_define_method(cNodeMarshal, "filepath=", RUBY_METHOD_FUNC(m_nodedump_set_filepath), 1);
2293
- // C structure wrappers
2294
- cNodeObjAddresses = rb_define_class("NodeObjAddresses", rb_cObject);
2295
- cNodeInfo = rb_define_class("NodeInfo", rb_cObject);
2296
- }
1
+ /*
2
+ * This file contains implementation of classes for Ruby nodes
3
+ * marshalization (i.e. loading and saving them from disk)
4
+ *
5
+ * (C) 2015-2017 Alexey Voskov
6
+ * License: BSD-2-Clause
7
+ */
8
+ #define __STDC_FORMAT_MACROS
9
+ #include <stdio.h>
10
+ #include <stdlib.h>
11
+ #include <inttypes.h>
12
+ #include <ruby.h>
13
+ #include <ruby/version.h>
14
+
15
+ /*
16
+ * Some global variables
17
+ */
18
+ static VALUE cNodeObjAddresses, cNodeInfo;
19
+
20
+ /*
21
+ * Part 1. .H files: nodedump functions + parts of Ruby internals
22
+ */
23
+ #include "nodedump.h"
24
+
25
+ #ifdef WITH_CUSTOM_RB_GLOBAL_ENTRY
26
+ /* Custom (and slow) implementation of rb_global_entry internal API for Ruby 2.3
27
+ (original rb_global_entry API was opened before Ruby 2.3)
28
+ It uses a hack with the node creation. The main idea of the hack is
29
+ to create a node from the expression containing only a name of the global variable
30
+ and extract global entry address from NODE_GVAR u3 "leaf" */
31
+ static struct rb_global_entry *rb_global_entry(ID id)
32
+ {
33
+ NODE *node, *gvar_node;
34
+ struct rb_global_entry *gentry;
35
+ /* a) Step 1: create node from the expression consisting only from
36
+ our global variable */
37
+ node = rb_compile_string("<compiled>", rb_id2str(id), NUM2INT(1));
38
+ if (nd_type(node) != NODE_SCOPE)
39
+ {
40
+ return NULL;
41
+ }
42
+ /* b) Trace the node to the NODE_GVAR */
43
+ gvar_node = node->u2.node;
44
+ if (nd_type(gvar_node) == NODE_PRELUDE) /* Present only in 2.3 */
45
+ {
46
+ gvar_node = gvar_node->u2.node;
47
+ }
48
+ if (nd_type(gvar_node) != NODE_GVAR) /* Error: no GVAR found */
49
+ {
50
+ return NULL;
51
+ }
52
+ /* c) Get the global entry address and return its address */
53
+ gentry = gvar_node->u3.entry;
54
+ return gentry;
55
+ }
56
+ #endif
57
+
58
+
59
+ /*
60
+ * Part 2. Information about the nodes
61
+ *
62
+ */
63
+
64
+ // Pre-2.0 Ruby versions don't use this version
65
+ #if RUBY_API_VERSION_MAJOR == 2
66
+ #define USE_RB_ARGS_INFO 1
67
+ #endif
68
+
69
+ #if RUBY_API_VERSION_MAJOR == 1
70
+ #define RESET_GC_FLAGS 1
71
+ #endif
72
+
73
+
74
+ // Some generic utilities
75
+ int is_value_in_heap(VALUE val)
76
+ {
77
+ if (val == Qfalse || val == Qtrue ||
78
+ val == Qnil || val == Qundef ||
79
+ (val & FIXNUM_FLAG)
80
+ #ifdef FLONUM_MASK
81
+ || ((val & FLONUM_MASK) == FLONUM_FLAG) // This memory trick with floats is present only in 2.x
82
+ #endif
83
+ )
84
+ {
85
+ return 0;
86
+ }
87
+ else
88
+ return 1;
89
+ }
90
+
91
+
92
+ /*
93
+ * Converts Ruby string with hexadecimal number
94
+ * to the Ruby VALUE
95
+ */
96
+ VALUE str_to_value(VALUE str)
97
+ {
98
+ intptr_t ans = (intptr_t) Qnil;
99
+ sscanf(RSTRING_PTR(str), "%"PRIxPTR, &ans);
100
+ return (VALUE) ans;
101
+ }
102
+
103
+
104
+ /*
105
+ * Converts Ruby VALUE (i.e. machine address) to the
106
+ * hexadecimal Ruby string
107
+ */
108
+ VALUE value_to_str(VALUE val)
109
+ {
110
+ char str[16];
111
+ sprintf(str, "%" PRIxPTR, (intptr_t) val);
112
+ return rb_str_new2(str);
113
+ }
114
+
115
+ /*
116
+ * Converts VALUE to the sequence of bytes using big-endian
117
+ * standard. Leading zero bytes are not written.
118
+ *
119
+ * Inputs
120
+ * val -- input value
121
+ * buf -- pointer to the output buffer
122
+ * Returns
123
+ * number of written bytes
124
+ */
125
+ int value_to_bin(VALUE val, unsigned char *buf)
126
+ {
127
+ int i, len = 0;
128
+ unsigned char byte;
129
+ for (i = sizeof(VALUE) - 1; i >= 0; i--)
130
+ {
131
+ byte = (unsigned char) ((val >> (i * 8)) & 0xFF);
132
+ if (len > 0 || byte != 0)
133
+ {
134
+ *buf++ = byte;
135
+ len++;
136
+ }
137
+ }
138
+ return len;
139
+ }
140
+
141
+ /*
142
+ * Converts sequence of bytes (big-endian standard) to the VALUE.
143
+ *
144
+ * Inputs
145
+ * buf -- pointer to the input buffer
146
+ * len -- number of bytes
147
+ * Returns
148
+ * VALUE
149
+ */
150
+ VALUE bin_to_value(unsigned char *buf, int len)
151
+ {
152
+ VALUE val = (VALUE) 0;
153
+ int i;
154
+ for (i = len - 1; i >= 0; i--)
155
+ val |= ((VALUE) *buf++) << (i * 8);
156
+ return val;
157
+ }
158
+
159
+ #define NODES_CTBL_SIZE 256
160
+ static int nodes_ctbl[NODES_CTBL_SIZE * 3];
161
+
162
+
163
+ /*
164
+ * Part 3. Functions for node marshalization
165
+ */
166
+
167
+ /*
168
+ * Keeps the information about node elements position
169
+ * in the memory and its IDs/ordinals for export to the file
170
+ */
171
+ typedef struct {
172
+ VALUE vals; // values: key=>val Hash
173
+ VALUE ids; // identifiers: key=>id Hash
174
+ VALUE pos; // free identifier
175
+ } LeafTableInfo;
176
+
177
+ void LeafTableInfo_init(LeafTableInfo *lti)
178
+ {
179
+ lti->vals = rb_hash_new();
180
+ lti->ids = rb_hash_new();
181
+ lti->pos = 0;
182
+ }
183
+
184
+ void LeafTableInfo_mark(LeafTableInfo *lti)
185
+ {
186
+ rb_gc_mark(lti->vals);
187
+ rb_gc_mark(lti->ids);
188
+ }
189
+
190
+
191
+ int LeafTableInfo_addEntry(LeafTableInfo *lti, VALUE key, VALUE value)
192
+ {
193
+ VALUE v_id = rb_hash_aref(lti->ids, key);
194
+ if (v_id == Qnil)
195
+ {
196
+ int id = lti->pos++;
197
+ rb_hash_aset(lti->vals, key, value);
198
+ rb_hash_aset(lti->ids, key, INT2FIX(id));
199
+ return id;
200
+ }
201
+ else
202
+ {
203
+ return FIX2INT(v_id);
204
+ }
205
+ }
206
+
207
+ /*
208
+ * Adds Ruby ID data type as the entry to the LeafTableInfo struct.
209
+ * Main features:
210
+ * 1) ID will be converted to Fixnum
211
+ * 2) If ID can be converted to string by rb_id2str it will be saved as
212
+ String object. Otherwise it will be converted to Fixnum.
213
+ */
214
+ int LeafTableInfo_addIDEntry(LeafTableInfo *lti, ID id)
215
+ {
216
+ VALUE r_idval = rb_id2str(id);
217
+ if (TYPE(r_idval) != T_STRING)
218
+ {
219
+ r_idval = INT2FIX(id);
220
+ }
221
+ return LeafTableInfo_addEntry(lti, INT2FIX(id), r_idval);
222
+ }
223
+
224
+ VALUE LeafTableInfo_getLeavesTable(LeafTableInfo *lti)
225
+ {
226
+ VALUE key, keys = rb_funcall(lti->vals, rb_intern("keys"), 0);
227
+ unsigned int i;
228
+ VALUE val;
229
+ for (i = 0; i < lti->pos; i++)
230
+ {
231
+ key = RARRAY_PTR(keys)[i];
232
+ val = rb_hash_aref(lti->vals, key);
233
+ rb_ary_store(keys, i, val);
234
+ }
235
+ return keys;
236
+ }
237
+
238
+ int LeafTableInfo_keyToID(LeafTableInfo *lti, VALUE key)
239
+ {
240
+ VALUE id = rb_hash_aref(lti->ids, key);
241
+ return (id == Qnil) ? -1 : FIX2INT(id);
242
+ }
243
+
244
+ VALUE LeafTableInfo_keyToValue(LeafTableInfo *lti, VALUE key)
245
+ {
246
+ return rb_hash_aref(lti->vals, key);
247
+ }
248
+
249
+ /* The structure keeps information about the node
250
+ that is required for its dumping to the file
251
+ (mainly hashes with relocatable identifiers) */
252
+ typedef struct {
253
+ LeafTableInfo syms; // Node symbols
254
+ LeafTableInfo lits; // Node literals
255
+ LeafTableInfo idtabs; // Table of identifiers
256
+ #ifdef USE_RB_ARGS_INFO
257
+ LeafTableInfo args; // Table of arguments
258
+ #endif
259
+ LeafTableInfo gentries; // Global variables table
260
+ LeafTableInfo nodes; // Table of nodes
261
+ LeafTableInfo pnodes; // Table of parent nodes
262
+ } NODEInfo;
263
+
264
+ void NODEInfo_init(NODEInfo *info)
265
+ {
266
+ LeafTableInfo_init(&(info->syms));
267
+ LeafTableInfo_init(&(info->lits));
268
+ LeafTableInfo_init(&(info->idtabs));
269
+ #ifdef USE_RB_ARGS_INFO
270
+ LeafTableInfo_init(&(info->args));
271
+ #endif
272
+ LeafTableInfo_init(&(info->gentries));
273
+ LeafTableInfo_init(&(info->nodes));
274
+ LeafTableInfo_init(&(info->pnodes));
275
+ }
276
+
277
+ void NODEInfo_mark(NODEInfo *info)
278
+ {
279
+ LeafTableInfo_mark(&(info->syms));
280
+ LeafTableInfo_mark(&(info->lits));
281
+ LeafTableInfo_mark(&(info->idtabs));
282
+ #ifdef USE_RB_ARGS_INFO
283
+ LeafTableInfo_mark(&(info->args));
284
+ #endif
285
+ LeafTableInfo_mark(&(info->gentries));
286
+ LeafTableInfo_mark(&(info->nodes));
287
+ LeafTableInfo_mark(&(info->pnodes));
288
+ }
289
+
290
+ void NODEInfo_free(NODEInfo *info)
291
+ {
292
+ xfree(info);
293
+ }
294
+
295
+ LeafTableInfo *NODEInfo_getTableByID(NODEInfo *info, int id)
296
+ {
297
+ switch (id)
298
+ {
299
+ case NT_ID:
300
+ return &info->syms;
301
+ case NT_VALUE:
302
+ return &info->lits;
303
+ case NT_IDTABLE:
304
+ return &info->idtabs;
305
+ #ifdef USE_RB_ARGS_INFO
306
+ case NT_ARGS:
307
+ return &info->args;
308
+ #endif
309
+ case NT_ENTRY:
310
+ return &info->gentries;
311
+ case NT_NODE:
312
+ return &info->nodes;
313
+ default:
314
+ return NULL;
315
+ }
316
+ }
317
+
318
+ /*
319
+ * Converts node value to the binary data
320
+ * Input parameters:
321
+ * info -- current NODEInfo structure
322
+ * node -- parent node (that contains the value)
323
+ * ptr -- pointer to the output memory buffer
324
+ * type -- type of the entry (NT_...)
325
+ * value -- node->u?.value VALUE
326
+ * child_id -- child node number (1,2,3)
327
+ * Returns:
328
+ * Byte that contains the next information
329
+ * a) upper half-byte: number of bytes written to the buffer
329
+ * b) lower half-byte: VL_... data type (for node loader)
331
+ */
332
+ #define DUMP_RAW_VALUE(vl_ans, vl) (vl_ans | (value_to_bin(vl, (unsigned char *) ptr) << 4))
333
+ static int dump_node_value(NODEInfo *info, char *ptr, NODE *node, int type, VALUE value, int child_id)
334
+ {
335
+ if (type == NT_NULL || type == NT_LONG)
336
+ {
337
+ return DUMP_RAW_VALUE(VL_RAW, value);
338
+ }
339
+ else if (type == NT_NODE)
340
+ {
341
+ if (value == 0)
342
+ { // Variant a: empty node
343
+ return DUMP_RAW_VALUE(VL_RAW, value);
344
+ }
345
+ else if (nd_type(node) == NODE_ATTRASGN && value == 1 && child_id == 1)
346
+ { // Special case: "self"
347
+ return DUMP_RAW_VALUE(VL_RAW, value);
348
+ }
349
+ else if (TYPE(value) != T_NODE)
350
+ {
351
+ rb_raise(rb_eArgError, "dump_node_value, parent node %s (ADR 0x%s): child node %d (ADR 0x%s): is not a node\n"
352
+ " Type: %s (%d), Value: %s",
353
+ ruby_node_name(nd_type(node)), RSTRING_PTR(value_to_str((VALUE) node)),
354
+ child_id, RSTRING_PTR(value_to_str(value)),
355
+ RSTRING_PTR(rb_funcall(rb_funcall(value, rb_intern("class"), 0), rb_intern("to_s"), 0)),
356
+ TYPE(value),
357
+ RSTRING_PTR(rb_funcall(value, rb_intern("to_s"), 0)) );
358
+ }
359
+ else
360
+ { // Variant b: not empty node
361
+ VALUE id = LeafTableInfo_keyToID(&info->nodes, value_to_str(value));
362
+ if (id == (VALUE) -1)
363
+ {
364
+ rb_raise(rb_eArgError, "dump_node_value, parent node %s (ADR 0x%s): child node %d (ADR 0x%s) not found",
365
+ ruby_node_name(nd_type(node)), RSTRING_PTR(value_to_str((VALUE) node)),
366
+ child_id, RSTRING_PTR(value_to_str(value)));
367
+ return VL_RAW;
368
+ }
369
+ else
370
+ {
371
+ return DUMP_RAW_VALUE(VL_NODE, id);
372
+ }
373
+ return VL_NODE;
374
+ }
375
+ }
376
+ else if (type == NT_VALUE)
377
+ {
378
+ if (!is_value_in_heap(value))
379
+ { // a) value that is inside VALUE
380
+ return DUMP_RAW_VALUE(VL_RAW, value);
381
+ }
382
+ else
383
+ { // b) value that requires reference to literals table
384
+ VALUE id = LeafTableInfo_keyToID(&info->lits, value_to_str(value));
385
+ if (id == (VALUE) -1)
386
+ rb_raise(rb_eArgError, "Cannot find literal");
387
+ else
388
+ return DUMP_RAW_VALUE(VL_LIT, id);
389
+ }
390
+ }
391
+ else if (type == NT_ID)
392
+ {
393
+ ID sym = (VALUE) value; // We are working with RAW data from RAM!
394
+ VALUE id = LeafTableInfo_keyToID(&info->syms, INT2FIX(sym));
395
+ if (id == (VALUE) -1)
396
+ {
397
+ rb_raise(rb_eArgError, "Cannot find symbol ID %d (%s) (parent node %s, line %d)",
398
+ (int) sym, RSTRING_PTR(rb_id2str(ID2SYM(sym))),
399
+ ruby_node_name(nd_type(node)), nd_line(node));
400
+ return VL_RAW;
401
+ }
402
+ else
403
+ {
404
+ return DUMP_RAW_VALUE(VL_ID, id);
405
+ }
406
+ }
407
+ else if (type == NT_ENTRY || type == NT_ARGS || type == NT_IDTABLE)
408
+ {
409
+ VALUE key = value_to_str(value);
410
+ LeafTableInfo *lti = NODEInfo_getTableByID(info, type);
411
+ VALUE id = LeafTableInfo_keyToID(lti, key);
412
+ if (id == (VALUE) -1)
413
+ {
414
+ rb_raise(rb_eArgError, "Cannot find some entry");
415
+ return VL_RAW;
416
+ }
417
+ else
418
+ {
419
+ switch(type)
420
+ {
421
+ case NT_ENTRY: return DUMP_RAW_VALUE(VL_GVAR, id);
422
+ case NT_IDTABLE: return DUMP_RAW_VALUE(VL_IDTABLE, id);
423
+ case NT_ARGS: return DUMP_RAW_VALUE(VL_ARGS, id);
424
+ default: rb_raise(rb_eArgError, "Internal error");
425
+ }
426
+ }
427
+ }
428
+ else
429
+ {
430
+ rb_raise(rb_eArgError, "Unknown child node type %d", type);
431
+ }
432
+ }
433
+
434
+ /*
435
+ * Converts information about nodes to the binary string.
436
+ * It uses dump_node_value function for the low-level conversion
437
+ * of node "leaves" to the actual binary data.
438
+ *
439
+ * See load_nodes_from_str for the description of the binary string format.
440
+ */
441
+ static VALUE dump_nodes(NODEInfo *info)
442
+ {
443
+ int node_size = sizeof(int) + sizeof(VALUE) * 4;
444
+ int i, nt, flags_len;
445
+ NODE *node;
446
+ char *bin, *ptr, *rtypes;
447
+ VALUE nodes_ary = rb_funcall(info->nodes.vals, rb_intern("keys"), 0);
448
+ VALUE nodes_bin = rb_str_new(NULL, RARRAY_LEN(nodes_ary) * node_size);
449
+ VALUE ut[3];
450
+ bin = RSTRING_PTR(nodes_bin);
451
+
452
+ for (i = 0, ptr = bin; i < RARRAY_LEN(nodes_ary); i++)
453
+ {
454
+ node = RNODE(str_to_value(RARRAY_PTR(nodes_ary)[i]));
455
+ nt = nd_type(node);
456
+ rtypes = (char *) ptr; ptr += sizeof(int);
457
+ flags_len = value_to_bin(node->flags >> 5, (unsigned char *) ptr); ptr += flags_len;
458
+
459
+ ut[0] = nodes_ctbl[nt * 3];
460
+ ut[1] = nodes_ctbl[nt * 3 + 1];
461
+ ut[2] = nodes_ctbl[nt * 3 + 2];
462
+ if ((nt == NODE_LASGN || nt == NODE_DASGN_CURR) && (void *) node->u2.value == (void *) -1) {
463
+ ut[1] = NT_LONG;
464
+ }
465
+ if (nt == NODE_OP_ASGN2 && LeafTableInfo_keyToID(&info->syms, INT2FIX(node->u1.value)) != -1)
466
+ {
467
+ ut[0] = NT_ID; ut[1] = NT_ID; ut[2] = NT_ID;
468
+ }
469
+
470
+ if (nt == NODE_ARGS_AUX)
471
+ {
472
+ ut[0] = NT_ID; ut[1] = NT_LONG; ut[2] = NT_NODE;
473
+ if (LeafTableInfo_keyToID(&info->syms, INT2FIX(node->u2.value)) != -1)
474
+ {
475
+ ut[1] = NT_ID;
476
+ }
477
+ else
478
+ {
479
+ ut[1] = NT_LONG;
480
+ }
481
+ if (node->u1.value == 0) ut[0] = NT_NULL;
482
+ if (node->u2.value == 0) ut[1] = NT_NULL;
483
+ if (node->u3.value == 0) ut[2] = NT_NULL;
484
+ }
485
+
486
+ if (nt = NODE_ARRAY)
487
+ {
488
+ /* Special undocumented cases:
489
+ * 1) the second child of the second element of an array
490
+ * contains reference to the last element (NT_NODE) not
491
+ * length (NT_LONG)
492
+ * 2) NODE_HASH: every second element in NODE_ARRAY chain
493
+ * contains pointers to NODES (instead of lengths)
494
+ * 3) NODE_DSTR: first node in NODE_ARRAY chain contains
495
+ * pointer to NODE (instead of lengths) */
496
+ NODE *pnode1, *pnode2;
497
+ pnode1 = (NODE *) str_to_value(LeafTableInfo_keyToValue(&info->pnodes, value_to_str((VALUE) node)));
498
+ if (pnode1 != NULL && nd_type(pnode1) == NODE_ARRAY &&
499
+ (NODE *) pnode1->u3.value == node)
500
+ {
501
+ int nt2;
502
+ pnode2 = (NODE *) str_to_value(LeafTableInfo_keyToValue(&info->pnodes, value_to_str((VALUE) pnode1)));
503
+ nt2 = nd_type(pnode2);
504
+ if ( (nt2 != NODE_ARRAY && nt2 != NODE_DSTR) ||
505
+ (NODE *) pnode2->u1.value == pnode1 )
506
+ {
507
+ ut[1] = NT_NODE;
508
+ }
509
+ else if (pnode1->u2.value == 2 && node == (NODE *) node->u2.value)
510
+ {
511
+ ut[1] = NT_NODE;
512
+ }
513
+ }
514
+ else if (pnode1 != NULL && nd_type(pnode1) == NODE_DSTR)
515
+ {
516
+ ut[1] = NT_NODE;
517
+ }
518
+ }
519
+
520
+ rtypes[0] = dump_node_value(info, ptr, node, ut[0], node->u1.value, 1);
521
+ ptr += (rtypes[0] & 0xF0) >> 4;
522
+ rtypes[1] = dump_node_value(info, ptr, node, ut[1], node->u2.value, 2);
523
+ ptr += (rtypes[1] & 0xF0) >> 4;
524
+ rtypes[2] = dump_node_value(info, ptr, node, ut[2], node->u3.value, 3);
525
+ ptr += (rtypes[2] & 0xF0) >> 4;
526
+ rtypes[3] = flags_len;
527
+ }
528
+ rb_str_resize(nodes_bin, (int) (ptr - bin) + 1);
529
+ return nodes_bin;
530
+ }
531
+
532
+
533
+ /*
534
+ * Transforms preprocessed node to Ruby hash that can be used
535
+ * to load the node from disk.
536
+ *
537
+ * See m_nodedump_to_hash function for output hash format details
538
+ */
539
+ VALUE NODEInfo_toHash(NODEInfo *info)
540
+ {
541
+ VALUE ans = rb_hash_new();
542
+ VALUE idtbl, idtabs = LeafTableInfo_getLeavesTable(&info->idtabs);
543
+ VALUE syms = LeafTableInfo_getLeavesTable(&info->syms);
544
+ VALUE args;
545
+ int i, j, id;
546
+ // Add some signatures
547
+ rb_hash_aset(ans, ID2SYM(rb_intern("MAGIC")), rb_str_new2(NODEMARSHAL_MAGIC));
548
+ rb_hash_aset(ans, ID2SYM(rb_intern("RUBY_PLATFORM")),
549
+ rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM")));
550
+ rb_hash_aset(ans, ID2SYM(rb_intern("RUBY_VERSION")),
551
+ rb_const_get(rb_cObject, rb_intern("RUBY_VERSION")));
552
+ // Write literals, symbols and global_entries arrays: they don't need to be corrected
553
+ rb_hash_aset(ans, ID2SYM(rb_intern("literals")), LeafTableInfo_getLeavesTable(&info->lits));
554
+ rb_hash_aset(ans, ID2SYM(rb_intern("symbols")), syms);
555
+ rb_hash_aset(ans, ID2SYM(rb_intern("global_entries")), LeafTableInfo_getLeavesTable(&info->gentries));
556
+ // Replace RAM IDs to disk IDs in id_tables
557
+ for (i = 0; i < RARRAY_LEN(idtabs); i++)
558
+ {
559
+ idtbl = RARRAY_PTR(idtabs)[i];
560
+ for (j = 0; j < RARRAY_LEN(idtbl); j++)
561
+ {
562
+ id = LeafTableInfo_keyToID(&info->syms, RARRAY_PTR(idtbl)[j]);
563
+
564
+ if (id == -1)
565
+ {
566
+ ID sym = FIX2INT(RARRAY_PTR(idtbl)[j]);
567
+ rb_raise(rb_eArgError, "Cannot find the symbol ID %d", (int) sym);
568
+ }
569
+ else
570
+ {
571
+ rb_ary_store(idtbl, j, INT2FIX(id));
572
+ }
573
+
574
+ }
575
+ }
576
+ rb_hash_aset(ans, ID2SYM(rb_intern("id_tables")), idtabs);
577
+ // Replace RAM IDs to disk IDs in args tables
578
+ #ifdef USE_RB_ARGS_INFO
579
+ args = LeafTableInfo_getLeavesTable(&info->args);
580
+ for (i = 0; i < RARRAY_LEN(args); i++)
581
+ {
582
+ VALUE args_entry = RARRAY_PTR(args)[i];
583
+ VALUE *eptr = RARRAY_PTR(args_entry);
584
+ int args_vals[5] = {0, 1, 7, 8, 9};
585
+ int args_ids[3] = {4, 5, 6};
586
+ if (RARRAY_LEN(args_entry) != 10)
587
+ rb_raise(rb_eArgError, "Corrupted args entry");
588
+ // Pointer to nodes to be replaced:
589
+ // a) VALUES
590
+ // (0) pre_init, (1) post_init,
591
+ // (7) kw_args, (8) kw_rest_arg, (9) opt_args
592
+ for (j = 0; j < 5; j++)
593
+ {
594
+ int ind = args_vals[j];
595
+ VALUE key = eptr[ind];
596
+ if (!strcmp(RSTRING_PTR(key), "0"))
597
+ eptr[ind] = INT2FIX(-1);
598
+ else
599
+ {
600
+ eptr[ind] = INT2FIX(LeafTableInfo_keyToID(&info->nodes, key));
601
+ if (FIX2INT(eptr[ind]) == -1)
602
+ rb_raise(rb_eArgError, "Unknown NODE in args tables");
603
+ }
604
+ }
605
+ // b) IDs (symbols)
606
+ // (4) first_post_arg (5) rest_arg (6) block_arg
607
+ for (j = 0; j < 3; j++)
608
+ {
609
+ int ind = args_ids[j];
610
+ VALUE key = eptr[ind];
611
+ if (FIX2INT(key) != 0)
612
+ {
613
+ eptr[ind] = INT2FIX(LeafTableInfo_keyToID(&info->syms, key));
614
+ if (FIX2INT(eptr[ind]) == -1)
615
+ rb_raise(rb_eArgError, "Unknown symbolic ID in args tables");
616
+ }
617
+ else
618
+ eptr[ind] = INT2FIX(-1);
619
+ }
620
+ }
621
+ #else
622
+ args = rb_ary_new();
623
+ #endif
624
+
625
+ rb_hash_aset(ans, ID2SYM(rb_intern("args")), args);
626
+ // Special case: NODES. Nodes are kept as binary string
627
+ rb_hash_aset(ans, ID2SYM(rb_intern("nodes")), dump_nodes(info));
628
+ return ans;
629
+ }
630
+
631
+
632
+ static void NODEInfo_addValue(NODEInfo *info, VALUE value)
633
+ {
634
+ if (is_value_in_heap(value))
635
+ {
636
+ VALUE lkey = value_to_str(value);
637
+ LeafTableInfo_addEntry(&info->lits, lkey, value);
638
+ }
639
+ }
640
+
641
+ /*
642
+ * Adds the information about Ruby NODE to the NODEInfo struct.
643
+ * It keeps the addresses of the node and its parents
644
+ */
645
+ static void NODEInfo_addNode(NODEInfo *info, NODE *node, NODE *pnode)
646
+ {
647
+ VALUE node_adr = value_to_str((VALUE) node);
648
+ VALUE pnode_adr = value_to_str((VALUE) pnode);
649
+ LeafTableInfo_addEntry(&info->nodes, node_adr, node_adr);
650
+ LeafTableInfo_addEntry(&info->pnodes, node_adr, pnode_adr);
651
+ }
652
+
653
+ /*
654
+ * Returns ID of the node using its address (VALUE)
655
+ * It is used during the process of dumping Ruby AST to disk
656
+ * for replacing of memory addresses into ordinals
657
+ */
658
+ static int NODEInfo_nodeAdrToID(NODEInfo *info, VALUE adr)
659
+ {
660
+ return LeafTableInfo_keyToID(&info->nodes, adr);
661
+ }
662
+
663
+ /*
664
+ * Function counts number of nodes and fills NODEInfo struct
665
+ * that is neccessary for the node saving to the HDD
666
+ */
667
+ static int count_num_of_nodes(NODE *node, NODE *parent, NODEInfo *info)
668
+ {
669
+ int ut[3], num, offset;
670
+ if (node == 0)
671
+ {
672
+ return 0;
673
+ }
674
+ else if (TYPE((VALUE) node) != T_NODE)
675
+ {
676
+ rb_raise(rb_eArgError, "count_num_of_nodes: parent node %s: child node (ADR 0x%s) is not a node; Type: %d (%s)",
677
+ ruby_node_name(nd_type(parent)), RSTRING_PTR(value_to_str((VALUE) node)), TYPE((VALUE) node),
678
+ RSTRING_PTR(rb_funcall(rb_funcall((VALUE) node, rb_intern("class"), 0), rb_intern("to_s"), 0))
679
+ );
680
+ return 0;
681
+ }
682
+ else
683
+ {
684
+ offset = nd_type(node) * 3;
685
+ ut[0] = nodes_ctbl[offset++];
686
+ ut[1] = nodes_ctbl[offset++];
687
+ ut[2] = nodes_ctbl[offset];
688
+
689
+ /* Special case: part of NODE_KW_ARG syntax in Ruby 2.x, e.g. def func(foo:, bar: 'default) */
690
+ if ((nd_type(node) == NODE_LASGN || nd_type(node) == NODE_DASGN_CURR) && (void *) node->u2.value == (void *) -1) {
691
+ ut[1] = NT_LONG; /* To keep -1 correctly */
692
+ }
693
+
694
+ /* Some another special cases */
695
+ if (nd_type(node) == NODE_OP_ASGN2 && nd_type(parent) == NODE_OP_ASGN2)
696
+ {
697
+ ut[0] = NT_ID;
698
+ ut[1] = NT_ID;
699
+ ut[2] = NT_ID;
700
+ }
701
+
702
+ /* Some Ruby 1.9.3 style function arguments (without rb_args_info) */
703
+ if (nd_type(node) == NODE_ARGS_AUX)
704
+ {
705
+ ut[0] = NT_ID;
706
+ ut[1] = (nd_type(parent) == NODE_ARGS_AUX) ? NT_LONG : NT_ID;
707
+ ut[2] = NT_NODE;
708
+
709
+ if (node->u1.value == 0) ut[0] = NT_NULL;
710
+ if (node->u2.value == 0) ut[1] = NT_NULL;
711
+ if (node->u3.value == 0) ut[2] = NT_NULL;
712
+ }
713
+ /* Some Ruby 1.9.3-specific code for NODE_ATTRASGN */
714
+ if (nd_type(node) == NODE_ATTRASGN)
715
+ {
716
+ if (node->u1.value == 1) ut[0] = NT_LONG;
717
+ }
718
+ /* Check if there is information about child nodes types */
719
+ if (ut[0] == NT_UNKNOWN || ut[1] == NT_UNKNOWN || ut[2] == NT_UNKNOWN)
720
+ {
721
+ rb_raise(rb_eArgError, "Cannot interpret node %d (%s)", nd_type(node), ruby_node_name(nd_type(node)));
722
+ }
723
+ /* Save the ID of the node */
724
+ num = 1;
725
+ NODEInfo_addNode(info, node, parent);
726
+ /* Analyze node childs */
727
+ /* a) child 1 */
728
+ if (ut[0] == NT_NODE)
729
+ {
730
+ num += count_num_of_nodes(node->u1.node, node, info);
731
+ }
732
+ else if (ut[0] == NT_ID)
733
+ {
734
+ LeafTableInfo_addIDEntry(&info->syms, node->u1.id);
735
+ }
736
+ else if (ut[0] == NT_VALUE)
737
+ {
738
+ if (TYPE(node->u1.value) == T_NODE)
739
+ rb_raise(rb_eArgError, "NODE instead of VALUE in child 1 of node %s", ruby_node_name(nd_type(node)));
740
+ NODEInfo_addValue(info, node->u1.value);
741
+ }
742
+ else if (ut[0] == NT_IDTABLE)
743
+ {
744
+ VALUE tkey = value_to_str(node->u1.value);
745
+ VALUE idtbl_ary = rb_ary_new();
746
+ ID *idtbl = (ID *) node->u1.value;
747
+ int i, size = (node->u1.value) ? *idtbl++ : 0;
748
+ for (i = 0; i < size; i++)
749
+ {
750
+ ID sym = *idtbl++;
751
+ rb_ary_push(idtbl_ary, INT2FIX(sym));
752
+ LeafTableInfo_addIDEntry(&info->syms, sym);
753
+ }
754
+ LeafTableInfo_addEntry(&info->idtabs, tkey, idtbl_ary);
755
+ }
756
+ else if (ut[0] != NT_LONG && ut[0] != NT_NULL)
757
+ {
758
+ rb_raise(rb_eArgError, "1!");
759
+ }
760
+ /* b) child 2 */
761
+ if (ut[1] == NT_NODE)
762
+ {
763
+ num += count_num_of_nodes(node->u2.node, node, info);
764
+ }
765
+ else if (ut[1] == NT_ID)
766
+ {
767
+ LeafTableInfo_addIDEntry(&info->syms, node->u2.id);
768
+ }
769
+ else if (ut[1] == NT_VALUE)
770
+ {
771
+ if (TYPE(node->u2.value) == T_NODE)
772
+ rb_raise(rb_eArgError, "NODE instead of VALUE in child 2 of node %s", ruby_node_name(nd_type(node)));
773
+ NODEInfo_addValue(info, node->u2.value);
774
+ }
775
+ else if (ut[1] != NT_LONG && ut[1] != NT_NULL)
776
+ {
777
+ rb_raise(rb_eArgError, "2!");
778
+ }
779
+
780
+ /* c) child 3 */
781
+ if (ut[2] == NT_NODE)
782
+ {
783
+ num += count_num_of_nodes(node->u3.node, node, info);
784
+ }
785
+ else if (ut[2] == NT_ID)
786
+ {
787
+ LeafTableInfo_addIDEntry(&info->syms, node->u3.id);
788
+ }
789
+ else if (ut[2] == NT_ARGS)
790
+ {
791
+ #ifdef USE_RB_ARGS_INFO
792
+ VALUE varg = Qtrue;
793
+ struct rb_args_info *ainfo;
794
+ ID asym;
795
+ ainfo = node->u3.args;
796
+ // Save child nodes
797
+ num += count_num_of_nodes(ainfo->pre_init, node, info);
798
+ num += count_num_of_nodes(ainfo->post_init, node, info);
799
+ num += count_num_of_nodes(ainfo->kw_args, node, info);
800
+ num += count_num_of_nodes(ainfo->kw_rest_arg, node, info);
801
+ num += count_num_of_nodes(ainfo->opt_args, node, info);
802
+ // Save rb_args_info structure content
803
+ varg = rb_ary_new();
804
+ rb_ary_push(varg, value_to_str((VALUE) ainfo->pre_init));
805
+ rb_ary_push(varg, value_to_str((VALUE) ainfo->post_init));
806
+ rb_ary_push(varg, INT2FIX(ainfo->pre_args_num));
807
+ rb_ary_push(varg, INT2FIX(ainfo->post_args_num));
808
+
809
+ asym = ainfo->first_post_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
810
+ if (asym != 0)
811
+ LeafTableInfo_addIDEntry(&info->syms, asym);
812
+
813
+ asym = ainfo->rest_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
814
+ if (asym != 0)
815
+ LeafTableInfo_addIDEntry(&info->syms, asym);
816
+
817
+ asym = ainfo->block_arg; rb_ary_push(varg, INT2FIX(asym)); // ID
818
+ if (asym != 0)
819
+ LeafTableInfo_addIDEntry(&info->syms, asym);
820
+ rb_ary_push(varg, value_to_str((VALUE) ainfo->kw_args));
821
+ rb_ary_push(varg, value_to_str((VALUE) ainfo->kw_rest_arg));
822
+ rb_ary_push(varg, value_to_str((VALUE) ainfo->opt_args));
823
+
824
+ LeafTableInfo_addEntry(&info->args, value_to_str((VALUE) ainfo), varg);
825
+ #else
826
+ rb_raise(rb_eArgError, "NT_ARGS entry without USE_RB_ARGS_INFO");
827
+ #endif
828
+ }
829
+ else if (ut[2] == NT_ENTRY)
830
+ {
831
+ ID gsym = node->u3.entry->id;
832
+ // Save symbol to the symbol table
833
+ int newid = LeafTableInfo_addIDEntry(&info->syms, gsym);
834
+ LeafTableInfo_addEntry(&info->gentries, value_to_str(node->u3.value), INT2FIX(newid));
835
+ }
836
+ else if (ut[2] != NT_LONG && ut[2] != NT_NULL)
837
+ {
838
+ rb_raise(rb_eArgError, "Invalid child node 3 of node %s: TYPE %d, VALUE %"PRIxPTR,
839
+ ruby_node_name(nd_type(node)), ut[2], (uintptr_t) (node->u3.value));
840
+ }
841
+
842
+ return num;
843
+ }
844
+ }
845
+
846
+
847
+
848
+ //-------------------------------------------------------------------------
849
+
850
+ /*
851
+ * Part 4. Functions for loading marshalled nodes
852
+ */
853
+ typedef struct {
854
+ ID *syms_adr; // Table of symbols
855
+ int syms_len;
856
+
857
+ VALUE *lits_adr; // Table of literals
858
+ int lits_len;
859
+
860
+ ID **idtbls_adr; // Table of symbols tables
861
+ int idtbls_len;
862
+
863
+ struct rb_global_entry **gvars_adr; // Table of global variables entries
864
+ int gvars_len;
865
+
866
+ NODE **nodes_adr; // Table of nodes
867
+ int nodes_len;
868
+ #ifdef USE_RB_ARGS_INFO
869
+ struct rb_args_info **args_adr; // Table of code blocks arguments
870
+ int args_len;
871
+ #endif
872
+ } NODEObjAddresses;
873
+
874
+
875
+ void NODEObjAddresses_free(NODEObjAddresses *obj)
876
+ {
877
+ xfree(obj->syms_adr);
878
+ xfree(obj->idtbls_adr);
879
+ xfree(obj->gvars_adr);
880
+ xfree(obj->nodes_adr);
881
+ #ifdef USE_RB_ARGS_INFO
882
+ xfree(obj->args_adr);
883
+ #endif
884
+ xfree(obj);
885
+ }
886
+
887
+
888
+
889
+ void rbstr_printf(VALUE str, const char *fmt, ...)
890
+ {
891
+ char buf[1024];
892
+ va_list ptr;
893
+
894
+ va_start(ptr, fmt);
895
+ vsprintf(buf, fmt, ptr);
896
+ rb_str_append(str, rb_str_new2(buf));
897
+ va_end(ptr);
898
+ }
899
+
900
+ const char *symid_to_cstr(ID symid)
901
+ {
902
+ const char *str_null = "<NULL>", *str_intern = "<NONAME>";
903
+ const char *str_sym;
904
+
905
+ if (symid == 0)
906
+ str_sym = str_null;
907
+ else
908
+ {
909
+ VALUE rbstr_sym = rb_id2str(symid);
910
+ if (TYPE(rbstr_sym) == T_STRING)
911
+ str_sym = RSTRING_PTR(rb_id2str(symid));
912
+ else
913
+ str_sym = str_intern;
914
+ }
915
+ return str_sym;
916
+ }
917
+
918
+ #define PRINT_NODE_TAB for (j = 0; j < tab; j++) rbstr_printf(str, " ");
919
+ /*
920
+ * Recursively transforms node into Ruby string
921
+ * str -- output Ruby string
922
+ * node -- input Ruby NODE
923
+ * tab -- number of tabulations during print
924
+ * show_offsets -- 0/1 show/hide addresses and symbol IDs
925
+ */
926
+ static void print_node(VALUE str, NODE *node, int tab, int show_offsets)
927
+ {
928
+ int i, j, type, ut[3];
929
+ VALUE uref[3];
930
+
931
+ PRINT_NODE_TAB
932
+ if (node == NULL)
933
+ {
934
+ rbstr_printf(str, "(NULL)\n");
935
+ return;
936
+ }
937
+ type = nd_type(node);
938
+
939
+ if (show_offsets)
940
+ {
941
+ rbstr_printf(str, "@ %s | %16"PRIxPTR " | %16"PRIxPTR " %16"PRIxPTR " %16"PRIxPTR " (line %d)\n",
942
+ ruby_node_name(type),
943
+ (intptr_t) node,
944
+ (intptr_t) node->u1.value, (intptr_t) node->u2.value, (intptr_t) node->u3.value,
945
+ nd_line(node));
946
+ }
947
+ else
948
+ {
949
+ rbstr_printf(str, "@ %s (line %d)\n", ruby_node_name(type), nd_line(node));
950
+ }
951
+
952
+ ut[0] = nodes_ctbl[type * 3];
953
+ ut[1] = nodes_ctbl[type * 3 + 1];
954
+ ut[2] = nodes_ctbl[type * 3 + 2];
955
+
956
+ uref[0] = node->u1.value;
957
+ uref[1] = node->u2.value;
958
+ uref[2] = node->u3.value;
959
+
960
+ if ((type == NODE_LASGN || type == NODE_DASGN_CURR) && (void *) node->u2.value == (void *) -1)
961
+ {
962
+ ut[1] = NT_LONG;
963
+ }
964
+
965
+ for (i = 0; i < 3; i++)
966
+ {
967
+
968
+ if (ut[i] == NT_NODE)
969
+ {
970
+ if (nd_type(node) != NODE_OP_ASGN2 || i != 2)
971
+ print_node(str, RNODE(uref[i]), tab + 1, show_offsets);
972
+ else
973
+ {
974
+ if (ut[i] != 0 && TYPE(ut[i]) != T_NODE)
975
+ rb_raise(rb_eArgError, "print_node: broken node 0x%s", RSTRING_PTR(value_to_str(ut[i])));
976
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
977
+ rbstr_printf(str, "%"PRIxPTR " %"PRIxPTR " %"PRIxPTR"\n",
978
+ (intptr_t) RNODE(uref[i])->u1.value,
979
+ (intptr_t) RNODE(uref[i])->u2.value,
980
+ (intptr_t) RNODE(uref[i])->u3.value);
981
+ }
982
+ }
983
+ else if (ut[i] == NT_VALUE)
984
+ {
985
+ char *class_name = RSTRING_PTR(rb_funcall(rb_funcall(uref[i], rb_intern("class"), 0), rb_intern("to_s"), 0));
986
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
987
+ if (show_offsets)
988
+ {
989
+ rbstr_printf(str, ">| ADR: %"PRIxPTR"; CLASS: %s (TYPE %d); VALUE: %s\n",
990
+ (intptr_t) uref[i],
991
+ class_name, TYPE(uref[i]),
992
+ RSTRING_PTR(rb_funcall(uref[i], rb_intern("to_s"), 0)));
993
+ }
994
+ else
995
+ {
996
+ rbstr_printf(str, ">| CLASS: %s (TYPE %d); VALUE: %s\n",
997
+ class_name, TYPE(uref[i]),
998
+ RSTRING_PTR(rb_funcall(uref[i], rb_intern("to_s"), 0)));
999
+ }
1000
+ }
1001
+ else if (ut[i] == NT_ID)
1002
+ {
1003
+ const char *str_sym = symid_to_cstr(uref[i]);
1004
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1005
+ if (show_offsets)
1006
+ rbstr_printf(str, ">| ID: %d; SYMBOL: :%s\n", (ID) uref[i], str_sym);
1007
+ else
1008
+ rbstr_printf(str, ">| SYMBOL: :%s\n", str_sym);
1009
+ }
1010
+ else if (ut[i] == NT_LONG)
1011
+ {
1012
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1013
+ rbstr_printf(str, ">| %"PRIxPTR "\n", (intptr_t) uref[i]);
1014
+ }
1015
+ else if (ut[i] == NT_NULL)
1016
+ {
1017
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1018
+ rbstr_printf(str, ">| (NULL)\n");
1019
+ }
1020
+ else if (ut[i] == NT_ARGS)
1021
+ {
1022
+ #ifdef USE_RB_ARGS_INFO
1023
+ struct rb_args_info *ainfo;
1024
+ #endif
1025
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1026
+ rbstr_printf(str, ">| ARGS\n");
1027
+ #ifdef USE_RB_ARGS_INFO
1028
+ ainfo = node->u3.args;
1029
+ /* Print generic info about the structure */
1030
+ PRINT_NODE_TAB; rbstr_printf(str, " PRE_INIT: %16" PRIxPTR "\n", ainfo->pre_init);
1031
+ PRINT_NODE_TAB; rbstr_printf(str, " POST_INIT: %16" PRIxPTR "\n", ainfo->post_init);
1032
+ PRINT_NODE_TAB; rbstr_printf(str, " KW_ARGS: %16" PRIxPTR "\n", ainfo->kw_args);
1033
+ PRINT_NODE_TAB; rbstr_printf(str, " KW_REST_ARG: %16" PRIxPTR "\n", ainfo->kw_rest_arg);
1034
+ PRINT_NODE_TAB; rbstr_printf(str, " OPT_ARGS: %16" PRIxPTR "\n", ainfo->opt_args);
1035
+ PRINT_NODE_TAB; rbstr_printf(str, " pre_args_num: %d\n", ainfo->pre_args_num);
1036
+ PRINT_NODE_TAB; rbstr_printf(str, " post_args_num: %d\n", ainfo->post_args_num);
1037
+ /* Print information about symbols */
1038
+ if (show_offsets)
1039
+ {
1040
+ PRINT_NODE_TAB; rbstr_printf(str, " first_post_arg: %s (ID %X)\n",
1041
+ symid_to_cstr(ainfo->first_post_arg), ainfo->first_post_arg);
1042
+ PRINT_NODE_TAB; rbstr_printf(str, " rest_arg: %s (ID %X)\n",
1043
+ symid_to_cstr(ainfo->rest_arg), ainfo->rest_arg);
1044
+ PRINT_NODE_TAB; rbstr_printf(str, " block_arg: %s (ID %X)\n",
1045
+ symid_to_cstr(ainfo->block_arg), ainfo->block_arg);
1046
+ }
1047
+ else
1048
+ {
1049
+ PRINT_NODE_TAB; rbstr_printf(str, " first_post_arg: %s\n",
1050
+ symid_to_cstr(ainfo->first_post_arg));
1051
+ PRINT_NODE_TAB; rbstr_printf(str, " rest_arg: %s\n",
1052
+ symid_to_cstr(ainfo->rest_arg));
1053
+ PRINT_NODE_TAB; rbstr_printf(str, " block_arg: %s\n",
1054
+ symid_to_cstr(ainfo->block_arg));
1055
+ }
1056
+ /* Print information about child nodes */
1057
+ print_node(str, RNODE(ainfo->pre_init), tab + 2, show_offsets);
1058
+ print_node(str, RNODE(ainfo->post_init), tab + 2, show_offsets);
1059
+ print_node(str, RNODE(ainfo->kw_args), tab + 2, show_offsets);
1060
+ print_node(str, RNODE(ainfo->kw_rest_arg), tab + 2, show_offsets);
1061
+ print_node(str, RNODE(ainfo->opt_args), tab + 2, show_offsets);
1062
+ #endif
1063
+ }
1064
+ else if (ut[i] == NT_IDTABLE)
1065
+ {
1066
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1067
+ rbstr_printf(str, ">| IDTABLE\n");
1068
+ }
1069
+ else if (ut[i] == NT_ENTRY)
1070
+ {
1071
+ struct rb_global_entry *gentry;
1072
+ gentry = (struct rb_global_entry *) uref[i];
1073
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1074
+ rbstr_printf(str, ">| [GLOBAL ENTRY PTR=0x%"PRIxPTR" ID=%X]\n", (uintptr_t) gentry->var, gentry->id);
1075
+ }
1076
+ else
1077
+ {
1078
+ PRINT_NODE_TAB; rbstr_printf(str, " ");
1079
+ rbstr_printf(str, ">| [UNKNOWN]\n");
1080
+ }
1081
+ }
1082
+ }
1083
+
1084
+
1085
+
1086
+ void resolve_syms_ords(VALUE data, NODEObjAddresses *relocs)
1087
+ {
1088
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("symbols")));
1089
+ int i;
1090
+ if (tbl_val == Qnil)
1091
+ {
1092
+ rb_raise(rb_eArgError, "Cannot find symbols table");
1093
+ }
1094
+ if (TYPE(tbl_val) != T_ARRAY)
1095
+ {
1096
+ rb_raise(rb_eArgError, "Symbols table is not an array");
1097
+ }
1098
+ relocs->syms_len = RARRAY_LEN(tbl_val);
1099
+ relocs->syms_adr = ALLOC_N(ID, relocs->syms_len);
1100
+ for (i = 0; i < relocs->syms_len; i++)
1101
+ {
1102
+ VALUE r_sym = RARRAY_PTR(tbl_val)[i];
1103
+ if (TYPE(r_sym) == T_STRING)
1104
+ { /* Created symbol will be immune to garbage collector */
1105
+ relocs->syms_adr[i] = rb_intern(RSTRING_PTR(r_sym));
1106
+ }
1107
+ else if (TYPE(r_sym) == T_FIXNUM)
1108
+ {
1109
+ relocs->syms_adr[i] = (ID) FIX2INT(r_sym);
1110
+ }
1111
+ else
1112
+ {
1113
+ rb_raise(rb_eArgError, "Symbols table is corrupted");
1114
+ }
1115
+ }
1116
+ }
1117
+
1118
+ void resolve_lits_ords(VALUE data, NODEObjAddresses *relocs)
1119
+ {
1120
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("literals")));
1121
+ int i;
1122
+ if (tbl_val == Qnil)
1123
+ {
1124
+ rb_raise(rb_eArgError, "Cannot find literals table");
1125
+ }
1126
+ if (TYPE(tbl_val) != T_ARRAY)
1127
+ {
1128
+ rb_raise(rb_eArgError, "Literals table is not an array");
1129
+ }
1130
+ relocs->lits_adr = RARRAY_PTR(tbl_val);
1131
+ relocs->lits_len = RARRAY_LEN(tbl_val);
1132
+ /* Mark all symbols as "immortal" (i.e. not collectable
1133
+ by Ruby GC): some of them can be used in the syntax tree!
1134
+ See the presentation of Narihiro Nakamura, author of
1135
+ symbol GC in Ruby 2.x for details
1136
+ http://www.slideshare.net/authorNari/symbol-gc */
1137
+ for (i = 0; i < relocs->lits_len; i++)
1138
+ {
1139
+ if (TYPE(relocs->lits_adr[i]) == T_SYMBOL)
1140
+ {
1141
+ SYM2ID(relocs->lits_adr[i]);
1142
+ }
1143
+ }
1144
+ }
1145
+
1146
+ void resolve_gvars_ords(VALUE data, NODEObjAddresses *relocs)
1147
+ {
1148
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("global_entries")));
1149
+ int i;
1150
+
1151
+ if (tbl_val == Qnil)
1152
+ {
1153
+ rb_raise(rb_eArgError, "Cannot find global entries table");
1154
+ }
1155
+ if (TYPE(tbl_val) != T_ARRAY)
1156
+ {
1157
+ rb_raise(rb_eArgError, "Global entries table should be an array");
1158
+ }
1159
+ relocs->gvars_len = RARRAY_LEN(tbl_val);
1160
+ relocs->gvars_adr = ALLOC_N(struct rb_global_entry *, relocs->gvars_len);
1161
+ for (i = 0; i < relocs->gvars_len; i++)
1162
+ {
1163
+ int ind = FIX2INT(RARRAY_PTR(tbl_val)[i]);
1164
+ ID sym = relocs->syms_adr[ind];
1165
+ relocs->gvars_adr[i] = rb_global_entry(sym);
1166
+ }
1167
+ }
1168
+
1169
+
1170
+ void resolve_idtbls_ords(VALUE data, NODEObjAddresses *relocs)
1171
+ {
1172
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("id_tables")));
1173
+ int i, j, idnum;
1174
+
1175
+ if (tbl_val == Qnil)
1176
+ {
1177
+ rb_raise(rb_eArgError, "Cannot find id_tables entries");
1178
+ }
1179
+ relocs->idtbls_len = RARRAY_LEN(tbl_val);
1180
+ relocs->idtbls_adr = ALLOC_N(ID *, relocs->idtbls_len);
1181
+ for (i = 0; i < relocs->idtbls_len; i++)
1182
+ {
1183
+ VALUE idtbl = RARRAY_PTR(tbl_val)[i];
1184
+ idnum = RARRAY_LEN(idtbl);
1185
+ if (idnum == 0)
1186
+ { // Empty table: NULL pointer in the address table
1187
+ relocs->idtbls_adr[i] = NULL;
1188
+ }
1189
+ else
1190
+ { // Filled table: pointer to dynamic memory
1191
+ relocs->idtbls_adr[i] = ALLOC_N(ID, idnum + 1);
1192
+ relocs->idtbls_adr[i][0] = idnum;
1193
+ for (j = 0; j < idnum; j++)
1194
+ {
1195
+ int ind = FIX2INT(RARRAY_PTR(idtbl)[j]);
1196
+ relocs->idtbls_adr[i][j+1] = relocs->syms_adr[ind];
1197
+ }
1198
+ }
1199
+ }
1200
+ }
1201
+
1202
+ void resolve_nodes_ords(VALUE data, int num_of_nodes, NODEObjAddresses *relocs)
1203
+ {
1204
+ int i;
1205
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("nodes")));
1206
+ if (tbl_val == Qnil)
1207
+ {
1208
+ rb_raise(rb_eArgError, "Cannot find nodes entries");
1209
+ }
1210
+ if (TYPE(tbl_val) != T_STRING)
1211
+ {
1212
+ rb_raise(rb_eArgError, "Nodes description must be a string");
1213
+ }
1214
+ relocs->nodes_adr = ALLOC_N(NODE *, num_of_nodes);
1215
+ relocs->nodes_len = num_of_nodes;
1216
+ for (i = 0; i < num_of_nodes; i++)
1217
+ {
1218
+ relocs->nodes_adr[i] = (NODE *) NEW_NODE((enum node_type) 0, 0, 0, 0);
1219
+ }
1220
+ }
1221
+
1222
#ifdef USE_RB_ARGS_INFO
/*
 * Converts the ordinal of a node from an args entry to the address
 * of the node; -1 means "no node" and gives NULL.
 * Raises ArgumentError if the ordinal is outside the table of nodes.
 */
static NODE *args_ord_to_node(VALUE ord_val, const NODEObjAddresses *relocs)
{
    int ord = NUM2INT(ord_val);
    if (ord < -1 || ord >= relocs->nodes_len)
        rb_raise(rb_eArgError, "Invalid node ordinal %d", ord);
    return (ord == -1) ? NULL : relocs->nodes_adr[ord];
}

/*
 * Converts the ordinal of a symbol from an args entry to the symbol ID;
 * -1 means "no symbol" and gives 0.
 * tag is the number of the field (1..3) shown in the error message.
 * Raises ArgumentError if the ordinal is outside the table of symbols.
 */
static ID args_ord_to_sym(VALUE ord_val, const NODEObjAddresses *relocs, int tag)
{
    int ord = NUM2INT(ord_val);
    if (ord < -1 || ord >= relocs->syms_len)
        rb_raise(rb_eArgError, "%d- Invalid symbol ID ordinal %d", tag, ord);
    return (ord == -1) ? 0 : relocs->syms_adr[ord];
}

/*
 * Fills the table of code blocks arguments (relocs->args_adr,
 * relocs->args_len) using the "args" array of the loaded hash.
 * Every entry is an array of 10 elements:
 *   (0) pre_init, (1) post_init          -- node ordinals
 *   (2) pre_args_num, (3) post_args_num  -- plain numbers
 *   (4) first_post_arg, (5) rest_arg, (6) block_arg -- symbol ordinals
 *   (7) kw_args, (8) kw_rest_arg, (9) opt_args      -- node ordinals
 * The tables of symbols and nodes must be created before this call.
 *
 * Raises ArgumentError if the table is absent or corrupted.
 */
void resolve_args_ords(VALUE data, NODEObjAddresses *relocs)
{
    int i;
    VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("args")));

    if (tbl_val == Qnil)
    {
        rb_raise(rb_eArgError, "Cannot find args entries table");
    }
    if (TYPE(tbl_val) != T_ARRAY)
    {
        rb_raise(rb_eArgError, "args description must be an array");
    }
    relocs->args_len = (int) RARRAY_LEN(tbl_val);
    relocs->args_adr = ALLOC_N(struct rb_args_info *, relocs->args_len);
    for (i = 0; i < relocs->args_len; i++)
    {
        VALUE ainfo_val, *aiptr;
        struct rb_args_info resolved;

        ainfo_val = RARRAY_PTR(tbl_val)[i];
        if (TYPE(ainfo_val) != T_ARRAY || RARRAY_LEN(ainfo_val) != 10)
        {
            rb_raise(rb_eArgError, "args entry %d is corrupted", i);
        }
        /* The raw pointer is taken only after the type check */
        aiptr = RARRAY_PTR(ainfo_val);

        /* The entry is resolved on the stack: the structure is allocated
           only after all checks, so nothing is lost if a check raises */
        MEMZERO(&resolved, struct rb_args_info, 1);
        // Resolve nodes
        resolved.pre_init = args_ord_to_node(aiptr[0], relocs);
        resolved.post_init = args_ord_to_node(aiptr[1], relocs);
        resolved.kw_args = args_ord_to_node(aiptr[7], relocs);
        resolved.kw_rest_arg = args_ord_to_node(aiptr[8], relocs);
        resolved.opt_args = args_ord_to_node(aiptr[9], relocs);
        // Values that don't need ordinal resolving
        resolved.pre_args_num = NUM2INT(aiptr[2]);
        resolved.post_args_num = NUM2INT(aiptr[3]);
        // Resolve symbolic ordinals
        resolved.first_post_arg = args_ord_to_sym(aiptr[4], relocs, 1);
        resolved.rest_arg = args_ord_to_sym(aiptr[5], relocs, 2);
        resolved.block_arg = args_ord_to_sym(aiptr[6], relocs, 3);

        relocs->args_adr[i] = ALLOC(struct rb_args_info);
        *relocs->args_adr[i] = resolved;
    }
}
#endif
1307
+
1308
+ /*
1309
+ * Transforms binary data with nodes descriptions into Ruby AST (i.e.
1310
+ * ternary tree of nodes). Each node is represented in the next binary format:
1311
+ *
1312
+ * [4 bytes -- pointers info] [node flags] [child ORD1] [child ORD2] [child ORD3]
1313
+ *
1314
+ * Pointers info:
1315
+ * BYTE -- child 1 info (bits 7..4 -- ordinal type, bits 3..0 -- ordinal size, bytes)
1316
+ * BYTE -- child 2 info
1317
+ * BYTE -- child 3 info
1318
+ * BYTE -- node flags length, bytes
1319
+ * Node flags:
1320
+ * node->flags field packed by bin_to_value function
1321
+ * child ORDi Ordinal of ith node child packed by bin_to_value_function
1322
+ * (it will be transformed to the real address in memory, i.e. pointer
1323
+ * or symbol ID during data loading)
1324
+ */
1325
+ void load_nodes_from_str(VALUE data, NODEObjAddresses *relocs)
1326
+ {
1327
+ int i, j;
1328
+ VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("nodes")));
1329
+ unsigned char *bin = (unsigned char *) RSTRING_PTR(tbl_val);
1330
+ NODE *node = NULL;
1331
+ for (i = 0; i < relocs->nodes_len; i++)
1332
+ {
1333
+ int rtypes[4];
1334
+ VALUE u[3], flags;
1335
+ // Read data structure info
1336
+ for (j = 0; j < 4; j++)
1337
+ rtypes[j] = *bin++;
1338
+ flags = bin_to_value(bin, rtypes[3]); bin += rtypes[3];
1339
+ for (j = 0; j < 3; j++)
1340
+ {
1341
+ int val_len = (rtypes[j] & 0xF0) >> 4;
1342
+ u[j] = bin_to_value(bin, val_len);
1343
+ bin += val_len;
1344
+ rtypes[j] &= 0x0F;
1345
+
1346
+ }
1347
+ if ((char *)bin - RSTRING_PTR(tbl_val) > RSTRING_LEN(tbl_val))
1348
+ rb_raise(rb_eArgError, "Nodes binary dump is too short");
1349
+ // Resolving all addresses
1350
+ for (j = 0; j < 3; j++)
1351
+ {
1352
+ switch(rtypes[j])
1353
+ {
1354
+ case VL_RAW: // Do nothing: it is raw data
1355
+ break;
1356
+ case VL_NODE:
1357
+ if (u[j] >= (unsigned int) relocs->nodes_len)
1358
+ rb_raise(rb_eArgError, "Cannot resolve VL_NODE entry %d", (int) u[j]);
1359
+ u[j] = (VALUE) relocs->nodes_adr[u[j]];
1360
+ if (TYPE(u[j]) != T_NODE)
1361
+ rb_raise(rb_eArgError, "load_nodes_from_str: nodes memory corrupted");
1362
+ break;
1363
+ case VL_ID:
1364
+ if (u[j] >= (unsigned int) relocs->syms_len)
1365
+ rb_raise(rb_eArgError, "Cannot resolve VL_ID entry %d", (int) u[j]);
1366
+ u[j] = relocs->syms_adr[u[j]];
1367
+ break;
1368
+ case VL_GVAR:
1369
+ if (u[j] >= (unsigned int) relocs->gvars_len)
1370
+ rb_raise(rb_eArgError, "Cannot resolve VL_GVAR entry %d", (int) u[j]);
1371
+ u[j] = (VALUE) relocs->gvars_adr[u[j]];
1372
+ break;
1373
+ case VL_IDTABLE:
1374
+ if (u[j] >= (unsigned int) relocs->idtbls_len)
1375
+ rb_raise(rb_eArgError, "Cannot resolve VL_IDTABLE entry %d", (int) u[j]);
1376
+ u[j] = (VALUE) relocs->idtbls_adr[u[j]];
1377
+ break;
1378
+ #ifdef USE_RB_ARGS_INFO
1379
+ case VL_ARGS:
1380
+ if (u[j] >= (unsigned int) relocs->args_len)
1381
+ rb_raise(rb_eArgError, "Cannot resolve VL_ARGS entry %d", (int) u[j]);
1382
+ u[j] = (VALUE) relocs->args_adr[u[j]];
1383
+ break;
1384
+ #endif
1385
+ case VL_LIT:
1386
+ if (u[j] >= (unsigned int) relocs->lits_len)
1387
+ rb_raise(rb_eArgError, "Cannot resolve VL_LIT entry %d", (int) u[j]);
1388
+ u[j] = (VALUE) relocs->lits_adr[u[j]];
1389
+ break;
1390
+ default:
1391
+ rb_raise(rb_eArgError, "Unknown RTYPE %d", rtypes[j]);
1392
+ }
1393
+ }
1394
+
1395
+ // Fill classic node structure
1396
+ node = relocs->nodes_adr[i];
1397
+ #ifdef RESET_GC_FLAGS
1398
+ flags = flags & (~0x3); // Ruby 1.9.x -- specific thing
1399
+ #endif
1400
+ node->flags = (flags << 5) | T_NODE;
1401
+ node->nd_reserved = 0;
1402
+ node->u1.value = u[0];
1403
+ node->u2.value = u[1];
1404
+ node->u3.value = u[2];
1405
+ }
1406
+ }
1407
+
1408
+ /*
1409
+ * Returns the value of string hash field using symbolic key
1410
+ */
1411
+ static VALUE get_hash_strfield(VALUE hash, const char *idtxt)
1412
+ {
1413
+ VALUE str = rb_hash_aref(hash, ID2SYM(rb_intern(idtxt)));
1414
+ if (TYPE(str) != T_STRING)
1415
+ {
1416
+ rb_raise(rb_eArgError, "Hash field %s is not a string", idtxt);
1417
+ return Qnil;
1418
+ }
1419
+ else
1420
+ {
1421
+ return str;
1422
+ }
1423
+ }
1424
+
1425
+ /*
1426
+ * Check validity of node hash representation signatures ("magic" values)
1427
+ */
1428
+ static VALUE check_hash_magic(VALUE data)
1429
+ {
1430
+ VALUE val, refval;
1431
+ // MAGIC signature must be valid
1432
+ val = get_hash_strfield(data, "MAGIC");
1433
+ if (strcmp(NODEMARSHAL_MAGIC, RSTRING_PTR(val)))
1434
+ rb_raise(rb_eArgError, "Bad value of MAGIC signature");
1435
+ // RUBY_PLATFORM signature must match the current platform
1436
+ val = get_hash_strfield(data, "RUBY_PLATFORM");
1437
+ refval = rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM"));
1438
+ if (strcmp(RSTRING_PTR(refval), RSTRING_PTR(val)))
1439
+ rb_raise(rb_eArgError, "Incompatible RUBY_PLATFORM value %s", RSTRING_PTR(val));
1440
+ // RUBY_VERSION signature must match the used Ruby interpreter
1441
+ val = get_hash_strfield(data, "RUBY_VERSION");
1442
+ refval = rb_const_get(rb_cObject, rb_intern("RUBY_VERSION"));
1443
+ if (strcmp(RSTRING_PTR(refval), RSTRING_PTR(val)))
1444
+ rb_raise(rb_eArgError, "Incompatible RUBY_VERSION value %s", RSTRING_PTR(val));
1445
+ return Qtrue;
1446
+ }
1447
+
1448
+ /*
1449
+ * Part 5. C-to-Ruby interface
1450
+ *
1451
+ */
1452
+
1453
+ /*
1454
+ * Restore Ruby node from the binary blob (dump)
1455
+ */
1456
+ static VALUE m_nodedump_from_memory(VALUE self, VALUE dump)
1457
+ {
1458
+ VALUE cMarshal, data, val, val_relocs;
1459
+ VALUE gc_was_disabled;
1460
+ int num_of_nodes;
1461
+ NODEObjAddresses *relocs;
1462
+ /* DISABLE GARBAGE COLLECTOR (required for stable loading
1463
+ of large node trees */
1464
+ gc_was_disabled = rb_gc_disable();
1465
+ /* Wrap struct for relocations */
1466
+ val_relocs = Data_Make_Struct(cNodeObjAddresses, NODEObjAddresses,
1467
+ NULL, NODEObjAddresses_free, relocs); // This data envelope cannot exist without NODE
1468
+ /* Load and unpack our dump */
1469
+ cMarshal = rb_const_get(rb_cObject, rb_intern("Marshal"));
1470
+ data = rb_funcall(cMarshal, rb_intern("load"), 1, dump);
1471
+ if (TYPE(data) != T_HASH)
1472
+ {
1473
+ rb_raise(rb_eArgError, "Input dump is corrupted");
1474
+ }
1475
+ val = rb_hash_aref(data, ID2SYM(rb_intern("num_of_nodes")));
1476
+ if (val == Qnil)
1477
+ {
1478
+ rb_raise(rb_eArgError, "num_of_nodes not found");
1479
+ }
1480
+ else
1481
+ {
1482
+ num_of_nodes = FIX2INT(val);
1483
+ }
1484
+ /* Check "magic" signature and platform identifiers */
1485
+ check_hash_magic(data);
1486
+ /* Get the information about the source file that was compiled to the node */
1487
+ // a) node name
1488
+ val = rb_hash_aref(data, ID2SYM(rb_intern("nodename")));
1489
+ if (val == Qnil || TYPE(val) == T_STRING)
1490
+ rb_iv_set(self, "@nodename", val);
1491
+ else
1492
+ rb_raise(rb_eArgError, "nodename value is corrupted");
1493
+ // b) file name
1494
+ val = rb_hash_aref(data, ID2SYM(rb_intern("filename")));
1495
+ if (val == Qnil || TYPE(val) == T_STRING)
1496
+ rb_iv_set(self, "@filename", val);
1497
+ else
1498
+ rb_raise(rb_eArgError, "filename value is corrupted");
1499
+ // c) file path
1500
+ val = rb_hash_aref(data, ID2SYM(rb_intern("filepath")));
1501
+ if (val == Qnil || TYPE(val) == T_STRING)
1502
+ rb_iv_set(self, "@filepath", val);
1503
+ else
1504
+ rb_raise(rb_eArgError, "filepath value is corrupted");
1505
+ /* Load all required data */
1506
+ resolve_syms_ords(data, relocs); // Symbols
1507
+ resolve_lits_ords(data, relocs); // Literals
1508
+ resolve_gvars_ords(data, relocs); // Global entries (with symbol ID resolving)
1509
+ resolve_idtbls_ords(data, relocs); // Identifiers tables (with symbol ID resolving)
1510
+ resolve_nodes_ords(data, num_of_nodes, relocs); // Allocate memory for all nodes
1511
+ #ifdef USE_RB_ARGS_INFO
1512
+ resolve_args_ords(data, relocs); // Load args entries with symbols ID and nodes resolving
1513
+ #endif
1514
+ load_nodes_from_str(data, relocs);
1515
+ /* Save the loaded node tree and collect garbage */
1516
+ rb_iv_set(self, "@node", (VALUE) relocs->nodes_adr[0]);
1517
+ rb_iv_set(self, "@num_of_nodes", INT2FIX(num_of_nodes));
1518
+ rb_iv_set(self, "@obj_addresses", val_relocs);
1519
+ if (gc_was_disabled == Qfalse)
1520
+ {
1521
+ rb_gc_enable();
1522
+ rb_gc_start();
1523
+ }
1524
+ return self;
1525
+ }
1526
+
1527
+
1528
+ /*
1529
+ * call-seq:
1530
+ * obj.symbols
1531
+ *
1532
+ * Return array with the list of symbols
1533
+ */
1534
+ static VALUE m_nodedump_symbols(VALUE self)
1535
+ {
1536
+ int i;
1537
+ VALUE val_relocs, val_nodeinfo, syms;
1538
+ // Variant 1: node loaded from file
1539
+ val_relocs = rb_iv_get(self, "@obj_addresses");
1540
+ if (val_relocs != Qnil)
1541
+ {
1542
+ NODEObjAddresses *relocs;
1543
+ Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
1544
+ syms = rb_ary_new();
1545
+ for (i = 0; i < relocs->syms_len; i++)
1546
+ rb_ary_push(syms, ID2SYM(relocs->syms_adr[i]));
1547
+ return syms;
1548
+ }
1549
+ // Variant 2: node saved to file (parsed from memory)
1550
+ val_nodeinfo = rb_iv_get(self, "@nodeinfo");
1551
+ if (val_nodeinfo != Qnil)
1552
+ {
1553
+ NODEInfo *ninfo;
1554
+ VALUE *ary;
1555
+ Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
1556
+ syms = rb_funcall(ninfo->syms.vals, rb_intern("values"), 0);
1557
+ ary = RARRAY_PTR(syms);
1558
+ for (i = 0; i < RARRAY_LEN(syms); i++)
1559
+ {
1560
+ ary[i] = rb_funcall(ary[i], rb_intern("to_sym"), 0);
1561
+ }
1562
+ return syms;
1563
+ }
1564
+ rb_raise(rb_eArgError, "Symbol information not initialized. Run to_hash before reading.");
1565
+ }
1566
+
1567
+ /*
1568
+ * call-seq:
1569
+ * obj.change_symbol(old_sym, new_sym)
1570
+ *
1571
+ * Replace one symbol by another (to be used for code obfuscation)
1572
+ * - +old_sym+ -- String that contains symbol name to be replaced
1573
+ * - +new_sym+ -- String that contains new name of the symbol
1574
+ */
1575
+ static VALUE m_nodedump_change_symbol(VALUE self, VALUE old_sym, VALUE new_sym)
1576
+ {
1577
+ VALUE val_nodehash = rb_iv_get(self, "@nodehash");
1578
+ VALUE syms, key;
1579
+ // Check if node is position-independent
1580
+ // (i.e. with initialized NODEInfo structure that contains
1581
+ // relocations for symbols)
1582
+ if (val_nodehash == Qnil)
1583
+ rb_raise(rb_eArgError, "This node is not preparsed into Hash");
1584
+ // Check data types of the input array
1585
+ if (TYPE(old_sym) != T_STRING)
1586
+ {
1587
+ rb_raise(rb_eArgError, "old_sym argument must be a string");
1588
+ }
1589
+ if (TYPE(new_sym) != T_STRING)
1590
+ {
1591
+ rb_raise(rb_eArgError, "new_sym argument must be a string");
1592
+ }
1593
+ // Get the symbol table from the Hash
1594
+ syms = rb_hash_aref(val_nodehash, ID2SYM(rb_intern("symbols")));
1595
+ if (syms == Qnil)
1596
+ rb_raise(rb_eArgError, "Preparsed hash has no :symbols field");
1597
+ // Check if new_sym is present in the symbol table
1598
+ key = rb_funcall(syms, rb_intern("find_index"), 1, new_sym);
1599
+ if (key != Qnil)
1600
+ {
1601
+ rb_raise(rb_eArgError, "new_sym value must be absent in table of symbols");
1602
+ }
1603
+ // Change the symbol in the preparsed Hash
1604
+ key = rb_funcall(syms, rb_intern("find_index"), 1, old_sym);
1605
+ if (key == Qnil)
1606
+ return Qnil;
1607
+ RARRAY_PTR(syms)[FIX2INT(key)] = new_sym;
1608
+ return self;
1609
+ }
1610
+
1611
+ /*
1612
+ * Return array with the list of literals
1613
+ */
1614
+ static VALUE m_nodedump_literals(VALUE self)
1615
+ {
1616
+ int i;
1617
+ VALUE val_relocs, val_nodeinfo, lits;
1618
+ // Variant 1: node loaded from file. It uses NODEObjAddresses struct
1619
+ // with the results of Ruby NODE structure parsing.
1620
+ val_relocs = rb_iv_get(self, "@obj_addresses");
1621
+ if (val_relocs != Qnil)
1622
+ {
1623
+ NODEObjAddresses *relocs;
1624
+
1625
+ Data_Get_Struct(val_relocs, NODEObjAddresses, relocs);
1626
+ lits = rb_ary_new();
1627
+ for (i = 0; i < relocs->lits_len; i++)
1628
+ {
1629
+ VALUE val = relocs->lits_adr[i];
1630
+ int t = TYPE(val);
1631
+ if (t != T_SYMBOL && t != T_FLOAT && t != T_FIXNUM)
1632
+ val = rb_funcall(val, rb_intern("dup"), 0);
1633
+ rb_ary_push(lits, val);
1634
+ }
1635
+ return lits;
1636
+ }
1637
+ // Variant 2: node saved to file (parsed from memory). It uses
1638
+ // NODEInfo struct that is initialized during node dump parsing.
1639
+ val_nodeinfo = rb_iv_get(self, "@nodeinfo");
1640
+ if (val_nodeinfo != Qnil)
1641
+ {
1642
+ NODEInfo *ninfo;
1643
+ VALUE *ary;
1644
+ Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
1645
+ lits = rb_funcall(ninfo->lits.vals, rb_intern("values"), 0);
1646
+ ary = RARRAY_PTR(lits);
1647
+ for (i = 0; i < RARRAY_LEN(lits); i++)
1648
+ {
1649
+ int t = TYPE(ary[i]);
1650
+ if (t != T_SYMBOL && t != T_FLOAT && t != T_FIXNUM)
1651
+ ary[i] = rb_funcall(ary[i], rb_intern("dup"), 0);
1652
+ }
1653
+ return lits;
1654
+ }
1655
+ rb_raise(rb_eArgError, "Literals information not initialized. Run to_hash before reading.");
1656
+ }
1657
+
1658
+ /*
1659
+ * Update the array with the list of literals
1660
+ * (to be used for code obfuscation)
1661
+ * Warning! This function is a stub!
1662
+ */
1663
+ static VALUE m_nodedump_change_literal(VALUE self, VALUE old_lit, VALUE new_lit)
1664
+ {
1665
+ /* TO BE IMPLEMENTED */
1666
+ return self;
1667
+ }
1668
+
1669
+
1670
+ /*
1671
+ * call-seq:
1672
+ * obj.compile
1673
+ *
1674
+ * Creates the RubyVM::InstructionSequence object from the node
1675
+ */
1676
+ static VALUE m_nodedump_compile(VALUE self)
1677
+ {
1678
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
1679
+ VALUE nodename = rb_iv_get(self, "@nodename");
1680
+ VALUE filename = rb_iv_get(self, "@filename");
1681
+ VALUE filepath = rb_iv_get(self, "@filepath");
1682
+ #ifndef WITH_RB_ISEQW_NEW
1683
+ /* For Pre-2.3 */
1684
+ return rb_iseq_new_top(node, nodename, filename, filepath, Qfalse);
1685
+ #else
1686
+ /* For Ruby 2.3 */
1687
+ return rb_iseqw_new(rb_iseq_new_top(node, nodename, filename, filepath, Qfalse));
1688
+ #endif
1689
+ }
1690
+
1691
+ /*
1692
+ * Parses Ruby file with the source code and saves the node
1693
+ */
1694
+ static VALUE m_nodedump_from_source(VALUE self, VALUE file)
1695
+ {
1696
+ VALUE line = INT2FIX(1), f, node, filepath, gc_was_disabled;
1697
+ const char *fname;
1698
+
1699
+ gc_was_disabled = rb_gc_disable();
1700
+ rb_secure(1);
1701
+ FilePathValue(file);
1702
+ fname = StringValueCStr(file);
1703
+ /* Remember information about the file */
1704
+ rb_iv_set(self, "@nodename", rb_str_new2("<main>"));
1705
+ rb_iv_set(self, "@filename", file);
1706
+ filepath = rb_funcall(rb_cFile, rb_intern("realpath"), 1, file); // Envelope for rb_realpath_internal
1707
+ rb_iv_set(self, "@filepath", filepath);
1708
+ /* Create node from the source */
1709
+ f = rb_file_open_str(file, "r");
1710
+ node = (VALUE) rb_compile_file(fname, f, NUM2INT(line));
1711
+ rb_iv_set(self, "@node", node);
1712
+ if ((void *) node == NULL)
1713
+ {
1714
+ rb_raise(rb_eArgError, "Error during string parsing");
1715
+ }
1716
+ if (gc_was_disabled == Qfalse)
1717
+ {
1718
+ rb_gc_enable();
1719
+ }
1720
+ return self;
1721
+ }
1722
+
1723
+ /*
1724
+ * Parses Ruby string with the source code and saves the node
1725
+ */
1726
+ static VALUE m_nodedump_from_string(VALUE self, VALUE str)
1727
+ {
1728
+ VALUE line = INT2FIX(1), node, gc_was_disabled;
1729
+ const char *fname = "STRING";
1730
+ Check_Type(str, T_STRING);
1731
+ gc_was_disabled = rb_gc_disable();
1732
+ rb_secure(1);
1733
+ /* Create empty information about the file */
1734
+ rb_iv_set(self, "@nodename", rb_str_new2("<main>"));
1735
+ if (RUBY_API_VERSION_MAJOR == 1)
1736
+ { /* For Ruby 1.9.x */
1737
+ rb_iv_set(self, "@filename", Qnil);
1738
+ rb_iv_set(self, "@filepath", Qnil);
1739
+ }
1740
+ else
1741
+ { /* For Ruby 2.x */
1742
+ rb_iv_set(self, "@filename", rb_str_new2("<compiled>"));
1743
+ rb_iv_set(self, "@filepath", rb_str_new2("<compiled>"));
1744
+ }
1745
+ /* Create node from the string */
1746
+ node = (VALUE) rb_compile_string(fname, str, NUM2INT(line));
1747
+ rb_iv_set(self, "@node", node);
1748
+ if (gc_was_disabled == Qfalse)
1749
+ {
1750
+ rb_gc_enable();
1751
+ rb_gc_start();
1752
+ }
1753
+ if ((void *) node == NULL)
1754
+ {
1755
+ rb_raise(rb_eArgError, "Error during string parsing");
1756
+ }
1757
+ return self;
1758
+ }
1759
+
1760
+ /*
1761
+ * call-seq:
1762
+ * obj.new(:srcfile, filename) # Will load source file from the disk
1763
+ * obj.new(:binfile, filename) # Will load file with node binary dump from the disk
1764
+ * obj.new(:srcmemory, srcstr) # Will load source code from the string
1765
+ * obj.new(:binmemory, binstr) # Will load node binary dump from the string
1766
+ *
1767
+ * Creates NodeMarshal class example from the source code or dumped
1768
+ * syntax tree (NODEs), i.e. preparsed and packed source code. Created
1769
+ * object can be used either for code execution or for saving it
1770
+ * in the preparsed form (useful for code obfuscation/protection)
1771
+ */
1772
+ static VALUE m_nodedump_init(VALUE self, VALUE source, VALUE info)
1773
+ {
1774
+ ID id_usr;
1775
+ rb_iv_set(self, "@show_offsets", Qfalse);
1776
+ Check_Type(source, T_SYMBOL);
1777
+ id_usr = SYM2ID(source);
1778
+ if (id_usr == rb_intern("srcfile"))
1779
+ {
1780
+ return m_nodedump_from_source(self, info);
1781
+ }
1782
+ else if (id_usr == rb_intern("srcmemory"))
1783
+ {
1784
+ return m_nodedump_from_string(self, info);
1785
+ }
1786
+ else if (id_usr == rb_intern("binmemory"))
1787
+ {
1788
+ return m_nodedump_from_memory(self, info);
1789
+ }
1790
+ else if (id_usr == rb_intern("binfile"))
1791
+ {
1792
+ VALUE cFile = rb_const_get(rb_cObject, rb_intern("File"));
1793
+ VALUE bin = rb_funcall(cFile, rb_intern("binread"), 1, info);
1794
+ return m_nodedump_from_memory(self, bin);
1795
+ }
1796
+ else
1797
+ {
1798
+ rb_raise(rb_eArgError, "Invalid source type (it must be :srcfile, :srcmemory, :binmemory of :binfile)");
1799
+ }
1800
+ return Qnil;
1801
+ }
1802
+
1803
+ /*
1804
+ * call-seq:
1805
+ * obj.dump_tree
1806
+ *
1807
+ * Transforms Ruby syntax tree (NODE) to the String using
1808
+ * +rb_parser_dump_tree+ function from +node.c+ (see Ruby source code).
1809
+ */
1810
+ static VALUE m_nodedump_parser_dump_tree(VALUE self)
1811
+ {
1812
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
1813
+ return rb_parser_dump_tree(node, 0);
1814
+ }
1815
+
1816
+ /*
1817
+ * call-seq:
1818
+ * obj.dump_tree_short
1819
+ *
1820
+ * Transforms Ruby syntax tree (NODE) to the String using custom function
1821
+ * instead of +rb_parser_dump_tree+ function.
1822
+ *
1823
+ * See also #show_offsets, #show_offsets=
1824
+ */
1825
+ static VALUE m_nodedump_dump_tree_short(VALUE self)
1826
+ {
1827
+ VALUE str = rb_str_new2(""); // Output string
1828
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
1829
+ int show_offsets = (rb_iv_get(self, "@show_offsets") == Qtrue) ? 1 : 0;
1830
+ print_node(str, node, 0, show_offsets);
1831
+ return str;
1832
+ }
1833
+
1834
+ /*
1835
+ * call-seq:
1836
+ * obj.show_offsets
1837
+ *
1838
+ * Returns show_offsets property (used by NodeMarshal#dump_tree_short)
1839
+ * It can be either true or false
1840
+ */
1841
+ static VALUE m_nodedump_show_offsets(VALUE self)
1842
+ {
1843
+ return rb_iv_get(self, "@show_offsets");
1844
+ }
1845
+
1846
+ /*
1847
+ * call-seq:
1848
+ * obj.show_offsets=
1849
+ *
1850
+ * Sets show_offsets property (used by NodeMarshal#dump_tree_short)
1851
+ * It can be either true or false
1852
+ */
1853
+ static VALUE m_nodedump_set_show_offsets(VALUE self, VALUE value)
1854
+ {
1855
+ if (value != Qtrue && value != Qfalse)
1856
+ {
1857
+ rb_raise(rb_eArgError, "show_offsets property must be either true or false");
1858
+ }
1859
+ return rb_iv_set(self, "@show_offsets", value);
1860
+ }
1861
+
1862
+
1863
+ /*
1864
+ * call-seq:
1865
+ * obj.to_hash
1866
+ *
1867
+ * Converts a NodeMarshal object to a hash that contains complete information
1868
+ * about the node and does not depend on memory addresses of data structures.
1869
+ * Format of the obtained hash depends on used platform (especially
1870
+ * size of the pointer) and Ruby version.
1871
+ *
1872
+ * <b>Format of the hash</b>
1873
+ *
1874
+ * <i>Part 1: Signatures</i>
1875
+ *
1876
+ * - <tt>MAGIC</tt> -- NODEMARSHAL11
1877
+ * - <tt>RUBY_PLATFORM</tt> -- saved <tt>RUBY_PLATFORM</tt> constant value
1878
+ * - <tt>RUBY_VERSION</tt> -- saved <tt>RUBY_VERSION</tt> constant value
1879
+ *
1880
+ * <i>Part 2: Program loadable elements.</i>
1881
+ *
1882
+ * All loadable elements are arrays. Index of the array element means
1883
+ * its identifier that is used in the node tree.
1884
+ *
1885
+ * - <tt>literals</tt> -- program literals (strings, ranges etc.)
1886
+ * - <tt>symbols</tt> -- program symbols (values have either String or Fixnum
1887
+ * data type; numbers are used for symbols that cannot be represented as strings)
1888
+ * - <tt>global_entries</tt> -- global variables information
1889
+ * - <tt>id_tables</tt> -- array of arrays. Each array contains symbols IDs
1890
+ * - <tt>args</tt> -- information about code block argument(s)
1891
+ *
1892
+ * <i>Part 3: Nodes information</i>
1893
+ * - <tt>nodes</tt> -- string that contains binary encoded information
1894
+ * about the nodes
1895
+ * - <tt>num_of_nodes</tt> -- number of nodes in the <tt>nodes</tt> field
1896
+ * - <tt>nodename</tt> -- name of the node (usually "<main>")
1897
+ * - <tt>filename</tt> -- name (without path) of .rb file used for the node generation
1898
+ * - <tt>filepath</tt> -- name (with full path) of .rb file used for the node generation
1899
+ */
1900
+ static VALUE m_nodedump_to_hash(VALUE self)
1901
+ {
1902
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
1903
+ NODEInfo *info;
1904
+ VALUE ans, num, val_info, gc_was_disabled;
1905
+ // DISABLE GARBAGE COLLECTOR (important for dumping)
1906
+ gc_was_disabled = rb_gc_disable();
1907
+ // Convert the node to the form with relocs (i.e. the information about node)
1908
+ // if such form is not present
1909
+ val_info = rb_iv_get(self, "@nodeinfo");
1910
+ if (val_info == Qnil)
1911
+ {
1912
+ val_info = Data_Make_Struct(cNodeInfo, NODEInfo,
1913
+ NODEInfo_mark, NODEInfo_free, info); // This data envelope cannot exist without NODE
1914
+ NODEInfo_init(info);
1915
+ rb_iv_set(self, "@nodeinfo", val_info);
1916
+ num = INT2FIX(count_num_of_nodes(node, node, info));
1917
+ rb_iv_set(self, "@nodeinfo_num_of_nodes", num);
1918
+ // Convert node to NODEInfo structure
1919
+ ans = NODEInfo_toHash(info);
1920
+ rb_hash_aset(ans, ID2SYM(rb_intern("num_of_nodes")), num);
1921
+ rb_hash_aset(ans, ID2SYM(rb_intern("nodename")), rb_iv_get(self, "@nodename"));
1922
+ rb_hash_aset(ans, ID2SYM(rb_intern("filename")), rb_iv_get(self, "@filename"));
1923
+ rb_hash_aset(ans, ID2SYM(rb_intern("filepath")), rb_iv_get(self, "@filepath"));
1924
+ rb_iv_set(self, "@nodehash", ans);
1925
+ }
1926
+ else
1927
+ {
1928
+ ans = rb_iv_get(self, "@nodehash");
1929
+ }
1930
+ // ENABLE GARBAGE COLLECTOR (important for dumping)
1931
+ if (gc_was_disabled == Qfalse)
1932
+ {
1933
+ rb_gc_enable();
1934
+ }
1935
+ return ans;
1936
+ }
1937
+
1938
+
1939
+ VALUE m_node_to_ary(NODE *node)
1940
+ {
1941
+ int i, type, ut[3];
1942
+ VALUE uref[3];
1943
+ VALUE entry = rb_ary_new();
1944
+ /* Special case: NULL node */
1945
+ if (node == NULL)
1946
+ {
1947
+ return Qnil;
1948
+ }
1949
+ /* Save node name */
1950
+ type = nd_type(node);
1951
+ rb_ary_push(entry, ID2SYM(rb_intern(ruby_node_name(type))));
1952
+
1953
+ ut[0] = nodes_ctbl[type * 3];
1954
+ ut[1] = nodes_ctbl[type * 3 + 1];
1955
+ ut[2] = nodes_ctbl[type * 3 + 2];
1956
+
1957
+ uref[0] = node->u1.value;
1958
+ uref[1] = node->u2.value;
1959
+ uref[2] = node->u3.value;
1960
+
1961
+
1962
+ for (i = 0; i < 3; i++)
1963
+ {
1964
+ if (ut[i] == NT_NODE)
1965
+ {
1966
+ if (nd_type(node) != NODE_OP_ASGN2 || i != 2)
1967
+ {
1968
+ rb_ary_push(entry, m_node_to_ary(RNODE(uref[i])));
1969
+ }
1970
+ else
1971
+ {
1972
+ VALUE child = rb_ary_new();
1973
+ if (ut[i] != 0 && TYPE(ut[i]) != T_NODE)
1974
+ rb_raise(rb_eArgError, "print_node: broken node 0x%s", RSTRING_PTR(value_to_str(ut[i])));
1975
+ rb_ary_push(child, ID2SYM(rb_intern("NODE_OP_ASGN2")));
1976
+ rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u1.value));
1977
+ rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u2.value));
1978
+ rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u3.value));
1979
+ rb_ary_push(entry, child);
1980
+ }
1981
+ }
1982
+ else if (ut[i] == NT_VALUE)
1983
+ {
1984
+ rb_ary_push(entry, uref[i]);
1985
+ }
1986
+ else if (ut[i] == NT_ID)
1987
+ {
1988
+ rb_ary_push(entry, ID2SYM( (ID) uref[i]));
1989
+ }
1990
+ else if (ut[i] == NT_LONG)
1991
+ {
1992
+ rb_ary_push(entry, LONG2NUM( (intptr_t) uref[i]));
1993
+ }
1994
+ else if (ut[i] == NT_NULL)
1995
+ {
1996
+ rb_ary_push(entry, Qnil);
1997
+ }
1998
+ else if (ut[i] == NT_ARGS)
1999
+ {
2000
+ VALUE rargs = rb_hash_new();
2001
+ VALUE rargs_env = rb_ary_new();
2002
+ #ifdef USE_RB_ARGS_INFO
2003
+ ID id;
2004
+ struct rb_args_info *args = (void *) uref[i];
2005
+
2006
+ rb_hash_aset(rargs, ID2SYM(rb_intern("pre_init")), m_node_to_ary(args->pre_init));
2007
+ rb_hash_aset(rargs, ID2SYM(rb_intern("post_init")), m_node_to_ary(args->post_init));
2008
+
2009
+ id = args->first_post_arg;
2010
+ rb_hash_aset(rargs, ID2SYM(rb_intern("first_post_arg")), (id) ? ID2SYM(id) : Qnil);
2011
+ id = args->rest_arg;
2012
+ rb_hash_aset(rargs, ID2SYM(rb_intern("rest_arg")), (id) ? ID2SYM(id) : Qnil);
2013
+ id = args->block_arg;
2014
+ rb_hash_aset(rargs, ID2SYM(rb_intern("block_arg")), (id) ? ID2SYM(id) : Qnil);
2015
+
2016
+ rb_hash_aset(rargs, ID2SYM(rb_intern("kw_args")), m_node_to_ary(args->kw_args));
2017
+ rb_hash_aset(rargs, ID2SYM(rb_intern("kw_rest_arg")), m_node_to_ary(args->kw_rest_arg));
2018
+ rb_hash_aset(rargs, ID2SYM(rb_intern("opt_args")), m_node_to_ary(args->opt_args));
2019
+ #endif
2020
+ rb_ary_push(rargs_env, ID2SYM(rb_intern("ARGS")));
2021
+ rb_ary_push(rargs_env, rargs);
2022
+ rb_ary_push(entry, rargs_env);
2023
+ }
2024
+ else if (ut[i] == NT_IDTABLE)
2025
+ {
2026
+ VALUE ridtbl = rb_ary_new();
2027
+ VALUE idtbl_ary = rb_ary_new();
2028
+ int j, len;
2029
+
2030
+ ID *idtbl = (ID *) uref[i];
2031
+ len = (uref[i]) ? *idtbl++ : 0;
2032
+ for (j = 0; j < len; j++)
2033
+ {
2034
+ ID sym = *idtbl++;
2035
+ VALUE val = ID2SYM(sym);
2036
+ rb_ary_push(idtbl_ary, val);
2037
+ }
2038
+ rb_ary_push(ridtbl, ID2SYM(rb_intern("IDTABLE")));
2039
+ rb_ary_push(ridtbl, idtbl_ary);
2040
+ rb_ary_push(entry, ridtbl);
2041
+ }
2042
+ else if (ut[i] == NT_ENTRY)
2043
+ {
2044
+ struct rb_global_entry *gentry;
2045
+ gentry = (struct rb_global_entry *) uref[i];
2046
+ rb_ary_push(entry, ID2SYM(gentry->id));
2047
+ }
2048
+ else
2049
+ {
2050
+ rb_ary_push(entry, ID2SYM(rb_intern("UNKNOWN")));
2051
+ }
2052
+ }
2053
+ return entry;
2054
+ }
2055
+
2056
+ /*
2057
+ * call-seq:
2058
+ * obj.to_a
2059
+ *
2060
+ * Converts node to the array (mainly to allow exploration of AST
2061
+ * by the user). It shows information about rb_args_info and
2062
+ * ID *tbl that are not displayed by NodeMarshal#dump_tree and
2063
+ * NodeMarshal#dump_tree_short.
2064
+ */
2065
+ static VALUE m_nodedump_to_a(VALUE self)
2066
+ {
2067
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
2068
+ VALUE gc_was_disabled = rb_gc_disable();
2069
+ VALUE ary = m_node_to_ary(node);
2070
+ if (gc_was_disabled == Qfalse)
2071
+ {
2072
+ rb_gc_enable();
2073
+ }
2074
+ return ary;
2075
+ }
2076
+
2077
+
2078
+ /*
2079
+ * call-seq:
2080
+ * obj.to_bin
2081
+ *
2082
+ * Converts NodeMarshal class example to the binary string that
2083
+ * can be saved to the file and used for loading the node from the file.
2084
+ * Format of the obtained binary dump depends on used platform (especially
2085
+ * size of the pointer) and Ruby version.
2086
+ */
2087
+ static VALUE m_nodedump_to_bin(VALUE self)
2088
+ {
2089
+ VALUE hash = m_nodedump_to_hash(self);
2090
+ VALUE cMarshal = rb_const_get(rb_cObject, rb_intern("Marshal"));
2091
+ return rb_funcall(cMarshal, rb_intern("dump"), 1, hash);
2092
+ }
2093
+
2094
+ /*
2095
+ * Gives the information about the node
2096
+ */
2097
+ static VALUE m_nodedump_inspect(VALUE self)
2098
+ {
2099
+ static char str[1024], buf[512];
2100
+ VALUE num_of_nodes, nodename, filepath, filename;
2101
+ VALUE val_obj_addresses, val_nodeinfo;
2102
+ // Get generic information about node
2103
+ num_of_nodes = rb_iv_get(self, "@num_of_nodes");
2104
+ nodename = rb_iv_get(self, "@nodename");
2105
+ filepath = rb_iv_get(self, "@filepath");
2106
+ filename = rb_iv_get(self, "@filename");
2107
+ // Generate string with generic information about node
2108
+ sprintf(str,
2109
+ "----- NodeMarshal:0x%"PRIxPTR"\n"
2110
+ " num_of_nodes: %d\n nodename: %s\n filepath: %s\n filename: %s\n",
2111
+ (uintptr_t) (self),
2112
+ (num_of_nodes == Qnil) ? -1 : FIX2INT(num_of_nodes),
2113
+ (nodename == Qnil) ? "nil" : RSTRING_PTR(nodename),
2114
+ (filepath == Qnil) ? "nil" : RSTRING_PTR(filepath),
2115
+ (filename == Qnil) ? "nil" : RSTRING_PTR(filename)
2116
+ );
2117
+ // Check if the information about node struct is available
2118
+ val_nodeinfo = rb_iv_get(self, "@nodeinfo");
2119
+ val_obj_addresses = rb_iv_get(self, "@obj_addresses");
2120
+ if (val_nodeinfo == Qnil && val_obj_addresses == Qnil)
2121
+ {
2122
+ m_nodedump_to_hash(self);
2123
+ val_nodeinfo = rb_iv_get(self, "@nodeinfo");
2124
+ }
2125
+ // Information about preparsed node
2126
+ // a) NODEInfo struct
2127
+ if (val_nodeinfo == Qnil)
2128
+ {
2129
+ sprintf(buf, " NODEInfo struct is empty\n");
2130
+ }
2131
+ else
2132
+ {
2133
+ NODEInfo *ninfo;
2134
+ Data_Get_Struct(val_nodeinfo, NODEInfo, ninfo);
2135
+ sprintf(buf,
2136
+ " NODEInfo struct:\n"
2137
+ " syms hash len (Symbols): %d\n"
2138
+ " lits hash len (Literals): %d\n"
2139
+ " idtabs hash len (ID tables): %d\n"
2140
+ " gentries hash len (Global vars): %d\n"
2141
+ " nodes hash len (Nodes): %d\n"
2142
+ " pnodes hash len (Parent nodes): %d\n"
2143
+ #ifdef USE_RB_ARGS_INFO
2144
+ " args hash len (args info): %d\n"
2145
+ #endif
2146
+ ,
2147
+ FIX2INT(rb_funcall(ninfo->syms.vals, rb_intern("length"), 0)),
2148
+ FIX2INT(rb_funcall(ninfo->lits.vals, rb_intern("length"), 0)),
2149
+ FIX2INT(rb_funcall(ninfo->idtabs.vals, rb_intern("length"), 0)),
2150
+ FIX2INT(rb_funcall(ninfo->gentries.vals, rb_intern("length"), 0)),
2151
+ FIX2INT(rb_funcall(ninfo->nodes.vals, rb_intern("length"), 0)),
2152
+ FIX2INT(rb_funcall(ninfo->pnodes.vals, rb_intern("length"), 0))
2153
+ #ifdef USE_RB_ARGS_INFO
2154
+ ,
2155
+ FIX2INT(rb_funcall(ninfo->args.vals, rb_intern("length"), 0))
2156
+ #endif
2157
+ );
2158
+ }
2159
+ strcat(str, buf);
2160
+ // b) NODEObjAddresses struct
2161
+ if (val_obj_addresses == Qnil)
2162
+ {
2163
+ sprintf(buf, " NODEObjAddresses struct is empty\n");
2164
+ }
2165
+ else
2166
+ {
2167
+ NODEObjAddresses *objadr;
2168
+ Data_Get_Struct(val_obj_addresses, NODEObjAddresses, objadr);
2169
+ sprintf(buf,
2170
+ " NODEObjAddresses struct:\n"
2171
+ " syms_len (Num of symbols): %d\n"
2172
+ " lits_len (Num of literals): %d\n"
2173
+ " idtbls_len (Num of ID tables): %d\n"
2174
+ " gvars_len (Num of global vars): %d\n"
2175
+ " nodes_len (Num of nodes): %d\n"
2176
+ #ifdef USE_RB_ARGS_INFO
2177
+ " args_len: (Num of args info): %d\n"
2178
+ #endif
2179
+ , objadr->syms_len, objadr->lits_len,
2180
+ objadr->idtbls_len, objadr->gvars_len,
2181
+ objadr->nodes_len
2182
+ #ifdef USE_RB_ARGS_INFO
2183
+ , objadr->args_len
2184
+ #endif
2185
+ );
2186
+ }
2187
+ strcat(str, buf);
2188
+ strcat(str, "------------------\n");
2189
+ // Generate output string
2190
+ return rb_str_new2(str);
2191
+ }
2192
+
2193
+ /*
2194
+ * Returns node name (usually <main>)
2195
+ */
2196
+ static VALUE m_nodedump_nodename(VALUE self)
2197
+ {
2198
+ return rb_funcall(rb_iv_get(self, "@nodename"), rb_intern("dup"), 0);
2199
+ }
2200
+
2201
+ /*
2202
+ * Returns name of file that was used for node generation and will be used
2203
+ * by YARV (or nil/<compiled> if a string of code was used)
2204
+ */
2205
+ static VALUE m_nodedump_filename(VALUE self)
2206
+ {
2207
+ return rb_funcall(rb_iv_get(self, "@filename"), rb_intern("dup"), 0);
2208
+ }
2209
+
2210
+ /*
2211
+ * Sets name of file that was used for node generation and will be used
2212
+ * by YARV (or nil/<compiled> if a string of code was used)
2213
+ */
2214
+ static VALUE m_nodedump_set_filename(VALUE self, VALUE val)
2215
+ {
2216
+ if (val != Qnil)
2217
+ {
2218
+ Check_Type(val, T_STRING);
2219
+ rb_iv_set(self, "@filename", rb_funcall(val, rb_intern("dup"), 0));
2220
+ }
2221
+ else
2222
+ {
2223
+ rb_iv_set(self, "@filename", Qnil);
2224
+ }
2225
+ return self;
2226
+ }
2227
+
2228
+ /*
2229
+ * Returns path of file that was used for node generation and will be used
2230
+ * by YARV (or nil/<compiled> if a string of code was used)
2231
+ */
2232
+ static VALUE m_nodedump_filepath(VALUE self)
2233
+ {
2234
+ return rb_funcall(rb_iv_get(self, "@filepath"), rb_intern("dup"), 0);
2235
+ }
2236
+
2237
+ /*
2238
+ * call-seq:
2239
+ * obj.filepath=value
2240
+ *
2241
+ * Sets the path of file that was used for node generation and will
2242
+ * be used by YARV (or nil/<compiled> if a string of code was used)
2243
+ */
2244
+ static VALUE m_nodedump_set_filepath(VALUE self, VALUE val)
2245
+ {
2246
+ if (val != Qnil)
2247
+ {
2248
+ Check_Type(val, T_STRING);
2249
+ rb_iv_set(self, "@filepath", rb_funcall(val, rb_intern("dup"), 0));
2250
+ }
2251
+ else
2252
+ {
2253
+ rb_iv_set(self, "@filepath", Qnil);
2254
+ }
2255
+ return self;
2256
+ }
2257
+
2258
+ /*
2259
+ * call-seq:
2260
+ * NodeMarshal.base85r_encode(input) -> output
2261
+ *
2262
+ * Encode arbitrary binary string to the ASCII string
2263
+ * using modified version of BASE85 (useful for obfuscation
2264
+ * of .rb source files)
2265
+ */
2266
+ static VALUE m_base85r_encode(VALUE obj, VALUE input)
2267
+ {
2268
+ return base85r_encode(input);
2269
+ }
2270
+
2271
+ /*
2272
+ * call-seq:
2273
+ * NodeMarshal.base85r_decode(input) -> output
2274
+ *
2275
+ * Decode ASCII string in the modified BASE85 format
2276
+ * to the binary string (useful for obfuscation of .rb
2277
+ * source files)
2278
+ */
2279
+ static VALUE m_base85r_decode(VALUE obj, VALUE input)
2280
+ {
2281
+ return base85r_decode(input);
2282
+ }
2283
+
2284
+ /* call-seq:
2285
+ * obj.to_text
2286
+ *
2287
+ * Converts NodeMarshal class example to the text string (modified Base85 encoding) that
2288
+ * can be saved to the file and used for loading the node from the file.
2289
+ * Format of the obtained binary dump depends on used platform (especially
2290
+ * size of the pointer) and Ruby version.
2291
+ */
2292
+ static VALUE m_nodedump_to_text(VALUE self)
2293
+ {
2294
+ VALUE bin = m_nodedump_to_bin(self);
2295
+ return base85r_encode(bin);
2296
+ }
2297
+
2298
+ /*
2299
+ * Returns node object
2300
+ */
2301
+ static VALUE m_nodedump_node(VALUE self)
2302
+ {
2303
+ return rb_iv_get(self, "@node");
2304
+ }
2305
+
2306
+ /*
2307
+ * This class can load and save Ruby code in the form of the
2308
+ * platform-dependent syntax tree (made of NODEs). Such function
2309
+ * allows to hide the source code from users. Main features:
2310
+ *
2311
+ * - Irreversible transformation of Ruby source code to the syntax tree
2312
+ * - Representation of syntax tree in binary form dependent from the platform and Ruby version
2313
+ * - Simple options for node inspection
2314
+ * - Ruby 1.9.3, 2.2.x and 2.3.x support
2315
+ * - Subroutines for custom code obfuscation
2316
+ */
2317
+ void Init_nodemarshal()
2318
+ {
2319
+ static VALUE cNodeMarshal;
2320
+ init_nodes_table(nodes_ctbl, NODES_CTBL_SIZE);
2321
+ base85r_init_tables();
2322
+
2323
+ cNodeMarshal = rb_define_class("NodeMarshal", rb_cObject);
2324
+ rb_define_singleton_method(cNodeMarshal, "base85r_encode", RUBY_METHOD_FUNC(m_base85r_encode), 1);
2325
+ rb_define_singleton_method(cNodeMarshal, "base85r_decode", RUBY_METHOD_FUNC(m_base85r_decode), 1);
2326
+
2327
+ rb_define_method(cNodeMarshal, "initialize", RUBY_METHOD_FUNC(m_nodedump_init), 2);
2328
+ rb_define_method(cNodeMarshal, "to_hash", RUBY_METHOD_FUNC(m_nodedump_to_hash), 0);
2329
+ rb_define_method(cNodeMarshal, "to_h", RUBY_METHOD_FUNC(m_nodedump_to_hash), 0);
2330
+ rb_define_method(cNodeMarshal, "to_bin", RUBY_METHOD_FUNC(m_nodedump_to_bin), 0);
2331
+ rb_define_method(cNodeMarshal, "to_text", RUBY_METHOD_FUNC(m_nodedump_to_text), 0);
2332
+ rb_define_method(cNodeMarshal, "to_a", RUBY_METHOD_FUNC(m_nodedump_to_a), 0);
2333
+ rb_define_method(cNodeMarshal, "to_ary", RUBY_METHOD_FUNC(m_nodedump_to_a), 0);
2334
+ rb_define_method(cNodeMarshal, "dump_tree", RUBY_METHOD_FUNC(m_nodedump_parser_dump_tree), 0);
2335
+ rb_define_method(cNodeMarshal, "dump_tree_short", RUBY_METHOD_FUNC(m_nodedump_dump_tree_short), 0);
2336
+ rb_define_method(cNodeMarshal, "compile", RUBY_METHOD_FUNC(m_nodedump_compile), 0);
2337
+ rb_define_method(cNodeMarshal, "show_offsets", RUBY_METHOD_FUNC(m_nodedump_show_offsets), 0);
2338
+ rb_define_method(cNodeMarshal, "show_offsets=", RUBY_METHOD_FUNC(m_nodedump_set_show_offsets), 1);
2339
+ // Methods for working with the information about the node
2340
+ // a) literals, symbols, generic information
2341
+ rb_define_method(cNodeMarshal, "symbols", RUBY_METHOD_FUNC(m_nodedump_symbols), 0);
2342
+ rb_define_method(cNodeMarshal, "change_symbol", RUBY_METHOD_FUNC(m_nodedump_change_symbol), 2);
2343
+ rb_define_method(cNodeMarshal, "literals", RUBY_METHOD_FUNC(m_nodedump_literals), 0);
2344
+ rb_define_method(cNodeMarshal, "change_literal", RUBY_METHOD_FUNC(m_nodedump_change_literal), 2);
2345
+ rb_define_method(cNodeMarshal, "inspect", RUBY_METHOD_FUNC(m_nodedump_inspect), 0);
2346
+ rb_define_method(cNodeMarshal, "node", RUBY_METHOD_FUNC(m_nodedump_node), 0);
2347
+ // b) node and file names
2348
+ rb_define_method(cNodeMarshal, "nodename", RUBY_METHOD_FUNC(m_nodedump_nodename), 0);
2349
+ rb_define_method(cNodeMarshal, "filename", RUBY_METHOD_FUNC(m_nodedump_filename), 0);
2350
+ rb_define_method(cNodeMarshal, "filename=", RUBY_METHOD_FUNC(m_nodedump_set_filename), 1);
2351
+ rb_define_method(cNodeMarshal, "filepath", RUBY_METHOD_FUNC(m_nodedump_filepath), 0);
2352
+ rb_define_method(cNodeMarshal, "filepath=", RUBY_METHOD_FUNC(m_nodedump_set_filepath), 1);
2353
+ // C structure wrappers
2354
+ cNodeObjAddresses = rb_define_class("NodeObjAddresses", rb_cObject);
2355
+ cNodeInfo = rb_define_class("NodeInfo", rb_cObject);
2356
+ }