node-marshal 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d564a41c56aa30cec81fe572e069cd4734637995
4
- data.tar.gz: 28c7a859af9ba4cfa82c2f8138e0a9b1708f3176
3
+ metadata.gz: 5bed8bdc507d12d26f9b1ed0eb8c969fc05853e3
4
+ data.tar.gz: ebc554d61f17751f7033c1c666375f383e9cb5c5
5
5
  SHA512:
6
- metadata.gz: e7a41dbf52db03ccb69a4ccd9641ff740f45c88ba27488e17dea2897655c0920e1880edfcb29ded1801e25287358bd81d77097453e19c97ca2ff55fa46c90fa0
7
- data.tar.gz: f31215d9e96c760d21927279162f48627d4bbace53ad66fe43d7335f8de403788e77022570a933fb6b4e5fee65cbd829f26ae9c3d5cd8c63903087ca10048fdd
6
+ metadata.gz: 2d7c90c64e183b195ebbe6d12ea6f9af7d75ce54955e93b667b1ccbc9a22cc4807a1623c4c3eb0134a044af1c21f4aa94865878f7a93879b250f0d8928357e63
7
+ data.tar.gz: f25344c304ef5786fd44fc50f14afc177b30698b4db00c2a12055707e10ae8964e7aa6a5d84d1c92877d7a0a418fd779032a5e281d3437adfd1c6493ffcb4cc5
data/README.rdoc CHANGED
@@ -17,6 +17,19 @@ The key features of node-marshal gem:
17
17
  - 2-clause BSD license suitable for creation of custom source code protection system
18
18
 
19
19
  Changelog:
20
+ - 16.MAR.2016 - 0.2.1
21
+ - Bugfix: garbage collection of symbols kept in the literals table of the node dump
22
+ is now prohibited. Such GC caused hard-to-reproduce bugs after code loading.
23
+ (thanks to Gregory Siehień for suitable examples)
24
+ - Bugfix: improved parsing of NODE_ARRAY (correct processing of two cases of
25
+ undocumented pointers (instead of longs) in the 2nd child. It affects arrays,
26
+ NODE_HASH (hashes) and NODE_DSTR (strings in double quotes with #{} inside) ).
27
+ - Bugfix: now NodeMarshal class methods don't change the state of Ruby
28
+ garbage collector
29
+ - Improved NodeMarshal#dump_tree_short output (addresses of nodes are shown)
30
+ - Added NodeMarshal#to_h method (alias for NodeMarshal#to_hash)
31
+ - NodeMarshal#to_a and NodeMarshal#to_ary methods added (they show extended information
32
+ about Ruby AST including rb_args_info and ID *tbl internals)
20
33
  - 11.JAN.2016 - 0.2.0
21
34
  - Bugfix: || and && in NODE_OP_ASGN1 (e.g. in x['a'] ||= 'b' or x['b'] &&= false)
22
35
  (this bug caused segfaults in some cases)
data/bin/noderbc CHANGED
@@ -5,7 +5,7 @@ help = <<-EOS
5
5
  Ruby source files compiler from node-marshal gem. It is based
6
6
  on NodeMarshal class. Source code is irreversibly transformed to the
7
7
  Ruby node (syntax tree) serialized into ASCII string. It can be used
8
- for for code obfuscation.
8
+ for code obfuscation and Ruby internals exploration.
9
9
 
10
10
  (C) 2015-2016 Alexey Voskov. License: BSD-2-Clause.
11
11
 
@@ -241,6 +241,11 @@ int LeafTableInfo_keyToID(LeafTableInfo *lti, VALUE key)
241
241
  return (id == Qnil) ? -1 : FIX2INT(id);
242
242
  }
243
243
 
244
+ VALUE LeafTableInfo_keyToValue(LeafTableInfo *lti, VALUE key)
245
+ {
246
+ return rb_hash_aref(lti->vals, key);
247
+ }
248
+
244
249
  /* The structure keeps information about the node
245
250
  that is required for its dumping to the file
246
251
  (mainly hashes with relocatable identifiers) */
@@ -253,6 +258,7 @@ typedef struct {
253
258
  #endif
254
259
  LeafTableInfo gentries; // Global variables table
255
260
  LeafTableInfo nodes; // Table of nodes
261
+ LeafTableInfo pnodes; // Table of parent nodes
256
262
  } NODEInfo;
257
263
 
258
264
  void NODEInfo_init(NODEInfo *info)
@@ -265,6 +271,7 @@ void NODEInfo_init(NODEInfo *info)
265
271
  #endif
266
272
  LeafTableInfo_init(&(info->gentries));
267
273
  LeafTableInfo_init(&(info->nodes));
274
+ LeafTableInfo_init(&(info->pnodes));
268
275
  }
269
276
 
270
277
  void NODEInfo_mark(NODEInfo *info)
@@ -277,6 +284,7 @@ void NODEInfo_mark(NODEInfo *info)
277
284
  #endif
278
285
  LeafTableInfo_mark(&(info->gentries));
279
286
  LeafTableInfo_mark(&(info->nodes));
287
+ LeafTableInfo_mark(&(info->pnodes));
280
288
  }
281
289
 
282
290
  void NODEInfo_free(NODEInfo *info)
@@ -340,9 +348,10 @@ static int dump_node_value(NODEInfo *info, char *ptr, NODE *node, int type, VALU
340
348
  }
341
349
  else if (TYPE(value) != T_NODE)
342
350
  {
343
- rb_raise(rb_eArgError, "dump_node_value, parent node %s: child node %d (ADR 0x%s): is not a node\n"
351
+ rb_raise(rb_eArgError, "dump_node_value, parent node %s (ADR 0x%s): child node %d (ADR 0x%s): is not a node\n"
344
352
  " Type: %s (%d), Value: %s",
345
- ruby_node_name(nd_type(node)), child_id, RSTRING_PTR(value_to_str(value)),
353
+ ruby_node_name(nd_type(node)), RSTRING_PTR(value_to_str((VALUE) node)),
354
+ child_id, RSTRING_PTR(value_to_str(value)),
346
355
  RSTRING_PTR(rb_funcall(rb_funcall(value, rb_intern("class"), 0), rb_intern("to_s"), 0)),
347
356
  TYPE(value),
348
357
  RSTRING_PTR(rb_funcall(value, rb_intern("to_s"), 0)) );
@@ -352,8 +361,9 @@ static int dump_node_value(NODEInfo *info, char *ptr, NODE *node, int type, VALU
352
361
  VALUE id = LeafTableInfo_keyToID(&info->nodes, value_to_str(value));
353
362
  if (id == (VALUE) -1)
354
363
  {
355
- rb_raise(rb_eArgError, "dump_node_value, parent node %s: child node %d (ADR 0x%s) not found",
356
- ruby_node_name(nd_type(node)), child_id, RSTRING_PTR(value_to_str(value)));
364
+ rb_raise(rb_eArgError, "dump_node_value, parent node %s (ADR 0x%s): child node %d (ADR 0x%s) not found",
365
+ ruby_node_name(nd_type(node)), RSTRING_PTR(value_to_str((VALUE) node)),
366
+ child_id, RSTRING_PTR(value_to_str(value)));
357
367
  return VL_RAW;
358
368
  }
359
369
  else
@@ -421,6 +431,13 @@ static int dump_node_value(NODEInfo *info, char *ptr, NODE *node, int type, VALU
421
431
  }
422
432
  }
423
433
 
434
+ /*
435
+ * Converts information about nodes to the binary string.
436
+ * It uses dump_node_value function for the low-level conversion
437
+ * of node "leaves" to the actual binary data.
438
+ *
439
+ * See load_nodes_from_str for the description of the binary string format.
440
+ */
424
441
  static VALUE dump_nodes(NODEInfo *info)
425
442
  {
426
443
  int node_size = sizeof(int) + sizeof(VALUE) * 4;
@@ -462,6 +479,40 @@ static VALUE dump_nodes(NODEInfo *info)
462
479
  if (node->u2.value == 0) ut[1] = NT_NULL;
463
480
  if (node->u3.value == 0) ut[2] = NT_NULL;
464
481
  }
482
+
483
+ if (nt = NODE_ARRAY)
484
+ {
485
+ /* Special undocumented cases:
486
+ * 1) the second child of the second element of an array
487
+ * contains reference to the last element (NT_NODE) not
488
+ * length (NT_LONG)
489
+ * 2) NODE_HASH: every second element in NODE_ARRAY chain
490
+ * contains pointers to NODES (instead of lengths)
491
+ * 3) NODE_DSTR: first node in NODE_ARRAY chain contains
492
+ * pointer to NODE (instead of lengths) */
493
+ NODE *pnode1, *pnode2;
494
+ pnode1 = (NODE *) str_to_value(LeafTableInfo_keyToValue(&info->pnodes, value_to_str((VALUE) node)));
495
+ if (pnode1 != NULL && nd_type(pnode1) == NODE_ARRAY &&
496
+ (NODE *) pnode1->u3.value == node)
497
+ {
498
+ int nt2;
499
+ pnode2 = (NODE *) str_to_value(LeafTableInfo_keyToValue(&info->pnodes, value_to_str((VALUE) pnode1)));
500
+ nt2 = nd_type(pnode2);
501
+ if ( (nt2 != NODE_ARRAY && nt2 != NODE_DSTR) ||
502
+ (NODE *) pnode2->u1.value == pnode1 )
503
+ {
504
+ ut[1] = NT_NODE;
505
+ }
506
+ else if (pnode1->u2.value == 2 && node == (NODE *) node->u2.value)
507
+ {
508
+ ut[1] = NT_NODE;
509
+ }
510
+ }
511
+ else if (pnode1 != NULL && nd_type(pnode1) == NODE_DSTR)
512
+ {
513
+ ut[1] = NT_NODE;
514
+ }
515
+ }
465
516
 
466
517
  rtypes[0] = dump_node_value(info, ptr, node, ut[0], node->u1.value, 1);
467
518
  ptr += (rtypes[0] & 0xF0) >> 4;
@@ -584,6 +635,28 @@ static void NODEInfo_addValue(NODEInfo *info, VALUE value)
584
635
  }
585
636
  }
586
637
 
638
+ /*
639
+ * Adds the information about Ruby NODE to the NODEInfo struct.
640
+ * It keeps the addresses of the node and its parents
641
+ */
642
+ static void NODEInfo_addNode(NODEInfo *info, NODE *node, NODE *pnode)
643
+ {
644
+ VALUE node_adr = value_to_str((VALUE) node);
645
+ VALUE pnode_adr = value_to_str((VALUE) pnode);
646
+ LeafTableInfo_addEntry(&info->nodes, node_adr, node_adr);
647
+ LeafTableInfo_addEntry(&info->pnodes, node_adr, pnode_adr);
648
+ }
649
+
650
+ /*
651
+ * Returns ID of the node using its address (VALUE)
652
+ * It is used during the process of dumping Ruby AST to disk
653
+ * for replacing of memory addresses into ordinals
654
+ */
655
+ static int NODEInfo_nodeAdrToID(NODEInfo *info, VALUE adr)
656
+ {
657
+ return LeafTableInfo_keyToID(&info->nodes, adr);
658
+ }
659
+
587
660
  /*
588
661
  * Function counts number of nodes and fills NODEInfo struct
589
662
  * that is necessary for saving the node to the HDD
@@ -638,7 +711,7 @@ static int count_num_of_nodes(NODE *node, NODE *parent, NODEInfo *info)
638
711
  }
639
712
  /* Save the ID of the node */
640
713
  num = 1;
641
- LeafTableInfo_addEntry(&info->nodes, value_to_str((VALUE) node), value_to_str((VALUE) node));
714
+ NODEInfo_addNode(info, node, parent);
642
715
  /* Analyze node childs */
643
716
  /* a) child 1 */
644
717
  if (ut[0] == NT_NODE)
@@ -836,8 +909,9 @@ static void print_node(VALUE str, NODE *node, int tab, int show_offsets)
836
909
 
837
910
  if (show_offsets)
838
911
  {
839
- rbstr_printf(str, "@ %s | %16"PRIxPTR " %16"PRIxPTR " %16"PRIxPTR " (line %d)\n",
912
+ rbstr_printf(str, "@ %s | %16"PRIxPTR " | %16"PRIxPTR " %16"PRIxPTR " %16"PRIxPTR " (line %d)\n",
840
913
  ruby_node_name(type),
914
+ (intptr_t) node,
841
915
  (intptr_t) node->u1.value, (intptr_t) node->u2.value, (intptr_t) node->u3.value,
842
916
  nd_line(node));
843
917
  }
@@ -967,7 +1041,7 @@ void resolve_syms_ords(VALUE data, NODEObjAddresses *relocs)
967
1041
  {
968
1042
  VALUE r_sym = RARRAY_PTR(tbl_val)[i];
969
1043
  if (TYPE(r_sym) == T_STRING)
970
- {
1044
+ { /* Created symbol will be immune to garbage collector */
971
1045
  relocs->syms_adr[i] = rb_intern(RSTRING_PTR(r_sym));
972
1046
  }
973
1047
  else if (TYPE(r_sym) == T_FIXNUM)
@@ -984,6 +1058,7 @@ void resolve_syms_ords(VALUE data, NODEObjAddresses *relocs)
984
1058
  void resolve_lits_ords(VALUE data, NODEObjAddresses *relocs)
985
1059
  {
986
1060
  VALUE tbl_val = rb_hash_aref(data, ID2SYM(rb_intern("literals")));
1061
+ int i;
987
1062
  if (tbl_val == Qnil)
988
1063
  {
989
1064
  rb_raise(rb_eArgError, "Cannot find literals table");
@@ -994,6 +1069,18 @@ void resolve_lits_ords(VALUE data, NODEObjAddresses *relocs)
994
1069
  }
995
1070
  relocs->lits_adr = RARRAY_PTR(tbl_val);
996
1071
  relocs->lits_len = RARRAY_LEN(tbl_val);
1072
+ /* Mark all symbols as "immortal" (i.e. not collectable
1073
+ by Ruby GC): some of them can be used in the syntax tree!
1074
+ See the presentation of Narihiro Nakamura, author of
1075
+ symbol GC in Ruby 2.x for details
1076
+ http://www.slideshare.net/authorNari/symbol-gc */
1077
+ for (i = 0; i < relocs->lits_len; i++)
1078
+ {
1079
+ if (TYPE(relocs->lits_adr[i]) == T_SYMBOL)
1080
+ {
1081
+ SYM2ID(relocs->lits_adr[i]);
1082
+ }
1083
+ }
997
1084
  }
998
1085
 
999
1086
  void resolve_gvars_ords(VALUE data, NODEObjAddresses *relocs)
@@ -1309,11 +1396,12 @@ static VALUE check_hash_magic(VALUE data)
1309
1396
  static VALUE m_nodedump_from_memory(VALUE self, VALUE dump)
1310
1397
  {
1311
1398
  VALUE cMarshal, data, val, val_relocs;
1399
+ VALUE gc_was_disabled;
1312
1400
  int num_of_nodes;
1313
1401
  NODEObjAddresses *relocs;
1314
1402
  /* DISABLE GARBAGE COLLECTOR (required for stable loading
1315
1403
  of large node trees) */
1316
- rb_gc_disable();
1404
+ gc_was_disabled = rb_gc_disable();
1317
1405
  /* Wrap struct for relocations */
1318
1406
  val_relocs = Data_Make_Struct(cNodeObjAddresses, NODEObjAddresses,
1319
1407
  NULL, NODEObjAddresses_free, relocs); // This data envelope cannot exist without NODE
@@ -1368,8 +1456,11 @@ static VALUE m_nodedump_from_memory(VALUE self, VALUE dump)
1368
1456
  rb_iv_set(self, "@node", (VALUE) relocs->nodes_adr[0]);
1369
1457
  rb_iv_set(self, "@num_of_nodes", INT2FIX(num_of_nodes));
1370
1458
  rb_iv_set(self, "@obj_addresses", val_relocs);
1371
- rb_gc_enable();
1372
- rb_gc_start();
1459
+ if (gc_was_disabled == Qfalse)
1460
+ {
1461
+ rb_gc_enable();
1462
+ rb_gc_start();
1463
+ }
1373
1464
  return self;
1374
1465
  }
1375
1466
 
@@ -1542,10 +1633,10 @@ static VALUE m_nodedump_compile(VALUE self)
1542
1633
  */
1543
1634
  static VALUE m_nodedump_from_source(VALUE self, VALUE file)
1544
1635
  {
1545
- VALUE line = INT2FIX(1), f, node, filepath;
1636
+ VALUE line = INT2FIX(1), f, node, filepath, gc_was_disabled;
1546
1637
  const char *fname;
1547
1638
 
1548
- rb_gc_disable();
1639
+ gc_was_disabled = rb_gc_disable();
1549
1640
  rb_secure(1);
1550
1641
  FilePathValue(file);
1551
1642
  fname = StringValueCStr(file);
@@ -1557,12 +1648,15 @@ static VALUE m_nodedump_from_source(VALUE self, VALUE file)
1557
1648
  /* Create node from the source */
1558
1649
  f = rb_file_open_str(file, "r");
1559
1650
  node = (VALUE) rb_compile_file(fname, f, NUM2INT(line));
1560
- rb_gc_enable();
1561
- rb_iv_set(self, "@node", node);
1651
+ rb_iv_set(self, "@node", node);
1562
1652
  if ((void *) node == NULL)
1563
1653
  {
1564
1654
  rb_raise(rb_eArgError, "Error during string parsing");
1565
1655
  }
1656
+ if (gc_was_disabled == Qfalse)
1657
+ {
1658
+ rb_gc_enable();
1659
+ }
1566
1660
  return self;
1567
1661
  }
1568
1662
 
@@ -1571,9 +1665,10 @@ static VALUE m_nodedump_from_source(VALUE self, VALUE file)
1571
1665
  */
1572
1666
  static VALUE m_nodedump_from_string(VALUE self, VALUE str)
1573
1667
  {
1574
- VALUE line = INT2FIX(1), node;
1668
+ VALUE line = INT2FIX(1), node, gc_was_disabled;
1575
1669
  const char *fname = "STRING";
1576
1670
  Check_Type(str, T_STRING);
1671
+ gc_was_disabled = rb_gc_disable();
1577
1672
  rb_secure(1);
1578
1673
  /* Create empty information about the file */
1579
1674
  rb_iv_set(self, "@nodename", rb_str_new2("<main>"));
@@ -1588,11 +1683,13 @@ static VALUE m_nodedump_from_string(VALUE self, VALUE str)
1588
1683
  rb_iv_set(self, "@filepath", rb_str_new2("<compiled>"));
1589
1684
  }
1590
1685
  /* Create node from the string */
1591
- rb_gc_disable();
1592
1686
  node = (VALUE) rb_compile_string(fname, str, NUM2INT(line));
1593
1687
  rb_iv_set(self, "@node", node);
1594
- rb_gc_enable();
1595
- rb_gc_start();
1688
+ if (gc_was_disabled == Qfalse)
1689
+ {
1690
+ rb_gc_enable();
1691
+ rb_gc_start();
1692
+ }
1596
1693
  if ((void *) node == NULL)
1597
1694
  {
1598
1695
  rb_raise(rb_eArgError, "Error during string parsing");
@@ -1744,9 +1841,9 @@ static VALUE m_nodedump_to_hash(VALUE self)
1744
1841
  {
1745
1842
  NODE *node = RNODE(rb_iv_get(self, "@node"));
1746
1843
  NODEInfo *info;
1747
- VALUE ans, num, val_info;
1844
+ VALUE ans, num, val_info, gc_was_disabled;
1748
1845
  // DISABLE GARBAGE COLLECTOR (important for dumping)
1749
- rb_gc_disable();
1846
+ gc_was_disabled = rb_gc_disable();
1750
1847
  // Convert the node to the form with relocs (i.e. the information about node)
1751
1848
  // if such form is not present
1752
1849
  val_info = rb_iv_get(self, "@nodeinfo");
@@ -1771,10 +1868,153 @@ static VALUE m_nodedump_to_hash(VALUE self)
1771
1868
  ans = rb_iv_get(self, "@nodehash");
1772
1869
  }
1773
1870
  // ENABLE GARBAGE COLLECTOR (important for dumping)
1774
- rb_gc_enable();
1871
+ if (gc_was_disabled == Qfalse)
1872
+ {
1873
+ rb_gc_enable();
1874
+ }
1775
1875
  return ans;
1776
1876
  }
1777
1877
 
1878
+
1879
+ VALUE m_node_to_ary(NODE *node)
1880
+ {
1881
+ int i, type, ut[3];
1882
+ VALUE uref[3];
1883
+ VALUE entry = rb_ary_new();
1884
+ /* Special case: NULL node */
1885
+ if (node == NULL)
1886
+ {
1887
+ return Qnil;
1888
+ }
1889
+ /* Save node name */
1890
+ type = nd_type(node);
1891
+ rb_ary_push(entry, ID2SYM(rb_intern(ruby_node_name(type))));
1892
+
1893
+ ut[0] = nodes_ctbl[type * 3];
1894
+ ut[1] = nodes_ctbl[type * 3 + 1];
1895
+ ut[2] = nodes_ctbl[type * 3 + 2];
1896
+
1897
+ uref[0] = node->u1.value;
1898
+ uref[1] = node->u2.value;
1899
+ uref[2] = node->u3.value;
1900
+
1901
+
1902
+ for (i = 0; i < 3; i++)
1903
+ {
1904
+ if (ut[i] == NT_NODE)
1905
+ {
1906
+ if (nd_type(node) != NODE_OP_ASGN2 || i != 2)
1907
+ {
1908
+ rb_ary_push(entry, m_node_to_ary(RNODE(uref[i])));
1909
+ }
1910
+ else
1911
+ {
1912
+ VALUE child = rb_ary_new();
1913
+ if (ut[i] != 0 && TYPE(ut[i]) != T_NODE)
1914
+ rb_raise(rb_eArgError, "print_node: broken node 0x%s", RSTRING_PTR(value_to_str(ut[i])));
1915
+ rb_ary_push(child, ID2SYM(rb_intern("NODE_OP_ASGN2")));
1916
+ rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u1.value));
1917
+ rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u2.value));
1918
+ rb_ary_push(child, LONG2NUM((intptr_t) RNODE(uref[i])->u3.value));
1919
+ rb_ary_push(entry, child);
1920
+ }
1921
+ }
1922
+ else if (ut[i] == NT_VALUE)
1923
+ {
1924
+ rb_ary_push(entry, uref[i]);
1925
+ }
1926
+ else if (ut[i] == NT_ID)
1927
+ {
1928
+ rb_ary_push(entry, ID2SYM( (ID) uref[i]));
1929
+ }
1930
+ else if (ut[i] == NT_LONG)
1931
+ {
1932
+ rb_ary_push(entry, LONG2NUM( (intptr_t) uref[i]));
1933
+ }
1934
+ else if (ut[i] == NT_NULL)
1935
+ {
1936
+ rb_ary_push(entry, Qnil);
1937
+ }
1938
+ else if (ut[i] == NT_ARGS)
1939
+ {
1940
+ VALUE rargs = rb_hash_new();
1941
+ VALUE rargs_env = rb_ary_new();
1942
+ #ifdef USE_RB_ARGS_INFO
1943
+ ID id;
1944
+ struct rb_args_info *args = (void *) uref[i];
1945
+
1946
+ rb_hash_aset(rargs, ID2SYM(rb_intern("pre_init")), m_node_to_ary(args->pre_init));
1947
+ rb_hash_aset(rargs, ID2SYM(rb_intern("post_init")), m_node_to_ary(args->post_init));
1948
+
1949
+ id = args->first_post_arg;
1950
+ rb_hash_aset(rargs, ID2SYM(rb_intern("first_post_arg")), (id) ? ID2SYM(id) : Qnil);
1951
+ id = args->rest_arg;
1952
+ rb_hash_aset(rargs, ID2SYM(rb_intern("rest_arg")), (id) ? ID2SYM(id) : Qnil);
1953
+ id = args->block_arg;
1954
+ rb_hash_aset(rargs, ID2SYM(rb_intern("block_arg")), (id) ? ID2SYM(id) : Qnil);
1955
+
1956
+ rb_hash_aset(rargs, ID2SYM(rb_intern("kw_args")), m_node_to_ary(args->kw_args));
1957
+ rb_hash_aset(rargs, ID2SYM(rb_intern("kw_rest_arg")), m_node_to_ary(args->kw_rest_arg));
1958
+ rb_hash_aset(rargs, ID2SYM(rb_intern("opt_args")), m_node_to_ary(args->opt_args));
1959
+ #endif
1960
+ rb_ary_push(rargs_env, ID2SYM(rb_intern("ARGS")));
1961
+ rb_ary_push(rargs_env, rargs);
1962
+ rb_ary_push(entry, rargs_env);
1963
+ }
1964
+ else if (ut[i] == NT_IDTABLE)
1965
+ {
1966
+ VALUE ridtbl = rb_ary_new();
1967
+ VALUE idtbl_ary = rb_ary_new();
1968
+ int j, len;
1969
+
1970
+ ID *idtbl = (ID *) uref[i];
1971
+ len = (uref[i]) ? *idtbl++ : 0;
1972
+ for (j = 0; j < len; j++)
1973
+ {
1974
+ ID sym = *idtbl++;
1975
+ VALUE val = ID2SYM(sym);
1976
+ rb_ary_push(idtbl_ary, val);
1977
+ }
1978
+ rb_ary_push(ridtbl, ID2SYM(rb_intern("IDTABLE")));
1979
+ rb_ary_push(ridtbl, idtbl_ary);
1980
+ rb_ary_push(entry, ridtbl);
1981
+ }
1982
+ else if (ut[i] == NT_ENTRY)
1983
+ {
1984
+ struct rb_global_entry *gentry;
1985
+ gentry = (struct rb_global_entry *) uref[i];
1986
+ rb_ary_push(entry, ID2SYM(gentry->id));
1987
+ }
1988
+ else
1989
+ {
1990
+ rb_ary_push(entry, ID2SYM(rb_intern("UNKNOWN")));
1991
+ }
1992
+ }
1993
+ return entry;
1994
+ }
1995
+
1996
+ /*
1997
+ * call-seq:
1998
+ * obj.to_a
1999
+ *
2000
+ * Converts node to the array (mainly to allow exploration of AST
2001
+ * by the user). It shows information about rb_args_info and
2002
+ * ID *tbl that are not displayed by NodeMarshal#dump_tree and
2003
+ * NodeMarshal#dump_tree_short.
2004
+ */
2005
+ static VALUE m_nodedump_to_a(VALUE self)
2006
+ {
2007
+ NODE *node = RNODE(rb_iv_get(self, "@node"));
2008
+ VALUE gc_was_disabled = rb_gc_disable();
2009
+ VALUE ary = m_node_to_ary(node);
2010
+ if (gc_was_disabled == Qfalse)
2011
+ {
2012
+ rb_gc_enable();
2013
+ }
2014
+ return ary;
2015
+ }
2016
+
2017
+
1778
2018
  /*
1779
2019
  * call-seq:
1780
2020
  * obj.to_bin
@@ -1839,6 +2079,7 @@ static VALUE m_nodedump_inspect(VALUE self)
1839
2079
  " idtabs hash len (ID tables): %d\n"
1840
2080
  " gentries hash len (Global vars): %d\n"
1841
2081
  " nodes hash len (Nodes): %d\n"
2082
+ " pnodes hash len (Parent nodes): %d\n"
1842
2083
  #ifdef USE_RB_ARGS_INFO
1843
2084
  " args hash len (args info): %d\n"
1844
2085
  #endif
@@ -1847,7 +2088,8 @@ static VALUE m_nodedump_inspect(VALUE self)
1847
2088
  FIX2INT(rb_funcall(ninfo->lits.vals, rb_intern("length"), 0)),
1848
2089
  FIX2INT(rb_funcall(ninfo->idtabs.vals, rb_intern("length"), 0)),
1849
2090
  FIX2INT(rb_funcall(ninfo->gentries.vals, rb_intern("length"), 0)),
1850
- FIX2INT(rb_funcall(ninfo->nodes.vals, rb_intern("length"), 0))
2091
+ FIX2INT(rb_funcall(ninfo->nodes.vals, rb_intern("length"), 0)),
2092
+ FIX2INT(rb_funcall(ninfo->pnodes.vals, rb_intern("length"), 0))
1851
2093
  #ifdef USE_RB_ARGS_INFO
1852
2094
  ,
1853
2095
  FIX2INT(rb_funcall(ninfo->args.vals, rb_intern("length"), 0))
@@ -2024,8 +2266,11 @@ void Init_nodemarshal()
2024
2266
 
2025
2267
  rb_define_method(cNodeMarshal, "initialize", RUBY_METHOD_FUNC(m_nodedump_init), 2);
2026
2268
  rb_define_method(cNodeMarshal, "to_hash", RUBY_METHOD_FUNC(m_nodedump_to_hash), 0);
2269
+ rb_define_method(cNodeMarshal, "to_h", RUBY_METHOD_FUNC(m_nodedump_to_hash), 0);
2027
2270
  rb_define_method(cNodeMarshal, "to_bin", RUBY_METHOD_FUNC(m_nodedump_to_bin), 0);
2028
2271
  rb_define_method(cNodeMarshal, "to_text", RUBY_METHOD_FUNC(m_nodedump_to_text), 0);
2272
+ rb_define_method(cNodeMarshal, "to_a", RUBY_METHOD_FUNC(m_nodedump_to_a), 0);
2273
+ rb_define_method(cNodeMarshal, "to_ary", RUBY_METHOD_FUNC(m_nodedump_to_a), 0);
2029
2274
  rb_define_method(cNodeMarshal, "dump_tree", RUBY_METHOD_FUNC(m_nodedump_parser_dump_tree), 0);
2030
2275
  rb_define_method(cNodeMarshal, "dump_tree_short", RUBY_METHOD_FUNC(m_nodedump_dump_tree_short), 0);
2031
2276
  rb_define_method(cNodeMarshal, "compile", RUBY_METHOD_FUNC(m_nodedump_compile), 0);
@@ -18,7 +18,18 @@
18
18
  #include <ruby/version.h>
19
19
  #include "nodedump.h"
20
20
 
21
- // Information about nodes types
21
+ /* Information about nodes types
22
+
23
+ NODE_ARRAY contains an undocumented feature: if an array contains
24
+ more than 1 element the 2nd child of 2nd element will contain
25
+ a reference to the last element of the array (and have NT_NODE
26
+ not NT_LONG type)
27
+ Another case is Hash: every 2nd element of NODE_ARRAY chain
28
+ has NT_NODE type (see nodedump.c for details)
29
+
30
+ Such child is ignored by GC because there is a reference to it
31
+ from another place of Ruby AST.
32
+ */
22
33
  static int nodes_child_info[][4] =
23
34
  {
24
35
  {NODE_BLOCK, NT_NODE, NT_NULL, NT_NODE},
@@ -77,7 +88,7 @@ static int nodes_child_info[][4] =
77
88
 
78
89
  {NODE_SUPER, NT_NULL, NT_NULL, NT_NODE},
79
90
  {NODE_ZSUPER, NT_NULL, NT_NULL, NT_NULL},
80
- {NODE_ARRAY, NT_NODE, NT_LONG, NT_NODE},
91
+ {NODE_ARRAY, NT_NODE, NT_LONG, NT_NODE}, /* 2nd child has undocumented variants (see above) */
81
92
  {NODE_VALUES, NT_NODE, NT_LONG, NT_NODE},
82
93
  {NODE_ZARRAY, NT_NULL, NT_NULL, NT_NULL},
83
94
 
@@ -582,7 +593,7 @@ void init_nodes_table(int *nodes_ctbl, int num_of_entries)
582
593
  check_nodes_child_info(pos);
583
594
  }
584
595
  /* Initialize output array by NT_UNKNOWN (if node is not defined
585
- in the input table the types of its childs are unknown) */
596
+ in the input table the types of its childs are unknown) */
586
597
  for (i = 0; i < num_of_entries * 3; i++)
587
598
  {
588
599
  nodes_ctbl[i] = NT_UNKNOWN;
data/lib/node-marshal.rb CHANGED
@@ -84,8 +84,8 @@ EOS
84
84
 
85
85
  # call-seq:
86
86
  # NodeMarshal::compile_rb_file(outfile, inpfile, opts)
87
- #
88
- # Reads +.rb+ file (Ruby source) and compiles it to .rb file containing
87
+ #
88
+ # Reads +.rb+ file (Ruby source) and compiles it to .rb file containing
89
89
  # compressed AST node and its loader. This function is an envelope for
90
90
  # NodeMarshal#to_compiled_rb
91
91
  def self.compile_rb_file(outfile, inpfile, *args)
@@ -167,7 +167,7 @@ EOS
167
167
  "q#{pos}"
168
168
  end
169
169
  end
170
- [symbols_ary, aliases_ary].transpose.to_h
170
+ [symbols_ary, aliases_ary].transpose.to_h
171
171
  end
172
172
 
173
173
  # call-seq:
data/test/test_base.rb CHANGED
@@ -36,7 +36,7 @@ def fact(n)
36
36
  end
37
37
  ni.map {|x| fact(x) }
38
38
  EOS
39
- assert_node_compiler(program)
39
+ assert_node_compiler(program)
40
40
  end
41
41
 
42
42
  # Simple ROT13 task that combines several language construction
@@ -183,6 +183,13 @@ EOS
183
183
  assert_node_compiler(program)
184
184
  end
185
185
 
186
+ # Tests correct processing of nodes with "#{expr}"--style strings
187
+ # (correct processing of NODE_ARRAY chain inside NODE_DSTR node)
188
+ def test_dstr
189
+ program = 'a = "#{1} and #{2*2} and #{3*3} and #{4*4}"'
190
+ assert_node_compiler(program)
191
+ end
192
+
186
193
  # Check the reaction on the parsing errors during the node creation
187
194
  # In the case of syntax error ArgumentError exception should be generated
188
195
  def test_syntax_error
@@ -203,7 +210,7 @@ EOS
203
210
  rescue ArgumentError
204
211
  test_passed = true
205
212
  end
206
- assert_equal(test_passed, true);
213
+ assert_equal(test_passed, true);
207
214
  end
208
215
 
209
216
  # Part of the tests: compare result of direct usage of eval
data/test/test_complex.rb CHANGED
@@ -120,6 +120,7 @@ class TestComplex < Test::Unit::TestCase
120
120
  tree_str = tree_src.to_s
121
121
  node = NodeMarshal.new(:srcmemory, tree_str)
122
122
  tree_bin = node.to_bin
123
+ puts node.inspect
123
124
 
124
125
  File.open('node.bin', 'wb') {|fp| fp << tree_bin }
125
126
  puts " Source code size: %d bytes" % tree_str.length
data/test/test_qcall.rb CHANGED
@@ -39,13 +39,14 @@ puts "'#{b&.owner_info&.address}'"
39
39
  ver = (ver[0] + ver[2] + ver[4]).to_i
40
40
  if ver >= 230
41
41
  node = NodeMarshal.new(:srcmemory, qcall_program)
42
+ node.show_offsets = true
42
43
  bindump = node.to_bin
43
44
  node = NodeMarshal.new(:binmemory, bindump)
44
45
  res_node = node.compile.eval
45
46
  res_text = eval(qcall_program)
46
47
  assert_equal(res_text, res_node)
47
48
  else
48
- assert_true(false, "Ruby 2.3 or higher is required for &. operator test")
49
+ assert_equal(false, true, "Ruby 2.3 or higher is required for &. operator test")
49
50
  end
50
51
  end
51
52
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: node-marshal
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexey Voskov
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir:
10
10
  - bin
11
11
  cert_chain: []
12
- date: 2016-01-11 00:00:00.000000000 Z
12
+ date: 2016-03-16 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: "This gem is designed for transformation of Ruby source code (eiher in
15
15
  the form of files or strings) to the \nRuby nodes (syntax trees) used by Ruby MRI
@@ -68,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
68
68
  version: '0'
69
69
  requirements: []
70
70
  rubyforge_project:
71
- rubygems_version: 2.4.5.1
71
+ rubygems_version: 2.5.1
72
72
  signing_key:
73
73
  specification_version: 4
74
74
  summary: Transforms Ruby sources to binary nodes (trees) that can be saved and loaded