nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
@@ -0,0 +1,160 @@
1
+ /* QUEUE based Transaction library, including database reduction.
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, please
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users. */
11
+
12
+ #ifndef _trsact_h_
13
+ #define _trsact_h_
14
+
15
+ #include"vec.h"
16
+ #include"base.h"
17
+
18
+ typedef struct {
19
+ unsigned char type; // mark to identify type of the structure
20
+ char *fname; // input file name
21
+ int flag; // flag
22
+ SETFAMILY T; // transaction
23
+ WEIGHT *w, *pw; // weight/positive-weight of transactions
24
+
25
+ QUEUE_INT clms_org, clm_max, clms_end; // #items in original file, max size of clms, and max of (original item, internal item)
26
+ VEC_ID rows_org, row_max; // #transactions in the original file
27
+ VEC_ID end1, sep; // #trsact in 1st file, the ID of the last permed trsact of 1st file
28
+ size_t eles_org; // #elements in the original file
29
+ WEIGHT total_w, total_pw, total_w_org, total_pw_org;
30
+ WEIGHT th; // threshold for frequency of items
31
+ PERM *perm, *trperm; // original item permutation loaded from permutation file (and inverse)
32
+
33
+ // lower/upper bound of #elements in a column/row. A column or row out of this range will be ignored
34
+ VEC_ID clm_lb, clm_ub;
35
+ QUEUE_ID row_lb, row_ub;
36
+ WEIGHT w_lb, w_ub;
37
+
38
+ VEC_ID str_num; // number of database (itemset stream/string datasets) in T
39
+ VEC_ID *head, *strID; // the head (beginning) of each stream, stream ID of each transaction
40
+ int occ_unit;
41
+
42
+ // for finding same transactions
43
+ QUEUE jump, *OQ; // queue of non-empty buckets, used in find_same_transactions
44
+ VEC_ID *mark; // marks for transactions
45
+ QUEUE_INT **shift; // memory for shift positions of each transaction
46
+ char *sc; // flag for non-active (in-frequent) items
47
+
48
+ // for extra transactions
49
+ VEC_ID new_t; // the start ID of un-used transactions
50
+ BASE buf; // buffer for transaction
51
+ BASE wbuf; // buffer for itemweights
52
+ char *fname2, *wfname, *wfname2, *item_wfname, *item_wfname2, *pfname; // weight file name, and item-weight file name, item-permutation file name
53
+ int flag2;
54
+ } TRSACT;
55
+
56
+ #define TRSACT_FRQSORT 65536 // sort transactions in decreasing order
57
+ #define TRSACT_ITEMWEIGHT 131072 // initialize itemweight by transaction weights
58
+ #define TRSACT_SHRINK 262144 // do not allocate memory for shrink, but do for mining
59
+ #define TRSACT_MULTI_STREAM 524288 // separate the datasets at each empty transaction
60
+ #define TRSACT_UNION 1048576 // take union of transactions, at the database reduction
61
+ #define TRSACT_INTSEC 2097152 // take intersection of transactions, at the database reduction
62
+ #define TRSACT_MAKE_NEW 4194304 // make new transaction for each
63
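+ #define TRSACT_ALLOC_OCC 8388608 // make new transaction for each (NOTE(review): comment is identical to the one on TRSACT_MAKE_NEW; verify the intended meaning of this flag)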
64
+ #define TRSACT_DELIV_SC 16777216 // look T->sc when delivery
65
+ #define TRSACT_NEGATIVE 33554432 // flag for whether some transaction weights are negative or not
66
+ //#define TRSACT_INIT_SHRINK 65536 // allocate memory for database reduction
67
+ #define TRSACT_WRITE_PERM 67108864 // write item-order to file
68
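+ #define TRSACT_1ST_SHRINK 134217728 // write item-order to file (NOTE(review): comment is identical to the one on TRSACT_WRITE_PERM; verify the intended meaning of this flag)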
69
+
70
+ #ifndef TRSACT_DEFAULT_WEIGHT
71
+ #define TRSACT_DEFAULT_WEIGHT 0 // default weight of the transaction, for missing weights in weight file
72
+ #endif
73
+
74
+ /* print transactions */
75
+ void TRSACT_print (TRSACT *T, QUEUE *occ, PERM *p);
76
+ void TRSACT_prop_print (TRSACT *T);
77
+
78
+ /**************************************************************/
79
+ void TRSACT_init (TRSACT *T);
80
+
81
+ /**************************************************************/
82
+ void TRSACT_end (TRSACT *T);
83
+
84
+ /*****************************************/
85
+ /* scan file "fp" with weight file wfp and count #items, #transactions in the file. */
86
+ /* count weight only if wfp!=NULL */
87
+ /* T->rows_org, clms_org, eles_org := #transactions, #items, #all elements */
88
+ /* ignore the transactions of size not in range T->clm_lb - clm_ub */
89
+ /* T->total_w, total_pw := sum of (positive) weights of transactions */
90
+ /* C.clmt[i],C.cw[i] := the number/(sum of weights) of transactions including i */
91
+ /****************************************/
92
+ void TRSACT_file_count (TRSACT *T, FILE_COUNT *C, FILE2 *fp, char *wf);
93
+
94
+ /* allocate memory, set permutation, and free C.clmt,rowt,rw,cw */
95
+ int TRSACT_alloc (TRSACT *T, FILE_COUNT *C);
96
+
97
+ /* load the file to allocated memory according to permutation, and free C.rw, C.cw */
98
+ void TRSACT_file_read (TRSACT *T, FILE2 *fp, FILE_COUNT *C, VEC_ID *t, int flag, char *iwfname);
99
+
100
+ /*****************************************/
101
+ /* load transaction file to TRSACT */
102
+ void TRSACT_load (TRSACT *T);
103
+
104
+ /* occurrence deliver (only counting) */
105
+ /* WARNING: next cell of the last item of each transaction must be INTHUGE */
106
+ /* compute occurrence for items less than max item, in the database induced
107
+ by occ */
108
+ /* if jump!=0, all i with non-zero occ[i].t will be inserted to jump */
109
+ /* be careful for overflow of jump */
110
+ /* if occ==NULL, scan all transactions */
111
+ /* flag&1: count only positive weights */
112
+ void TRSACT_delivery_iter (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, VEC_ID t, QUEUE_INT m);
113
+ void TRSACT_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, QUEUE *occ, QUEUE_INT m);
114
+ // QUEUE *TRSACT_alloc_occ (TRSACT *T, QUEUE_INT end);
115
+ //QUEUE_ID TRSACT_occ_dup (SETFAMILY *S, QUEUE *OQ, QUEUE *jump, WEIGHT *occ_w, WEIGHT *occ_pw);
116
+
117
+ /**************************************************************/
118
+ /* Find identical transactions in a subset of transactions, by radix-sort like method */
119
+ /* infrequent items (refer LCM_occ) and items larger than item_max are ignored */
120
+ /* INPUT: T:transactions, occ:subset of T represented by indices, result:array for output, item_max:largest item not to be ignored */
121
+ /* OUTPUT: if transactions i1, i2,..., ik are the same, they have same value in T->mark[i]
122
+ (not all) isolated transaction may have mark 1 */
123
+ /* use 0 to end-1 of QQ temporarily, and QQ[i].t and QQ[i].s have to be 0. */
124
+ /*************************************************************************/
125
+ void TRSACT_find_same (TRSACT *T, QUEUE *occ, QUEUE_INT end);
126
+
127
+ /* copy transaction t to tt (only items i s.t. pw[i]>=th) **/
128
+ void TRSACT_copy (TRSACT *T, VEC_ID tt, VEC_ID t, QUEUE_INT end);
129
+
130
+ /* intersection of transaction t and tt (only items i s.t. pw[i]>=th) **/
131
+ /* shift is the array of pointers indicating the start of each transaction **/
132
+ void TRSACT_suffix_and (TRSACT *T, VEC_ID tt, VEC_ID t);
133
+
134
+ /* take union of transaction t to tt (only items i s.t. pw[i]>=th) */
135
+ /* CAUTION: t has to be placed at the last of trsact_buf2. */
136
+ /* if the size of t increases, the following memory will be overwritten */
137
+ /* if memory (T->buf) is short, do nothing and return 1 (NOTE(review): the prototype is declared void -- confirm) */
138
+ void TRSACT_itemweight_union (TRSACT *T, VEC_ID tt, VEC_ID t);
139
+
140
+
141
+ /*****/
142
+ /* remove duplicated transactions from occ, and add the weight of the removed trsacts to the representative one */
143
+ /* duplicated trsacts are in occ[item_max]. Clear the queue when return */
144
+ /* T->flag&TRSACT_MAKE_NEW: make new trsact for representative
145
+ T->flag&TRSACT_INTSEC: take suffix intersection of the same trsacts
146
+ T->flag&TRSACT_UNION: take union of the same trsacts */
147
+ void TRSACT_merge_trsact (TRSACT *T, QUEUE *o, QUEUE_INT end);
148
+
149
+ /* remove the unified transactions from occ (consider T->occ_unit) */
150
+ void TRSACT_reduce_occ (TRSACT *T, QUEUE *occ);
151
+
152
+ #ifdef _alist_h_
153
+
154
+ /* occurrence deliver (only counting), for MALIST */
155
+ //void TRSACT_MALIST_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, MALIST *occ, ALIST_ID l, QUEUE_INT m);
156
+ //void TRSACT_MALIST_occ_deliver (TRSACT *TT, MALIST *occ, int l, int item_max);
157
+
158
+ #endif
159
+
160
+ #endif