nysol-take 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/mbiclique.rb +317 -0
- data/bin/mbipolish.rb +362 -0
- data/bin/mccomp.rb +235 -0
- data/bin/mclique.rb +295 -0
- data/bin/mclique2g.rb +105 -0
- data/bin/mcliqueInfo.rb +203 -0
- data/bin/mfriends.rb +202 -0
- data/bin/mgdiff.rb +252 -0
- data/bin/mhifriend.rb +456 -0
- data/bin/mhipolish.rb +465 -0
- data/bin/mitemset.rb +168 -0
- data/bin/mpal.rb +410 -0
- data/bin/mpolishing.rb +399 -0
- data/bin/msequence.rb +165 -0
- data/bin/mtra2g.rb +476 -0
- data/bin/mtra2gc.rb +360 -0
- data/ext/grhfilrun/extconf.rb +12 -0
- data/ext/grhfilrun/grhfilrun.c +85 -0
- data/ext/grhfilrun/src/_sspc.c +358 -0
- data/ext/grhfilrun/src/aheap.c +545 -0
- data/ext/grhfilrun/src/aheap.h +251 -0
- data/ext/grhfilrun/src/base.c +92 -0
- data/ext/grhfilrun/src/base.h +59 -0
- data/ext/grhfilrun/src/fstar.c +497 -0
- data/ext/grhfilrun/src/fstar.h +80 -0
- data/ext/grhfilrun/src/grhfil.c +214 -0
- data/ext/grhfilrun/src/itemset.c +713 -0
- data/ext/grhfilrun/src/itemset.h +170 -0
- data/ext/grhfilrun/src/problem.c +415 -0
- data/ext/grhfilrun/src/problem.h +179 -0
- data/ext/grhfilrun/src/queue.c +533 -0
- data/ext/grhfilrun/src/queue.h +182 -0
- data/ext/grhfilrun/src/sample.c +19 -0
- data/ext/grhfilrun/src/sspc.c +597 -0
- data/ext/grhfilrun/src/sspc2.c +491 -0
- data/ext/grhfilrun/src/stdlib2.c +1482 -0
- data/ext/grhfilrun/src/stdlib2.h +892 -0
- data/ext/grhfilrun/src/trsact.c +817 -0
- data/ext/grhfilrun/src/trsact.h +160 -0
- data/ext/grhfilrun/src/vec.c +745 -0
- data/ext/grhfilrun/src/vec.h +172 -0
- data/ext/lcmrun/extconf.rb +20 -0
- data/ext/lcmrun/lcmrun.cpp +99 -0
- data/ext/lcmrun/src/aheap.c +216 -0
- data/ext/lcmrun/src/aheap.h +111 -0
- data/ext/lcmrun/src/base.c +92 -0
- data/ext/lcmrun/src/base.h +59 -0
- data/ext/lcmrun/src/itemset.c +496 -0
- data/ext/lcmrun/src/itemset.h +157 -0
- data/ext/lcmrun/src/lcm.c +427 -0
- data/ext/lcmrun/src/problem.c +349 -0
- data/ext/lcmrun/src/problem.h +177 -0
- data/ext/lcmrun/src/queue.c +528 -0
- data/ext/lcmrun/src/queue.h +176 -0
- data/ext/lcmrun/src/sgraph.c +359 -0
- data/ext/lcmrun/src/sgraph.h +173 -0
- data/ext/lcmrun/src/stdlib2.c +1282 -0
- data/ext/lcmrun/src/stdlib2.h +823 -0
- data/ext/lcmrun/src/trsact.c +747 -0
- data/ext/lcmrun/src/trsact.h +159 -0
- data/ext/lcmrun/src/vec.c +731 -0
- data/ext/lcmrun/src/vec.h +171 -0
- data/ext/lcmseq0run/extconf.rb +20 -0
- data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
- data/ext/lcmseq0run/src/aheap.c +216 -0
- data/ext/lcmseq0run/src/aheap.h +111 -0
- data/ext/lcmseq0run/src/base.c +92 -0
- data/ext/lcmseq0run/src/base.h +59 -0
- data/ext/lcmseq0run/src/itemset.c +518 -0
- data/ext/lcmseq0run/src/itemset.h +157 -0
- data/ext/lcmseq0run/src/itemset_zero.c +522 -0
- data/ext/lcmseq0run/src/lcm_seq.c +446 -0
- data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
- data/ext/lcmseq0run/src/problem.c +439 -0
- data/ext/lcmseq0run/src/problem.h +179 -0
- data/ext/lcmseq0run/src/problem_zero.c +439 -0
- data/ext/lcmseq0run/src/queue.c +533 -0
- data/ext/lcmseq0run/src/queue.h +182 -0
- data/ext/lcmseq0run/src/stdlib2.c +1350 -0
- data/ext/lcmseq0run/src/stdlib2.h +864 -0
- data/ext/lcmseq0run/src/trsact.c +747 -0
- data/ext/lcmseq0run/src/trsact.h +159 -0
- data/ext/lcmseq0run/src/vec.c +779 -0
- data/ext/lcmseq0run/src/vec.h +172 -0
- data/ext/lcmseqrun/extconf.rb +20 -0
- data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
- data/ext/lcmseqrun/src/aheap.c +216 -0
- data/ext/lcmseqrun/src/aheap.h +111 -0
- data/ext/lcmseqrun/src/base.c +92 -0
- data/ext/lcmseqrun/src/base.h +59 -0
- data/ext/lcmseqrun/src/itemset.c +518 -0
- data/ext/lcmseqrun/src/itemset.h +157 -0
- data/ext/lcmseqrun/src/itemset_zero.c +522 -0
- data/ext/lcmseqrun/src/lcm_seq.c +447 -0
- data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
- data/ext/lcmseqrun/src/problem.c +439 -0
- data/ext/lcmseqrun/src/problem.h +179 -0
- data/ext/lcmseqrun/src/problem_zero.c +439 -0
- data/ext/lcmseqrun/src/queue.c +533 -0
- data/ext/lcmseqrun/src/queue.h +182 -0
- data/ext/lcmseqrun/src/stdlib2.c +1350 -0
- data/ext/lcmseqrun/src/stdlib2.h +864 -0
- data/ext/lcmseqrun/src/trsact.c +747 -0
- data/ext/lcmseqrun/src/trsact.h +159 -0
- data/ext/lcmseqrun/src/vec.c +779 -0
- data/ext/lcmseqrun/src/vec.h +172 -0
- data/ext/lcmtransrun/extconf.rb +18 -0
- data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
- data/ext/macerun/extconf.rb +20 -0
- data/ext/macerun/macerun.cpp +57 -0
- data/ext/macerun/src/aheap.c +217 -0
- data/ext/macerun/src/aheap.h +112 -0
- data/ext/macerun/src/itemset.c +491 -0
- data/ext/macerun/src/itemset.h +158 -0
- data/ext/macerun/src/mace.c +503 -0
- data/ext/macerun/src/problem.c +346 -0
- data/ext/macerun/src/problem.h +174 -0
- data/ext/macerun/src/queue.c +529 -0
- data/ext/macerun/src/queue.h +177 -0
- data/ext/macerun/src/sgraph.c +360 -0
- data/ext/macerun/src/sgraph.h +174 -0
- data/ext/macerun/src/stdlib2.c +993 -0
- data/ext/macerun/src/stdlib2.h +811 -0
- data/ext/macerun/src/vec.c +634 -0
- data/ext/macerun/src/vec.h +170 -0
- data/ext/sspcrun/extconf.rb +20 -0
- data/ext/sspcrun/src/_sspc.c +358 -0
- data/ext/sspcrun/src/aheap.c +545 -0
- data/ext/sspcrun/src/aheap.h +251 -0
- data/ext/sspcrun/src/base.c +92 -0
- data/ext/sspcrun/src/base.h +59 -0
- data/ext/sspcrun/src/fstar.c +496 -0
- data/ext/sspcrun/src/fstar.h +80 -0
- data/ext/sspcrun/src/grhfil.c +213 -0
- data/ext/sspcrun/src/itemset.c +713 -0
- data/ext/sspcrun/src/itemset.h +170 -0
- data/ext/sspcrun/src/problem.c +415 -0
- data/ext/sspcrun/src/problem.h +179 -0
- data/ext/sspcrun/src/queue.c +533 -0
- data/ext/sspcrun/src/queue.h +182 -0
- data/ext/sspcrun/src/sample.c +19 -0
- data/ext/sspcrun/src/sspc.c +598 -0
- data/ext/sspcrun/src/sspc2.c +491 -0
- data/ext/sspcrun/src/stdlib2.c +1482 -0
- data/ext/sspcrun/src/stdlib2.h +892 -0
- data/ext/sspcrun/src/trsact.c +817 -0
- data/ext/sspcrun/src/trsact.h +160 -0
- data/ext/sspcrun/src/vec.c +745 -0
- data/ext/sspcrun/src/vec.h +172 -0
- data/ext/sspcrun/sspcrun.cpp +54 -0
- data/lib/nysol/enumLcmEp.rb +338 -0
- data/lib/nysol/enumLcmEsp.rb +284 -0
- data/lib/nysol/enumLcmIs.rb +275 -0
- data/lib/nysol/enumLcmSeq.rb +143 -0
- data/lib/nysol/items.rb +201 -0
- data/lib/nysol/seqDB.rb +256 -0
- data/lib/nysol/take.rb +39 -0
- data/lib/nysol/taxonomy.rb +113 -0
- data/lib/nysol/traDB.rb +257 -0
- metadata +239 -0
@@ -0,0 +1,160 @@
|
|
1
|
+
/* QUEUE based Transaction library, including database reduction.
|
2
|
+
25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
|
3
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
4
|
+
/* This program is available for only academic use, basically.
|
5
|
+
Anyone can modify this program, but he/she has to write down
|
6
|
+
the change of the modification on the top of the source code.
|
7
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
8
|
+
If one wants to re-distribute this code, please
|
9
|
+
refer the newest code, and show the link to homepage of
|
10
|
+
Takeaki Uno, to notify the news about the codes for the users. */
|
11
|
+
|
12
|
+
#ifndef _trsact_h_
|
13
|
+
#define _trsact_h_
|
14
|
+
|
15
|
+
#include"vec.h"
|
16
|
+
#include"base.h"
|
17
|
+
|
18
|
+
/* TRSACT: a transaction database (set family T with transaction weights, item permutations, size/weight bounds) plus the work areas used for finding identical transactions and for extra transactions. */
typedef struct {
|
19
|
+
unsigned char type; // mark to identify type of the structure
|
20
|
+
char *fname; // input file name
|
21
|
+
int flag; // option bits; tested against the TRSACT_* constants (e.g. T->flag&TRSACT_MAKE_NEW)
|
22
|
+
SETFAMILY T; // transaction
|
23
|
+
WEIGHT *w, *pw; // weight/positive-weight of transactions
|
24
|
+
|
25
|
+
QUEUE_INT clms_org, clm_max, clms_end; // #items in original file, max size of clms, and max of (original item, internal item)
|
26
|
+
VEC_ID rows_org, row_max; // #transactions in the original file; row_max is presumably the max size of rows -- TODO confirm
|
27
|
+
VEC_ID end1, sep; // #trsact in 1st file, the ID of the last permed trsact of 1st file
|
28
|
+
size_t eles_org; // #elements in the original file
|
29
|
+
WEIGHT total_w, total_pw, total_w_org, total_pw_org;
|
30
|
+
WEIGHT th; // threshold for frequency of items
|
31
|
+
PERM *perm, *trperm; // original item permutation loaded from permutation file (and inverse)
|
32
|
+
|
33
|
+
// lower/upper bound of #elements in a column/row. A column or row out of range will be ignored
|
34
|
+
VEC_ID clm_lb, clm_ub;
|
35
|
+
QUEUE_ID row_lb, row_ub;
|
36
|
+
WEIGHT w_lb, w_ub;
|
37
|
+
|
38
|
+
VEC_ID str_num; // number of database (itemset stream/string datasets) in T
|
39
|
+
VEC_ID *head, *strID; // the head (beginning) of each stream, stream ID of each transaction
|
40
|
+
int occ_unit; // consulted by TRSACT_reduce_occ; NOTE(review): exact meaning is not documented in this header -- confirm
|
41
|
+
|
42
|
+
// for finding same transactions
|
43
|
+
QUEUE jump, *OQ; // queue of non-empty buckets, used in find_same_transactions
|
44
|
+
VEC_ID *mark; // marks for transactions
|
45
|
+
QUEUE_INT **shift; // memory for shift positions of each transaction
|
46
|
+
char *sc; // flag for non-active (in-frequent) items
|
47
|
+
|
48
|
+
// for extra transactions
|
49
|
+
VEC_ID new_t; // the start ID of un-used transactions
|
50
|
+
BASE buf; // buffer for transaction
|
51
|
+
BASE wbuf; // buffer for itemweights
|
52
|
+
char *fname2, *wfname, *wfname2, *item_wfname, *item_wfname2, *pfname; // weight file name, and item-weight file name, item-permutation file name
|
53
|
+
int flag2; // NOTE(review): presumably a second set of option bits; its values are not documented in this header -- confirm
|
54
|
+
} TRSACT;
|
55
|
+
|
56
|
+
/* Single-bit option flags (1<<16 through 1<<27). TRSACT_MAKE_NEW, TRSACT_INTSEC and TRSACT_UNION are tested against T->flag; presumably the others are as well -- TODO confirm. */
#define TRSACT_FRQSORT 65536 // sort transactions in decreasing order
|
57
|
+
#define TRSACT_ITEMWEIGHT 131072 // initialize itemweight by transaction weights
|
58
|
+
#define TRSACT_SHRINK 262144 // do not allocate memory for shrink, but do for mining
|
59
|
+
#define TRSACT_MULTI_STREAM 524288 // separate the datasets at each empty transaction
|
60
|
+
#define TRSACT_UNION 1048576 // take union of transactions, at the database reduction
|
61
|
+
#define TRSACT_INTSEC 2097152 // take intersection of transactions, at the database reduction
|
62
|
+
#define TRSACT_MAKE_NEW 4194304 // make new transaction for each
|
63
|
+
#define TRSACT_ALLOC_OCC 8388608 // NOTE(review): original comment was copied from TRSACT_MAKE_NEW; presumably requests allocation of occurrence queues -- confirm
|
64
|
+
#define TRSACT_DELIV_SC 16777216 // look T->sc when delivery
|
65
|
+
#define TRSACT_NEGATIVE 33554432 // flag for whether some transaction weights are negative or not
|
66
|
+
//#define TRSACT_INIT_SHRINK 65536 // allocate memory for database reduction (disabled; the value 65536 is now used by TRSACT_FRQSORT)
|
67
|
+
#define TRSACT_WRITE_PERM 67108864 // write item-order to file
|
68
|
+
#define TRSACT_1ST_SHRINK 134217728 // NOTE(review): original comment was copied from TRSACT_WRITE_PERM; presumably relates to the first database shrink -- confirm
|
69
|
+
|
70
|
+
/* The #ifndef guard lets a definition made before this point override the default below. */
#ifndef TRSACT_DEFAULT_WEIGHT
|
71
|
+
#define TRSACT_DEFAULT_WEIGHT 0 // default weight of the transaction, for missing weights in weight file
|
72
|
+
#endif
|
73
|
+
|
74
|
+
/* print transactions */
|
75
|
+
void TRSACT_print (TRSACT *T, QUEUE *occ, PERM *p);
|
76
|
+
void TRSACT_prop_print (TRSACT *T);
|
77
|
+
|
78
|
+
/**************************************************************/
|
79
|
+
void TRSACT_init (TRSACT *T);
|
80
|
+
|
81
|
+
/**************************************************************/
|
82
|
+
void TRSACT_end (TRSACT *T);
|
83
|
+
|
84
|
+
/*****************************************/
|
85
|
+
/* scan file "fp" with weight file wfp and count #items, #transactions in the file. */
|
86
|
+
/* count weight only if wfp!=NULL */
|
87
|
+
/* T->clms_org, rows_org, eles_org := #items, #transactions, #all items */
|
88
|
+
/* ignore the transactions of size not in range T->clm_lb - clm_ub */
|
89
|
+
/* T->total_w, total_pw := sum of (positive) weights of transactions */
|
90
|
+
/* C.clmt[i],C.cw[i] := the number/(sum of weights) of transactions including i */
|
91
|
+
/****************************************/
|
92
|
+
void TRSACT_file_count (TRSACT *T, FILE_COUNT *C, FILE2 *fp, char *wf);
|
93
|
+
|
94
|
+
/* allocate memory, set permutation, and free C.clmt,rowt,rw,cw */
|
95
|
+
int TRSACT_alloc (TRSACT *T, FILE_COUNT *C);
|
96
|
+
|
97
|
+
/* load the file to allocated memory according to permutation, and free C.rw, C.cw */
|
98
|
+
void TRSACT_file_read (TRSACT *T, FILE2 *fp, FILE_COUNT *C, VEC_ID *t, int flag, char *iwfname);
|
99
|
+
|
100
|
+
/*****************************************/
|
101
|
+
/* load transaction file to TRSACT */
|
102
|
+
void TRSACT_load (TRSACT *T);
|
103
|
+
|
104
|
+
/* occurrence deliver (only counting) */
|
105
|
+
/* WARNING: next cell of the last item of each transaction must be INTHUGE */
|
106
|
+
/* compute occurrence for items less than max item, in the database induced
|
107
|
+
by occ */
|
108
|
+
/* if jump!=0, all i with non-zero occ[i].t will be inserted to jump */
|
109
|
+
/* be careful for overflow of jump */
|
110
|
+
/* if occ==NULL, scan all transactions */
|
111
|
+
/* flag&1: count only positive weights */
|
112
|
+
void TRSACT_delivery_iter (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, VEC_ID t, QUEUE_INT m);
|
113
|
+
void TRSACT_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, QUEUE *occ, QUEUE_INT m);
|
114
|
+
// QUEUE *TRSACT_alloc_occ (TRSACT *T, QUEUE_INT end);
|
115
|
+
//QUEUE_ID TRSACT_occ_dup (SETFAMILY *S, QUEUE *OQ, QUEUE *jump, WEIGHT *occ_w, WEIGHT *occ_pw);
|
116
|
+
|
117
|
+
/**************************************************************/
|
118
|
+
/* Find identical transactions in a subset of transactions, by radix-sort like method */
|
119
|
+
/* infrequent items (refer LCM_occ) and items larger than item_max are ignored */
|
120
|
+
/* INPUT: T:transactions, occ:subset of T represented by indices, result:array for output, item_max:largest item not to be ignored */
|
121
|
+
/* OUTPUT: if transactions i1, i2,..., ik are the same, they have same value in T->mark[i]
|
122
|
+
(not all) isolated transaction may have mark 1 */
|
123
|
+
/* use 0 to end-1 of QQ temporarily, and QQ[i].t and QQ[i].s have to be 0. */
|
124
|
+
/*************************************************************************/
|
125
|
+
void TRSACT_find_same (TRSACT *T, QUEUE *occ, QUEUE_INT end);
|
126
|
+
|
127
|
+
/* copy transaction t to tt (only items i s.t. pw[i]>=th) **/
|
128
|
+
void TRSACT_copy (TRSACT *T, VEC_ID tt, VEC_ID t, QUEUE_INT end);
|
129
|
+
|
130
|
+
/* intersection of transaction t and tt (only items i s.t. pw[i]>=th) **/
|
131
|
+
/* shift is the array of pointers indicating the start of each transaction */
|
132
|
+
void TRSACT_suffix_and (TRSACT *T, VEC_ID tt, VEC_ID t);
|
133
|
+
|
134
|
+
/* take union of transaction t to tt (only items i s.t. pw[i]>=th) */
|
135
|
+
/* CAUTION: t has to be placed at the last of trsact_buf2. */
|
136
|
+
/* if the size of t increases, the following memory will be overwritten */
|
137
|
+
/* if memory (T->buf) is short, do nothing and return 1 (NOTE(review): the prototype below is declared void -- confirm which is stale) */
|
138
|
+
void TRSACT_itemweight_union (TRSACT *T, VEC_ID tt, VEC_ID t);
|
139
|
+
|
140
|
+
|
141
|
+
/*****/
|
142
|
+
/* remove duplicated transactions from occ, and add the weight of the removed trsacts to the representative one */
|
143
|
+
/* duplicated trsacts are in occ[item_max]. Clear the queue when return */
|
144
|
+
/* T->flag&TRSACT_MAKE_NEW: make new trsact for representative
|
145
|
+
T->flag&TRSACT_INTSEC: take suffix intersection of the same trsacts
|
146
|
+
T->flag&TRSACT_UNION: take union of the same trsacts */
|
147
|
+
void TRSACT_merge_trsact (TRSACT *T, QUEUE *o, QUEUE_INT end);
|
148
|
+
|
149
|
+
/* remove the unified transactions from occ (consider T->occ_unit) */
|
150
|
+
void TRSACT_reduce_occ (TRSACT *T, QUEUE *occ);
|
151
|
+
|
152
|
+
#ifdef _alist_h_
|
153
|
+
|
154
|
+
/* occurrence deliver (only counting), for MALIST */
|
155
|
+
//void TRSACT_MALIST_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, MALIST *occ, ALIST_ID l, QUEUE_INT m);
|
156
|
+
//void TRSACT_MALIST_occ_deliver (TRSACT *TT, MALIST *occ, int l, int item_max);
|
157
|
+
|
158
|
+
#endif
|
159
|
+
|
160
|
+
#endif
|