scbi_fqbin 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/{README.rdoc → README.md} +0 -0
- data/Rakefile +8 -28
- data/lib/scbi_fqbin.rb +3 -5
- data/lib/scbi_fqbin/fastabin.rb +411 -0
- data/lib/scbi_fqbin/fastq_file_c.rb +373 -0
- data/lib/scbi_fqbin/fbin_file.rb +1 -1
- data/lib/scbi_fqbin/t.rb +9 -0
- data/lib/scbi_fqbin/t2.rb +12 -0
- data/lib/scbi_fqbin/version.rb +3 -0
- data/lib_fqbin_src.zip +0 -0
- data/lib_fqbin_src/Makefile +66 -0
- data/lib_fqbin_src/fq +0 -0
- data/lib_fqbin_src/fq.c +165 -0
- data/lib_fqbin_src/hash_fqbin +0 -0
- data/lib_fqbin_src/hash_fqbin.c +212 -0
- data/lib_fqbin_src/idx_fqbin +21 -0
- data/lib_fqbin_src/iterate_fqbin +0 -0
- data/lib_fqbin_src/iterate_fqbin.c +136 -0
- data/lib_fqbin_src/lib_fqbin.c +1748 -0
- data/lib_fqbin_src/lib_fqbin.h +194 -0
- data/lib_fqbin_src/mk_fqbin +0 -0
- data/lib_fqbin_src/mk_fqbin.c +138 -0
- data/lib_fqbin_src/other/bwxform.c +915 -0
- data/lib_fqbin_src/other/bwxform.h +74 -0
- data/lib_fqbin_src/other/find_in_index.c +130 -0
- data/lib_fqbin_src/other/hash_fbin_nogzchunks.c +164 -0
- data/lib_fqbin_src/other/idx_fqbin +0 -0
- data/lib_fqbin_src/other/idx_fqbin.c +67 -0
- data/lib_fqbin_src/other/make_hsh.sh +14 -0
- data/lib_fqbin_src/other/rd_extras_fbin.c +45 -0
- data/lib_fqbin_src/read_fq +0 -0
- data/lib_fqbin_src/read_fq.c +143 -0
- data/lib_fqbin_src/read_fqbin +0 -0
- data/lib_fqbin_src/read_fqbin.c +101 -0
- data/lib_fqbin_src/sort_index +9 -0
- data/lib_fqbin_src/test.rb +13 -0
- data/scbi_fqbin.gemspec +25 -0
- data/test/build.rake +15 -0
- data/test/fbinfile +0 -0
- data/test/fbinfile.index +0 -0
- data/test/no_test_fill_file.rb +66 -0
- data/test/old/app.rb +43 -0
- data/test/old/bin/iterate_fastabin.rb +54 -0
- data/test/old/bin/mk_fastabin.rb +22 -0
- data/test/old/bin/rd_fastabin.rb +36 -0
- data/test/old/bin/rd_fq.rb +20 -0
- data/test/old/bioruby.rb +27 -0
- data/test/old/c/Makefile +34 -0
- data/test/old/c/fbin_lib.zip +0 -0
- data/test/old/c/iterate_fbin.c +54 -0
- data/test/old/c/libreria_gz.c +707 -0
- data/test/old/c/libreria_gz.h +127 -0
- data/test/old/c/main.c +86 -0
- data/test/old/c/mk_fbin.c +24 -0
- data/test/old/c/rd_seq_fbin.c +44 -0
- data/test/old/c/test_ffi/a.out +0 -0
- data/test/old/c/test_ffi/app.c +26 -0
- data/test/old/c/test_ffi/app.rb +19 -0
- data/test/old/c/test_ffi/liblibreria_gz.dylib +0 -0
- data/test/old/c/test_ffi/libmylibrary.dylib +0 -0
- data/test/old/c/test_ffi/my_library.rb +23 -0
- data/test/old/c/test_ffi/mylibrary.c +22 -0
- data/test/old/c/test_ffi/mylibrary.h +6 -0
- data/test/old/c/usage_instructions.txt +62 -0
- data/test/old/ext/Makefile +187 -0
- data/test/old/ext/Makefile.dario +34 -0
- data/test/old/ext/extconf.rb +8 -0
- data/test/old/ext/mk_fbin.c +24 -0
- data/test/old/ext/sample/extras.txt +4 -0
- data/{.gemtest → test/old/ext/sample/extras2.txt} +0 -0
- data/test/old/ext/sample/f1.fasta +10 -0
- data/test/old/ext/sample/f1.fasta.qual +10 -0
- data/test/old/ext/sample/f1.fbin +0 -0
- data/test/old/ext/sample/f1.fbin.index +0 -0
- data/test/old/ext/sample/main.c +86 -0
- data/test/old/ext/usage_instructions.txt +62 -0
- data/test/old/t_scbi_fastabin.rb +140 -0
- data/test/read_tests/10-original_sizes.sh +16 -0
- data/test/read_tests/20-fq_time.sh +23 -0
- data/test/read_tests/30-fbin_read_time.sh +23 -0
- data/test/read_tests/40-bsc_read_time.sh +21 -0
- data/test/read_tests/50-fq_time_x4.sh +25 -0
- data/test/read_tests/60-fbin_read_time_x4.sh +24 -0
- data/test/read_tests/70-bsc_read_time_x4.sh +32 -0
- data/test/results_bio_scbi_fasta.txt +11 -0
- data/test/{test_scbi_fbin_file.rb → scbi_fbin_file_test.rb} +0 -0
- data/test/speed.txt +81 -0
- data/test/t_scbi_fasta.rb +12 -0
- data/test/write_tests/10-original_sizes.sh +16 -0
- data/test/write_tests/20-zip_time.sh +17 -0
- data/test/write_tests/30-mk_fbin_time.sh +23 -0
- data/test/write_tests/31-mk_fbin_time_f30.sh +21 -0
- data/test/write_tests/40-gzip_time.sh +16 -0
- data/test/write_tests/41-bsc_time.sh +16 -0
- data/test/write_tests/50-zip_sizes.sh +16 -0
- data/test/write_tests/60-fbin_sizes.sh +17 -0
- data/test/write_tests/61-fbin_sizes_f30.sh +16 -0
- data/test/write_tests/70-gzip_sizes.sh +17 -0
- data/test/write_tests/80-bsc_sizes.sh +17 -0
- data/website/index.html +87 -0
- data/website/index.txt +81 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +159 -0
- data/website/template.html.erb +50 -0
- metadata +208 -95
- data/History.txt +0 -19
- data/Manifest.txt +0 -12
- data/PostInstall.txt +0 -7
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
@@ -0,0 +1,74 @@
|
|
1
|
+
/***************************************************************************
|
2
|
+
* Header for Burrows-Wheeler Transform Library
|
3
|
+
*
|
4
|
+
* File : bwxform.h
|
5
|
+
* Purpose : Provides functions that apply and reverse the Burrows-Wheeler transform
|
6
|
+
* (with or without move-to-front coding/decoding
|
7
|
+
* transformation).
|
8
|
+
* Author : Michael Dipperstein
|
9
|
+
* Date : August 20, 2004
|
10
|
+
*
|
11
|
+
****************************************************************************
|
12
|
+
* UPDATES
|
13
|
+
*
|
14
|
+
* $Id: bwxform.h,v 1.3 2007/09/17 13:21:19 michael Exp $
|
15
|
+
* $Log: bwxform.h,v $
|
16
|
+
* Revision 1.3 2007/09/17 13:21:19 michael
|
17
|
+
* Changes required for LGPL v3.
|
18
|
+
*
|
19
|
+
* Revision 1.2 2005/05/02 13:35:49 michael
|
20
|
+
* Update e-mail address.
|
21
|
+
*
|
22
|
+
* Revision 1.1.1.1 2004/08/23 04:34:18 michael
|
23
|
+
* Burrows-Wheeler Transform
|
24
|
+
*
|
25
|
+
****************************************************************************
|
26
|
+
*
|
27
|
+
* bwxform: An ANSI C Burrows-Wheeler Transform/Reverse Transform Routines
|
28
|
+
* Copyright (C) 2004-2005, 2007 by
|
29
|
+
* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
|
30
|
+
*
|
31
|
+
* This file is part of the BWT library.
|
32
|
+
*
|
33
|
+
* The BWT library is free software; you can redistribute it and/or modify
|
34
|
+
* it under the terms of the GNU Lesser General Public License as published
|
35
|
+
* by the Free Software Foundation; either version 3 of the License, or (at
|
36
|
+
* your option) any later version.
|
37
|
+
*
|
38
|
+
* The BWT library is distributed in the hope that it will be useful, but
|
39
|
+
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
40
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
|
41
|
+
* General Public License for more details.
|
42
|
+
*
|
43
|
+
* You should have received a copy of the GNU Lesser General Public License
|
44
|
+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
45
|
+
*
|
46
|
+
***************************************************************************/
|
47
|
+
|
48
|
+
#ifndef _BWXFORM_H_
|
49
|
+
#define _BWXFORM_H_
|
50
|
+
|
51
|
+
/***************************************************************************
|
52
|
+
* CONSTANTS
|
53
|
+
***************************************************************************/
|
54
|
+
#ifndef FALSE
|
55
|
+
#define FALSE 0
|
56
|
+
#endif
|
57
|
+
|
58
|
+
#ifndef TRUE
|
59
|
+
#define TRUE 1
|
60
|
+
#endif
|
61
|
+
|
62
|
+
/***************************************************************************
|
63
|
+
* PROTOTYPES
|
64
|
+
***************************************************************************/
|
65
|
+
/* Transform inFile */
|
66
|
+
int BWXformFile(char *inFile, char *outFile, char mtf);
|
67
|
+
|
68
|
+
/* Reverse Transform inFile*/
|
69
|
+
int BWReverseXformFile(char *inFile, char *outFile, char mtf);
|
70
|
+
|
71
|
+
int BWXform(char *inString, char *outString, int mtf);
|
72
|
+
int BWReverseXform(char *inString, char *outString, int mtf, long size);
|
73
|
+
|
74
|
+
#endif /* ndef _BWXFORM_H_ */
|
@@ -0,0 +1,130 @@
|
|
1
|
+
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
#include <time.h>
|
5
|
+
|
6
|
+
|
7
|
+
#include <sys/types.h>
|
8
|
+
#include <sys/stat.h>
|
9
|
+
#include <fcntl.h>
|
10
|
+
#include <errno.h>
|
11
|
+
|
12
|
+
#include <zlib.h>
|
13
|
+
#include <zlib.h>
|
14
|
+
#include <stdlib.h>
|
15
|
+
|
16
|
+
// Maximum file name length (including the .index suffix)
|
17
|
+
#define MAXFNAME 512
|
18
|
+
|
19
|
+
// Maximum length of the name of a sequence
|
20
|
+
#define MAXSEQNAME 1024
|
21
|
+
#define MAXSEQLENGTH 150000000
|
22
|
+
#define DEBUG 1
|
23
|
+
#define FALSE 0
|
24
|
+
#define TRUE 1
|
25
|
+
|
26
|
+
#define INVALID_FASTQ_FORMAT -5
|
27
|
+
#define INVALID_FASTA_FORMAT -6
|
28
|
+
|
29
|
+
#define SEQ_METADATA 10000
|
30
|
+
|
31
|
+
|
32
|
+
// int mystrcmp(const char *a,const char *b)
|
33
|
+
// {
|
34
|
+
// return strlen(a)-strlen(b)?strlen(a)-strlen(b):strcmp(a,b);
|
35
|
+
// }
|
36
|
+
|
37
|
+
|
38
|
+
// Unimplemented placeholder.
//
// The previous definition declared `line` without a type and had an empty
// body in a non-void function, so it could not compile. It is kept as a
// well-formed stub because nothing in this file calls it.
//
// NOTE(review): `char *line` is an assumption based on the parameter name
// (a line buffer); confirm the intended type before implementing.
//
// Returns 0 unconditionally.
int first_line(gzFile file, char *line){
    (void)file;
    (void)line;
    return 0;
}
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
long long find_seq_in_hash(char *filename,char *sname)
|
47
|
+
{
|
48
|
+
|
49
|
+
char file_name[MAXFNAME];
|
50
|
+
// char indexname[MAXFNAME];
|
51
|
+
int error;
|
52
|
+
char sname1[MAXSEQNAME];// sequence name
|
53
|
+
char sname2[MAXSEQNAME];// sequence name
|
54
|
+
long long gz_chunk=0;
|
55
|
+
char tmp[SEQ_METADATA];
|
56
|
+
long long res=-1;
|
57
|
+
|
58
|
+
// to save min, max sequences and current chunk
|
59
|
+
char min_name[MAXSEQNAME];
|
60
|
+
char max_name[MAXSEQNAME];
|
61
|
+
long long current_chunk=0;
|
62
|
+
|
63
|
+
|
64
|
+
strcpy(min_name,"");
|
65
|
+
strcpy(max_name,"");
|
66
|
+
|
67
|
+
// calc index and hash name
|
68
|
+
// snprintf(indexname,MAXFNAME,"%s.index",filename);
|
69
|
+
snprintf(file_name,MAXFNAME,"%s.index",filename);
|
70
|
+
|
71
|
+
// open index and hash file
|
72
|
+
gzFile gzhash_file=gzopen(file_name,"r");
|
73
|
+
|
74
|
+
if (gzhash_file==NULL) {
|
75
|
+
fprintf(stderr,"error opening gzhash_file :%s\n",gzerror(gzhash_file,&error));
|
76
|
+
return -2;
|
77
|
+
}
|
78
|
+
|
79
|
+
// repeat until EOF
|
80
|
+
while ( gzgets(gzhash_file,tmp,sizeof(tmp))!=Z_NULL ) {
|
81
|
+
|
82
|
+
// printf("%s\n",tmp);
|
83
|
+
// parse string
|
84
|
+
int reads=sscanf(tmp,"%s %s %lld",sname1,sname2,&gz_chunk);
|
85
|
+
|
86
|
+
if(reads==3) // valid index line
|
87
|
+
{
|
88
|
+
//
|
89
|
+
if((mystrcmp(sname,sname1)>=0) && (mystrcmp(sname,sname2)<=0))
|
90
|
+
{
|
91
|
+
printf("%s in [%s,%s]\n",sname,sname1,sname2);
|
92
|
+
res = gz_chunk;
|
93
|
+
break;
|
94
|
+
}else{
|
95
|
+
printf("%s NOT IN [%s,%s]\n",sname,sname1,sname2);
|
96
|
+
}
|
97
|
+
|
98
|
+
}
|
99
|
+
|
100
|
+
}
|
101
|
+
|
102
|
+
// close files
|
103
|
+
gzclose(gzhash_file);
|
104
|
+
|
105
|
+
return res;
|
106
|
+
}
|
107
|
+
|
108
|
+
|
109
|
+
/*******************************************************/
|
110
|
+
/* main */
|
111
|
+
/*******************************************************/
|
112
|
+
// Entry point: `prog fbin_index_file seq_name`.
//
// Prints the result of two fixed mystrcmp() comparisons, then looks up
// seq_name with find_seq_in_hash() and prints the value it returns.
// Exits with -1 after printing the usage line when the argument count is
// not exactly two, and with 0 otherwise.
int main(int argc, char *argv[])
{
    if (argc == 3) {
        // Fixed sample comparisons, printed before the lookup.
        int short_vs_long = mystrcmp("SRR314795.1", "SRR314795.1000000");
        int long_vs_short = mystrcmp("SRR314795.1000000", "SRR314795.9");
        printf("RES: %d,%d\n", short_vs_long, long_vs_short);

        long long found = find_seq_in_hash(argv[1], argv[2]);
        printf("Chunk: %lld\n", found);

        return 0;
    }

    printf("Usage %s fbin_index_file seq_name\n\n", argv[0]);
    return -1;
}
|
130
|
+
|
@@ -0,0 +1,164 @@
|
|
1
|
+
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
#include <time.h>
|
5
|
+
|
6
|
+
|
7
|
+
#include <sys/types.h>
|
8
|
+
#include <sys/stat.h>
|
9
|
+
#include <fcntl.h>
|
10
|
+
#include <errno.h>
|
11
|
+
|
12
|
+
#include <zlib.h>
|
13
|
+
#include <zlib.h>
|
14
|
+
#include <stdlib.h>
|
15
|
+
|
16
|
+
// Maximum file name length (including the .index.hash suffix)
|
17
|
+
#define MAXFNAME 512
|
18
|
+
|
19
|
+
// Maximum length of the name of a sequence
|
20
|
+
#define MAXSEQNAME 1024
|
21
|
+
#define MAXSEQLENGTH 150000000
|
22
|
+
#define DEBUG 1
|
23
|
+
#define FALSE 0
|
24
|
+
#define TRUE 1
|
25
|
+
|
26
|
+
#define INVALID_FASTQ_FORMAT -5
|
27
|
+
#define INVALID_FASTA_FORMAT -6
|
28
|
+
|
29
|
+
#define SEQ_METADATA 10000
|
30
|
+
|
31
|
+
// creates a hash from an index file with the desired chunk size. Chunk size can be
|
32
|
+
// adjusted to fit a good compromise between access speed and used space.
|
33
|
+
int hash_index_file(char *filename, int chunk_size, int skip_sort)
|
34
|
+
{
|
35
|
+
|
36
|
+
char hash_file_name[MAXFNAME];
|
37
|
+
char indexname[MAXFNAME];
|
38
|
+
|
39
|
+
char sname[MAXSEQNAME];// sequence name
|
40
|
+
long long beginH, gz_chunk=0;
|
41
|
+
char tmp[SEQ_METADATA];
|
42
|
+
int res=0;
|
43
|
+
int error;
|
44
|
+
|
45
|
+
// to save min, max sequences and current chunk
|
46
|
+
char min_name[MAXSEQNAME];
|
47
|
+
char max_name[MAXSEQNAME];
|
48
|
+
long long current_chunk=0;
|
49
|
+
long long count=0;
|
50
|
+
|
51
|
+
|
52
|
+
strcpy(min_name,"");
|
53
|
+
strcpy(max_name,"");
|
54
|
+
|
55
|
+
// calc index and hash name
|
56
|
+
snprintf(indexname,MAXFNAME,"%s.index",filename);
|
57
|
+
snprintf(hash_file_name,MAXFNAME,"%s.index.hash",filename);
|
58
|
+
|
59
|
+
// sort index file by external command
|
60
|
+
if(skip_sort==0)
|
61
|
+
{
|
62
|
+
char cmd[10000];
|
63
|
+
snprintf(cmd,10000,"sort_index.sh %s",indexname);
|
64
|
+
system(cmd);
|
65
|
+
}
|
66
|
+
|
67
|
+
// use sorted index
|
68
|
+
// snprintf(indexname,MAXFNAME,"%s.index.sort",filename);
|
69
|
+
|
70
|
+
// open index and hash file
|
71
|
+
gzFile gzhash_file=gzopen(hash_file_name,"wb");
|
72
|
+
gzFile gzfile_index=gzopen(indexname,"r");
|
73
|
+
|
74
|
+
if (gzfile_index==NULL) {
|
75
|
+
fprintf(stderr,"error opening gzfile_index :%s\n",gzerror(gzfile_index,&error));
|
76
|
+
return -2;
|
77
|
+
}
|
78
|
+
|
79
|
+
if (gzhash_file==NULL) {
|
80
|
+
fprintf(stderr,"error opening gzhash_file :%s\n",gzerror(gzhash_file,&error));
|
81
|
+
return -2;
|
82
|
+
}
|
83
|
+
|
84
|
+
// repeat until EOF
|
85
|
+
while ( gzgets(gzfile_index,tmp,sizeof(tmp))!=Z_NULL ) {
|
86
|
+
|
87
|
+
// parse string
|
88
|
+
sscanf(tmp,"%s %lld %lld",sname,&gz_chunk,&beginH);
|
89
|
+
|
90
|
+
if(strcmp(sname,"UMACOMPRESSEDFORMAT")!=0) // valid index line
|
91
|
+
{
|
92
|
+
|
93
|
+
// clear chunk_data if any
|
94
|
+
// if (gz_chunk!=current_chunk){
|
95
|
+
if ((count%chunk_size)==0){
|
96
|
+
if (strcmp(min_name,"")!=0){
|
97
|
+
// there are data to write
|
98
|
+
res=gzprintf(gzhash_file,"%s %s %lld\n",min_name,max_name,current_chunk);
|
99
|
+
}
|
100
|
+
strcpy(min_name,"");
|
101
|
+
strcpy(max_name,"");
|
102
|
+
// current_chunk=gz_chunk;
|
103
|
+
current_chunk = gztell(gzfile_index);
|
104
|
+
}
|
105
|
+
|
106
|
+
// save min_name
|
107
|
+
if((strcmp(min_name,"")==0) || (strcmp(sname,min_name)<0))
|
108
|
+
{
|
109
|
+
// replace min_name
|
110
|
+
strcpy(min_name,sname);
|
111
|
+
}
|
112
|
+
|
113
|
+
//save max_name
|
114
|
+
if((strcmp(max_name,"")==0) || (strcmp(sname,max_name)>0))
|
115
|
+
{
|
116
|
+
strcpy(max_name,sname);
|
117
|
+
}
|
118
|
+
|
119
|
+
count++;
|
120
|
+
}
|
121
|
+
|
122
|
+
}
|
123
|
+
|
124
|
+
if (strcmp(min_name,"")!=0){
|
125
|
+
// there are data to write
|
126
|
+
res=gzprintf(gzhash_file,"%s %s %lld\n",min_name,max_name,current_chunk);
|
127
|
+
}
|
128
|
+
|
129
|
+
// close files
|
130
|
+
gzclose(gzhash_file);
|
131
|
+
gzclose(gzfile_index);
|
132
|
+
|
133
|
+
return 0;
|
134
|
+
}
|
135
|
+
|
136
|
+
|
137
|
+
/*******************************************************/
|
138
|
+
/* main */
|
139
|
+
/*******************************************************/
|
140
|
+
// Entry point: `prog fbin_file [chunk_size [--skip_sort]]`.
//
// chunk_size defaults to 10000. It is now read whenever a second argument is
// present; previously it was ignored as soon as a third argument was given,
// so "file 500 --skip_sort" silently used 10000. As before, the mere presence
// of a third argument enables skip-sort; its text is not inspected.
//
// Returns -1 on a usage error or an invalid chunk_size, otherwise the result
// of hash_index_file().
int main(int argc, char *argv[])
{
    int chunk_size = 10000;
    int skip_sort = 0;

    // check params
    if (argc < 2) {
        printf("Usage %s fbin_file [chunk_size [--skip_sort]]\n\n", argv[0]);
        return -1;
    }

    if (argc >= 3) {
        // Largest value an int can hold, derived without <limits.h>
        // (assumes unsigned int has exactly one more value bit than int).
        const long max_int = (long)(~0u >> 1);
        char *end = NULL;

        errno = 0;
        long parsed = strtol(argv[2], &end, 10);

        // Reject empty input, trailing garbage, overflow and non-positive
        // sizes; a zero size would make the chunking modulo undefined.
        if (errno != 0 || end == argv[2] || *end != '\0' ||
            parsed <= 0 || parsed > max_int) {
            fprintf(stderr, "Invalid chunk_size \"%s\": expected a positive integer\n", argv[2]);
            return -1;
        }
        chunk_size = (int)parsed;
    }

    if (argc >= 4) {
        skip_sort = 1;
    }

    return hash_index_file(argv[1], chunk_size, skip_sort);
}
|
164
|
+
|
Binary file
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#include "lib_fqbin.h"
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <ctype.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include <stdlib.h>
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
#include <unistd.h>
|
10
|
+
|
11
|
+
|
12
|
+
// Writes the command-line synopsis to stdout and terminates the process
// with exit status -1. Never returns.
void usage(){
    fputs("Usage: idx_fqbin fqbin_file\n\n", stdout);
    exit(-1);
}
|
19
|
+
|
20
|
+
/*******************************************************/
|
21
|
+
/* main */
|
22
|
+
/*******************************************************/
|
23
|
+
int main(int argc, char *argv[])
|
24
|
+
{
|
25
|
+
|
26
|
+
char *fasta=NULL;
|
27
|
+
char *qual=NULL;
|
28
|
+
char *extras=NULL;
|
29
|
+
int size=5000;
|
30
|
+
int res=0;
|
31
|
+
|
32
|
+
int ch;
|
33
|
+
|
34
|
+
int output_fasta = 0;
|
35
|
+
int output_qual = 0;
|
36
|
+
|
37
|
+
while ((ch = getopt(argc, argv, "h")) != -1) {
|
38
|
+
switch (ch) {
|
39
|
+
case 'h':
|
40
|
+
usage();
|
41
|
+
break;
|
42
|
+
case '?':
|
43
|
+
default:
|
44
|
+
usage();
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
argc -= optind;
|
49
|
+
argv += optind;
|
50
|
+
// printf("argc: %d", argc);
|
51
|
+
// printf("argv: %s", argv[0]);
|
52
|
+
|
53
|
+
if (argc!=1)
|
54
|
+
{
|
55
|
+
usage();
|
56
|
+
}
|
57
|
+
|
58
|
+
|
59
|
+
if (regenerate_index(argv[0])==-1){
|
60
|
+
printf("File %s does not exists",argv[0]);
|
61
|
+
exit(-1);
|
62
|
+
}
|
63
|
+
|
64
|
+
exit(0);
|
65
|
+
|
66
|
+
}
|
67
|
+
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Builds NAME.hsh from NAME.index.
#
# The argument is the file name without its extension. NAME.index is read
# (gzip-compressed or plain), its first line is skipped, and for every
# distinct value of the second field (the block) one line
#     MIN MAX BLOCK
# is written to NAME.hsh, where MIN and MAX are the smallest and largest
# first field (the name) seen for that block. Lines are ordered by block,
# numerically.

if [ $# -ne 1 ]; then
    echo "Usage: $0 file_name_without_extension" >&2
    exit 1
fi

# One pass over the index instead of one scan per block. MIN and MAX are
# seeded from the first name seen in each block, so no sentinel value can be
# mistaken for a real minimum. The output redirection replaces any previous
# NAME.hsh, and no temporary file is left behind.
gzip -dcf "$1.index" | awk '
    FNR != 1 && $1 != "" && $2 != "" {
        name = $1 ""    # force string comparison even for numeric-looking names
        if (!($2 in min) || name < min[$2]) min[$2] = name
        if (!($2 in max) || name > max[$2]) max[$2] = name
    }
    END {
        for (block in min) print min[block], max[block], block
    }
' | sort -k3,3n > "$1.hsh"
|