bio-faster 0.2.2 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/.travis.yml +2 -3
  2. data/Gemfile +2 -0
  3. data/Gemfile.lock +2 -0
  4. data/LICENSE.txt +1 -1
  5. data/README.md +2 -3
  6. data/Rakefile +9 -15
  7. data/VERSION +1 -1
  8. data/bio-faster.gemspec +56 -12
  9. data/ext/faster.c +115 -52
  10. data/ext/mkrf_conf.rb +40 -0
  11. data/lib/bio-faster.rb +5 -4
  12. data/lib/bio/faster.rb +57 -0
  13. data/lib/bio/faster/library.rb +26 -0
  14. data/spec/fastq_error_spec.rb +55 -0
  15. data/spec/parser_spec.rb +28 -116
  16. data/test/data/errors/error_header.fastq +20 -0
  17. data/test/data/errors/error_long_qual.fastq +20 -0
  18. data/test/data/errors/error_qual_del.fastq +20 -0
  19. data/test/data/errors/error_qual_escape.fastq +20 -0
  20. data/test/data/errors/error_qual_null.fastq +0 -0
  21. data/test/data/errors/error_qual_space.fastq +20 -0
  22. data/test/data/errors/error_qual_tab.fastq +20 -0
  23. data/test/data/errors/error_qual_unit_sep.fastq +20 -0
  24. data/test/data/errors/error_qual_vtab.fastq +20 -0
  25. data/test/data/errors/error_spaces.fastq +20 -0
  26. data/test/data/errors/error_tabs.fastq +21 -0
  27. data/test/data/errors/error_trunc_at_qual.fastq +19 -0
  28. data/test/data/errors/error_trunc_at_seq.fastq +18 -0
  29. data/test/data/errors/error_trunc_in_qual.fastq +20 -0
  30. data/test/data/errors/error_trunc_in_seq.fastq +18 -0
  31. data/test/data/formats/illumina_full_range_as_illumina.fastq +8 -0
  32. data/test/data/formats/illumina_full_range_as_sanger.fastq +8 -0
  33. data/test/data/formats/illumina_full_range_as_solexa.fastq +8 -0
  34. data/test/data/formats/illumina_full_range_original_illumina.fastq +8 -0
  35. data/test/data/formats/longreads_as_illumina.fastq +40 -0
  36. data/test/data/formats/longreads_as_sanger.fastq +40 -0
  37. data/test/data/formats/longreads_as_solexa.fastq +40 -0
  38. data/test/data/formats/misc_dna_as_illumina.fastq +16 -0
  39. data/test/data/formats/misc_dna_as_sanger.fastq +16 -0
  40. data/test/data/formats/misc_dna_as_solexa.fastq +16 -0
  41. data/test/data/formats/misc_dna_original_sanger.fastq +16 -0
  42. data/test/data/formats/misc_rna_as_illumina.fastq +16 -0
  43. data/test/data/formats/misc_rna_as_sanger.fastq +16 -0
  44. data/test/data/formats/misc_rna_as_solexa.fastq +16 -0
  45. data/test/data/formats/misc_rna_original_sanger.fastq +16 -0
  46. data/test/data/formats/sanger_full_range_as_illumina.fastq +8 -0
  47. data/test/data/formats/sanger_full_range_as_sanger.fastq +8 -0
  48. data/test/data/formats/sanger_full_range_as_solexa.fastq +8 -0
  49. data/test/data/formats/sanger_full_range_original_sanger.fastq +8 -0
  50. data/test/data/formats/solexa_full_range_as_illumina.fastq +8 -0
  51. data/test/data/formats/solexa_full_range_as_sanger.fastq +8 -0
  52. data/test/data/formats/solexa_full_range_as_solexa.fastq +8 -0
  53. data/test/data/formats/solexa_full_range_original_solexa.fastq +8 -0
  54. data/test/data/formats/wrapping_as_illumina.fastq +12 -0
  55. data/test/data/formats/wrapping_as_sanger.fastq +12 -0
  56. data/test/data/formats/wrapping_as_solexa.fastq +12 -0
  57. metadata +88 -24
  58. data/ext/extconf.rb +0 -7
  59. data/ext/kseq.h +0 -223
  60. data/test/data/sample.fasta +0 -10
  61. data/test/data/sample.fastq +0 -24
  62. data/test/data/sample.fastq.gz +0 -0
  63. data/test/data/sff_sample.fastq +0 -16
@@ -0,0 +1,16 @@
1
+ @FAKE0011 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order)
2
+ ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA
3
+ +
4
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI
5
+ @FAKE0012 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order)
6
+ gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg
7
+ +
8
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI
9
+ @FAKE0013 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order)
10
+ ucagucagucagucagucagucagucagucagucagucagu
11
+ +
12
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI
13
+ @FAKE0014 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled)
14
+ gaucrywsmkhbvdnGAUCRYWSMKHBVDN
15
+ +
16
+ DEFGHIDEFGHIDEFGHIDEFGHIDEFGHI
@@ -0,0 +1,8 @@
1
+ @FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
3
+ +
4
+ @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5
+ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
6
+ CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@
@@ -0,0 +1,8 @@
1
+ @FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
3
+ +
4
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
5
+ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
6
+ CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"!
@@ -0,0 +1,8 @@
1
+ @FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
3
+ +
4
+ ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5
+ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
6
+ CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJHGFECB@>;;
@@ -0,0 +1,8 @@
1
+ @FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
3
+ +
4
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
5
+ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
6
+ CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"!
@@ -0,0 +1,8 @@
1
+ @FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
3
+ +
4
+ AABBCCDDEEFGHIJJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
5
+ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order)
6
+ TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJJIHGFEEDDCCBBAA
@@ -0,0 +1,8 @@
1
+ @FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
3
+ +
4
+ ""##$$%%&&'()*++,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
5
+ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order)
6
+ TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ _^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,++*)('&&%%$$##""
@@ -0,0 +1,8 @@
1
+ @FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
3
+ +
4
+ ;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
5
+ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order)
6
+ TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;
@@ -0,0 +1,8 @@
1
+ @FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
3
+ +
4
+ ;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
5
+ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order)
6
+ TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;
@@ -0,0 +1,12 @@
1
+ @SRR014849.50939 EIXKN4201BA2EC length=135
2
+ GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG
3
+ +
4
+ Zb^Ld`N\[d`NaZ[aZc]UOKHDA[\YT[_W[aZ\aZ[Zd`SF_WeaUI[Y\[[\\\[\Z\aY`X[[aZ\aZ\d`OY[aY[[\[[e`WPJC^UZ[`X\[R]T_V_W[`[Ga\I`\H[[Q^TVa\Ia\Ic^LY\S
5
+ @SRR014849.110027 EIXKN4201APUB0 length=131
6
+ CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG
7
+ +
8
+ \aYY_[FY\T`X^Vd`OY\[[^U_V[R^T[_ZDc^La\HYYO\S[c^Ld`Nc_QAZaZaYaY`XZZ\[aZZ[aZ[aZ[aZY`Z[`ZWeaVJ\[aZaY`X[PY\eaUG[\[[d`OXTUZ[Q\\`W\\\Y_W\
9
+ @SRR014849.203935 EIXKN4201B4HU6 length=144
10
+ AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT
11
+ +
12
+ `Z_ZDVT^YB[[Xd`PZ\d`RDaZaZ`ZaZ_ZDXd`Pd`Pd`RD[aZ`ZWd`Oc_RCd`P\aZ`ZaZaZY\YaZYaY`XYd`O`X[e`WPJEAc^LaZS[YYN[Z\Y`XWLT^U\b]JW[[RZ\SYc`RD[Z\WLXM`\HYa\I
@@ -0,0 +1,12 @@
1
+ @SRR014849.50939 EIXKN4201BA2EC length=135
2
+ GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG
3
+ +
4
+ ;C?-EA/=<EA/B;<B;D>60,)%"<=:5<@8<B;=B;<;EA4'@8FB6*<:=<<===<=;=B:A9<<B;=B;=EA0:<B:<<=<<FA81+$?6;<A9=<3>5@7@8<A<(B=*A=)<<2?57B=*B=*D?-:=4
5
+ @SRR014849.110027 EIXKN4201APUB0 length=131
6
+ CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG
7
+ +
8
+ =B::@<':=5A9?7EA0:=<<?6@7<3?5<@;%D?-B=)::0=4<D?-EA/D@2";B;B:B:A9;;=<B;;<B;<B;<B;:A;<A;8FB7+=<B;B:A9<1:=FB6(<=<<EA0956;<2==A8===:@8=
9
+ @SRR014849.203935 EIXKN4201B4HU6 length=144
10
+ AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT
11
+ +
12
+ A;@;%75?:#<<9EA1;=EA3%B;B;A;B;@;%9EA1EA1EA3%<B;A;8EA0D@3$EA1=B;A;B;B;:=:B;:B:A9:EA0A9<FA81+&"D?-B;4<::/<;=:A98-5?6=C>+8<<3;=4:DA3%<;=8-9.A=):B=*
@@ -0,0 +1,12 @@
1
+ @SRR014849.50939 EIXKN4201BA2EC length=135
2
+ GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG
3
+ +
4
+ Zb^Ld`N\[d`NaZ[aZc]UOKGB;[\YT[_W[aZ\aZ[Zd`SE_WeaUH[Y\[[\\\[\Z\aY`X[[aZ\aZ\d`OY[aY[[\[[e`WPJ@^UZ[`X\[R]T_V_W[`[Fa\H`\G[[Q^TVa\Ha\Hc^LY\S
5
+ @SRR014849.110027 EIXKN4201APUB0 length=131
6
+ CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG
7
+ +
8
+ \aYY_[EY\T`X^Vd`OY\[[^U_V[R^T[_ZBc^La\GYYO\S[c^Ld`Nc_Q;ZaZaYaY`XZZ\[aZZ[aZ[aZ[aZY`Z[`ZWeaVJ\[aZaY`X[PY\eaUF[\[[d`OXTUZ[Q\\`W\\\Y_W\
9
+ @SRR014849.203935 EIXKN4201B4HU6 length=144
10
+ AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT
11
+ +
12
+ `Z_ZBVT^Y>[[Xd`PZ\d`RBaZaZ`ZaZ_ZBXd`Pd`Pd`RB[aZ`ZWd`Oc_R@d`P\aZ`ZaZaZY\YaZYaY`XYd`O`X[e`WPJC;c^LaZS[YYN[Z\Y`XWLT^U\b]JW[[RZ\SYc`RB[Z\WLXM`\GYa\H
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-faster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.4.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-02 00:00:00.000000000 Z
12
+ date: 2012-04-27 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: ffi
16
+ requirement: &2153396520 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2153396520
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: shoulda
16
- requirement: &2161251160 !ruby/object:Gem::Requirement
27
+ requirement: &2153396000 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
30
  - - ! '>='
@@ -21,10 +32,10 @@ dependencies:
21
32
  version: '0'
22
33
  type: :development
23
34
  prerelease: false
24
- version_requirements: *2161251160
35
+ version_requirements: *2153396000
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: bundler
27
- requirement: &2161250660 !ruby/object:Gem::Requirement
38
+ requirement: &2153395420 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: 1.0.0
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *2161250660
46
+ version_requirements: *2153395420
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: jeweler
38
- requirement: &2161250100 !ruby/object:Gem::Requirement
49
+ requirement: &2153394940 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,10 +54,10 @@ dependencies:
43
54
  version: 1.6.4
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *2161250100
57
+ version_requirements: *2153394940
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: rcov
49
- requirement: &2161249560 !ruby/object:Gem::Requirement
60
+ requirement: &2153394460 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ! '>='
@@ -54,10 +65,10 @@ dependencies:
54
65
  version: '0'
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *2161249560
68
+ version_requirements: *2153394460
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: bio
60
- requirement: &2161249020 !ruby/object:Gem::Requirement
71
+ requirement: &2153393980 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ! '>='
@@ -65,10 +76,21 @@ dependencies:
65
76
  version: 1.4.2
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *2161249020
79
+ version_requirements: *2153393980
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: rspec
71
- requirement: &2161248500 !ruby/object:Gem::Requirement
82
+ requirement: &2153409780 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *2153409780
91
+ - !ruby/object:Gem::Dependency
92
+ name: ffi
93
+ requirement: &2153409260 !ruby/object:Gem::Requirement
72
94
  none: false
73
95
  requirements:
74
96
  - - ! '>='
@@ -76,12 +98,12 @@ dependencies:
76
98
  version: '0'
77
99
  type: :development
78
100
  prerelease: false
79
- version_requirements: *2161248500
80
- description: A fast parser for Fasta and FastQ files
101
+ version_requirements: *2153409260
102
+ description: A fast parser for FastQ files
81
103
  email: francesco.strozzi@gmail.com
82
104
  executables: []
83
105
  extensions:
84
- - ext/extconf.rb
106
+ - ext/mkrf_conf.rb
85
107
  extra_rdoc_files:
86
108
  - LICENSE.txt
87
109
  - README.md
@@ -95,16 +117,55 @@ files:
95
117
  - Rakefile
96
118
  - VERSION
97
119
  - bio-faster.gemspec
98
- - ext/extconf.rb
99
120
  - ext/faster.c
100
- - ext/kseq.h
121
+ - ext/mkrf_conf.rb
101
122
  - lib/bio-faster.rb
123
+ - lib/bio/faster.rb
124
+ - lib/bio/faster/library.rb
125
+ - spec/fastq_error_spec.rb
102
126
  - spec/helper.rb
103
127
  - spec/parser_spec.rb
104
- - test/data/sample.fasta
105
- - test/data/sample.fastq
106
- - test/data/sample.fastq.gz
107
- - test/data/sff_sample.fastq
128
+ - test/data/errors/error_header.fastq
129
+ - test/data/errors/error_long_qual.fastq
130
+ - test/data/errors/error_qual_del.fastq
131
+ - test/data/errors/error_qual_escape.fastq
132
+ - test/data/errors/error_qual_null.fastq
133
+ - test/data/errors/error_qual_space.fastq
134
+ - test/data/errors/error_qual_tab.fastq
135
+ - test/data/errors/error_qual_unit_sep.fastq
136
+ - test/data/errors/error_qual_vtab.fastq
137
+ - test/data/errors/error_spaces.fastq
138
+ - test/data/errors/error_tabs.fastq
139
+ - test/data/errors/error_trunc_at_qual.fastq
140
+ - test/data/errors/error_trunc_at_seq.fastq
141
+ - test/data/errors/error_trunc_in_qual.fastq
142
+ - test/data/errors/error_trunc_in_seq.fastq
143
+ - test/data/formats/illumina_full_range_as_illumina.fastq
144
+ - test/data/formats/illumina_full_range_as_sanger.fastq
145
+ - test/data/formats/illumina_full_range_as_solexa.fastq
146
+ - test/data/formats/illumina_full_range_original_illumina.fastq
147
+ - test/data/formats/longreads_as_illumina.fastq
148
+ - test/data/formats/longreads_as_sanger.fastq
149
+ - test/data/formats/longreads_as_solexa.fastq
150
+ - test/data/formats/misc_dna_as_illumina.fastq
151
+ - test/data/formats/misc_dna_as_sanger.fastq
152
+ - test/data/formats/misc_dna_as_solexa.fastq
153
+ - test/data/formats/misc_dna_original_sanger.fastq
154
+ - test/data/formats/misc_rna_as_illumina.fastq
155
+ - test/data/formats/misc_rna_as_sanger.fastq
156
+ - test/data/formats/misc_rna_as_solexa.fastq
157
+ - test/data/formats/misc_rna_original_sanger.fastq
158
+ - test/data/formats/sanger_full_range_as_illumina.fastq
159
+ - test/data/formats/sanger_full_range_as_sanger.fastq
160
+ - test/data/formats/sanger_full_range_as_solexa.fastq
161
+ - test/data/formats/sanger_full_range_original_sanger.fastq
162
+ - test/data/formats/solexa_full_range_as_illumina.fastq
163
+ - test/data/formats/solexa_full_range_as_sanger.fastq
164
+ - test/data/formats/solexa_full_range_as_solexa.fastq
165
+ - test/data/formats/solexa_full_range_original_solexa.fastq
166
+ - test/data/formats/wrapping_as_illumina.fastq
167
+ - test/data/formats/wrapping_as_sanger.fastq
168
+ - test/data/formats/wrapping_as_solexa.fastq
108
169
  homepage: http://github.com/fstrozzi/bioruby-faster
109
170
  licenses:
110
171
  - MIT
@@ -117,7 +178,10 @@ required_ruby_version: !ruby/object:Gem::Requirement
117
178
  requirements:
118
179
  - - ! '>='
119
180
  - !ruby/object:Gem::Version
120
- version: '1.9'
181
+ version: '0'
182
+ segments:
183
+ - 0
184
+ hash: 4125702484128053207
121
185
  required_rubygems_version: !ruby/object:Gem::Requirement
122
186
  none: false
123
187
  requirements:
@@ -129,5 +193,5 @@ rubyforge_project:
129
193
  rubygems_version: 1.8.15
130
194
  signing_key:
131
195
  specification_version: 3
132
- summary: A fast parser for Fasta and FastQ files
196
+ summary: A fast parser for FastQ files
133
197
  test_files: []
@@ -1,7 +0,0 @@
1
- require 'mkmf'
2
- extension_name = "faster"
3
- have_library("z")
4
- create_makefile(extension_name)
5
-
6
-
7
-
data/ext/kseq.h DELETED
@@ -1,223 +0,0 @@
1
- /* The MIT License
2
-
3
- Copyright (c) 2008 Genome Research Ltd (GRL).
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining
6
- a copy of this software and associated documentation files (the
7
- "Software"), to deal in the Software without restriction, including
8
- without limitation the rights to use, copy, modify, merge, publish,
9
- distribute, sublicense, and/or sell copies of the Software, and to
10
- permit persons to whom the Software is furnished to do so, subject to
11
- the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be
14
- included in all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
- SOFTWARE.
24
- */
25
-
26
- /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
-
28
- /* Last Modified: 12APR2009 */
29
-
30
- #ifndef AC_KSEQ_H
31
- #define AC_KSEQ_H
32
-
33
- #include <ctype.h>
34
- #include <string.h>
35
- #include <stdlib.h>
36
-
37
- #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
38
- #define KS_SEP_TAB 1 // isspace() && !' '
39
- #define KS_SEP_MAX 1
40
-
41
- #define __KS_TYPE(type_t) \
42
- typedef struct __kstream_t { \
43
- char *buf; \
44
- int begin, end, is_eof; \
45
- type_t f; \
46
- } kstream_t;
47
-
48
- #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
49
- #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
50
-
51
- #define __KS_BASIC(type_t, __bufsize) \
52
- static inline kstream_t *ks_init(type_t f) \
53
- { \
54
- kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
55
- ks->f = f; \
56
- ks->buf = (char*)malloc(__bufsize); \
57
- return ks; \
58
- } \
59
- static inline void ks_destroy(kstream_t *ks) \
60
- { \
61
- if (ks) { \
62
- free(ks->buf); \
63
- free(ks); \
64
- } \
65
- }
66
-
67
- #define __KS_GETC(__read, __bufsize) \
68
- static inline int ks_getc(kstream_t *ks) \
69
- { \
70
- if (ks->is_eof && ks->begin >= ks->end) return -1; \
71
- if (ks->begin >= ks->end) { \
72
- ks->begin = 0; \
73
- ks->end = __read(ks->f, ks->buf, __bufsize); \
74
- if (ks->end < __bufsize) ks->is_eof = 1; \
75
- if (ks->end == 0) return -1; \
76
- } \
77
- return (int)ks->buf[ks->begin++]; \
78
- }
79
-
80
- #ifndef KSTRING_T
81
- #define KSTRING_T kstring_t
82
- typedef struct __kstring_t {
83
- size_t l, m;
84
- char *s;
85
- } kstring_t;
86
- #endif
87
-
88
- #ifndef kroundup32
89
- #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
90
- #endif
91
-
92
- #define __KS_GETUNTIL(__read, __bufsize) \
93
- static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
94
- { \
95
- if (dret) *dret = 0; \
96
- str->l = 0; \
97
- if (ks->begin >= ks->end && ks->is_eof) return -1; \
98
- for (;;) { \
99
- int i; \
100
- if (ks->begin >= ks->end) { \
101
- if (!ks->is_eof) { \
102
- ks->begin = 0; \
103
- ks->end = __read(ks->f, ks->buf, __bufsize); \
104
- if (ks->end < __bufsize) ks->is_eof = 1; \
105
- if (ks->end == 0) break; \
106
- } else break; \
107
- } \
108
- if (delimiter > KS_SEP_MAX) { \
109
- for (i = ks->begin; i < ks->end; ++i) \
110
- if (ks->buf[i] == delimiter) break; \
111
- } else if (delimiter == KS_SEP_SPACE) { \
112
- for (i = ks->begin; i < ks->end; ++i) \
113
- if (isspace(ks->buf[i])) break; \
114
- } else if (delimiter == KS_SEP_TAB) { \
115
- for (i = ks->begin; i < ks->end; ++i) \
116
- if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
117
- } else i = 0; /* never come to here! */ \
118
- if (str->m - str->l < i - ks->begin + 1) { \
119
- str->m = str->l + (i - ks->begin) + 1; \
120
- kroundup32(str->m); \
121
- str->s = (char*)realloc(str->s, str->m); \
122
- } \
123
- memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
124
- str->l = str->l + (i - ks->begin); \
125
- ks->begin = i + 1; \
126
- if (i < ks->end) { \
127
- if (dret) *dret = ks->buf[i]; \
128
- break; \
129
- } \
130
- } \
131
- if (str->l == 0) { \
132
- str->m = 1; \
133
- str->s = (char*)calloc(1, 1); \
134
- } \
135
- str->s[str->l] = '\0'; \
136
- return str->l; \
137
- }
138
-
139
- #define KSTREAM_INIT(type_t, __read, __bufsize) \
140
- __KS_TYPE(type_t) \
141
- __KS_BASIC(type_t, __bufsize) \
142
- __KS_GETC(__read, __bufsize) \
143
- __KS_GETUNTIL(__read, __bufsize)
144
-
145
- #define __KSEQ_BASIC(type_t) \
146
- static inline kseq_t *kseq_init(type_t fd) \
147
- { \
148
- kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
149
- s->f = ks_init(fd); \
150
- return s; \
151
- } \
152
- static inline void kseq_rewind(kseq_t *ks) \
153
- { \
154
- ks->last_char = 0; \
155
- ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
156
- } \
157
- static inline void kseq_destroy(kseq_t *ks) \
158
- { \
159
- if (!ks) return; \
160
- free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
161
- ks_destroy(ks->f); \
162
- free(ks); \
163
- }
164
-
165
- /* Return value:
166
- >=0 length of the sequence (normal)
167
- -1 end-of-file
168
- -2 truncated quality string
169
- */
170
- #define __KSEQ_READ \
171
- static int kseq_read(kseq_t *seq) \
172
- { \
173
- int c; \
174
- kstream_t *ks = seq->f; \
175
- if (seq->last_char == 0) { /* then jump to the next header line */ \
176
- while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
177
- if (c == -1) return -1; /* end of file */ \
178
- seq->last_char = c; \
179
- } /* the first header char has been read */ \
180
- seq->comment.l = seq->seq.l = seq->qual.l = 0; \
181
- if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
182
- if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
183
- while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
184
- if (isgraph(c)) { /* printable non-space character */ \
185
- if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
186
- seq->seq.m = seq->seq.l + 2; \
187
- kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
188
- seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
189
- } \
190
- seq->seq.s[seq->seq.l++] = (char)c; \
191
- } \
192
- } \
193
- if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
194
- seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
195
- if (c != '+') return seq->seq.l; /* FASTA */ \
196
- if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
197
- seq->qual.m = seq->seq.m; \
198
- seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
199
- } \
200
- while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
201
- if (c == -1) return -2; /* we should not stop here */ \
202
- while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
203
- if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
204
- seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
205
- seq->last_char = 0; /* we have not come to the next header line */ \
206
- if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
207
- return seq->seq.l; \
208
- }
209
-
210
- #define __KSEQ_TYPE(type_t) \
211
- typedef struct { \
212
- kstring_t name, comment, seq, qual; \
213
- int last_char; \
214
- kstream_t *f; \
215
- } kseq_t;
216
-
217
- #define KSEQ_INIT(type_t, __read) \
218
- KSTREAM_INIT(type_t, __read, 4096) \
219
- __KSEQ_TYPE(type_t) \
220
- __KSEQ_BASIC(type_t) \
221
- __KSEQ_READ
222
-
223
- #endif