bio-faster 0.2.2 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/.travis.yml +2 -3
  2. data/Gemfile +2 -0
  3. data/Gemfile.lock +2 -0
  4. data/LICENSE.txt +1 -1
  5. data/README.md +2 -3
  6. data/Rakefile +9 -15
  7. data/VERSION +1 -1
  8. data/bio-faster.gemspec +56 -12
  9. data/ext/faster.c +115 -52
  10. data/ext/mkrf_conf.rb +40 -0
  11. data/lib/bio-faster.rb +5 -4
  12. data/lib/bio/faster.rb +57 -0
  13. data/lib/bio/faster/library.rb +26 -0
  14. data/spec/fastq_error_spec.rb +55 -0
  15. data/spec/parser_spec.rb +28 -116
  16. data/test/data/errors/error_header.fastq +20 -0
  17. data/test/data/errors/error_long_qual.fastq +20 -0
  18. data/test/data/errors/error_qual_del.fastq +20 -0
  19. data/test/data/errors/error_qual_escape.fastq +20 -0
  20. data/test/data/errors/error_qual_null.fastq +0 -0
  21. data/test/data/errors/error_qual_space.fastq +20 -0
  22. data/test/data/errors/error_qual_tab.fastq +20 -0
  23. data/test/data/errors/error_qual_unit_sep.fastq +20 -0
  24. data/test/data/errors/error_qual_vtab.fastq +20 -0
  25. data/test/data/errors/error_spaces.fastq +20 -0
  26. data/test/data/errors/error_tabs.fastq +21 -0
  27. data/test/data/errors/error_trunc_at_qual.fastq +19 -0
  28. data/test/data/errors/error_trunc_at_seq.fastq +18 -0
  29. data/test/data/errors/error_trunc_in_qual.fastq +20 -0
  30. data/test/data/errors/error_trunc_in_seq.fastq +18 -0
  31. data/test/data/formats/illumina_full_range_as_illumina.fastq +8 -0
  32. data/test/data/formats/illumina_full_range_as_sanger.fastq +8 -0
  33. data/test/data/formats/illumina_full_range_as_solexa.fastq +8 -0
  34. data/test/data/formats/illumina_full_range_original_illumina.fastq +8 -0
  35. data/test/data/formats/longreads_as_illumina.fastq +40 -0
  36. data/test/data/formats/longreads_as_sanger.fastq +40 -0
  37. data/test/data/formats/longreads_as_solexa.fastq +40 -0
  38. data/test/data/formats/misc_dna_as_illumina.fastq +16 -0
  39. data/test/data/formats/misc_dna_as_sanger.fastq +16 -0
  40. data/test/data/formats/misc_dna_as_solexa.fastq +16 -0
  41. data/test/data/formats/misc_dna_original_sanger.fastq +16 -0
  42. data/test/data/formats/misc_rna_as_illumina.fastq +16 -0
  43. data/test/data/formats/misc_rna_as_sanger.fastq +16 -0
  44. data/test/data/formats/misc_rna_as_solexa.fastq +16 -0
  45. data/test/data/formats/misc_rna_original_sanger.fastq +16 -0
  46. data/test/data/formats/sanger_full_range_as_illumina.fastq +8 -0
  47. data/test/data/formats/sanger_full_range_as_sanger.fastq +8 -0
  48. data/test/data/formats/sanger_full_range_as_solexa.fastq +8 -0
  49. data/test/data/formats/sanger_full_range_original_sanger.fastq +8 -0
  50. data/test/data/formats/solexa_full_range_as_illumina.fastq +8 -0
  51. data/test/data/formats/solexa_full_range_as_sanger.fastq +8 -0
  52. data/test/data/formats/solexa_full_range_as_solexa.fastq +8 -0
  53. data/test/data/formats/solexa_full_range_original_solexa.fastq +8 -0
  54. data/test/data/formats/wrapping_as_illumina.fastq +12 -0
  55. data/test/data/formats/wrapping_as_sanger.fastq +12 -0
  56. data/test/data/formats/wrapping_as_solexa.fastq +12 -0
  57. metadata +88 -24
  58. data/ext/extconf.rb +0 -7
  59. data/ext/kseq.h +0 -223
  60. data/test/data/sample.fasta +0 -10
  61. data/test/data/sample.fastq +0 -24
  62. data/test/data/sample.fastq.gz +0 -0
  63. data/test/data/sff_sample.fastq +0 -16
@@ -0,0 +1,16 @@
1
+ @FAKE0011 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order)
2
+ ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA
3
+ +
4
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI
5
+ @FAKE0012 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order)
6
+ gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg
7
+ +
8
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI
9
+ @FAKE0013 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order)
10
+ ucagucagucagucagucagucagucagucagucagucagu
11
+ +
12
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI
13
+ @FAKE0014 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled)
14
+ gaucrywsmkhbvdnGAUCRYWSMKHBVDN
15
+ +
16
+ DEFGHIDEFGHIDEFGHIDEFGHIDEFGHI
@@ -0,0 +1,8 @@
1
+ @FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
3
+ +
4
+ @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5
+ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
6
+ CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@
@@ -0,0 +1,8 @@
1
+ @FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
3
+ +
4
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
5
+ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
6
+ CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"!
@@ -0,0 +1,8 @@
1
+ @FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
3
+ +
4
+ ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5
+ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
6
+ CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJHGFECB@>;;
@@ -0,0 +1,8 @@
1
+ @FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
3
+ +
4
+ !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
5
+ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order)
6
+ CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"!
@@ -0,0 +1,8 @@
1
+ @FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
3
+ +
4
+ AABBCCDDEEFGHIJJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
5
+ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order)
6
+ TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJJIHGFEEDDCCBBAA
@@ -0,0 +1,8 @@
1
+ @FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
3
+ +
4
+ ""##$$%%&&'()*++,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
5
+ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order)
6
+ TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ _^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,++*)('&&%%$$##""
@@ -0,0 +1,8 @@
1
+ @FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
3
+ +
4
+ ;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
5
+ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order)
6
+ TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;
@@ -0,0 +1,8 @@
1
+ @FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order)
2
+ ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
3
+ +
4
+ ;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
5
+ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order)
6
+ TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA
7
+ +
8
+ ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;
@@ -0,0 +1,12 @@
1
+ @SRR014849.50939 EIXKN4201BA2EC length=135
2
+ GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG
3
+ +
4
+ Zb^Ld`N\[d`NaZ[aZc]UOKHDA[\YT[_W[aZ\aZ[Zd`SF_WeaUI[Y\[[\\\[\Z\aY`X[[aZ\aZ\d`OY[aY[[\[[e`WPJC^UZ[`X\[R]T_V_W[`[Ga\I`\H[[Q^TVa\Ia\Ic^LY\S
5
+ @SRR014849.110027 EIXKN4201APUB0 length=131
6
+ CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG
7
+ +
8
+ \aYY_[FY\T`X^Vd`OY\[[^U_V[R^T[_ZDc^La\HYYO\S[c^Ld`Nc_QAZaZaYaY`XZZ\[aZZ[aZ[aZ[aZY`Z[`ZWeaVJ\[aZaY`X[PY\eaUG[\[[d`OXTUZ[Q\\`W\\\Y_W\
9
+ @SRR014849.203935 EIXKN4201B4HU6 length=144
10
+ AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT
11
+ +
12
+ `Z_ZDVT^YB[[Xd`PZ\d`RDaZaZ`ZaZ_ZDXd`Pd`Pd`RD[aZ`ZWd`Oc_RCd`P\aZ`ZaZaZY\YaZYaY`XYd`O`X[e`WPJEAc^LaZS[YYN[Z\Y`XWLT^U\b]JW[[RZ\SYc`RD[Z\WLXM`\HYa\I
@@ -0,0 +1,12 @@
1
+ @SRR014849.50939 EIXKN4201BA2EC length=135
2
+ GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG
3
+ +
4
+ ;C?-EA/=<EA/B;<B;D>60,)%"<=:5<@8<B;=B;<;EA4'@8FB6*<:=<<===<=;=B:A9<<B;=B;=EA0:<B:<<=<<FA81+$?6;<A9=<3>5@7@8<A<(B=*A=)<<2?57B=*B=*D?-:=4
5
+ @SRR014849.110027 EIXKN4201APUB0 length=131
6
+ CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG
7
+ +
8
+ =B::@<':=5A9?7EA0:=<<?6@7<3?5<@;%D?-B=)::0=4<D?-EA/D@2";B;B:B:A9;;=<B;;<B;<B;<B;:A;<A;8FB7+=<B;B:A9<1:=FB6(<=<<EA0956;<2==A8===:@8=
9
+ @SRR014849.203935 EIXKN4201B4HU6 length=144
10
+ AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT
11
+ +
12
+ A;@;%75?:#<<9EA1;=EA3%B;B;A;B;@;%9EA1EA1EA3%<B;A;8EA0D@3$EA1=B;A;B;B;:=:B;:B:A9:EA0A9<FA81+&"D?-B;4<::/<;=:A98-5?6=C>+8<<3;=4:DA3%<;=8-9.A=):B=*
@@ -0,0 +1,12 @@
1
+ @SRR014849.50939 EIXKN4201BA2EC length=135
2
+ GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG
3
+ +
4
+ Zb^Ld`N\[d`NaZ[aZc]UOKGB;[\YT[_W[aZ\aZ[Zd`SE_WeaUH[Y\[[\\\[\Z\aY`X[[aZ\aZ\d`OY[aY[[\[[e`WPJ@^UZ[`X\[R]T_V_W[`[Fa\H`\G[[Q^TVa\Ha\Hc^LY\S
5
+ @SRR014849.110027 EIXKN4201APUB0 length=131
6
+ CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG
7
+ +
8
+ \aYY_[EY\T`X^Vd`OY\[[^U_V[R^T[_ZBc^La\GYYO\S[c^Ld`Nc_Q;ZaZaYaY`XZZ\[aZZ[aZ[aZ[aZY`Z[`ZWeaVJ\[aZaY`X[PY\eaUF[\[[d`OXTUZ[Q\\`W\\\Y_W\
9
+ @SRR014849.203935 EIXKN4201B4HU6 length=144
10
+ AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT
11
+ +
12
+ `Z_ZBVT^Y>[[Xd`PZ\d`RBaZaZ`ZaZ_ZBXd`Pd`Pd`RB[aZ`ZWd`Oc_R@d`P\aZ`ZaZaZY\YaZYaY`XYd`O`X[e`WPJC;c^LaZS[YYN[Z\Y`XWLT^U\b]JW[[RZ\SYc`RB[Z\WLXM`\GYa\H
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-faster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.4.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-02 00:00:00.000000000 Z
12
+ date: 2012-04-27 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: ffi
16
+ requirement: &2153396520 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2153396520
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: shoulda
16
- requirement: &2161251160 !ruby/object:Gem::Requirement
27
+ requirement: &2153396000 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
30
  - - ! '>='
@@ -21,10 +32,10 @@ dependencies:
21
32
  version: '0'
22
33
  type: :development
23
34
  prerelease: false
24
- version_requirements: *2161251160
35
+ version_requirements: *2153396000
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: bundler
27
- requirement: &2161250660 !ruby/object:Gem::Requirement
38
+ requirement: &2153395420 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: 1.0.0
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *2161250660
46
+ version_requirements: *2153395420
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: jeweler
38
- requirement: &2161250100 !ruby/object:Gem::Requirement
49
+ requirement: &2153394940 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,10 +54,10 @@ dependencies:
43
54
  version: 1.6.4
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *2161250100
57
+ version_requirements: *2153394940
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: rcov
49
- requirement: &2161249560 !ruby/object:Gem::Requirement
60
+ requirement: &2153394460 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ! '>='
@@ -54,10 +65,10 @@ dependencies:
54
65
  version: '0'
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *2161249560
68
+ version_requirements: *2153394460
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: bio
60
- requirement: &2161249020 !ruby/object:Gem::Requirement
71
+ requirement: &2153393980 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ! '>='
@@ -65,10 +76,21 @@ dependencies:
65
76
  version: 1.4.2
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *2161249020
79
+ version_requirements: *2153393980
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: rspec
71
- requirement: &2161248500 !ruby/object:Gem::Requirement
82
+ requirement: &2153409780 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *2153409780
91
+ - !ruby/object:Gem::Dependency
92
+ name: ffi
93
+ requirement: &2153409260 !ruby/object:Gem::Requirement
72
94
  none: false
73
95
  requirements:
74
96
  - - ! '>='
@@ -76,12 +98,12 @@ dependencies:
76
98
  version: '0'
77
99
  type: :development
78
100
  prerelease: false
79
- version_requirements: *2161248500
80
- description: A fast parser for Fasta and FastQ files
101
+ version_requirements: *2153409260
102
+ description: A fast parser for FastQ files
81
103
  email: francesco.strozzi@gmail.com
82
104
  executables: []
83
105
  extensions:
84
- - ext/extconf.rb
106
+ - ext/mkrf_conf.rb
85
107
  extra_rdoc_files:
86
108
  - LICENSE.txt
87
109
  - README.md
@@ -95,16 +117,55 @@ files:
95
117
  - Rakefile
96
118
  - VERSION
97
119
  - bio-faster.gemspec
98
- - ext/extconf.rb
99
120
  - ext/faster.c
100
- - ext/kseq.h
121
+ - ext/mkrf_conf.rb
101
122
  - lib/bio-faster.rb
123
+ - lib/bio/faster.rb
124
+ - lib/bio/faster/library.rb
125
+ - spec/fastq_error_spec.rb
102
126
  - spec/helper.rb
103
127
  - spec/parser_spec.rb
104
- - test/data/sample.fasta
105
- - test/data/sample.fastq
106
- - test/data/sample.fastq.gz
107
- - test/data/sff_sample.fastq
128
+ - test/data/errors/error_header.fastq
129
+ - test/data/errors/error_long_qual.fastq
130
+ - test/data/errors/error_qual_del.fastq
131
+ - test/data/errors/error_qual_escape.fastq
132
+ - test/data/errors/error_qual_null.fastq
133
+ - test/data/errors/error_qual_space.fastq
134
+ - test/data/errors/error_qual_tab.fastq
135
+ - test/data/errors/error_qual_unit_sep.fastq
136
+ - test/data/errors/error_qual_vtab.fastq
137
+ - test/data/errors/error_spaces.fastq
138
+ - test/data/errors/error_tabs.fastq
139
+ - test/data/errors/error_trunc_at_qual.fastq
140
+ - test/data/errors/error_trunc_at_seq.fastq
141
+ - test/data/errors/error_trunc_in_qual.fastq
142
+ - test/data/errors/error_trunc_in_seq.fastq
143
+ - test/data/formats/illumina_full_range_as_illumina.fastq
144
+ - test/data/formats/illumina_full_range_as_sanger.fastq
145
+ - test/data/formats/illumina_full_range_as_solexa.fastq
146
+ - test/data/formats/illumina_full_range_original_illumina.fastq
147
+ - test/data/formats/longreads_as_illumina.fastq
148
+ - test/data/formats/longreads_as_sanger.fastq
149
+ - test/data/formats/longreads_as_solexa.fastq
150
+ - test/data/formats/misc_dna_as_illumina.fastq
151
+ - test/data/formats/misc_dna_as_sanger.fastq
152
+ - test/data/formats/misc_dna_as_solexa.fastq
153
+ - test/data/formats/misc_dna_original_sanger.fastq
154
+ - test/data/formats/misc_rna_as_illumina.fastq
155
+ - test/data/formats/misc_rna_as_sanger.fastq
156
+ - test/data/formats/misc_rna_as_solexa.fastq
157
+ - test/data/formats/misc_rna_original_sanger.fastq
158
+ - test/data/formats/sanger_full_range_as_illumina.fastq
159
+ - test/data/formats/sanger_full_range_as_sanger.fastq
160
+ - test/data/formats/sanger_full_range_as_solexa.fastq
161
+ - test/data/formats/sanger_full_range_original_sanger.fastq
162
+ - test/data/formats/solexa_full_range_as_illumina.fastq
163
+ - test/data/formats/solexa_full_range_as_sanger.fastq
164
+ - test/data/formats/solexa_full_range_as_solexa.fastq
165
+ - test/data/formats/solexa_full_range_original_solexa.fastq
166
+ - test/data/formats/wrapping_as_illumina.fastq
167
+ - test/data/formats/wrapping_as_sanger.fastq
168
+ - test/data/formats/wrapping_as_solexa.fastq
108
169
  homepage: http://github.com/fstrozzi/bioruby-faster
109
170
  licenses:
110
171
  - MIT
@@ -117,7 +178,10 @@ required_ruby_version: !ruby/object:Gem::Requirement
117
178
  requirements:
118
179
  - - ! '>='
119
180
  - !ruby/object:Gem::Version
120
- version: '1.9'
181
+ version: '0'
182
+ segments:
183
+ - 0
184
+ hash: 4125702484128053207
121
185
  required_rubygems_version: !ruby/object:Gem::Requirement
122
186
  none: false
123
187
  requirements:
@@ -129,5 +193,5 @@ rubyforge_project:
129
193
  rubygems_version: 1.8.15
130
194
  signing_key:
131
195
  specification_version: 3
132
- summary: A fast parser for Fasta and FastQ files
196
+ summary: A fast parser for FastQ files
133
197
  test_files: []
@@ -1,7 +0,0 @@
1
- require 'mkmf'
2
- extension_name = "faster"
3
- have_library("z")
4
- create_makefile(extension_name)
5
-
6
-
7
-
data/ext/kseq.h DELETED
@@ -1,223 +0,0 @@
1
- /* The MIT License
2
-
3
- Copyright (c) 2008 Genome Research Ltd (GRL).
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining
6
- a copy of this software and associated documentation files (the
7
- "Software"), to deal in the Software without restriction, including
8
- without limitation the rights to use, copy, modify, merge, publish,
9
- distribute, sublicense, and/or sell copies of the Software, and to
10
- permit persons to whom the Software is furnished to do so, subject to
11
- the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be
14
- included in all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
- SOFTWARE.
24
- */
25
-
26
- /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
-
28
- /* Last Modified: 12APR2009 */
29
-
30
- #ifndef AC_KSEQ_H
31
- #define AC_KSEQ_H
32
-
33
- #include <ctype.h>
34
- #include <string.h>
35
- #include <stdlib.h>
36
-
37
- #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
38
- #define KS_SEP_TAB 1 // isspace() && !' '
39
- #define KS_SEP_MAX 1
40
-
41
- #define __KS_TYPE(type_t) \
42
- typedef struct __kstream_t { \
43
- char *buf; \
44
- int begin, end, is_eof; \
45
- type_t f; \
46
- } kstream_t;
47
-
48
- #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
49
- #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
50
-
51
- #define __KS_BASIC(type_t, __bufsize) \
52
- static inline kstream_t *ks_init(type_t f) \
53
- { \
54
- kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
55
- ks->f = f; \
56
- ks->buf = (char*)malloc(__bufsize); \
57
- return ks; \
58
- } \
59
- static inline void ks_destroy(kstream_t *ks) \
60
- { \
61
- if (ks) { \
62
- free(ks->buf); \
63
- free(ks); \
64
- } \
65
- }
66
-
67
- #define __KS_GETC(__read, __bufsize) \
68
- static inline int ks_getc(kstream_t *ks) \
69
- { \
70
- if (ks->is_eof && ks->begin >= ks->end) return -1; \
71
- if (ks->begin >= ks->end) { \
72
- ks->begin = 0; \
73
- ks->end = __read(ks->f, ks->buf, __bufsize); \
74
- if (ks->end < __bufsize) ks->is_eof = 1; \
75
- if (ks->end == 0) return -1; \
76
- } \
77
- return (int)ks->buf[ks->begin++]; \
78
- }
79
-
80
- #ifndef KSTRING_T
81
- #define KSTRING_T kstring_t
82
- typedef struct __kstring_t {
83
- size_t l, m;
84
- char *s;
85
- } kstring_t;
86
- #endif
87
-
88
- #ifndef kroundup32
89
- #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
90
- #endif
91
-
92
- #define __KS_GETUNTIL(__read, __bufsize) \
93
- static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
94
- { \
95
- if (dret) *dret = 0; \
96
- str->l = 0; \
97
- if (ks->begin >= ks->end && ks->is_eof) return -1; \
98
- for (;;) { \
99
- int i; \
100
- if (ks->begin >= ks->end) { \
101
- if (!ks->is_eof) { \
102
- ks->begin = 0; \
103
- ks->end = __read(ks->f, ks->buf, __bufsize); \
104
- if (ks->end < __bufsize) ks->is_eof = 1; \
105
- if (ks->end == 0) break; \
106
- } else break; \
107
- } \
108
- if (delimiter > KS_SEP_MAX) { \
109
- for (i = ks->begin; i < ks->end; ++i) \
110
- if (ks->buf[i] == delimiter) break; \
111
- } else if (delimiter == KS_SEP_SPACE) { \
112
- for (i = ks->begin; i < ks->end; ++i) \
113
- if (isspace(ks->buf[i])) break; \
114
- } else if (delimiter == KS_SEP_TAB) { \
115
- for (i = ks->begin; i < ks->end; ++i) \
116
- if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
117
- } else i = 0; /* never come to here! */ \
118
- if (str->m - str->l < i - ks->begin + 1) { \
119
- str->m = str->l + (i - ks->begin) + 1; \
120
- kroundup32(str->m); \
121
- str->s = (char*)realloc(str->s, str->m); \
122
- } \
123
- memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
124
- str->l = str->l + (i - ks->begin); \
125
- ks->begin = i + 1; \
126
- if (i < ks->end) { \
127
- if (dret) *dret = ks->buf[i]; \
128
- break; \
129
- } \
130
- } \
131
- if (str->l == 0) { \
132
- str->m = 1; \
133
- str->s = (char*)calloc(1, 1); \
134
- } \
135
- str->s[str->l] = '\0'; \
136
- return str->l; \
137
- }
138
-
139
- #define KSTREAM_INIT(type_t, __read, __bufsize) \
140
- __KS_TYPE(type_t) \
141
- __KS_BASIC(type_t, __bufsize) \
142
- __KS_GETC(__read, __bufsize) \
143
- __KS_GETUNTIL(__read, __bufsize)
144
-
145
- #define __KSEQ_BASIC(type_t) \
146
- static inline kseq_t *kseq_init(type_t fd) \
147
- { \
148
- kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
149
- s->f = ks_init(fd); \
150
- return s; \
151
- } \
152
- static inline void kseq_rewind(kseq_t *ks) \
153
- { \
154
- ks->last_char = 0; \
155
- ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
156
- } \
157
- static inline void kseq_destroy(kseq_t *ks) \
158
- { \
159
- if (!ks) return; \
160
- free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
161
- ks_destroy(ks->f); \
162
- free(ks); \
163
- }
164
-
165
- /* Return value:
166
- >=0 length of the sequence (normal)
167
- -1 end-of-file
168
- -2 truncated quality string
169
- */
170
- #define __KSEQ_READ \
171
- static int kseq_read(kseq_t *seq) \
172
- { \
173
- int c; \
174
- kstream_t *ks = seq->f; \
175
- if (seq->last_char == 0) { /* then jump to the next header line */ \
176
- while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
177
- if (c == -1) return -1; /* end of file */ \
178
- seq->last_char = c; \
179
- } /* the first header char has been read */ \
180
- seq->comment.l = seq->seq.l = seq->qual.l = 0; \
181
- if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
182
- if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
183
- while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
184
- if (isgraph(c)) { /* printable non-space character */ \
185
- if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
186
- seq->seq.m = seq->seq.l + 2; \
187
- kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
188
- seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
189
- } \
190
- seq->seq.s[seq->seq.l++] = (char)c; \
191
- } \
192
- } \
193
- if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
194
- seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
195
- if (c != '+') return seq->seq.l; /* FASTA */ \
196
- if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
197
- seq->qual.m = seq->seq.m; \
198
- seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
199
- } \
200
- while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
201
- if (c == -1) return -2; /* we should not stop here */ \
202
- while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
203
- if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
204
- seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
205
- seq->last_char = 0; /* we have not come to the next header line */ \
206
- if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
207
- return seq->seq.l; \
208
- }
209
-
210
- #define __KSEQ_TYPE(type_t) \
211
- typedef struct { \
212
- kstring_t name, comment, seq, qual; \
213
- int last_char; \
214
- kstream_t *f; \
215
- } kseq_t;
216
-
217
- #define KSEQ_INIT(type_t, __read) \
218
- KSTREAM_INIT(type_t, __read, 4096) \
219
- __KSEQ_TYPE(type_t) \
220
- __KSEQ_BASIC(type_t) \
221
- __KSEQ_READ
222
-
223
- #endif