biosyntax 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -9
- data/exe/biocat +131 -0
- data/lib/biosyntax/version.rb +1 -1
- data/lib/biosyntax.rb +2 -2
- metadata +5 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '08a0ce98e7f92f7c08a03c56ebcd37fee25ca4bfd8717bc6b6a0b4873d6ad609'
|
|
4
|
+
data.tar.gz: 7fb63a24759d4f1a095b5b33a30437c857f73aad4e6f5a0cdbd45ed75ccb1590
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2862c64ea68a56625424b382e4069e02b775a1085652a29e6f60da9d52d05415e30a461beeb86789a815a7a22a2a8598d17744da92e229404da0dc9f130fc06c
|
|
7
|
+
data.tar.gz: 61963733732c0acf300ef3edb168de21246842001112eb49dfa71c4ef7ff08a780cede0d36c64488716a767b97f1fdd5d3fe38d08aa41c5fe9c8386cb31b9199
|
data/README.md
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
# ruby-biosyntax
|
|
2
2
|
|
|
3
3
|
[](https://github.com/kojix2/ruby-biosyntax/actions/workflows/ci.yml)
|
|
4
|
+
[](https://badge.fury.io/rb/biosyntax)
|
|
4
5
|
[](https://tokei.kojix2.net/github/kojix2/ruby-biosyntax)
|
|
6
|
+
[](https://doi.org/10.5281/zenodo.20698478)
|
|
7
|
+
|
|
5
8
|
|
|
6
9
|
|
|
7
10
|
:dna: [bioSyntax](https://github.com/bioSyntax/bioSyntax) - Syntax highlighting for biological data formats - for Ruby.
|
|
@@ -98,20 +101,18 @@ BioSyntax.guess_format("a.vcf.gz") # :vcf
|
|
|
98
101
|
The metadata is generated from `libbiosyntax` at load time. The Ruby side does
|
|
99
102
|
not maintain a separate hand-written table of formats or kinds.
|
|
100
103
|
|
|
101
|
-
##
|
|
104
|
+
## Command line
|
|
102
105
|
|
|
103
|
-
|
|
106
|
+
Installing the gem also installs `biocat`:
|
|
104
107
|
|
|
105
108
|
```sh
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
ruby examples/inspect_spans.rb sample.vcf
|
|
109
|
+
biocat sample.vcf
|
|
110
|
+
biocat --format fastq reads.fastq
|
|
111
|
+
biocat -l
|
|
110
112
|
```
|
|
111
113
|
|
|
112
|
-
`
|
|
113
|
-
|
|
114
|
-
prints the supported format names.
|
|
114
|
+
`.gz`/`.bgz` are decompressed automatically. BAM/CRAM/BCF require optional
|
|
115
|
+
`ruby-htslib` (`gem install htslib`).
|
|
115
116
|
|
|
116
117
|
## Development tasks
|
|
117
118
|
|
data/exe/biocat
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'optparse'
|
|
5
|
+
require 'zlib'
|
|
6
|
+
require 'biosyntax'
|
|
7
|
+
|
|
8
|
+
# Command-line state. The OptionParser handlers overwrite these defaults:
# :format holds the name given to --format (nil means "not given") and
# :list_formats is switched on by --list-formats.
options = { format: nil, list_formats: false }

# File extensions that are read through htslib instead of as plain text,
# mapped to the text format used to highlight their decoded records.
BINARY_FORMATS = { '.bam' => :sam, '.cram' => :sam, '.bcf' => :vcf }.freeze
|
|
15
|
+
|
|
16
|
+
# Decides whether an input has to be decoded through htslib.
#
# The file extension is consulted first (never for '-', which stands for
# standard input); only when it is not a known binary extension is the
# explicitly requested format name considered.
#
# @param path [String] input path, or '-' for standard input
# @param requested_format [String, Symbol, nil] value given to --format
# @return [Symbol, nil] :sam for BAM/CRAM, :vcf for BCF, nil for text input
def binary_format_for(path, requested_format)
  extension = path == '-' ? nil : File.extname(path).downcase
  return BINARY_FORMATS[extension] if extension && BINARY_FORMATS.key?(extension)

  # nil.to_s is '', which matches no entry and therefore yields nil.
  by_name = { 'bam' => :sam, 'cram' => :sam, 'bcf' => :vcf }
  by_name[requested_format.to_s.downcase]
end
|
|
27
|
+
|
|
28
|
+
# Loads the optional htslib binding on demand.
#
# When the library cannot be loaded, two diagnostic lines are written to
# standard error and the process exits with status 1.
#
# @return [Boolean] the result of +require+ when loading succeeds
def require_htslib!
  require 'htslib'
rescue LoadError => load_error
  # One warn call with two messages prints them on separate lines.
  warn(
    "[biocat] ruby-htslib is required to read BAM/CRAM/BCF files: #{load_error.message}",
    '[biocat] install it with `gem install htslib`'
  )
  exit 1
end
|
|
35
|
+
|
|
36
|
+
# Prints +text+ line by line, colorizing each line when a highlighter is
# available and passing it through untouched otherwise.
#
# @param text [#each_line] a String or an IO-like object
# @param highlighter [#colorize, nil] object whose +colorize+ turns one line
#   into the text to print, or nil to print the input unchanged
# @return [Object] whatever +text.each_line+ returns
def emit_colored(text, highlighter)
  return text.each_line { |raw| print(raw) } unless highlighter

  text.each_line { |raw| print(highlighter.colorize(raw)) }
end
|
|
41
|
+
|
|
42
|
+
# Prints one record as a single highlighted line.
#
# The record is converted with +to_s+ and a trailing newline is appended
# when the text does not already end with one.
#
# @param record [#to_s] record to print
# @param highlighter [#colorize, nil] forwarded to emit_colored
def emit_record(record, highlighter)
  rendered = record.to_s
  rendered += "\n" unless rendered.end_with?("\n")
  emit_colored(rendered, highlighter)
end
|
|
47
|
+
|
|
48
|
+
# Decodes a binary file through htslib and prints its header followed by
# every record, highlighted as the corresponding text format.
#
# htslib is loaded first regardless of +format+. A +format+ other than
# :sam or :vcf prints nothing.
#
# @param path [String] path handed to the htslib reader
# @param format [Symbol] :sam selects HTS::Bam, :vcf selects HTS::Bcf
# @param highlighter [#colorize, nil] forwarded to the emit helpers
def emit_htslib_file(path, format, highlighter)
  require_htslib!

  # The HTS constants are resolved only after htslib has been loaded, and
  # only for the branch that is actually taken.
  reader_class =
    case format
    when :sam then HTS::Bam
    when :vcf then HTS::Bcf
    end
  return unless reader_class

  reader_class.open(path) do |reader|
    emit_colored(reader.header.to_s, highlighter)
    reader.each { |record| emit_record(record, highlighter) }
  end
end
|
|
64
|
+
|
|
65
|
+
# Command-line definition. The handlers store their results in +options+;
# --help prints the usage text and exits right away.
parser = OptionParser.new('usage: biocat [options] [FILE ...]') do |opts|
  opts.on('-f', '--format FORMAT', 'Highlight as FORMAT') { |name| options[:format] = name }
  opts.on('-l', '--list-formats', 'Print supported format names') { options[:list_formats] = true }
  opts.on('-h', '--help', 'Print this help') do
    puts opts
    exit
  end
end
|
|
81
|
+
|
|
82
|
+
# Decompresses the gzip file at +path+ and prints it line by line.
#
# A plain Zlib::GzipReader stops after the first gzip member. BGZF files
# (bgzip output) are many concatenated members, so reading them that way
# silently drops everything after the first block. GzipReader.zcat walks
# every member in the file.
#
# A line may straddle a member boundary, so text is buffered until a
# newline is seen and only complete lines are handed to the highlighter.
#
# NOTE(review): confirm the minimum supported Ruby ships
# Zlib::GzipReader.zcat.
#
# @param path [String] path of the .gz/.bgz file
# @param highlighter [#colorize, nil] forwarded to emit_colored
def emit_gzip_file(path, highlighter)
  File.open(path, 'rb') do |file|
    pending = nil
    Zlib::GzipReader.zcat(file) do |chunk|
      pending = pending ? pending << chunk : chunk.dup
      last_newline = pending.rindex("\n")
      next unless last_newline

      # Emit the complete lines and keep the unterminated tail buffered.
      emit_colored(pending.slice!(0..last_newline), highlighter)
    end
    # Final line of a file that does not end with a newline.
    emit_colored(pending, highlighter) if pending
  end
end

# Entry point: parse the options, then print every input in order.
# No FILE argument means standard input ('-').
#
# Exit status: 2 for option errors and BioSyntax errors (the usage text is
# printed as well), 1 for any other failure.
begin
  parser.parse!

  if options[:list_formats]
    puts BioSyntax::FORMAT_NAMES.join("\n")
    exit
  end

  paths = ARGV.empty? ? ['-'] : ARGV

  paths.each do |path|
    # A binary input overrides --format; otherwise --format wins over the
    # format guessed from the file name. Standard input is never guessed.
    binary_format = binary_format_for(path, options[:format])
    format = binary_format || options[:format]
    format ||= BioSyntax.guess_format(path) unless path == '-'

    highlighter = format ? BioSyntax[format] : nil

    if binary_format
      emit_htslib_file(path, binary_format, highlighter)
    elsif path == '-'
      emit_colored($stdin, highlighter)
    elsif path.downcase.end_with?('.gz', '.bgz')
      emit_gzip_file(path, highlighter)
    else
      # Block form closes the file even when printing fails.
      File.open(path, 'rb') { |file| emit_colored(file, highlighter) }
    end
  end
rescue OptionParser::ParseError, BioSyntax::Error => e
  warn "[biocat] #{e.message}"
  warn parser
  exit 2
rescue StandardError => e
  # Covers SystemCallError (missing or unreadable files) and
  # Zlib::GzipFile::Error (corrupt gzip data) as well; all of them are
  # reported the same way.
  warn "[biocat] #{e.message}"
  exit 1
end
|
data/lib/biosyntax/version.rb
CHANGED
data/lib/biosyntax.rb
CHANGED
|
@@ -436,8 +436,8 @@ module BioSyntax
|
|
|
436
436
|
else
|
|
437
437
|
name = value.to_s.downcase
|
|
438
438
|
FORMATS[name.to_sym] ||
|
|
439
|
-
|
|
440
|
-
|
|
439
|
+
FORMATS[name.tr('_', '-').to_sym] ||
|
|
440
|
+
FORMATS_BY_ID[Native.format_id_from_name(name)]
|
|
441
441
|
end
|
|
442
442
|
|
|
443
443
|
return found if found
|
metadata
CHANGED
|
@@ -1,22 +1,24 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: biosyntax
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kojix2
|
|
8
|
-
bindir:
|
|
8
|
+
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies: []
|
|
12
12
|
email: 2xijok@gmail.com
|
|
13
|
-
executables:
|
|
13
|
+
executables:
|
|
14
|
+
- biocat
|
|
14
15
|
extensions:
|
|
15
16
|
- ext/biosyntax/extconf.rb
|
|
16
17
|
extra_rdoc_files: []
|
|
17
18
|
files:
|
|
18
19
|
- LICENSE.md
|
|
19
20
|
- README.md
|
|
21
|
+
- exe/biocat
|
|
20
22
|
- ext/biosyntax/biosyntax.c
|
|
21
23
|
- ext/biosyntax/biosyntax.h
|
|
22
24
|
- ext/biosyntax/biosyntax_ext.c
|