biosyntax 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d25bb3d93262912c86faa5bc27143a5d4f1f513aa66f728543d07fec0bc5221
4
- data.tar.gz: bc548ee7509ea154df6126bacf794233d8c2d523b53afe0dafe2ddb195e87e48
3
+ metadata.gz: '08a0ce98e7f92f7c08a03c56ebcd37fee25ca4bfd8717bc6b6a0b4873d6ad609'
4
+ data.tar.gz: 7fb63a24759d4f1a095b5b33a30437c857f73aad4e6f5a0cdbd45ed75ccb1590
5
5
  SHA512:
6
- metadata.gz: 5ab4fb91d4c75df89b737fcd1354b16637c1c631f8e87950cd1c15532041d6f9dc1e382ad0b90f2efb6ae34cf9013f790808797b31d33fd4d23ad246417ecc5c
7
- data.tar.gz: 4cbd7875d8ca86d40c9d21ddc00c662e8bc5319cb02862ae55c10fe5e77b005a794793ee7763bf515c9c7a8aa2ec692a7026aebe5d19ee503a27887f95bc4552
6
+ metadata.gz: 2862c64ea68a56625424b382e4069e02b775a1085652a29e6f60da9d52d05415e30a461beeb86789a815a7a22a2a8598d17744da92e229404da0dc9f130fc06c
7
+ data.tar.gz: 61963733732c0acf300ef3edb168de21246842001112eb49dfa71c4ef7ff08a780cede0d36c64488716a767b97f1fdd5d3fe38d08aa41c5fe9c8386cb31b9199
data/README.md CHANGED
@@ -1,7 +1,10 @@
1
1
  # ruby-biosyntax
2
2
 
3
3
  [![CI](https://github.com/kojix2/ruby-biosyntax/actions/workflows/ci.yml/badge.svg)](https://github.com/kojix2/ruby-biosyntax/actions/workflows/ci.yml)
4
+ [![Gem Version](https://badge.fury.io/rb/biosyntax.svg)](https://badge.fury.io/rb/biosyntax)
4
5
  [![Lines of Code](https://img.shields.io/endpoint?url=https%3A%2F%2Ftokei.kojix2.net%2Fbadge%2Fgithub%2Fkojix2%2Fruby-biosyntax%2Flines)](https://tokei.kojix2.net/github/kojix2/ruby-biosyntax)
6
+ [![DOI](https://zenodo.org/badge/1269890086.svg)](https://doi.org/10.5281/zenodo.20698478)
7
+
5
8
 
6
9
 
7
10
  :dna: [bioSyntax](https://github.com/bioSyntax/bioSyntax) - Syntax highlighting for biological data formats - for Ruby.
@@ -98,20 +101,18 @@ BioSyntax.guess_format("a.vcf.gz") # :vcf
98
101
  The metadata is generated from `libbiosyntax` at load time. The Ruby side does
99
102
  not maintain a separate hand-written table of formats or kinds.
100
103
 
101
- ## Examples
104
+ ## Command line
102
105
 
103
- This gem does not install a CLI. See `examples/` for small scripts:
106
+ Installing the gem also installs `biocat`:
104
107
 
105
108
  ```sh
106
- ruby examples/bcat.rb sample.vcf
107
- ruby examples/bcat.rb -l fastq reads.fastq
108
- ruby examples/bcat.rb -l
109
- ruby examples/inspect_spans.rb sample.vcf
109
+ biocat sample.vcf
110
+ biocat --format fastq reads.fastq
111
+ biocat -l
110
112
  ```
111
113
 
112
- `bcat.rb` guesses the format from the file name when possible. Use `-l` /
113
- `--language` to pass a format explicitly. Calling `-l` without an argument
114
- prints the supported format names.
114
+ Files ending in `.gz` or `.bgz` are decompressed automatically. Reading BAM, CRAM, or BCF
115
+ files requires the optional `ruby-htslib` gem (`gem install htslib`).
115
116
 
116
117
  ## Development tasks
117
118
 
data/exe/biocat ADDED
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'optparse'
5
+ require 'zlib'
6
+ require 'biosyntax'
7
+
8
# Command-line settings; the option handlers overwrite these defaults.
options = {
  format: nil,
  list_formats: false
}

# Maps the extension of a binary file to the text format used to highlight
# its decoded records (BAM/CRAM as SAM, BCF as VCF).
BINARY_FORMATS = {
  sam: %w[.bam .cram],
  vcf: %w[.bcf]
}.each_with_object({}) do |(text_format, extensions), table|
  extensions.each { |extension| table[extension] = text_format }
end.freeze
15
+
16
# Decide whether an input has to be decoded as a binary (BAM/CRAM/BCF) file.
#
# The file extension is consulted first, except for "-" (standard input);
# if it is not a known binary extension, the requested format name decides.
#
# @param path [String] input path, or "-"
# @param requested_format [#to_s, nil] format name given by the user, if any
# @return [Symbol, nil] :sam for BAM/CRAM, :vcf for BCF, nil otherwise
def binary_format_for(path, requested_format)
  if path != '-'
    by_extension = BINARY_FORMATS[File.extname(path).downcase]
    return by_extension if by_extension
  end

  requested = requested_format&.to_s&.downcase
  return :sam if %w[bam cram].include?(requested)

  :vcf if requested == 'bcf'
end
27
+
28
# Load ruby-htslib on demand (it is only needed for BAM/CRAM/BCF input).
# If the library cannot be loaded, explain why on standard error and
# terminate the process with exit status 1.
#
# @return [Boolean] the result of `require` when the library is available
def require_htslib!
  require 'htslib'
rescue LoadError => load_error
  warn(
    "[biocat] ruby-htslib is required to read BAM/CRAM/BCF files: #{load_error.message}",
    '[biocat] install it with `gem install htslib`'
  )
  exit(1)
end
35
+
36
# Print +text+ to standard output line by line.
#
# @param text [#each_line] a String or IO-like object supplying the lines
# @param highlighter [#colorize, nil] when given, each line is passed through
#   its +colorize+ method before printing; when nil, lines are printed unchanged
# @return [Object] whatever +text.each_line+ returns
def emit_colored(text, highlighter)
  if highlighter
    text.each_line { |raw_line| print(highlighter.colorize(raw_line)) }
  else
    text.each_line { |raw_line| print(raw_line) }
  end
end
41
+
42
# Print a single record as one newline-terminated line.
#
# @param record [#to_s] the record to print; its string form is used
# @param highlighter [#colorize, nil] forwarded to +emit_colored+
# @return [Object] the result of +emit_colored+
def emit_record(record, highlighter)
  text = record.to_s
  text += "\n" unless text.end_with?("\n")
  emit_colored(text, highlighter)
end
47
+
48
# Print a binary file that is read through ruby-htslib: first the header
# text, then every record as one line.
#
# @param path [String] file to open
# @param format [Symbol] :sam opens the file with HTS::Bam, :vcf with HTS::Bcf;
#   any other value prints nothing
# @param highlighter [#colorize, nil] forwarded to the emit helpers
# @return [Object, nil] the result of the reader's +open+ call, or nil when
#   +format+ is not handled
def emit_htslib_file(path, format, highlighter)
  require_htslib!

  # Resolved after the require above, so the HTS constants are available.
  reader_class =
    case format
    when :sam then HTS::Bam
    when :vcf then HTS::Bcf
    end
  return unless reader_class

  reader_class.open(path) do |reader|
    emit_colored(reader.header.to_s, highlighter)
    reader.each { |record| emit_record(record, highlighter) }
  end
end
64
+
65
# Command-line interface definition; the handlers record their results in +options+.
parser = OptionParser.new
parser.banner = 'usage: biocat [options] [FILE ...]'

parser.on('-f', '--format FORMAT', 'Highlight as FORMAT') { |name| options[:format] = name }
parser.on('-l', '--list-formats', 'Print supported format names') { options[:list_formats] = true }

# Help is handled immediately: print the usage text and stop.
parser.on('-h', '--help', 'Print this help') do
  puts parser
  exit
end
81
+
82
# Yield every line of a gzip-compressed file, decoding all gzip members.
#
# Zlib::GzipReader stops at the end of the first gzip member. BGZF files
# (bgzip output, usually named .gz or .bgz) are a concatenation of many small
# members, so reading them with a single GzipReader silently truncates the
# output. Each member is therefore decoded in turn.
#
# @param path [String] path of the compressed file
# @yieldparam line [String] one line of decompressed text
# @raise [Zlib::GzipFile::Error] if the data is not valid gzip
# @raise [SystemCallError] if the file cannot be opened or read
def each_gzip_line(path)
  File.open(path, 'rb') do |file|
    carry = nil # unterminated tail of the previous member, if any

    loop do
      gz = Zlib::GzipReader.new(file)
      begin
        gz.each_line do |line|
          # A line may straddle a member boundary: glue the pieces together.
          line = carry + line if carry
          if line.end_with?("\n")
            carry = nil
            yield line
          else
            carry = line
          end
        end
        unused = gz.unused
      ensure
        # finish releases the reader without closing the underlying file.
        gz.finish
      end

      # The reader buffers ahead; rewind to where the next member starts.
      file.pos -= unused.bytesize if unused
      break if file.eof?
    end

    yield carry if carry
  end
end

# Entry point: parse the command line, then print every input (standard input
# when no path is given, or for the path "-") with optional highlighting.
#
# Exit status: 2 for usage or format errors, 1 for any other failure.
begin
  parser.parse!

  if options[:list_formats]
    puts BioSyntax::FORMAT_NAMES.join("\n")
    exit
  end

  paths = ARGV.empty? ? ['-'] : ARGV

  paths.each do |path|
    binary_format = binary_format_for(path, options[:format])
    format = binary_format || options[:format]
    # Only guess from the name when no format was requested; "-" has no name to guess from.
    format ||= BioSyntax.guess_format(path) unless path == '-'
    highlighter = format ? BioSyntax[format] : nil

    if binary_format
      emit_htslib_file(path, binary_format, highlighter)
    elsif path == '-'
      emit_colored($stdin, highlighter)
    elsif path.downcase.end_with?('.gz', '.bgz')
      each_gzip_line(path) { |line| emit_colored(line, highlighter) }
    else
      # The block form closes the file even when printing fails.
      File.open(path, 'rb') { |file| emit_colored(file, highlighter) }
    end
  end
rescue OptionParser::ParseError, BioSyntax::Error => e
  warn "[biocat] #{e.message}"
  warn parser
  exit 2
rescue StandardError => e
  # Covers I/O failures (SystemCallError) and gzip errors (Zlib::GzipFile::Error) as well.
  warn "[biocat] #{e.message}"
  exit 1
end
@@ -1,5 +1,5 @@
1
1
  module BioSyntax
2
2
  # Ruby gem version.
3
3
  # @return [String]
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.1'
5
5
  end
data/lib/biosyntax.rb CHANGED
@@ -436,8 +436,8 @@ module BioSyntax
436
436
  else
437
437
  name = value.to_s.downcase
438
438
  FORMATS[name.to_sym] ||
439
- FORMATS[name.tr('_', '-').to_sym] ||
440
- FORMATS_BY_ID[Native.format_id_from_name(name)]
439
+ FORMATS[name.tr('_', '-').to_sym] ||
440
+ FORMATS_BY_ID[Native.format_id_from_name(name)]
441
441
  end
442
442
 
443
443
  return found if found
metadata CHANGED
@@ -1,22 +1,24 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biosyntax
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
- bindir: bin
8
+ bindir: exe
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  email: 2xijok@gmail.com
13
- executables: []
13
+ executables:
14
+ - biocat
14
15
  extensions:
15
16
  - ext/biosyntax/extconf.rb
16
17
  extra_rdoc_files: []
17
18
  files:
18
19
  - LICENSE.md
19
20
  - README.md
21
+ - exe/biocat
20
22
  - ext/biosyntax/biosyntax.c
21
23
  - ext/biosyntax/biosyntax.h
22
24
  - ext/biosyntax/biosyntax_ext.c