ciika 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9158850d1c86f75286d6904cab6bdb02ba7fc992
4
+ data.tar.gz: 816c70213f0fd3e47c9d431e05d557c6e15f9fcf
5
+ SHA512:
6
+ metadata.gz: 1ee73fabbed29b2694e1bdf04fdde46197522909e79aca21d3997af9a9fef45f0371984ae9cc10e1213de708e30449937babcac87744177f9b857d3c00a5ed1e
7
+ data.tar.gz: 9feee3d7a1d1e3fd1195865405223be996558b5c77f4d70dd287084369161be3cae1d038306c1b3b6aec87e84fadea444c439650327e6e4c0b900c4779d4310c
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ vendor/bundle
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.1
4
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in ciika.gemspec
4
+ gemspec
@@ -0,0 +1,35 @@
1
+ # Ciika
2
+
3
+ [![Build Status](https://secure.travis-ci.org/inutano/ciika.png)](http://travis-ci.org/inutano/ciika)
4
+
5
+ Ciika is a utility tool for handling metadata from public next-generation sequencing (NGS) repositories.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'ciika'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install ciika
22
+
23
+ ## Usage
24
+
25
+ Parse a Sequence Read Archive metadata XML file:
26
+
27
+ ```ruby
28
+ xml = "/path/to/DRA000001.experiment.xml"
29
+ data = Ciika::SRA::Experiment.new(xml).parse
30
+ ```
31
+
32
+ ## Contributing
33
+
34
+ Bug reports and pull requests are welcome on GitHub at https://github.com/inutano/ciika.
35
+
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "ciika"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ require "pry"
11
+ Pry.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,35 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'ciika/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "ciika"
8
+ spec.version = Ciika::VERSION
9
+ spec.authors = ["Tazro Inutano Ohta"]
10
+ spec.email = ["inutano@gmail.com"]
11
+
12
+ spec.summary = %q{A ruby parser for public DNA sequence database metadata}
13
+ spec.description = %q{Ciika is a set of utils which helps to handle metadata from Sequence Read Archive, BioProject, BioSample, GEO, ArrayExpress, etc.}
14
+ spec.homepage = "http://github.com/inutano/ciika"
15
+
16
+ # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
17
+ # delete this section to allow pushing this gem to any host.
18
+ if spec.respond_to?(:metadata)
19
+ spec.metadata['allowed_push_host'] = "https://rubygems.org"
20
+ else
21
+ raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
22
+ end
23
+
24
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
+ spec.bindir = "exe"
26
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ["lib"]
28
+
29
+ spec.add_development_dependency "bundler", "~> 1.10"
30
+ spec.add_development_dependency "rake", "~> 10.0"
31
+ spec.add_development_dependency "rspec"
32
+ spec.add_development_dependency "pry"
33
+ spec.add_dependency "thor"
34
+ spec.add_dependency "nokogiri"
35
+ end
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "ciika"
4
+ Ciika::CLI.start
@@ -0,0 +1,12 @@
1
+ require "ciika/sra/run"
2
+ require "ciika/sra/sample"
3
+ require "ciika/sra/experiment"
4
+ require "ciika/sra/study"
5
+ require "ciika/sra/submission"
6
+ require "ciika/sra"
7
+ require "ciika/cli"
8
+ require "ciika/version"
9
+
10
+ module Ciika
11
+ # Your code goes here...
12
+ end
@@ -0,0 +1,12 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'thor'
4
+
5
module Ciika
  # Command line entry point, built on Thor.
  class CLI < Thor
    desc "retrieve [db] [id]", "Retrieve metadata from db with id in json format"
    # Prints a one-line acknowledgement naming the database and the identifier.
    # Nothing is fetched by this method itself.
    def retrieve(db, id)
      message = "Got information from #{db} with identifier #{id}"
      puts message
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'nokogiri'
4
+
5
module Ciika
  module SRA
    module_function

    # Loads an SRA metadata XML file and returns the nodes of one record type.
    #
    # @param type [String] CSS selector of the record element, e.g. "RUN"
    # @param xml [String] path of the XML file on disk
    # @param id [Symbol, String] :all to keep every node; any other value is
    #   interpolated into a regular expression and matched against each node's
    #   "accession" attribute
    # @return [Enumerable] all nodes selected by +type+ when id is :all,
    #   otherwise an Array holding only the nodes whose accession matches
    def id_selector(type, xml, id)
      # The block form of File.open closes the handle as soon as parsing is
      # done, and unlike Kernel#open it never treats a path that starts with
      # "|" as a command to execute.
      dataset = File.open(xml) { |io| Nokogiri::XML(io) }.css(type)
      return dataset if id == :all

      # The pattern is unanchored, so an id also selects accessions that
      # merely contain it.
      dataset.select { |node| node.attr("accession") =~ /#{id}/ }
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
module Ciika
  module SRA
    # Converts the EXPERIMENT records of an SRA experiment XML file into
    # nested Hashes of strings.
    class Experiment
      # Selects the EXPERIMENT nodes that #parse will convert.
      #
      # @param xml [String] path of the experiment XML file
      # @param id [Symbol, String] :all for every record, or an accession
      #   pattern handed to Ciika::SRA.id_selector
      # @raise [NameError] when the selection comes back as an empty Array
      def initialize(xml, id: :all)
        @experimentset = Ciika::SRA::id_selector("EXPERIMENT", xml, id)
        # Check the set loaded above. The check used to read @studyset, which
        # is never assigned in this class, so a missing id was never reported.
        raise NameError, "ID not found" if @experimentset == []
      end

      # @return [Array<Hash>] one Hash per selected EXPERIMENT node
      def parse
        @experimentset.map do |experiment|
          # The layout and the platform are each encoded as the name of the
          # first child element (the layout's attributes live on that child too).
          layout = first_element_child(experiment.css("LIBRARY_LAYOUT").first)
          platform = first_element_child(experiment.css("PLATFORM").first)
          {
            accession: experiment.attr("accession").to_s,
            alias: experiment.attr("alias").to_s,
            center_name: experiment.attr("center_name").to_s,
            title: experiment.css("TITLE").inner_text,
            study_accession: experiment.css("STUDY_REF").attr("accession").to_s,
            study_refname: experiment.css("STUDY_REF").attr("refname").to_s,
            design_description: experiment.css("DESIGN_DESCRIPTION").inner_text,
            sample_accession: experiment.css("SAMPLE_DESCRIPTOR").first.attr("accession").to_s,
            sample_refname: experiment.css("SAMPLE_DESCRIPTOR").first.attr("refname").to_s,

            library_description: {
              library_name: experiment.css("LIBRARY_NAME").inner_text,
              library_strategy: experiment.css("LIBRARY_STRATEGY").inner_text,
              library_source: experiment.css("LIBRARY_SOURCE").inner_text,
              library_selection: experiment.css("LIBRARY_SELECTION").inner_text,
              library_layout: layout.name,
              library_orientation: layout.attr("ORIENTATION").to_s,
              library_nominal_length: layout.attr("NOMINAL_LENGTH").to_s,
              library_nominal_sdev: layout.attr("NOMINAL_SDEV").to_s,
              library_construction_protocol: experiment.css("LIBRARY_CONSTRUCTION_PROTOCOL").inner_text,
            },

            platform_information: {
              platform: platform.name,
              instrument_model: experiment.css("INSTRUMENT_MODEL").inner_text,
              cycle_sequence: experiment.css("CYCLE_SEQUENCE").inner_text,
              cycle_count: experiment.css("CYCLE_COUNT").inner_text,
              flow_sequence: experiment.css("FLOW_SEQUENCE").inner_text,
              flow_count: experiment.css("FLOW_COUNT").inner_text,
              key_sequence: experiment.css("KEY_SEQUENCE").inner_text,
            },

            processing_information: {
              base_calls: {
                sequence_space: experiment.css("SEQUENCE_SPACE").inner_text,
                base_caller: experiment.css("BASE_CALLER").inner_text,
              },

              quality_scores: experiment.css("QUALITY_SCORES").map { |node|
                {
                  quality_type: node.attr("qtype").to_s,
                  quality_scorer: node.css("QUALITY_SCORER").inner_text,
                  number_of_level: node.css("NUMBER_OF_LEVELS").inner_text,
                  multiplier: node.css("MULTIPLIER").inner_text
                }
              },

              pipe_section: experiment.css("PIPE_SECTION").map { |node|
                {
                  step_index: node.css("STEP_INDEX").inner_text,
                  prev_step_index: node.css("PREV_STEP_INDEX").inner_text,
                  program: node.css("PROGRAM").inner_text,
                  version: node.css("VERSION").inner_text,
                }
              },
            },

            spot_information: {
              number_of_reads_per_spot: experiment.css("NUMBER_OF_READS_PER_SPOT").inner_text,
              spot_length: experiment.css("SPOT_LENGTH").inner_text,
            },

            read_spec: experiment.css("READ_SPEC").map { |node|
              {
                read_index: node.css("READ_INDEX").inner_text,
                read_class: node.css("READ_CLASS").inner_text,
                read_type: node.css("READ_TYPE").inner_text,
                base_coord: node.css("BASE_COORD").inner_text,
              }
            },
          }
        end
      end

      private

      # Returns the first child of +node+ that is an element. Indexing
      # children[1] only worked when a whitespace text node preceded the
      # element, i.e. for pretty-printed XML.
      def first_element_child(node)
        node.element_children.first
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
module Ciika
  module SRA
    # Turns the RUN records of an SRA run XML file into plain Hashes.
    class Run
      # @param xml [String] path of the run XML file
      # @param id [Symbol, String] :all or an accession pattern, passed on to
      #   Ciika::SRA.id_selector
      # @raise [NameError] when the selection comes back as an empty Array
      def initialize(xml, id: :all)
        selected = Ciika::SRA.id_selector("RUN", xml, id)
        @runset = selected
        raise NameError, "ID not found" if selected == []
      end

      # @return [Array<Hash>] one Hash per selected RUN node
      def parse
        @runset.map { |run| describe(run) }
      end

      private

      # Builds the Hash for a single RUN node, keeping the key order stable.
      def describe(run)
        record = {}
        %w[accession alias center_name run_center run_date instrument_name total_data_blocks].each do |attribute|
          record[attribute.to_sym] = run.attr(attribute).to_s
        end
        record[:pipeline] = run.css("PIPE_SECTION").map { |section| pipe_section(section) }
        record[:spot_information] = spot_information(run)
        record[:run_attr] = run.css("RUN_ATTRIBUTE").map { |pair| texts(pair, tag: "TAG", value: "VALUE") }
        record
      end

      # One pipeline step: its section name attribute plus its child texts.
      def pipe_section(section)
        { section_name: section.attr("section_name").to_s }.merge(
          texts(section, step_index: "STEP_INDEX", prev_step_index: "PREV_STEP_INDEX",
                         program: "PROGRAM", version: "VERSION")
        )
      end

      # Spot descriptor summary followed by the list of read specs.
      def spot_information(run)
        summary = texts(run, number_of_reads_per_spot: "NUMBER_OF_READS_PER_SPOT",
                             spot_length: "SPOT_LENGTH")
        summary[:read_spec] = run.css("READ_SPEC").map do |spec|
          texts(spec, read_index: "READ_INDEX", read_class: "READ_CLASS",
                      read_type: "READ_TYPE", base_coord: "BASE_COORD")
        end
        summary
      end

      # Maps each key to the inner text of the element selected under +node+.
      def texts(node, fields)
        fields.each_with_object({}) do |(key, element), collected|
          collected[key] = node.css(element).inner_text
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
module Ciika
  module SRA
    # Turns the SAMPLE records of an SRA sample XML file into plain Hashes.
    class Sample
      # @param xml [String] path of the sample XML file
      # @param id [Symbol, String] :all or an accession pattern, passed on to
      #   Ciika::SRA.id_selector
      # @raise [NameError] when the selection comes back as an empty Array
      def initialize(xml, id: :all)
        @sampleset = Ciika::SRA.id_selector("SAMPLE", xml, id)
        return unless @sampleset == []

        raise NameError, "ID not found"
      end

      # @return [Array<Hash>] one Hash per selected SAMPLE node
      def parse
        @sampleset.map { |sample| sample_record(sample) }
      end

      private

      # Builds the Hash for a single SAMPLE node.
      def sample_record(sample)
        text = ->(element) { sample.css(element).inner_text }
        {
          accession: sample.attr("accession").to_s,
          alias: sample.attr("alias").to_s,
          title: text["TITLE"],
          sample_description: text["DESCRIPTION"],

          organism_information: {
            taxon_id: text["TAXON_ID"],
            common_name: text["COMMON_NAME"],
            scientific_name: text["SCIENTIFIC_NAME"],
            anonymized_name: text["ANONYMIZED_NAME"],
            individual_name: text["INDIVIDUAL_NAME"]
          },

          sample_links: {
            url_link: link_list(sample, "URL_LINK", label: "LABEL", url: "URL"),
            entrez_link: link_list(sample, "ENTREZ_LINK", db: "DB", id: "ID")
          }
        }
      end

      # One Hash per +element+ node, mapping each key to the inner text of the
      # child element named in +fields+.
      def link_list(sample, element, fields)
        sample.css(element).map do |link|
          fields.each_with_object({}) do |(key, child), entry|
            entry[key] = link.css(child).inner_text
          end
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
module Ciika
  module SRA
    # Turns the STUDY records of an SRA study XML file into plain Hashes.
    class Study
      # @param xml [String] path of the study XML file
      # @param id [Symbol, String] :all or an accession pattern, passed on to
      #   Ciika::SRA.id_selector
      # @raise [NameError] when the selection comes back as an empty Array
      def initialize(xml, id: :all)
        found = Ciika::SRA.id_selector("STUDY", xml, id)
        @studyset = found
        raise NameError, "ID not found" if found == []
      end

      # @return [Array<Hash>] one Hash per selected STUDY node
      def parse
        @studyset.map do |study|
          record = {}
          %w[accession alias center_name].each do |attribute|
            record[attribute.to_sym] = study.attr(attribute).to_s
          end
          record[:center_project_name] = study.css("CENTER_PROJECT_NAME").inner_text
          record[:study_title] = study.css("STUDY_TITLE").inner_text
          record[:study_type] = study.css("STUDY_TYPE").attr("existing_study_type").to_s
          record[:study_abstract] = study.css("STUDY_ABSTRACT").inner_text
          record[:study_description] = study.css("STUDY_DESCRIPTION").inner_text
          record[:url_link] = links(study, "URL_LINK", label: "LABEL", url: "URL")
          record[:entrez_link] = links(study, "ENTREZ_LINK", db: "DB", id: "ID")
          record[:related_link] = links(study, "RELATED_LINK", db: "DB", id: "ID", label: "LABEL")
          record
        end
      end

      private

      # One Hash per +element+ node under +study+, mapping each key to the
      # inner text of the child element named in +fields+.
      def links(study, element, fields)
        study.css(element).map do |link|
          fields.each_with_object({}) do |(key, child), entry|
            entry[key] = link.css(child).inner_text
          end
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
module Ciika
  module SRA
    # Converts the SUBMISSION records of an SRA submission XML file into Hashes.
    class Submission
      # Selects the SUBMISSION nodes that #parse will convert.
      #
      # @param xml [String] path of the submission XML file
      # @param id [Symbol, String] :all for every record, or an accession
      #   pattern handed to Ciika::SRA.id_selector
      # @raise [NameError] when the selection comes back as an empty Array
      def initialize(xml, id: :all)
        @submissionset = Ciika::SRA::id_selector("SUBMISSION", xml, id)
        # Check the set loaded above. The check used to read @submission, which
        # is never assigned, so a missing id was never reported.
        raise NameError, "ID not found" if @submissionset == []
      end

      # @return [Array<Hash>] one Hash per selected SUBMISSION node, holding
      #   the node's attributes as strings (an absent attribute becomes "")
      def parse
        @submissionset.map do |submission|
          {
            alias: submission.attr("alias").to_s,
            accession: submission.attr("accession").to_s,
            submission_comment: submission.attr("submission_comment").to_s,
            center_name: submission.attr("center_name").to_s,
            lab_name: submission.attr("lab_name").to_s,
            submission_date: submission.attr("submission_date").to_s
          }
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Ciika
2
+ VERSION = "0.1.0"
3
+ end
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ciika
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Tazro Inutano Ohta
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-11-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: thor
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: nokogiri
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Ciika is a set of utils which helps to handle metadata from Sequence
98
+ Read Archive, BioProject, BioSample, GEO, ArrayExpress, etc.
99
+ email:
100
+ - inutano@gmail.com
101
+ executables:
102
+ - ciika
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - ".gitignore"
107
+ - ".rspec"
108
+ - ".travis.yml"
109
+ - Gemfile
110
+ - README.md
111
+ - Rakefile
112
+ - bin/console
113
+ - bin/setup
114
+ - ciika.gemspec
115
+ - exe/ciika
116
+ - lib/ciika.rb
117
+ - lib/ciika/cli.rb
118
+ - lib/ciika/sra.rb
119
+ - lib/ciika/sra/experiment.rb
120
+ - lib/ciika/sra/run.rb
121
+ - lib/ciika/sra/sample.rb
122
+ - lib/ciika/sra/study.rb
123
+ - lib/ciika/sra/submission.rb
124
+ - lib/ciika/version.rb
125
+ homepage: http://github.com/inutano/ciika
126
+ licenses: []
127
+ metadata:
128
+ allowed_push_host: https://rubygems.org
129
+ post_install_message:
130
+ rdoc_options: []
131
+ require_paths:
132
+ - lib
133
+ required_ruby_version: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ required_rubygems_version: !ruby/object:Gem::Requirement
139
+ requirements:
140
+ - - ">="
141
+ - !ruby/object:Gem::Version
142
+ version: '0'
143
+ requirements: []
144
+ rubyforge_project:
145
+ rubygems_version: 2.4.5
146
+ signing_key:
147
+ specification_version: 4
148
+ summary: A ruby parser for public DNA sequence database metadata
149
+ test_files: []