ezgff 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/README.md +29 -12
- data/lib/ezgff/gffsqlitedb.rb +14 -2
- data/lib/ezgff/version.rb +1 -1
- data/webapi/app/main.py +76 -0
- data/webapi/app/run.py +5 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3dedc8f2f93b8b91983a4f330d7c8194be3bb6ba72c52ded51b11bcea2cc55eb
|
4
|
+
data.tar.gz: 7db8dbc168eb71e79576e7efcfc1da8441848c89b47a916e4e212de727aa1c46
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a75c6fd47cfdddf8a2984462be9f985e775a1e8ada520f9c17581573c6d3db3a3b4d00b6b94d06934ec2d0237bbd287b97203c057244700e73f6aa70bc36b701
|
7
|
+
data.tar.gz: ba88715c8a7855e2b005beb055cff0958f9d7146cb8ac560a3d6660b7daf34c0b5039d8765b19f868fc290828899603f4c9232104bb5440722942426839eba43
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,39 +1,56 @@
|
|
1
|
-
#
|
1
|
+
# ezgff
|
2
2
|
|
3
|
-
## What is
|
3
|
+
## What is ezgff?
|
4
4
|
|
5
5
|
Utilities for GFF3, the genome annotation format. Useful to explore the gene model features.
|
6
6
|
|
7
|
+
|
7
8
|
## Pre-requisites
|
8
9
|
|
9
|
-
*
|
10
|
+
* Sqlite3
|
11
|
+
* Ruby
|
10
12
|
|
11
13
|
## Install
|
12
14
|
|
13
15
|
```bash
|
14
|
-
gem ezgff
|
16
|
+
gem install ezgff
|
15
17
|
```
|
16
18
|
|
17
19
|
## Quick start
|
18
20
|
|
19
|
-
|
21
|
+
ezgff provides a command line interface.
|
22
|
+
|
23
|
+
You need to build an ezgff database from the GFF3 file first by using the 'build' subcommand. Once you have built the ezgff database, you can search and retrieve data from it by using the 'search' and 'view' subcommands.
|
24
|
+
|
25
|
+
### Build database from GFF3 file.
|
20
26
|
|
21
27
|
```bash
|
22
|
-
ezgff build
|
28
|
+
ezgff build gff3_file
|
23
29
|
```
|
24
30
|
|
25
|
-
|
31
|
+
This command generates the gff3_file.ezdb directory, which is the ezgff database that you specify when you use the view and search subcommands.
|
26
32
|
|
27
|
-
|
28
|
-
|
33
|
+
### Retrieve GFF3 record by ID.
|
34
|
+
|
35
|
+
```
|
36
|
+
ezgff view DB ID
|
29
37
|
```
|
30
38
|
|
31
39
|
```
|
32
|
-
ezgff view
|
40
|
+
ezgff view DB ID --with=ancestors
|
33
41
|
```
|
34
42
|
|
35
|
-
|
43
|
+
GFF lines with the ID are displayed.
|
44
|
+
|
45
|
+
Data can be formatted as JSON. Below are examples that work with jq.
|
46
|
+
|
47
|
+
```
|
48
|
+
ezgff view data.ezdb cds-WP_010895901.1 --with=ancestors --format=json |jq
|
49
|
+
```
|
36
50
|
|
51
|
+
More complicated example
|
37
52
|
```
|
38
|
-
|
53
|
+
ezgff view GCF_000009605.1_ASM960v1_genomic.gff.ezdb cds-WP_010895901.1 --with=ancestors --format=json \
|
54
|
+
|jq -r '.gff_records | map(select(.type == "gene"))[0] | [.seqid, .start, .end, .attributes.gene] \
|
55
|
+
|@csv'
|
39
56
|
```
|
data/lib/ezgff/gffsqlitedb.rb
CHANGED
@@ -103,8 +103,20 @@ module Ezgff
|
|
103
103
|
|
104
104
|
sql = "INSERT INTO gff_records (line_num, record, id, parent, seqid, source, type, start, end, score, strand, phase, attributes, attributes_json)
|
105
105
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
106
|
-
values = [
|
107
|
-
|
106
|
+
values = [
|
107
|
+
i, # line number
|
108
|
+
l.chomp, # raw record
|
109
|
+
id, # ID
|
110
|
+
parent, # parent ID
|
111
|
+
a[0], # seqid
|
112
|
+
a[1], # source
|
113
|
+
a[2], # type
|
114
|
+
a[3], # start
|
115
|
+
a[4], # end
|
116
|
+
(a[5] == "." ? nil : a[5]), # score
|
117
|
+
a[6], # strand
|
118
|
+
(a[7] == "." ? nil : a[7]), # phase
|
119
|
+
a[8], # attributes
|
108
120
|
attributes_as_json(l)]
|
109
121
|
sq3_db.execute(sql, values)
|
110
122
|
end
|
data/lib/ezgff/version.rb
CHANGED
data/webapi/app/main.py
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
from fastapi import FastAPI
|
2
|
+
from fastapi import Query, Path
|
3
|
+
from typing import Optional, List
|
4
|
+
import subprocess
|
5
|
+
import sys
|
6
|
+
import json
|
7
|
+
from pydantic import BaseModel, Field
|
8
|
+
from enum import Enum
|
9
|
+
from pydantic.errors import NoneIsNotAllowedError
|
10
|
+
import uvicorn
|
11
|
+
import argparse
|
12
|
+
|
13
|
+
parser = argparse.ArgumentParser()
|
14
|
+
parser.add_argument('-d', '--db', required=True)
|
15
|
+
parser.add_argument('-b', '--bind', default='0.0.0.0')
|
16
|
+
parser.add_argument('-p', '--port', type=int, default=8000)
|
17
|
+
args = parser.parse_args()
|
18
|
+
print(args)
|
19
|
+
ezdb = args.db
|
20
|
+
|
21
|
+
#print(ezdb)
|
22
|
+
#print(args)
|
23
|
+
|
24
|
+
app = FastAPI()
|
25
|
+
|
26
|
+
class OptWith(str, Enum):
|
27
|
+
none = "none"
|
28
|
+
parent = "parent"
|
29
|
+
children = "children"
|
30
|
+
ancestors = "ancestors"
|
31
|
+
descendants = "descendants"
|
32
|
+
|
33
|
+
class GffColumnStrand(str, Enum):
|
34
|
+
plus = "+"
|
35
|
+
minus = "-"
|
36
|
+
unstranded = "."
|
37
|
+
unknown = "?"
|
38
|
+
|
39
|
+
class GffRecord(BaseModel):
    """One GFF3 record as returned by the web API.

    The first eight fields mirror GFF3 columns 1-8; ``attributes`` carries
    column 9 as a dict.  ``line_num``, ``id`` and ``parent_id`` are extra
    bookkeeping fields described by their own ``Field`` metadata below.
    """

    # The keyword was misspelled "descripion" on seqid and source, so those
    # two descriptions were not registered as field descriptions.
    seqid: str = Field(
        title="seqid",
        description="GFF3 column 1: sequence ID",
        example="NC_002528.1",
    )
    source: str = Field(
        title="source",
        description="GFF3 column 2: algorithm or operating procedure",
        example="Refseq",
    )
    # Field name shadows the builtin ``type``; kept because it is part of
    # the response schema.
    type: str = Field(
        title="type",
        description="GFF3 column 3: the type of the feature (previously called the \"method\")",
    )
    start: int = Field(
        title="start",
        description="GFF3 column 4: the start coordinate of the feature. 1-based integer.",
    )
    end: int = Field(
        title="end",
        description="GFF3 column 5: the end coordinate of the feature. 1-based integer.",
    )
    # Optional, defaults to None.
    score: Optional[float] = Field(
        None,
        description="GFF3 column 6: the score of the feature. A floating point number.",
    )
    strand: GffColumnStrand = Field(
        title="strand",
        description="GFF3 column 7: the strand of the feature. +, -, . (unstranded), ? (unknown) are allowed.",
    )
    # Optional, defaults to None.
    phase: Optional[int] = Field(
        None,
        description="GFF3 column 8: phase for CDS. 0, 1, 2 are allowed.",
    )
    line_num: int = Field(
        description="Line number in the original GFF3 file. Required and Unique.",
    )
    # Field name shadows the builtin ``id``; kept because it is part of the
    # response schema.
    id: Optional[str] = Field(None, description="ID")
    parent_id: Optional[str] = Field(None, description="Parent ID")
    attributes: Optional[dict] = Field(
        title="attributes",
        description="Gff3 column 9: attributes.",
    )
|
52
|
+
|
53
|
+
class GffRecords(BaseModel):
|
54
|
+
gff_records: List[GffRecord]
|
55
|
+
|
56
|
+
@app.get("/view/{query}", response_model=GffRecords)
|
57
|
+
def view(
|
58
|
+
query: str = Path(..., example="NC_002528.1"),
|
59
|
+
w: OptWith = Query("none", description="with"),
|
60
|
+
t: Optional[str] = Query(None, description="type", example="gene")
|
61
|
+
):
|
62
|
+
return json.loads(run_ezgff(query, w, t))
|
63
|
+
|
64
|
+
def run_ezgff(query, w, t):
    """Run ``ezgff view`` on the configured database and return its stdout.

    Args:
        query: Value passed to ``ezgff view`` as the positional argument
            after the database path.
        w: Value for the ``-w`` option.
        t: Optional value for the ``-t`` option; omitted when falsy.

    Returns:
        The raw bytes the command wrote to stdout (requested with
        ``-f json``).

    Raises:
        RuntimeError: If the ``ezgff`` process exits with a non-zero status.
    """
    # NOTE(review): `query` and `t` come from the HTTP request; a value that
    # starts with "-" may be read as an option by the ezgff CLI -- confirm
    # how the CLI parses its arguments.
    cmd = ["ezgff", "view", ezdb, query, "-f", "json", "-w", w]
    if t:
        cmd.extend(["-t", t])
    print(cmd)
    proc = subprocess.run(cmd, stdout=subprocess.PIPE)
    # Fail here rather than handing empty or partial output to the caller,
    # where it would only surface as an opaque JSON decoding error.
    if proc.returncode != 0:
        raise RuntimeError(
            "ezgff exited with status {}: {!r}".format(proc.returncode, cmd)
        )
    res = proc.stdout
    print(res)
    return res
|
73
|
+
|
74
|
+
|
75
|
+
if __name__ == "__main__":
|
76
|
+
uvicorn.run(app, host=args.bind, port=args.port)
|
data/webapi/app/run.py
ADDED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ezgff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuji Shigenobu
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-07-
|
11
|
+
date: 2021-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sqlite3
|
@@ -91,6 +91,8 @@ files:
|
|
91
91
|
- lib/ezgff.rb
|
92
92
|
- lib/ezgff/gffsqlitedb.rb
|
93
93
|
- lib/ezgff/version.rb
|
94
|
+
- webapi/app/main.py
|
95
|
+
- webapi/app/run.py
|
94
96
|
homepage: https://github.com/shujishigenobu/ezgff_alpha
|
95
97
|
licenses:
|
96
98
|
- MIT
|