gffkit 0.2__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gffkit
3
- Version: 0.2
3
+ Version: 0.3.1
4
4
  Summary: Region-aware GFF annotation integration toolkit
5
5
  Author: Qunjie Zhang
6
6
  License: MIT
@@ -48,7 +48,8 @@ gffkit integrate \
48
48
  --annotation-a EviAnn.gff3 \
49
49
  --annotation-b ANNEVO.gff3 \
50
50
  --outdir gffkit_out \
51
- --prefix sample
51
+ --prefix sample \
52
+ -t 8
52
53
  ```
53
54
 
54
55
  Outputs:
@@ -61,7 +62,7 @@ Outputs:
61
62
 
62
63
  ```bash
63
64
  # 1. Detect suspicious merged genes in Annotation A
64
- gffkit detect-bridge -i EviAnn.gff3 -o suspicious.tsv
65
+ gffkit detect-bridge -i EviAnn.gff3 -o suspicious.tsv -t 8
65
66
 
66
67
  # 2. Use A as the global reference, but switch to B in suspicious regions
67
68
  gffkit complement \
@@ -69,12 +70,27 @@ gffkit complement \
69
70
  --add ANNEVO.gff3 \
70
71
  --swap_region_tsv suspicious.tsv \
71
72
  --swap_region_flank 100 \
72
- --output merged.gff3
73
+ --output merged.gff3 \
74
+ -t 8
73
75
 
74
76
  # 3. Add UTR features
75
77
  gffkit add-utr -i merged.gff3 -o final.annotation.withUTR.gff3
76
78
  ```
77
79
 
80
+ ### Merge three or more annotations
81
+
82
+ Use repeated `--add` arguments. Files are merged in the order provided.
83
+
84
+ ```bash
85
+ gffkit complement \
86
+ --ref EviAnn.gff3 \
87
+ --add ANNEVO.gff3 \
88
+ --add Helixer.gff3 \
89
+ --add PASA.gff3 \
90
+ --output merged.multi.gff3 \
91
+ -t 8
92
+ ```
93
+
78
94
  ## Command overview
79
95
 
80
96
  ```bash
@@ -85,11 +101,30 @@ gffkit add-utr --help
85
101
  gffkit integrate --help
86
102
  ```
87
103
 
104
+ ## Threads
105
+
106
+ Versions 0.3 and later support the `-t/--threads` option.
107
+
108
+ - `detect-bridge` analyzes genes in parallel.
109
+ - `complement` pre-parses multiple `--add` files in parallel, then merges them in the original command-line order.
110
+ - `integrate` passes the thread count to the detect and complement steps.
111
+
112
+ Example:
113
+
114
+ ```bash
115
+ gffkit integrate --annotation-a EviAnn.gff3 --annotation-b ANNEVO.gff3 -t 16
116
+ ```
117
+
88
118
  ## Annotation integration strategy
89
119
 
90
120
  - Annotation A, for example EviAnn/RNA-seq-supported GFF, is used as the global primary reference.
91
121
  - Annotation B, for example ANNEVO/deep-learning GFF, is used as the local primary reference only in suspicious merged-gene regions.
92
122
  - UTR features are reconstructed after merging using an exon-minus-CDS strategy.
123
+ - When multiple tools annotate the same gene locus, the GFF source column is combined with `|`, for example `EviAnn|ANNEVO`.
124
+
125
+ ## Maintainer notes
126
+
127
+ When command-line options or behavior changes, update this `README.md` in the versioned package directory before building and uploading to PyPI.
93
128
 
94
129
  ## License
95
130
 
@@ -23,7 +23,8 @@ gffkit integrate \
23
23
  --annotation-a EviAnn.gff3 \
24
24
  --annotation-b ANNEVO.gff3 \
25
25
  --outdir gffkit_out \
26
- --prefix sample
26
+ --prefix sample \
27
+ -t 8
27
28
  ```
28
29
 
29
30
  Outputs:
@@ -36,7 +37,7 @@ Outputs:
36
37
 
37
38
  ```bash
38
39
  # 1. Detect suspicious merged genes in Annotation A
39
- gffkit detect-bridge -i EviAnn.gff3 -o suspicious.tsv
40
+ gffkit detect-bridge -i EviAnn.gff3 -o suspicious.tsv -t 8
40
41
 
41
42
  # 2. Use A as the global reference, but switch to B in suspicious regions
42
43
  gffkit complement \
@@ -44,12 +45,27 @@ gffkit complement \
44
45
  --add ANNEVO.gff3 \
45
46
  --swap_region_tsv suspicious.tsv \
46
47
  --swap_region_flank 100 \
47
- --output merged.gff3
48
+ --output merged.gff3 \
49
+ -t 8
48
50
 
49
51
  # 3. Add UTR features
50
52
  gffkit add-utr -i merged.gff3 -o final.annotation.withUTR.gff3
51
53
  ```
52
54
 
55
+ ### Merge three or more annotations
56
+
57
+ Use repeated `--add` arguments. Files are merged in the order provided.
58
+
59
+ ```bash
60
+ gffkit complement \
61
+ --ref EviAnn.gff3 \
62
+ --add ANNEVO.gff3 \
63
+ --add Helixer.gff3 \
64
+ --add PASA.gff3 \
65
+ --output merged.multi.gff3 \
66
+ -t 8
67
+ ```
68
+
53
69
  ## Command overview
54
70
 
55
71
  ```bash
@@ -60,11 +76,30 @@ gffkit add-utr --help
60
76
  gffkit integrate --help
61
77
  ```
62
78
 
79
+ ## Threads
80
+
81
+ Versions 0.3 and later support the `-t/--threads` option.
82
+
83
+ - `detect-bridge` analyzes genes in parallel.
84
+ - `complement` pre-parses multiple `--add` files in parallel, then merges them in the original command-line order.
85
+ - `integrate` passes the thread count to the detect and complement steps.
86
+
87
+ Example:
88
+
89
+ ```bash
90
+ gffkit integrate --annotation-a EviAnn.gff3 --annotation-b ANNEVO.gff3 -t 16
91
+ ```
92
+
63
93
  ## Annotation integration strategy
64
94
 
65
95
  - Annotation A, for example EviAnn/RNA-seq-supported GFF, is used as the global primary reference.
66
96
  - Annotation B, for example ANNEVO/deep-learning GFF, is used as the local primary reference only in suspicious merged-gene regions.
67
97
  - UTR features are reconstructed after merging using an exon-minus-CDS strategy.
98
+ - When multiple tools annotate the same gene locus, the GFF source column is combined with `|`, for example `EviAnn|ANNEVO`.
99
+
100
+ ## Maintainer notes
101
+
102
+ When command-line options or behavior changes, update this `README.md` in the versioned package directory before building and uploading to PyPI.
68
103
 
69
104
  ## License
70
105
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "gffkit"
7
- version = "0.2"
7
+ version = "0.3.1"
8
8
  description = "Region-aware GFF annotation integration toolkit"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -1,3 +1,3 @@
1
1
  """gffkit: region-aware GFF annotation integration utilities."""
2
2
 
3
- __version__ = "0.2"
3
+ __version__ = "0.3.1"
@@ -20,13 +20,14 @@ agat_sp_complement_annotations.pl 的 Python 改写版(纯 Python,不调用
20
20
 
21
21
  from __future__ import annotations
22
22
 
23
- import argparse
24
- import copy
25
- import re
26
- import sys
27
- from collections import defaultdict
28
- from dataclasses import dataclass, field
29
- from typing import Dict, Iterable, List, Optional, Tuple
23
+ import argparse
24
+ import copy
25
+ import re
26
+ import sys
27
+ from collections import defaultdict
28
+ from dataclasses import dataclass, field
29
+ from concurrent.futures import ThreadPoolExecutor
30
+ from typing import Dict, Iterable, List, Optional, Tuple
30
31
 
31
32
 
32
33
  GENE_LIKE_TYPES = {
@@ -607,7 +608,7 @@ def merge_source_names(*source_groups: Iterable[str]) -> str:
607
608
  for name in split_source_names(source):
608
609
  if name not in merged:
609
610
  merged.append(name)
610
- return ",".join(merged) if merged else "."
611
+ return "|".join(merged) if merged else "."
611
612
 
612
613
 
613
614
  def set_tree_source(root: Feature, source: str) -> None:
@@ -843,7 +844,7 @@ def print_complement_resume(before_counts: Dict[str, Dict[str, int]],
843
844
  eprint("\nNow the data contains:")
844
845
 
845
846
 
846
- def build_arg_parser() -> argparse.ArgumentParser:
847
+ def build_arg_parser() -> argparse.ArgumentParser:
847
848
  """构建命令行参数解析器。"""
848
849
  parser = argparse.ArgumentParser(
849
850
  description="用一个或多个注释文件去补充参考注释(Python 版,纯 Python,不调用 Perl)。"
@@ -880,12 +881,16 @@ def build_arg_parser() -> argparse.ArgumentParser:
880
881
  default=100,
881
882
  help="从 suspicious.tsv 读取区间时,start/end 两端各扩展的 bp 数,默认 100"
882
883
  )
883
- parser.add_argument("--output", "--out", "-o", default=None, help="输出文件路径;默认输出到 STDOUT")
884
- parser.add_argument(
885
- "-v", "--verbose", type=int, default=1,
886
- help="日志详细程度(0~4),这里只简单保留该参数接口,默认 1"
887
- )
888
- return parser
884
+ parser.add_argument("--output", "--out", "-o", default=None, help="输出文件路径;默认输出到 STDOUT")
885
+ parser.add_argument(
886
+ "-t", "--threads", type=int, default=1,
887
+ help="并行线程数;多个 --add 文件会并行预解析,但仍按输入顺序合并,默认 1"
888
+ )
889
+ parser.add_argument(
890
+ "-v", "--verbose", type=int, default=1,
891
+ help="日志详细程度(0~4),这里只简单保留该参数接口,默认 1"
892
+ )
893
+ return parser
889
894
 
890
895
 
891
896
  def parse_swap_regions(raw_regions: Optional[List[List[str]]]) -> List[SwapRegion]:
@@ -918,7 +923,7 @@ def parse_swap_regions(raw_regions: Optional[List[List[str]]]) -> List[SwapRegio
918
923
  return parsed
919
924
 
920
925
 
921
- def parse_swap_regions_from_tsv(tsv_path: str, flank_bp: int = 100) -> List[SwapRegion]:
926
+ def parse_swap_regions_from_tsv(tsv_path: str, flank_bp: int = 100) -> List[SwapRegion]:
922
927
  """
923
928
  从 detect_bridge_merged_genes.py 产生的 suspicious.tsv 读取区间。
924
929
 
@@ -994,12 +999,26 @@ def parse_swap_regions_from_tsv(tsv_path: str, flank_bp: int = 100) -> List[Swap
994
999
  f" 当前行内容为:{line}"
995
1000
  ) from exc
996
1001
 
997
- return regions
1002
+ return regions
1003
+
1004
+
1005
def parse_add_files(add_files: List[str], threads: int) -> List[Tuple[str, AnnotationSet]]:
    """Parse every supplementary annotation file, optionally in parallel.

    Args:
        add_files: Paths supplied via ``--add``, in command-line order.
        threads: Requested worker count; values below 1 are treated as 1.

    Returns:
        A list of ``(path, parsed_set)`` pairs in the same order as
        ``add_files``.
    """
    worker_count = max(1, threads)

    # A pool is only used with more than one worker and more than one file.
    if worker_count > 1 and len(add_files) > 1:
        pool_size = min(worker_count, len(add_files))
        with ThreadPoolExecutor(max_workers=pool_size) as pool:
            # Executor.map yields results in input order, so pairing them
            # with add_files preserves the command-line order.
            results = list(pool.map(parse_annotation_file, add_files))
        return list(zip(add_files, results))

    # Sequential path: parse one file after another.
    pairs = []
    for path in add_files:
        pairs.append((path, parse_annotation_file(path)))
    return pairs
998
1014
 
999
1015
 
1000
1016
  def main() -> int:
1001
1017
  parser = build_arg_parser()
1002
- args = parser.parse_args()
1018
+ args = parser.parse_args()
1019
+
1020
+ if args.threads < 1:
1021
+ parser.error("--threads/-t 必须是正整数")
1003
1022
 
1004
1023
  try:
1005
1024
  swap_regions = parse_swap_regions(args.swap_region)
@@ -1025,11 +1044,11 @@ def main() -> int:
1025
1044
  for region in swap_regions:
1026
1045
  eprint(f" - {region.seqid}:{region.start}-{region.end}")
1027
1046
 
1028
- # 2) 按用户给定顺序,逐个补充
1029
- for next_file in args.add:
1030
- add_set = parse_annotation_file(next_file)
1031
- eprint(f"{next_file} parsed")
1032
- add_set.info()
1047
+ # 2) 按用户给定顺序,逐个补充;多个输入文件可并行预解析
1048
+ parsed_add_sets = parse_add_files(args.add, args.threads)
1049
+ for next_file, add_set in parsed_add_sets:
1050
+ eprint(f"{next_file} parsed")
1051
+ add_set.info()
1033
1052
 
1034
1053
  before_counts = ref_set.level_counts()
1035
1054
 
@@ -43,9 +43,10 @@ detect_bridge_merged_genes.py
43
43
  bridge_members
44
44
  """
45
45
 
46
- import argparse
47
- import sys
48
- from collections import defaultdict
46
+ import argparse
47
+ import sys
48
+ from collections import defaultdict
49
+ from concurrent.futures import ThreadPoolExecutor
49
50
 
50
51
 
51
52
  # ----------------------------
@@ -343,7 +344,7 @@ def read_gff3(gff_file):
343
344
  # 核心检测逻辑
344
345
  # ----------------------------
345
346
 
346
- def analyze_gene(
347
+ def analyze_gene(
347
348
  gene,
348
349
  min_gap=10000,
349
350
  cluster_gap=2000,
@@ -432,7 +433,13 @@ def analyze_gene(
432
433
  "cluster_members": ";".join(cluster_member_strs),
433
434
  "bridge_members": ";".join(bridge_member_strs)
434
435
  }
435
- return result
436
+ return result
437
+
438
+
439
def analyze_gene_task(task):
    """Run ``analyze_gene`` on one packed work item.

    ``task`` is a ``(gene_id, gene, params)`` tuple, where ``params`` is a
    dict of keyword arguments forwarded to ``analyze_gene``. Packing the
    arguments into one object lets this function be used with a
    single-iterable ``map`` call.

    Returns:
        ``(gene_id, result)`` so the caller can pair each result with its gene.
    """
    gene_id, gene_record, keyword_args = task
    result = analyze_gene(gene_record, **keyword_args)
    return gene_id, result
436
443
 
437
444
 
438
445
  # ----------------------------
@@ -469,15 +476,24 @@ def main():
469
476
  default=1,
470
477
  help="至少多少条真实桥接 transcript 才输出,默认 1"
471
478
  )
472
- parser.add_argument(
473
- "--no-use-cds-if-no-exon",
474
- action="store_true",
475
- help="若 transcript 没有 exon,则不要回退使用 CDS"
476
- )
477
-
478
- args = parser.parse_args()
479
-
480
- use_cds_if_no_exon = not args.no_use_cds_if_no_exon
479
+ parser.add_argument(
480
+ "--no-use-cds-if-no-exon",
481
+ action="store_true",
482
+ help="若 transcript 没有 exon,则不要回退使用 CDS"
483
+ )
484
+ parser.add_argument(
485
+ "-t", "--threads",
486
+ type=int,
487
+ default=1,
488
+ help="并行分析 gene 的线程数,默认 1"
489
+ )
490
+
491
+ args = parser.parse_args()
492
+
493
+ if args.threads < 1:
494
+ parser.error("--threads/-t 必须是正整数")
495
+
496
+ use_cds_if_no_exon = not args.no_use_cds_if_no_exon
481
497
 
482
498
  genes = read_gff3(args.input)
483
499
 
@@ -499,25 +515,30 @@ def main():
499
515
  n_total = 0
500
516
  n_flagged = 0
501
517
 
502
- with open(args.output, "w", encoding="utf-8") as out:
503
- out.write("\t".join(out_fields) + "\n")
504
-
505
- for gid in sorted(genes.keys()):
506
- gene = genes[gid]
507
- n_total += 1
508
-
509
- result = analyze_gene(
510
- gene,
511
- min_gap=args.min_gap,
512
- cluster_gap=args.cluster_gap,
513
- min_core_tx_per_cluster=args.min_core_tx_per_cluster,
514
- min_bridge_count=args.min_bridge_count,
515
- use_cds_if_no_exon=use_cds_if_no_exon
516
- )
517
-
518
- if result:
519
- n_flagged += 1
520
- out.write("\t".join(str(result[f]) for f in out_fields) + "\n")
518
+ analyze_params = {
519
+ "min_gap": args.min_gap,
520
+ "cluster_gap": args.cluster_gap,
521
+ "min_core_tx_per_cluster": args.min_core_tx_per_cluster,
522
+ "min_bridge_count": args.min_bridge_count,
523
+ "use_cds_if_no_exon": use_cds_if_no_exon,
524
+ }
525
+ sorted_genes = [(gid, genes[gid], analyze_params) for gid in sorted(genes.keys())]
526
+
527
+ if args.threads == 1:
528
+ analyzed = [analyze_gene_task(task) for task in sorted_genes]
529
+ else:
530
+ with ThreadPoolExecutor(max_workers=args.threads) as executor:
531
+ analyzed = list(executor.map(analyze_gene_task, sorted_genes))
532
+
533
+ with open(args.output, "w", encoding="utf-8") as out:
534
+ out.write("\t".join(out_fields) + "\n")
535
+
536
+ for gid, result in analyzed:
537
+ n_total += 1
538
+
539
+ if result:
540
+ n_flagged += 1
541
+ out.write("\t".join(str(result[f]) for f in out_fields) + "\n")
521
542
 
522
543
  sys.stderr.write(
523
544
  f"[INFO] Total genes checked: {n_total}\n"
@@ -527,4 +548,4 @@ def main():
527
548
 
528
549
 
529
550
  if __name__ == "__main__":
530
- main()
551
+ main()
@@ -25,7 +25,7 @@ def _run_legacy_main(func: Callable[[], object], prog: str, args: List[str]) ->
25
25
 
26
26
  def cmd_detect_bridge(args: argparse.Namespace, extra: List[str]) -> int:
27
27
  from . import detect_bridge_merged_genes as mod
28
- cli = ["-i", args.input, "-o", args.output]
28
+ cli = ["-i", args.input, "-o", args.output, "-t", str(args.threads)]
29
29
  cli += extra
30
30
  return _run_legacy_main(mod.main, "gffkit detect-bridge", cli)
31
31
 
@@ -37,6 +37,7 @@ def cmd_complement(args: argparse.Namespace, extra: List[str]) -> int:
37
37
  cli += ["--add", add_file]
38
38
  if args.output:
39
39
  cli += ["--output", args.output]
40
+ cli += ["-t", str(args.threads)]
40
41
  cli += extra
41
42
  return _run_legacy_main(mod.main, "gffkit complement", cli)
42
43
 
@@ -69,6 +70,7 @@ def cmd_integrate(args: argparse.Namespace, extra: List[str]) -> int:
69
70
  "--cluster-gap", str(args.cluster_gap),
70
71
  "--min-core-tx-per-cluster", str(args.min_core_tx_per_cluster),
71
72
  "--min-bridge-count", str(args.min_bridge_count),
73
+ "-t", str(args.threads),
72
74
  ]
73
75
  if args.no_use_cds_if_no_exon:
74
76
  detect_cli.append("--no-use-cds-if-no-exon")
@@ -84,6 +86,7 @@ def cmd_integrate(args: argparse.Namespace, extra: List[str]) -> int:
84
86
  "--swap_region_flank", str(args.swap_region_flank),
85
87
  "--size_min", str(args.size_min),
86
88
  "--output", str(merged_gff),
89
+ "-t", str(args.threads),
87
90
  ]
88
91
  ret = _run_legacy_main(complement_mod.main, "gffkit complement", complement_cli)
89
92
  if ret != 0:
@@ -120,6 +123,7 @@ def build_parser() -> argparse.ArgumentParser:
120
123
  )
121
124
  p.add_argument("-i", "--input", required=True, help="Input GFF3 file, usually Annotation A.")
122
125
  p.add_argument("-o", "--output", required=True, help="Output suspicious.tsv file.")
126
+ p.add_argument("-t", "--threads", type=int, default=1, help="Number of worker threads.")
123
127
  p.set_defaults(handler=cmd_detect_bridge)
124
128
 
125
129
  p = subparsers.add_parser(
@@ -130,6 +134,7 @@ def build_parser() -> argparse.ArgumentParser:
130
134
  p.add_argument("--ref", "-r", "-i", required=True, help="Reference GFF/GTF file.")
131
135
  p.add_argument("--add", "-a", action="append", required=True, help="Supplementary GFF/GTF file; can be repeated.")
132
136
  p.add_argument("--output", "--out", "-o", default=None, help="Output GFF3 path. Default: stdout.")
137
+ p.add_argument("-t", "--threads", type=int, default=1, help="Number of worker threads.")
133
138
  p.set_defaults(handler=cmd_complement)
134
139
 
135
140
  p = subparsers.add_parser(
@@ -162,6 +167,7 @@ def build_parser() -> argparse.ArgumentParser:
162
167
  p.add_argument("--min-core-tx-per-cluster", type=int, default=1, help="Minimum core transcripts per cluster.")
163
168
  p.add_argument("--min-bridge-count", type=int, default=1, help="Minimum true bridge transcripts required.")
164
169
  p.add_argument("--no-use-cds-if-no-exon", action="store_true", help="Do not use CDS when transcript has no exon.")
170
+ p.add_argument("-t", "--threads", type=int, default=1, help="Number of worker threads used by detect and complement steps.")
165
171
 
166
172
  p.add_argument("--swap-region-flank", type=int, default=100, help="Flanking bp added to suspicious regions.")
167
173
  p.add_argument("--size-min", type=int, default=0, help="Minimum CDS size for non-overlapping supplementary roots.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gffkit
3
- Version: 0.2
3
+ Version: 0.3.1
4
4
  Summary: Region-aware GFF annotation integration toolkit
5
5
  Author: Qunjie Zhang
6
6
  License: MIT
@@ -48,7 +48,8 @@ gffkit integrate \
48
48
  --annotation-a EviAnn.gff3 \
49
49
  --annotation-b ANNEVO.gff3 \
50
50
  --outdir gffkit_out \
51
- --prefix sample
51
+ --prefix sample \
52
+ -t 8
52
53
  ```
53
54
 
54
55
  Outputs:
@@ -61,7 +62,7 @@ Outputs:
61
62
 
62
63
  ```bash
63
64
  # 1. Detect suspicious merged genes in Annotation A
64
- gffkit detect-bridge -i EviAnn.gff3 -o suspicious.tsv
65
+ gffkit detect-bridge -i EviAnn.gff3 -o suspicious.tsv -t 8
65
66
 
66
67
  # 2. Use A as the global reference, but switch to B in suspicious regions
67
68
  gffkit complement \
@@ -69,12 +70,27 @@ gffkit complement \
69
70
  --add ANNEVO.gff3 \
70
71
  --swap_region_tsv suspicious.tsv \
71
72
  --swap_region_flank 100 \
72
- --output merged.gff3
73
+ --output merged.gff3 \
74
+ -t 8
73
75
 
74
76
  # 3. Add UTR features
75
77
  gffkit add-utr -i merged.gff3 -o final.annotation.withUTR.gff3
76
78
  ```
77
79
 
80
+ ### Merge three or more annotations
81
+
82
+ Use repeated `--add` arguments. Files are merged in the order provided.
83
+
84
+ ```bash
85
+ gffkit complement \
86
+ --ref EviAnn.gff3 \
87
+ --add ANNEVO.gff3 \
88
+ --add Helixer.gff3 \
89
+ --add PASA.gff3 \
90
+ --output merged.multi.gff3 \
91
+ -t 8
92
+ ```
93
+
78
94
  ## Command overview
79
95
 
80
96
  ```bash
@@ -85,11 +101,30 @@ gffkit add-utr --help
85
101
  gffkit integrate --help
86
102
  ```
87
103
 
104
+ ## Threads
105
+
106
+ Versions 0.3 and later support the `-t/--threads` option.
107
+
108
+ - `detect-bridge` analyzes genes in parallel.
109
+ - `complement` pre-parses multiple `--add` files in parallel, then merges them in the original command-line order.
110
+ - `integrate` passes the thread count to the detect and complement steps.
111
+
112
+ Example:
113
+
114
+ ```bash
115
+ gffkit integrate --annotation-a EviAnn.gff3 --annotation-b ANNEVO.gff3 -t 16
116
+ ```
117
+
88
118
  ## Annotation integration strategy
89
119
 
90
120
  - Annotation A, for example EviAnn/RNA-seq-supported GFF, is used as the global primary reference.
91
121
  - Annotation B, for example ANNEVO/deep-learning GFF, is used as the local primary reference only in suspicious merged-gene regions.
92
122
  - UTR features are reconstructed after merging using an exon-minus-CDS strategy.
123
+ - When multiple tools annotate the same gene locus, the GFF source column is combined with `|`, for example `EviAnn|ANNEVO`.
124
+
125
+ ## Maintainer notes
126
+
127
+ When command-line options or behavior changes, update this `README.md` in the versioned package directory before building and uploading to PyPI.
93
128
 
94
129
  ## License
95
130
 
@@ -37,7 +37,7 @@ def test_overlapping_gene_sources_are_merged(tmp_path):
37
37
 
38
38
  assert added == 0
39
39
  assert len(ref_set.roots) == 1
40
- assert {feature.source for feature in ref_set.roots[0].iter_all()} == {"EviAnn,ANNEVO"}
40
+ assert {feature.source for feature in ref_set.roots[0].iter_all()} == {"EviAnn|ANNEVO"}
41
41
 
42
42
 
43
43
  def test_non_overlapping_gene_keeps_single_source(tmp_path):
File without changes
File without changes
File without changes
File without changes
File without changes