cmdtabs 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/Rakefile +9 -4
- data/TEST_file +1 -0
- data/bin/aggregate_column_data.rb +20 -25
- data/bin/column_filter.rb +87 -0
- data/bin/create_metric_table.rb +35 -47
- data/bin/desaggregate_column_data.rb +18 -17
- data/bin/excel_to_tabular.rb +57 -0
- data/bin/intersect_columns.rb +29 -28
- data/bin/merge_tabular.rb +21 -34
- data/bin/standard_name_replacer.rb +35 -23
- data/bin/table_linker.rb +56 -29
- data/bin/tag_table.rb +52 -0
- data/cmdtabs.gemspec +3 -1
- data/data_test_scripts/cluster_genes_dis_AGG_stdin_to_test +2 -0
- data/data_test_scripts/cluster_genes_dis_AGG_to_test +2 -0
- data/data_test_scripts/cluster_genes_dis_DESAGG_stdin_to_test +2 -0
- data/data_test_scripts/cluster_genes_dis_DESAGG_to_test +2 -0
- data/data_test_scripts/cluster_genes_from_excel.txt_to_test +2 -0
- data/data_test_scripts/column_matching_hard_to_test +3 -0
- data/data_test_scripts/column_matching_hard_various_keys_and_every_columns_header_to_test +3 -0
- data/data_test_scripts/column_matching_hard_various_keys_and_every_columns_to_test +2 -0
- data/data_test_scripts/column_matching_hard_various_keys_and_some_columns_to_test +10 -0
- data/data_test_scripts/column_matching_hard_various_keys_to_test +7 -0
- data/data_test_scripts/column_matching_soft_1_column_reverse_to_test +21 -0
- data/data_test_scripts/column_matching_soft_1_column_to_test +29 -0
- data/data_test_scripts/column_matching_soft_1_column_uniq_to_test +12 -0
- data/data_test_scripts/column_matching_soft_and_every_columns_to_test +22 -0
- data/data_test_scripts/column_matching_soft_and_some_columns_to_test +29 -0
- data/data_test_scripts/column_matching_soft_to_test +29 -0
- data/data_test_scripts/intersect_columns_count_to_test +3 -0
- data/data_test_scripts/intersect_columns_default_stdin_a_to_test +5 -0
- data/data_test_scripts/intersect_columns_default_stdin_b_to_test +5 -0
- data/data_test_scripts/intersect_columns_default_to_test +5 -0
- data/data_test_scripts/intersect_columns_full_to_test +5 -0
- data/data_test_scripts/linked_table_2_to_test +7 -0
- data/data_test_scripts/linked_table_matches_to_test +7 -0
- data/data_test_scripts/linked_table_to_test +12 -0
- data/data_test_scripts/merge_disease_cluster_gene_to_test +12 -0
- data/data_test_scripts/metric_table_to_test +3 -0
- data/data_test_scripts/replaced_name_to_test +12 -0
- data/data_test_scripts/replaced_name_untranstaled_to_test +7 -0
- data/data_test_scripts/tag_table_header_to_test +8 -0
- data/data_test_scripts/tag_table_to_test +8 -0
- data/lib/cmdtabs/cmdtabs_lib.rb +351 -0
- data/lib/cmdtabs/version.rb +1 -1
- data/lib/cmdtabs.rb +2 -0
- data/test_scripts.sh +88 -0
- metadata +59 -7
- data/bin/table_header.rb +0 -207
data/bin/table_linker.rb
CHANGED
@@ -3,33 +3,60 @@
|
|
3
3
|
# Toma la informacion extraida de un archivo tabulado (donde la primera columna es el identificador) en base a una lista de identificadores proporcionada
|
4
4
|
# la informacion se guarda en el archivo de salida
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
File.
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
6
|
+
ROOT_PATH = File.dirname(__FILE__)
|
7
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
8
|
+
require 'optparse'
|
9
|
+
require 'cmdtabs'
|
10
|
+
|
11
|
+
|
12
|
+
#####################################################################
|
13
|
+
## OPTPARSE
|
14
|
+
######################################################################
|
15
|
+
|
16
|
+
options = {}
|
17
|
+
OptionParser.new do |opts|
|
18
|
+
opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
|
19
|
+
|
20
|
+
options[:input_file] = nil
|
21
|
+
opts.on("-i", "--input_file PATH", "Path to input file") do |item|
|
22
|
+
options[:input_file] = item
|
23
|
+
end
|
24
|
+
|
25
|
+
options[:linker_file] = nil
|
26
|
+
opts.on("-l", "--linker_file PATH", "Path to file linker") do |item|
|
27
|
+
options[:linker_file] = item
|
28
|
+
end
|
29
|
+
|
30
|
+
options[:drop_line] = false
|
31
|
+
opts.on("--drop", "Write the lines whose identifiers have been matched") do |item|
|
32
|
+
options[:drop_line] = true
|
33
|
+
end
|
34
|
+
|
35
|
+
options[:sep] = "\t"
|
36
|
+
opts.on("-s", "--separator STRING", "column character separator") do |item|
|
37
|
+
options[:sep] = item
|
33
38
|
end
|
34
|
-
|
35
|
-
|
39
|
+
|
40
|
+
options[:output_file] = nil
|
41
|
+
opts.on("-o", "--output_file PATH", "Output file ") do |item|
|
42
|
+
options[:output_file] = item
|
43
|
+
end
|
44
|
+
|
45
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
46
|
+
puts opts
|
47
|
+
exit
|
48
|
+
end
|
49
|
+
end.parse!
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
##################################################################################################
|
54
|
+
## MAIN
|
55
|
+
##################################################################################################
|
56
|
+
|
57
|
+
input_linker = load_input_data(options[:linker_file])
|
58
|
+
indexed_linker = index_array(input_linker)
|
59
|
+
input_table = load_input_data(options[:input_file], "\t", 2)
|
60
|
+
|
61
|
+
linked_table = link_table(indexed_linker, input_table, options[:drop_line], options[:sep])
|
62
|
+
write_output_data(linked_table, options[:output_file])
|
data/bin/tag_table.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
5
|
+
require 'optparse'
|
6
|
+
require 'cmdtabs'
|
7
|
+
|
8
|
+
|
9
|
+
#################################################################################################
|
10
|
+
## INPUT PARSING
|
11
|
+
#################################################################################################
|
12
|
+
options = {}
|
13
|
+
|
14
|
+
optparse = OptionParser.new do |opts|
|
15
|
+
options[:input_file] = nil
|
16
|
+
opts.on( '-i', '--input_file PATH', 'Input tabulated file' ) do |input_file|
|
17
|
+
options[:input_file] = input_file
|
18
|
+
end
|
19
|
+
|
20
|
+
options[:tags] = nil
|
21
|
+
opts.on( '-t', '--tags STRING', 'Strings or files (only first linewill be used) sepparated by commas' ) do |tags|
|
22
|
+
options[:tags] = tags.split(",")
|
23
|
+
end
|
24
|
+
|
25
|
+
options[:sep] = "\t"
|
26
|
+
opts.on( '-s', '--sep CHR', 'Character that separates fields in tags' ) do |chr|
|
27
|
+
options[:sep] = chr
|
28
|
+
end
|
29
|
+
|
30
|
+
options[:header] = false
|
31
|
+
opts.on( '-H', '--header', 'Indicate if input file has a header line. Header will not be printed in output' ) do
|
32
|
+
options[:header] = true
|
33
|
+
end
|
34
|
+
|
35
|
+
opts.on( '-h', '--help', 'Display this screen' ) do
|
36
|
+
puts opts
|
37
|
+
exit
|
38
|
+
end
|
39
|
+
|
40
|
+
end # End opts
|
41
|
+
|
42
|
+
# parse options and remove from ARGV
|
43
|
+
optparse.parse!
|
44
|
+
|
45
|
+
##################################################################################################
|
46
|
+
## MAIN
|
47
|
+
##################################################################################################
|
48
|
+
|
49
|
+
input_table = load_input_data(options[:input_file])
|
50
|
+
tags = load_and_parse_tags(options[:tags], options[:sep])
|
51
|
+
taged_table = tag_file(input_table, tags, options[:header])
|
52
|
+
write_output_data(taged_table, nil, options[:sep])
|
data/cmdtabs.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ["seoanezonjic"]
|
9
9
|
spec.email = ["seoanezonjic@hotmail.com"]
|
10
10
|
|
11
|
-
spec.summary = "
|
11
|
+
spec.summary = "DEPRECATED PROJECT. MIGRATED TO PYTHON: https://github.com/seoanezonjic/py_cmdtabs\nGem to manipulate text tables in cmd"
|
12
12
|
spec.description = "Toolset to merge, colapse tables rename field contents, etc "
|
13
13
|
spec.homepage = "https://github.com/seoanezonjic/cmdtabs"
|
14
14
|
spec.license = "MIT"
|
@@ -29,6 +29,8 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.executables = spec.files.grep(%r{\Abin/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ["lib"]
|
31
31
|
|
32
|
+
spec.add_dependency "xsv"
|
33
|
+
|
32
34
|
# Uncomment to register a new dependency of your gem
|
33
35
|
# spec.add_dependency "example-gem", "~> 1.0"
|
34
36
|
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MONDO:0008995
|
2
|
+
MONDO:0014823
|
3
|
+
MONDO:0011142
|
4
|
+
MONDO:0009833
|
5
|
+
MONDO:0009594
|
6
|
+
MONDO:0010193
|
7
|
+
MONDO:0011142
|
8
|
+
MONDO:0013969
|
9
|
+
MONDO:0008995
|
10
|
+
MONDO:0014823
|
11
|
+
MONDO:0009833
|
12
|
+
MONDO:0009594
|
13
|
+
MONDO:0010193
|
14
|
+
MONDO:0011142
|
15
|
+
MONDO:0013969
|
16
|
+
MONDO:0010193
|
17
|
+
MONDO:0008995
|
18
|
+
MONDO:0012866
|
19
|
+
MONDO:0011142
|
20
|
+
MONDO:0013969
|
21
|
+
MONDO:0018053
|
@@ -0,0 +1,29 @@
|
|
1
|
+
MONDO:0008995
|
2
|
+
MONDO:0007172
|
3
|
+
MONDO:0014823
|
4
|
+
MONDO:0017999
|
5
|
+
MONDO:0017999
|
6
|
+
MONDO:0011142
|
7
|
+
MONDO:0009833
|
8
|
+
MONDO:0009594
|
9
|
+
MONDO:0010193
|
10
|
+
MONDO:0012176
|
11
|
+
MONDO:0011142
|
12
|
+
MONDO:0013969
|
13
|
+
MONDO:0008995
|
14
|
+
MONDO:0007172
|
15
|
+
MONDO:0014823
|
16
|
+
MONDO:0017999
|
17
|
+
MONDO:0009833
|
18
|
+
MONDO:0009594
|
19
|
+
MONDO:0010193
|
20
|
+
MONDO:0012176
|
21
|
+
MONDO:0011142
|
22
|
+
MONDO:0013969
|
23
|
+
MONDO:0010193
|
24
|
+
MONDO:0008995
|
25
|
+
MONDO:0012866
|
26
|
+
MONDO:0017999
|
27
|
+
MONDO:0011142
|
28
|
+
MONDO:0013969
|
29
|
+
MONDO:0018053
|
@@ -0,0 +1,22 @@
|
|
1
|
+
MONDO:0008995 19_ref
|
2
|
+
MONDO:0007172 22_ref
|
3
|
+
MONDO:0014823 25_ref
|
4
|
+
MONDO:0017999 36_ref
|
5
|
+
MONDO:0017999 53_ref
|
6
|
+
MONDO:0011142 53_ref
|
7
|
+
MONDO:0009833 53_ref
|
8
|
+
MONDO:0009594 54_ref
|
9
|
+
MONDO:0010193 54_ref
|
10
|
+
MONDO:0012176 62_ref
|
11
|
+
MONDO:0011142 66_ref
|
12
|
+
MONDO:0013969 1189_ref
|
13
|
+
MONDO:0008995 19_ref
|
14
|
+
MONDO:0007172 22_ref
|
15
|
+
MONDO:0014823 25_ref
|
16
|
+
MONDO:0017999 53_ref
|
17
|
+
MONDO:0009833 53_ref
|
18
|
+
MONDO:0009594 54_ref
|
19
|
+
MONDO:0010193 54_ref
|
20
|
+
MONDO:0012176 62_ref
|
21
|
+
MONDO:0011142 66_ref
|
22
|
+
MONDO:0013969 1189_ref
|
@@ -0,0 +1,29 @@
|
|
1
|
+
MONDO:0008995 19_ref
|
2
|
+
MONDO:0007172 22_ref
|
3
|
+
MONDO:0014823 25_ref
|
4
|
+
MONDO:0017999 36_ref
|
5
|
+
MONDO:0017999 53_ref
|
6
|
+
MONDO:0011142 53_ref
|
7
|
+
MONDO:0009833 53_ref
|
8
|
+
MONDO:0009594 54_ref
|
9
|
+
MONDO:0010193 54_ref
|
10
|
+
MONDO:0012176 62_ref
|
11
|
+
MONDO:0011142 66_ref
|
12
|
+
MONDO:0013969 1189_ref
|
13
|
+
MONDO:0008995 19_ref
|
14
|
+
MONDO:0007172 22_ref
|
15
|
+
MONDO:0014823 25_ref
|
16
|
+
MONDO:0017999 53_ref
|
17
|
+
MONDO:0009833 53_ref
|
18
|
+
MONDO:0009594 54_ref
|
19
|
+
MONDO:0010193 54_ref
|
20
|
+
MONDO:0012176 62_ref
|
21
|
+
MONDO:0011142 66_ref
|
22
|
+
MONDO:0013969 1189_ref
|
23
|
+
MONDO:0010193 HGNC:3527
|
24
|
+
MONDO:0008995 HGNC:16873
|
25
|
+
MONDO:0012866 HGNC:21197
|
26
|
+
MONDO:0017999 HGNC:21197
|
27
|
+
MONDO:0011142 HGNC:21144
|
28
|
+
MONDO:0013969 HGNC:21176
|
29
|
+
MONDO:0018053 HGNC:21157
|
@@ -0,0 +1,29 @@
|
|
1
|
+
MONDO:0008995 19_ref
|
2
|
+
MONDO:0007172 22_ref
|
3
|
+
MONDO:0014823 25_ref
|
4
|
+
MONDO:0017999 36_ref
|
5
|
+
MONDO:0017999 53_ref
|
6
|
+
MONDO:0011142 53_ref
|
7
|
+
MONDO:0009833 53_ref
|
8
|
+
MONDO:0009594 54_ref
|
9
|
+
MONDO:0010193 54_ref
|
10
|
+
MONDO:0012176 62_ref
|
11
|
+
MONDO:0011142 66_ref
|
12
|
+
MONDO:0013969 1189_ref
|
13
|
+
MONDO:0008995 19_ref
|
14
|
+
MONDO:0007172 22_ref
|
15
|
+
MONDO:0014823 25_ref
|
16
|
+
MONDO:0017999 53_ref
|
17
|
+
MONDO:0009833 53_ref
|
18
|
+
MONDO:0009594 54_ref
|
19
|
+
MONDO:0010193 54_ref
|
20
|
+
MONDO:0012176 62_ref
|
21
|
+
MONDO:0011142 66_ref
|
22
|
+
MONDO:0013969 1189_ref
|
23
|
+
MONDO:0010193 HGNC:3527
|
24
|
+
MONDO:0008995 HGNC:16873
|
25
|
+
MONDO:0012866 HGNC:21197
|
26
|
+
MONDO:0017999 HGNC:21197
|
27
|
+
MONDO:0011142 HGNC:21144
|
28
|
+
MONDO:0013969 HGNC:21176
|
29
|
+
MONDO:0018053 HGNC:21157
|
@@ -0,0 +1,12 @@
|
|
1
|
+
MONDO:0008995 19_ref HGNC:16873
|
2
|
+
MONDO:0007172 22_ref
|
3
|
+
MONDO:0014823 25_ref
|
4
|
+
MONDO:0017999 36_ref HGNC:21197
|
5
|
+
MONDO:0017999 53_ref HGNC:21197
|
6
|
+
MONDO:0011142 53_ref HGNC:21144
|
7
|
+
MONDO:0009833 53_ref
|
8
|
+
MONDO:0009594 54_ref
|
9
|
+
MONDO:0010193 54_ref HGNC:3527
|
10
|
+
MONDO:0012176 62_ref
|
11
|
+
MONDO:0011142 66_ref HGNC:21144
|
12
|
+
MONDO:0013969 1189_ref HGNC:21176
|
@@ -0,0 +1,12 @@
|
|
1
|
+
MONDO:0010193 HGNC:3527 54_ref
|
2
|
+
MONDO:0008995 HGNC:16873 19_ref
|
3
|
+
MONDO:0012866 HGNC:21197 -
|
4
|
+
MONDO:0017999 HGNC:21197 53_ref
|
5
|
+
MONDO:0011142 HGNC:21144 66_ref
|
6
|
+
MONDO:0013969 HGNC:21176 1189_ref
|
7
|
+
MONDO:0018053 HGNC:21157 -
|
8
|
+
MONDO:0007172 - 22_ref
|
9
|
+
MONDO:0014823 - 25_ref
|
10
|
+
MONDO:0009833 - 53_ref
|
11
|
+
MONDO:0009594 - 54_ref
|
12
|
+
MONDO:0012176 - 62_ref
|
@@ -0,0 +1,8 @@
|
|
1
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_num 1988
|
2
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_size_min 1
|
3
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_size_max 296
|
4
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_size_median 3.0
|
5
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_size_mean 4.667505030181086
|
6
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_nodes_mean 1.0
|
7
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_nodes 9279
|
8
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_pairs 147578
|
@@ -0,0 +1,8 @@
|
|
1
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_num 1988
|
2
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_size_min 1
|
3
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_size_max 296
|
4
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_size_median 3.0
|
5
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_size_mean 4.667505030181086
|
6
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_nodes_mean 1.0
|
7
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_nodes 9279
|
8
|
+
MERGED_net_no_raw_cpm MERGED no no cpm cl_pairs 147578
|