cmdtabs 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -0
  3. data/Rakefile +9 -4
  4. data/TEST_file +1 -0
  5. data/bin/aggregate_column_data.rb +20 -25
  6. data/bin/column_filter.rb +87 -0
  7. data/bin/create_metric_table.rb +35 -47
  8. data/bin/desaggregate_column_data.rb +18 -17
  9. data/bin/excel_to_tabular.rb +57 -0
  10. data/bin/intersect_columns.rb +29 -28
  11. data/bin/merge_tabular.rb +21 -34
  12. data/bin/standard_name_replacer.rb +35 -23
  13. data/bin/table_linker.rb +56 -29
  14. data/bin/tag_table.rb +52 -0
  15. data/cmdtabs.gemspec +3 -1
  16. data/data_test_scripts/cluster_genes_dis_AGG_stdin_to_test +2 -0
  17. data/data_test_scripts/cluster_genes_dis_AGG_to_test +2 -0
  18. data/data_test_scripts/cluster_genes_dis_DESAGG_stdin_to_test +2 -0
  19. data/data_test_scripts/cluster_genes_dis_DESAGG_to_test +2 -0
  20. data/data_test_scripts/cluster_genes_from_excel.txt_to_test +2 -0
  21. data/data_test_scripts/column_matching_hard_to_test +3 -0
  22. data/data_test_scripts/column_matching_hard_various_keys_and_every_columns_header_to_test +3 -0
  23. data/data_test_scripts/column_matching_hard_various_keys_and_every_columns_to_test +2 -0
  24. data/data_test_scripts/column_matching_hard_various_keys_and_some_columns_to_test +10 -0
  25. data/data_test_scripts/column_matching_hard_various_keys_to_test +7 -0
  26. data/data_test_scripts/column_matching_soft_1_column_reverse_to_test +21 -0
  27. data/data_test_scripts/column_matching_soft_1_column_to_test +29 -0
  28. data/data_test_scripts/column_matching_soft_1_column_uniq_to_test +12 -0
  29. data/data_test_scripts/column_matching_soft_and_every_columns_to_test +22 -0
  30. data/data_test_scripts/column_matching_soft_and_some_columns_to_test +29 -0
  31. data/data_test_scripts/column_matching_soft_to_test +29 -0
  32. data/data_test_scripts/intersect_columns_count_to_test +3 -0
  33. data/data_test_scripts/intersect_columns_default_stdin_a_to_test +5 -0
  34. data/data_test_scripts/intersect_columns_default_stdin_b_to_test +5 -0
  35. data/data_test_scripts/intersect_columns_default_to_test +5 -0
  36. data/data_test_scripts/intersect_columns_full_to_test +5 -0
  37. data/data_test_scripts/linked_table_2_to_test +7 -0
  38. data/data_test_scripts/linked_table_matches_to_test +7 -0
  39. data/data_test_scripts/linked_table_to_test +12 -0
  40. data/data_test_scripts/merge_disease_cluster_gene_to_test +12 -0
  41. data/data_test_scripts/metric_table_to_test +3 -0
  42. data/data_test_scripts/replaced_name_to_test +12 -0
  43. data/data_test_scripts/replaced_name_untranstaled_to_test +7 -0
  44. data/data_test_scripts/tag_table_header_to_test +8 -0
  45. data/data_test_scripts/tag_table_to_test +8 -0
  46. data/lib/cmdtabs/cmdtabs_lib.rb +351 -0
  47. data/lib/cmdtabs/version.rb +1 -1
  48. data/lib/cmdtabs.rb +2 -0
  49. data/test_scripts.sh +88 -0
  50. metadata +59 -7
  51. data/bin/table_header.rb +0 -207
data/bin/table_linker.rb CHANGED
@@ -3,33 +3,60 @@
3
3
  # Toma la informacion extraida de un archivo tabulado (donde la primera columna es el identificador) en base a una lista de identificadores proporcionada
4
4
  # la informacion se guarda en el archivo de salida
5
5
 
6
- if ARGV.size < 3
7
- puts "Usage: table_linker.rb file_table file_table output_file_name"
8
- Process.exit
9
- end
10
-
11
- drop_line = false
12
- if !ARGV[3].nil?
13
- drop_line = true
14
- end
15
- hash_info={}
16
-
17
- #Cargar tabla de informacion en hash en forma {identificador => campos de informacion}
18
- File.open(ARGV[0],'r').each do |line|
19
- fields=line.chomp.split("\t",2)
20
- hash_info[fields.first]=fields.last
21
- end
22
-
23
- save_info=File.open(ARGV[2],'w') #Crea archivo para guardar la informacion
24
- File.open(ARGV[1],'r').each do |line|
25
- line.chomp!
26
- fields = line.split("\t")
27
- id = fields.first
28
- info_id=hash_info[id]
29
- if !info_id.nil?
30
- save_info.puts line+"\t"+info_id
31
- else
32
- save_info.puts line if !drop_line
6
+ ROOT_PATH = File.dirname(__FILE__)
7
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
8
+ require 'optparse'
9
+ require 'cmdtabs'
10
+
11
+
12
+ #####################################################################
13
+ ## OPTPARSE
14
+ ######################################################################
15
+
16
+ options = {}
17
+ OptionParser.new do |opts|
18
+ opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
19
+
20
+ options[:input_file] = nil
21
+ opts.on("-i", "--input_file PATH", "Path to input file") do |item|
22
+ options[:input_file] = item
23
+ end
24
+
25
+ options[:linker_file] = nil
26
+ opts.on("-l", "--linker_file PATH", "Path to file linker") do |item|
27
+ options[:linker_file] = item
28
+ end
29
+
30
+ options[:drop_line] = false
31
+ opts.on("--drop", "Write the lines whose identifiers have been matched") do |item|
32
+ options[:drop_line] = true
33
+ end
34
+
35
+ options[:sep] = "\t"
36
+ opts.on("-s", "--separator STRING", "column character separator") do |item|
37
+ options[:sep] = item
33
38
  end
34
- end
35
- save_info.close
39
+
40
+ options[:output_file] = nil
41
+ opts.on("-o", "--output_file PATH", "Output file ") do |item|
42
+ options[:output_file] = item
43
+ end
44
+
45
+ opts.on_tail("-h", "--help", "Show this message") do
46
+ puts opts
47
+ exit
48
+ end
49
+ end.parse!
50
+
51
+
52
+
53
+ ##################################################################################################
54
+ ## MAIN
55
+ ##################################################################################################
56
+
57
+ input_linker = load_input_data(options[:linker_file])
58
+ indexed_linker = index_array(input_linker)
59
+ input_table = load_input_data(options[:input_file], "\t", 2)
60
+
61
+ linked_table = link_table(indexed_linker, input_table, options[:drop_line], options[:sep])
62
+ write_output_data(linked_table, options[:output_file])
data/bin/tag_table.rb ADDED
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
5
+ require 'optparse'
6
+ require 'cmdtabs'
7
+
8
+
9
+ #################################################################################################
10
+ ## INPUT PARSING
11
+ #################################################################################################
12
+ options = {}
13
+
14
+ optparse = OptionParser.new do |opts|
15
+ options[:input_file] = nil
16
+ opts.on( '-i', '--input_file PATH', 'Input tabulated file' ) do |input_file|
17
+ options[:input_file] = input_file
18
+ end
19
+
20
+ options[:tags] = nil
21
+ opts.on( '-t', '--tags STRING', 'Strings or files (only first linewill be used) sepparated by commas' ) do |tags|
22
+ options[:tags] = tags.split(",")
23
+ end
24
+
25
+ options[:sep] = "\t"
26
+ opts.on( '-s', '--sep CHR', 'Character that separates fields in tags' ) do |chr|
27
+ options[:sep] = chr
28
+ end
29
+
30
+ options[:header] = false
31
+ opts.on( '-H', '--header', 'Indicate if input file has a header line. Header will not be printed in output' ) do
32
+ options[:header] = true
33
+ end
34
+
35
+ opts.on( '-h', '--help', 'Display this screen' ) do
36
+ puts opts
37
+ exit
38
+ end
39
+
40
+ end # End opts
41
+
42
+ # parse options and remove from ARGV
43
+ optparse.parse!
44
+
45
+ ##################################################################################################
46
+ ## MAIN
47
+ ##################################################################################################
48
+
49
+ input_table = load_input_data(options[:input_file])
50
+ tags = load_and_parse_tags(options[:tags], options[:sep])
51
+ taged_table = tag_file(input_table, tags, options[:header])
52
+ write_output_data(taged_table, nil, options[:sep])
data/cmdtabs.gemspec CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.authors = ["seoanezonjic"]
9
9
  spec.email = ["seoanezonjic@hotmail.com"]
10
10
 
11
- spec.summary = "Gem to manipulate text tables in cmd"
11
+ spec.summary = "DEPRECATED PROJECT. MIGRATED TO PYTHON: https://github.com/seoanezonjic/py_cmdtabs\nGem to manipulate text tables in cmd"
12
12
  spec.description = "Toolset to merge, colapse tables rename field contents, etc "
13
13
  spec.homepage = "https://github.com/seoanezonjic/cmdtabs"
14
14
  spec.license = "MIT"
@@ -29,6 +29,8 @@ Gem::Specification.new do |spec|
29
29
  spec.executables = spec.files.grep(%r{\Abin/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ["lib"]
31
31
 
32
+ spec.add_dependency "xsv"
33
+
32
34
  # Uncomment to register a new dependency of your gem
33
35
  # spec.add_dependency "example-gem", "~> 1.0"
34
36
 
@@ -0,0 +1,2 @@
1
+ HGNC:21197 483_ref,1039_ref,1071_ref
2
+ HGNC:21143 211_ref,4705_ref
@@ -0,0 +1,2 @@
1
+ HGNC:21197 483_ref,1039_ref,1071_ref
2
+ HGNC:21143 211_ref,4705_ref
@@ -0,0 +1,2 @@
1
+ 483_ref,1039_ref,1071_ref HGNC:21197
2
+ 211_ref,4705_ref HGNC:21143
@@ -0,0 +1,2 @@
1
+ HGNC:21197 483_ref,1039_ref,1071_ref
2
+ HGNC:21143 211_ref,4705_ref
@@ -0,0 +1,2 @@
1
+ 483_ref 1039_ref 1071_ref
2
+ 211_ref 4705_ref
@@ -0,0 +1,3 @@
1
+ MONDO:0008995 19_ref
2
+ MONDO:0008995 19_ref
3
+ MONDO:0008995 HGNC:16873
@@ -0,0 +1,3 @@
1
+ MONDO:0008995 19_ref
2
+ MONDO:0017999 53_ref
3
+ MONDO:0017999 53_ref
@@ -0,0 +1,2 @@
1
+ MONDO:0017999 53_ref
2
+ MONDO:0017999 53_ref
@@ -0,0 +1,10 @@
1
+ MONDO:0008995 19_ref
2
+ MONDO:0017999 36_ref
3
+ MONDO:0017999 53_ref
4
+ MONDO:0011142 53_ref
5
+ MONDO:0009833 53_ref
6
+ MONDO:0008995 19_ref
7
+ MONDO:0017999 53_ref
8
+ MONDO:0009833 53_ref
9
+ MONDO:0008995 HGNC:16873
10
+ MONDO:0017999 HGNC:21197
@@ -0,0 +1,7 @@
1
+ MONDO:0008995 19_ref
2
+ MONDO:0017999 36_ref
3
+ MONDO:0017999 53_ref
4
+ MONDO:0008995 19_ref
5
+ MONDO:0017999 53_ref
6
+ MONDO:0008995 HGNC:16873
7
+ MONDO:0017999 HGNC:21197
@@ -0,0 +1,21 @@
1
+ MONDO:0008995
2
+ MONDO:0014823
3
+ MONDO:0011142
4
+ MONDO:0009833
5
+ MONDO:0009594
6
+ MONDO:0010193
7
+ MONDO:0011142
8
+ MONDO:0013969
9
+ MONDO:0008995
10
+ MONDO:0014823
11
+ MONDO:0009833
12
+ MONDO:0009594
13
+ MONDO:0010193
14
+ MONDO:0011142
15
+ MONDO:0013969
16
+ MONDO:0010193
17
+ MONDO:0008995
18
+ MONDO:0012866
19
+ MONDO:0011142
20
+ MONDO:0013969
21
+ MONDO:0018053
@@ -0,0 +1,29 @@
1
+ MONDO:0008995
2
+ MONDO:0007172
3
+ MONDO:0014823
4
+ MONDO:0017999
5
+ MONDO:0017999
6
+ MONDO:0011142
7
+ MONDO:0009833
8
+ MONDO:0009594
9
+ MONDO:0010193
10
+ MONDO:0012176
11
+ MONDO:0011142
12
+ MONDO:0013969
13
+ MONDO:0008995
14
+ MONDO:0007172
15
+ MONDO:0014823
16
+ MONDO:0017999
17
+ MONDO:0009833
18
+ MONDO:0009594
19
+ MONDO:0010193
20
+ MONDO:0012176
21
+ MONDO:0011142
22
+ MONDO:0013969
23
+ MONDO:0010193
24
+ MONDO:0008995
25
+ MONDO:0012866
26
+ MONDO:0017999
27
+ MONDO:0011142
28
+ MONDO:0013969
29
+ MONDO:0018053
@@ -0,0 +1,12 @@
1
+ MONDO:0008995
2
+ MONDO:0007172
3
+ MONDO:0014823
4
+ MONDO:0017999
5
+ MONDO:0011142
6
+ MONDO:0009833
7
+ MONDO:0009594
8
+ MONDO:0010193
9
+ MONDO:0012176
10
+ MONDO:0013969
11
+ MONDO:0012866
12
+ MONDO:0018053
@@ -0,0 +1,22 @@
1
+ MONDO:0008995 19_ref
2
+ MONDO:0007172 22_ref
3
+ MONDO:0014823 25_ref
4
+ MONDO:0017999 36_ref
5
+ MONDO:0017999 53_ref
6
+ MONDO:0011142 53_ref
7
+ MONDO:0009833 53_ref
8
+ MONDO:0009594 54_ref
9
+ MONDO:0010193 54_ref
10
+ MONDO:0012176 62_ref
11
+ MONDO:0011142 66_ref
12
+ MONDO:0013969 1189_ref
13
+ MONDO:0008995 19_ref
14
+ MONDO:0007172 22_ref
15
+ MONDO:0014823 25_ref
16
+ MONDO:0017999 53_ref
17
+ MONDO:0009833 53_ref
18
+ MONDO:0009594 54_ref
19
+ MONDO:0010193 54_ref
20
+ MONDO:0012176 62_ref
21
+ MONDO:0011142 66_ref
22
+ MONDO:0013969 1189_ref
@@ -0,0 +1,29 @@
1
+ MONDO:0008995 19_ref
2
+ MONDO:0007172 22_ref
3
+ MONDO:0014823 25_ref
4
+ MONDO:0017999 36_ref
5
+ MONDO:0017999 53_ref
6
+ MONDO:0011142 53_ref
7
+ MONDO:0009833 53_ref
8
+ MONDO:0009594 54_ref
9
+ MONDO:0010193 54_ref
10
+ MONDO:0012176 62_ref
11
+ MONDO:0011142 66_ref
12
+ MONDO:0013969 1189_ref
13
+ MONDO:0008995 19_ref
14
+ MONDO:0007172 22_ref
15
+ MONDO:0014823 25_ref
16
+ MONDO:0017999 53_ref
17
+ MONDO:0009833 53_ref
18
+ MONDO:0009594 54_ref
19
+ MONDO:0010193 54_ref
20
+ MONDO:0012176 62_ref
21
+ MONDO:0011142 66_ref
22
+ MONDO:0013969 1189_ref
23
+ MONDO:0010193 HGNC:3527
24
+ MONDO:0008995 HGNC:16873
25
+ MONDO:0012866 HGNC:21197
26
+ MONDO:0017999 HGNC:21197
27
+ MONDO:0011142 HGNC:21144
28
+ MONDO:0013969 HGNC:21176
29
+ MONDO:0018053 HGNC:21157
@@ -0,0 +1,29 @@
1
+ MONDO:0008995 19_ref
2
+ MONDO:0007172 22_ref
3
+ MONDO:0014823 25_ref
4
+ MONDO:0017999 36_ref
5
+ MONDO:0017999 53_ref
6
+ MONDO:0011142 53_ref
7
+ MONDO:0009833 53_ref
8
+ MONDO:0009594 54_ref
9
+ MONDO:0010193 54_ref
10
+ MONDO:0012176 62_ref
11
+ MONDO:0011142 66_ref
12
+ MONDO:0013969 1189_ref
13
+ MONDO:0008995 19_ref
14
+ MONDO:0007172 22_ref
15
+ MONDO:0014823 25_ref
16
+ MONDO:0017999 53_ref
17
+ MONDO:0009833 53_ref
18
+ MONDO:0009594 54_ref
19
+ MONDO:0010193 54_ref
20
+ MONDO:0012176 62_ref
21
+ MONDO:0011142 66_ref
22
+ MONDO:0013969 1189_ref
23
+ MONDO:0010193 HGNC:3527
24
+ MONDO:0008995 HGNC:16873
25
+ MONDO:0012866 HGNC:21197
26
+ MONDO:0017999 HGNC:21197
27
+ MONDO:0011142 HGNC:21144
28
+ MONDO:0013969 HGNC:21176
29
+ MONDO:0018053 HGNC:21157
@@ -0,0 +1,3 @@
1
+ a: 5
2
+ b: 2
3
+ c: 5
@@ -0,0 +1,5 @@
1
+ MONDO:0008995
2
+ MONDO:0017999
3
+ MONDO:0011142
4
+ MONDO:0010193
5
+ MONDO:0013969
@@ -0,0 +1,5 @@
1
+ MONDO:0008995
2
+ MONDO:0017999
3
+ MONDO:0011142
4
+ MONDO:0010193
5
+ MONDO:0013969
@@ -0,0 +1,5 @@
1
+ MONDO:0008995
2
+ MONDO:0017999
3
+ MONDO:0011142
4
+ MONDO:0010193
5
+ MONDO:0013969
@@ -0,0 +1,5 @@
1
+ MONDO:0008995 19_ref MONDO:0008995 HGNC:16873
2
+ MONDO:0017999 53_ref MONDO:0017999 HGNC:21197
3
+ MONDO:0011142 66_ref MONDO:0011142 HGNC:21144
4
+ MONDO:0010193 54_ref MONDO:0010193 HGNC:3527
5
+ MONDO:0013969 1189_ref MONDO:0013969 HGNC:21176
@@ -0,0 +1,7 @@
1
+ MONDO:0010193 HGNC:3527 54_ref
2
+ MONDO:0008995 HGNC:16873 19_ref
3
+ MONDO:0012866 HGNC:21197
4
+ MONDO:0017999 HGNC:21197 53_ref
5
+ MONDO:0011142 HGNC:21144 66_ref
6
+ MONDO:0013969 HGNC:21176 1189_ref
7
+ MONDO:0018053 HGNC:21157
@@ -0,0 +1,7 @@
1
+ MONDO:0008995 19_ref HGNC:16873
2
+ MONDO:0017999 36_ref HGNC:21197
3
+ MONDO:0017999 53_ref HGNC:21197
4
+ MONDO:0011142 53_ref HGNC:21144
5
+ MONDO:0010193 54_ref HGNC:3527
6
+ MONDO:0011142 66_ref HGNC:21144
7
+ MONDO:0013969 1189_ref HGNC:21176
@@ -0,0 +1,12 @@
1
+ MONDO:0008995 19_ref HGNC:16873
2
+ MONDO:0007172 22_ref
3
+ MONDO:0014823 25_ref
4
+ MONDO:0017999 36_ref HGNC:21197
5
+ MONDO:0017999 53_ref HGNC:21197
6
+ MONDO:0011142 53_ref HGNC:21144
7
+ MONDO:0009833 53_ref
8
+ MONDO:0009594 54_ref
9
+ MONDO:0010193 54_ref HGNC:3527
10
+ MONDO:0012176 62_ref
11
+ MONDO:0011142 66_ref HGNC:21144
12
+ MONDO:0013969 1189_ref HGNC:21176
@@ -0,0 +1,12 @@
1
+ MONDO:0010193 HGNC:3527 54_ref
2
+ MONDO:0008995 HGNC:16873 19_ref
3
+ MONDO:0012866 HGNC:21197 -
4
+ MONDO:0017999 HGNC:21197 53_ref
5
+ MONDO:0011142 HGNC:21144 66_ref
6
+ MONDO:0013969 HGNC:21176 1189_ref
7
+ MONDO:0018053 HGNC:21157 -
8
+ MONDO:0007172 - 22_ref
9
+ MONDO:0014823 - 25_ref
10
+ MONDO:0009833 - 53_ref
11
+ MONDO:0009594 - 54_ref
12
+ MONDO:0012176 - 62_ref
@@ -0,0 +1,3 @@
1
+ sample initial_total_sequences initial_read_max_length initial_read_min_length initial_%gc
2
+ CTL_1_cell 11437331.0 76.0 35.0 45.0
3
+ CTL_1_exo 10668412.0 76.0 35.0 48.0
@@ -0,0 +1,12 @@
1
+ HGNC:16873 19_ref
2
+ MONDO:0007172 22_ref
3
+ MONDO:0014823 25_ref
4
+ HGNC:21197 36_ref
5
+ HGNC:21197 53_ref
6
+ HGNC:21144 53_ref
7
+ MONDO:0009833 53_ref
8
+ MONDO:0009594 54_ref
9
+ HGNC:3527 54_ref
10
+ MONDO:0012176 62_ref
11
+ HGNC:21144 66_ref
12
+ HGNC:21176 1189_ref
@@ -0,0 +1,7 @@
1
+ HGNC:16873 19_ref
2
+ HGNC:21197 36_ref
3
+ HGNC:21197 53_ref
4
+ HGNC:21144 53_ref
5
+ HGNC:3527 54_ref
6
+ HGNC:21144 66_ref
7
+ HGNC:21176 1189_ref
@@ -0,0 +1,8 @@
1
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_num 1988
2
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_size_min 1
3
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_size_max 296
4
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_size_median 3.0
5
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_size_mean 4.667505030181086
6
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_nodes_mean 1.0
7
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_nodes 9279
8
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_pairs 147578
@@ -0,0 +1,8 @@
1
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_num 1988
2
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_size_min 1
3
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_size_max 296
4
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_size_median 3.0
5
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_size_mean 4.667505030181086
6
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_nodes_mean 1.0
7
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_nodes 9279
8
+ MERGED_net_no_raw_cpm MERGED no no cpm cl_pairs 147578