cmdtabs 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -0
  3. data/Rakefile +9 -4
  4. data/TEST_file +1 -0
  5. data/bin/aggregate_column_data.rb +20 -25
  6. data/bin/column_filter.rb +87 -0
  7. data/bin/create_metric_table.rb +35 -47
  8. data/bin/desaggregate_column_data.rb +18 -17
  9. data/bin/excel_to_tabular.rb +57 -0
  10. data/bin/intersect_columns.rb +29 -28
  11. data/bin/merge_tabular.rb +21 -34
  12. data/bin/standard_name_replacer.rb +35 -23
  13. data/bin/table_linker.rb +56 -29
  14. data/bin/tag_table.rb +52 -0
  15. data/cmdtabs.gemspec +3 -1
  16. data/data_test_scripts/cluster_genes_dis_AGG_stdin_to_test +2 -0
  17. data/data_test_scripts/cluster_genes_dis_AGG_to_test +2 -0
  18. data/data_test_scripts/cluster_genes_dis_DESAGG_stdin_to_test +2 -0
  19. data/data_test_scripts/cluster_genes_dis_DESAGG_to_test +2 -0
  20. data/data_test_scripts/cluster_genes_from_excel.txt_to_test +2 -0
  21. data/data_test_scripts/column_matching_hard_to_test +3 -0
  22. data/data_test_scripts/column_matching_hard_various_keys_and_every_columns_header_to_test +3 -0
  23. data/data_test_scripts/column_matching_hard_various_keys_and_every_columns_to_test +2 -0
  24. data/data_test_scripts/column_matching_hard_various_keys_and_some_columns_to_test +10 -0
  25. data/data_test_scripts/column_matching_hard_various_keys_to_test +7 -0
  26. data/data_test_scripts/column_matching_soft_1_column_reverse_to_test +21 -0
  27. data/data_test_scripts/column_matching_soft_1_column_to_test +29 -0
  28. data/data_test_scripts/column_matching_soft_1_column_uniq_to_test +12 -0
  29. data/data_test_scripts/column_matching_soft_and_every_columns_to_test +22 -0
  30. data/data_test_scripts/column_matching_soft_and_some_columns_to_test +29 -0
  31. data/data_test_scripts/column_matching_soft_to_test +29 -0
  32. data/data_test_scripts/intersect_columns_count_to_test +3 -0
  33. data/data_test_scripts/intersect_columns_default_stdin_a_to_test +5 -0
  34. data/data_test_scripts/intersect_columns_default_stdin_b_to_test +5 -0
  35. data/data_test_scripts/intersect_columns_default_to_test +5 -0
  36. data/data_test_scripts/intersect_columns_full_to_test +5 -0
  37. data/data_test_scripts/linked_table_2_to_test +7 -0
  38. data/data_test_scripts/linked_table_matches_to_test +7 -0
  39. data/data_test_scripts/linked_table_to_test +12 -0
  40. data/data_test_scripts/merge_disease_cluster_gene_to_test +12 -0
  41. data/data_test_scripts/metric_table_to_test +3 -0
  42. data/data_test_scripts/replaced_name_to_test +12 -0
  43. data/data_test_scripts/replaced_name_untranstaled_to_test +7 -0
  44. data/data_test_scripts/tag_table_header_to_test +8 -0
  45. data/data_test_scripts/tag_table_to_test +8 -0
  46. data/lib/cmdtabs/cmdtabs_lib.rb +351 -0
  47. data/lib/cmdtabs/version.rb +1 -1
  48. data/lib/cmdtabs.rb +2 -0
  49. data/test_scripts.sh +88 -0
  50. metadata +59 -7
  51. data/bin/table_header.rb +0 -207
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 112a3fc1f558b0175f2d2905444c7a139204a600e2ba59d096fcbe616d48f0e5
4
- data.tar.gz: be9484e39d920387ed525ae22efa8bf74ba8c176bcd3fa85cd3094557c320c6d
3
+ metadata.gz: 94a9dd7a91e4fdd80ee75261c20869c48fb127762a7e09953ef4abb0048fbbad
4
+ data.tar.gz: c22e8c790cc8c420e3689d8da0835132bf594304bfd727a91cbf6e224ea6818d
5
5
  SHA512:
6
- metadata.gz: 0a3ac38dde1c8675a225674edae8a4c580928ede5cb68c97fd218a36bf1592390fe8b29dbddfae08bd7343f6aad77149bd21eab0d2f7dfdb28ca1a0b0bc31455
7
- data.tar.gz: e537e117d62911b7bb43025e9952ee1ff0f3a7a9af868471fb95351cd463206926be2a97d24e00aa922ad06a55d5a812173e9b752055e9690606c8e426372b8f
6
+ metadata.gz: 59e655baed9fe5df2570a01cbc026ca74dad7ac8f1ab0b36cdbbd1786c769d08001149a91eea94fad5895a8a754aa5b75c52c8cc5ed98e289bb72492e4ddaaa4
7
+ data.tar.gz: a1544b22c6453b2f2112451099eaaf42d274200a8184a28605c7e05731aa94570dfc47960c5ddb438ba7e0738dfb5f11c7670a4f8e34a031399dcdd2954a4b53
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Cmdtabs
2
2
 
3
+ DEPRECATED PROJECT. MIGRATED TO [python cmdtabs](https://github.com/seoanezonjic/py_cmdtabs)
4
+
3
5
  Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/cmdtabs`. To experiment with that code, run `bin/console` for an interactive prompt.
4
6
 
5
7
  TODO: Delete this and the text above, and describe your gem
data/Rakefile CHANGED
@@ -1,12 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "bundler/gem_tasks"
4
- require "rspec/core/rake_task"
5
-
6
- RSpec::Core::RakeTask.new(:spec)
4
+ require "rake/testtask"
7
5
 
8
6
  require "rubocop/rake_task"
9
7
 
10
8
  RuboCop::RakeTask.new
11
9
 
12
- task default: %i[spec rubocop]
10
+ Rake::TestTask.new(:test) do |t|
11
+ t.libs << "test"
12
+ t.libs << "lib"
13
+ t.test_files = FileList["test/**/*_test.rb"]
14
+ end
15
+
16
+ task default: %i[test rubocop]
17
+
data/TEST_file ADDED
@@ -0,0 +1 @@
1
+ sample initial_total_sequences initial_read_max_length initial_read_min_length initial_%gc
data/bin/aggregate_column_data.rb CHANGED
@@ -1,6 +1,15 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
5
+
3
6
  require 'optparse'
7
+ require 'cmdtabs'
8
+
9
+
10
+ #####################################################################
11
+ ## OPTPARSE
12
+ ######################################################################
4
13
 
5
14
  options = {}
6
15
  OptionParser.new do |opts|
@@ -12,8 +21,8 @@ OptionParser.new do |opts|
12
21
  end
13
22
 
14
23
  options[:col_index] = nil
15
- opts.on("-x", "--column_index INTEGER", "Column index (0 based) to use as reference") do |item|
16
- options[:col_index] = item.to_i
24
+ opts.on("-x", "--column_index INTEGER", "Column index (1 based) to use as reference") do |item|
25
+ options[:col_index] = item.to_i - 1
17
26
  end
18
27
 
19
28
  options[:sep] = ','
@@ -22,8 +31,8 @@ OptionParser.new do |opts|
22
31
  end
23
32
 
24
33
  options[:col_aggregate] = nil
25
- opts.on("-a", "--column_aggregate INTEGER", "Column index (0 based) to extract data and join for each id in column index") do |item|
26
- options[:col_aggregate] = item.to_i
34
+ opts.on("-a", "--column_aggregate INTEGER", "Column index (1 based) to extract data and join for each id in column index") do |item|
35
+ options[:col_aggregate] = item.to_i - 1
27
36
  end
28
37
 
29
38
  opts.on_tail("-h", "--help", "Show this message") do
@@ -33,24 +42,10 @@ OptionParser.new do |opts|
33
42
  end.parse!
34
43
 
35
44
 
36
- agg_data = {}
37
- if options[:input] == '-'
38
- input = STDIN
39
- else
40
- input = File.open(options[:input])
41
- end
42
- input.each do |line|
43
- fields = line.chomp.split("\t")
44
- key = fields[options[:col_index]]
45
- val = fields[options[:col_aggregate]]
46
- query = agg_data[key]
47
- if query.nil?
48
- agg_data[key] = [val]
49
- else
50
- query << val
51
- end
52
- end
53
-
54
- agg_data.each do |key, values|
55
- STDOUT.puts "#{key}\t#{values.join(options[:sep])}"
56
- end
45
+ ##################################################################################################
46
+ ## MAIN
47
+ ##################################################################################################
48
+
49
+ input_table = load_input_data(options[:input])
50
+ agg_data = aggregate_column(input_table, options[:col_index], options[:col_aggregate], options[:sep])
51
+ write_output_data(agg_data)
data/bin/column_filter.rb ADDED
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
5
+
6
+ require 'find'
7
+ require 'optparse'
8
+ require 'cmdtabs'
9
+
10
+
11
+ #################################################################################################
12
+ ## INPUT PARSING
13
+ #################################################################################################
14
+ options = {}
15
+
16
+ optparse = OptionParser.new do |opts|
17
+ options[:table_file] = nil
18
+ opts.on( '-t', '--table_file FILE', 'Input tabulated file' ) do |table_file|
19
+ options[:table_file] = table_file
20
+ end
21
+
22
+ options[:cols_to_show] = nil
23
+ opts.on( '-c', '--column STRING', 'Column/s to show (1 based). Format: x,y,z..' ) do |column|
24
+ options[:cols_to_show] = parse_column_indices(sep = ",", column)
25
+ end
26
+
27
+ options[:col_filter] = nil
28
+ opts.on( '-f', '--col_filter STRING', 'Select columns where search keywords. Format: x,y,z..' ) do |col_filter|
29
+ options[:col_filter] = parse_column_indices(sep = ",", col_filter)
30
+ end
31
+
32
+ options[:keywords] = nil
33
+ opts.on( '-k', '--keywords STRING', 'Keywords for select rows. Format: key1_col1&key2_col1%key1_col2&key2_col2' ) do |keywords|
34
+ options[:keywords] = keywords
35
+ end
36
+
37
+ options[:search_mode] = 'c'
38
+ opts.on( '-s', '--search STRING', 'c for match in every columns set, s some match in some column. Default c' ) do |search_mode|
39
+ options[:search_mode] = search_mode
40
+ end
41
+
42
+ options[:match_mode] = 'i'
43
+ opts.on( '-m', '--match_mode STRING', 'i string must include the keyword, c for fullmatch. Default i') do |match_mode|
44
+ options[:match_mode] = match_mode
45
+ end
46
+
47
+ options[:separator] = "\t"
48
+ opts.on( '-p', '--separator STRING', 'Separator used in fields. Default i') do |separator|
49
+ options[:separator] = separator
50
+ end
51
+
52
+ options[:reverse] = false
53
+ opts.on( '-r', '--reverse', 'Select not matching' ) do
54
+ options[:reverse] = true
55
+ end
56
+
57
+ options[:uniq] = false
58
+ opts.on( '-u', '--uniq', 'Delete redundant items' ) do
59
+ options[:uniq] = true
60
+ end
61
+
62
+ options[:header] = false
63
+ opts.on( '-H', '--header', 'indicate if files have header' ) do
64
+ options[:header] = true
65
+ end
66
+
67
+ # Set a banner, displayed at the top of the help screen.
68
+ opts.banner = "Usage: column_filter.rb -t tabulated_file \n\n"
69
+
70
+ # This displays the help screen
71
+ opts.on( '-h', '--help', 'Display this screen' ) do
72
+ puts opts
73
+ exit
74
+ end
75
+
76
+ end
77
+ optparse.parse!
78
+
79
+ ##################################################################################################
80
+ ## MAIN
81
+ ##################################################################################################
82
+ abort('Tabulated file not specified') if options[:table_file].nil?
83
+ file_names = Dir.glob(options[:table_file])
84
+ input_files = load_several_files(file_names, options[:separator])
85
+ filtered_table = merge_and_filter_tables(input_files, options)
86
+ write_output_data(filtered_table)
87
+
data/bin/create_metric_table.rb CHANGED
@@ -1,49 +1,37 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- metric_file = ARGV[0]
4
- fixCols = ARGV[1].split(',')
5
- output = ARGV[2]
6
- name_tag = fixCols.shift
7
- fixColNumber = fixCols.length
8
-
9
- hash = {}
10
-
11
- varTags = []
12
- File.open(ARGV[0]).each do |line|
13
- line.chomp!
14
- fields = line.split("\t")
15
- name = fields.shift
16
- fixFields = fields[0..fixColNumber-1]
17
- varFields = fields[fixColNumber..fixColNumber+1]
18
- varTags << varFields.first if !varTags.include?(varFields.first)
19
-
20
- query = hash[name]
21
- if query.nil?
22
- hash[name] = {varFields.first => varFields.last}
23
- fixCols.each_with_index do |tag, i|
24
- hash[name][tag] = fixFields[i]
25
- end
26
- else
27
- query[varFields.first] = varFields.last
28
- end
29
- end
30
-
31
- metric_table = File.new(output, "w")
32
- if fixColNumber > 0
33
- header="#{name_tag}\t#{fixCols.join("\t")}\t#{varTags.join("\t")}"
34
- else
35
- header="#{name_tag}\t#{varTags.join("\t")}"
36
- end
37
-
38
- metric_table.puts(header)
39
- allTags = fixCols.concat(varTags)
40
- hash.each do |name, fields|
41
- array_temp = [name]
42
- allTags.each do |tag|
43
- array_temp << fields[tag]
44
- end
45
- metric_table.puts(array_temp.join("\t"))
46
- end
47
- metric_table.close()
48
-
49
-
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
5
+ require 'optparse'
6
+ require 'cmdtabs'
7
+
8
+
9
+ #####################################################################
10
+ ## OPTPARSE
11
+ ######################################################################
12
+
13
+ options = {}
14
+ OptionParser.new do |opts|
15
+ opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
16
+
17
+ options[:corrupted] = nil
18
+ opts.on("-c", "--corrupted PATH", "File where corrupted metrics are stored") do |path|
19
+ options[:corrupted] = path
20
+ end
21
+
22
+ opts.on_tail("-h", "--help", "Show this message") do
23
+ puts opts
24
+ exit
25
+ end
26
+ end.parse!
27
+
28
+ ##################################################################################################
29
+ ## MAIN
30
+ ##################################################################################################
31
+ metric_file = load_input_data(ARGV[0])
32
+ attributes = ARGV[1].split(',')
33
+ samples_tag = attributes.shift
34
+ metric_names, indexed_metrics = index_metrics(metric_file, attributes)
35
+ table_output, corrupted_records = create_table(indexed_metrics, samples_tag, attributes, metric_names)
36
+ write_output_data(table_output, ARGV[2])
37
+ write_output_data(corrupted_records, options[:corrupted]) if !options[:corrupted].nil? && !corrupted_records.empty?
data/bin/desaggregate_column_data.rb CHANGED
@@ -1,6 +1,14 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
3
5
  require 'optparse'
6
+ require 'cmdtabs'
7
+
8
+
9
+ #####################################################################
10
+ ## OPTPARSE
11
+ ######################################################################
4
12
 
5
13
  options = {}
6
14
  OptionParser.new do |opts|
@@ -12,8 +20,8 @@ OptionParser.new do |opts|
12
20
  end
13
21
 
14
22
  options[:col_index] = nil
15
- opts.on("-x", "--column_index INTEGER", "Column index (0 based) to use as reference") do |item|
16
- options[:col_index] = item.to_i
23
+ opts.on("-x", "--column_index INTEGER", "Column index (1 based) to use as reference") do |item|
24
+ options[:col_index] = item.to_i - 1
17
25
  end
18
26
 
19
27
  options[:sep] = ","
@@ -28,19 +36,12 @@ OptionParser.new do |opts|
28
36
  end.parse!
29
37
 
30
38
 
31
- agg_data = {}
32
- if options[:input] == '-'
33
- input = STDIN
34
- else
35
- input = File.open(options[:input])
36
- end
37
- input.each do |line|
38
- fields = line.chomp.split("\t")
39
- target_field = fields[options[:col_index]]
40
- target_field.split(options[:sep]).each do |val|
41
- record = fields[0..(options[:col_index]-1)] + [val] + fields[(options[:col_index] + 1)..fields.length]
42
- #record = fields[0..(options[:col_index] + 1)] + [val] + fields[(options[:col_index] + 1)..fields.length]
43
- STDOUT.puts record.join("\t")
44
- end
45
- end
39
+ ##################################################################################################
40
+ ## MAIN
41
+ ##################################################################################################
42
+
43
+ input_table = load_input_data(options[:input])
44
+ desagg_data = desaggregate_column(input_table, options[:col_index], options[:sep])
45
+ write_output_data(desagg_data)
46
+
46
47
 
data/bin/excel_to_tabular.rb ADDED
@@ -0,0 +1,57 @@
1
+ #! /usr/bin/env ruby
2
+ #
3
+ ## Script to transform xlsx to tabular file.
4
+ ## By default, selects sheet 1 and column 1.
5
+
6
+ ROOT_PATH = File.dirname(__FILE__)
7
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
8
+
9
+ require 'optparse'
10
+ require 'cmdtabs'
11
+ require 'xsv'
12
+
13
+ #######################
14
+ ## OPTPARSE
15
+ #######################
16
+
17
+ options = {}
18
+ OptionParser.new do |opts|
19
+ opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
20
+
21
+ options[:columns2extract] = [0]
22
+ opts.on("-c", "--columns2extract INTEGER", "Column position to extract (1 based). Default 1") do |data|
23
+ options[:columns2extract] = parse_column_indices(sep = ",", data)
24
+ end
25
+
26
+ options[:input_file] = nil
27
+ opts.on("-i", "--input_file PATH", "Input xlsx file") do |path|
28
+ options[:input_file] = path
29
+ end
30
+
31
+ options[:output_file] = 'table.txt'
32
+ opts.on("-o", "--output_file PATH", "Output tabular file") do |path|
33
+ options[:output_file] = path
34
+ end
35
+
36
+ options[:sheet_number] = 0
37
+ opts.on("-s", "--sheet_number INTEGER", "Sheet number to work with. Default 1") do |data|
38
+ options[:sheet_number] = data.to_i - 1
39
+ end
40
+
41
+ opts.on_tail("-h", "--help", "Show this message") do
42
+ puts opts
43
+ exit
44
+ end
45
+
46
+ end.parse!
47
+
48
+ #######################
49
+ ## MAIN
50
+ #######################
51
+
52
+ #See https://github.com/martijn/xsv
53
+
54
+ x = Xsv.open(options[:input_file])
55
+ sheet = x.sheets[options[:sheet_number]].to_a
56
+ storage = extract_columns(sheet, options[:columns2extract])
57
+ write_output_data(storage, options[:output_file])
data/bin/intersect_columns.rb CHANGED
@@ -1,25 +1,10 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
3
5
  require 'optparse'
6
+ require 'cmdtabs'
4
7
 
5
- #####################################################################
6
- ## METHODS
7
- ######################################################################
8
-
9
- def load_records(file, cols, sep)
10
- recs = {}
11
- File.open(file).each do |line|
12
- fields = line.chomp.split(sep)
13
- recs[cols.map{|c| fields[c]}] = true
14
- end
15
- return recs.keys
16
- end
17
-
18
- def print_recs(recs, sep)
19
- recs.each do |rec|
20
- puts rec.join(sep)
21
- end
22
- end
23
8
 
24
9
  #####################################################################
25
10
  ## OPTPARSE
@@ -40,13 +25,13 @@ OptionParser.new do |opts|
40
25
  end
41
26
 
42
27
  options[:a_cols] = [0]
43
- opts.on("-A", "--a_cols STRING", "Index of columns in base 0 to compare") do |item|
44
- options[:a_cols] = item.split(',').map{|n| n.to_i}
28
+ opts.on("-A", "--a_cols STRING", "Index of columns in base 1 to compare") do |item|
29
+ options[:a_cols] = parse_column_indices(sep = ",", item)
45
30
  end
46
31
 
47
32
  options[:b_cols] = [0]
48
- opts.on("-B", "--b_cols STRING", "Index of columns in base 0 to compare") do |item|
49
- options[:b_cols] = item.split(',').map{|n| n.to_i}
33
+ opts.on("-B", "--b_cols STRING", "Index of columns in base 1 to compare") do |item|
34
+ options[:b_cols] = parse_column_indices(sep = ",", item)
50
35
  end
51
36
 
52
37
  options[:count] = false
@@ -59,6 +44,11 @@ OptionParser.new do |opts|
59
44
  options[:keep] = item
60
45
  end
61
46
 
47
+ options[:full] = false
48
+ opts.on("--full", "Give full record") do |item|
49
+ options[:full] = true
50
+ end
51
+
62
52
  options[:sep] = "\t"
63
53
  opts.on("-s", "--separator STRING", "column character separator") do |item|
64
54
  options[:sep] = item
@@ -70,8 +60,11 @@ OptionParser.new do |opts|
70
60
  end
71
61
  end.parse!
72
62
 
73
- a_records = load_records(options[:a_file], options[:a_cols], options[:sep])
74
- b_records = load_records(options[:b_file], options[:b_cols], options[:sep])
63
+ input_data_a = load_input_data(options[:a_file], options[:sep])
64
+ input_data_b = load_input_data(options[:b_file], options[:sep])
65
+
66
+ a_records, full_a_rec = load_records(input_data_a, options[:a_cols], options[:full])
67
+ b_records, full_b_rec = load_records(input_data_b, options[:b_cols], options[:full])
75
68
 
76
69
  common = a_records & b_records
77
70
  a_only = a_records - common
@@ -82,12 +75,20 @@ if options[:count]
82
75
  puts "c: #{common.length}"
83
76
  else
84
77
  if options[:keep] == 'c'
85
- print_recs(common, options[:sep])
78
+ result = common
79
+ result = common.map{|r| full_a_rec[r] + full_b_rec[r]} if options[:full]
86
80
  elsif options[:keep] == 'a'
87
- print_recs(a_only, options[:sep])
81
+ result = a_only
82
+ result = a_only.map{|r| full_a_rec[r]} if options[:full]
88
83
  elsif options[:keep] == 'b'
89
- print_recs(b_only, options[:sep])
84
+ result = b_only
85
+ result = b_only.map{|r| full_a_rec[r]} if options[:full]
90
86
  elsif options[:keep] == 'ab'
91
- print_recs(a_only + b_only, options[:sep])
87
+ if options[:full]
88
+ a_only = a_only.map{|r| full_a_rec[r]}
89
+ b_only = b_only.map{|r| full_a_rec[r]}
90
+ end
91
+ result = a_only + b_only
92
92
  end
93
+ write_output_data(result, nil, options[:sep])
93
94
  end
data/bin/merge_tabular.rb CHANGED
@@ -1,41 +1,28 @@
1
1
  #! /usr/bin/env ruby
2
+ ROOT_PATH = File.dirname(__FILE__)
3
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
2
4
 
3
- parent_table = {}
4
- table_length = 0
5
+ require 'optparse'
6
+ require 'cmdtabs'
5
7
 
6
- ARGV.each do |file_name|
7
-
8
- local_length = 0
9
- File.open(file_name).each do |line|
10
- line.chomp!
11
- n_fields = line.count("\t")+1
12
- fields = line.split("\t", n_fields).map{|field|
13
- if field == ""
14
- '-'
15
- else
16
- field
17
- end
18
- }
19
- next if fields.count('-') == fields.length #skip blank records
20
- id = fields.shift
21
- local_length = fields.length
22
- if !parent_table.has_key?(id)
23
- parent_table[id] = Array.new(table_length,'-')
24
- elsif parent_table[id].length < table_length
25
- parent_table[id].concat(Array.new(table_length-parent_table[id].length,'-'))
26
- end
27
- parent_table[id].concat(fields)
8
+ #####################################################################
9
+ ## OPTIONS
10
+ ######################################################################
11
+ options = {}
12
+ OptionParser.new do |opts|
13
+ opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
28
14
 
29
- end
15
+ opts.on_tail("-h", "--help", "Show this message") do
16
+ puts opts
17
+ exit
18
+ end
19
+ end.parse!
30
20
 
31
- table_length += local_length
32
- parent_table.each do |id, fields|
33
- diference = table_length - fields.length
34
- fields.concat(Array.new(diference,'-')) if diference > 0
35
- end
36
21
 
37
- end
22
+ ##################################################################################################
23
+ ## MAIN
24
+ ##################################################################################################
38
25
 
39
- parent_table.each do |id, fields|
40
- puts id+"\t"+fields.join("\t")
41
- end
26
+ files = load_files(ARGV)
27
+ merged = merge_files(files)
28
+ write_output_data(merged)
data/bin/standard_name_replacer.rb CHANGED
@@ -1,6 +1,15 @@
1
1
  #! /usr/bin/env ruby
2
2
 
3
+ ROOT_PATH = File.dirname(__FILE__)
4
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
3
5
  require 'optparse'
6
+ require 'cmdtabs.rb'
7
+
8
+
9
+
10
+ #####################################################################
11
+ ## OPTPARSE
12
+ ######################################################################
4
13
 
5
14
  options = {}
6
15
  OptionParser.new do |opts|
@@ -16,44 +25,47 @@ OptionParser.new do |opts|
16
25
  options[:index_file] = item
17
26
  end
18
27
 
28
+ options[:output_file] = nil
29
+ opts.on("-o", "--output_file PATH", "Output file ") do |item|
30
+ options[:output_file] = item
31
+ end
32
+
19
33
  options[:input_separator] = "\t"
20
34
  opts.on("-s", "--input_separator STRING", "Separator character") do |item|
21
35
  options[:input_separator] = item
22
36
  end
23
37
 
24
38
  options[:columns] = [1]
25
- opts.on("-c", "--columns STRING", "Columns indexes, comma separated, to perform the ID translations.") do |item|
26
- options[:columns] = item.split(','). map{|i| i.to_i - 1}
39
+ opts.on("-c", "--columns STRING", "Columns indexes (1 based), comma separated, to perform the ID translations.") do |item|
40
+ options[:columns] = parse_column_indices(sep = ",", item)
27
41
  end
28
42
 
29
43
  options[:from] = 0
30
44
  opts.on("-f", "--from INTEGER", "Column in index file to take reference value. Default 1. Numeration is 1 based") do |item|
31
- options[:from] = item.to_i - 1
45
+ options[:from] = item.to_i - 1
32
46
  end
33
47
 
34
48
  options[:to] = 1
35
49
  opts.on("-t", "--to INTEGER", "Column in index file to take the value that will be used in substitution. Default 2. Numeration is 1 based") do |item|
36
- options[:to] = item.to_i - 1
50
+ options[:to] = item.to_i - 1
37
51
  end
38
52
 
53
+ options[:remove_untranslated] = false
54
+ opts.on("-u", "--remove_untranslated", "Activate this flag for remove the untranslated entries") do
55
+ options[:remove_untranslated] = true
56
+ end
39
57
  end.parse!
40
58
 
41
- #Load index
42
- index = {}
43
- File.open(options[:index_file]).read.each_line do |line|
44
- line.chomp!
45
- fields = line.split("\t")
46
- index[fields[options[:from]]] = fields[options[:to]]
47
- end
48
-
49
- #Reemplaza nombres
50
- File.open(options[:input_file]+'_rep','w') do |f|
51
- File.open(options[:input_file]).each do |line|
52
- fields = line.chomp.split(options[:input_separator])
53
- options[:columns].each do |col|
54
- new_string = index[fields[col]]
55
- fields[col] = new_string if !new_string.nil?
56
- end
57
- f.puts fields.join(options[:input_separator])
58
- end
59
- end
59
+
60
+ ##################################################################################################
61
+ ## MAIN
62
+ ##################################################################################################
63
+
64
+ input_index = load_input_data(options[:index_file])
65
+ translation_index = index_array(input_index, options[:from], options[:to])
66
+
67
+ input_table = load_input_data(options[:input_file], options[:input_separator])
68
+
69
+ tabular_output_translated, _ = name_replaces(input_table, options[:input_separator], options[:columns], translation_index, options[:remove_untranslated])
70
+
71
+ write_output_data(tabular_output_translated, options[:output_file], options[:input_separator])