cmdtabs 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/Rakefile +9 -4
- data/TEST_file +1 -0
- data/bin/aggregate_column_data.rb +20 -25
- data/bin/column_filter.rb +87 -0
- data/bin/create_metric_table.rb +35 -47
- data/bin/desaggregate_column_data.rb +18 -17
- data/bin/excel_to_tabular.rb +57 -0
- data/bin/intersect_columns.rb +29 -28
- data/bin/merge_tabular.rb +21 -34
- data/bin/standard_name_replacer.rb +35 -23
- data/bin/table_linker.rb +56 -29
- data/bin/tag_table.rb +52 -0
- data/cmdtabs.gemspec +3 -1
- data/data_test_scripts/cluster_genes_dis_AGG_stdin_to_test +2 -0
- data/data_test_scripts/cluster_genes_dis_AGG_to_test +2 -0
- data/data_test_scripts/cluster_genes_dis_DESAGG_stdin_to_test +2 -0
- data/data_test_scripts/cluster_genes_dis_DESAGG_to_test +2 -0
- data/data_test_scripts/cluster_genes_from_excel.txt_to_test +2 -0
- data/data_test_scripts/column_matching_hard_to_test +3 -0
- data/data_test_scripts/column_matching_hard_various_keys_and_every_columns_header_to_test +3 -0
- data/data_test_scripts/column_matching_hard_various_keys_and_every_columns_to_test +2 -0
- data/data_test_scripts/column_matching_hard_various_keys_and_some_columns_to_test +10 -0
- data/data_test_scripts/column_matching_hard_various_keys_to_test +7 -0
- data/data_test_scripts/column_matching_soft_1_column_reverse_to_test +21 -0
- data/data_test_scripts/column_matching_soft_1_column_to_test +29 -0
- data/data_test_scripts/column_matching_soft_1_column_uniq_to_test +12 -0
- data/data_test_scripts/column_matching_soft_and_every_columns_to_test +22 -0
- data/data_test_scripts/column_matching_soft_and_some_columns_to_test +29 -0
- data/data_test_scripts/column_matching_soft_to_test +29 -0
- data/data_test_scripts/intersect_columns_count_to_test +3 -0
- data/data_test_scripts/intersect_columns_default_stdin_a_to_test +5 -0
- data/data_test_scripts/intersect_columns_default_stdin_b_to_test +5 -0
- data/data_test_scripts/intersect_columns_default_to_test +5 -0
- data/data_test_scripts/intersect_columns_full_to_test +5 -0
- data/data_test_scripts/linked_table_2_to_test +7 -0
- data/data_test_scripts/linked_table_matches_to_test +7 -0
- data/data_test_scripts/linked_table_to_test +12 -0
- data/data_test_scripts/merge_disease_cluster_gene_to_test +12 -0
- data/data_test_scripts/metric_table_to_test +3 -0
- data/data_test_scripts/replaced_name_to_test +12 -0
- data/data_test_scripts/replaced_name_untranstaled_to_test +7 -0
- data/data_test_scripts/tag_table_header_to_test +8 -0
- data/data_test_scripts/tag_table_to_test +8 -0
- data/lib/cmdtabs/cmdtabs_lib.rb +351 -0
- data/lib/cmdtabs/version.rb +1 -1
- data/lib/cmdtabs.rb +2 -0
- data/test_scripts.sh +88 -0
- metadata +59 -7
- data/bin/table_header.rb +0 -207
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 94a9dd7a91e4fdd80ee75261c20869c48fb127762a7e09953ef4abb0048fbbad
|
4
|
+
data.tar.gz: c22e8c790cc8c420e3689d8da0835132bf594304bfd727a91cbf6e224ea6818d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59e655baed9fe5df2570a01cbc026ca74dad7ac8f1ab0b36cdbbd1786c769d08001149a91eea94fad5895a8a754aa5b75c52c8cc5ed98e289bb72492e4ddaaa4
|
7
|
+
data.tar.gz: a1544b22c6453b2f2112451099eaaf42d274200a8184a28605c7e05731aa94570dfc47960c5ddb438ba7e0738dfb5f11c7670a4f8e34a031399dcdd2954a4b53
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Cmdtabs
|
2
2
|
|
3
|
+
DEPRECATED PROJECT. MIGRATED TO [python cmdtabs](https://github.com/seoanezonjic/py_cmdtabs)
|
4
|
+
|
3
5
|
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/cmdtabs`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
6
|
|
5
7
|
TODO: Delete this and the text above, and describe your gem
|
data/Rakefile
CHANGED
@@ -1,12 +1,17 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "bundler/gem_tasks"
|
4
|
-
require "
|
5
|
-
|
6
|
-
RSpec::Core::RakeTask.new(:spec)
|
4
|
+
require "rake/testtask"
|
7
5
|
|
8
6
|
require "rubocop/rake_task"
|
9
7
|
|
10
8
|
RuboCop::RakeTask.new
|
11
9
|
|
12
|
-
|
10
|
+
Rake::TestTask.new(:test) do |t|
|
11
|
+
t.libs << "test"
|
12
|
+
t.libs << "lib"
|
13
|
+
t.test_files = FileList["test/**/*_test.rb"]
|
14
|
+
end
|
15
|
+
|
16
|
+
task default: %i[test rubocop]
|
17
|
+
|
data/TEST_file
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
sample initial_total_sequences initial_read_max_length initial_read_min_length initial_%gc
|
@@ -1,6 +1,15 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
5
|
+
|
3
6
|
require 'optparse'
|
7
|
+
require 'cmdtabs'
|
8
|
+
|
9
|
+
|
10
|
+
#####################################################################
|
11
|
+
## OPTPARSE
|
12
|
+
######################################################################
|
4
13
|
|
5
14
|
options = {}
|
6
15
|
OptionParser.new do |opts|
|
@@ -12,8 +21,8 @@ OptionParser.new do |opts|
|
|
12
21
|
end
|
13
22
|
|
14
23
|
options[:col_index] = nil
|
15
|
-
opts.on("-x", "--column_index INTEGER", "Column index (
|
16
|
-
options[:col_index] = item.to_i
|
24
|
+
opts.on("-x", "--column_index INTEGER", "Column index (1 based) to use as reference") do |item|
|
25
|
+
options[:col_index] = item.to_i - 1
|
17
26
|
end
|
18
27
|
|
19
28
|
options[:sep] = ','
|
@@ -22,8 +31,8 @@ OptionParser.new do |opts|
|
|
22
31
|
end
|
23
32
|
|
24
33
|
options[:col_aggregate] = nil
|
25
|
-
opts.on("-a", "--column_aggregate INTEGER", "Column index (
|
26
|
-
options[:col_aggregate] = item.to_i
|
34
|
+
opts.on("-a", "--column_aggregate INTEGER", "Column index (1 based) to extract data and join for each id in column index") do |item|
|
35
|
+
options[:col_aggregate] = item.to_i - 1
|
27
36
|
end
|
28
37
|
|
29
38
|
opts.on_tail("-h", "--help", "Show this message") do
|
@@ -33,24 +42,10 @@ OptionParser.new do |opts|
|
|
33
42
|
end.parse!
|
34
43
|
|
35
44
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
fields = line.chomp.split("\t")
|
44
|
-
key = fields[options[:col_index]]
|
45
|
-
val = fields[options[:col_aggregate]]
|
46
|
-
query = agg_data[key]
|
47
|
-
if query.nil?
|
48
|
-
agg_data[key] = [val]
|
49
|
-
else
|
50
|
-
query << val
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
agg_data.each do |key, values|
|
55
|
-
STDOUT.puts "#{key}\t#{values.join(options[:sep])}"
|
56
|
-
end
|
45
|
+
##################################################################################################
|
46
|
+
## MAIN
|
47
|
+
##################################################################################################
|
48
|
+
|
49
|
+
input_table = load_input_data(options[:input])
|
50
|
+
agg_data = aggregate_column(input_table, options[:col_index], options[:col_aggregate], options[:sep])
|
51
|
+
write_output_data(agg_data)
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
5
|
+
|
6
|
+
require 'find'
|
7
|
+
require 'optparse'
|
8
|
+
require 'cmdtabs'
|
9
|
+
|
10
|
+
|
11
|
+
#################################################################################################
|
12
|
+
## INPUT PARSING
|
13
|
+
#################################################################################################
|
14
|
+
options = {}
|
15
|
+
|
16
|
+
optparse = OptionParser.new do |opts|
|
17
|
+
options[:table_file] = nil
|
18
|
+
opts.on( '-t', '--table_file FILE', 'Input tabulated file' ) do |table_file|
|
19
|
+
options[:table_file] = table_file
|
20
|
+
end
|
21
|
+
|
22
|
+
options[:cols_to_show] = nil
|
23
|
+
opts.on( '-c', '--column STRING', 'Column/s to show (1 based). Format: x,y,z..' ) do |column|
|
24
|
+
options[:cols_to_show] = parse_column_indices(sep = ",", column)
|
25
|
+
end
|
26
|
+
|
27
|
+
options[:col_filter] = nil
|
28
|
+
opts.on( '-f', '--col_filter STRING', 'Select columns where search keywords. Format: x,y,z..' ) do |col_filter|
|
29
|
+
options[:col_filter] = parse_column_indices(sep = ",", col_filter)
|
30
|
+
end
|
31
|
+
|
32
|
+
options[:keywords] = nil
|
33
|
+
opts.on( '-k', '--keywords STRING', 'Keywords for select rows. Format: key1_col1&key2_col1%key1_col2&key2_col2' ) do |keywords|
|
34
|
+
options[:keywords] = keywords
|
35
|
+
end
|
36
|
+
|
37
|
+
options[:search_mode] = 'c'
|
38
|
+
opts.on( '-s', '--search STRING', 'c for match in every columns set, s some match in some column. Default c' ) do |search_mode|
|
39
|
+
options[:search_mode] = search_mode
|
40
|
+
end
|
41
|
+
|
42
|
+
options[:match_mode] = 'i'
|
43
|
+
opts.on( '-m', '--match_mode STRING', 'i string must include the keyword, c for fullmatch. Default i') do |match_mode|
|
44
|
+
options[:match_mode] = match_mode
|
45
|
+
end
|
46
|
+
|
47
|
+
options[:separator] = "\t"
|
48
|
+
opts.on( '-p', '--separator STRING', 'Separator used in fields. Default i') do |separator|
|
49
|
+
options[:separator] = separator
|
50
|
+
end
|
51
|
+
|
52
|
+
options[:reverse] = false
|
53
|
+
opts.on( '-r', '--reverse', 'Select not matching' ) do
|
54
|
+
options[:reverse] = true
|
55
|
+
end
|
56
|
+
|
57
|
+
options[:uniq] = false
|
58
|
+
opts.on( '-u', '--uniq', 'Delete redundant items' ) do
|
59
|
+
options[:uniq] = true
|
60
|
+
end
|
61
|
+
|
62
|
+
options[:header] = false
|
63
|
+
opts.on( '-H', '--header', 'indicate if files have header' ) do
|
64
|
+
options[:header] = true
|
65
|
+
end
|
66
|
+
|
67
|
+
# Set a banner, displayed at the top of the help screen.
|
68
|
+
opts.banner = "Usage: column_filter.rb -t tabulated_file \n\n"
|
69
|
+
|
70
|
+
# This displays the help screen
|
71
|
+
opts.on( '-h', '--help', 'Display this screen' ) do
|
72
|
+
puts opts
|
73
|
+
exit
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
77
|
+
optparse.parse!
|
78
|
+
|
79
|
+
##################################################################################################
|
80
|
+
## MAIN
|
81
|
+
##################################################################################################
|
82
|
+
abort('Tabulated file not specified') if options[:table_file].nil?
|
83
|
+
file_names = Dir.glob(options[:table_file])
|
84
|
+
input_files = load_several_files(file_names, options[:separator])
|
85
|
+
filtered_table = merge_and_filter_tables(input_files, options)
|
86
|
+
write_output_data(filtered_table)
|
87
|
+
|
data/bin/create_metric_table.rb
CHANGED
@@ -1,49 +1,37 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
metric_table.puts(header)
|
39
|
-
allTags = fixCols.concat(varTags)
|
40
|
-
hash.each do |name, fields|
|
41
|
-
array_temp = [name]
|
42
|
-
allTags.each do |tag|
|
43
|
-
array_temp << fields[tag]
|
44
|
-
end
|
45
|
-
metric_table.puts(array_temp.join("\t"))
|
46
|
-
end
|
47
|
-
metric_table.close()
|
48
|
-
|
49
|
-
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
5
|
+
require 'optparse'
|
6
|
+
require 'cmdtabs'
|
7
|
+
|
8
|
+
|
9
|
+
#####################################################################
|
10
|
+
## OPTPARSE
|
11
|
+
######################################################################
|
12
|
+
|
13
|
+
options = {}
|
14
|
+
OptionParser.new do |opts|
|
15
|
+
opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
|
16
|
+
|
17
|
+
options[:corrupted] = nil
|
18
|
+
opts.on("-c", "--corrupted PATH", "File where corrupted metrics are stored") do |path|
|
19
|
+
options[:corrupted] = path
|
20
|
+
end
|
21
|
+
|
22
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
23
|
+
puts opts
|
24
|
+
exit
|
25
|
+
end
|
26
|
+
end.parse!
|
27
|
+
|
28
|
+
##################################################################################################
|
29
|
+
## MAIN
|
30
|
+
##################################################################################################
|
31
|
+
metric_file = load_input_data(ARGV[0])
|
32
|
+
attributes = ARGV[1].split(',')
|
33
|
+
samples_tag = attributes.shift
|
34
|
+
metric_names, indexed_metrics = index_metrics(metric_file, attributes)
|
35
|
+
table_output, corrupted_records = create_table(indexed_metrics, samples_tag, attributes, metric_names)
|
36
|
+
write_output_data(table_output, ARGV[2])
|
37
|
+
write_output_data(corrupted_records, options[:corrupted]) if !options[:corrupted].nil? && !corrupted_records.empty?
|
@@ -1,6 +1,14 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
3
5
|
require 'optparse'
|
6
|
+
require 'cmdtabs'
|
7
|
+
|
8
|
+
|
9
|
+
#####################################################################
|
10
|
+
## OPTPARSE
|
11
|
+
######################################################################
|
4
12
|
|
5
13
|
options = {}
|
6
14
|
OptionParser.new do |opts|
|
@@ -12,8 +20,8 @@ OptionParser.new do |opts|
|
|
12
20
|
end
|
13
21
|
|
14
22
|
options[:col_index] = nil
|
15
|
-
opts.on("-x", "--column_index INTEGER", "Column index (
|
16
|
-
options[:col_index] = item.to_i
|
23
|
+
opts.on("-x", "--column_index INTEGER", "Column index (1 based) to use as reference") do |item|
|
24
|
+
options[:col_index] = item.to_i - 1
|
17
25
|
end
|
18
26
|
|
19
27
|
options[:sep] = ","
|
@@ -28,19 +36,12 @@ OptionParser.new do |opts|
|
|
28
36
|
end.parse!
|
29
37
|
|
30
38
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
target_field = fields[options[:col_index]]
|
40
|
-
target_field.split(options[:sep]).each do |val|
|
41
|
-
record = fields[0..(options[:col_index]-1)] + [val] + fields[(options[:col_index] + 1)..fields.length]
|
42
|
-
#record = fields[0..(options[:col_index] + 1)] + [val] + fields[(options[:col_index] + 1)..fields.length]
|
43
|
-
STDOUT.puts record.join("\t")
|
44
|
-
end
|
45
|
-
end
|
39
|
+
##################################################################################################
|
40
|
+
## MAIN
|
41
|
+
##################################################################################################
|
42
|
+
|
43
|
+
input_table = load_input_data(options[:input])
|
44
|
+
desagg_data = desaggregate_column(input_table, options[:col_index], options[:sep])
|
45
|
+
write_output_data(desagg_data)
|
46
|
+
|
46
47
|
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
## Script to transform xlsx to tabular file.
|
4
|
+
## By default, selects sheet 1 and column 1.
|
5
|
+
|
6
|
+
ROOT_PATH = File.dirname(__FILE__)
|
7
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
8
|
+
|
9
|
+
require 'optparse'
|
10
|
+
require 'cmdtabs'
|
11
|
+
require 'xsv'
|
12
|
+
|
13
|
+
#######################
|
14
|
+
## OPTPARSE
|
15
|
+
#######################
|
16
|
+
|
17
|
+
options = {}
|
18
|
+
OptionParser.new do |opts|
|
19
|
+
opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
|
20
|
+
|
21
|
+
options[:columns2extract] = [0]
|
22
|
+
opts.on("-c", "--columns2extract INTEGER", "Column position to extract (1 based). Default 1") do |data|
|
23
|
+
options[:columns2extract] = parse_column_indices(sep = ",", data)
|
24
|
+
end
|
25
|
+
|
26
|
+
options[:input_file] = nil
|
27
|
+
opts.on("-i", "--input_file PATH", "Input xlsx file") do |path|
|
28
|
+
options[:input_file] = path
|
29
|
+
end
|
30
|
+
|
31
|
+
options[:output_file] = 'table.txt'
|
32
|
+
opts.on("-o", "--output_file PATH", "Output tabular file") do |path|
|
33
|
+
options[:output_file] = path
|
34
|
+
end
|
35
|
+
|
36
|
+
options[:sheet_number] = 0
|
37
|
+
opts.on("-s", "--sheet_number INTEGER", "Sheet number to work with. Default 1") do |data|
|
38
|
+
options[:sheet_number] = data.to_i - 1
|
39
|
+
end
|
40
|
+
|
41
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
42
|
+
puts opts
|
43
|
+
exit
|
44
|
+
end
|
45
|
+
|
46
|
+
end.parse!
|
47
|
+
|
48
|
+
#######################
|
49
|
+
## MAIN
|
50
|
+
#######################
|
51
|
+
|
52
|
+
#See https://github.com/martijn/xsv
|
53
|
+
|
54
|
+
x = Xsv.open(options[:input_file])
|
55
|
+
sheet = x.sheets[options[:sheet_number]].to_a
|
56
|
+
storage = extract_columns(sheet, options[:columns2extract])
|
57
|
+
write_output_data(storage, options[:output_file])
|
data/bin/intersect_columns.rb
CHANGED
@@ -1,25 +1,10 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
3
5
|
require 'optparse'
|
6
|
+
require 'cmdtabs'
|
4
7
|
|
5
|
-
#####################################################################
|
6
|
-
## METHODS
|
7
|
-
######################################################################
|
8
|
-
|
9
|
-
def load_records(file, cols, sep)
|
10
|
-
recs = {}
|
11
|
-
File.open(file).each do |line|
|
12
|
-
fields = line.chomp.split(sep)
|
13
|
-
recs[cols.map{|c| fields[c]}] = true
|
14
|
-
end
|
15
|
-
return recs.keys
|
16
|
-
end
|
17
|
-
|
18
|
-
def print_recs(recs, sep)
|
19
|
-
recs.each do |rec|
|
20
|
-
puts rec.join(sep)
|
21
|
-
end
|
22
|
-
end
|
23
8
|
|
24
9
|
#####################################################################
|
25
10
|
## OPTPARSE
|
@@ -40,13 +25,13 @@ OptionParser.new do |opts|
|
|
40
25
|
end
|
41
26
|
|
42
27
|
options[:a_cols] = [0]
|
43
|
-
opts.on("-A", "--a_cols STRING", "Index of columns in base
|
44
|
-
options[:a_cols] =
|
28
|
+
opts.on("-A", "--a_cols STRING", "Index of columns in base 1 to compare") do |item|
|
29
|
+
options[:a_cols] = parse_column_indices(sep = ",", item)
|
45
30
|
end
|
46
31
|
|
47
32
|
options[:b_cols] = [0]
|
48
|
-
opts.on("-B", "--b_cols STRING", "Index of columns in base
|
49
|
-
options[:b_cols] =
|
33
|
+
opts.on("-B", "--b_cols STRING", "Index of columns in base 1 to compare") do |item|
|
34
|
+
options[:b_cols] = parse_column_indices(sep = ",", item)
|
50
35
|
end
|
51
36
|
|
52
37
|
options[:count] = false
|
@@ -59,6 +44,11 @@ OptionParser.new do |opts|
|
|
59
44
|
options[:keep] = item
|
60
45
|
end
|
61
46
|
|
47
|
+
options[:full] = false
|
48
|
+
opts.on("--full", "Give full record") do |item|
|
49
|
+
options[:full] = true
|
50
|
+
end
|
51
|
+
|
62
52
|
options[:sep] = "\t"
|
63
53
|
opts.on("-s", "--separator STRING", "column character separator") do |item|
|
64
54
|
options[:sep] = item
|
@@ -70,8 +60,11 @@ OptionParser.new do |opts|
|
|
70
60
|
end
|
71
61
|
end.parse!
|
72
62
|
|
73
|
-
|
74
|
-
|
63
|
+
input_data_a = load_input_data(options[:a_file], options[:sep])
|
64
|
+
input_data_b = load_input_data(options[:b_file], options[:sep])
|
65
|
+
|
66
|
+
a_records, full_a_rec = load_records(input_data_a, options[:a_cols], options[:full])
|
67
|
+
b_records, full_b_rec = load_records(input_data_b, options[:b_cols], options[:full])
|
75
68
|
|
76
69
|
common = a_records & b_records
|
77
70
|
a_only = a_records - common
|
@@ -82,12 +75,20 @@ if options[:count]
|
|
82
75
|
puts "c: #{common.length}"
|
83
76
|
else
|
84
77
|
if options[:keep] == 'c'
|
85
|
-
|
78
|
+
result = common
|
79
|
+
result = common.map{|r| full_a_rec[r] + full_b_rec[r]} if options[:full]
|
86
80
|
elsif options[:keep] == 'a'
|
87
|
-
|
81
|
+
result = a_only
|
82
|
+
result = a_only.map{|r| full_a_rec[r]} if options[:full]
|
88
83
|
elsif options[:keep] == 'b'
|
89
|
-
|
84
|
+
result = b_only
|
85
|
+
result = b_only.map{|r| full_b_rec[r]} if options[:full] # b-only keys must be looked up in B's full-record index, not A's
|
90
86
|
elsif options[:keep] == 'ab'
|
91
|
-
|
87
|
+
if options[:full]
|
88
|
+
a_only = a_only.map{|r| full_a_rec[r]}
|
89
|
+
b_only = b_only.map{|r| full_b_rec[r]} # b-only keys must be looked up in B's full-record index, not A's
|
90
|
+
end
|
91
|
+
result = a_only + b_only
|
92
92
|
end
|
93
|
+
write_output_data(result, nil, options[:sep])
|
93
94
|
end
|
data/bin/merge_tabular.rb
CHANGED
@@ -1,41 +1,28 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
|
+
ROOT_PATH = File.dirname(__FILE__)
|
3
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
+
require 'optparse'
|
6
|
+
require 'cmdtabs'
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
fields = line.split("\t", n_fields).map{|field|
|
13
|
-
if field == ""
|
14
|
-
'-'
|
15
|
-
else
|
16
|
-
field
|
17
|
-
end
|
18
|
-
}
|
19
|
-
next if fields.count('-') == fields.length #skip blank records
|
20
|
-
id = fields.shift
|
21
|
-
local_length = fields.length
|
22
|
-
if !parent_table.has_key?(id)
|
23
|
-
parent_table[id] = Array.new(table_length,'-')
|
24
|
-
elsif parent_table[id].length < table_length
|
25
|
-
parent_table[id].concat(Array.new(table_length-parent_table[id].length,'-'))
|
26
|
-
end
|
27
|
-
parent_table[id].concat(fields)
|
8
|
+
#####################################################################
|
9
|
+
## OPTIONS
|
10
|
+
######################################################################
|
11
|
+
options = {}
|
12
|
+
OptionParser.new do |opts|
|
13
|
+
opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
|
28
14
|
|
29
|
-
|
15
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
16
|
+
puts opts
|
17
|
+
exit
|
18
|
+
end
|
19
|
+
end.parse!
|
30
20
|
|
31
|
-
table_length += local_length
|
32
|
-
parent_table.each do |id, fields|
|
33
|
-
diference = table_length - fields.length
|
34
|
-
fields.concat(Array.new(diference,'-')) if diference > 0
|
35
|
-
end
|
36
21
|
|
37
|
-
|
22
|
+
##################################################################################################
|
23
|
+
## MAIN
|
24
|
+
##################################################################################################
|
38
25
|
|
39
|
-
|
40
|
-
|
41
|
-
|
26
|
+
files = load_files(ARGV)
|
27
|
+
merged = merge_files(files)
|
28
|
+
write_output_data(merged)
|
@@ -1,6 +1,15 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
|
+
ROOT_PATH = File.dirname(__FILE__)
|
4
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
3
5
|
require 'optparse'
|
6
|
+
require 'cmdtabs.rb'
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
#####################################################################
|
11
|
+
## OPTPARSE
|
12
|
+
######################################################################
|
4
13
|
|
5
14
|
options = {}
|
6
15
|
OptionParser.new do |opts|
|
@@ -16,44 +25,47 @@ OptionParser.new do |opts|
|
|
16
25
|
options[:index_file] = item
|
17
26
|
end
|
18
27
|
|
28
|
+
options[:output_file] = nil
|
29
|
+
opts.on("-o", "--output_file PATH", "Output file ") do |item|
|
30
|
+
options[:output_file] = item
|
31
|
+
end
|
32
|
+
|
19
33
|
options[:input_separator] = "\t"
|
20
34
|
opts.on("-s", "--input_separator STRING", "Separator character") do |item|
|
21
35
|
options[:input_separator] = item
|
22
36
|
end
|
23
37
|
|
24
38
|
options[:columns] = [1]
|
25
|
-
opts.on("-c", "--columns STRING", "Columns indexes, comma separated, to perform the ID translations.") do |item|
|
26
|
-
|
39
|
+
opts.on("-c", "--columns STRING", "Columns indexes (1 based), comma separated, to perform the ID translations.") do |item|
|
40
|
+
options[:columns] = parse_column_indices(sep = ",", item)
|
27
41
|
end
|
28
42
|
|
29
43
|
options[:from] = 0
|
30
44
|
opts.on("-f", "--from INTEGER", "Column in index file to take reference value. Default 1. Numeration is 1 based") do |item|
|
31
|
-
|
45
|
+
options[:from] = item.to_i - 1
|
32
46
|
end
|
33
47
|
|
34
48
|
options[:to] = 1
|
35
49
|
opts.on("-t", "--to INTEGER", "Column in index file to take the value that will be used in substitution. Default 2. Numeration is 1 based") do |item|
|
36
|
-
|
50
|
+
options[:to] = item.to_i - 1
|
37
51
|
end
|
38
52
|
|
53
|
+
options[:remove_untranslated] = false
|
54
|
+
opts.on("-u", "--remove_untranslated", "Activate this flag for remove the untranslated entries") do
|
55
|
+
options[:remove_untranslated] = true
|
56
|
+
end
|
39
57
|
end.parse!
|
40
58
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
new_string = index[fields[col]]
|
55
|
-
fields[col] = new_string if !new_string.nil?
|
56
|
-
end
|
57
|
-
f.puts fields.join(options[:input_separator])
|
58
|
-
end
|
59
|
-
end
|
59
|
+
|
60
|
+
##################################################################################################
|
61
|
+
## MAIN
|
62
|
+
##################################################################################################
|
63
|
+
|
64
|
+
input_index = load_input_data(options[:index_file])
|
65
|
+
translation_index = index_array(input_index, options[:from], options[:to])
|
66
|
+
|
67
|
+
input_table = load_input_data(options[:input_file], options[:input_separator])
|
68
|
+
|
69
|
+
tabular_output_translated, _ = name_replaces(input_table, options[:input_separator], options[:columns], translation_index, options[:remove_untranslated])
|
70
|
+
|
71
|
+
write_output_data(tabular_output_translated, options[:output_file], options[:input_separator])
|