cmdtabs 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/Rakefile +9 -4
- data/TEST_file +1 -0
- data/bin/aggregate_column_data.rb +20 -25
- data/bin/column_filter.rb +87 -0
- data/bin/create_metric_table.rb +35 -47
- data/bin/desaggregate_column_data.rb +18 -17
- data/bin/excel_to_tabular.rb +57 -0
- data/bin/intersect_columns.rb +29 -28
- data/bin/merge_tabular.rb +21 -34
- data/bin/standard_name_replacer.rb +35 -23
- data/bin/table_linker.rb +56 -29
- data/bin/tag_table.rb +52 -0
- data/cmdtabs.gemspec +3 -1
- data/data_test_scripts/cluster_genes_dis_AGG_stdin_to_test +2 -0
- data/data_test_scripts/cluster_genes_dis_AGG_to_test +2 -0
- data/data_test_scripts/cluster_genes_dis_DESAGG_stdin_to_test +2 -0
- data/data_test_scripts/cluster_genes_dis_DESAGG_to_test +2 -0
- data/data_test_scripts/cluster_genes_from_excel.txt_to_test +2 -0
- data/data_test_scripts/column_matching_hard_to_test +3 -0
- data/data_test_scripts/column_matching_hard_various_keys_and_every_columns_header_to_test +3 -0
- data/data_test_scripts/column_matching_hard_various_keys_and_every_columns_to_test +2 -0
- data/data_test_scripts/column_matching_hard_various_keys_and_some_columns_to_test +10 -0
- data/data_test_scripts/column_matching_hard_various_keys_to_test +7 -0
- data/data_test_scripts/column_matching_soft_1_column_reverse_to_test +21 -0
- data/data_test_scripts/column_matching_soft_1_column_to_test +29 -0
- data/data_test_scripts/column_matching_soft_1_column_uniq_to_test +12 -0
- data/data_test_scripts/column_matching_soft_and_every_columns_to_test +22 -0
- data/data_test_scripts/column_matching_soft_and_some_columns_to_test +29 -0
- data/data_test_scripts/column_matching_soft_to_test +29 -0
- data/data_test_scripts/intersect_columns_count_to_test +3 -0
- data/data_test_scripts/intersect_columns_default_stdin_a_to_test +5 -0
- data/data_test_scripts/intersect_columns_default_stdin_b_to_test +5 -0
- data/data_test_scripts/intersect_columns_default_to_test +5 -0
- data/data_test_scripts/intersect_columns_full_to_test +5 -0
- data/data_test_scripts/linked_table_2_to_test +7 -0
- data/data_test_scripts/linked_table_matches_to_test +7 -0
- data/data_test_scripts/linked_table_to_test +12 -0
- data/data_test_scripts/merge_disease_cluster_gene_to_test +12 -0
- data/data_test_scripts/metric_table_to_test +3 -0
- data/data_test_scripts/replaced_name_to_test +12 -0
- data/data_test_scripts/replaced_name_untranstaled_to_test +7 -0
- data/data_test_scripts/tag_table_header_to_test +8 -0
- data/data_test_scripts/tag_table_to_test +8 -0
- data/lib/cmdtabs/cmdtabs_lib.rb +351 -0
- data/lib/cmdtabs/version.rb +1 -1
- data/lib/cmdtabs.rb +2 -0
- data/test_scripts.sh +88 -0
- metadata +59 -7
- data/bin/table_header.rb +0 -207
data/bin/table_header.rb
DELETED
@@ -1,207 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'find'
|
4
|
-
require 'optparse'
|
5
|
-
|
6
|
-
#################################################################################################
|
7
|
-
## FUNCTIONS
|
8
|
-
#################################################################################################
|
9
|
-
# Turn a comma-separated list of column indexes (e.g. "0,2,5") into an
# array of integers. Each piece is converted with String#to_i, so a piece
# that does not start with digits becomes 0.
def parse_cols(col_string)
  col_string.split(',').map(&:to_i)
end
|
13
|
-
|
14
|
-
# Build the row-filtering pattern: a hash mapping each filtering column index
# to the list of keywords searched in that column.
#
# col_filter - array of column indexes, or nil when no filter was requested.
# keywords   - string with one keyword group per column; groups are separated
#              by '%' and the keywords inside a group by '&'
#              (e.g. "k1&k2%k3"), or nil.
#
# Returns an empty hash when either argument is nil.
# Aborts the program (message on stderr, non-zero exit status) when the number
# of keyword groups differs from the number of filtering columns, so that the
# failure is visible to the calling shell and does not pollute stdout.
def build_pattern(col_filter, keywords)
  return {} if col_filter.nil? || keywords.nil?

  keys_per_col = keywords.split('%')
  if keys_per_col.length != col_filter.length
    abort('Number of keywords not equal to number of filtering columns')
  end

  # Pair every column with its own keyword group, in order.
  col_filter.zip(keys_per_col).each_with_object({}) do |(col, keys), pattern|
    pattern[col] = keys.split('&')
  end
end
|
29
|
-
|
30
|
-
# Tell whether a field matches a keyword.
#
# string     - field content; nil never matches.
# key        - keyword to look for.
# match_mode - 'i' when the field only has to include the keyword,
#              'c' when the field must be equal to the keyword.
#              Any other mode never matches.
#
# Returns true or false.
def match(string, key, match_mode)
  return false if string.nil?

  case match_mode
  when 'i' then string.include?(key)
  when 'c' then string == key
  else false
  end
end
|
43
|
-
|
44
|
-
# Decide whether a row must be discarded according to the filtering pattern.
#
# header      - array with the fields of the row.
# pattern     - hash {column index => array of keywords}; a column is "hit"
#               when at least one of its keywords matches the field.
# search_mode - 's': the row is kept as soon as some column is hit.
#               Any other value (including the default 'c'): every column of
#               the pattern must be hit for the row to be kept.
# match_mode  - passed through to #match ('i' include, 'c' full match).
# reverse     - when true the decision is inverted.
#
# Returns true when the row has to be filtered out, false when it is kept.
# An empty (or nil) pattern means that no filter was requested, so the row is
# always kept; in particular reverse must not turn "no filter" into
# "discard everything".
def filter(header, pattern, search_mode, match_mode, reverse = false)
  return false if pattern.nil? || pattern.empty?

  column_hit = lambda do |col, keys|
    keys.any? { |key| match(header[col], key, match_mode) }
  end

  discard =
    if search_mode == 's'
      pattern.none? { |col, keys| column_hit.call(col, keys) }
    else
      !pattern.all? { |col, keys| column_hit.call(col, keys) }
    end

  reverse ? !discard : discard
end
|
70
|
-
|
71
|
-
# Read a tabulated file and append the requested columns of every accepted
# row to the accumulators.
#
# file    - path of the file to read, or '-' to read from STDIN.
# names   - array with one accumulator array per requested column; names[i]
#           receives the values of options[:column][i].
# options - hash of options; the keys read here are :column, :separator,
#           :uniq, :search_mode, :match_mode and :reverse.
# pattern - filtering pattern (see #filter); nil or empty disables filtering.
#
# Returns names (the same object, updated in place).
#
# Notes:
# - The uniqueness flag is read from options[:uniq], which is the key the
#   command line parser sets; the legacy :check_uniq key is still honoured.
# - Accumulators are addressed by position in options[:column], so a column
#   index requested twice fills both of its accumulators.
# - Files opened here are closed when reading finishes; STDIN is left open.
def check_file(file, names, options, pattern)
  uniq = options[:uniq] || options[:check_uniq]
  unfiltered = pattern.nil? || pattern.empty?

  collect = lambda do |input|
    input.each_line do |line|
      header = line.chomp.split(options[:separator])
      if !unfiltered && filter(header, pattern, options[:search_mode], options[:match_mode], options[:reverse])
        next
      end

      options[:column].each_with_index do |col, i|
        field = header[col]
        # With uniq enabled each column keeps only the first occurrence of a value.
        names[i] << field unless uniq && names[i].include?(field)
      end
    end
  end

  if file == '-'
    collect.call(STDIN)
  else
    File.open(file) { |input| collect.call(input) }
  end
  names
end
|
91
|
-
|
92
|
-
# Map every requested column index to its position in the column list.
# When an index appears more than once, the last position wins.
def relations(column)
  column.each_with_index.to_h
end
|
99
|
-
|
100
|
-
# Print the collected columns to stdout as tab-separated rows.
#
# names - array of columns; names[x][y] is the value of column x in row y.
#
# Nothing is printed when names is empty. The number of rows is the length of
# the longest column, so values of a longer column are not dropped when the
# columns differ in length; missing cells are printed as empty fields.
def report(names)
  return if names.empty?

  n_rows = names.map(&:length).max
  n_rows.times do |y|
    row = names.map { |column| column[y].to_s }
    print(row.join("\t"), "\n")
  end
end
|
113
|
-
|
114
|
-
#################################################################################################
|
115
|
-
## INPUT PARSING
|
116
|
-
#################################################################################################
|
117
|
-
# Default value of every command line option.
options = {
  table_file: nil,    # input file, '-' for STDIN (mandatory)
  column: [0],        # column indexes to show
  col_filter: nil,    # column indexes where keywords are searched
  keywords: nil,      # keyword groups, one per filtering column
  search_mode: 'c',   # 'c' a match per column, 's' some match in some column
  match_mode: 'i',    # 'i' include the keyword, 'c' full match
  separator: "\t",    # field separator
  reverse: false,     # select the rows that do not match
  uniq: false         # delete redundant items
}

optparse = OptionParser.new do |opts|
  opts.on( '-t', '--table_file FILE', 'Input tabulated file' ) do |table_file|
    options[:table_file] = table_file
  end

  opts.on( '-c', '--column STRING', 'Column/s to show. Format: x,y,z..' ) do |column|
    options[:column] = parse_cols(column)
  end

  opts.on( '-f', '--col_filter STRING', 'Select columns where search keywords. Format: x,y,z..' ) do |col_filter|
    options[:col_filter] = parse_cols(col_filter)
  end

  opts.on( '-k', '--keywords STRING', 'Keywords for select rows. Format: key1_col1&key2_col1%key1_col2&key2_col2' ) do |keywords|
    options[:keywords] = keywords
  end

  opts.on( '-s', '--search STRING', 'c a match per column, s some match in some column. Default c' ) do |search_mode|
    options[:search_mode] = search_mode
  end

  opts.on( '-m', '--match_mode STRING', 'i string must include the keyword, c for fullmatch. Default i') do |match_mode|
    options[:match_mode] = match_mode
  end

  # The default separator is a tab character (the help text used to say "i").
  opts.on( '-p', '--separator STRING', 'Separator used in fields. Default tab') do |separator|
    options[:separator] = separator
  end

  opts.on( '-r', '--reverse', 'Select not matching' ) do
    options[:reverse] = true
  end

  opts.on( '-u', '--uniq', 'Delete redundant items' ) do
    options[:uniq] = true
  end

  # Set a banner, displayed at the top of the help screen.
  opts.banner = "Usage: table_header.rb -t tabulated_file \n\n"

  # This displays the help screen
  opts.on( '-h', '--help', 'Display this screen' ) do
    puts opts
    exit
  end

end # End opts

# parse options and remove from ARGV
optparse.parse!
|
178
|
-
|
179
|
-
##################################################################################################
|
180
|
-
## MAIN
|
181
|
-
##################################################################################################
|
182
|
-
# The input table is mandatory. abort writes the message to stderr and exits
# with a failure status, so a calling script can detect the problem.
abort('Tabulated file not specified') if options[:table_file].nil?

pattern = build_pattern(options[:col_filter], options[:keywords])

# One accumulator array per requested column.
names = Array.new(options[:column].length) { [] }

if options[:table_file].include?('*')
  # A '*' in the argument switches to multi-file mode: the argument is used as
  # a regular expression (not as a shell glob) against the basename of every
  # file found under the current working directory.
  file_regex = Regexp.new(options[:table_file])
  Find.find(Dir.pwd) do |path|
    next if FileTest.directory?(path)

    names = check_file(path, names, options, pattern) if File.basename(path) =~ file_regex
  end
else
  names = check_file(options[:table_file], names, options, pattern)
end

report(names)
|