mddb 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/aggregater +71 -0
- data/bin/average +59 -0
- data/bin/binner +95 -0
- data/lib/mddb/version.rb +1 -1
- metadata +16 -10
data/bin/aggregater
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "#{__FILE__}/../stat"
|
3
|
+
require 'micro-optparse'
|
4
|
+
|
5
|
+
# Command-line options for the aggregater script, parsed by micro-optparse.
# The banner previously read "Binning" (copied from the binner script).
@options = Parser.new do |p|
  p.banner = "Aggregate, see below for options"
  p.version = "0.01"
  p.option :live, "print the live status", :default => false
  p.option :header, "use if the stream contains a header", :default => false
  p.option :delimiter, "specify the delimiting character", :default => "tsv", :value_in_set => ['tsv', 'csv', 'ssv']
  p.option :drop, "Drop lines that are poorly formatted, instead of failing", :default => false
  # At least one leading column must act as the grouping key
  p.option :keys, "Number of columns to use as keys", :default => 1, :value_satisfies => lambda {|x| x > 0 }
end.process!
|
14
|
+
|
15
|
+
# Tells whether the line ARGF has just read should be treated as a header:
# the --header option must be set and ARGF must be on its first line.
# Returns the falsy option value itself when the option is off or missing.
def header?
  header_expected = @options[:header]
  header_expected && ARGF.lineno == 1
end
|
18
|
+
|
19
|
+
# Sums the value columns of consecutive rows that share the same key columns
# and prints one row per run of identical keys. Only the previous key is
# remembered, so rows with equal keys must be adjacent in the stream.
#
# The first @options[:keys] columns form the key; every remaining column is
# converted with to_f and accumulated.
catch :shit_stream do
  input = 0
  output = 0
  stats = Stats.new "Aggregate", :input, :output, :ratio, :elapsed
  # Honour --delimiter for both splitting and joining (it was hard-coded to a tab)
  delimiter = {'tsv' => "\t", 'csv' => ",", 'ssv' => " "}[@options[:delimiter]]
  last = @options[:keys] - 1
  old_key = nil
  totals = []

  # Formats one output row: key columns, then the accumulated totals
  format_row = lambda do |key, sums|
    key.map {|k| k.to_s}.join(delimiter) + delimiter + sums.map {|d| d.to_s}.join(delimiter)
  end

  ARGF.each do |line|
    stats.clock if ARGF.lineno == 1
    input += 1
    line.chomp!

    if header?
      # The header is passed through untouched and counted as output
      puts line
      output += 1
      next
    end

    cols = line.split(delimiter)
    if cols.length <= last
      # Fewer columns than key columns (this includes blank lines): skip the
      # row with --drop, otherwise stop instead of raising NoMethodError on nil
      unless @options[:drop]
        $stderr.puts "There were #{cols.length} columns on line #{ARGF.lineno}, but #{last + 1} key columns were requested"
        throw :shit_stream
      end
    else
      key = cols[0..last]
      values = cols[(last + 1)..-1].map {|d| d.to_f}

      if key != old_key
        # A new run starts: flush the previous run, unless this is the first row
        unless old_key.nil?
          puts format_row.call(old_key, totals)
          output += 1
        end
        old_key = key
        totals = Array.new(values.length, 0)
      end

      values.each_with_index {|value, i| totals[i] += value}
    end

    stats.show(input,output) if @options[:live]
  end

  # Flush the final run; an empty stream has nothing to flush
  unless old_key.nil?
    output += 1
    puts format_row.call(old_key, totals)
  end
  stats.finish(input,output)
end
|
data/bin/average
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "#{__FILE__}/../stat"
|
3
|
+
require 'micro-optparse'
|
4
|
+
|
5
|
+
# Validator for the --columns option: every entry must convert (via to_i) to
# an integer greater than zero, because columns are numbered from 1.
# An empty list is valid.
valid_columns = lambda {|x|
  x.all? {|d| d.to_i > 0}
}
|
12
|
+
|
13
|
+
# Command-line options for the average script, parsed by micro-optparse.
@options = Parser.new do |opts|
  opts.banner = "Average, see below for options"
  opts.version = "0.01"

  # Stream handling
  opts.option :live, "print the live status", :default => false
  opts.option :header, "use if the stream contains a header", :default => false
  opts.option :delimiter, "specify the delimiting character", :default => "tsv", :value_in_set => ['tsv', 'csv', 'ssv']
  opts.option :drop, "Drop lines that are poorly formatted, instead of failing", :default => false

  # Which columns to average; checked by the valid_columns lambda
  opts.option :columns, "The columns whose average you wish to obtain, columns are integers starting at 1", :default => [], :value_satisfies => valid_columns
end.process!
|
22
|
+
|
23
|
+
# Tells whether the line ARGF has just read should be treated as a header:
# the --header option must be set and ARGF must be on its first line.
# Returns the falsy option value itself when the option is off or missing.
def header?
  header_expected = @options[:header]
  header_expected && ARGF.lineno == 1
end
|
26
|
+
|
27
|
+
# Divides each selected column of every row by that row's last column and
# prints the row. All fields are converted with to_f first.
#
# --columns are 1-based on the command line; convert to 0-based indices.
columns = @options[:columns].map {|d| d.to_i - 1}
# columns.max is nil when no columns were requested; -1 makes the width check
# below pass for every row instead of raising on nil + 1
max_col = columns.max || -1
# Honour --delimiter for both splitting and joining (it was hard-coded to a tab)
delimiter = {'tsv' => "\t", 'csv' => ",", 'ssv' => " "}[@options[:delimiter]]
input = 0.0
output = 0.0
stat = Stats.new("Average", :input, :output, :elapsed)
catch :shit_happened do
  ARGF.each do |line|
    stat.clock if ARGF.lineno == 1
    input += 1
    if header?
      # The header is passed through untouched and counted as output
      puts line
      output += 1
    else
      cols = line.chomp.split(delimiter).map {|d| d.to_f}

      if (max_col + 1) > cols.length
        # Row is too short for the requested columns: skip it with --drop,
        # otherwise report on stderr (keeping stdout clean) and stop reading
        unless @options[:drop]
          $stderr.puts "There are only #{cols.length} columns, but you said there was a column #{max_col + 1}"
          throw :shit_happened
        end
      else
        # cols.last is re-read on every iteration, as in the original code
        columns.each do |index|
          cols[index] = cols[index]/cols.last
        end

        puts cols.map {|c| c.to_s}.join(delimiter)
        output += 1
      end
      stat.show(input,output) if @options[:live]
    end
  end
end
# Runs even after an aborted stream, because it sits outside the catch block
stat.finish(input,output)
|
data/bin/binner
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'micro-optparse'
|
3
|
+
require "#{__FILE__}/../stat.rb"
|
4
|
+
|
5
|
+
class String
  # Returns true when Kernel#Float accepts the whole string as a number,
  # false otherwise.
  def is_numeric?
    Float(self)
    true
  rescue ArgumentError
    # Float raises ArgumentError for text it cannot parse; rescuing only that
    # class avoids hiding unrelated errors the way a bare rescue modifier does
    false
  end
end
|
10
|
+
|
11
|
+
# Validator for the --bins option: every entry must be numeric (per
# String#is_numeric?) and not negative. An empty list is valid.
all_are_numeric = lambda {|x|
  x.all? {|width| width.is_numeric? && width.to_f >= 0}
}
|
17
|
+
|
18
|
+
# Command-line options for the binner script, parsed by micro-optparse.
@options = Parser.new do |opts|
  opts.banner = "Binning, see below for options"
  opts.version = "0.01"

  # Stream handling
  opts.option :live, "print the live status", :default => false
  opts.option :header, "use if the stream contains a header", :default => false
  opts.option :delimiter, "specify the delimiting character", :default => "tsv", :value_in_set => ['tsv', 'csv', 'ssv']
  opts.option :emit, "Emit 1 for map-reduce", :default => false
  opts.option :drop, "Drop lines that are poorly formatted, instead of failing", :default => false

  # One width per input column; checked by the all_are_numeric lambda
  opts.option :bins, "bin width for each column, a width of 0 leaves a column unchanged", :default => [], :value_satisfies => all_are_numeric
end.process!
|
29
|
+
|
30
|
+
# Convert the bin widths to floats in place, so later code can compare and
# divide by them directly.
@options[:bins].map!(&:to_f)

# Separator character for each value the --delimiter option accepts.
@delimiters = {
  'tsv' => "\t",
  'csv' => ",",
  'ssv' => " "
}

# Separator selected for this run.
@delimiter = @delimiters[@options[:delimiter]]
|
35
|
+
|
36
|
+
# Tells whether the line ARGF has just read should be treated as a header:
# the --header option must be set and ARGF must be on its first line.
# Returns the falsy option value itself when the option is off or missing.
def header?
  header_expected = @options[:header]
  header_expected && ARGF.lineno == 1
end
|
39
|
+
|
40
|
+
# Snaps +value+ down to the lower edge of its bin of size +width+.
# A width of zero means "no binning": the value is returned unchanged.
# The result is rounded to 6 decimal places.
def bin(value, width)
  return value if width == 0.0

  bin_index = (value/width).floor
  (bin_index * width).round(6)
end
|
47
|
+
|
48
|
+
# Suffix appended to every data row: the delimiter followed by "1" when the
# --emit option is set, otherwise an empty string.
def emit
  return "" unless @options[:emit]

  @delimiter + "1"
end
|
51
|
+
|
52
|
+
# Suffix appended to the header row: the delimiter followed by "count" when
# the --emit option is set, otherwise an empty string.
def emit_header
  return "" unless @options[:emit]

  @delimiter + "count"
end
|
55
|
+
|
56
|
+
# Splits one input line into its fields using the configured delimiter.
def columns(line)
  fields = line.split(@delimiter)
  fields
end
|
59
|
+
|
60
|
+
# Decides what to do with a row whose field count may not match the number
# of bin widths given on the command line.
#
# Returns false when the counts match (keep the row) and true when they
# differ and --drop is set (skip the row). When they differ and --drop is
# not set, explains the problem on stderr and throws :dropped.
def drop?(line)
  expected_width = @options[:bins].length
  return false if line.length == expected_width
  return true if @options[:drop]

  $stderr.puts "There were #{line.length} columns on line #{ARGF.lineno}, but you gave #{@options[:bins].length} bins"
  $stderr.puts "Perhaps this wasn't a #{@options[:delimiter]}. Try the -d --help for a list of filetypes"
  $stderr.puts "If you think that some lines might be an issue, use the -r flag to drop bad lines"
  throw :dropped
end
|
74
|
+
|
75
|
+
# Reads rows from ARGF, bins every field with the width configured for its
# column, and prints the result. drop? may throw :dropped, which leaves this
# block before stats.finish is reached.
catch :dropped do
  lines_in = 0
  lines_out = 0
  stats = Stats.new "Binning", :input, :output, :dropped, :elapsed

  ARGF.each do |line|
    lines_in += 1
    text = line.chomp
    fields = text.split(@delimiter)

    unless drop? fields
      if header?
        # The header is echoed with a leading "#" plus the optional count column
        puts "#" + text + emit_header
      else
        widths = @options[:bins]
        binned = fields.each_with_index.map {|field, i| bin(field.to_f, widths[i])}
        puts binned.join(@delimiter) + emit
      end
      lines_out += 1
    end

    # Progress is reported for every line, including dropped ones
    stats.show(lines_in,lines_out) if @options[:live]
  end

  stats.finish(lines_in,lines_out)
end
|
data/lib/mddb/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mddb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-16 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &70129250480800 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70129250480800
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: mongo_mapper
|
27
|
-
requirement: &
|
27
|
+
requirement: &70129250480340 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70129250480340
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hirb
|
38
|
-
requirement: &
|
38
|
+
requirement: &70129250479880 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70129250479880
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: thor
|
49
|
-
requirement: &
|
49
|
+
requirement: &70129250479340 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,11 +54,14 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70129250479340
|
58
58
|
description: MDDB makes analysing molecular dynamics simulations easy
|
59
59
|
email:
|
60
60
|
- thom.mulvaney@gmail.com
|
61
61
|
executables:
|
62
|
+
- aggregater
|
63
|
+
- average
|
64
|
+
- binner
|
62
65
|
- mddb
|
63
66
|
extensions: []
|
64
67
|
extra_rdoc_files: []
|
@@ -66,6 +69,9 @@ files:
|
|
66
69
|
- .gitignore
|
67
70
|
- Gemfile
|
68
71
|
- Rakefile
|
72
|
+
- bin/aggregater
|
73
|
+
- bin/average
|
74
|
+
- bin/binner
|
69
75
|
- bin/mddb
|
70
76
|
- config.yml
|
71
77
|
- lib/mddb.rb
|