mddb 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/aggregater +71 -0
- data/bin/average +59 -0
- data/bin/binner +95 -0
- data/lib/mddb/version.rb +1 -1
- metadata +16 -10
data/bin/aggregater
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "#{__FILE__}/../stat"
|
3
|
+
require 'micro-optparse'
|
4
|
+
|
5
|
+
# Command-line options for the aggregater script, declared with the
# micro-optparse Parser DSL. The parsed values are read further down
# through @options[:name].
@options = Parser.new do |p|
  # The banner used to read "Binning", a leftover from the binner script.
  p.banner = "Aggregate, see below for options"
  p.version = "0.01"
  p.option :live, "print the live status", :default => false
  p.option :header, "use if the stream contains a header", :default => false
  p.option :delimiter, "specify the delimiting character", :default => "tsv", :value_in_set => ['tsv', 'csv', 'ssv']
  p.option :drop, "Drop lines that are poorly formatted, instead of failing", :default => false
  # At least one leading column has to act as the grouping key.
  p.option :keys, "Number of columns to use as keys", :default => 1, :value_satisfies => lambda { |x| x > 0 }
end.process!
|
14
|
+
|
15
|
+
# Tells whether the line ARGF has just read is the header row: the --header
# option must be set and ARGF must be on its first line.
def header?
  header_requested = @options[:header]
  header_requested && ARGF.lineno == 1
end
|
18
|
+
|
19
|
+
# Sums the value columns of consecutive rows that share a key.
#
# Every row read from ARGF is split on the character selected by --delimiter.
# The first --keys columns form the key; the remaining columns are converted
# with to_f and added to the running totals of the current group. When the key
# changes, the finished group is printed as its key columns followed by its
# totals. Only consecutive rows are compared, so the input is presumably
# expected to arrive sorted by key -- confirm against the callers.
#
# Differences from the previous revision:
# * empty or header-only input no longer crashes on a nil key at the end;
# * rows that cannot be aggregated (too few columns to form the key, e.g. a
#   blank line, or more value columns than the group they belong to) are
#   skipped when --drop is set and otherwise abort with a message on stderr
#   instead of raising NoMethodError;
# * --delimiter is honoured for both splitting and joining (tab by default).
catch :shit_stream do
  input = 0
  output = 0
  stats = Stats.new "Aggregate", :input, :output, :ratio, :elapsed
  delimiter = { 'tsv' => "\t", 'csv' => ",", 'ssv' => " " }[@options[:delimiter]]
  last = @options[:keys] - 1
  old_key = nil
  totals = []

  ARGF.each do |line|
    stats.clock if ARGF.lineno == 1
    input += 1
    line.chomp!
    if header?
      puts line
      output += 1
    else
      cols = line.split(delimiter)
      key = cols[0..last]

      # The slice is nil when the row has fewer columns than --keys.
      values = cols[(last + 1)..-1]
      values = values.map { |d| d.to_f } if values

      if values.nil? || (key == old_key && values.length > totals.length)
        next if @options[:drop]
        abort "Line #{ARGF.lineno} is poorly formatted (#{cols.length} columns); use the --drop flag to skip such lines"
      end

      # A new key closes the previous group (if any) and starts a fresh one.
      if key != old_key
        unless old_key.nil?
          puts old_key.join(delimiter) + delimiter + totals.join(delimiter)
          output += 1
        end
        old_key = key
        totals = Array.new(values.length, 0)
      end

      values.each_with_index { |value, i| totals[i] += value }

      stats.show(input, output) if @options[:live]
    end
  end

  # Flush the last group; there is none when no data row was read.
  unless old_key.nil?
    puts old_key.join(delimiter) + delimiter + totals.join(delimiter)
    output += 1
  end
  stats.finish(input, output)
end
|
data/bin/average
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "#{__FILE__}/../stat"
|
3
|
+
require 'micro-optparse'
|
4
|
+
|
5
|
+
# Validator for the --columns option: accepts the list only when every entry
# converts (via to_i) to an integer greater than zero. An empty list passes.
valid_columns = lambda do |x|
  x.all? { |entry| entry.to_i > 0 }
end
|
12
|
+
|
13
|
+
# Command-line options for the average script, declared with the
# micro-optparse Parser DSL and read below through @options[:name].
@options = Parser.new do |opts|
  opts.banner = "Average, see below for options"
  opts.version = "0.01"

  # Flags shared with the sibling stream scripts.
  opts.option :live, "print the live status", :default => false
  opts.option :header, "use if the stream contains a header", :default => false
  opts.option :delimiter, "specify the delimiting character", :default => "tsv", :value_in_set => ['tsv', 'csv', 'ssv']
  opts.option :drop, "Drop lines that are poorly formatted, instead of failing", :default => false

  # 1-based column numbers, checked by the valid_columns lambda.
  opts.option :columns, "The columns whose average you wish to obtain, columns are integers starting at 1", :default => [], :value_satisfies => valid_columns
end.process!
|
22
|
+
|
23
|
+
# Tells whether the line ARGF has just read is the header row: the --header
# option must be set and ARGF must be on its first line.
def header?
  header_requested = @options[:header]
  header_requested && ARGF.lineno == 1
end
|
26
|
+
|
27
|
+
# Divides the selected columns of every row by the value in the row's last
# column and prints the row. All fields are converted with to_f first.
#
# Differences from the previous revision:
# * running without --columns no longer raises on a nil max_col; rows are
#   passed through with nothing divided;
# * the divisor is captured once per row, so listing the last column itself
#   in --columns no longer changes the divisor used for later columns;
# * the "not enough columns" message goes to stderr so it cannot end up in the
#   data stream, and such rows are skipped when --drop is set;
# * --delimiter is honoured for both splitting and joining (tab by default);
# * the unused local `options` is gone.

# --columns is 1-based on the command line; convert to 0-based indices.
columns = @options[:columns].map { |d| d.to_i - 1 }
max_col = columns.max # nil when no columns were requested
delimiter = { 'tsv' => "\t", 'csv' => ",", 'ssv' => " " }[@options[:delimiter]]
input = 0.0
output = 0.0
stat = Stats.new("Average", :input, :output, :elapsed)
catch :shit_happened do
  ARGF.each do |line|
    stat.clock if ARGF.lineno == 1
    input += 1
    if header?
      puts line
      output += 1
    else
      cols = line.chomp.split(delimiter).map { |d| d.to_f }

      if max_col && (max_col + 1) > cols.length
        next if @options[:drop]
        $stderr.puts "There are only #{cols.length} columns, but you said there was a column #{max_col + 1}"
        throw :shit_happened
      end

      # Read the divisor before any column (possibly the last one) is rewritten.
      divisor = cols.last
      columns.each do |index|
        cols[index] = cols[index] / divisor
      end

      puts cols.map { |c| c.to_s }.join(delimiter)
      output += 1
      stat.show(input, output) if @options[:live]
    end
  end
end
stat.finish(input, output)
|
data/bin/binner
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'micro-optparse'
|
3
|
+
require "#{__FILE__}/../stat.rb"
|
4
|
+
|
5
|
+
class String
  # True when Kernel#Float can parse the whole string, false when the
  # conversion raises.
  def is_numeric?
    Float(self)
    true
  rescue StandardError
    false
  end
end
|
10
|
+
|
11
|
+
# Validator for the --bins option: every entry has to be numeric (per
# String#is_numeric?) and, once converted with to_f, not negative.
all_are_numeric = lambda do |x|
  x.all? { |width| width.is_numeric? && width.to_f >= 0 }
end
|
17
|
+
|
18
|
+
# Command-line options for the binner script, declared with the
# micro-optparse Parser DSL and read below through @options[:name].
@options = Parser.new do |opts|
  opts.banner = "Binning, see below for options"
  opts.version = "0.01"

  # Flags shared with the sibling stream scripts.
  opts.option :live, "print the live status", :default => false
  opts.option :header, "use if the stream contains a header", :default => false
  opts.option :delimiter, "specify the delimiting character", :default => "tsv", :value_in_set => ['tsv', 'csv', 'ssv']

  # Binner-specific flags.
  opts.option :emit, "Emit 1 for map-reduce", :default => false
  opts.option :drop, "Drop lines that are poorly formatted, instead of failing", :default => false
  opts.option :bins, "bin width for each column, a width of 0 leaves a column unchanged", :default => [],
              :value_satisfies => all_are_numeric
end.process!

# Bin widths are converted to floats in place.
@options[:bins].map!(&:to_f)

# Field separator for each supported --delimiter name.
@delimiters = {
  'tsv' => "\t",
  'csv' => ",",
  'ssv' => " "
}

@delimiter = @delimiters[@options[:delimiter]]
|
35
|
+
|
36
|
+
# Tells whether the line ARGF has just read is the header row: the --header
# option must be set and ARGF must be on its first line.
def header?
  header_requested = @options[:header]
  header_requested && ARGF.lineno == 1
end
|
39
|
+
|
40
|
+
# Snaps value down to the nearest multiple of width, rounded to six decimal
# places. A width of 0.0 leaves the value untouched.
def bin(value, width)
  return value if width == 0.0

  steps = (value / width).floor
  (steps * width).round(6)
end
|
47
|
+
|
48
|
+
# Suffix appended to each data row: the delimiter followed by "1" when --emit
# is set, otherwise an empty string.
def emit
  if @options[:emit]
    @delimiter + "1"
  else
    ""
  end
end
|
51
|
+
|
52
|
+
# Suffix appended to the header row: the delimiter followed by "count" when
# --emit is set, otherwise an empty string.
def emit_header
  if @options[:emit]
    @delimiter + "count"
  else
    ""
  end
end
|
55
|
+
|
56
|
+
# Splits a raw line into its fields using the configured delimiter.
def columns(line)
  fields = line.split(@delimiter)
  fields
end
|
59
|
+
|
60
|
+
# Decides whether a row should be skipped. `line` is the row already split
# into fields. A row whose field count equals the number of --bins is kept
# (false). A mismatching row is skipped (true) when --drop is set; otherwise
# an explanation is written to stderr and :dropped is thrown.
def drop?(line)
  expected = @options[:bins].length
  return false if line.length == expected
  return true if @options[:drop]

  $stderr.puts "There were #{line.length} columns on line #{ARGF.lineno}, but you gave #{expected} bins"
  $stderr.puts "Perhaps this wasn't a #{@options[:delimiter]}. Try the -d --help for a list of filetypes"
  $stderr.puts "If you think that some lines might be an issue, use the -r flag to drop bad lines"
  throw :dropped
end
|
74
|
+
|
75
|
+
# Main loop: reads rows from ARGF, skips the ones drop? rejects, prints the
# header row prefixed with "#" and every other row with each field snapped to
# its bin. drop? throws :dropped on a bad row when --drop is not set, which
# leaves this block without reaching stats.finish.
catch :dropped do
  lines_in = 0
  lines_out = 0
  stats = Stats.new("Binning", :input, :output, :dropped, :elapsed)

  ARGF.each do |line|
    lines_in += 1
    text = line.chomp
    fields = text.split(@delimiter)

    unless drop?(fields)
      if header?
        puts "#" + text + emit_header
      else
        widths = @options[:bins]
        numbers = fields.map { |field| field.to_f }
        binned = numbers.each_with_index.map { |number, i| bin(number, widths[i]) }
        puts binned.join(@delimiter) + emit
      end
      lines_out += 1
    end

    # The live status is refreshed for dropped rows as well.
    stats.show(lines_in, lines_out) if @options[:live]
  end

  stats.finish(lines_in, lines_out)
end
|
data/lib/mddb/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mddb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-16 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &70129250480800 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70129250480800
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: mongo_mapper
|
27
|
-
requirement: &
|
27
|
+
requirement: &70129250480340 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70129250480340
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hirb
|
38
|
-
requirement: &
|
38
|
+
requirement: &70129250479880 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70129250479880
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: thor
|
49
|
-
requirement: &
|
49
|
+
requirement: &70129250479340 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,11 +54,14 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70129250479340
|
58
58
|
description: MDDB makes analysing molecular dynamics simulations easy
|
59
59
|
email:
|
60
60
|
- thom.mulvaney@gmail.com
|
61
61
|
executables:
|
62
|
+
- aggregater
|
63
|
+
- average
|
64
|
+
- binner
|
62
65
|
- mddb
|
63
66
|
extensions: []
|
64
67
|
extra_rdoc_files: []
|
@@ -66,6 +69,9 @@ files:
|
|
66
69
|
- .gitignore
|
67
70
|
- Gemfile
|
68
71
|
- Rakefile
|
72
|
+
- bin/aggregater
|
73
|
+
- bin/average
|
74
|
+
- bin/binner
|
69
75
|
- bin/mddb
|
70
76
|
- config.yml
|
71
77
|
- lib/mddb.rb
|