mddb 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/bin/aggregater +71 -0
  2. data/bin/average +59 -0
  3. data/bin/binner +95 -0
  4. data/lib/mddb/version.rb +1 -1
  5. metadata +16 -10
@@ -0,0 +1,71 @@
1
+ #!/usr/bin/env ruby
2
+ require "#{__FILE__}/../stat"
3
+ require 'micro-optparse'
4
+
5
# Command-line options for the aggregater script, declared with the
# micro-optparse DSL. The parsed values are read back as @options[:name].
@options = Parser.new do |p|
  # The banner names this tool; it used to say "Binning".
  p.banner = "Aggregate, see below for options"
  p.version = "0.01"
  p.option :live, "print the live status", :default => false
  p.option :header, "use if the stream contains a header", :default => false
  p.option :delimiter, "specify the delimiting character", :default => "tsv", :value_in_set => ['tsv', 'csv', 'ssv']
  p.option :drop, "Drop lines that are poorly formatted, instead of failing", :default => false
  # The key columns are the leading columns of each row, so at least one is required.
  p.option :keys, "Number of columns to use as keys", :default => 1, :value_satisfies => lambda {|x| x > 0 }
end.process!
14
+
15
# Truthy only when the header option is set and ARGF is on its first line.
# When the option is falsy, that falsy value itself is returned.
def header?
  header_enabled = @options[:header]
  header_enabled && ARGF.lineno == 1
end
18
+
19
# Streams delimited rows from ARGF and sums the value columns of consecutive
# rows that share the same key (the first @options[:keys] columns).
# Only consecutive rows with an identical key are merged, so input that is not
# grouped by key produces several output lines for the same key.
catch :shit_stream do
  # Honour the declared delimiter option; the default 'tsv' is a tab.
  delimiter = { 'tsv' => "\t", 'csv' => ",", 'ssv' => " " }[@options[:delimiter]]
  key_count = @options[:keys]
  input = 0
  output = 0
  stats = Stats.new "Aggregate", :input, :output, :ratio, :elapsed
  old_key = nil
  totals = []

  ARGF.each do |line|
    stats.clock if ARGF.lineno == 1
    input += 1
    line.chomp!
    if header?
      # The header line is passed through untouched.
      puts line
      output += 1
    else
      cols = line.split(delimiter)

      # A row must contain every key column. Without this guard the value
      # slice below is nil and the script dies with a NoMethodError.
      if cols.count < key_count
        next if @options[:drop]
        $stderr.puts "There were #{cols.count} columns on line #{ARGF.lineno}, but #{key_count} key columns were requested"
        throw :shit_stream
      end

      key = cols[0...key_count]
      values = cols[key_count..-1].map { |d| d.to_f }

      if key != old_key
        # Key changed: flush the finished group (if there is one) and start
        # a fresh set of totals for the new key.
        unless old_key.nil?
          puts old_key.join(delimiter) + delimiter + totals.join(delimiter)
          output += 1
        end
        old_key = key
        totals = Array.new(values.count, 0)
      end

      values.each_with_index { |value, i| totals[i] += value }

      stats.show(input, output) if @options[:live]
    end
  end

  # Flush the last group. old_key is still nil when no data row was read
  # (empty input or header only), in which case there is nothing to print.
  unless old_key.nil?
    output += 1
    puts old_key.join(delimiter) + delimiter + totals.join(delimiter)
  end
  stats.finish(input, output)
end
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ require "#{__FILE__}/../stat"
3
+ require 'micro-optparse'
4
+
5
# Validator for the columns option: every entry must convert (via to_i) to a
# positive integer. An empty list is accepted.
valid_columns = lambda { |requested| requested.all? { |entry| entry.to_i > 0 } }
12
+
13
# Command-line options for the average script, declared with the
# micro-optparse DSL. The declaration order is kept as it was.
@options = Parser.new do |parser|
  parser.banner = "Average, see below for options"
  parser.version = "0.01"
  parser.option(:live, "print the live status", :default => false)
  parser.option(:header, "use if the stream contains a header", :default => false)
  parser.option(:delimiter, "specify the delimiting character",
                :default => "tsv", :value_in_set => ['tsv', 'csv', 'ssv'])
  parser.option(:drop, "Drop lines that are poorly formatted, instead of failing", :default => false)
  parser.option(:columns, "The columns whose average you wish to obtain, columns are integers starting at 1",
                :default => [], :value_satisfies => valid_columns)
end.process!
22
+
23
# Truthy only when the header option is set and ARGF is on its first line.
# When the option is falsy, that falsy value itself is returned.
def header?
  header_enabled = @options[:header]
  header_enabled && ARGF.lineno == 1
end
26
+
27
# Divides each selected column of every data row by that row's last column
# and prints the resulting row.
# NOTE(review): the last column is presumably a count produced upstream
# (e.g. binner's emit option followed by aggregater) - confirm.

# Column numbers are 1-based on the command line, 0-based here.
columns = @options[:columns].map { |d| d.to_i - 1 }
# With no columns requested `max` is nil; -1 makes the width check below a no-op
# instead of raising on `nil + 1`.
max_col = columns.max || -1
# Honour the declared delimiter option; the default 'tsv' is a tab.
delimiter = { 'tsv' => "\t", 'csv' => ",", 'ssv' => " " }[@options[:delimiter]]
input = 0.0
output = 0.0
stat = Stats.new("Average", :input, :output, :elapsed)
catch :shit_happened do
  ARGF.each do |line|
    stat.clock if ARGF.lineno == 1
    input += 1
    if header?
      # The header line is passed through untouched.
      puts line
      output += 1
    else
      cols = line.chomp.split(delimiter).map { |d| d.to_f }

      if (max_col + 1) > cols.length
        # Diagnostics go to stderr so they never end up in the data stream.
        $stderr.puts "There are only #{cols.length} columns, but you said there was a column #{max_col + 1}"
        throw :shit_happened
      end

      # Read the divisor once: if the last column is itself selected it gets
      # overwritten below, and later columns must not be divided by the new value.
      divisor = cols.last
      columns.each do |index|
        cols[index] = cols[index] / divisor
      end

      puts cols.map { |c| c.to_s }.join(delimiter)
      output += 1
      stat.show(input, output) if @options[:live]
    end
  end
end
stat.finish(input, output)
@@ -0,0 +1,95 @@
1
+ #!/usr/bin/env ruby
2
+ require 'micro-optparse'
3
+ require "#{__FILE__}/../stat.rb"
4
+
5
class String
  # True when Kernel#Float accepts the whole string, false when the
  # conversion raises.
  def is_numeric?
    Float(self)
    true
  rescue StandardError
    false
  end
end
10
+
11
# Validator for the bins option: every entry must be numeric (per
# String#is_numeric?) and must not be negative. An empty list is accepted.
all_are_numeric = lambda { |widths|
  widths.all? { |width| width.is_numeric? && width.to_f >= 0 }
}
17
+
18
# Command-line options for the binner script, declared with the
# micro-optparse DSL. The declaration order is kept as it was.
@options = Parser.new do |parser|
  parser.banner = "Binning, see below for options"
  parser.version = "0.01"
  parser.option(:live, "print the live status", :default => false)
  parser.option(:header, "use if the stream contains a header", :default => false)
  parser.option(:delimiter, "specify the delimiting character",
                :default => "tsv", :value_in_set => ['tsv', 'csv', 'ssv'])
  parser.option(:emit, "Emit 1 for map-reduce", :default => false)
  parser.option(:drop, "Drop lines that are poorly formatted, instead of failing", :default => false)
  parser.option(:bins, "bin width for each column, a width of 0 leaves a column unchanged",
                :default => [], :value_satisfies => all_are_numeric)
end.process!
29
+
30
# Convert the bin widths to floats in place.
@options[:bins].map!(&:to_f)

# Map each accepted file type name to its separator, then pick the separator
# for the requested type.
@delimiters = {
  'tsv' => "\t",
  'csv' => ",",
  'ssv' => " "
}
@delimiter = @delimiters[@options[:delimiter]]
35
+
36
# Truthy only when the header option is set and ARGF is on its first line.
# When the option is falsy, that falsy value itself is returned.
def header?
  header_enabled = @options[:header]
  header_enabled && ARGF.lineno == 1
end
39
+
40
# Snaps `value` down to the lower edge of its bin of size `width`, rounded to
# six decimal places. A width of 0.0 returns the value unchanged.
# NOTE(review): flooring a float quotient can land one bin low for values that
# sit on a bin edge (0.3 / 0.1 evaluates to 2.9999999999999996).
def bin(value, width)
  return value if width == 0.0

  bin_index = (value / width).floor
  (bin_index * width).round(6)
end
47
+
48
# Suffix appended to each data row: the delimiter followed by "1" when the
# emit option is set, otherwise an empty string.
def emit
  return "" unless @options[:emit]

  @delimiter + "1"
end
51
+
52
# Suffix appended to the header row: the delimiter followed by "count" when
# the emit option is set, otherwise an empty string.
def emit_header
  return "" unless @options[:emit]

  @delimiter + "count"
end
55
+
56
# Splits one line of input into its fields using the configured delimiter.
def columns(line)
  separator = @delimiter
  line.split(separator)
end
59
+
60
# Decides whether a split row should be skipped.
# `line` is the array of fields for the current row. Returns false when it has
# exactly one field per configured bin width. Otherwise returns true when the
# drop option is set, or reports the mismatch on stderr and throws :dropped.
def drop? line
  return false if line.length == @options[:bins].length
  return true if @options[:drop]

  $stderr.puts "There were #{line.length} columns on line #{ARGF.lineno}, but you gave #{@options[:bins].length} bins"
  $stderr.puts "Perhaps this wasn't a #{@options[:delimiter]}. Try the -d --help for a list of filetypes"
  $stderr.puts "If you think that some lines might be an issue, use the -r flag to drop bad lines"
  throw :dropped
end
74
+
75
# Streams delimited rows from ARGF, snaps every column to its bin (see `bin`)
# and prints the row, with the `emit` suffix appended. Rows rejected by
# `drop?` are skipped; `drop?` may instead throw :dropped, which ends the run
# before the final stats are reported.
catch :dropped do
  input = 0
  output = 0
  stats = Stats.new "Binning", :input, :output, :dropped, :elapsed
  ARGF.each do |line|
    # Call clock on the first line, as the aggregater and average scripts do.
    # NOTE(review): assumes Stats#clock marks the start of timing - confirm in stat.rb.
    stats.clock if ARGF.lineno == 1
    input += 1
    row = columns(line.chomp)
    unless drop? row
      if header?
        # The header is kept as text and marked with a leading "#".
        puts "#" + line.chomp + emit_header
      else
        widths = @options[:bins]
        binned = row.each_with_index.map { |field, i| bin(field.to_f, widths[i]) }
        puts binned.join(@delimiter) + emit
      end
      output += 1
    end
    stats.show(input, output) if @options[:live]
  end
  stats.finish(input, output)
end
@@ -1,3 +1,3 @@
1
1
  module Mddb
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mddb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-15 00:00:00.000000000Z
12
+ date: 2012-02-16 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &70212474145840 !ruby/object:Gem::Requirement
16
+ requirement: &70129250480800 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70212474145840
24
+ version_requirements: *70129250480800
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: mongo_mapper
27
- requirement: &70212474145420 !ruby/object:Gem::Requirement
27
+ requirement: &70129250480340 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70212474145420
35
+ version_requirements: *70129250480340
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hirb
38
- requirement: &70212474145000 !ruby/object:Gem::Requirement
38
+ requirement: &70129250479880 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70212474145000
46
+ version_requirements: *70129250479880
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: thor
49
- requirement: &70212474144540 !ruby/object:Gem::Requirement
49
+ requirement: &70129250479340 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,11 +54,14 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70212474144540
57
+ version_requirements: *70129250479340
58
58
  description: MDDB makes analysing molecular dynamics simulations easy
59
59
  email:
60
60
  - thom.mulvaney@gmail.com
61
61
  executables:
62
+ - aggregater
63
+ - average
64
+ - binner
62
65
  - mddb
63
66
  extensions: []
64
67
  extra_rdoc_files: []
@@ -66,6 +69,9 @@ files:
66
69
  - .gitignore
67
70
  - Gemfile
68
71
  - Rakefile
72
+ - bin/aggregater
73
+ - bin/average
74
+ - bin/binner
69
75
  - bin/mddb
70
76
  - config.yml
71
77
  - lib/mddb.rb