jeremyf-comma_pile 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ test/tmp/*
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Jeremy Friesen
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,52 @@
1
+ CommaPile
2
+ ==========
3
+
4
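+ A simple gem for processing and aggregating CSV files. CommaPile builds a table that counts the rows for each combination of the columns you choose, and can optionally sum other columns per group.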
5
+
6
+ Config Options:
7
+
8
+ * **source** - Path to the source CSV file [**REQUIRED**, **SINGLE**]
9
+ * **on** - Specify any number of columns [**REQUIRED**, **MULTIPLE**]
10
+ * **sum_on** - Specify any number of columns whose values should be summed for each group [**OPTIONAL**, **MULTIPLE**]
11
+ * **conditions** - Specify a lambda, or a hash of key-value pairs, that must be met for a line to be
11
+ part of the compilation [**OPTIONAL**, **MULTIPLE**]
13
+ * **output** - Specify a path to output the compiled data; In addition a raw file,
14
+ containing all rows that were used in the compilation, is generated. [**OPTIONAL**, **SINGLE**]
15
+ * **line_parser** - Specify a custom line parser to use; By default CommaPile::LineParser is used.
16
+ A custom line parser would allow for transformation of data during compilation. See
17
+ test/example\_line\_parser.rb [**OPTIONAL**, **SINGLE**]
18
+
19
+ Example
20
+ -------
21
+ File.open('/path/to/input.csv', 'w+') do |file|
22
+ file.puts %('Work', 'Build CommaPile',2009-09-12,2)
23
+ file.puts %('Work', 'Build CommaPile',2009-09-13,1)
24
+ file.puts %('Work', 'Chase Chickens', 2009-09-12,4)
25
+ file.puts %('Work', 'Read Developer Blogs',2009-09-13,1)
26
+ file.puts %('Home', 'Do Dishes', 2009-09-12, 0.5)
27
+ end
28
+
29
+ require 'comma_pile'
30
+
31
+ report = CommaPile.new do |config|
32
+ config.source = '/path/to/input.csv'
33
+ config.on << 0
34
+ config.on << 1
35
+ config.sum_on << 3
36
+ config.output = '/path/to/output.csv'
37
+ config.conditions = lambda {|r| r[1] =~ /^Chase/ }
38
+ end
39
+
40
+ report.generate!
41
+
42
+ report['Work'].counter == 3
43
+ report['Work'].sum[3] == 4
44
+ report['Work']['Build CommaPile'].counter == 2
45
+ report['Work']['Build CommaPile'].sum[3] == 3
46
+ report['Home'].sum[3] == 0.5
47
+
48
+
49
+ Copyright
50
+ ---------
51
+
52
+ Copyright (c) 2009 Jeremy Friesen. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,56 @@
1
# Rake tasks for the comma_pile gem: packaging (Jeweler), tests, coverage
# (RCov) and RDoc generation. Optional tooling is loaded defensively so the
# remaining tasks stay usable when a tool is not installed.
require 'rubygems'
require 'rake'
# YAML is used below to read VERSION.yml; require it explicitly instead of
# relying on another library having loaded it.
require 'yaml'

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "comma_pile"
    gem.summary = %Q{Video Stats for an onstreammedia.com log}
    gem.email = "jeremy.n.friesen@gmail.com"
    gem.homepage = "http://github.com/jeremyf/comma_pile"
    gem.authors = ["Jeremy Friesen"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  # Keep an :rcov task around so the failure message explains what to install.
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end


task :default => :test

# Prefer the task class shipped with RDoc; fall back to the legacy Rake one
# on installations that only provide 'rake/rdoctask'.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  if File.exist?('VERSION.yml')
    raw_version = File.read('VERSION.yml')
    # VERSION.yml uses Symbol keys (":major:"). Where YAML.unsafe_load exists,
    # plain YAML.load may refuse Symbols, so use the unrestricted loader for
    # this trusted, project-local file.
    config = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(raw_version) : YAML.load(raw_version)
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "comma_pile #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
56
+
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 1
4
+ :patch: 0
@@ -0,0 +1,53 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for comma_pile 0.1.0.
#
# The trailing conditional of the generated spec had empty branches and
# referenced Gem::RubyGemsVersion and an unused local; it has been reduced to
# the single assignment that had an effect.
Gem::Specification.new do |s|
  s.name = %q{comma_pile}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jeremy Friesen"]
  s.date = %q{2009-08-13}
  s.email = %q{jeremy.n.friesen@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.markdown"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.markdown",
    "Rakefile",
    "VERSION.yml",
    "comma_pile.gemspec",
    "lib/comma_pile.rb",
    "lib/comma_pile/config.rb",
    "lib/comma_pile/line_parser.rb",
    "lib/comma_pile/pivot_node.rb",
    "lib/comma_pile/report.rb",
    "test/comma_pile_test.rb",
    "test/example_line_parser.rb",
    "test/fixtures/report.csv",
    "test/test_helper.rb"
  ]
  s.homepage = %q{http://github.com/jeremyf/comma_pile}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.4}
  s.summary = %q{Video Stats for an onstreammedia.com log}
  s.test_files = [
    "test/comma_pile_test.rb",
    "test/example_line_parser.rb",
    "test/test_helper.rb"
  ]

  # Only set the specification version on RubyGems releases that know about it.
  s.specification_version = 3 if s.respond_to? :specification_version
end
@@ -0,0 +1,40 @@
1
module CommaPile
  # Holds the settings handed to the block of CommaPile.new: where the CSV is
  # read from, which fields to group and sum on, optional row conditions, an
  # optional output path and the line parser class.
  class Config
    attr_writer :conditions, :input, :output, :line_parser
    attr_reader :conditions, :output, :field_names

    def initialize
      @field_names = []
      @sum = []
    end

    # Fields whose values are accumulated per group.
    def sum_on
      @sum
    end

    # Appends +value+ (a single field or an array of fields) and keeps the
    # list flat and free of duplicates.
    def sum_on=(value)
      @sum.push(value)
      @sum = @sum.flatten.uniq
    end

    # Returns the configured line parser, defaulting to CommaPile::LineParser.
    # Raises RuntimeError unless the parser responds to :with with arity 1.
    def line_parser
      @line_parser ||= CommaPile::LineParser
      usable = @line_parser.respond_to?(:with) && @line_parser.method(:with).arity == 1
      return @line_parser if usable

      raise RuntimeError, "#{self.class.to_s}#line_parser must respond to :with and have an arity of 1. The line parser will receive an array of fields"
    end

    # Path of the CSV to read; falls back to './file.csv' when unset.
    def input
      @input || './file.csv'
    end
    alias_method :source, :input
    alias_method :source=, :input=

    # Appends +value+ (a single field or an array of fields) to the grouping
    # fields and keeps the list flat and free of duplicates.
    def field_names=(value)
      @field_names.push(value)
      @field_names = @field_names.flatten.uniq
    end

    alias_method :on, :field_names
    alias_method :on=, :field_names=
  end

end
@@ -0,0 +1,16 @@
1
module CommaPile
  # Default record wrapper: exposes a parsed CSV row through #[] so that
  # fields can be looked up by whatever key the wrapped row understands.
  class LineParser
    attr_reader :line

    class << self
      # Wraps +line+ in a parser instance and yields it to the caller's block.
      def with(line)
        record = new(line)
        yield(record)
      end
    end

    def initialize(line)
      @line = line
    end

    # Delegates the lookup of +value+ to the wrapped row.
    def [](value)
      line[value]
    end
  end
end
@@ -0,0 +1,20 @@
1
module CommaPile
  # One node of the pivot tree. The node itself is a Hash of child nodes keyed
  # by field value; on top of that it tracks how many rows reached it
  # (#counter) and per-field running totals (#sum).
  class PivotNode < Hash
    attr_reader :sum
    attr_writer :counter

    def initialize
      super()
      @sum = {}
    end

    # Number of rows counted for this node; starts at 0.
    def counter
      @counter ||= 0
    end

    def inspect
      "(counter: #{counter}; hash:#{super})"
    end

    # Adds +value+ to the running total kept under +fieldname+ and returns the
    # new total. String values, such as raw CSV cells, are converted to a
    # number first.
    #
    # Raises TypeError when +value+ cannot be treated as a number.
    def add_to(fieldname, value)
      sum[fieldname] = (sum[fieldname] || 0) + numeric_from(value)
    end

    private

    # Returns +value+ unchanged unless it is a String; integer-looking strings
    # become Integers, anything else goes through Float().
    def numeric_from(value)
      return value unless value.is_a?(String)

      text = value.strip
      return text.to_i if text =~ /\A[-+]?\d+\z/

      Float(text)
    rescue ArgumentError
      raise TypeError, "cannot accumulate non-numeric value #{value.inspect}"
    end
  end
end
@@ -0,0 +1,111 @@
1
+ if RUBY_VERSION =~ /^1\.8/
2
+ require 'fastercsv'
3
+ CSV = FCSV
4
+ else
5
+ require 'csv'
6
+ end
7
+ require 'delegate'
8
+
9
module CommaPile
  # Reads the configured CSV, keeps the rows that satisfy the configured
  # conditions and accumulates them into a tree of CommaPile::PivotNode
  # objects, one level per configured field name. The report delegates to the
  # top-level Hash of that tree, so report['key'] returns a node.
  class Report < DelegateClass(Hash)
    attr_reader :results

    # config - object answering field_names, input, conditions, output,
    #          line_parser and sum_on (normally a CommaPile::Config).
    def initialize(config)
      @config = config
      @results = {}
      super(@results)
    end

    # Returns CSV text with one line per leaf node:
    # counter, the keys leading to the leaf, then the leaf's summed values.
    def summary(entry = nil, parent_keys = [])
      (entry || @results).map do |key, value|
        if value.empty?
          cells = [value.counter] + parent_keys + [key] + value.sum.values
          CSV.generate_line(cells.flatten)
        else
          summary(value, parent_keys + [key])
        end
      end.join
    end

    # Prints the tree to standard output, one tab of indentation per level.
    def to_stdout(index = 0, entry = nil)
      (entry || @results).each do |key, value|
        puts "#{"\t" * index}#{key}: #{value.counter}"
        next unless value.respond_to?(:each)

        to_stdout(index + 1, value) if value && !value.empty?
      end
    end

    # Processes the input. When an output path is configured, two files are
    # written as well: the output file (a header of field names followed by
    # the selected fields of every accepted row) and a "raw" companion file
    # holding the accepted rows unchanged.
    def generate!
      return process_input unless output

      CSV.open(raw_output_path, 'w+') do |raw_csv|
        CSV.open(output, 'w+') do |parsed_csv|
          @raw_csv = raw_csv
          @parsed_csv = parsed_csv
          @parsed_csv << field_names.map { |name| name.to_s }
          process_input
        end
      end
    ensure
      # Do not keep references to closed CSV handles around.
      @raw_csv = nil
      @parsed_csv = nil
    end

    protected

    def field_names; @config.field_names; end
    def input; @config.input; end
    def conditions; @config.conditions; end
    def output; @config.output; end
    def line_parser; @config.line_parser; end
    def sum_on_field_names; @config.sum_on; end

    # Path of the raw companion file: "name.raw.ext" for "name.ext". When the
    # output path has no extension the substitution would return the output
    # path itself, so ".raw" is appended to keep the two files distinct.
    def raw_output_path
      candidate = output.sub(/\.(\w+)$/, '.raw.\1')
      candidate == output ? "#{output}.raw" : candidate
    end

    def process_input
      CSV.foreach(input) do |line|
        parse_line(line) do |record|
          with_conditions_met_for(record) do
            render_output_for(record, line)
            accumulate_entry_for(record)
          end
        end
      end
    end

    def register(line)
    end

    # Appends the accepted row to the open output files, if any.
    def render_output_for(record, line)
      return nil unless output

      @parsed_csv << field_names.map { |name| record[name] } if @parsed_csv
      @raw_csv << line if @raw_csv
    end

    # Walks down the tree one field at a time, creating nodes on demand and
    # updating the counter and sums of every node on the path.
    def accumulate_entry_for(record)
      field_names.inject(self) do |mem, field_name|
        key = record[field_name]
        mem[key] ||= CommaPile::PivotNode.new
        mem[key].counter += 1
        sum_on_field_names.each do |sum_on_field_name|
          mem[key].add_to(sum_on_field_name, record[sum_on_field_name])
        end
        mem[key]
      end
    end

    def parse_line(line)
      line_parser.with(line) do |record|
        yield(record) if block_given?
      end
    end

    # Yields exactly once when the record satisfies the configured
    # conditions. The decision never depends on what the block returns.
    def with_conditions_met_for(record)
      yield if conditions_met_for?(record)
      nil
    end

    # True when no conditions are configured, when a callable condition
    # accepts the record, or when every key/value pair of an enumerable
    # condition matches the record.
    def conditions_met_for?(record)
      return true unless conditions
      return true if conditions.respond_to?(:call) && conditions.call(record)
      return false unless conditions.respond_to?(:all?)

      conditions.all? { |(key, expected)| condition_matches?(record, key, expected) }
    end

    # A Regexp is matched as given; any other expected value must equal the
    # whole field (anchored with \A and \z, not per line).
    def condition_matches?(record, key, expected)
      actual = condition_value(record, key)
      return false if actual.nil?

      pattern = expected.is_a?(Regexp) ? expected : /\A#{Regexp.escape(expected.to_s)}\z/
      !(pattern =~ actual.to_s).nil?
    end

    # Named keys are read through the record's method of that name when it
    # has one; everything else (for example Integer column indexes) goes
    # through record[key].
    def condition_value(record, key)
      named = key.is_a?(Symbol) || key.is_a?(String)
      (named && record.respond_to?(key)) ? record.send(key) : record[key]
    end

  end
end
data/lib/comma_pile.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'comma_pile/line_parser'
2
+ require 'comma_pile/pivot_node'
3
+ require 'comma_pile/report'
4
+ require 'comma_pile/config'
5
+
6
module CommaPile
  # Builds a report. A fresh CommaPile::Config is yielded to the optional
  # block for customisation and is then wrapped in a CommaPile::Report, which
  # is returned. Without a block the report is built from the default config.
  def self.new
    config = CommaPile::Config.new
    yield(config) if block_given?
    CommaPile::Report.new(config)
  end
end
@@ -0,0 +1,169 @@
1
+ require 'test_helper'
2
+ require 'example_line_parser'
3
+
4
# End-to-end tests for CommaPile, run against test/fixtures/report.csv.
# Most cases use ExampleLineParser to expose named fields of each row.
class CommaPileTest < Test::Unit::TestCase
  CSV_FILE_PATH = File.join(File.dirname(__FILE__), "../test/fixtures/report.csv")

  # Absolute path of the file the output test writes to. The containing
  # directory is created on demand because its contents are git-ignored and
  # it may be missing from a fresh checkout.
  def output_filename
    @output_filename ||= begin
      tmp_dir = File.expand_path(File.join(File.dirname(__FILE__), 'tmp'))
      Dir.mkdir(tmp_dir) unless File.directory?(tmp_dir)
      File.join(tmp_dir, 'output.csv')
    end
  end

  should 'not require line_parser' do
    report = CommaPile.new do |config|
      config.source = CSV_FILE_PATH
      config.on = [0]
    end
    report.generate!

    assert_equal 3, report['129.74.105.126'].counter
  end

  should 'have results takes a conditions option that is a hash with string value' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.source = CSV_FILE_PATH
      config.on = [:project, :viewer_geolocation]
      config.conditions = {:viewer_event => 'play'}
    end
    report.generate!

    assert_equal 6, report['vocation'].counter
    assert_equal 5, report['vocation']['off-campus'].counter
    assert_equal 1, report['vocation']['on-campus'].counter
    assert_equal 85, report['commencement'].counter
    assert_equal 85, report['commencement']['off-campus'].counter
    assert_equal 3, report['innovationpark'].counter
    assert_equal 3, report['innovationpark']['off-campus'].counter
  end

  should 'have results takes a conditions option that is a hash with regular express' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.source = CSV_FILE_PATH
      config.on << :project
      config.on << :viewer_event
      config.on << :viewer_geolocation
      config.conditions = {:viewer_event => /(play|stop)/}
    end
    report.generate!

    assert_equal 12, report['vocation'].counter
    assert_equal 6, report['vocation']['play'].counter
    assert_equal 5, report['vocation']['play']['off-campus'].counter
    assert_equal 1, report['vocation']['play']['on-campus'].counter
    assert_equal 6, report['vocation']['stop'].counter
    assert_equal 5, report['vocation']['stop']['off-campus'].counter
    assert_equal 1, report['vocation']['stop']['on-campus'].counter
  end

  should 'have results takes a conditions option that is a lambda' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.source = CSV_FILE_PATH
      config.field_names << :project
      config.on << :viewer_event
      config.conditions = lambda {|r| r.viewer_event == 'play' || r.project == 'vocation'}
    end
    report.generate!

    assert_equal 94, report['vocation'].counter
    assert_equal 6, report['vocation']['play'].counter
    assert_equal 6, report['vocation']['stop'].counter
    assert_nil report['commencement']['stop']
    assert_equal 85, report['commencement']['play'].counter
  end

  should 'have results that no options' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.source = CSV_FILE_PATH
      config.on = [:project, :viewer_event, :viewer_geolocation]
      config.conditions = lambda {|r| r.viewer_event == 'play' || r.project == 'vocation'}
    end
    report.generate!

    assert_equal 94, report['vocation'].counter
    assert_equal 6, report['vocation']['play'].counter
    assert_equal 29, report['vocation']['pause'].counter
    assert_equal 28, report['vocation']['unpause'].counter
    assert_equal 25, report['vocation']['seek'].counter
    assert_equal 6, report['vocation']['stop'].counter
  end

  should 'have results that output conditional matches to a file' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.input = CSV_FILE_PATH
      config.on = :viewer_event
      config.output = output_filename
      config.conditions = {:viewer_event => 'play'}
    end
    report.generate!

    # Guard against a vacuous pass when the output file is empty.
    yielded = false
    File.readlines(output_filename).each do |line|
      assert_match(/^(viewer_event|play)$/i, line)
      yielded = true
    end
    assert yielded
  end

  should 'have sub-results that add up to parent results' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.input = CSV_FILE_PATH
      config.on = [:project, :viewer_event]
    end

    report.generate!

    yielded = false
    report.each do |name, collector|
      yielded = true
      assert_equal collector.counter, collector.inject(0) {|m, (k, v)| m + v.counter}
    end

    assert yielded, "Making sure the above method is called"
  end

  should 'allow column numbers to be used instead of field names' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.input = CSV_FILE_PATH
      config.on << :viewer_geolocation
      config.on << 0
    end
    report.generate!
    assert_equal 3, report['on-campus'].counter
    assert_equal 94, report['off-campus']['68.45.25.118'].counter
  end

  should "have a summary" do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.input = CSV_FILE_PATH
      config.on << :viewer_geolocation
      config.on << 0
      config.sum_on << :filesize
    end
    report.generate!
    # Parentheses keep the regexp literal from being parsed ambiguously.
    assert_match(/^#{Regexp.escape('1,off-campus,71.103.212.224,92094')}$/, report.summary)
  end

  should "allow one or more accumulators" do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.input = CSV_FILE_PATH
      config.on << :viewer_geolocation
      config.on << 0
      config.sum_on << :filesize
    end
    report.generate!

    assert_equal 17155, report['on-campus'].sum[:filesize]
    assert_equal 371622045, report['off-campus'].sum[:filesize]
  end
end
@@ -0,0 +1,43 @@
1
# Example of a custom line parser: gives names to selected columns of the
# log fixture and derives a few values from them. Rows are addressed either
# by Integer column index or by the name of one of the methods below.
class ExampleLineParser < CommaPile::LineParser
  INDEX_FOR_IP_ADDRESS = 0
  HEADER_VALUE_FOR_IP_ADDRESS = 'c_ip'
  INDEX_FOR_FILENAME = 65
  INDEX_FOR_CLIENT_SIDE_REFERRER = 14
  INDEX_FOR_VIEWER_EVENT = 54
  INDEX_FOR_FILESIZE = 69
  # Captures the path segment that follows "undame" in the filename column.
  PROJECT_PATTERN = /[\\|\/]?undame[\\|\/]([^\\|\/]*)/i

  # Skips the header row (recognised by its first cell) and hands every other
  # row to the default implementation, which yields a parser instance.
  def self.with(line)
    super if line[INDEX_FOR_IP_ADDRESS] != HEADER_VALUE_FOR_IP_ADDRESS
  end

  # File size column as an Integer (0 when the cell is empty or non-numeric,
  # as given by to_i).
  def filesize
    line[INDEX_FOR_FILESIZE].to_i
  end

  # "on-campus" when the IP column starts with 129.74., otherwise
  # "off-campus" (including when the IP cell is missing).
  def viewer_geolocation
    line[INDEX_FOR_IP_ADDRESS].to_s.strip =~ /^129\.74\./ ? "on-campus" : "off-campus"
  end

  def viewer_event
    line[INDEX_FOR_VIEWER_EVENT]
  end

  # Project name extracted from the filename column, or nil when the column
  # is missing or does not match PROJECT_PATTERN.
  def project
    match = PROJECT_PATTERN.match(line[INDEX_FOR_FILENAME].to_s)
    match && match[1]
  end

  # Base name of the filename column with backslashes treated as path
  # separators; nil when the column is missing.
  def system_name
    filename = line[INDEX_FOR_FILENAME]
    File.basename(filename.gsub(/\\/, '/')) if filename
  end

  def referrer
    line[INDEX_FOR_CLIENT_SIDE_REFERRER]
  end

  # Integer keys read the raw column; any other key is dispatched to the
  # method of that name.
  def [](value)
    if value.is_a?(Integer)
      line[value]
    else
      send(value)
    end
  end
end