jeremyf-comma_pile 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ test/tmp/*
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Jeremy Friesen
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,52 @@
1
+ CommaPile
2
+ ==========
3
+
4
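+ A simple gem for processing and aggregating CSV files. CommaPile builds a table that counts the rows sharing each combination of values in the columns you specify, and can optionally sum other columns per combination.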
5
+
6
+ Config Options:
7
+
8
+ * **source** - The path of the source CSV file [**REQUIRED**, **SINGLE**]
9
+ * **on** - Specify any number of columns [**REQUIRED**, **MULTIPLE**]
10
+ * **sum_on** - Specify a column whose values you want to sum within each group [**OPTIONAL**, **MULTIPLE**]
11
+ * **conditions** - Specify a lambda, or a hash of key-value pairs, that must be met for a line to be
12
+ part of the compilation [**OPTIONAL**, **MULTIPLE**]
13
+ * **output** - Specify a path to output the compiled data; In addition a raw file,
14
+ containing all rows that were used in the compilation, is generated. [**OPTIONAL**, **SINGLE**]
15
+ * **line_parser** - Specify a custom line parser to use; By default CommaPile::LineParser is used.
16
+ A custom line parser would allow for transformation of data during compilation. See
17
+ test/example\_line\_parser.rb [**OPTIONAL**, **SINGLE**]
18
+
19
+ Example
20
+ -------
21
+ File.open('/path/to/input.csv', 'w+') do |file|
22
+ file.puts %('Work', 'Build CommaPile',2009-09-12,2)
23
+ file.puts %('Work', 'Build CommaPile',2009-09-13,1)
24
+ file.puts %('Work', 'Chase Chickens', 2009-09-12,4)
25
+ file.puts %('Work', 'Read Developer Blogs',2009-09-13,1)
26
+ file.puts %('Home', 'Do Dishes', 2009-09-12, 0.5)
27
+ end
28
+
29
+ require 'comma_pile'
30
+
31
+ report = CommaPile.new do |config|
32
+ config.source = '/path/to/input.csv'
33
+ config.on << 0
34
+ config.on << 1
35
+ config.sum_on << 3
36
+ config.output = '/path/to/output.csv'
37
+ config.conditions = lambda {|r| r[1] !~ /^Chase/ }
38
+ end
39
+
40
+ report.generate!
41
+
42
+ report['Work'].counter == 3
43
+ report['Work'].sum[3] == 4
44
+ report['Work']['Build CommaPile'].counter == 2
45
+ report['Work']['Build CommaPile'].sum[3] == 3
46
+ report['Home'].sum[3] == 0.5
47
+
48
+
49
+ Copyright
50
+ ---------
51
+
52
+ Copyright (c) 2009 Jeremy Friesen. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,56 @@
1
# Build, test, coverage and documentation tasks for the comma_pile gem.
require 'rubygems'
require 'rake'
# YAML is used below to read VERSION.yml; require it explicitly rather than
# relying on another library having loaded it.
require 'yaml'

# Gem packaging tasks are only defined when Jeweler is installed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "comma_pile"
    gem.summary = %Q{Video Stats for an onstreammedia.com log}
    gem.email = "jeremy.n.friesen@gmail.com"
    gem.homepage = "http://github.com/jeremyf/comma_pile"
    gem.authors = ["Jeremy Friesen"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage task; falls back to a task that explains how to install rcov.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :default => :test

# Prefer the task class shipped with RDoc; fall back to the legacy Rake one
# so the Rakefile keeps loading on either old or current tool versions.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  version = ""
  if File.exist?('VERSION.yml')
    raw_version = File.read('VERSION.yml')
    # VERSION.yml uses symbol keys (":major:"). It is a trusted local file, so
    # use the unrestricted loader where one exists; older YAML only has `load`.
    config = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(raw_version) : YAML.load(raw_version)
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "comma_pile #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
56
+
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 1
4
+ :patch: 0
@@ -0,0 +1,53 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for comma_pile 0.1.0.
#
# The gem declares no runtime or development dependencies, so the generated
# RubyGems-version branching (which referenced Gem::RubyGemsVersion only to
# choose between two empty branches) has been dropped.
Gem::Specification.new do |s|
  s.name = %q{comma_pile}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jeremy Friesen"]
  s.date = %q{2009-08-13}
  s.email = %q{jeremy.n.friesen@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.markdown"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.markdown",
    "Rakefile",
    "VERSION.yml",
    "comma_pile.gemspec",
    "lib/comma_pile.rb",
    "lib/comma_pile/config.rb",
    "lib/comma_pile/line_parser.rb",
    "lib/comma_pile/pivot_node.rb",
    "lib/comma_pile/report.rb",
    "test/comma_pile_test.rb",
    "test/example_line_parser.rb",
    "test/fixtures/report.csv",
    "test/test_helper.rb"
  ]
  s.homepage = %q{http://github.com/jeremyf/comma_pile}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.4}
  s.summary = %q{Video Stats for an onstreammedia.com log}
  s.test_files = [
    "test/comma_pile_test.rb",
    "test/example_line_parser.rb",
    "test/test_helper.rb"
  ]

  # Only set on RubyGems versions that know about specification versions.
  s.specification_version = 3 if s.respond_to? :specification_version
end
@@ -0,0 +1,40 @@
1
module CommaPile
  # Settings collected by the block passed to CommaPile.new and read by
  # CommaPile::Report.
  #
  # * input / source    - path of the CSV file to read (defaults to './file.csv')
  # * field_names / on  - columns to group by, in nesting order
  # * sum_on            - columns whose values are accumulated per group
  # * conditions        - filter applied to each record (may be nil)
  # * output            - path to write matching rows to (may be nil)
  # * line_parser       - object responding to +with(line)+
  class Config
    attr_writer :input, :line_parser
    attr_accessor :conditions, :output
    attr_reader :field_names

    def initialize
      @field_names = []
      @sum = []
    end

    # Columns to accumulate; append with <tt>config.sum_on << column</tt>.
    def sum_on
      @sum
    end

    # Adds one column or an array of columns to the accumulated columns,
    # dropping duplicates.
    def sum_on=(value)
      @sum = merge_unique(@sum, value)
    end

    # Returns the configured line parser, defaulting to CommaPile::LineParser.
    #
    # Raises RuntimeError unless the parser responds to +with+ and that method
    # has an arity of exactly 1.
    def line_parser
      @line_parser ||= CommaPile::LineParser
      return @line_parser if @line_parser.respond_to?(:with) && @line_parser.method(:with).arity == 1

      raise RuntimeError, "#{self.class}#line_parser must respond to :with and have an arity of 1. The line parser will receive an array of fields"
    end

    # Path of the CSV file to read.
    def input
      @input || './file.csv'
    end
    alias_method :source, :input
    alias_method :source=, :input=

    # Adds one column or an array of columns to the grouping columns,
    # dropping duplicates.
    def field_names=(value)
      @field_names = merge_unique(@field_names, value)
    end

    alias_method :on, :field_names
    alias_method :on=, :field_names=

    private

    # Builds a NEW flattened, de-duplicated array from +existing+ plus +value+.
    # The existing array is not mutated, so passing the accessor's own return
    # value back in (<tt>config.on = config.on</tt>) cannot create a
    # self-referential array that +flatten+ would reject.
    def merge_unique(existing, value)
      (existing + [value]).flatten.uniq
    end
  end
end
@@ -0,0 +1,16 @@
1
module CommaPile
  # Default record wrapper: exposes the fields of one CSV row by index.
  # Subclass it and override +[]+ (or add methods) to transform fields.
  class LineParser
    attr_reader :line

    class << self
      # Wraps +line+ in a parser instance and hands it to the caller's block,
      # returning whatever the block returns.
      def with(line)
        record = new(line)
        yield(record)
      end
    end

    # line - the row, as the collection of fields produced by the CSV reader.
    def initialize(line)
      @line = line
    end

    # Looks up a field on the wrapped row.
    def [](value)
      line[value]
    end
  end
end
@@ -0,0 +1,20 @@
1
module CommaPile
  # One node of the report tree: a Hash of child nodes keyed by field value,
  # plus a row counter and per-column running sums for the rows that reached
  # this node.
  class PivotNode < Hash
    # Hash of column => accumulated total.
    attr_reader :sum
    attr_writer :counter

    def initialize
      super()
      @sum = {}
    end

    # Number of rows counted at this node (0 until incremented).
    def counter
      @counter ||= 0
    end

    def inspect
      "(counter: #{counter}; hash:#{super})"
    end

    # Adds +value+ to the running total for +fieldname+ and returns the new
    # total.
    #
    # CSV cells arrive as Strings (or nil for an empty cell) unless a custom
    # line parser converts them, so numeric text is converted here and a
    # blank cell counts as 0. Numeric values are added unchanged.
    #
    # Raises ArgumentError when +value+ is non-blank text that is not a number.
    def add_to(fieldname, value)
      sum[fieldname] = (sum[fieldname] || 0) + numeric_value_of(value)
    end

    private

    # Converts a cell to a number: whole numbers become Integers, anything
    # else goes through Float().
    def numeric_value_of(value)
      return value if value.is_a?(Numeric)

      text = value.to_s.strip
      return 0 if text.empty?

      text =~ /\A[-+]?\d+\z/ ? text.to_i : Float(text)
    end
  end
end
@@ -0,0 +1,111 @@
1
+ if RUBY_VERSION =~ /^1\.8/
2
+ require 'fastercsv'
3
+ CSV = FCSV
4
+ else
5
+ require 'csv'
6
+ end
7
+ require 'delegate'
8
+
9
module CommaPile
  # Reads the configured CSV file and builds a tree of PivotNodes keyed by the
  # values of the configured columns. The report delegates to the top-level
  # Hash, so <tt>report['key']</tt> returns the node for that first-column value.
  class Report < DelegateClass(Hash)
    attr_reader :results

    # config - an object exposing field_names, input, conditions, output,
    #          line_parser and sum_on (see CommaPile::Config).
    def initialize(config)
      @config = config
      @results = {}
      super(@results)
    end

    # Returns CSV text with one line per leaf node: the leaf's counter, the
    # keys leading to it, then its accumulated sums.
    def summary(entry = nil, parent_keys = [])
      rows = (entry || @results).map do |key, value|
        next if value.nil? # nothing to report for a missing node

        if value.empty?
          cells = [value.counter] + parent_keys + [key] + value.sum.values
          CSV.generate_line(cells.flatten)
        else
          summary(value, parent_keys + [key])
        end
      end
      rows.compact.join
    end

    # Prints the tree to standard output, one tab of indentation per level.
    def to_stdout(index = 0, entry = nil)
      (entry || @results).each do |key, value|
        puts "#{"\t" * index}#{key}: #{value.counter}"
        next unless value.respond_to?(:each)

        to_stdout(index + 1, value) if value && !value.empty?
      end
    end

    # Processes the input file. When an output path is configured, the rows
    # that meet the conditions are also written to it (grouping columns only,
    # preceded by a header row) and, unmodified, to a companion raw file.
    def generate!
      return process_input unless output

      CSV.open(raw_output_path, 'w+') do |raw_csv|
        @raw_csv = raw_csv
        CSV.open(output, 'w+') do |parsed_csv|
          @parsed_csv = parsed_csv
          @parsed_csv << field_names.collect { |f| f.to_s }
          process_input
        end
      end
    ensure
      # Do not keep references to closed CSV handles.
      @raw_csv = nil
      @parsed_csv = nil
    end

    protected

    def field_names; @config.field_names; end
    def input; @config.input; end
    def conditions; @config.conditions; end
    def output; @config.output; end
    def line_parser; @config.line_parser; end
    def sum_on_field_names; @config.sum_on; end

    # Path of the raw companion file: "out.csv" becomes "out.raw.csv". A path
    # without an extension gets ".raw" appended, so the raw file can never be
    # the same file as the parsed output.
    def raw_output_path
      candidate = output.sub(/\.(\w+)$/, '.raw.\1')
      candidate == output ? "#{output}.raw" : candidate
    end

    def process_input
      CSV.foreach(input) do |line|
        parse_line(line) do |record|
          with_conditions_met_for(record) do
            render_output_for(record, line)
            accumulate_entry_for(record)
          end
        end
      end
    end

    def register(line)
    end

    # Writes the record's grouping columns and the untouched source line to
    # the open output files, if any.
    def render_output_for(record, line)
      return nil unless output

      @parsed_csv << field_names.map { |name| record[name] } if @parsed_csv
      @raw_csv << line if @raw_csv
    end

    # Walks down the tree one grouping column at a time, creating nodes as
    # needed, and bumps the counter and sums of every node on the path.
    def accumulate_entry_for(record)
      field_names.inject(self) do |node, field_name|
        key = record[field_name]
        node[key] ||= CommaPile::PivotNode.new
        child = node[key]
        child.counter += 1
        sum_on_field_names.each do |sum_on_field_name|
          child.add_to(sum_on_field_name, record[sum_on_field_name])
        end
        child
      end
    end

    def parse_line(line)
      line_parser.with(line) do |record|
        yield(record) if block_given?
      end
    end

    # Yields exactly once when the record satisfies the configured conditions.
    def with_conditions_met_for(record)
      yield if conditions_met?(record)
    end

    # True when there are no conditions, when a callable condition returns a
    # truthy value, or when every key/value pair of an enumerable condition
    # matches the record.
    def conditions_met?(record)
      return true unless conditions
      return true if conditions.respond_to?(:call) && conditions.call(record)
      return false unless conditions.respond_to?(:all?)

      conditions.all? { |key, expected| condition_matches?(record, key, expected) }
    end

    # Compares one field of the record with an expected value. Integer keys
    # are column indexes and are read with +[]+; any other key names a method
    # on the record. A Regexp is matched as given; any other expected value
    # must equal the whole field text.
    def condition_matches?(record, key, expected)
      actual = key.is_a?(Integer) ? record[key] : record.send(key)
      return false if actual.nil?

      pattern = expected.is_a?(Regexp) ? expected : /\A#{Regexp.escape(expected.to_s)}\z/
      !(pattern =~ actual.to_s).nil?
    end
  end
end
data/lib/comma_pile.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'comma_pile/line_parser'
2
+ require 'comma_pile/pivot_node'
3
+ require 'comma_pile/report'
4
+ require 'comma_pile/config'
5
+
6
module CommaPile
  # Builds a report.
  #
  # Creates a CommaPile::Config, yields it to the block (when one is given) so
  # the caller can set source, grouping columns, conditions and so on, and
  # returns a CommaPile::Report built from that configuration. Call
  # +generate!+ on the result to process the input.
  def self.new
    config = CommaPile::Config.new
    # Calling without a block yields a report with the default configuration
    # instead of raising LocalJumpError.
    yield(config) if block_given?
    CommaPile::Report.new(config)
  end
end
@@ -0,0 +1,169 @@
1
+ require 'test_helper'
2
+ require 'example_line_parser'
3
+
4
# Exercises CommaPile reports against the fixture log in test/fixtures.
class CommaPileTest < Test::Unit::TestCase
  CSV_FILE_PATH = File.join(File.dirname(__FILE__), "../test/fixtures/report.csv")

  # Path the output tests write to. The containing directory is created on
  # demand because test/tmp is ignored by git and is not shipped in the gem.
  def output_filename
    @output_filename ||= begin
      path = File.expand_path(File.join(File.dirname(__FILE__), 'tmp/output.csv'))
      directory = File.dirname(path)
      Dir.mkdir(directory) unless File.directory?(directory)
      path
    end
  end

  should 'not require line_parser' do
    report = CommaPile.new do |config|
      config.source = CSV_FILE_PATH
      config.on = [0]
    end
    report.generate!

    assert_equal 3, report['129.74.105.126'].counter
  end

  # CSV_FILE_PATH = '/Users/jeremyf/Downloads/FullMonthlyRpt_Undame_7_2009.csv'
  should 'have results takes a conditions option that is a hash with string value' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.source = CSV_FILE_PATH
      config.on = [:project, :viewer_geolocation]
      config.conditions = {:viewer_event => 'play'}
    end
    report.generate!

    assert_equal 6, report['vocation'].counter
    assert_equal 5, report['vocation']['off-campus'].counter
    assert_equal 1, report['vocation']['on-campus'].counter
    assert_equal 85, report['commencement'].counter
    assert_equal 85, report['commencement']['off-campus'].counter
    assert_equal 3, report['innovationpark'].counter
    assert_equal 3, report['innovationpark']['off-campus'].counter
  end

  should 'have results takes a conditions option that is a hash with regular express' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.source = CSV_FILE_PATH
      config.on << :project
      config.on << :viewer_event
      config.on << :viewer_geolocation
      config.conditions = {:viewer_event => /(play|stop)/}
    end
    report.generate!

    assert_equal 12, report['vocation'].counter
    assert_equal 6, report['vocation']['play'].counter
    assert_equal 5, report['vocation']['play']['off-campus'].counter
    assert_equal 1, report['vocation']['play']['on-campus'].counter
    assert_equal 6, report['vocation']['stop'].counter
    assert_equal 5, report['vocation']['stop']['off-campus'].counter
    assert_equal 1, report['vocation']['stop']['on-campus'].counter
  end

  should 'have results takes a conditions option that is a lambda' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.source = CSV_FILE_PATH
      config.field_names << :project
      config.on << :viewer_event
      config.conditions = lambda {|r| r.viewer_event == 'play' || r.project == 'vocation'}
    end
    report.generate!

    assert_equal 94, report['vocation'].counter
    assert_equal 6, report['vocation']['play'].counter
    assert_equal 6, report['vocation']['stop'].counter
    assert_nil report['commencement']['stop']
    assert_equal 85, report['commencement']['play'].counter
  end

  should 'have results that no options' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.source = CSV_FILE_PATH
      config.on = [:project, :viewer_event, :viewer_geolocation]
      config.conditions = lambda {|r| r.viewer_event == 'play' || r.project == 'vocation'}
    end
    report.generate!

    assert_equal 94, report['vocation'].counter
    assert_equal 6, report['vocation']['play'].counter
    assert_equal 29, report['vocation']['pause'].counter
    assert_equal 28, report['vocation']['unpause'].counter
    assert_equal 25, report['vocation']['seek'].counter
    assert_equal 6, report['vocation']['stop'].counter
  end

  should 'have results that output conditional matches to a file' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.input = CSV_FILE_PATH
      config.on = :viewer_event
      config.output = output_filename
      config.conditions = {:viewer_event => 'play'}
    end
    report.generate!

    File.readlines(output_filename).each do |line|
      assert_match(/^(viewer_event|play)$/i, line)
      @yielded = true
    end
    assert @yielded
  end

  should 'have sub-results that add up to parent results' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.input = CSV_FILE_PATH
      config.on = [:project, :viewer_event]
    end

    report.generate!

    report.each do |name, collector|
      @yielded = true
      assert_equal collector.counter, collector.inject(0){|m,(k,v)| m += v.counter}
    end

    assert @yielded, "Making sure the above method is called"
  end

  should 'allow column numbers to be used instead of field names' do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.input = CSV_FILE_PATH
      config.on << :viewer_geolocation
      config.on << 0
    end
    report.generate!
    assert_equal 3, report['on-campus'].counter
    assert_equal 94, report['off-campus']['68.45.25.118'].counter
  end

  should "have a summary" do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.input = CSV_FILE_PATH
      config.on << :viewer_geolocation
      config.on << 0
      config.sum_on << :filesize
    end
    report.generate!
    # Parenthesised so the regexp literal is not parsed as a division.
    assert_match(/^#{Regexp.escape('1,off-campus,71.103.212.224,92094')}$/, report.summary)
  end

  should "allow one or more accumulators" do
    report = CommaPile.new do |config|
      config.line_parser = ExampleLineParser
      config.input = CSV_FILE_PATH
      config.on << :viewer_geolocation
      config.on << 0
      config.sum_on << :filesize
    end
    report.generate!

    assert_equal 17155, report['on-campus'].sum[:filesize]
    assert_equal 371622045, report['off-campus'].sum[:filesize]
  end
end
@@ -0,0 +1,43 @@
1
# Example of a custom line parser: gives names to selected columns of the
# fixture log and derives values from them (project, geolocation, ...).
# The header row is skipped so it is never counted.
class ExampleLineParser < CommaPile::LineParser
  INDEX_FOR_IP_ADDRESS = 0
  HEADER_VALUE_FOR_IP_ADDRESS = 'c_ip'
  INDEX_FOR_FILENAME = 65
  INDEX_FOR_CLIENT_SIDE_REFERRER = 14
  INDEX_FOR_VIEWER_EVENT = 54
  INDEX_FOR_FILESIZE = 69

  # Yields a parser for +line+ unless the line is the header row, which is
  # recognised by its first cell.
  def self.with(line)
    super if line[INDEX_FOR_IP_ADDRESS] != HEADER_VALUE_FOR_IP_ADDRESS
  end

  # File size column as an Integer (0 when the cell is blank or not numeric).
  def filesize
    line[INDEX_FOR_FILESIZE].to_i
  end

  # "on-campus" when the IP address starts with 129.74., otherwise
  # "off-campus" (including when the cell is empty).
  def viewer_geolocation
    line[INDEX_FOR_IP_ADDRESS].to_s.strip =~ /^129\.74\./ ? "on-campus" : "off-campus"
  end

  def viewer_event
    line[INDEX_FOR_VIEWER_EVENT]
  end

  # Path segment that follows "undame" in the filename column, or nil when
  # the column is empty or does not contain such a segment.
  def project
    filename = line[INDEX_FOR_FILENAME]
    found = filename && filename.match(/[\\|\/]?undame[\\|\/]([^\\|\/]*)/i)
    found && found[1]
  end

  # Base name of the filename column, treating backslashes as separators.
  def system_name
    filename = line[INDEX_FOR_FILENAME]
    File.basename(filename.gsub(/\\/,'/')) if filename
  end

  def referrer
    line[INDEX_FOR_CLIENT_SIDE_REFERRER]
  end

  # Integer arguments read a raw column; anything else is treated as the name
  # of one of the methods above.
  def [](value)
    if value.is_a?(Integer)
      line[value]
    else
      send(value)
    end
  end
end