ruby_reduce 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/README.md +71 -0
- data/Rakefile +1 -0
- data/lib/ruby_reduce.rb +25 -0
- data/lib/ruby_reduce/input_reader.rb +42 -0
- data/lib/ruby_reduce/map.rb +22 -0
- data/lib/ruby_reduce/out_put.rb +16 -0
- data/lib/ruby_reduce/reduce.rb +26 -0
- data/lib/ruby_reduce/version.rb +3 -0
- data/ruby_reduce.gemspec +21 -0
- metadata +91 -0
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
# Ruby Reduce
|
2
|
+
|
3
|
+
Ruby Reduce is a small library designed to run reduce commands on a Rails 3.x log without needing to set up a Hadoop cluster to do it. It is basically designed to be a small-scale reduce function. The original idea was to make the gem pluggable so that you could plug in different input readers to chunk non-Rails 3 log files. A similar plugin design was planned for the output, but that has not happened yet. Currently it only accepts Rails 3 log files as input and writes the results to MongoDB.
|
4
|
+
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
`gem install ruby_reduce`
|
9
|
+
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
Usage is straightforward, as demonstrated by this code snippet.
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
require 'date' #only required because I parse a date
|
17
|
+
require 'ruby_reduce' # 1) Require ruby reduce
|
18
|
+
|
19
|
+
module RubyReduce
|
20
|
+
#return host and port and mongo_db (or nothing for default host and port)
|
21
|
+
def self.mongo_connection
|
22
|
+
end
|
23
|
+
|
24
|
+
#actual mongo db to connect to
|
25
|
+
def self.mongo_db
|
26
|
+
return 'test_db'
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
#actually running the reduce function
|
31
|
+
r = RubyReduce.reduce({
|
32
|
+
:input => '../production.log', #the location of the log file to reduce
|
33
|
+
# The map function can be anything that responds to call and accepts 2 parameters.
|
34
|
+
# The first parameter is the log file name and the second is all the log statements
|
35
|
+
# for a single rails request. The only thing required here is that you **emit** a
|
36
|
+
# key value pair to reduce later. Simply call the `emit(key, value)` function to accomplish this.
|
37
|
+
:map => Proc.new do |key, value|
|
38
|
+
processed_by = /Processing by (\w*#\w*)/.match(value)
|
39
|
+
|
40
|
+
unless processed_by.nil?
|
41
|
+
emit processed_by[1].gsub!("#", "_"), value
|
42
|
+
end
|
43
|
+
end,
|
44
|
+
# The reduce function can be anything that responds to call and takes one argument.
|
45
|
+
# This argument will be the value emitted in the map function (so reduce is called once
|
46
|
+
# for every key value pair emitted). Also in the reduce function you need to **emit**
|
47
|
+
# the result of the reduce. Simply call `emit(result)` to tell RubyReduce what the result is.
|
48
|
+
:reduce => Proc.new do |log_statement|
|
49
|
+
date = /Started [GET|POST|DELETE|PUT|HEAD].* at (.*)/.match(log_statement)
|
50
|
+
unless date.nil?
|
51
|
+
processing_time = /Completed \d* .* in (\d*)/.match(log_statement)[1]
|
52
|
+
emit({'date' => Time.parse(date[1]), 'processed_time' => processing_time})
|
53
|
+
end
|
54
|
+
end,
|
55
|
+
#MongoDB collection name to write results to
|
56
|
+
:output => 'graph_data'
|
57
|
+
})
|
58
|
+
```
|
59
|
+
|
60
|
+
|
61
|
+
Once all of the key value pairs have been reduced, the results will be collected by key and written into MongoDB with one document for each key emitted in the map function. The id (_id) of this document is the emitted key.
|
62
|
+
|
63
|
+
|
64
|
+
## Limitations
|
65
|
+
|
66
|
+
Right now this is at best alpha software. There are no tests, and while I use it for my own projects, it has not really been tried on a wide range of problems. Your feedback is welcome.
|
67
|
+
|
68
|
+
|
69
|
+
## Questions
|
70
|
+
|
71
|
+
Contact me at joshsmoore@gmail.com with questions, comments, or just to say that you are using the library and want me to continue work on it.
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
data/lib/ruby_reduce.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require "ruby_reduce/version"
|
2
|
+
require 'ruby_reduce/input_reader'
|
3
|
+
require 'ruby_reduce/out_put'
|
4
|
+
require 'ruby_reduce/reduce'
|
5
|
+
require 'ruby_reduce/map'
|
6
|
+
|
7
|
+
require 'mongo'
|
8
|
+
|
9
|
+
module RubyReduce
  # Runs the whole pipeline: read the input, map every chunk, reduce the
  # mapped values and hand the reduced result to the output writer.
  #
  # options - Hash read with the following keys:
  #           :input  - passed to InputReader.new
  #           :map    - passed to Map.new as the map function
  #           :reduce - passed to Reduce.new as the reduce function
  #           :output - passed to OutputWriter.new
  #
  # Returns whatever OutputWriter#write returns.
  def self.reduce(options)
    chunks = InputReader.new(options[:input]).read

    # Run the map phase exactly once and reuse its result. Invoking Map#map a
    # second time re-runs the map function over every chunk, which feeds each
    # emitted value to the reducer twice.
    mapped = Map.new(chunks, options[:map]).map

    reduced = Reduce.new(mapped, options[:reduce]).reduce

    OutputWriter.new(options[:output]).write(reduced)
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module RubyReduce
  # Splits a log file into chunks of text, one chunk per request, where a
  # request begins at a line starting with "Started".
  class InputReader
    # file_name - path of the log file to read; also used as the key of every
    #             emitted chunk.
    def initialize(file_name)
      @output = []
      @file_name = file_name
    end

    # Records one finished chunk as a [key, chunk] pair.
    def emit_chunk(key, chunk)
      @output << [key, chunk]
    end

    # Reads the file line by line and groups the lines into request chunks.
    #
    # * A "Started" line opens a new request, unless it also mentions
    #   "newrelic", in which case the line is ignored.
    # * A "Completed <digit>" line closes the oldest open request and emits it.
    # * A "Completed in" line (no status code) marks the start of an error
    #   section; the request is then closed by the next line containing
    #   "within rescues/layout".
    # * Blank lines are dropped; any other line is appended to the oldest open
    #   request.
    #
    # Lines that arrive while no request is open are skipped. That is the case
    # for the completion line of an ignored newrelic request and for a log
    # that begins in the middle of a request.
    #
    # Returns the Array of [file_name, chunk_text] pairs emitted so far.
    def read
      current_chunks = []
      in_error = false

      # File.foreach closes the file once iteration finishes (or raises).
      File.foreach(@file_name) do |line|
        if line =~ /^Started/
          current_chunks << [line] unless line =~ /newrelic/
          next
        end

        open_chunk = current_chunks.first
        # Nothing to attach this line to; appending to nil would raise.
        next if open_chunk.nil?

        if line =~ /^Completed \d/
          open_chunk << line
          emit_chunk(@file_name, open_chunk.join(''))
          current_chunks.delete_at 0
        elsif line =~ /^Completed(\s)*in/
          in_error = true
          open_chunk << line
        elsif in_error && line =~ /within rescues\/layout/
          in_error = false
          open_chunk << line
          emit_chunk(@file_name, open_chunk.join(''))
          current_chunks.delete_at 0
        elsif line == '' || line == "\n"
          # blank line: not part of any chunk
        else
          open_chunk << line
        end
      end

      @output
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module RubyReduce
  # Map phase: runs the user-supplied map function over every chunk and
  # collects the key/value pairs it emits, grouped by key.
  class Map
    # reader_output - enumerable of two-element chunks; element 0 and element 1
    #                 are passed to the function as its two arguments.
    # function      - block-convertible object evaluated with this Map as
    #                 self, so it can call #emit directly.
    def initialize(reader_output, function)
      @reader_output = reader_output
      @mapped_values = {}
      @function = function
    end

    # Appends value to the list of values collected for key.
    def emit(key, value)
      @mapped_values[key] ||= []
      @mapped_values[key] << value
    end

    # Runs the map function once per chunk.
    #
    # The accumulator is reset first so that calling #map more than once
    # yields the same result each time instead of appending every value again.
    #
    # Returns a Hash of key => Array of emitted values.
    def map
      @mapped_values = {}

      @reader_output.each do |chunk|
        # instance_exec makes this object the receiver of bare `emit` calls.
        instance_exec chunk[0], chunk[1], &@function
      end

      @mapped_values
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module RubyReduce
  # Writes reduced results into a MongoDB collection.
  class OutputWriter
    # options - name of the collection to write to.
    #
    # The connection arguments come from RubyReduce.mongo_connection and the
    # database name from RubyReduce.mongo_db, both of which the host
    # application is expected to define.
    def initialize(options)
      @collection_name = options

      # Splat the connection settings so that a [host, port] pair is passed as
      # two arguments rather than as a single array "host"; a nil return still
      # results in a call with no arguments.
      @mongo_db = Mongo::Connection.new(*RubyReduce.mongo_connection).db(RubyReduce.mongo_db)
    end

    # Replaces the collection's contents with one document per key.
    #
    # data - Hash (or other each-pair enumerable) of key => values; each pair
    #        is stored as {:_id => key, :value => values}.
    #
    # NOTE: existing documents are removed before the inserts, so the
    # collection is briefly empty while this runs.
    #
    # Returns data (the result of data.each).
    def write(data)
      collection = @mongo_db[@collection_name]
      collection.remove()

      data.each do |key, values|
        collection.insert({:_id => key, :value => values})
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module RubyReduce
  # Reduce phase: runs the user-supplied reduce function once per mapped
  # value and collects what it emits under the key the value was mapped to.
  class Reduce
    # map_result - Hash of key => Array of values (the shape Map#map returns).
    # reduce     - block-convertible object evaluated with this Reduce as
    #              self, so it can call #emit directly.
    def initialize(map_result, reduce)
      @map_result = map_result
      @reduce = reduce

      @result = {}
      # Key of the value currently being reduced; set by #reduce.
      @current_key = nil
    end

    # Appends result to the list collected for the key being reduced.
    def emit(result)
      @result[@current_key] ||= []
      @result[@current_key] << result
    end

    # Runs the reduce function for every value of every key.
    #
    # The accumulator is reset first so that calling #reduce more than once
    # yields the same result each time instead of appending everything again.
    #
    # Returns a Hash of key => Array of emitted results. Keys for which the
    # function never emitted are absent.
    def reduce
      @result = {}

      @map_result.each do |key, values|
        @current_key = key
        values.each do |value|
          # instance_exec makes this object the receiver of bare `emit` calls.
          instance_exec value, &@reduce
        end
      end

      @result
    end
  end
end
|
data/ruby_reduce.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "ruby_reduce/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
  s.name        = "ruby_reduce"
  s.version     = RubyReduce::VERSION
  s.authors     = ["Josh Moore"]
  s.email       = ["joshsmoore@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{Map reduce library for Ruby}
  s.description = %q{Map reduce library for Ruby for Rails 3 log files}

  s.rubyforge_project = "ruby_reduce"

  # Package every file tracked by git; tests and executables are picked up
  # from the conventional directories.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # The output writer uses Mongo::Connection, which only exists in the 1.x
  # series of the driver, so do not allow 2.x to be resolved.
  s.add_dependency 'mongo', '< 2.0'
end
|
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ruby_reduce
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Josh Moore
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-12-14 00:00:00 +08:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: mongo
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Map reduce library for Ruby for Rails 3 log files
|
36
|
+
email:
|
37
|
+
- joshsmoore@gmail.com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions: []
|
41
|
+
|
42
|
+
extra_rdoc_files: []
|
43
|
+
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- Gemfile
|
47
|
+
- README.md
|
48
|
+
- Rakefile
|
49
|
+
- lib/ruby_reduce.rb
|
50
|
+
- lib/ruby_reduce/input_reader.rb
|
51
|
+
- lib/ruby_reduce/map.rb
|
52
|
+
- lib/ruby_reduce/out_put.rb
|
53
|
+
- lib/ruby_reduce/reduce.rb
|
54
|
+
- lib/ruby_reduce/version.rb
|
55
|
+
- ruby_reduce.gemspec
|
56
|
+
has_rdoc: true
|
57
|
+
homepage: ""
|
58
|
+
licenses: []
|
59
|
+
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
hash: 3
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
83
|
+
requirements: []
|
84
|
+
|
85
|
+
rubyforge_project: ruby_reduce
|
86
|
+
rubygems_version: 1.3.7
|
87
|
+
signing_key:
|
88
|
+
specification_version: 3
|
89
|
+
summary: Map reduce library for Ruby
|
90
|
+
test_files: []
|
91
|
+
|