github_archive_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. checksums.yaml +7 -0
  2. data/bin/github_archive_parser +9 -0
  3. data/lib/github_archive_parser/XCust.rb +10 -0
  4. data/lib/github_archive_parser/event_handler.rb +13 -0
  5. data/lib/github_archive_parser/event_handlers/commit_comment_event.rb +5 -0
  6. data/lib/github_archive_parser/event_handlers/create_event.rb +5 -0
  7. data/lib/github_archive_parser/event_handlers/delete_event.rb +5 -0
  8. data/lib/github_archive_parser/event_handlers/deployment_event.rb +5 -0
  9. data/lib/github_archive_parser/event_handlers/deployment_status_event.rb +5 -0
  10. data/lib/github_archive_parser/event_handlers/download_event.rb +5 -0
  11. data/lib/github_archive_parser/event_handlers/follow_event.rb +5 -0
  12. data/lib/github_archive_parser/event_handlers/fork_apply_event.rb +5 -0
  13. data/lib/github_archive_parser/event_handlers/fork_event.rb +5 -0
  14. data/lib/github_archive_parser/event_handlers/gist_event.rb +5 -0
  15. data/lib/github_archive_parser/event_handlers/gollum_event.rb +5 -0
  16. data/lib/github_archive_parser/event_handlers/issue_comment_event.rb +5 -0
  17. data/lib/github_archive_parser/event_handlers/issues_event.rb +5 -0
  18. data/lib/github_archive_parser/event_handlers/member_event.rb +5 -0
  19. data/lib/github_archive_parser/event_handlers/public_event.rb +5 -0
  20. data/lib/github_archive_parser/event_handlers/pull_request_event.rb +5 -0
  21. data/lib/github_archive_parser/event_handlers/pull_request_review_comment_event.rb +5 -0
  22. data/lib/github_archive_parser/event_handlers/push_event.rb +5 -0
  23. data/lib/github_archive_parser/event_handlers/release_event.rb +5 -0
  24. data/lib/github_archive_parser/event_handlers/status_event.rb +5 -0
  25. data/lib/github_archive_parser/event_handlers/team_add_event.rb +5 -0
  26. data/lib/github_archive_parser/event_handlers/watch_event.rb +5 -0
  27. data/lib/github_archive_parser/initialize.rb +9 -0
  28. data/lib/github_archive_parser/log.rb +59 -0
  29. data/lib/github_archive_parser/version.rb +3 -0
  30. data/lib/github_archive_parser.rb +97 -0
  31. data/spec/log_spec.rb +54 -0
  32. metadata +75 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dbbcc01faaf08baed69942f2c59ffe2fca262966
4
+ data.tar.gz: 4aaba6c19c5caa3e2a4b17fa331e2a0411cac792
5
+ SHA512:
6
+ metadata.gz: f4e608379a73168cf4de92cd2ef11e62fb022f3f778afa43d57dd3508e26d9b188cc26114d86a5cac4b55c8638a856545a0e3ce2f525f17abdd19c633feb9cf3
7
+ data.tar.gz: 08d9faf62bdad2358c01d2f65440b8812fc91965ecc4c16f08af6fc98fb5dd4703914d43c9541b31d9b21ac7e4459b7b99ef1d7e7c2e4b2a9bf752eb3f0f9de9
@@ -0,0 +1,9 @@
#!/usr/bin/env ruby

# Command-line entry point: every argument left in ARGV is handed to the
# processor as an archive URL. Processor.new parses the option flags
# destructively, so by the time ARGV is iterated the flags it recognised
# have already been removed.
lib_directory = File.expand_path("../../lib", __FILE__)
$LOAD_PATH.unshift(lib_directory)
require 'github_archive_parser'

processor = GitHubArchiveParser::Processor.new
ARGV.each { |url| processor.process(url) }
@@ -0,0 +1,10 @@
module AwesomeApplication
  # Example handler: because it includes GitHubArchiveParser::CreateEvent it is
  # picked up as a handler for that event type, and prints the repository as
  # "owner/name" for every event it receives.
  class PrintCreateEvent
    include GitHubArchiveParser::CreateEvent

    # @param event [Hashie::Mash] the event, wrapped for easy (dot) access
    def parse(event)
      repository = event.repository
      puts "#{repository.owner}/#{repository.name}"
    end
  end
end
@@ -0,0 +1,13 @@
module GitHubArchiveParser
  # Base mixin for the event-type modules. Any module that includes
  # EventHandler is extended with ClassMethods, which lets it enumerate the
  # classes that mix it in.
  module EventHandler
    # Methods added to the including module itself (not to its instances).
    module ClassMethods
      # Lists every currently loaded class that has the receiver among its
      # ancestors.
      #
      # The scan is repeated on every call instead of being memoized: a cached
      # result would silently omit handler classes defined after the first
      # call. The order of the returned classes is whatever ObjectSpace
      # yields and should not be relied upon.
      #
      # @return [Array<Class>] classes for which `klass < self` holds
      def descendants
        ObjectSpace.each_object(Class).select { |klass| klass < self }
      end
    end

    # Hook run when EventHandler is included; gives the including module the
    # `descendants` class-level method.
    #
    # @param base [Module] the module (or class) including EventHandler
    def self.included(base)
      base.extend(ClassMethods)
    end
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "CommitCommentEvent". Handler classes include this
  # module; Processor resolves it from the event's `type` and calls `parse`
  # on an instance of every class that includes it.
  module CommitCommentEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "CreateEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module CreateEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "DeleteEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module DeleteEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "DeploymentEvent". Handler classes include this
  # module; Processor resolves it from the event's `type` and calls `parse`
  # on an instance of every class that includes it.
  module DeploymentEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "DeploymentStatusEvent". Handler classes include
  # this module; Processor resolves it from the event's `type` and calls
  # `parse` on an instance of every class that includes it.
  module DeploymentStatusEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "DownloadEvent". Handler classes include this
  # module; Processor resolves it from the event's `type` and calls `parse`
  # on an instance of every class that includes it.
  module DownloadEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "FollowEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module FollowEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "ForkApplyEvent". Handler classes include this
  # module; Processor resolves it from the event's `type` and calls `parse`
  # on an instance of every class that includes it.
  module ForkApplyEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "ForkEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module ForkEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "GistEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module GistEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "GollumEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module GollumEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "IssueCommentEvent". Handler classes include this
  # module; Processor resolves it from the event's `type` and calls `parse`
  # on an instance of every class that includes it.
  module IssueCommentEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "IssuesEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module IssuesEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "MemberEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module MemberEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "PublicEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module PublicEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "PullRequestEvent". Handler classes include this
  # module; Processor resolves it from the event's `type` and calls `parse`
  # on an instance of every class that includes it.
  module PullRequestEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "PullRequestReviewCommentEvent". Handler classes
  # include this module; Processor resolves it from the event's `type` and
  # calls `parse` on an instance of every class that includes it.
  module PullRequestReviewCommentEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "PushEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module PushEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "ReleaseEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module ReleaseEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "StatusEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module StatusEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "TeamAddEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module TeamAddEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,5 @@
module GitHubArchiveParser
  # Event-type mixin for "WatchEvent". Handler classes include this module;
  # Processor resolves it from the event's `type` and calls `parse` on an
  # instance of every class that includes it.
  module WatchEvent
    include GitHubArchiveParser::EventHandler
  end
end
@@ -0,0 +1,9 @@
require 'pathname'
require 'ostruct'
require 'optparse'
require 'open-uri'
require 'zlib'
require 'yajl'
require 'hashie'

# Load every library file in dependency order. A bare Dir.glob gives no
# usable ordering: the event-type modules need EventHandler to exist, and
# XCust.rb needs the event-type modules, yet in sorted order "XCust.rb"
# comes before both. `require` skips files it has already loaded, so the
# final catch-all glob only picks up whatever is still missing.
library_root = File.dirname(__FILE__)

require File.join(library_root, 'event_handler.rb')
Dir.glob(File.join(library_root, 'event_handlers', '*.rb')).sort.each { |file| require file }
Dir.glob(File.join(library_root, '**', '*.rb')).sort.each { |file| require file }
@@ -0,0 +1,59 @@
require 'logger'
require 'singleton'

module GitHubArchiveParser
  # Singleton facade around a stdlib Logger writing to STDOUT.
  #
  # Any public Logger method (info, warn, level=, progname, ...) can be called
  # directly on the class (`Log.info "..."`); the call is forwarded to the
  # shared Logger through `method_missing`.
  class Log
    include Singleton

    attr_accessor :logger, :base_directory, :debugging

    # Builds the shared logger: ERROR level, output is the bare message
    # followed by a newline. `base_directory` is the directory two levels
    # above this file (with a trailing slash) and is stripped from caller
    # paths by `parse_caller`.
    def initialize
      @base_directory = File.expand_path("../..", __FILE__) + "/"
      @debugging = false
      @logger = Logger.new(STDOUT)
      @logger.level = Logger::ERROR
      @logger.formatter = proc { |_severity, _datetime, _progname, msg| "#{msg}\n" }
    end

    # Turns on debugging mode: every forwarded call records its call site as
    # the logger's progname, and output becomes "SEVERITY [progname]: message".
    def self.use_debug
      instance.debugging = true
      instance.logger.formatter = proc do |severity, _datetime, progname, msg|
        "#{severity} [#{progname}]: #{msg}\n"
      end
    end

    # Determine the file and line number of the caller.
    #
    # @param message [String, nil] a backtrace entry such as
    #   "lib/foo.rb:12:in `bar'"
    # @return [String, nil] "file:line" with the first occurrence of
    #   `base_directory` removed from the file part, or nil when the entry is
    #   nil or does not look like "file:line"
    def self.parse_caller(message)
      location = /^(?<file>.+?):(?<line>\d+)(?::in `(?<method>.*)')?/.match(message)
      return nil unless location

      "#{location[:file].sub(instance.base_directory, "")}:#{location[:line]}"
    end

    # Forwards any call the underlying Logger publicly responds to; everything
    # else falls through to the default NoMethodError. In debugging mode the
    # call site of the forwarded call is stored as the progname first.
    def self.method_missing(method, *args, &blk)
      return super unless valid_method?(method)

      instance.logger.progname = parse_caller(caller(1).first) if instance.debugging
      instance.logger.public_send(method, *args, &blk)
    end

    # Keeps `respond_to?` in agreement with `method_missing`.
    def self.respond_to_missing?(method, include_all=false)
      valid_method?(method) || super
    end

    # @param method [Symbol, String] method name
    # @return [Boolean] whether the underlying Logger responds to it
    def self.valid_method?(method)
      instance.logger.respond_to? method
    end

  end
end
@@ -0,0 +1,3 @@
module GitHubArchiveParser
  # Gem version string; Processor#parse_options hands it to OptionParser as
  # the program version. Frozen so the shared constant cannot be mutated.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,97 @@
1
+ require 'github_archive_parser/initialize'
2
+
module GitHubArchiveParser
  # Downloads GitHub Archive dumps and hands every event they contain to the
  # handler objects registered for that event's type.
  #
  # A handler is any class that includes one of the event-type modules
  # (CreateEvent, PushEvent, ...). One instance of each such class is created
  # when the processor is built.
  class Processor
    # Only URLs starting with this prefix are downloaded.
    ARCHIVE_URL_PREFIX = "http://data.githubarchive.org/".freeze

    attr_reader :options

    # Parses the command-line flags in ARGV (removing the ones it recognises),
    # configures logging accordingly and instantiates the handlers.
    def initialize
      @options = OpenStruct.new(debug: false)
      parse_options
      determine_log_level

      # Create the concrete handlers and store them for future use
      create_event_handlers
    end

    # Downloads one gzip-compressed archive file and dispatches each event in
    # it. URLs that are nil or outside ARCHIVE_URL_PREFIX are only logged.
    #
    # @param url [String, nil] address of the archive file
    def process(url)
      Log.info "Processing #{url}"
      if url && url.start_with?(ARCHIVE_URL_PREFIX)
        Yajl::Parser.parse(download(url)) do |event|
          dispatch(Hashie::Mash.new(event))
        end
      else
        Log.warn "URL[#{url}] does not belong to http://data.githubarchive.org/"
      end
    end

    # Reads the supported flags from ARGV with OptionParser#parse!.
    #
    # "--quiet" is the intended spelling; the original misspelling "--quite"
    # (and the matching `options.quite` field) is kept so existing callers
    # keep working.
    def parse_options
      OptionParser.new do |opt|
        opt.version = VERSION
        opt.on "-d", "--debug", "Debug output (shows DEBUG level log statements)" do
          options.debug = true
        end
        opt.on "-q", "--quiet", "--quite", "Hide all output (shows only UNKNOWN level log statements)" do
          options.quiet = true
          options.quite = true
        end
      end.parse!
    end

    private

    # Fetches the URL and returns the decompressed body as a String.
    #
    # URI#open (from open-uri) is used instead of Kernel#open, which no longer
    # opens URLs on Ruby 3. The block form closes the download when done, and
    # `finish` releases the gzip reader without closing the underlying stream
    # a second time.
    def download(url)
      URI.parse(url).open do |stream|
        gzip = Zlib::GzipReader.new(stream)
        begin
          gzip.read
        ensure
          gzip.finish
        end
      end
    end

    # Passes the event to every handler registered for its type. Events whose
    # type is unknown, or has no handlers, are skipped instead of raising.
    #
    # @param event [#type] the event; `process` passes a Hashie::Mash
    def dispatch(event)
      event_class = class_from_string("GitHubArchiveParser::#{event.type}")
      Array(@event_handlers[event_class]).each do |handler|
        handler.parse(event) if handler.respond_to?(:parse)
      end
    end

    # Maps the parsed flags to a log level; debug takes precedence over quiet.
    def determine_log_level
      if options.debug
        Log.level = Logger::DEBUG
        Log.use_debug
      elsif options.quite
        Log.level = Logger::UNKNOWN
      else
        Log.level = Logger::INFO
      end
    end

    # Resolves a "A::B" constant path. Each segment is looked up without
    # inheritance, so "GitHubArchiveParser::String" does not resolve to the
    # top-level ::String.
    #
    # @param string [String] fully qualified constant name
    # @return [Module, nil] the constant, or nil (after a warning) when it
    #   does not exist or is not a valid constant name
    def class_from_string(string)
      string.split('::').inject(Object) do |mod, class_name|
        mod.const_get(class_name, false)
      end
    rescue NameError
      Log.warn "Event #{string} not found"
      nil
    end

    # Builds @event_handlers: event-type module => array holding one instance
    # of every class that includes that module.
    def create_event_handlers
      # Probably can do something to not hardcode this
      event_types = [
        CommitCommentEvent, CreateEvent, DeleteEvent,
        DeploymentEvent, DeploymentStatusEvent, DownloadEvent,
        FollowEvent, ForkApplyEvent, ForkEvent,
        GistEvent, GollumEvent, IssueCommentEvent,
        IssuesEvent, MemberEvent, PublicEvent,
        PullRequestEvent, PullRequestReviewCommentEvent, PushEvent,
        ReleaseEvent, StatusEvent, TeamAddEvent,
        WatchEvent
      ]

      @event_handlers = {}
      event_types.each do |event_type|
        # Map list of concrete event handler to their event type
        @event_handlers[event_type] = event_type.descendants.map { |handler| handler.new }
      end
    end
  end
end
data/spec/log_spec.rb ADDED
@@ -0,0 +1,54 @@
require 'spec_helper'

include GitHubArchiveParser

# Specs for the Log facade. Every example uses a new anonymous subclass of
# Log (presumably so singleton state is not shared between examples).
describe Log do

  let(:log) { Class.new(Log) }

  context "initializes instance" do
    it "should acts as singleton" do
      log.instance.should == log.instance
    end

    it "should have a logger" do
      log.instance.logger.should be_a(Logger)
    end

    it "should be a Log (FakeLog)" do
      log.class.should eq(Log.class)
    end

    it "should react to Logger methods" do
      Logger.public_instance_methods.each { |name| log.valid_method?(name).should be_true }
    end
  end

  context "#use_debug" do
    it "logger's progname before" do
      log.progname.should be_nil
    end

    it "logger's progname after" do
      log.use_debug
      log.progname.should_not be_nil
    end
  end

  context "#parse_caller" do
    context "with nothing" do
      it do
        log.parse_caller(nil).should be_nil
      end
    end

    context "with jumble (random text)" do
      it do
        log.parse_caller("asdaacsdc").should be_nil
      end
    end

    context "with valid caller" do
      it do
        entry = "github_archive_parser/lib/github_archive_parser.rb:45:in `respond_to_missing?'"
        log.parse_caller(entry).should eq("github_archive_parser/lib/github_archive_parser.rb:45")
      end
    end
  end

end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: github_archive_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kevin Jalbert
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-17 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Gem which parses GitHub Archive data
14
+ email:
15
+ - kevin.j.jalbert@gmail.com
16
+ executables:
17
+ - github_archive_parser
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - bin/github_archive_parser
22
+ - lib/github_archive_parser.rb
23
+ - lib/github_archive_parser/XCust.rb
24
+ - lib/github_archive_parser/event_handler.rb
25
+ - lib/github_archive_parser/event_handlers/commit_comment_event.rb
26
+ - lib/github_archive_parser/event_handlers/create_event.rb
27
+ - lib/github_archive_parser/event_handlers/delete_event.rb
28
+ - lib/github_archive_parser/event_handlers/deployment_event.rb
29
+ - lib/github_archive_parser/event_handlers/deployment_status_event.rb
30
+ - lib/github_archive_parser/event_handlers/download_event.rb
31
+ - lib/github_archive_parser/event_handlers/follow_event.rb
32
+ - lib/github_archive_parser/event_handlers/fork_apply_event.rb
33
+ - lib/github_archive_parser/event_handlers/fork_event.rb
34
+ - lib/github_archive_parser/event_handlers/gist_event.rb
35
+ - lib/github_archive_parser/event_handlers/gollum_event.rb
36
+ - lib/github_archive_parser/event_handlers/issue_comment_event.rb
37
+ - lib/github_archive_parser/event_handlers/issues_event.rb
38
+ - lib/github_archive_parser/event_handlers/member_event.rb
39
+ - lib/github_archive_parser/event_handlers/public_event.rb
40
+ - lib/github_archive_parser/event_handlers/pull_request_event.rb
41
+ - lib/github_archive_parser/event_handlers/pull_request_review_comment_event.rb
42
+ - lib/github_archive_parser/event_handlers/push_event.rb
43
+ - lib/github_archive_parser/event_handlers/release_event.rb
44
+ - lib/github_archive_parser/event_handlers/status_event.rb
45
+ - lib/github_archive_parser/event_handlers/team_add_event.rb
46
+ - lib/github_archive_parser/event_handlers/watch_event.rb
47
+ - lib/github_archive_parser/initialize.rb
48
+ - lib/github_archive_parser/log.rb
49
+ - lib/github_archive_parser/version.rb
50
+ - spec/log_spec.rb
51
+ homepage: https://github.com/kevinjalbert/github_archive_parser
52
+ licenses: []
53
+ metadata: {}
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - '>='
61
+ - !ruby/object:Gem::Version
62
+ version: 1.9.3
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - '>='
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubyforge_project:
70
+ rubygems_version: 2.2.1
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Gem which parses GitHub Archive data
74
+ test_files:
75
+ - spec/log_spec.rb