github_archive_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. checksums.yaml +7 -0
  2. data/bin/github_archive_parser +9 -0
  3. data/lib/github_archive_parser/XCust.rb +10 -0
  4. data/lib/github_archive_parser/event_handler.rb +13 -0
  5. data/lib/github_archive_parser/event_handlers/commit_comment_event.rb +5 -0
  6. data/lib/github_archive_parser/event_handlers/create_event.rb +5 -0
  7. data/lib/github_archive_parser/event_handlers/delete_event.rb +5 -0
  8. data/lib/github_archive_parser/event_handlers/deployment_event.rb +5 -0
  9. data/lib/github_archive_parser/event_handlers/deployment_status_event.rb +5 -0
  10. data/lib/github_archive_parser/event_handlers/download_event.rb +5 -0
  11. data/lib/github_archive_parser/event_handlers/follow_event.rb +5 -0
  12. data/lib/github_archive_parser/event_handlers/fork_apply_event.rb +5 -0
  13. data/lib/github_archive_parser/event_handlers/fork_event.rb +5 -0
  14. data/lib/github_archive_parser/event_handlers/gist_event.rb +5 -0
  15. data/lib/github_archive_parser/event_handlers/gollum_event.rb +5 -0
  16. data/lib/github_archive_parser/event_handlers/issue_comment_event.rb +5 -0
  17. data/lib/github_archive_parser/event_handlers/issues_event.rb +5 -0
  18. data/lib/github_archive_parser/event_handlers/member_event.rb +5 -0
  19. data/lib/github_archive_parser/event_handlers/public_event.rb +5 -0
  20. data/lib/github_archive_parser/event_handlers/pull_request_event.rb +5 -0
  21. data/lib/github_archive_parser/event_handlers/pull_request_review_comment_event.rb +5 -0
  22. data/lib/github_archive_parser/event_handlers/push_event.rb +5 -0
  23. data/lib/github_archive_parser/event_handlers/release_event.rb +5 -0
  24. data/lib/github_archive_parser/event_handlers/status_event.rb +5 -0
  25. data/lib/github_archive_parser/event_handlers/team_add_event.rb +5 -0
  26. data/lib/github_archive_parser/event_handlers/watch_event.rb +5 -0
  27. data/lib/github_archive_parser/initialize.rb +9 -0
  28. data/lib/github_archive_parser/log.rb +59 -0
  29. data/lib/github_archive_parser/version.rb +3 -0
  30. data/lib/github_archive_parser.rb +97 -0
  31. data/spec/log_spec.rb +54 -0
  32. metadata +75 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dbbcc01faaf08baed69942f2c59ffe2fca262966
4
+ data.tar.gz: 4aaba6c19c5caa3e2a4b17fa331e2a0411cac792
5
+ SHA512:
6
+ metadata.gz: f4e608379a73168cf4de92cd2ef11e62fb022f3f778afa43d57dd3508e26d9b188cc26114d86a5cac4b55c8638a856545a0e3ce2f525f17abdd19c633feb9cf3
7
+ data.tar.gz: 08d9faf62bdad2358c01d2f65440b8812fc91965ecc4c16f08af6fc98fb5dd4703914d43c9541b31d9b21ac7e4459b7b99ef1d7e7c2e4b2a9bf752eb3f0f9de9
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.expand_path("../../lib", __FILE__) # put the lib directory (two levels up from this file, then lib) first on the load path
4
+ require 'github_archive_parser'
5
+
6
+ github_archive_parser = GitHubArchiveParser::Processor.new # Processor#initialize runs OptionParser#parse! before this script reads ARGV
7
+ ARGV.each do |url| # every argument still in ARGV is passed to Processor#process as a URL
8
+ github_archive_parser.process(url)
9
+ end
@@ -0,0 +1,10 @@
1
+ module AwesomeApplication
2
+ class PrintCreateEvent # Example handler: prints "<owner>/<name>" of the event's repository
3
+ include GitHubArchiveParser::CreateEvent # makes this class show up in CreateEvent.descendants, which Processor instantiates
4
+
5
+ def parse(event) # called by Processor#process for each event whose type resolves to CreateEvent
6
+ # The event is a Hashie::Mash object for easy (dot) access
7
+ puts "#{event.repository.owner}/#{event.repository.name}"
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,13 @@
module GitHubArchiveParser
  # Base mixin for the event-type modules (CreateEvent, PushEvent, ...).
  #
  # Including EventHandler into a module extends that module with
  # ClassMethods, so the event-type module itself answers `descendants`.
  module EventHandler
    module ClassMethods
      # Lists the classes that currently have the receiver among their
      # ancestors, i.e. classes that include the event-type module and any
      # subclasses of those classes.
      #
      # The scan is performed on every call instead of being memoized, so
      # handler classes defined after an earlier call are still reported.
      #
      # @return [Array<Class>] matching classes, in no guaranteed order
      def descendants
        ObjectSpace.each_object(Class).select { |klass| klass < self }
      end
    end

    # Hook invoked by Ruby when EventHandler is included somewhere.
    #
    # @param base [Module] the module or class including EventHandler
    def self.included(base)
      base.extend(ClassMethods)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module CommitCommentEvent # Event-type mixin: classes that include it are the handlers for "CommitCommentEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module CreateEvent # Event-type mixin: classes that include it are the handlers for "CreateEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module DeleteEvent # Event-type mixin: classes that include it are the handlers for "DeleteEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module DeploymentEvent # Event-type mixin: classes that include it are the handlers for "DeploymentEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module DeploymentStatusEvent # Event-type mixin: classes that include it are the handlers for "DeploymentStatusEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module DownloadEvent # Event-type mixin: classes that include it are the handlers for "DownloadEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module FollowEvent # Event-type mixin: classes that include it are the handlers for "FollowEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module ForkApplyEvent # Event-type mixin: classes that include it are the handlers for "ForkApplyEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module ForkEvent # Event-type mixin: classes that include it are the handlers for "ForkEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module GistEvent # Event-type mixin: classes that include it are the handlers for "GistEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module GollumEvent # Event-type mixin: classes that include it are the handlers for "GollumEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module IssueCommentEvent # Event-type mixin: classes that include it are the handlers for "IssueCommentEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module IssuesEvent # Event-type mixin: classes that include it are the handlers for "IssuesEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module MemberEvent # Event-type mixin: classes that include it are the handlers for "MemberEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module PublicEvent # Event-type mixin: classes that include it are the handlers for "PublicEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module PullRequestEvent # Event-type mixin: classes that include it are the handlers for "PullRequestEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module PullRequestReviewCommentEvent # Event-type mixin: classes that include it are the handlers for "PullRequestReviewCommentEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module PushEvent # Event-type mixin: classes that include it are the handlers for "PushEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module ReleaseEvent # Event-type mixin: classes that include it are the handlers for "ReleaseEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module StatusEvent # Event-type mixin: classes that include it are the handlers for "StatusEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module TeamAddEvent # Event-type mixin: classes that include it are the handlers for "TeamAddEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ module GitHubArchiveParser
2
+ module WatchEvent # Event-type mixin: classes that include it are the handlers for "WatchEvent" events
3
+ include EventHandler # EventHandler.included extends this module with `descendants`
4
+ end
5
+ end
@@ -0,0 +1,9 @@
1
+ require 'pathname'
2
+ require 'ostruct'
3
+ require 'optparse'
4
+ require 'open-uri'
5
+ require 'zlib'
6
+ require 'yajl'
7
+ require 'hashie'
8
+
9
+ Dir.glob(File.dirname(__FILE__) + '/**/*.rb') { |file| require file } # requires every .rb file under this file's directory, recursively; NOTE(review): load order is whatever Dir.glob yields — confirm files referencing EventHandler/CreateEvent load after their definitions
@@ -0,0 +1,59 @@
require 'logger'
require 'singleton'

module GitHubArchiveParser
  # Singleton facade over a Logger that writes to STDOUT.
  #
  # Any public Logger method (info, warn, level=, ...) can be called directly
  # on the class (`Log.info "..."`); the call is forwarded to the single
  # underlying Logger instance through `method_missing`.
  class Log
    include Singleton

    # Shape of one Kernel#caller entry: "<file>:<line>", optionally followed
    # by ":in `<method>'".
    CALLER_PATTERN = /^(?<file>.+?):(?<line>\d+)(?::in `(?<method>.*)')?/

    attr_accessor :logger, :base_directory, :debugging

    # Builds the logger with level ERROR and a formatter that prints only the
    # message. `base_directory` is the directory two levels above this file
    # (with a trailing slash); it is stripped from caller paths when debugging.
    def initialize
      @base_directory = File.expand_path("../..", __FILE__) + "/"
      @debugging = false
      @logger = Logger.new(STDOUT)
      @logger.level = Logger::ERROR
      @logger.formatter = proc do |_sev, _datetime, _progname, msg|
        "#{msg}\n"
      end
    end

    # Switches to debug output: every line is prefixed with the severity and
    # the progname, which `method_missing` sets to the caller's location.
    def self.use_debug
      instance.debugging = true
      instance.logger.formatter = proc do |sev, _datetime, progname, msg|
        "#{sev} [#{progname}]: #{msg}\n"
      end
    end

    # Extracts "<file>:<line>" from a Kernel#caller entry, with the first
    # occurrence of `base_directory` removed from the file part.
    #
    # @param message [String, nil] one entry of Kernel#caller
    # @return [String, nil] the location, or nil when message does not match
    def self.parse_caller(message)
      match = CALLER_PATTERN.match(message)
      return nil unless match

      "#{match[:file].sub(instance.base_directory, "")}:#{match[:line]}"
    end

    # Forwards every method the underlying Logger responds to; anything else
    # falls through to the default behaviour (NoMethodError).
    def self.method_missing(method, *args, &blk)
      if valid_method? method
        # Record where the log call came from so the debug formatter can show it
        instance.logger.progname = parse_caller(caller(1).first) if instance.debugging
        # valid_method? only accepts public methods, so public_send is sufficient
        instance.logger.public_send(method, *args, &blk)
      else
        super
      end
    end

    # Keeps `respond_to?` consistent with `method_missing`.
    def self.respond_to_missing?(method, include_all=false)
      valid_method?(method) || super
    end

    # @return [Boolean] whether the underlying Logger responds to `method`
    def self.valid_method?(method)
      instance.logger.respond_to? method
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module GitHubArchiveParser
2
+ VERSION = "0.1.0" # gem version string; Processor#parse_options assigns it to the OptionParser's version
3
+ end
@@ -0,0 +1,97 @@
require 'github_archive_parser/initialize'

module GitHubArchiveParser
  # Reads GitHub Archive dumps and hands every event to the registered handlers.
  #
  # Constructing a Processor parses the command-line flags (OptionParser#parse!
  # removes the flags it recognises from ARGV), configures Log accordingly and
  # instantiates every handler class reported by the event-type modules'
  # `descendants`.
  class Processor
    # Only URLs starting with this prefix are processed.
    ARCHIVE_URL_PREFIX = "http://data.githubarchive.org/"

    # @return [OpenStruct] parsed command-line options (`debug`, `quiet`)
    attr_reader :options

    def initialize
      @options = OpenStruct.new(debug: false)
      parse_options
      determine_log_level

      # Create the concrete handlers and store them for future use
      create_event_handlers
    end

    # Downloads one gzipped archive, parses the JSON events it contains and
    # calls `parse(event)` on every handler registered for the event's type.
    # URLs outside ARCHIVE_URL_PREFIX (or nil) are only logged as a warning.
    #
    # @param url [String, nil] archive location
    def process(url)
      Log.info "Processing #{url}"
      if url && url.start_with?(ARCHIVE_URL_PREFIX)
        # URI#open (provided by open-uri) is used instead of Kernel#open, which
        # no longer opens URLs on Ruby 3.0+; the block form closes the download.
        js = URI.parse(url).open { |gz| Zlib::GzipReader.new(gz).read }

        Yajl::Parser.parse(js) do |event|
          event = Hashie::Mash.new(event)
          event_class = class_from_string("GitHubArchiveParser::#{event.type}")

          # Unknown or unregistered event types have no handler list; skip
          # them instead of calling `each` on nil.
          @event_handlers.fetch(event_class, []).each do |handler|
            handler.parse(event) if handler.respond_to?(:parse)
          end
        end
      else
        Log.warn "URL[#{url}] does not belong to #{ARCHIVE_URL_PREFIX}"
      end
    end

    # Parses ARGV in place, recording the recognised flags in `options`.
    def parse_options
      OptionParser.new do |opt|
        opt.version = VERSION
        opt.on "-d", "--debug", "Debug output (shows DEBUG level log statements)" do
          options.debug = true
        end
        # "--quite" was the originally published (misspelled) flag; it is kept
        # as an alias so existing invocations keep working.
        opt.on "-q", "--quiet", "--quite", "Hide all output (shows only UNKNOWN level log statements)" do
          options.quiet = true
          options.quite = true
        end
      end.parse!
    end

    private

    # Maps the parsed flags onto the Log level: debug wins over quiet, and
    # INFO is the default.
    def determine_log_level
      if options.debug
        Log.level = Logger::DEBUG
        Log.use_debug
      elsif options.quiet || options.quite
        Log.level = Logger::UNKNOWN
      else
        Log.level = Logger::INFO
      end
    end

    # Resolves a "A::B" constant path starting from Object.
    #
    # @param string [String] constant path
    # @return [Object, nil] the constant, or nil (after a warning) when it
    #   cannot be resolved
    def class_from_string(string)
      string.split('::').inject(Object) do |mod, class_name|
        mod.const_get(class_name)
      end
    rescue NameError
      # Only lookup failures are expected here; rescuing Exception would also
      # swallow interrupts and exit requests.
      Log.warn "Event #{string} not found"
      nil
    end

    # Builds @event_handlers: event-type module => array of handler instances.
    def create_event_handlers
      @event_handlers = {}

      # Probably can do something to not hardcode this
      event_types = [
        CommitCommentEvent, CreateEvent, DeleteEvent,
        DeploymentEvent, DeploymentStatusEvent, DownloadEvent,
        FollowEvent, ForkApplyEvent, ForkEvent,
        GistEvent, GollumEvent, IssueCommentEvent,
        IssuesEvent, MemberEvent, PublicEvent,
        PullRequestEvent, PullRequestReviewCommentEvent, PushEvent,
        ReleaseEvent, StatusEvent, TeamAddEvent,
        WatchEvent
      ]

      event_types.each do |event_type|
        # Map list of concrete event handler to their event type
        @event_handlers[event_type] = event_type.descendants.map { |handler| handler.new }
      end
    end
  end
end
data/spec/log_spec.rb ADDED
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+
3
+ include GitHubArchiveParser
4
+
5
+ describe Log do
6
+
7
+ let(:log) { Class.new(Log) } # fresh anonymous subclass of Log per example, used as the subject
8
+
9
+ context "initializes instance" do
10
+ it "should acts as singleton" do
11
+ log.instance.should == log.instance
12
+ end
13
+
14
+ it "should have a logger" do
15
+ log.instance.logger.should be_a Logger
16
+ end
17
+
18
+ it "should be a Log (FakeLog)" do
19
+ log.class.should eq Log.class # NOTE(review): both sides evaluate to Class, so this does not check that log derives from Log
20
+ end
21
+
22
+ it "should react to Logger methods" do
23
+ Logger.public_instance_methods.each do |method|
24
+ log.valid_method?(method).should be_true
25
+ end
26
+ end
27
+ end
28
+
29
+ context "#use_debug" do
30
+ it "logger's progname before" do
31
+ log.progname.should be_nil # progname is forwarded to the underlying Logger by Log.method_missing
32
+ end
33
+
34
+ it "logger's progname after" do
35
+ log.use_debug
36
+ log.progname.should_not be_nil # with debugging on, Log.method_missing assigns progname from the caller before forwarding this call
37
+ end
38
+ end
39
+
40
+ context "#parse_caller" do
41
+ context "with nothing" do
42
+ it { log.parse_caller(nil).should be_nil }
43
+ end
44
+
45
+ context "with jumble (random text)" do
46
+ it { log.parse_caller("asdaacsdc").should be_nil }
47
+ end
48
+
49
+ context "with valid caller" do
50
+ it { log.parse_caller("github_archive_parser/lib/github_archive_parser.rb:45:in `respond_to_missing?'").should eq "github_archive_parser/lib/github_archive_parser.rb:45" }
51
+ end
52
+ end
53
+
54
+ end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: github_archive_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kevin Jalbert
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-17 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Gem which parses GitHub Archive data
14
+ email:
15
+ - kevin.j.jalbert@gmail.com
16
+ executables:
17
+ - github_archive_parser
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - bin/github_archive_parser
22
+ - lib/github_archive_parser.rb
23
+ - lib/github_archive_parser/XCust.rb
24
+ - lib/github_archive_parser/event_handler.rb
25
+ - lib/github_archive_parser/event_handlers/commit_comment_event.rb
26
+ - lib/github_archive_parser/event_handlers/create_event.rb
27
+ - lib/github_archive_parser/event_handlers/delete_event.rb
28
+ - lib/github_archive_parser/event_handlers/deployment_event.rb
29
+ - lib/github_archive_parser/event_handlers/deployment_status_event.rb
30
+ - lib/github_archive_parser/event_handlers/download_event.rb
31
+ - lib/github_archive_parser/event_handlers/follow_event.rb
32
+ - lib/github_archive_parser/event_handlers/fork_apply_event.rb
33
+ - lib/github_archive_parser/event_handlers/fork_event.rb
34
+ - lib/github_archive_parser/event_handlers/gist_event.rb
35
+ - lib/github_archive_parser/event_handlers/gollum_event.rb
36
+ - lib/github_archive_parser/event_handlers/issue_comment_event.rb
37
+ - lib/github_archive_parser/event_handlers/issues_event.rb
38
+ - lib/github_archive_parser/event_handlers/member_event.rb
39
+ - lib/github_archive_parser/event_handlers/public_event.rb
40
+ - lib/github_archive_parser/event_handlers/pull_request_event.rb
41
+ - lib/github_archive_parser/event_handlers/pull_request_review_comment_event.rb
42
+ - lib/github_archive_parser/event_handlers/push_event.rb
43
+ - lib/github_archive_parser/event_handlers/release_event.rb
44
+ - lib/github_archive_parser/event_handlers/status_event.rb
45
+ - lib/github_archive_parser/event_handlers/team_add_event.rb
46
+ - lib/github_archive_parser/event_handlers/watch_event.rb
47
+ - lib/github_archive_parser/initialize.rb
48
+ - lib/github_archive_parser/log.rb
49
+ - lib/github_archive_parser/version.rb
50
+ - spec/log_spec.rb
51
+ homepage: https://github.com/kevinjalbert/github_archive_parser
52
+ licenses: []
53
+ metadata: {}
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - '>='
61
+ - !ruby/object:Gem::Version
62
+ version: 1.9.3
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - '>='
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubyforge_project:
70
+ rubygems_version: 2.2.1
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Gem which parses GitHub Archive data
74
+ test_files:
75
+ - spec/log_spec.rb