github-archive 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +46 -0
- data/Rakefile +3 -0
- data/bin/github-archive +135 -0
- data/github-archive.gemspec +34 -0
- data/lib/github/archive.rb +29 -0
- data/lib/github/archive/archived_url.rb +9 -0
- data/lib/github/archive/connections.rb +85 -0
- data/lib/github/archive/create_archive.rb +30 -0
- data/lib/github/archive/destroy_archive.rb +17 -0
- data/lib/github/archive/event.rb +47 -0
- data/lib/github/archive/stat_collector.rb +39 -0
- data/lib/github/archive/version.rb +5 -0
- data/spec/factories/archived_url_factory.rb +6 -0
- data/spec/factories/event_factory.rb +5 -0
- data/spec/github/archive/archived_url_spec.rb +20 -0
- data/spec/github/archive/event_spec.rb +117 -0
- data/spec/github/archive/stat_collector_spec.rb +11 -0
- data/spec/spec_helper.rb +12 -0
- metadata +241 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 719058d828ae6865a67cbbff5e5cda3279e45b56
|
4
|
+
data.tar.gz: 97cf65cfe828103a18a85e10aee9927ee72c03bf
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3c6d94565edff148cc3615e1e797419cfb49f5abff894b418fd4493c068ed40f853d15b67b5b754188888f360c07a6f2aa0356052ed9aaa594577bbf81b15f55
|
7
|
+
data.tar.gz: 62c802e31a2c62f82429416319104663c44c2e0abb61432365dbd53a0ba6273a2d7cd5fb44083154dfba4fab170dc1b529d05b423b8505d7bc5b8c63e7439947
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 cparratto
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# Github Archive
|
2
|
+
|
3
|
+
A command line utility for concurrent processing and management of github event archives.
|
4
|
+
|
5
|
+
## Dependencies
|
6
|
+
|
7
|
+
1. MySQL (for storing replica data)
|
8
|
+
2. Redis (for processing queue persistence)
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
On OS X:
|
13
|
+
|
14
|
+
$ brew install mysql (Follow instructions for launching)
|
15
|
+
$ brew install redis (Follow instructions for launching)
|
16
|
+
|
17
|
+
Get the gem:
|
18
|
+
|
19
|
+
$ gem install github-archive
|
20
|
+
|
21
|
+
## Set up MySQL and Redis (for Resque)
|
22
|
+
|
23
|
+
$ github-archive setup_mysql --database=DATABASE --server=SERVER --username=USERNAME
|
24
|
+
$ github-archive setup_redis --port=PORT --server=SERVER
|
25
|
+
|
26
|
+
## Launch workers for processing github data
|
27
|
+
|
28
|
+
$ github-archive process_archives --proc-count=PROC_COUNT
|
29
|
+
|
30
|
+
## Get Stats
|
31
|
+
|
32
|
+
$ github-archive gh_repo_stats --after=AFTER --before=BEFORE --event=EVENT -n=N
|
33
|
+
|
34
|
+
Example:
|
35
|
+
|
36
|
+
$ github-archive gh_repo_stats --event=WatchEvent --after=2012-12-10T04:05:06+07:00 --before=2012-12-11T04:05:06+07:00 -n=20
|
37
|
+
|
38
|
+
##
|
39
|
+
|
40
|
+
## Contributing
|
41
|
+
|
42
|
+
1. Fork it
|
43
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
44
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
45
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
46
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/bin/github-archive
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'thor'
|
3
|
+
require 'thor/rake_compat'
|
4
|
+
require 'bundler'
|
5
|
+
require 'github/archive'
|
6
|
+
require 'resque/tasks'
|
7
|
+
|
8
|
+
module Github
|
9
|
+
module Archive
|
10
|
+
class CLI < Thor
|
11
|
+
Bundler::GemHelper.install_tasks
|
12
|
+
|
13
|
+
desc 'check_for_errors', "Checks resque for processing errors."
|
14
|
+
def check_for_errors
|
15
|
+
Rake::Task['resque:failures:sort'].execute
|
16
|
+
end
|
17
|
+
|
18
|
+
desc 'process_archives', "Starts resque work processes."
|
19
|
+
method_option :proc_count, :type => :string, :default => '4', :required => true
|
20
|
+
def process_archives
|
21
|
+
ENV['QUEUE'] = StatCollector.queue.to_s
|
22
|
+
ENV['COUNT'] = options[:proc_count]
|
23
|
+
Rake::Task['resque:workers'].execute
|
24
|
+
end
|
25
|
+
|
26
|
+
desc "setup_mysql", "Sets connection params for mysql."
|
27
|
+
method_option :username, :type => :string, :default => 'root', :required => true
|
28
|
+
method_option :password, :type => :string, :default => ''
|
29
|
+
method_option :server, :type => :string, :default => 'localhost', :required => true
|
30
|
+
method_option :database, :type => :string, :default => 'github-archive', :required => true
|
31
|
+
def setup_mysql
|
32
|
+
Connections.config_mysql(options[:server],
|
33
|
+
options[:username],
|
34
|
+
options[:password],
|
35
|
+
options[:database])
|
36
|
+
|
37
|
+
Connections.write_settings
|
38
|
+
end
|
39
|
+
|
40
|
+
desc "create_archive", "Creates all Tables"
|
41
|
+
def create_archive
|
42
|
+
CreateArchive.do
|
43
|
+
end
|
44
|
+
|
45
|
+
desc "destroy_archive", "Destroys all Tables"
|
46
|
+
def destroy_archive
|
47
|
+
DestroyArchive.do
|
48
|
+
end
|
49
|
+
|
50
|
+
desc "setup_redis", "Sets connection params for redis."
|
51
|
+
method_option :server, :type => :string, :default => 'localhost', :required => true
|
52
|
+
method_option :port, :type => :string, :default => '6379', :required => true
|
53
|
+
method_option :password, :type => :string, :default => nil
|
54
|
+
def setup_redis
|
55
|
+
Connections.config_redis(options[:server],
|
56
|
+
options[:port],
|
57
|
+
options[:password])
|
58
|
+
|
59
|
+
Connections.write_settings
|
60
|
+
end
|
61
|
+
|
62
|
+
|
63
|
+
desc "gh_repo_stats", "Lists the most active repositories for a given time range."
|
64
|
+
method_option :after, :type => :string, :default => '2013-02-03T04:05:06+07:00', :required => true
|
65
|
+
method_option :before, :type => :string, :default => '2013-03-03T04:05:06+07:00', :required => true
|
66
|
+
method_option :event, :type => :string, :default => 'GollumEvent', :required => true
|
67
|
+
method_option :n, :type => :numeric, :default => 100, :required => true
|
68
|
+
# Prints the most active repositories for the requested event type and
# time range (options :after, :before, :event and :n).
#
# Every hourly archive covering the dates in the range must already be
# processed. Archives that are not tracked yet are registered and queued
# for a StatCollector worker, and the command reports how many hours of
# data are still missing instead of printing partial results.
#
# Exits with status 1 when the range contains no dates (e.g. --after is
# later than --before).
def gh_repo_stats
  date_from = build_date_time(options[:after])
  date_to   = build_date_time(options[:before])
  urls      = build_url_array_from_dates((date_from.to_date..date_to.to_date).to_a)

  # An inverted range produces no dates; accept both nil and an empty list.
  if urls.nil? || urls.empty?
    warn "Bad time range. Please enter a correct time range."
    exit 1
  end

  hour_count = urls.count

  urls.each do |url|
    archived = ArchivedUrl.where(url: url).first

    if archived.nil?
      # First time this hour is requested: track it and hand it to a worker.
      ArchivedUrl.create(url: url, finished_processing: false)
      Resque.enqueue(StatCollector, url)
    elsif archived.finished_processing
      hour_count -= 1
    end
    # Tracked but unfinished archives simply stay counted as missing.
  end

  if hour_count.zero?
    results = Event.results_for_range_and_type(date_from..date_to,
                                               options[:event],
                                               options[:n])

    results.each do |url, count|
      puts "#{url} - #{count} \n"
    end
  else
    puts "You still need #{hour_count} hours of data to calculate this.."
  end
end
|
104
|
+
|
105
|
+
desc "event_count", "Gets the current event count"
|
106
|
+
def event_count
|
107
|
+
puts Event.count
|
108
|
+
end
|
109
|
+
|
110
|
+
private
|
111
|
+
|
112
|
+
# Parses a command-line timestamp of the form 2013-02-03T04:05:06+07:00.
#
# Returns the parsed DateTime. When the string does not match the format,
# prints an explanation to stderr and exits with status 1 so that shell
# callers can detect the failure.
def build_date_time(time_str)
  DateTime.strptime(time_str, '%Y-%m-%dT%H:%M:%S%z')
rescue ArgumentError, TypeError
  # strptime raises ArgumentError (Date::Error) for malformed input and
  # TypeError when it is not given a string at all.
  warn "Bad time format for #{time_str}. Please use something like 2013-02-03T04:05:06+07:00"
  exit 1
end
|
121
|
+
|
122
|
+
# Builds the list of hourly archive URLs (hours 0 through 23, not
# zero-padded) for every date in +date_array+, in date then hour order.
#
# Returns nil when +date_array+ is empty; gh_repo_stats uses that to
# detect an invalid time range.
def build_url_array_from_dates(date_array)
  return nil if date_array.empty?

  date_array.flat_map do |date|
    day = date.strftime("%Y-%m-%d")
    (0..23).map { |hour| "http://data.githubarchive.org/#{day}-#{hour}.json.gz" }
  end
end
|
130
|
+
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
Github::Archive::CLI.start
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for github-archive.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'github/archive/version'

Gem::Specification.new do |spec|
  spec.name          = "github-archive"
  spec.version       = Github::Archive::VERSION
  spec.authors       = ["cparratto"]
  spec.email         = ["cparratto@gmail.com"]
  spec.description   = %q{Github Archive}
  spec.summary       = %q{A command line utility for concurrent processing and management of github event archives.}
  spec.homepage      = "https://github.com/cparratto/github-archive"
  spec.license       = "MIT"

  # Package everything tracked by git; executables and tests are derived from it.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency 'thor'
  spec.add_dependency 'mysql'
  spec.add_dependency 'activerecord'
  spec.add_dependency 'yajl-ruby'
  spec.add_dependency 'resque'
  # lib/github/archive.rb requires 'pry' unconditionally at load time, so it
  # must be installed together with the gem or the executable fails with a
  # LoadError. Move this back to a development dependency once that require
  # is removed from the library.
  spec.add_dependency 'pry'

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "factory_girl"
  spec.add_development_dependency 'database_cleaner'
  spec.add_development_dependency "mocha"
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require "github/archive/version"
|
2
|
+
require 'mysql'
|
3
|
+
require 'active_record'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'zlib'
|
6
|
+
require 'yajl'
|
7
|
+
require 'resque'
|
8
|
+
require 'yaml'
|
9
|
+
require 'pry'
|
10
|
+
|
11
|
+
require 'github/archive/connections'
|
12
|
+
|
13
|
+
require 'github/archive/stat_collector'
|
14
|
+
|
15
|
+
require 'github/archive/archived_url'
|
16
|
+
require 'github/archive/event'
|
17
|
+
|
18
|
+
require 'github/archive/create_archive'
|
19
|
+
require 'github/archive/destroy_archive'
|
20
|
+
|
21
|
+
module Github
|
22
|
+
module Archive
|
23
|
+
::Github::Archive::Connections.init_settings
|
24
|
+
::Github::Archive::Connections.read_settings
|
25
|
+
|
26
|
+
::Github::Archive::Connections.connect_redis
|
27
|
+
::Github::Archive::Connections.connect_mysql
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Github
  module Archive
    # Keeps the MySQL and Redis connection settings in memory, persists
    # them as YAML in ~/.github-archive, and opens the ActiveRecord and
    # Resque connections from them.
    #
    # init_settings must be called before any other method: it creates the
    # nested SETTINGS['GHA'] structure the other methods index into.
    class Connections
      class << self
        SETTINGS = {}

        # Points ActiveRecord at the configured MySQL database.
        def connect_mysql
          mysql = SETTINGS['GHA']['MYSQL']

          # ActiveRecord reads the server name from :host; a :server key is
          # not a recognised option and would leave the connection on the
          # adapter's default host.
          ::ActiveRecord::Base.establish_connection(
            adapter:  'mysql',
            host:     mysql['SERVER'],
            username: mysql['USERNAME'],
            password: mysql['PASSWORD'],
            database: mysql['DATABASE']
          )
        end

        # Replaces the in-memory MySQL settings.
        def config_mysql(server, user, password, database)
          SETTINGS['GHA']['MYSQL']['SERVER']   = server
          SETTINGS['GHA']['MYSQL']['USERNAME'] = user
          SETTINGS['GHA']['MYSQL']['PASSWORD'] = password
          SETTINGS['GHA']['MYSQL']['DATABASE'] = database
        end

        # Makes Resque use the configured Redis server. The password is
        # only passed to the client when one has been configured.
        def connect_redis
          redis = SETTINGS['GHA']['REDIS']
          redis_options = { :host        => redis['SERVER'],
                            :port        => redis['PORT'],
                            :thread_safe => true }
          redis_options[:password] = redis['PASSWORD'] unless redis['PASSWORD'].nil?

          ::Resque.redis = Redis.new(redis_options)
        end

        # Replaces the in-memory Redis settings.
        def config_redis(server, port, password)
          SETTINGS['GHA']['REDIS']['SERVER']   = server
          SETTINGS['GHA']['REDIS']['PORT']     = port
          SETTINGS['GHA']['REDIS']['PASSWORD'] = password
        end

        # Creates the settings structure and fills in a default for every
        # value that has not been set yet.
        def init_settings
          SETTINGS['GHA'] ||= {}
          SETTINGS['GHA']['MYSQL'] ||= {}
          SETTINGS['GHA']['MYSQL']['SERVER']   ||= 'localhost'
          SETTINGS['GHA']['MYSQL']['USERNAME'] ||= 'root'
          SETTINGS['GHA']['MYSQL']['PASSWORD'] ||= nil
          SETTINGS['GHA']['MYSQL']['DATABASE'] ||= 'github_archive'

          SETTINGS['GHA']['REDIS'] ||= {}
          SETTINGS['GHA']['REDIS']['SERVER']   ||= 'localhost'
          SETTINGS['GHA']['REDIS']['PASSWORD'] ||= nil
          SETTINGS['GHA']['REDIS']['PORT']     ||= '6379'
        end

        # Loads settings saved by write_settings, if the file exists.
        #
        # An empty file, or one lacking a MYSQL or REDIS section, leaves
        # the corresponding in-memory settings untouched instead of
        # raising, so a damaged file can still be repaired with the
        # setup_* commands.
        def read_settings
          return unless File.exist?(setting_path)

          settings = YAML.load_file(setting_path)
          return unless settings.is_a?(Hash)

          mysql = settings['MYSQL']
          if mysql.is_a?(Hash)
            config_mysql mysql['SERVER'],
                         mysql['USERNAME'],
                         mysql['PASSWORD'],
                         mysql['DATABASE']
          end

          redis = settings['REDIS']
          if redis.is_a?(Hash)
            config_redis redis['SERVER'],
                         redis['PORT'],
                         redis['PASSWORD']
          end
        end

        # Saves the current settings as YAML. The file contains passwords,
        # so it is made readable and writable by its owner only.
        def write_settings
          File.open(setting_path, "w", 0600) do |file|
            file.write SETTINGS['GHA'].to_yaml
          end
          # The mode passed to File.open only applies when the file is
          # created; tighten files written by earlier versions as well.
          File.chmod(0600, setting_path)
        end

        private

        # Location of the per-user settings file.
        def setting_path
          File.join(File.expand_path('~'), '.github-archive')
        end
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Github
  module Archive
    # Creates the tables backing ArchivedUrl and Event, together with
    # their indexes. A table that already exists is left untouched.
    module CreateArchive
      def self.do
        ActiveRecord::Schema.define do
          # archived_urls: one row per hourly archive URL and whether it
          # has been processed.
          unless Github::Archive::ArchivedUrl.table_exists?
            create_table :archived_urls do |t|
              t.string  :url
              t.boolean :finished_processing
            end

            add_index :archived_urls, :url
          end

          # events: one row per imported event; every column is indexed.
          unless Github::Archive::Event.table_exists?
            create_table :events do |t|
              t.string   :url
              t.string   :event_type
              t.datetime :gh_created_at
            end

            [:url, :event_type, :gh_created_at].each do |column|
              add_index :events, column
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Github
  module Archive
    # Drops the events and archived_urls tables. Tables that do not exist
    # are skipped.
    module DestroyArchive
      def self.do
        ActiveRecord::Schema.define do
          drop_table :events if Github::Archive::Event.table_exists?
          drop_table :archived_urls if Github::Archive::ArchivedUrl.table_exists?
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Github
  module Archive
    # A single archived event, reduced to the repository URL, the event
    # type and the time GitHub recorded it.
    class Event < ::ActiveRecord::Base
      scope :in_time_range,   lambda { |range| where(gh_created_at: range) }
      scope :with_event_type, lambda { |type| where(event_type: type) }

      class << self
        # Stores one parsed event hash. Events of type GistEvent,
        # FollowEvent and TeamAddEvent are skipped and nil is returned.
        # The repository URL is read from the "repo" entry, falling back
        # to "repository"; when neither is present it is "UNKNOWN".
        def create_with_json(raw_json)
          json = raw_json.with_indifferent_access
          return unless related_event?(json[:type])

          repo = json[:repo] || json[:repository]

          create(
            url:           repo ? repo[:url] : "UNKNOWN",
            event_type:    json[:type],
            gh_created_at: json[:created_at]
          )
        end

        # Counts events of +type+ created within +range+, grouped by url,
        # ordered by descending count and capped at +limit+ urls.
        def results_for_range_and_type(range, type, limit)
          matching = in_time_range(range).with_event_type(type)

          matching.group(:url)
                  .limit(limit)
                  .order('count(*) DESC')
                  .count(:url)
        end

        private

        # True for every event type except the three that are not stored.
        def related_event?(event_type)
          !["GistEvent", "FollowEvent", "TeamAddEvent"].include?(event_type)
        end
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Github
  module Archive
    # Resque job that downloads one hourly archive and stores its events.
    class StatCollector
      @queue = :github_archive

      class << self
        attr_reader :queue

        # Imports the archive at +stat_url+.
        #
        # Only URLs that are already tracked by an ArchivedUrl row and not
        # yet marked as processed are imported. The import runs inside a
        # transaction. When it fails, the error is reported and the URL is
        # still marked as processed, so one broken archive does not block
        # statistics for the rest of the range.
        def perform(stat_url)
          archived = ArchivedUrl.where(url: stat_url).first

          if archived.nil?
            puts "cannot archive untracked url #{stat_url}"
          elsif archived.finished_processing
            puts "already processed #{stat_url}"
          else
            begin
              ArchivedUrl.transaction do
                puts "processing #{stat_url}"

                # URI#open (open-uri) only ever performs a network fetch,
                # unlike Kernel#open, which treats a leading "|" as a
                # command and no longer opens URLs on Ruby 3.0+. The block
                # form closes the downloaded body when done.
                js = URI.parse(stat_url).open do |gz|
                  Zlib::GzipReader.new(gz).read
                end

                Yajl::Parser.parse(js) do |event|
                  Github::Archive::Event.create_with_json(event)
                end

                archived.finished_processing = true
                archived.save
              end
            rescue => e
              # Keep the cause visible; it used to be discarded entirely.
              puts "There was some kind of error while parsing #{stat_url}: #{e.class}: #{e.message}"
              archived.finished_processing = true
              archived.save
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Github
|
4
|
+
module Archive
|
5
|
+
describe ArchivedUrl do
|
6
|
+
after(:each) do
|
7
|
+
ArchivedUrl.delete_all
|
8
|
+
end
|
9
|
+
|
10
|
+
it "tracks processed urls" do
|
11
|
+
FactoryGirl.create(:archived_url, url: 'test_url')
|
12
|
+
ArchivedUrl.already_has?("test_url").should be_true
|
13
|
+
end
|
14
|
+
|
15
|
+
it "does not track unprocessed urls" do
|
16
|
+
ArchivedUrl.already_has?("test_url").should be_false
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Github
|
4
|
+
module Archive
|
5
|
+
describe Event do
|
6
|
+
after(:each) do
|
7
|
+
Event.delete_all
|
8
|
+
end
|
9
|
+
|
10
|
+
let(:expected_results) {{"url1"=>2, "url2"=>1, "url3"=>1}}
|
11
|
+
let(:json_data){
|
12
|
+
{"payload"=>
|
13
|
+
{"head"=>"6234f9fcb61c7b03782cf32e232ae1d90bf3c772",
|
14
|
+
"size"=>1,
|
15
|
+
"shas"=>
|
16
|
+
[["6234f9fcb61c7b03782cf32e232ae1d90bf3c772",
|
17
|
+
"dev+commit-bot@koideploy.com",
|
18
|
+
"Display photo \"nuances\" from \"paul bica\"",
|
19
|
+
"Koi Deploy Commit Bot",
|
20
|
+
true]],
|
21
|
+
"ref"=>"refs/heads/master"},
|
22
|
+
"created_at"=>"2012-12-10T12:00:29-08:00",
|
23
|
+
"repository"=>
|
24
|
+
{"description"=>
|
25
|
+
"Deploy your Rails app straight from GitHub to Heroku in a single click, or continuously triggered by your CI. This is an example application for continuous deployment via Travis CI. ",
|
26
|
+
"stargazers"=>1,
|
27
|
+
"owner"=>"koideploy",
|
28
|
+
"url"=>"https://github.com/koideploy/showcase",
|
29
|
+
"has_downloads"=>true,
|
30
|
+
"language"=>"Ruby",
|
31
|
+
"pushed_at"=>"2012-12-10T12:00:21-08:00",
|
32
|
+
"forks"=>0,
|
33
|
+
"has_issues"=>true,
|
34
|
+
"organization"=>"koideploy",
|
35
|
+
"fork"=>false,
|
36
|
+
"size"=>4368,
|
37
|
+
"has_wiki"=>false,
|
38
|
+
"name"=>"showcase",
|
39
|
+
"id"=>6306116,
|
40
|
+
"homepage"=>"http://showcase.koideploy.com",
|
41
|
+
"private"=>false,
|
42
|
+
"open_issues"=>0,
|
43
|
+
"created_at"=>"2012-10-19T23:38:50-07:00",
|
44
|
+
"watchers"=>1},
|
45
|
+
"actor_attributes"=>
|
46
|
+
{"login"=>"koideploy-commit-bot",
|
47
|
+
"name"=>"KoiDeploy Commit Bot",
|
48
|
+
"location"=>"Brisbane, Australia",
|
49
|
+
"company"=>"KoiDeploy",
|
50
|
+
"blog"=>"https://koideploy.com",
|
51
|
+
"type"=>"User",
|
52
|
+
"gravatar_id"=>"a1aea43cf721ab953474a0e4c699bc90"},
|
53
|
+
"url"=>
|
54
|
+
"https://github.com/koideploy/showcase/compare/ec6f77514f...6234f9fcb6",
|
55
|
+
"public"=>true,
|
56
|
+
"actor"=>"koideploy-commit-bot",
|
57
|
+
"type"=>"PushEvent"}
|
58
|
+
}
|
59
|
+
|
60
|
+
context "when looking up events" do
|
61
|
+
it "returns events in the correct time range" do
|
62
|
+
FactoryGirl.create(:event, gh_created_at: 3.days.ago)
|
63
|
+
FactoryGirl.create(:event, gh_created_at: 2.days.ago)
|
64
|
+
FactoryGirl.create(:event, gh_created_at: 1.days.ago)
|
65
|
+
FactoryGirl.create(:event, gh_created_at: DateTime.now)
|
66
|
+
|
67
|
+
Event.count.should eq 4
|
68
|
+
Event.in_time_range(2.days.ago..1.days.ago).count.should eq 2
|
69
|
+
end
|
70
|
+
|
71
|
+
it "returns events with the correct type" do
|
72
|
+
FactoryGirl.create(:event, event_type: "test_event")
|
73
|
+
FactoryGirl.create(:event, event_type: "test_event2")
|
74
|
+
|
75
|
+
Event.with_event_type("test_event").count.should eq 1
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
context "when creating events" do
|
80
|
+
it "creates repo events with json" do
|
81
|
+
Event.create_with_json(json_data)
|
82
|
+
event = Event.first
|
83
|
+
event.event_type.should == "PushEvent"
|
84
|
+
event.gh_created_at.to_s.should == "2012-12-10 12:00:29 UTC"
|
85
|
+
event.url.should == "https://github.com/koideploy/showcase"
|
86
|
+
end
|
87
|
+
|
88
|
+
|
89
|
+
["GistEvent", "FollowEvent" ,"TeamAddEvent"].each do |event_str|
|
90
|
+
it "does not create for #{event_str} events" do
|
91
|
+
test_json = json_data.dup
|
92
|
+
test_json["type"] = event_str
|
93
|
+
Event.create_with_json(test_json)
|
94
|
+
Event.count.should eq 0
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
context "when getting results" do
|
100
|
+
it "returns a result hash with the correct results" do
|
101
|
+
FactoryGirl.create(:event, gh_created_at: 3.days.ago, event_type: "test_event1", url: "url1")
|
102
|
+
FactoryGirl.create(:event, gh_created_at: 2.days.ago, event_type: "test_event1", url: "url1")
|
103
|
+
FactoryGirl.create(:event, gh_created_at: 2.days.ago, event_type: "test_event1", url: "url2")
|
104
|
+
FactoryGirl.create(:event, gh_created_at: 2.days.ago, event_type: "test_event1", url: "url3")
|
105
|
+
FactoryGirl.create(:event, gh_created_at: 1.days.ago, event_type: "test_event3", url: "url3")
|
106
|
+
FactoryGirl.create(:event, gh_created_at: DateTime.now, event_type: "test_event3", url: "url4")
|
107
|
+
|
108
|
+
range = (3.days.ago..2.days.ago)
|
109
|
+
|
110
|
+
results = Event.results_for_range_and_type(range, "test_event1", 4)
|
111
|
+
|
112
|
+
results.should == expected_results
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,241 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: github-archive
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- cparratto
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-10-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: thor
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mysql
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: activerecord
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: yajl-ruby
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: resque
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: bundler
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ~>
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.3'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ~>
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.3'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rake
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rspec
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: factory_girl
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - '>='
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: database_cleaner
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - '>='
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - '>='
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: mocha
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - '>='
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: pry
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - '>='
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
type: :development
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - '>='
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
181
|
+
description: Github Archive
|
182
|
+
email:
|
183
|
+
- cparratto@gmail.com
|
184
|
+
executables:
|
185
|
+
- github-archive
|
186
|
+
extensions: []
|
187
|
+
extra_rdoc_files: []
|
188
|
+
files:
|
189
|
+
- .gitignore
|
190
|
+
- Gemfile
|
191
|
+
- LICENSE.txt
|
192
|
+
- README.md
|
193
|
+
- Rakefile
|
194
|
+
- bin/github-archive
|
195
|
+
- github-archive.gemspec
|
196
|
+
- lib/github/archive.rb
|
197
|
+
- lib/github/archive/archived_url.rb
|
198
|
+
- lib/github/archive/connections.rb
|
199
|
+
- lib/github/archive/create_archive.rb
|
200
|
+
- lib/github/archive/destroy_archive.rb
|
201
|
+
- lib/github/archive/event.rb
|
202
|
+
- lib/github/archive/stat_collector.rb
|
203
|
+
- lib/github/archive/version.rb
|
204
|
+
- spec/factories/archived_url_factory.rb
|
205
|
+
- spec/factories/event_factory.rb
|
206
|
+
- spec/github/archive/archived_url_spec.rb
|
207
|
+
- spec/github/archive/event_spec.rb
|
208
|
+
- spec/github/archive/stat_collector_spec.rb
|
209
|
+
- spec/spec_helper.rb
|
210
|
+
homepage: https://github.com/cparratto/github-archive
|
211
|
+
licenses:
|
212
|
+
- MIT
|
213
|
+
metadata: {}
|
214
|
+
post_install_message:
|
215
|
+
rdoc_options: []
|
216
|
+
require_paths:
|
217
|
+
- lib
|
218
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - '>='
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '0'
|
223
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
224
|
+
requirements:
|
225
|
+
- - '>='
|
226
|
+
- !ruby/object:Gem::Version
|
227
|
+
version: '0'
|
228
|
+
requirements: []
|
229
|
+
rubyforge_project:
|
230
|
+
rubygems_version: 2.0.3
|
231
|
+
signing_key:
|
232
|
+
specification_version: 4
|
233
|
+
summary: A command line utility for concurrent processing and management of github
|
234
|
+
event archives.
|
235
|
+
test_files:
|
236
|
+
- spec/factories/archived_url_factory.rb
|
237
|
+
- spec/factories/event_factory.rb
|
238
|
+
- spec/github/archive/archived_url_spec.rb
|
239
|
+
- spec/github/archive/event_spec.rb
|
240
|
+
- spec/github/archive/stat_collector_spec.rb
|
241
|
+
- spec/spec_helper.rb
|