ghtorrent 0.6 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +12 -0
- data/Gemfile +1 -11
- data/Gemfile.lock +27 -29
- data/README.md +10 -14
- data/bin/ght-mirror-events +0 -0
- data/bin/ght-process-event +0 -0
- data/bin/ght-retrieve-repo +0 -0
- data/bin/ght-retrieve-user +6 -0
- data/lib/ghtorrent.rb +1 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +6 -0
- data/lib/ghtorrent/adapters/mongo_persister.rb +8 -0
- data/lib/ghtorrent/api_client.rb +8 -29
- data/lib/ghtorrent/command.rb +1 -3
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +5 -10
- data/lib/ghtorrent/commands/ght_get_more_commits.rb +28 -17
- data/lib/ghtorrent/commands/ght_load.rb +2 -2
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +45 -15
- data/lib/ghtorrent/commands/ght_retrieve_user.rb +72 -0
- data/lib/ghtorrent/ghtorrent.rb +288 -209
- data/lib/ghtorrent/migrations/012_add_forks_to_projects.rb +31 -0
- data/lib/ghtorrent/migrations/013_add_merged_to_pullreqs.rb +39 -0
- data/lib/ghtorrent/migrations/014_add_deleted_to_projects.rb +21 -0
- data/lib/ghtorrent/retriever.rb +90 -25
- data/lib/ghtorrent/settings.rb +44 -6
- data/lib/version.rb +2 -2
- metadata +52 -84
- data/bin/ght-periodic-dump +0 -130
- data/bin/ght-torrent-index +0 -150
- data/test/callstack_test.rb +0 -67
data/bin/ght-periodic-dump
DELETED
@@ -1,130 +0,0 @@
|
|
1
|
-
#!/bin/sh
|
2
|
-
#
|
3
|
-
# Create the periodic database dump files
|
4
|
-
#
|
5
|
-
|
6
|
-
# Directory to place compressed files and torrents
|
7
|
-
OUTDIR=/home/data/github-mirror/dumps
|
8
|
-
|
9
|
-
# Base URL for HTTP dir containing torrents and data
|
10
|
-
WEBSEED=http://ikaria.dmst.aueb.gr/ghtorrent/
|
11
|
-
|
12
|
-
usage()
|
13
|
-
{
|
14
|
-
echo "Usage: $0 [-f 'yyyy-mm-dd hh:mm'] [-t 'yyyy-mm-dd hh:mm']"
|
15
|
-
echo " [-c collection_to_dump]"
|
16
|
-
echo "Dump the database. -f earliest record timestamp"
|
17
|
-
echo " -t latest record timestamp"
|
18
|
-
echo " -c collection to dump (default: all)"
|
19
|
-
}
|
20
|
-
|
21
|
-
if [ -z $1 ]
|
22
|
-
then
|
23
|
-
usage
|
24
|
-
exit 1
|
25
|
-
fi
|
26
|
-
|
27
|
-
while getopts "f:t:c:" o
|
28
|
-
do
|
29
|
-
case $o in
|
30
|
-
f) timeStart=`date -d "$OPTARG" +%s` ;;
|
31
|
-
t) timeEnd=`date -d "$OPTARG" +%s` ;;
|
32
|
-
c) collection=$OPTARG ;;
|
33
|
-
\?) echo "Invalid option: -$OPTARG" >&2
|
34
|
-
usage
|
35
|
-
exit 1
|
36
|
-
;;
|
37
|
-
esac
|
38
|
-
done
|
39
|
-
|
40
|
-
|
41
|
-
# Time to start dumping from
|
42
|
-
if [ -z $timeStart ]
|
43
|
-
then
|
44
|
-
if [ -r lastrun ]
|
45
|
-
then
|
46
|
-
timeStart=`cat lastrun`
|
47
|
-
else
|
48
|
-
timeStart=0
|
49
|
-
fi
|
50
|
-
fi
|
51
|
-
|
52
|
-
# Time to end dumping
|
53
|
-
if [ -z $timeEnd ]
|
54
|
-
then
|
55
|
-
timeEnd=`date +%s`
|
56
|
-
fi
|
57
|
-
|
58
|
-
# Name used for the files
|
59
|
-
dateName=`date -d @$timeEnd -u +'%Y-%m-%d'`
|
60
|
-
|
61
|
-
# _id example:
|
62
|
-
# 4f208c3e08d69a1835000077
|
63
|
-
# 000102030405060708091011
|
64
|
-
# | || || || |
|
65
|
-
# time mach pid count
|
66
|
-
|
67
|
-
endId=`printf '%08x0000000000000000' $timeEnd`
|
68
|
-
startId=`printf '%08x0000000000000000' $timeStart`
|
69
|
-
|
70
|
-
|
71
|
-
if [ -z $collection ]
|
72
|
-
then
|
73
|
-
collections=`echo "show collections"|mongo --quiet github|egrep -v "system|bye"`
|
74
|
-
else
|
75
|
-
collections=$collection
|
76
|
-
fi
|
77
|
-
|
78
|
-
echo "Dumping database from `date -d @$timeStart` to `date -d @$timeEnd`"
|
79
|
-
|
80
|
-
rm -rf dump
|
81
|
-
mkdir -p dump/github
|
82
|
-
|
83
|
-
for col in $collections; do
|
84
|
-
|
85
|
-
echo "Dumping $col"
|
86
|
-
mongodump --db github --collection $col -q '{"_id" : {"$gte" : ObjectId("'$startId'"), "$lt" : ObjectId("'$endId'")} }' || exit 1
|
87
|
-
done
|
88
|
-
|
89
|
-
# Report the metadata for the given database
|
90
|
-
meta()
|
91
|
-
{
|
92
|
-
echo -n "Number of $1: "
|
93
|
-
mongo --quiet --eval 'db.'$1'.find({"_id" : {"$gte" : ObjectId("'$startId'"), "$lt" : ObjectId("'$endId'")} }).count() + 0' github
|
94
|
-
echo -n "Uncompressed size of $1: "
|
95
|
-
wc -c dump/github/$1.bson | awk '{printf "%d bytes ", $1}'
|
96
|
-
du -h dump/github/$1.bson | awk '{print " (" $1 ")" }'
|
97
|
-
}
|
98
|
-
|
99
|
-
for col in $collections; do
|
100
|
-
(
|
101
|
-
echo "Start date: `date -u -d @$timeStart +'%Y-%m-%dT%H:%M:%SZ'`"
|
102
|
-
echo "End date: `date -u -d @$timeEnd +'%Y-%m-%dT%H:%M:%SZ'`"
|
103
|
-
meta $col
|
104
|
-
)
|
105
|
-
done |
|
106
|
-
tee README.$dateName.txt >dump/github/README.txt || exit 1
|
107
|
-
|
108
|
-
# Do the same per collection
|
109
|
-
for col in $collections; do
|
110
|
-
echo "Archiving $col.bson"
|
111
|
-
if [ ! -s dump/github/$col.bson ]; then
|
112
|
-
echo "Collection empty, skipping"
|
113
|
-
continue
|
114
|
-
fi
|
115
|
-
|
116
|
-
if ! tar zcf $OUTDIR/$col-dump.$dateName.tar.gz dump/github/$col.bson
|
117
|
-
then
|
118
|
-
rm -f $OUTDIR/$col-dump.$dateName.tar.gz
|
119
|
-
exit 1
|
120
|
-
fi
|
121
|
-
|
122
|
-
mktorrent -a udp://tracker.openbittorrent.com:80 -a udp://tracker.publicbt.com:80/announce -a http://tracker.bittorrent.am/announce -w $WEBSEED/$col-dump.$dateName.tar.gz -o $OUTDIR/$col-dump.$dateName.torrent $OUTDIR/$col-dump.$dateName.tar.gz
|
123
|
-
done
|
124
|
-
|
125
|
-
# Update last run info
|
126
|
-
echo $timeEnd >lastrun || exit 1
|
127
|
-
|
128
|
-
# Clean up
|
129
|
-
rm -rf dump
|
130
|
-
|
data/bin/ght-torrent-index
DELETED
@@ -1,150 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'erb'
|
3
|
-
require 'set'
|
4
|
-
require 'date'
|
5
|
-
require 'ghtorrent'
|
6
|
-
|
7
|
-
class Page
|
8
|
-
attr_reader :collections
|
9
|
-
attr_reader :dumps
|
10
|
-
|
11
|
-
def initialize(last_update)
|
12
|
-
@last_update = last_update
|
13
|
-
@dumps = Set.new
|
14
|
-
@collections = Set.new
|
15
|
-
end
|
16
|
-
|
17
|
-
def add_dump(dump)
|
18
|
-
@dumps << dump
|
19
|
-
end
|
20
|
-
|
21
|
-
def add_collection(col)
|
22
|
-
@collections << col
|
23
|
-
end
|
24
|
-
|
25
|
-
# Expose private binding() method.
|
26
|
-
def get_binding
|
27
|
-
binding()
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
class Dump
|
33
|
-
attr_reader :torrents
|
34
|
-
attr_reader :date
|
35
|
-
|
36
|
-
def initialize(torrents, date)
|
37
|
-
@torrents = torrents
|
38
|
-
@date = date
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
class Torrent
|
43
|
-
attr_reader :url
|
44
|
-
attr_reader :name
|
45
|
-
attr_reader :size
|
46
|
-
attr_reader :date
|
47
|
-
|
48
|
-
def initialize(url, name, size, date)
|
49
|
-
@url = url
|
50
|
-
@name = name
|
51
|
-
@size = size
|
52
|
-
@date = date
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
class Indexer < GHTorrent::Command
|
57
|
-
|
58
|
-
def prepare_options(options)
|
59
|
-
options.banner <<-BANNER
|
60
|
-
Create an HTML table from a list of torrent and data files. The expected
|
61
|
-
naming is the following:
|
62
|
-
|
63
|
-
collname-dump-2012-03-27.torrent
|
64
|
-
collname-dump-2012-03-27.tar.gz
|
65
|
-
|
66
|
-
#{command_name} [options]
|
67
|
-
|
68
|
-
#{command_name} options:
|
69
|
-
BANNER
|
70
|
-
|
71
|
-
options.opt :prefix, 'URL prefix to use for links',
|
72
|
-
:short => 'p', :default => "", :type => :string
|
73
|
-
end
|
74
|
-
|
75
|
-
def validate_options
|
76
|
-
|
77
|
-
end
|
78
|
-
|
79
|
-
def go
|
80
|
-
url_prefix=options[:prefix]
|
81
|
-
|
82
|
-
# Load the template
|
83
|
-
gem_root = Gem.loaded_specs['ghtorrent']
|
84
|
-
|
85
|
-
file = if gem_root.nil?
|
86
|
-
# Gem not installed yet, try current dir
|
87
|
-
File.open("index.erb").read
|
88
|
-
else
|
89
|
-
File.open(File.join(gem_root, "index.erb")).read
|
90
|
-
end
|
91
|
-
|
92
|
-
rhtml = ERB.new(file)
|
93
|
-
|
94
|
-
# Open the dir to read entries from
|
95
|
-
dir = ARGV.shift
|
96
|
-
|
97
|
-
if dir.nil?
|
98
|
-
dir = "."
|
99
|
-
end
|
100
|
-
|
101
|
-
torrents = Dir.entries("#{dir}").map do |f|
|
102
|
-
|
103
|
-
#File name format expected: collname-dump-2012-03-27.torrent
|
104
|
-
# collname-dump-2012-03-27.tar.gz
|
105
|
-
|
106
|
-
# Go through all torrent files and extract name of
|
107
|
-
# dumped collection and dump date
|
108
|
-
matches = /([a-z0-9]+)-[a-z]+\.(.*)\.torrent/.match(f)
|
109
|
-
next if matches.nil?
|
110
|
-
|
111
|
-
# Calculate original file size
|
112
|
-
dump = f.gsub(/.torrent/, ".tar.gz")
|
113
|
-
size = File.stat(File.join(dir, dump)).size / 1024 / 1024
|
114
|
-
|
115
|
-
# Expects a format of yyyy-mm-dd
|
116
|
-
date = Date.parse(matches[2])
|
117
|
-
|
118
|
-
if size > 0
|
119
|
-
Torrent.new(url_prefix + "/" + f, matches[1], size, date)
|
120
|
-
end
|
121
|
-
end.select { |x| !x.nil? }
|
122
|
-
|
123
|
-
all_dates = torrents.inject(Set.new) { |acc, t| acc << t.date }
|
124
|
-
|
125
|
-
all_dumps = all_dates.map { |d|
|
126
|
-
date_torrents = torrents.select { |t| t.date == d }
|
127
|
-
name_torrents = date_torrents.inject(Hash.new) { |acc, a|
|
128
|
-
acc.store(a.name, a);
|
129
|
-
acc
|
130
|
-
}
|
131
|
-
Dump.new(name_torrents, d)
|
132
|
-
}
|
133
|
-
|
134
|
-
max_date = all_dates.max { |a, b| a <=> b }
|
135
|
-
|
136
|
-
ghtorrent = Page.new(max_date)
|
137
|
-
all_dumps.each { |x|
|
138
|
-
ghtorrent.add_dump x
|
139
|
-
x.torrents.values.each { |t|
|
140
|
-
ghtorrent.add_collection t.name
|
141
|
-
}
|
142
|
-
}
|
143
|
-
|
144
|
-
puts rhtml.result(ghtorrent.get_binding).gsub(/^\s+/, "").gsub(/\s+$/, $/).gsub(/<table>/, "\n<table>")
|
145
|
-
end
|
146
|
-
end
|
147
|
-
|
148
|
-
Indexer.run
|
149
|
-
|
150
|
-
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|
data/test/callstack_test.rb
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
require "test/unit"
|
2
|
-
require 'ghtorrent'
|
3
|
-
|
4
|
-
class CallStackTest < Test::Unit::TestCase
|
5
|
-
|
6
|
-
def setup
|
7
|
-
end
|
8
|
-
|
9
|
-
def teardown
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_constructor
|
13
|
-
a = CallStack.new('users', 0)
|
14
|
-
b = CallStack.new('users', 0)
|
15
|
-
assert_equal a,b
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_push
|
19
|
-
stack = CallStack.new('users1', 0)
|
20
|
-
assert_not_nil stack
|
21
|
-
|
22
|
-
stack.push("foo bar")
|
23
|
-
stack.push("2")
|
24
|
-
stack.push("1234421")
|
25
|
-
stack.empty
|
26
|
-
end
|
27
|
-
|
28
|
-
def test_pop
|
29
|
-
stack = CallStack.new('users2', 0)
|
30
|
-
assert_not_nil stack
|
31
|
-
|
32
|
-
stack.push("foo bar")
|
33
|
-
stack.push("2")
|
34
|
-
stack.push("1234421")
|
35
|
-
|
36
|
-
assert stack.pop == "1234421"
|
37
|
-
stack.empty
|
38
|
-
end
|
39
|
-
|
40
|
-
def test_push_pop_push
|
41
|
-
stack = CallStack.new('users3', 0)
|
42
|
-
assert_not_nil stack
|
43
|
-
|
44
|
-
stack.push("foo bar")
|
45
|
-
stack.push("2")
|
46
|
-
|
47
|
-
stack.pop
|
48
|
-
|
49
|
-
stack.push("1234421")
|
50
|
-
|
51
|
-
stack.empty
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_stress
|
55
|
-
stack = CallStack.new('users4', 0)
|
56
|
-
|
57
|
-
1000.times do
|
58
|
-
txt = (0..rand(20)).map{65.+(rand(25)).chr}.join
|
59
|
-
stack.push txt
|
60
|
-
end
|
61
|
-
|
62
|
-
999.times do
|
63
|
-
stack.pop
|
64
|
-
end
|
65
|
-
stack.pop
|
66
|
-
end
|
67
|
-
end
|