ghtorrent 0.6 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,130 +0,0 @@
1
#!/bin/sh
#
# Create the periodic database dump files
#

# Directory to place compressed files and torrents
OUTDIR=/home/data/github-mirror/dumps

# Base URL for the HTTP directory containing torrents and data.
# Kept without a trailing slash: the archive name is appended later as
# "$WEBSEED/<file>", and a trailing slash here would yield "//" in the URL.
WEBSEED=http://ikaria.dmst.aueb.gr/ghtorrent
11
-
12
# Print the command-line synopsis and option summary to standard output.
usage()
{
  cat <<EOF
Usage: $0 [-f 'yyyy-mm-dd hh:mm'] [-t 'yyyy-mm-dd hh:mm']
 [-c collection_to_dump]
Dump the database. -f earliest record timestamp
 -t latest record timestamp
 -c collection to dump (default: all)
EOF
}
20
-
21
# Without any argument there is nothing to do: show the help text and stop.
if [ -z "$1" ]
then
  usage
  exit 1
fi

# Parse the options. The leading ':' puts getopts in silent mode: an unknown
# option yields o='?' and a missing option argument yields o=':', in both
# cases with the offending option letter stored in OPTARG.
while getopts ":f:t:c:" o
do
  case $o in
    # Abort when date cannot parse the timestamp instead of silently
    # falling back to the defaults computed below.
    f) timeStart=$(date -d "$OPTARG" +%s) || exit 1 ;;
    t) timeEnd=$(date -d "$OPTARG" +%s) || exit 1 ;;
    c) collection=$OPTARG ;;
    :) echo "Option -$OPTARG requires an argument" >&2
       usage
       exit 1
       ;;
    \?) echo "Invalid option: -$OPTARG" >&2
       usage
       exit 1
       ;;
  esac
done


# Time to start dumping from: -f if given, otherwise the end time recorded
# by the previous run in ./lastrun, otherwise the epoch.
if [ -z "$timeStart" ]
then
  if [ -r lastrun ]
  then
    timeStart=$(cat lastrun)
  else
    timeStart=0
  fi
fi

# Time to end dumping: -t if given, otherwise now
if [ -z "$timeEnd" ]
then
  timeEnd=$(date +%s)
fi

# Name used for the files (UTC date of the end time)
dateName=$(date -d "@$timeEnd" -u +'%Y-%m-%d')

# _id example:
# 4f208c3e08d69a1835000077
# 000102030405060708091011
# |      ||    ||  ||    |
#   time   mach pid count
#
# The first 8 hex digits of an _id are its creation time in seconds, so a
# time range maps to an _id range by zero-padding the remaining 16 digits.

endId=$(printf '%08x0000000000000000' "$timeEnd")
startId=$(printf '%08x0000000000000000' "$timeStart")


# Collections to dump: the one named with -c, otherwise every collection
# reported by the mongo shell except system ones (and the shell's "bye").
if [ -z "$collection" ]
then
  collections=$(echo "show collections" | mongo --quiet github | grep -Ev "system|bye")
else
  collections=$collection
fi

echo "Dumping database from $(date -d "@$timeStart") to $(date -d "@$timeEnd")"

# Start from an empty working directory
rm -rf dump
mkdir -p dump/github || exit 1

# $collections is intentionally unquoted: it holds one collection per word.
for col in $collections; do

  echo "Dumping $col"
  mongodump --db github --collection "$col" -q '{"_id" : {"$gte" : ObjectId("'"$startId"'"), "$lt" : ObjectId("'"$endId"'")} }' || exit 1
done
88
-
89
# Report the metadata for the given collection: the number of records whose
# _id lies in [startId, endId) and the uncompressed size of its dump file.
#   $1  collection name
# Reads the global variables startId and endId and the file
# dump/github/$1.bson.
meta()
{
  # printf instead of "echo -n": the latter is not portable under /bin/sh.
  printf 'Number of %s: ' "$1"
  mongo --quiet --eval 'db.'"$1"'.find({"_id" : {"$gte" : ObjectId("'"$startId"'"), "$lt" : ObjectId("'"$endId"'")} }).count() + 0' github
  printf 'Uncompressed size of %s: ' "$1"
  # Exact size in bytes, followed by the human-readable size in parentheses
  wc -c "dump/github/$1.bson" | awk '{printf "%d bytes ", $1}'
  du -h "dump/github/$1.bson" | awk '{print " (" $1 ")" }'
}
98
-
99
# Write the metadata of every collection both to a dated README in the
# current directory and to the README inside the dump directory.
# Only tee's exit status is checked by the "|| exit 1" below.
for col in $collections; do
  echo "Start date: $(date -u -d "@$timeStart" +'%Y-%m-%dT%H:%M:%SZ')"
  echo "End date: $(date -u -d "@$timeEnd" +'%Y-%m-%dT%H:%M:%SZ')"
  meta "$col"
done |
tee "README.$dateName.txt" >dump/github/README.txt || exit 1

# Archive every non-empty collection and create a torrent for the archive
for col in $collections; do
  echo "Archiving $col.bson"
  if [ ! -s "dump/github/$col.bson" ]; then
    echo "Collection empty, skipping"
    continue
  fi

  archive="$OUTDIR/$col-dump.$dateName.tar.gz"

  # Never leave a partial archive behind
  if ! tar zcf "$archive" "dump/github/$col.bson"
  then
    rm -f "$archive"
    exit 1
  fi

  # ${WEBSEED%/} drops a trailing slash so the web seed URL has no "//".
  # A torrent failure is reported but does not abort the remaining archives.
  mktorrent -a udp://tracker.openbittorrent.com:80 -a udp://tracker.publicbt.com:80/announce -a http://tracker.bittorrent.am/announce -w "${WEBSEED%/}/$col-dump.$dateName.tar.gz" -o "$OUTDIR/$col-dump.$dateName.torrent" "$archive" ||
    echo "Warning: could not create torrent for $col" >&2
done

# Update last run info: the next run starts where this one ended
echo "$timeEnd" >lastrun || exit 1

# Clean up
rm -rf dump
130
-
@@ -1,150 +0,0 @@
1
- require 'rubygems'
2
- require 'erb'
3
- require 'set'
4
- require 'date'
5
- require 'ghtorrent'
6
-
7
# Data holder for the index page: the set of dumps, the set of collection
# names, and the date of the most recent update. Instances are meant to be
# rendered through the Binding returned by #get_binding.
class Page
  attr_reader :collections, :dumps

  # last_update is kept in @last_update; no reader is defined for it, so it
  # is only reachable from code evaluated with this object's binding.
  def initialize(last_update)
    @collections = Set.new
    @dumps = Set.new
    @last_update = last_update
  end

  # Registers a dump; duplicates are ignored because dumps is a Set.
  def add_dump(dump)
    dumps.add(dump)
  end

  # Registers a collection name; duplicates are ignored as well.
  def add_collection(col)
    collections.add(col)
  end

  # Expose private binding() method.
  def get_binding
    binding
  end
end
31
-
32
# One dump: the torrents produced for a given date.
class Dump
  attr_reader :torrents, :date

  # torrents - the torrents belonging to this dump
  # date     - the date the dump was taken
  def initialize(torrents, date)
    @torrents, @date = torrents, date
  end
end
41
-
42
# A single torrent file: its link, the name it is listed under, its size,
# and its date.
class Torrent
  attr_reader :url, :name, :size, :date

  # url  - link to the torrent file
  # name - name the torrent is listed under
  # size - size value stored as given
  # date - date associated with the torrent
  def initialize(url, name, size, date)
    @url, @name = url, name
    @size, @date = size, date
  end
end
55
-
56
# Command that prints an HTML index of the torrent files found in a
# directory. The directory is the first remaining command-line argument and
# defaults to the current directory; the page is rendered from the
# index.erb template and written to standard output.
class Indexer < GHTorrent::Command

  # Torrent file names look like "users-dump.2012-03-27.torrent": capture 1
  # is the collection name, capture 2 the dump date. The pattern is anchored
  # and accepts '_' so that a name such as "commit_comments" is captured
  # whole instead of being cut down to "comments".
  TORRENT_FILE_PATTERN = /\A([a-z0-9_]+)-[a-z]+\.(.*)\.torrent\z/

  # Declares the banner and the command-specific options.
  def prepare_options(options)
    options.banner <<-BANNER
Create an HTML table from a list of torrent and data files. The expected
naming is the following:

collname-dump.2012-03-27.torrent
collname-dump.2012-03-27.tar.gz

#{command_name} [options]

#{command_name} options:
    BANNER

    options.opt :prefix, 'URL prefix to use for links',
                :short => 'p', :default => "", :type => :string
  end

  # Intentionally empty: this command performs no option validation.
  def validate_options

  end

  # Scans the directory, groups the torrents found by dump date and prints
  # the rendered page.
  def go
    url_prefix = options[:prefix]
    rhtml = ERB.new(load_index_template)

    # Directory to read entries from
    dir = ARGV.shift || "."

    # Entries that are not usable torrents map to nil and are dropped
    torrents = Dir.entries(dir).map { |f| build_torrent(dir, f, url_prefix) }.compact

    # One Dump per distinct date, holding that date's torrents keyed by
    # collection name (a later torrent with the same name replaces an
    # earlier one)
    all_dumps = torrents.group_by { |t| t.date }.map do |date, dated|
      by_name = {}
      dated.each { |t| by_name[t.name] = t }
      Dump.new(by_name, date)
    end

    # nil when no torrent was found
    max_date = torrents.map { |t| t.date }.max

    ghtorrent = Page.new(max_date)
    all_dumps.each do |dump|
      ghtorrent.add_dump dump
      dump.torrents.values.each { |t| ghtorrent.add_collection t.name }
    end

    # Strip leading whitespace on every line, collapse trailing whitespace
    # to a single line separator, and start each table on its own line
    puts rhtml.result(ghtorrent.get_binding).gsub(/^\s+/, "").gsub(/\s+$/, $/).gsub(/<table>/, "\n<table>")
  end

  private

  # Returns the text of index.erb, taken from the installed ghtorrent gem
  # when it is loaded and from the current directory otherwise.
  def load_index_template
    spec = Gem.loaded_specs['ghtorrent']

    # loaded_specs holds Gem::Specification objects, not paths, so the gem
    # directory has to be asked for explicitly.
    path = if spec.nil?
             # Gem not installed yet, try current dir
             "index.erb"
           else
             File.join(spec.full_gem_path, "index.erb")
           end

    # File.read closes the file; File.open(...).read left it open
    File.read(path)
  end

  # Builds the Torrent for one directory entry.
  #
  # dir        - directory being indexed
  # file_name  - entry name inside dir
  # url_prefix - prefix for the torrent link
  #
  # Returns nil when the entry is not a torrent file, when its .tar.gz
  # archive is missing or smaller than 1 MB, or when the date part of the
  # name cannot be parsed. Skipped torrents are reported on standard error
  # so that the HTML on standard output stays clean.
  def build_torrent(dir, file_name, url_prefix)
    matches = TORRENT_FILE_PATTERN.match(file_name)
    return nil if matches.nil?

    # The archive shares the torrent's name; only the suffix differs
    archive = File.join(dir, file_name.sub(/\.torrent\z/, ".tar.gz"))
    unless File.file?(archive)
      warn "Skipping #{file_name}: #{archive} not found"
      return nil
    end

    # Archive size in whole megabytes
    size = File.size(archive) / 1024 / 1024
    return nil unless size > 0

    # Expects a format of yyyy-mm-dd
    begin
      date = Date.parse(matches[2])
    rescue ArgumentError
      warn "Skipping #{file_name}: cannot parse date '#{matches[2]}'"
      return nil
    end

    Torrent.new(url_prefix + "/" + file_name, matches[1], size, date)
  end
end
147
-
148
- Indexer.run
149
-
150
- # vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
@@ -1,67 +0,0 @@
1
- require "test/unit"
2
- require 'ghtorrent'
3
-
4
# Smoke tests for CallStack: construction, push, pop, and a larger
# push/pop sequence. Every test uses its own stack name ('users',
# 'users1', ... 'users4').
# NOTE(review): stack.empty is called at the end of most tests, presumably
# to clear the stack's state between runs -- confirm against CallStack.
class CallStackTest < Test::Unit::TestCase

  # Two stacks built from the same arguments must compare equal.
  def test_constructor
    a = CallStack.new('users', 0)
    b = CallStack.new('users', 0)
    assert_equal a, b
  end

  # Pushing several values must not raise.
  def test_push
    stack = CallStack.new('users1', 0)
    assert_not_nil stack

    stack.push("foo bar")
    stack.push("2")
    stack.push("1234421")
    stack.empty
  end

  # pop returns the value pushed last.
  def test_pop
    stack = CallStack.new('users2', 0)
    assert_not_nil stack

    stack.push("foo bar")
    stack.push("2")
    stack.push("1234421")

    # assert_equal reports both values on failure; a bare assert on ==
    # would only report "false is not true"
    assert_equal "1234421", stack.pop
    stack.empty
  end

  # Pushing again after a pop must not raise.
  def test_push_pop_push
    stack = CallStack.new('users3', 0)
    assert_not_nil stack

    stack.push("foo bar")
    stack.push("2")

    stack.pop

    stack.push("1234421")

    stack.empty
  end

  # 1000 pushes of random strings followed by 1000 pops must not raise.
  def test_stress
    stack = CallStack.new('users4', 0)

    1000.times do
      # 1 to 20 random characters in the range 'A'..'Y'
      txt = (0..rand(20)).map { (65 + rand(25)).chr }.join
      stack.push txt
    end

    999.times do
      stack.pop
    end
    # The final pop removes the last of the 1000 pushed values
    stack.pop
  end
end