ghtorrent 0.6 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,130 +0,0 @@
1
#!/bin/sh
#
# Create the periodic database dump files
#
# For every collection of the "github" MongoDB database (or only the one
# given with -c), dump the documents whose ObjectId timestamp falls in
# [timeStart, timeEnd), write a README with per-collection statistics,
# archive each non-empty dump into $OUTDIR and create a matching torrent.
# The end time of a successful run is stored in ./lastrun and is used as
# the default start time of the next run.
#
# Requires GNU date (-d), mongo, mongodump, tar and mktorrent.

# Directory to place compressed files and torrents
OUTDIR=/home/data/github-mirror/dumps

# Base URL for HTTP dir containing torrents and data
WEBSEED=http://ikaria.dmst.aueb.gr/ghtorrent/

# Print a short usage message on stdout.
usage()
{
  echo "Usage: $0 [-f 'yyyy-mm-dd hh:mm'] [-t 'yyyy-mm-dd hh:mm']"
  echo " [-c collection_to_dump]"
  echo "Dump the database. -f earliest record timestamp"
  echo " -t latest record timestamp"
  echo " -c collection to dump (default: all)"
}

# Convert a human-readable timestamp ($2, given with option $1) to epoch
# seconds on stdout; abort the script when date(1) cannot parse it instead
# of silently falling back to the default time range.
to_epoch()
{
  date -d "$2" +%s || {
    echo "Invalid timestamp for $1: $2" >&2
    exit 1
  }
}

if [ -z "$1" ]
then
  usage
  exit 1
fi

# The leading ':' selects getopts' silent mode, in which $OPTARG holds the
# offending option character for both the '?' and ':' cases.
while getopts ":f:t:c:" o
do
  case $o in
  f) timeStart=$(to_epoch -f "$OPTARG") || exit 1 ;;
  t) timeEnd=$(to_epoch -t "$OPTARG") || exit 1 ;;
  c) collection=$OPTARG ;;
  :) echo "Option -$OPTARG requires an argument" >&2
     usage
     exit 1
     ;;
  \?) echo "Invalid option: -$OPTARG" >&2
     usage
     exit 1
     ;;
  esac
done


# Time to start dumping from
if [ -z "$timeStart" ]
then
  if [ -r lastrun ]
  then
    timeStart=$(cat lastrun)
  else
    timeStart=0
  fi
fi

# Time to end dumping
if [ -z "$timeEnd" ]
then
  timeEnd=$(date +%s)
fi

# Name used for the files
dateName=$(date -d "@$timeEnd" -u +'%Y-%m-%d')

# _id example:
# 4f208c3e08d69a1835000077
# 000102030405060708091011
# |      ||    ||  ||    |
# time    mach  pid count
#
# Only the leading 4-byte timestamp is filled in; the remaining bytes are
# zero so the ids act as range boundaries.

endId=$(printf '%08x0000000000000000' "$timeEnd")
startId=$(printf '%08x0000000000000000' "$timeStart")


if [ -z "$collection" ]
then
  collections=$(echo "show collections" | mongo --quiet github | grep -E -v "system|bye")
else
  collections=$collection
fi

echo "Dumping database from $(date -d "@$timeStart") to $(date -d "@$timeEnd")"

rm -rf dump
mkdir -p dump/github || exit 1

# $collections is intentionally unquoted: it is a whitespace-separated list.
for col in $collections; do

  echo "Dumping $col"
  mongodump --db github --collection "$col" -q '{"_id" : {"$gte" : ObjectId("'"$startId"'"), "$lt" : ObjectId("'"$endId"'")} }' || exit 1
done

# Report the metadata for the given database
#   $1 - collection name
meta()
{
  # printf instead of "echo -n": the latter is not portable under /bin/sh.
  printf 'Number of %s: ' "$1"
  mongo --quiet --eval 'db.'"$1"'.find({"_id" : {"$gte" : ObjectId("'"$startId"'"), "$lt" : ObjectId("'"$endId"'")} }).count() + 0' github
  printf 'Uncompressed size of %s: ' "$1"
  wc -c "dump/github/$1.bson" | awk '{printf "%d bytes ", $1}'
  du -h "dump/github/$1.bson" | awk '{print " (" $1 ")" }'
}

for col in $collections; do
  (
    echo "Start date: $(date -u -d "@$timeStart" +'%Y-%m-%dT%H:%M:%SZ')"
    echo "End date: $(date -u -d "@$timeEnd" +'%Y-%m-%dT%H:%M:%SZ')"
    meta "$col"
  )
done |
tee "README.$dateName.txt" >dump/github/README.txt || exit 1

# Do the same per collection
for col in $collections; do
  echo "Archiving $col.bson"
  if [ ! -s "dump/github/$col.bson" ]; then
    echo "Collection empty, skipping"
    continue
  fi

  archive="$OUTDIR/${col}-dump.$dateName.tar.gz"

  if ! tar zcf "$archive" "dump/github/$col.bson"
  then
    # Do not leave a truncated archive behind.
    rm -f "$archive"
    exit 1
  fi

  # ${WEBSEED%/} drops the trailing slash so the web seed URL has no "//".
  mktorrent -a udp://tracker.openbittorrent.com:80 -a udp://tracker.publicbt.com:80/announce -a http://tracker.bittorrent.am/announce -w "${WEBSEED%/}/${col}-dump.$dateName.tar.gz" -o "$OUTDIR/${col}-dump.$dateName.torrent" "$archive"
done

# Update last run info
echo "$timeEnd" >lastrun || exit 1

# Clean up
rm -rf dump
130
-
@@ -1,150 +0,0 @@
1
- require 'rubygems'
2
- require 'erb'
3
- require 'set'
4
- require 'date'
5
- require 'ghtorrent'
6
-
7
# View model for the index page: the set of dumps to list, the set of
# collection names that appear in them, and the date of the latest dump.
# Its binding is what the ERB template is evaluated against.
class Page
  # Set of collection names registered through #add_collection.
  attr_reader :collections
  # Set of dumps registered through #add_dump.
  attr_reader :dumps
  # Value given to the constructor (Indexer#go passes the newest dump date).
  attr_reader :last_update

  # last_update - value to expose to the template as the last update date.
  def initialize(last_update)
    @last_update = last_update
    @dumps = Set.new
    @collections = Set.new
  end

  # Register a dump; returns the updated set of dumps.
  def add_dump(dump)
    @dumps << dump
  end

  # Register a collection name (duplicates collapse); returns the updated set.
  def add_collection(col)
    @collections << col
  end

  # Expose private binding() method.
  def get_binding
    binding
  end
end
31
-
32
# All torrents that belong to one dump date.
class Dump
  # Torrents of this dump (Indexer#go passes a Hash keyed by collection name).
  attr_reader :torrents
  # Date shared by all torrents of this dump.
  attr_reader :date

  # torrents - the torrents that make up the dump
  # date     - the dump date
  def initialize(torrents, date)
    @torrents = torrents
    @date = date
  end
end
41
-
42
# A single torrent file found in the scanned directory.
class Torrent
  # Link to the torrent file (URL prefix + "/" + file name in Indexer).
  attr_reader :url
  # Name of the dumped collection, taken from the file name.
  attr_reader :name
  # Size of the matching .tar.gz (Indexer passes whole mebibytes).
  attr_reader :size
  # Dump date parsed from the file name.
  attr_reader :date

  def initialize(url, name, size, date)
    @url = url
    @name = name
    @size = size
    @date = date
  end
end
55
-
56
# Command that prints, on stdout, an HTML index of the dump torrents found
# in a directory. The page is rendered from the "index.erb" template.
#
# NOTE(review): option parsing and the call to #go are presumably driven by
# GHTorrent::Command.run -- confirm against that class.
class Indexer < GHTorrent::Command

  # Torrent file name, e.g. "users-dump.2012-03-27.torrent".
  # Capture 1 is the collection name, capture 2 the date text.
  TORRENT_NAME = /([a-z0-9]+)-[a-z]+\.(.*)\.torrent\z/

  # Declare the banner and the --prefix/-p option on the given parser.
  def prepare_options(options)
    options.banner <<-BANNER
Create an HTML table from a list of torrent and data files. The expected
naming is the following:

collname-dump.2012-03-27.torrent
collname-dump.2012-03-27.tar.gz

#{command_name} [options]

#{command_name} options:
    BANNER

    options.opt :prefix, 'URL prefix to use for links',
                :short => 'p', :default => "", :type => :string
  end

  # Nothing to validate for this command.
  def validate_options

  end

  # Scan the directory given as first positional argument (default: ".")
  # for torrents, group them per dump date and print the rendered page.
  def go
    url_prefix = options[:prefix]

    rhtml = ERB.new(index_template_source)

    # Open the dir to read entries from
    dir = ARGV.shift || "."

    torrents = torrents_in(dir, url_prefix)

    all_dates = torrents.inject(Set.new) { |acc, t| acc << t.date }

    # One Dump per date, holding that date's torrents keyed by collection name.
    all_dumps = all_dates.map do |d|
      by_name = {}
      torrents.each { |t| by_name[t.name] = t if t.date == d }
      Dump.new(by_name, d)
    end

    page = Page.new(all_dates.max)
    all_dumps.each do |dump|
      page.add_dump dump
      dump.torrents.values.each { |t| page.add_collection t.name }
    end

    # Strip indentation and trailing whitespace from the rendered HTML and
    # put every <table> on a line of its own.
    puts rhtml.result(page.get_binding).gsub(/^\s+/, "").gsub(/\s+$/, $/).gsub(/<table>/, "\n<table>")
  end

  private

  # Return the text of the index.erb template: taken from the installed
  # ghtorrent gem's directory when the gem is loaded, otherwise from the
  # current directory.
  def index_template_source
    spec = Gem.loaded_specs['ghtorrent']

    path = if spec.nil?
             # Gem not installed yet, try current dir
             "index.erb"
           else
             # Gem.loaded_specs holds Gem::Specification objects, not paths.
             File.join(spec.full_gem_path, "index.erb")
           end

    File.read(path)
  end

  # Build a Torrent for every torrent file in dir that has a matching,
  # non-trivial data file.
  #
  # File name format expected: collname-dump.2012-03-27.torrent
  #                            collname-dump.2012-03-27.tar.gz
  #
  # Torrents whose archive is missing are reported on stderr and skipped;
  # torrents whose archive is smaller than 1 MiB are skipped silently.
  def torrents_in(dir, url_prefix)
    found = Dir.entries(dir).map do |f|
      # Extract name of dumped collection and dump date
      matches = TORRENT_NAME.match(f)
      next if matches.nil?

      # Calculate original file size (in whole MiB)
      data_file = File.join(dir, f.sub(/\.torrent\z/, ".tar.gz"))
      unless File.exist?(data_file)
        warn "Skipping #{f}: #{data_file} not found"
        next
      end
      size = File.stat(data_file).size / 1024 / 1024

      # Expects a format of yyyy-mm-dd
      date = Date.parse(matches[2])

      Torrent.new(url_prefix + "/" + f, matches[1], size, date) if size > 0
    end

    found.compact
  end
end

Indexer.run
149
-
150
- # vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
@@ -1,67 +0,0 @@
1
- require "test/unit"
2
- require 'ghtorrent'
3
-
4
# Exercises CallStack's push/pop interface.
#
# NOTE(review): every test uses its own stack name ('users1', 'users2', ...)
# and most end with `stack.empty`; this looks like cleanup of per-name state
# kept by CallStack -- confirm against the CallStack implementation.
class CallStackTest < Test::Unit::TestCase

  # Two stacks created with the same arguments compare equal.
  def test_constructor
    a = CallStack.new('users', 0)
    b = CallStack.new('users', 0)
    assert_equal a, b
  end

  # Pushing several entries must not raise.
  def test_push
    stack = CallStack.new('users1', 0)
    assert_not_nil stack

    stack.push("foo bar")
    stack.push("2")
    stack.push("1234421")
    stack.empty
  end

  # pop returns the most recently pushed entry.
  def test_pop
    stack = CallStack.new('users2', 0)
    assert_not_nil stack

    stack.push("foo bar")
    stack.push("2")
    stack.push("1234421")

    assert_equal "1234421", stack.pop
    stack.empty
  end

  # Interleaved push/pop/push must not raise.
  def test_push_pop_push
    stack = CallStack.new('users3', 0)
    assert_not_nil stack

    stack.push("foo bar")
    stack.push("2")

    stack.pop

    stack.push("1234421")

    stack.empty
  end

  # Push 1000 random strings, then pop all of them again.
  def test_stress
    stack = CallStack.new('users4', 0)

    1000.times do
      # 1..21 random characters in 'A'..'Y' (char codes 65..89)
      txt = (0..rand(20)).map { (65 + rand(25)).chr }.join
      stack.push txt
    end

    999.times do
      stack.pop
    end
    stack.pop
  end
end