warc 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +20 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/bin/warc +4 -0
- data/lib/warc.rb +30 -0
- data/lib/warc/cli.rb +23 -0
- data/lib/warc/exceptions.rb +0 -0
- data/lib/warc/ext/net_http.rb +50 -0
- data/lib/warc/http.rb +35 -0
- data/lib/warc/parser.rb +24 -0
- data/lib/warc/proxy.rb +1 -0
- data/lib/warc/proxy/css/main.css +45 -0
- data/lib/warc/proxy/proxy.rb +85 -0
- data/lib/warc/proxy/views/index.erb +16 -0
- data/lib/warc/proxy/views/layout.erb +9 -0
- data/lib/warc/record.rb +59 -0
- data/lib/warc/record/header.rb +88 -0
- data/lib/warc/record/validator.rb +13 -0
- data/lib/warc/stream.rb +96 -0
- data/lib/warc/stream/gzip.rb +35 -0
- data/lib/warc/stream/plain.rb +23 -0
- data/lib/warc/utils/header_hash.rb +63 -0
- data/lib/warc/version.rb +3 -0
- data/spec/fixtures/arg.warc +267 -0
- data/spec/fixtures/criterion.warc +643 -0
- data/spec/fixtures/criterion.warc.gz +0 -0
- data/spec/fixtures/frg.warc +3617 -4
- data/spec/fixtures/frg.warc.gz +0 -0
- data/spec/fixtures/http_imdb +954 -0
- data/spec/spec_helper.rb +27 -0
- data/spec/warc/http_spec.rb +9 -0
- data/spec/warc/record/header_spec.rb +37 -0
- data/spec/warc/record_spec.rb +20 -0
- data/spec/warc/stream/gzip_spec.rb +46 -0
- data/spec/warc/stream/plain_spec.rb +41 -0
- data/spec/warc/stream_spec.rb +55 -0
- data/warc.gemspec +27 -0
- metadata +195 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
require "uuid"
|
2
|
+
require "time"
|
3
|
+
|
4
|
+
module Warc
  # Header of a WARC record: a case-insensitive field map (see HeaderHash)
  # with accessors that lazily fill in values derived from the owning record.
  #
  # WARC field names are case-insensitive:
  #   header["content-length"] == header["Content-Length"]
  class Record::Header < HeaderHash
    attr_reader :record
    include ::ActiveModel::Validations
    validates_with ::Warc::Record::Validator

    # Set of field names defined in the spec
    NAMED_FIELDS = [
      "WARC-Type",
      "WARC-Record-ID",
      "WARC-Date",
      "Content-Length",
      "Content-Type",
      "WARC-Concurrent-To",
      "WARC-Block-Digest",
      "WARC-Payload-Digest",
      "WARC-IP-Address",
      "WARC-Refers-To",
      "WARC-Target-URI",
      "WARC-Truncated",
      "WARC-Warcinfo-ID",
      "WARC-Filename",                # warcinfo only
      "WARC-Profile",                 # revisit only
      "WARC-Identified-Payload-Type",
      "WARC-Segment-Origin-ID",       # continuation only
      "WARC-Segment-Number",
      "WARC-Segment-Total-Length"     # continuation only
    ]

    # Fields that #to_s always writes first, in a fixed order.
    REQUIRED_FIELDS = ["WARC-Record-ID","Content-Length","WARC-Date","WARC-Type"]

    # @param record [Object] the record this header belongs to; #content_length
    #   and #block_digest read its +content+
    # @param h [Hash] initial field names and values
    def initialize(record,h={})
      @record=record
      super(h)
    end

    # Content-Length as an Integer. When the field is unset it is derived
    # from +record.content.length+; any error while deriving it yields 0.
    # NOTE(review): +length+ counts characters, not bytes - confirm the
    # content is always held as a binary string.
    def content_length
      (self["content-length"] ||= self.record.content.length rescue 0).to_i
    end

    # WARC-Date normalised to ISO 8601. When the field is unset, the current
    # UTC time is stored in the header and returned, so repeated calls (and
    # #to_s) keep reporting the same timestamp.
    def date
      stored = self["warc-date"]
      return Time.parse(stored).iso8601 if stored

      self["warc-date"] = Time.now.utc.iso8601
    end

    # Parses +d+ with Time.parse and stores it as an ISO 8601 string.
    def date=(d)
      self["warc-date"] = Time.parse(d).iso8601
    end

    # Value of the WARC-Type field (nil when unset).
    def type
      self["warc-type"]
    end

    # WARC-Record-ID, generating and storing a "<urn:uuid:...>" value when unset.
    def record_id
      self["warc-record-id"] ||= sprintf("<urn:uuid:%s>",UUID.generate)
    end

    # WARC-Block-Digest, computing and storing it from the record content when unset.
    def block_digest
      self["warc-block-digest"] ||= compute_digest(self.record.content)
    end

    # @return [String] "sha256:" followed by the hex SHA-256 digest of +content+
    def compute_digest(content)
      "sha256:" + (Digest::SHA256.hexdigest(content))
    end

    # Value of the WARC-Target-URI field (nil when unset).
    def uri
      self["warc-target-uri"]
    end

    # Serialises the header as CRLF-terminated "Name: value" lines: the four
    # required fields first, then every other stored field.
    def to_s
      crlf = "\r\n"
      # Keys keep the case they were stored with, so compare case-insensitively;
      # otherwise a stored "WARC-Type" would be written a second time below.
      required = REQUIRED_FIELDS.map(&:downcase)

      str = String.new
      str << "WARC-Type: #{self.type}" << crlf
      str << "WARC-Record-ID: #{self.record_id}" << crlf
      str << "WARC-Date: #{self.date}" << crlf
      str << "Content-Length: #{self.content_length}" << crlf
      each do |k,v|
        str << "#{k}: #{v}#{crlf}" unless required.include?(k.to_s.downcase)
      end
      str
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'active_model'
|
2
|
+
|
3
|
+
module Warc
  # ActiveModel validator that checks a header carries every required WARC field.
  class Record::Validator < ::ActiveModel::Validator
    # Adds one :base error to +header.errors+ for each required field that
    # +header.has_key?+ does not report.
    #
    # @param header [#has_key?, #errors] the header being validated
    def validate(header)
      ["WARC-Record-ID","Content-Length","WARC-Date","WARC-Type"].each do |key|
        next if header.has_key?(key)

        # errors.add is the supported API; appending to errors[:base] with <<
        # is deprecated in newer ActiveModel releases.
        header.errors.add(:base, "#{key} is a required field")
      end
    end
  end
end
|
data/lib/warc/stream.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Warc
  # Opens a WARC stream for +path+.
  #
  # Paths ending in ".warc.gz" yield a Stream::Gzip and paths ending in ".warc"
  # a Stream::Plain; an existing file is opened with +mode+, otherwise the path
  # string itself is handed to the stream. Any other path is passed as a string
  # to Stream::Gzip.
  #
  # @param path [String]
  # @param mode [String] file mode used when the file already exists
  # @return [Stream::Gzip, Stream::Plain]
  def self.open_stream(path,mode='r+')
    gzipped = path.match(/.*\.warc\.gz$/)
    plain = path.match(/.*\.warc$/)
    return Stream::Gzip.new(path) unless gzipped || plain

    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    source = ::File.exist?(path) ? ::File.new(path,mode) : path
    gzipped ? Stream::Gzip.new(source) : Stream::Plain.new(source)
  end

  # Abstract base for record streams. Subclasses set @ext, make +new+ public
  # and implement #read_record.
  class Stream
    private_class_method :new
    include Enumerable
    attr_reader :parser

    DEFAULT_OPTS = {
      # Maximum file size
      :max_filesize => 10**9
    }

    # @param fh [File, String] an open file, or a base name from which
    #   numbered files are created
    # @param options [Hash] overrides for DEFAULT_OPTS
    # @param block [Proc] optional naming proc taking (name, index); used only
    #   when +fh+ is a String
    def initialize(fh,options={},&block)
      @options = DEFAULT_OPTS.merge options
      @index = 0
      fh = case fh
           when ::File
             @name = ::File.basename(fh)
             fh
           when String
             @name = fh
             @naming_proc = block || lambda {|name,index| "#{name}.#{sprintf('%06d',index)}"}
             next_file_handle
           end
      @file_handle=fh
      @parser = ::Warc::Parser.new
    end

    # Yields every record from byte +offset+ onwards, setting each record's
    # +offset+ to the position it was read from. Without a block, returns an
    # Enumerator and reads nothing.
    def each(offset=0,&block)
      return enum_for(:each, offset) unless block_given?

      @file_handle.seek(offset,::IO::SEEK_SET)
      loop do
        position = @file_handle.tell
        rec = self.read_record
        break unless rec

        rec.offset = position
        block.call(rec)
      end
    end

    # Reads the single record starting at byte +offset+.
    def record(offset=0)
      @file_handle.seek(offset,::IO::SEEK_SET)
      self.read_record
    end

    # Closes the underlying file handle.
    def close
      @file_handle.close
    end

    # Reads the next record from the current position; implemented by subclasses.
    def read_record
      raise StandardError, "read_record must be implemented by a Stream subclass"
    end

    # Positions the handle for appending +record+ and sets +record.offset+.
    # Subclasses call this via +super+ and then write the record themselves.
    # NOTE(review): when the stream was built from a File, @naming_proc is
    # never set, so rolling over to a new file here would fail - confirm
    # whether that case is meant to be supported.
    def write_record(record)
      # Go to end of file
      @file_handle.seek(0,::IO::SEEK_END)
      expected_size = record.header.content_length + @file_handle.tell
      next_file_handle if (expected_size > @options[:max_filesize])
      record.offset = @file_handle.tell
    end

    # Size of the current file as reported by File#stat.
    def size
      @file_handle.stat.size
    end

    private

    # Closes the current file (if any) and opens the next numbered file,
    # named by @naming_proc plus @ext, in append mode.
    def next_file_handle
      @file_handle.close if @file_handle
      @index += 1
      path = @naming_proc.call(@name,@index)
      @file_handle = ::File.new(path + @ext,'a+')
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
module Warc
  # Stream over a ".warc.gz" file in which every record is written as its own
  # gzip member (see #write_record).
  class Stream::Gzip < Stream
    public_class_method :new

    def initialize(fh, options = {}, &block)
      @ext = ".warc.gz"
      super(fh, options, &block)
    end

    # Reads one record from the gzip member at the current file position.
    # Returns the parsed record, or nil when Zlib reports an error (no more
    # gzipped data to read).
    def read_record
      gz = ::Zlib::GzipReader.new(@file_handle)
      rec = self.parser.parse(gz)
      # Drain the rest of the member; running off its end raises EOFError,
      # which is how this method normally finishes.
      loop { gz.readline }
    rescue EOFError
      # The reader may have consumed bytes past the end of the member;
      # move the file cursor back over them.
      leftover = gz.unused
      @file_handle.pos -= leftover.length unless leftover.nil?
      rec
    rescue ::Zlib::Error
      nil
    end

    # Appends +record+ as a new gzip member.
    def write_record(record)
      super

      writer = ::Zlib::GzipWriter.new(@file_handle)
      record.dump_to(writer)
      # finish writes the gzip footer without closing the underlying file
      writer.finish
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Warc
  # Stream over an uncompressed ".warc" file.
  class Stream::Plain < Stream
    public_class_method :new

    def initialize(fh, options = {}, &block)
      @ext = '.warc'
      super(fh, options, &block)
    end

    # Parses the next record from the file handle; nil once EOFError signals
    # there are no more records.
    def read_record
      parser.parse(@file_handle)
    rescue EOFError
      nil
    end

    # Appends +record+ to the file after the base class has positioned the handle.
    def write_record(record)
      super
      record.dump_to(@file_handle)
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Warc
  # Hash whose keys are matched case-insensitively while keeping the case they
  # were last stored with. @names maps each stored key, and its downcased
  # form, to the stored key.
  class HeaderHash < ::Hash
    # @param hash [#each] initial key/value pairs
    def initialize(hash={})
      super()
      @names = {}
      hash.each { |k, v| self[k] = v }
    end

    # dup/clone copy instance variables by reference; give the copy its own
    # name index so changes to it (e.g. in #merge) do not leak into the original.
    def initialize_copy(other)
      super
      @names = other.instance_variable_get(:@names).dup
    end

    # Yields each key with its value; values responding to +to_ary+ are joined
    # with "\n". Returns an Enumerator when no block is given.
    def each
      return enum_for(:each) unless block_given?

      super do |k, v|
        yield(k, v.respond_to?(:to_ary) ? v.to_ary.join("\n") : v)
      end
    end

    # Plain Hash built from #each, so array values appear joined.
    def to_hash
      hash = {}
      each { |k,v| hash[k] = v }
      hash
    end

    # Looks +k+ up exactly first, then through the case-insensitive name index.
    def [](k)
      super(k) || super(@names[k.downcase])
    end

    # Stores +v+ under +k+, first removing an entry stored under a differently
    # cased spelling of the same name.
    def []=(k, v)
      canonical = k.downcase
      delete k if @names[canonical] && @names[canonical] != k # .delete is expensive, don't invoke it unless necessary
      @names[k] = @names[canonical] = k
      super k, v
    end

    # Removes the entry matching +k+ case-insensitively and returns its value.
    def delete(k)
      canonical = k.downcase
      result = super @names.delete(canonical)
      @names.delete_if { |name,| name.downcase == canonical }
      result
    end

    # Empties the hash together with its name index, so #include? stops
    # reporting keys that are gone (this also covers #replace).
    def clear
      super
      @names.clear
      self
    end

    # True when a key matching +k+ (case-insensitively) is present.
    def include?(k)
      @names.include?(k) || @names.include?(k.downcase)
    end

    alias_method :has_key?, :include?
    alias_method :member?, :include?
    alias_method :key?, :include?

    # Stores every pair from +other+ into self and returns self.
    def merge!(other)
      other.each { |k, v| self[k] = v }
      self
    end

    # Returns a copy of self with +other+ merged in; self is left untouched.
    def merge(other)
      hash = dup
      hash.merge! other
    end

    # Replaces the contents of self with the pairs from +other+.
    def replace(other)
      clear
      other.each { |k, v| self[k] = v }
      self
    end
  end
end
|
data/lib/warc/version.rb
ADDED
@@ -0,0 +1,267 @@
|
|
1
|
+
WARC/1.0
|
2
|
+
WARC-Type: warcinfo
|
3
|
+
Content-Type: application/warc-fields
|
4
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
5
|
+
WARC-Record-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
6
|
+
WARC-Filename: arg.warc
|
7
|
+
WARC-Block-Digest: sha1:RFDKLAAA6JWB4XZXWKVLNVFPTDLFTPSK
|
8
|
+
Content-Length: 270
|
9
|
+
|
10
|
+
software: Wget/1.14 (darwin10.8.0)
|
11
|
+
format: WARC File Format 1.0
|
12
|
+
conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf
|
13
|
+
robots: classic
|
14
|
+
wget-arguments: "http://www.antoineroygobeil.com/" "--mirror" "--warc-file=arg" "--no-warc-compression"
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
WARC/1.0
|
19
|
+
WARC-Type: request
|
20
|
+
WARC-Target-URI: http://www.antoineroygobeil.com/
|
21
|
+
Content-Type: application/http;msgtype=request
|
22
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
23
|
+
WARC-Record-ID: <urn:uuid:01D6037B-E36C-44C1-9731-8E75CC8996CD>
|
24
|
+
WARC-IP-Address: 64.34.145.145
|
25
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
26
|
+
WARC-Block-Digest: sha1:L5UPLOIIBXTVUBQI26LL2C2PC2RXQUAB
|
27
|
+
Content-Length: 125
|
28
|
+
|
29
|
+
GET / HTTP/1.1
|
30
|
+
User-Agent: Wget/1.14 (darwin10.8.0)
|
31
|
+
Accept: */*
|
32
|
+
Host: www.antoineroygobeil.com
|
33
|
+
Connection: Keep-Alive
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
WARC/1.0
|
38
|
+
WARC-Type: response
|
39
|
+
WARC-Record-ID: <urn:uuid:DC635649-820F-4A26-89C8-3ED1FB5398ED>
|
40
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
41
|
+
WARC-Concurrent-To: <urn:uuid:01D6037B-E36C-44C1-9731-8E75CC8996CD>
|
42
|
+
WARC-Target-URI: http://www.antoineroygobeil.com/
|
43
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
44
|
+
WARC-IP-Address: 64.34.145.145
|
45
|
+
WARC-Block-Digest: sha1:KALWHSW57TQKVJVZPLKTV7ZO24QN3BAH
|
46
|
+
WARC-Payload-Digest: sha1:SWVIXMD24OCGFYUOH5CPVJW3HPW2TPJT
|
47
|
+
Content-Type: application/http;msgtype=response
|
48
|
+
Content-Length: 803
|
49
|
+
|
50
|
+
HTTP/1.1 301 Moved Permanently
|
51
|
+
Date: Fri, 14 Sep 2012 03:08:05 GMT
|
52
|
+
Server: Apache/2.2.11 (Ubuntu) PHP/5.2.6-3ubuntu4.6 with Suhosin-Patch mod_ssl/2.2.11 OpenSSL/0.9.8g Phusion_Passenger/2.2.5
|
53
|
+
Location: http://antoineroygobeil.com/
|
54
|
+
Vary: Accept-Encoding
|
55
|
+
Content-Length: 420
|
56
|
+
Keep-Alive: timeout=15, max=100
|
57
|
+
Connection: Keep-Alive
|
58
|
+
Content-Type: text/html; charset=iso-8859-1
|
59
|
+
|
60
|
+
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
|
61
|
+
<html><head>
|
62
|
+
<title>301 Moved Permanently</title>
|
63
|
+
</head><body>
|
64
|
+
<h1>Moved Permanently</h1>
|
65
|
+
<p>The document has moved <a href="http://antoineroygobeil.com/">here</a>.</p>
|
66
|
+
<hr>
|
67
|
+
<address>Apache/2.2.11 (Ubuntu) PHP/5.2.6-3ubuntu4.6 with Suhosin-Patch mod_ssl/2.2.11 OpenSSL/0.9.8g Phusion_Passenger/2.2.5 Server at www.antoineroygobeil.com Port 80</address>
|
68
|
+
</body></html>
|
69
|
+
|
70
|
+
|
71
|
+
WARC/1.0
|
72
|
+
WARC-Type: request
|
73
|
+
WARC-Target-URI: http://antoineroygobeil.com/
|
74
|
+
Content-Type: application/http;msgtype=request
|
75
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
76
|
+
WARC-Record-ID: <urn:uuid:5641209D-F1AF-46D1-8A41-CBDB746C40D1>
|
77
|
+
WARC-IP-Address: 64.34.145.145
|
78
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
79
|
+
WARC-Block-Digest: sha1:LGO6T275AU573ZMIRCECF4Z6ZEX7RVTM
|
80
|
+
Content-Length: 121
|
81
|
+
|
82
|
+
GET / HTTP/1.1
|
83
|
+
User-Agent: Wget/1.14 (darwin10.8.0)
|
84
|
+
Accept: */*
|
85
|
+
Host: antoineroygobeil.com
|
86
|
+
Connection: Keep-Alive
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
WARC/1.0
|
91
|
+
WARC-Type: response
|
92
|
+
WARC-Record-ID: <urn:uuid:CD7AF690-EBA5-4071-BC9D-EDAD854D57D6>
|
93
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
94
|
+
WARC-Concurrent-To: <urn:uuid:5641209D-F1AF-46D1-8A41-CBDB746C40D1>
|
95
|
+
WARC-Target-URI: http://antoineroygobeil.com/
|
96
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
97
|
+
WARC-IP-Address: 64.34.145.145
|
98
|
+
WARC-Block-Digest: sha1:TABLB7DABQVOYHUK3E2CEFJ7LIHGTCV2
|
99
|
+
WARC-Payload-Digest: sha1:H62OXGS6ICUKWEIZFIS3Z4L6EWMT54K2
|
100
|
+
Content-Type: application/http;msgtype=response
|
101
|
+
Content-Length: 4795
|
102
|
+
|
103
|
+
HTTP/1.1 200 OK
|
104
|
+
Date: Fri, 14 Sep 2012 03:08:05 GMT
|
105
|
+
Server: Apache/2.2.11 (Ubuntu) PHP/5.2.6-3ubuntu4.6 with Suhosin-Patch mod_ssl/2.2.11 OpenSSL/0.9.8g Phusion_Passenger/2.2.5
|
106
|
+
Last-Modified: Thu, 23 Aug 2012 02:52:11 GMT
|
107
|
+
Accept-Ranges: bytes
|
108
|
+
Content-Length: 4404
|
109
|
+
Vary: Accept-Encoding
|
110
|
+
Keep-Alive: timeout=15, max=99
|
111
|
+
Connection: Keep-Alive
|
112
|
+
Content-Type: text/html; charset=utf-8
|
113
|
+
|
114
|
+
<!DOCTYPE html>
|
115
|
+
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
|
116
|
+
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
|
117
|
+
<!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]-->
|
118
|
+
<!--[if gt IE 8]><!-->
|
119
|
+
<html class='no-js' lang='en'>
|
120
|
+
<!--<![endif]-->
|
121
|
+
<head>
|
122
|
+
<meta charset='utf-8' />
|
123
|
+
<title>Antoine Roy-Gobeil</title>
|
124
|
+
<meta content="Antoine Roy-Gobeil's webpage and blog" name='description' />
|
125
|
+
<meta content='Antoine Roy-Gobeil' name='author' />
|
126
|
+
<meta content='nanoc 3.4.0' name='generator' />
|
127
|
+
<link href='http://antoineroygobeil.com/blog.xml' rel='alternate' title="Antoine Roy-Gobeil's blog" type='application/atom+xml' />
|
128
|
+
<meta content='width=device-width' name='viewport' />
|
129
|
+
<link href='/assets/css/style-cb0d0dfd528.css' rel='stylesheet' type='text/css' />
|
130
|
+
<link href='//fonts.googleapis.com/css?family=Open+Sans:300,300italic,400,400itlalic|Cabin:400' rel='stylesheet' />
|
131
|
+
<script src='/assets/js/vendor/modernizr-cb5f7134b69.js'></script>
|
132
|
+
</head>
|
133
|
+
<body>
|
134
|
+
<!--[if lt IE 7 ]>
|
135
|
+
<p class='chromeframe'>
|
136
|
+
Your browser is <em>ancient!</em>
|
137
|
+
<a href="http://browsehappy.com/">Upgrade to a different browser</a> or
|
138
|
+
<a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a>
|
139
|
+
to experience this site.
|
140
|
+
</p>
|
141
|
+
<![endif]-->
|
142
|
+
<div class='container'>
|
143
|
+
<header>
|
144
|
+
<h1><a href="/">Antoine Roy-Gobeil</a></h1>
|
145
|
+
<p class="presentation">
|
146
|
+
<strong>Hello.</strong>
|
147
|
+
My name is Antoine Roy-Gobeil. I am currently a PhD candidate
|
148
|
+
in <a href="http://www.physics.mcgill.ca">physics at McGill University</a>. I am passionate about science, technology
|
149
|
+
<abbr title="and">&</abbr>
|
150
|
+
software. In my free time I enjoy basketball, cinema <abbr title="and">&</abbr> video games.
|
151
|
+
</p>
|
152
|
+
</header>
|
153
|
+
<div id='main-content'>
|
154
|
+
|
155
|
+
<article class="hentry" itemscope itemtype="http://schema.org/BlogPosting" lang="en">
|
156
|
+
<header>
|
157
|
+
<p class="meta">
|
158
|
+
|
159
|
+
<time datetime="2012-08-12T00:40:00Z">
|
160
|
+
<span class="day">11</span>
|
161
|
+
<span class="month">August</span>
|
162
|
+
<span class="year">2012</span>
|
163
|
+
</time>
|
164
|
+
</p>
|
165
|
+
<h1 class="entry-title" itemprop="name"><a href="/blog/2012/08/11/a-place-to-call-home/">A place to call home</a></h1>
|
166
|
+
</header>
|
167
|
+
|
168
|
+
<div class="entry-content" itemprop="description">
|
169
|
+
<p>I have been postponing this day for too much time already. It was about time I had a small place to call home on the web!</p>
|
170
|
+
|
171
|
+
<p>Much to my regret, I have been doing many small projects over the years for which I kept almost no traces. I hope to make this blog a beautiful place to look back at stuff I did.</p>
|
172
|
+
|
173
|
+
<p>In the process, I hope to make the internet a better place. I am sometimes surprised by the lack of information, documentation or tutorial I can find online on particular topics. Yet, to this day, I haven't published anything useful on the internet. I figured starting a blog was the right thing to do. Therefore, from this day forward, I intend to blog about the stuff I wished would have been on Google but wasn't. I hope it may be of some help to fellow web travellers like me.</p>
|
174
|
+
</div>
|
175
|
+
|
176
|
+
<footer>
|
177
|
+
|
178
|
+
</footer>
|
179
|
+
</article>
|
180
|
+
</div>
|
181
|
+
<footer>
|
182
|
+
<p>Copyright 2012 • Antoine Roy-Gobeil</p>
|
183
|
+
<img src="/assets/img/portrait_182px_circle.jpg" alt="Portrait of Antoine Roy-Gobeil - Summer 2012"/>
|
184
|
+
</footer>
|
185
|
+
</div>
|
186
|
+
<script src='//ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js'></script>
|
187
|
+
<script type='text/javascript'>
|
188
|
+
//<![CDATA[
|
189
|
+
window.jQuery || document.write('<script src="/js/vendor/jquery.js"><\/script>')
|
190
|
+
//]]>
|
191
|
+
</script>
|
192
|
+
<script src='/assets/js/application-cb4c5a72106.js'></script>
|
193
|
+
<script type='text/javascript'>
|
194
|
+
//<![CDATA[
|
195
|
+
var _gaq=[['_setAccount','UA-34075505-1'],['_trackPageview']];
|
196
|
+
(function(d,t){var g=d.createElement(t),s=d.getElementsByTagName(t)[0];
|
197
|
+
g.src=('https:'==location.protocol?'//ssl':'//www')+'.google-analytics.com/ga.js';
|
198
|
+
s.parentNode.insertBefore(g,s)}(document,'script'));
|
199
|
+
//]]>
|
200
|
+
</script>
|
201
|
+
</body>
|
202
|
+
</html>
|
203
|
+
|
204
|
+
|
205
|
+
WARC/1.0
|
206
|
+
WARC-Type: resource
|
207
|
+
WARC-Record-ID: <urn:uuid:5D799C11-D46C-4AC8-B598-5DC9F4205C6E>
|
208
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
209
|
+
WARC-Target-URI: metadata://gnu.org/software/wget/warc/MANIFEST.txt
|
210
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
211
|
+
WARC-Block-Digest: sha1:Y2K6QTLHURLM7GPHWC7RCMQJXJMADJAA
|
212
|
+
Content-Type: text/plain
|
213
|
+
Content-Length: 48
|
214
|
+
|
215
|
+
<urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
216
|
+
|
217
|
+
|
218
|
+
WARC/1.0
|
219
|
+
WARC-Type: resource
|
220
|
+
WARC-Record-ID: <urn:uuid:5D799C11-D46C-4AC8-B598-5DC9F4205C6E>
|
221
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
222
|
+
WARC-Target-URI: metadata://gnu.org/software/wget/warc/wget_arguments.txt
|
223
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
224
|
+
WARC-Block-Digest: sha1:IVTUDTXVSYCNFXVTELIUWCVCLXYXY5CF
|
225
|
+
Content-Type: text/plain
|
226
|
+
Content-Length: 89
|
227
|
+
|
228
|
+
"http://www.antoineroygobeil.com/" "--mirror" "--warc-file=arg" "--no-warc-compression"
|
229
|
+
|
230
|
+
|
231
|
+
WARC/1.0
|
232
|
+
WARC-Type: resource
|
233
|
+
WARC-Record-ID: <urn:uuid:984565DE-4E6E-4443-97C5-25235CC91140>
|
234
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
235
|
+
WARC-Concurrent-To: <urn:uuid:5D799C11-D46C-4AC8-B598-5DC9F4205C6E>
|
236
|
+
WARC-Target-URI: metadata://gnu.org/software/wget/warc/wget.log
|
237
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
238
|
+
WARC-Block-Digest: sha1:CRKHZFN5I63JL5CBW4MKF3UBV3A2GQF3
|
239
|
+
Content-Type: text/plain
|
240
|
+
Content-Length: 987
|
241
|
+
|
242
|
+
Opening WARC file ‘arg.warc’.
|
243
|
+
|
244
|
+
--2012-09-13 23:08:05-- http://www.antoineroygobeil.com/
|
245
|
+
Resolving www.antoineroygobeil.com... 64.34.145.145
|
246
|
+
Connecting to www.antoineroygobeil.com|64.34.145.145|:80... connected.
|
247
|
+
HTTP request sent, awaiting response... 301 Moved Permanently
|
248
|
+
Location: http://antoineroygobeil.com/ [following]
|
249
|
+
|
250
|
+
0K 100% 50.1M=0s
|
251
|
+
|
252
|
+
--2012-09-13 23:08:05-- http://antoineroygobeil.com/
|
253
|
+
Resolving antoineroygobeil.com... 64.34.145.145
|
254
|
+
Reusing existing connection to www.antoineroygobeil.com:80.
|
255
|
+
HTTP request sent, awaiting response... 200 OK
|
256
|
+
Length: 4404 (4.3K) [text/html]
|
257
|
+
Saving to: ‘www.antoineroygobeil.com/index.html’
|
258
|
+
|
259
|
+
0K .... 100% 2.27M=0.002s
|
260
|
+
|
261
|
+
2012-09-13 23:08:05 (2.27 MB/s) - ‘www.antoineroygobeil.com/index.html’ saved [4404/4404]
|
262
|
+
|
263
|
+
FINISHED --2012-09-13 23:08:05--
|
264
|
+
Total wall clock time: 0.2s
|
265
|
+
Downloaded: 1 files, 4.3K in 0.002s (2.27 MB/s)
|
266
|
+
|
267
|
+
|