warc 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +20 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/bin/warc +4 -0
- data/lib/warc.rb +30 -0
- data/lib/warc/cli.rb +23 -0
- data/lib/warc/exceptions.rb +0 -0
- data/lib/warc/ext/net_http.rb +50 -0
- data/lib/warc/http.rb +35 -0
- data/lib/warc/parser.rb +24 -0
- data/lib/warc/proxy.rb +1 -0
- data/lib/warc/proxy/css/main.css +45 -0
- data/lib/warc/proxy/proxy.rb +85 -0
- data/lib/warc/proxy/views/index.erb +16 -0
- data/lib/warc/proxy/views/layout.erb +9 -0
- data/lib/warc/record.rb +59 -0
- data/lib/warc/record/header.rb +88 -0
- data/lib/warc/record/validator.rb +13 -0
- data/lib/warc/stream.rb +96 -0
- data/lib/warc/stream/gzip.rb +35 -0
- data/lib/warc/stream/plain.rb +23 -0
- data/lib/warc/utils/header_hash.rb +63 -0
- data/lib/warc/version.rb +3 -0
- data/spec/fixtures/arg.warc +267 -0
- data/spec/fixtures/criterion.warc +643 -0
- data/spec/fixtures/criterion.warc.gz +0 -0
- data/spec/fixtures/frg.warc +3617 -4
- data/spec/fixtures/frg.warc.gz +0 -0
- data/spec/fixtures/http_imdb +954 -0
- data/spec/spec_helper.rb +27 -0
- data/spec/warc/http_spec.rb +9 -0
- data/spec/warc/record/header_spec.rb +37 -0
- data/spec/warc/record_spec.rb +20 -0
- data/spec/warc/stream/gzip_spec.rb +46 -0
- data/spec/warc/stream/plain_spec.rb +41 -0
- data/spec/warc/stream_spec.rb +55 -0
- data/warc.gemspec +27 -0
- metadata +195 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
require "uuid"
|
2
|
+
require "time"
|
3
|
+
|
4
|
+
module Warc
  # Header fields of a single WARC record.
  #
  # WARC field names are case-insensitive (lookup is inherited from
  # HeaderHash):
  #   header["content-length"] == header["Content-Length"]
  class Record::Header < HeaderHash
    attr_reader :record
    include ::ActiveModel::Validations
    validates_with ::Warc::Record::Validator

    # Set of field names defined in the spec
    NAMED_FIELDS = [
      "WARC-Type",
      "WARC-Record-ID",
      "WARC-Date",
      "Content-Length",
      "Content-Type",
      "WARC-Concurrent-To",
      "WARC-Block-Digest",
      "WARC-Payload-Digest",
      "WARC-IP-Address",
      "WARC-Refers-To",
      "WARC-Target-URI",
      "WARC-Truncated",
      "WARC-Warcinfo-ID",
      "WARC-Filename",              # warcinfo only
      "WARC-Profile",               # revisit only
      "WARC-Identified-Payload-Type",
      "WARC-Segment-Origin-ID",     # continuation only
      "WARC-Segment-Number",
      "WARC-Segment-Total-Length"   # continuation only
    ].freeze

    # Fields that #to_s always emits first, in this fixed set.
    REQUIRED_FIELDS = ["WARC-Record-ID", "Content-Length", "WARC-Date", "WARC-Type"].freeze

    # record - the record this header belongs to; its #content is read
    #          lazily by #content_length and #block_digest.
    # h      - initial field name => value pairs.
    def initialize(record, h = {})
      @record = record
      super(h)
    end

    # Returns the Content-Length field as an Integer.
    #
    # When the field is absent it is computed from the record content and
    # stored. The size is counted in bytes when the content supports
    # #bytesize (Content-Length is an octet count), falling back to #length.
    # If the content cannot be measured at all (e.g. no record or no content
    # yet) the length defaults to 0, as a deliberate best effort.
    def content_length
      self["content-length"] ||= begin
        body = record.content
        body.respond_to?(:bytesize) ? body.bytesize : body.length
      rescue StandardError
        0
      end
      self["content-length"].to_i
    end

    # Returns the WARC-Date field normalised to an ISO 8601 string.
    #
    # When the field is absent the current UTC time is stored first, so
    # repeated calls (and repeated #to_s output) stay stable.
    def date
      self["warc-date"] ||= Time.now.utc.iso8601
      Time.parse(self["warc-date"]).iso8601
    end

    # Sets the WARC-Date field.
    #
    # d - a Time, or a String understood by Time.parse.
    def date=(d)
      time = d.is_a?(::Time) ? d : Time.parse(d)
      self["warc-date"] = time.iso8601
    end

    # Returns the WARC-Type field, or nil when it is not set.
    def type
      self["warc-type"]
    end

    # Returns the WARC-Record-ID field, generating and storing a
    # "<urn:uuid:...>" identifier when none is set.
    def record_id
      self["warc-record-id"] ||= sprintf("<urn:uuid:%s>", UUID.generate)
    end

    # Returns the WARC-Block-Digest field, computing it from the record
    # content and storing it when absent.
    def block_digest
      self["warc-block-digest"] ||= compute_digest(record.content)
    end

    # Returns the labelled SHA-256 hex digest ("sha256:<hex>") of +content+.
    def compute_digest(content)
      "sha256:" + Digest::SHA256.hexdigest(content)
    end

    # Returns the WARC-Target-URI field, or nil when it is not set.
    def uri
      self["warc-target-uri"]
    end

    # Serialises the header as CRLF-terminated "Name: value" lines.
    #
    # The four required fields are written first through their accessors
    # (which fills in defaults for record id, date and content length); every
    # other field follows in hash order. Required fields are skipped in the
    # second pass regardless of the case they were stored in, so they are
    # never written twice.
    def to_s
      crlf = "\r\n"
      required = REQUIRED_FIELDS.map(&:downcase)

      str = String.new
      str << "WARC-Type: #{self.type}" << crlf
      str << "WARC-Record-ID: #{record_id}" << crlf
      str << "WARC-Date: #{date}" << crlf
      str << "Content-Length: #{content_length}" << crlf
      each do |k, v|
        next if required.include?(k.downcase)
        str << "#{k}: #{v}#{crlf}"
      end
      str
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'active_model'
|
2
|
+
|
3
|
+
module Warc
  # ActiveModel validator that checks a record header carries every
  # mandatory WARC field.
  class Record::Validator < ::ActiveModel::Validator
    # Field names that must be present on every header.
    REQUIRED_FIELDS = ["WARC-Record-ID", "Content-Length", "WARC-Date", "WARC-Type"].freeze

    # Adds one :base error to +header+ for each required field it lacks.
    #
    # header - object responding to #has_key? and #errors (an
    #          ActiveModel::Errors instance).
    def validate(header)
      REQUIRED_FIELDS.each do |key|
        next if header.has_key?(key)
        # errors.add works on every ActiveModel version; appending to
        # errors[:base] stopped registering the error in newer releases.
        header.errors.add(:base, "#{key} is a required field")
      end
    end
  end
end
|
data/lib/warc/stream.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
module Warc
  # Opens a WARC stream for +path+.
  #
  # path - String (or anything responding to #to_s, e.g. a Pathname).
  # mode - File open mode used when the file already exists.
  #
  # Paths ending in ".warc.gz" give a Stream::Gzip and paths ending in
  # ".warc" a Stream::Plain. When the file exists it is opened with +mode+,
  # otherwise the path string itself is handed to the stream, which then
  # creates its own numbered output file. Any other path is handed to
  # Stream::Gzip as a base name.
  def self.open_stream(path, mode = 'r+')
    path = path.to_s
    gzipped = path.end_with?('.warc.gz')
    plain = path.end_with?('.warc')

    return Stream::Gzip.new(path) unless gzipped || plain

    fh = ::File.exist?(path) ? ::File.new(path, mode) : path
    gzipped ? Stream::Gzip.new(fh) : Stream::Plain.new(fh)
  end

  # Abstract base for WARC record streams. Subclasses set @ext before calling
  # super in #initialize, implement #read_record, and extend #write_record.
  class Stream
    private_class_method :new
    include Enumerable
    attr_reader :parser

    DEFAULT_OPTS = {
      # Maximum file size
      :max_filesize => 10**9
    }

    # fh      - an open File, or a String base name for files to create.
    # options - overrides for DEFAULT_OPTS.
    # block   - optional naming callback receiving (name, index) and
    #           returning the path (without extension) of the next file.
    #
    # Raises ArgumentError when +fh+ is neither a File nor a String.
    def initialize(fh, options = {}, &block)
      @options = DEFAULT_OPTS.merge(options)
      @index = 0
      # Always set, so rotation in #write_record also works for streams that
      # were opened from an existing File.
      @naming_proc = block || lambda { |name, index| "#{name}.#{sprintf('%06d', index)}" }
      @file_handle =
        case fh
        when ::File
          @name = ::File.basename(fh.path)
          fh
        when String
          @name = fh
          next_file_handle
        else
          raise ArgumentError, "expected a File or a String path, got #{fh.class}"
        end
      @parser = ::Warc::Parser.new
    end

    # Yields every record found from byte +offset+ onwards, setting each
    # record's #offset to the position it was read from. Returns an
    # Enumerator when called without a block.
    def each(offset = 0)
      return enum_for(:each, offset) unless block_given?

      @file_handle.seek(offset, ::IO::SEEK_SET)
      loop do
        position = @file_handle.tell
        rec = read_record
        break unless rec
        rec.offset = position
        yield rec
      end
    end

    # Reads the single record starting at byte +offset+.
    def record(offset = 0)
      @file_handle.seek(offset, ::IO::SEEK_SET)
      read_record
    end

    # Closes the underlying file handle.
    def close
      @file_handle.close
    end

    # Reads one record at the current position. Must be overridden.
    def read_record
      raise StandardError, "#{self.class}#read_record is not implemented"
    end

    # Positions the stream for appending +record+: seeks to the end of the
    # current file, switches to a fresh file when the record would push it
    # past :max_filesize, and stores the resulting position in record.offset.
    # Subclasses call super and then write the record's bytes.
    def write_record(record)
      # Go to end of file
      @file_handle.seek(0, ::IO::SEEK_END)
      expected_size = record.header.content_length + @file_handle.tell
      next_file_handle if expected_size > @options[:max_filesize]
      record.offset = @file_handle.tell
    end

    # Size in bytes of the current file.
    def size
      @file_handle.stat.size
    end

    private

    # Closes the current file (if any) and opens the next numbered file in
    # append mode, named by the naming proc plus the subclass extension.
    def next_file_handle
      @file_handle.close if @file_handle
      @index += 1
      path = @naming_proc.call(@name, @index)
      @file_handle = ::File.new(path + @ext, 'a+')
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
module Warc
  # Stream whose records are each stored as an individual gzip member
  # (".warc.gz" files).
  class Stream::Gzip < Stream
    public_class_method :new

    # Sets the file extension used for created files, then defers to Stream.
    def initialize(fh, options = {}, &block)
      @ext = ".warc.gz"
      super(fh, options, &block)
    end

    # Reads one gzipped record at the current file position.
    # Returns the parsed record, or nil when zlib reports an error.
    def read_record
      reader = ::Zlib::GzipReader.new(@file_handle)
      parsed = parser.parse(reader)
      loop { reader.readline } # Make sure we read the whole gzip
    rescue EOFError # End of gzipped record
      leftover = reader.unused
      # We move the cursor back if extra bytes were read
      @file_handle.pos -= leftover.length unless leftover.nil?
      parsed # We return the record
    rescue ::Zlib::Error # Raised when there's no more gzipped data to read
      nil
    end

    # Appends +record+ to the file as its own gzip member.
    def write_record(record)
      super

      # Prepare gzip IO object
      writer = ::Zlib::GzipWriter.new(@file_handle)
      record.dump_to(writer)
      writer.finish # Need to close GzipWriter for it to write the gzip footer
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Warc
  # Stream over an uncompressed ".warc" file.
  class Stream::Plain < Stream
    public_class_method :new

    # Sets the file extension used for created files, then defers to Stream.
    def initialize(fh, options = {}, &block)
      @ext = '.warc'
      super(fh, options, &block)
    end

    # Parses one record from the current file position.
    # Returns nil once the parser hits end of file (no more records).
    def read_record
      parser.parse(@file_handle)
    rescue EOFError # No more records
      nil
    end

    # Appends +record+ to the file after the base class has positioned it.
    def write_record(record)
      super
      record.dump_to(@file_handle)
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Warc
  # Hash with case-insensitive String keys that remembers the spelling each
  # key was last stored with.
  #
  # @names maps both the stored spelling and its downcased form to the
  # stored spelling, so lookups succeed whatever case the caller uses.
  class HeaderHash < ::Hash
    # hash - initial key => value pairs.
    def initialize(hash = {})
      super()
      @names = {}
      hash.each { |k, v| self[k] = v }
    end

    # Gives copies made with #dup / #clone their own name index, so changing
    # a copy (as #merge does) never alters the original's lookups.
    def initialize_copy(other)
      super
      @names = other.instance_variable_get(:@names).dup
    end

    # Yields each key with its value; values responding to #to_ary are
    # joined with newlines. Returns an Enumerator without a block.
    def each
      return enum_for(:each) unless block_given?

      super do |k, v|
        yield(k, v.respond_to?(:to_ary) ? v.to_ary.join("\n") : v)
      end
    end

    # Returns a plain Hash built from #each (array values arrive joined).
    def to_hash
      hash = {}
      each { |k, v| hash[k] = v }
      hash
    end

    # Looks +k+ up as given first, then through the case-insensitive index.
    def [](k)
      super(k) || super(@names[k.downcase])
    end

    # Stores +v+ under +k+, replacing any entry whose key differs only in
    # case.
    def []=(k, v)
      canonical = k.downcase
      delete k if @names[canonical] && @names[canonical] != k # .delete is expensive, don't invoke it unless necessary
      @names[k] = @names[canonical] = k
      super k, v
    end

    # Removes the entry matching +k+ case-insensitively and returns its
    # value.
    def delete(k)
      canonical = k.downcase
      result = super @names.delete(canonical)
      @names.delete_if { |name,| name.downcase == canonical }
      result
    end

    # Empties the hash together with its name index, so removed keys stop
    # answering #include?.
    def clear
      @names.clear
      super
    end

    # True when a key matching +k+ (in any case) is present.
    def include?(k)
      @names.include?(k) || @names.include?(k.downcase)
    end

    alias_method :has_key?, :include?
    alias_method :member?, :include?
    alias_method :key?, :include?

    # Copies every pair of +other+ into this hash. Returns self.
    def merge!(other)
      other.each { |k, v| self[k] = v }
      self
    end

    # Returns a new HeaderHash holding this hash's pairs plus +other+'s;
    # the receiver is left untouched.
    def merge(other)
      hash = dup
      hash.merge! other
    end

    # Replaces the whole content with the pairs of +other+. Returns self.
    def replace(other)
      clear
      other.each { |k, v| self[k] = v }
      self
    end
  end
end
|
data/lib/warc/version.rb
ADDED
@@ -0,0 +1,267 @@
|
|
1
|
+
WARC/1.0
|
2
|
+
WARC-Type: warcinfo
|
3
|
+
Content-Type: application/warc-fields
|
4
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
5
|
+
WARC-Record-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
6
|
+
WARC-Filename: arg.warc
|
7
|
+
WARC-Block-Digest: sha1:RFDKLAAA6JWB4XZXWKVLNVFPTDLFTPSK
|
8
|
+
Content-Length: 270
|
9
|
+
|
10
|
+
software: Wget/1.14 (darwin10.8.0)
|
11
|
+
format: WARC File Format 1.0
|
12
|
+
conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf
|
13
|
+
robots: classic
|
14
|
+
wget-arguments: "http://www.antoineroygobeil.com/" "--mirror" "--warc-file=arg" "--no-warc-compression"
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
WARC/1.0
|
19
|
+
WARC-Type: request
|
20
|
+
WARC-Target-URI: http://www.antoineroygobeil.com/
|
21
|
+
Content-Type: application/http;msgtype=request
|
22
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
23
|
+
WARC-Record-ID: <urn:uuid:01D6037B-E36C-44C1-9731-8E75CC8996CD>
|
24
|
+
WARC-IP-Address: 64.34.145.145
|
25
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
26
|
+
WARC-Block-Digest: sha1:L5UPLOIIBXTVUBQI26LL2C2PC2RXQUAB
|
27
|
+
Content-Length: 125
|
28
|
+
|
29
|
+
GET / HTTP/1.1
|
30
|
+
User-Agent: Wget/1.14 (darwin10.8.0)
|
31
|
+
Accept: */*
|
32
|
+
Host: www.antoineroygobeil.com
|
33
|
+
Connection: Keep-Alive
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
WARC/1.0
|
38
|
+
WARC-Type: response
|
39
|
+
WARC-Record-ID: <urn:uuid:DC635649-820F-4A26-89C8-3ED1FB5398ED>
|
40
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
41
|
+
WARC-Concurrent-To: <urn:uuid:01D6037B-E36C-44C1-9731-8E75CC8996CD>
|
42
|
+
WARC-Target-URI: http://www.antoineroygobeil.com/
|
43
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
44
|
+
WARC-IP-Address: 64.34.145.145
|
45
|
+
WARC-Block-Digest: sha1:KALWHSW57TQKVJVZPLKTV7ZO24QN3BAH
|
46
|
+
WARC-Payload-Digest: sha1:SWVIXMD24OCGFYUOH5CPVJW3HPW2TPJT
|
47
|
+
Content-Type: application/http;msgtype=response
|
48
|
+
Content-Length: 803
|
49
|
+
|
50
|
+
HTTP/1.1 301 Moved Permanently
|
51
|
+
Date: Fri, 14 Sep 2012 03:08:05 GMT
|
52
|
+
Server: Apache/2.2.11 (Ubuntu) PHP/5.2.6-3ubuntu4.6 with Suhosin-Patch mod_ssl/2.2.11 OpenSSL/0.9.8g Phusion_Passenger/2.2.5
|
53
|
+
Location: http://antoineroygobeil.com/
|
54
|
+
Vary: Accept-Encoding
|
55
|
+
Content-Length: 420
|
56
|
+
Keep-Alive: timeout=15, max=100
|
57
|
+
Connection: Keep-Alive
|
58
|
+
Content-Type: text/html; charset=iso-8859-1
|
59
|
+
|
60
|
+
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
|
61
|
+
<html><head>
|
62
|
+
<title>301 Moved Permanently</title>
|
63
|
+
</head><body>
|
64
|
+
<h1>Moved Permanently</h1>
|
65
|
+
<p>The document has moved <a href="http://antoineroygobeil.com/">here</a>.</p>
|
66
|
+
<hr>
|
67
|
+
<address>Apache/2.2.11 (Ubuntu) PHP/5.2.6-3ubuntu4.6 with Suhosin-Patch mod_ssl/2.2.11 OpenSSL/0.9.8g Phusion_Passenger/2.2.5 Server at www.antoineroygobeil.com Port 80</address>
|
68
|
+
</body></html>
|
69
|
+
|
70
|
+
|
71
|
+
WARC/1.0
|
72
|
+
WARC-Type: request
|
73
|
+
WARC-Target-URI: http://antoineroygobeil.com/
|
74
|
+
Content-Type: application/http;msgtype=request
|
75
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
76
|
+
WARC-Record-ID: <urn:uuid:5641209D-F1AF-46D1-8A41-CBDB746C40D1>
|
77
|
+
WARC-IP-Address: 64.34.145.145
|
78
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
79
|
+
WARC-Block-Digest: sha1:LGO6T275AU573ZMIRCECF4Z6ZEX7RVTM
|
80
|
+
Content-Length: 121
|
81
|
+
|
82
|
+
GET / HTTP/1.1
|
83
|
+
User-Agent: Wget/1.14 (darwin10.8.0)
|
84
|
+
Accept: */*
|
85
|
+
Host: antoineroygobeil.com
|
86
|
+
Connection: Keep-Alive
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
WARC/1.0
|
91
|
+
WARC-Type: response
|
92
|
+
WARC-Record-ID: <urn:uuid:CD7AF690-EBA5-4071-BC9D-EDAD854D57D6>
|
93
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
94
|
+
WARC-Concurrent-To: <urn:uuid:5641209D-F1AF-46D1-8A41-CBDB746C40D1>
|
95
|
+
WARC-Target-URI: http://antoineroygobeil.com/
|
96
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
97
|
+
WARC-IP-Address: 64.34.145.145
|
98
|
+
WARC-Block-Digest: sha1:TABLB7DABQVOYHUK3E2CEFJ7LIHGTCV2
|
99
|
+
WARC-Payload-Digest: sha1:H62OXGS6ICUKWEIZFIS3Z4L6EWMT54K2
|
100
|
+
Content-Type: application/http;msgtype=response
|
101
|
+
Content-Length: 4795
|
102
|
+
|
103
|
+
HTTP/1.1 200 OK
|
104
|
+
Date: Fri, 14 Sep 2012 03:08:05 GMT
|
105
|
+
Server: Apache/2.2.11 (Ubuntu) PHP/5.2.6-3ubuntu4.6 with Suhosin-Patch mod_ssl/2.2.11 OpenSSL/0.9.8g Phusion_Passenger/2.2.5
|
106
|
+
Last-Modified: Thu, 23 Aug 2012 02:52:11 GMT
|
107
|
+
Accept-Ranges: bytes
|
108
|
+
Content-Length: 4404
|
109
|
+
Vary: Accept-Encoding
|
110
|
+
Keep-Alive: timeout=15, max=99
|
111
|
+
Connection: Keep-Alive
|
112
|
+
Content-Type: text/html; charset=utf-8
|
113
|
+
|
114
|
+
<!DOCTYPE html>
|
115
|
+
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
|
116
|
+
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8" lang="en"> <![endif]-->
|
117
|
+
<!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]-->
|
118
|
+
<!--[if gt IE 8]><!-->
|
119
|
+
<html class='no-js' lang='en'>
|
120
|
+
<!--<![endif]-->
|
121
|
+
<head>
|
122
|
+
<meta charset='utf-8' />
|
123
|
+
<title>Antoine Roy-Gobeil</title>
|
124
|
+
<meta content="Antoine Roy-Gobeil's webpage and blog" name='description' />
|
125
|
+
<meta content='Antoine Roy-Gobeil' name='author' />
|
126
|
+
<meta content='nanoc 3.4.0' name='generator' />
|
127
|
+
<link href='http://antoineroygobeil.com/blog.xml' rel='alternate' title="Antoine Roy-Gobeil's blog" type='application/atom+xml' />
|
128
|
+
<meta content='width=device-width' name='viewport' />
|
129
|
+
<link href='/assets/css/style-cb0d0dfd528.css' rel='stylesheet' type='text/css' />
|
130
|
+
<link href='//fonts.googleapis.com/css?family=Open+Sans:300,300italic,400,400itlalic|Cabin:400' rel='stylesheet' />
|
131
|
+
<script src='/assets/js/vendor/modernizr-cb5f7134b69.js'></script>
|
132
|
+
</head>
|
133
|
+
<body>
|
134
|
+
<!--[if lt IE 7 ]>
|
135
|
+
<p class='chromeframe'>
|
136
|
+
Your browser is <em>ancient!</em>
|
137
|
+
<a href="http://browsehappy.com/">Upgrade to a different browser</a> or
|
138
|
+
<a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a>
|
139
|
+
to experience this site.
|
140
|
+
</p>
|
141
|
+
<![endif]-->
|
142
|
+
<div class='container'>
|
143
|
+
<header>
|
144
|
+
<h1><a href="/">Antoine Roy-Gobeil</a></h1>
|
145
|
+
<p class="presentation">
|
146
|
+
<strong>Hello.</strong>
|
147
|
+
My name is Antoine Roy-Gobeil. I am currently a PhD candidate
|
148
|
+
in <a href="http://www.physics.mcgill.ca">physics at McGill University</a>. I am passionate about science, technology
|
149
|
+
<abbr title="and">&</abbr>
|
150
|
+
software. In my free time I enjoy basketball, cinema <abbr title="and">&</abbr> video games.
|
151
|
+
</p>
|
152
|
+
</header>
|
153
|
+
<div id='main-content'>
|
154
|
+
|
155
|
+
<article class="hentry" itemscope itemtype="http://schema.org/BlogPosting" lang="en">
|
156
|
+
<header>
|
157
|
+
<p class="meta">
|
158
|
+
|
159
|
+
<time datetime="2012-08-12T00:40:00Z">
|
160
|
+
<span class="day">11</span>
|
161
|
+
<span class="month">August</span>
|
162
|
+
<span class="year">2012</span>
|
163
|
+
</time>
|
164
|
+
</p>
|
165
|
+
<h1 class="entry-title" itemprop="name"><a href="/blog/2012/08/11/a-place-to-call-home/">A place to call home</a></h1>
|
166
|
+
</header>
|
167
|
+
|
168
|
+
<div class="entry-content" itemprop="description">
|
169
|
+
<p>I have been postponing this day for too much time already. It was about time I had a small place to call home on the web!</p>
|
170
|
+
|
171
|
+
<p>Much to my regret, I have been doing many small projects over the years for which I kept almost no traces. I hope to make this blog a beautiful place to look back at stuff I did.</p>
|
172
|
+
|
173
|
+
<p>In the process, I hope to make the internet a better place. I am sometimes surprised by the lack of information, documentation or tutorial I can find online on particular topics. Yet, to this day, I haven't published anything useful on the internet. I figured starting a blog was the right thing to do. Therefore, from this day forward, I intend to blog about the stuff I wished would have been on Google but wasn't. I hope it may be of some help to fellow web travellers like me.</p>
|
174
|
+
</div>
|
175
|
+
|
176
|
+
<footer>
|
177
|
+
|
178
|
+
</footer>
|
179
|
+
</article>
|
180
|
+
</div>
|
181
|
+
<footer>
|
182
|
+
<p>Copyright 2012 • Antoine Roy-Gobeil</p>
|
183
|
+
<img src="/assets/img/portrait_182px_circle.jpg" alt="Portrait of Antoine Roy-Gobeil - Summer 2012"/>
|
184
|
+
</footer>
|
185
|
+
</div>
|
186
|
+
<script src='//ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js'></script>
|
187
|
+
<script type='text/javascript'>
|
188
|
+
//<![CDATA[
|
189
|
+
window.jQuery || document.write('<script src="/js/vendor/jquery.js"><\/script>')
|
190
|
+
//]]>
|
191
|
+
</script>
|
192
|
+
<script src='/assets/js/application-cb4c5a72106.js'></script>
|
193
|
+
<script type='text/javascript'>
|
194
|
+
//<![CDATA[
|
195
|
+
var _gaq=[['_setAccount','UA-34075505-1'],['_trackPageview']];
|
196
|
+
(function(d,t){var g=d.createElement(t),s=d.getElementsByTagName(t)[0];
|
197
|
+
g.src=('https:'==location.protocol?'//ssl':'//www')+'.google-analytics.com/ga.js';
|
198
|
+
s.parentNode.insertBefore(g,s)}(document,'script'));
|
199
|
+
//]]>
|
200
|
+
</script>
|
201
|
+
</body>
|
202
|
+
</html>
|
203
|
+
|
204
|
+
|
205
|
+
WARC/1.0
|
206
|
+
WARC-Type: resource
|
207
|
+
WARC-Record-ID: <urn:uuid:5D799C11-D46C-4AC8-B598-5DC9F4205C6E>
|
208
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
209
|
+
WARC-Target-URI: metadata://gnu.org/software/wget/warc/MANIFEST.txt
|
210
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
211
|
+
WARC-Block-Digest: sha1:Y2K6QTLHURLM7GPHWC7RCMQJXJMADJAA
|
212
|
+
Content-Type: text/plain
|
213
|
+
Content-Length: 48
|
214
|
+
|
215
|
+
<urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
216
|
+
|
217
|
+
|
218
|
+
WARC/1.0
|
219
|
+
WARC-Type: resource
|
220
|
+
WARC-Record-ID: <urn:uuid:5D799C11-D46C-4AC8-B598-5DC9F4205C6E>
|
221
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
222
|
+
WARC-Target-URI: metadata://gnu.org/software/wget/warc/wget_arguments.txt
|
223
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
224
|
+
WARC-Block-Digest: sha1:IVTUDTXVSYCNFXVTELIUWCVCLXYXY5CF
|
225
|
+
Content-Type: text/plain
|
226
|
+
Content-Length: 89
|
227
|
+
|
228
|
+
"http://www.antoineroygobeil.com/" "--mirror" "--warc-file=arg" "--no-warc-compression"
|
229
|
+
|
230
|
+
|
231
|
+
WARC/1.0
|
232
|
+
WARC-Type: resource
|
233
|
+
WARC-Record-ID: <urn:uuid:984565DE-4E6E-4443-97C5-25235CC91140>
|
234
|
+
WARC-Warcinfo-ID: <urn:uuid:CE171A3B-FF47-4859-89DA-A03ADEB37334>
|
235
|
+
WARC-Concurrent-To: <urn:uuid:5D799C11-D46C-4AC8-B598-5DC9F4205C6E>
|
236
|
+
WARC-Target-URI: metadata://gnu.org/software/wget/warc/wget.log
|
237
|
+
WARC-Date: 2012-09-14T03:08:05Z
|
238
|
+
WARC-Block-Digest: sha1:CRKHZFN5I63JL5CBW4MKF3UBV3A2GQF3
|
239
|
+
Content-Type: text/plain
|
240
|
+
Content-Length: 987
|
241
|
+
|
242
|
+
Opening WARC file ‘arg.warc’.
|
243
|
+
|
244
|
+
--2012-09-13 23:08:05-- http://www.antoineroygobeil.com/
|
245
|
+
Resolving www.antoineroygobeil.com... 64.34.145.145
|
246
|
+
Connecting to www.antoineroygobeil.com|64.34.145.145|:80... connected.
|
247
|
+
HTTP request sent, awaiting response... 301 Moved Permanently
|
248
|
+
Location: http://antoineroygobeil.com/ [following]
|
249
|
+
|
250
|
+
0K 100% 50.1M=0s
|
251
|
+
|
252
|
+
--2012-09-13 23:08:05-- http://antoineroygobeil.com/
|
253
|
+
Resolving antoineroygobeil.com... 64.34.145.145
|
254
|
+
Reusing existing connection to www.antoineroygobeil.com:80.
|
255
|
+
HTTP request sent, awaiting response... 200 OK
|
256
|
+
Length: 4404 (4.3K) [text/html]
|
257
|
+
Saving to: ‘www.antoineroygobeil.com/index.html’
|
258
|
+
|
259
|
+
0K .... 100% 2.27M=0.002s
|
260
|
+
|
261
|
+
2012-09-13 23:08:05 (2.27 MB/s) - ‘www.antoineroygobeil.com/index.html’ saved [4404/4404]
|
262
|
+
|
263
|
+
FINISHED --2012-09-13 23:08:05--
|
264
|
+
Total wall clock time: 0.2s
|
265
|
+
Downloaded: 1 files, 4.3K in 0.002s (2.27 MB/s)
|
266
|
+
|
267
|
+
|