crabfarm 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/crabfarm/configuration.rb +3 -3
- data/lib/crabfarm/crabtrap_context.rb +2 -2
- data/lib/crabfarm/crabtrap_runner.rb +11 -2
- data/lib/crabfarm/engines/safe_state_loop.rb +32 -25
- data/lib/crabfarm/modes/generator.rb +1 -0
- data/lib/crabfarm/version.rb +1 -1
- metadata +2 -4
- data/bin/crabtrap +0 -347
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9ffaef8409650267bf6e4272421008ff9c38d05e
|
4
|
+
data.tar.gz: 996ba45929699ec7eebac5d8d4a1a24f231cf4a1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d5cffdf273b3f31c807502f036583ab89e9ad58b66aa2f13a79112350f418a246535702109bcdabf8cbded64410414b8fa39a581af079f99d205b28c1387cc9
|
7
|
+
data.tar.gz: 6dd417dbebd417b32fa738c58af2927651d6106f66909b4748328dcb9965976c3f206f9d792362740793ffd1b8ae0dc18bb07dc6ea5fe29f8581b53c60728fa6
|
@@ -27,7 +27,7 @@ module Crabfarm
|
|
27
27
|
[:phantom_lock_file, :string, 'Phantomjs lock file path, only for phantomjs driver.'],
|
28
28
|
|
29
29
|
# Crabtrap launcher configuration
|
30
|
-
[:
|
30
|
+
[:crabtrap_bin_path, :string, 'Crabtrap binary path.'],
|
31
31
|
[:crabtrap_port, :integer, 'Crabtrap port, defaults to 4000.'],
|
32
32
|
[:crabtrap_mode, ['capture', 'replay'], 'Crabtrap operation mode.']
|
33
33
|
]
|
@@ -68,7 +68,7 @@ module Crabfarm
|
|
68
68
|
phantom_bin_path: 'phantomjs',
|
69
69
|
phantom_lock_file: nil,
|
70
70
|
|
71
|
-
|
71
|
+
crabtrap_bin_path: 'crabtrap',
|
72
72
|
crabtrap_port: 4000
|
73
73
|
}
|
74
74
|
end
|
@@ -113,7 +113,7 @@ module Crabfarm
|
|
113
113
|
|
114
114
|
def crabtrap_config
|
115
115
|
{
|
116
|
-
bin_path:
|
116
|
+
bin_path: crabtrap_bin_path,
|
117
117
|
port: crabtrap_port,
|
118
118
|
proxy: proxy
|
119
119
|
}
|
@@ -39,11 +39,11 @@ module Crabfarm
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def driver_config
|
42
|
-
super.merge(proxy: proxy_address)
|
42
|
+
if @runner.is_running? then super.merge(proxy: proxy_address) else super end
|
43
43
|
end
|
44
44
|
|
45
45
|
def phantom_config
|
46
|
-
super.merge(proxy: proxy_address)
|
46
|
+
if @runner.is_running? then super.merge(proxy: proxy_address) else super end
|
47
47
|
end
|
48
48
|
|
49
49
|
def proxy_address
|
@@ -8,6 +8,10 @@ module Crabfarm
|
|
8
8
|
@pid = nil
|
9
9
|
end
|
10
10
|
|
11
|
+
def is_running?
|
12
|
+
not @pid.nil?
|
13
|
+
end
|
14
|
+
|
11
15
|
def port
|
12
16
|
@config[:port] # TODO: maybe select port dynamically...
|
13
17
|
end
|
@@ -17,8 +21,13 @@ module Crabfarm
|
|
17
21
|
end
|
18
22
|
|
19
23
|
def start
|
20
|
-
|
21
|
-
|
24
|
+
begin
|
25
|
+
@pid = Process.spawn({}, crabtrap_cmd)
|
26
|
+
wait_for_server
|
27
|
+
rescue
|
28
|
+
puts "Could not find crabtrap at #{@config[:bin_path]}, memento replaying is disabled!"
|
29
|
+
@pid = nil
|
30
|
+
end
|
22
31
|
end
|
23
32
|
|
24
33
|
def stop
|
@@ -6,7 +6,6 @@ module Crabfarm
|
|
6
6
|
class SafeStateLoop
|
7
7
|
|
8
8
|
def initialize
|
9
|
-
@context = Crabfarm::Context.new
|
10
9
|
@running = true
|
11
10
|
@working = false
|
12
11
|
@lock = Mutex.new
|
@@ -16,7 +15,6 @@ module Crabfarm
|
|
16
15
|
def release
|
17
16
|
@running = false
|
18
17
|
@thread.join
|
19
|
-
@context.release
|
20
18
|
end
|
21
19
|
|
22
20
|
def change_state(_name, _params={}, _wait=nil)
|
@@ -90,29 +88,38 @@ module Crabfarm
|
|
90
88
|
end
|
91
89
|
|
92
90
|
def crawl_loop
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
@
|
114
|
-
|
115
|
-
|
91
|
+
context = Crabfarm::Context.new
|
92
|
+
|
93
|
+
begin
|
94
|
+
while @running
|
95
|
+
if @working
|
96
|
+
@elapsed = Benchmark.measure do
|
97
|
+
begin
|
98
|
+
ActiveSupport::Dependencies.clear
|
99
|
+
logger.info "StateLoop: loading state: #{@next_state_name}"
|
100
|
+
@doc = context.run_state(@next_state_name, @next_state_params).output_as_json
|
101
|
+
logger.info "StateLoop: state loaded successfully: #{@next_state_name}"
|
102
|
+
@error = nil
|
103
|
+
rescue Exception => e
|
104
|
+
logger.error "StateLoop: error while loading state: #{@next_state_name}"
|
105
|
+
logger.error e
|
106
|
+
@doc = nil
|
107
|
+
@error = e
|
108
|
+
end
|
109
|
+
end.real
|
110
|
+
|
111
|
+
@lock.synchronize {
|
112
|
+
@state_name = @next_state_name
|
113
|
+
@state_params = @next_state_params
|
114
|
+
@working = false
|
115
|
+
}
|
116
|
+
else sleep 0.2 end
|
117
|
+
end
|
118
|
+
rescue Exception => e
|
119
|
+
logger.fatal "StateLoop: unhandled exception!"
|
120
|
+
logger.fatal e
|
121
|
+
ensure
|
122
|
+
context.release
|
116
123
|
end
|
117
124
|
end
|
118
125
|
|
@@ -30,6 +30,7 @@ module Crabfarm
|
|
30
30
|
path(_name, 'spec', 'snapshots', '.gitkeep').render('dot_gitkeep')
|
31
31
|
path(_name, 'spec', 'mementos', '.gitkeep').render('dot_gitkeep')
|
32
32
|
path(_name, 'spec', 'integration', '.gitkeep').render('dot_gitkeep')
|
33
|
+
path(_name, 'logs').render('dot_gitkeep')
|
33
34
|
end
|
34
35
|
end
|
35
36
|
|
data/lib/crabfarm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crabfarm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ignacio Baixas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jbuilder
|
@@ -315,7 +315,6 @@ email:
|
|
315
315
|
- ignacio@platan.us
|
316
316
|
executables:
|
317
317
|
- crabfarm
|
318
|
-
- crabtrap
|
319
318
|
extensions: []
|
320
319
|
extra_rdoc_files: []
|
321
320
|
files:
|
@@ -371,7 +370,6 @@ files:
|
|
371
370
|
- lib/crabfarm/version.rb
|
372
371
|
- lib/crabfarm.rb
|
373
372
|
- bin/crabfarm
|
374
|
-
- bin/crabtrap
|
375
373
|
homepage: https://github.com/platanus/crabfarm-gem
|
376
374
|
licenses:
|
377
375
|
- MIT
|
data/bin/crabtrap
DELETED
@@ -1,347 +0,0 @@
|
|
1
|
-
#!/usr/bin/env node
|
2
|
-
|
3
|
-
var net = require('net'),
|
4
|
-
http = require('http'),
|
5
|
-
https = require('https'),
|
6
|
-
url = require('url'),
|
7
|
-
fs = require('fs'),
|
8
|
-
zlib = require('zlib');
|
9
|
-
|
10
|
-
// Globals
|
11
|
-
|
12
|
-
var HTTPS_OPTIONS = {
|
13
|
-
key: '-----BEGIN RSA PRIVATE KEY-----\nMIIBOQIBAAJBAK/L/lXb/kxUzve1olo71s6mQLvuQCm3z2wqClq71NLerFnaXpN+\nFrNPy7+R3gZ1hdWXqbN5NqpWDMM9fcbd7p0CAwEAAQJAUDImN3Lhgl7Z/+TLSJCt\nwJ3VQCZC/QUOSdCv4o53Wy5aL/n8ootYFC3eoFC2Nal5bnH6onP9YR+X9l3HKLaT\n3QIhANXwb5SvJ+Kewa8F5wNHo9LFjSbL7WSSb1MyvYnOeFlPAiEA0lvaLz6UXRDL\n6T6Z1fkF0exmQqVimeL5qjY5o9Gk5lMCH1A52Z3oEQzqe7cmf3q7YrOnYUcrMdqF\nDzojzO/gfUECIQCe9fImiW+r9CljFH9Dhm6zd6S+8CNWjoKD8X4VITMvKQIgb3sg\nq9gPVzXn/+f8Qcc2KILSh3ffkIpA8yJK9omUIxI=\n-----END RSA PRIVATE KEY-----\n',
|
14
|
-
cert: '-----BEGIN CERTIFICATE-----\nMIIBmDCCAUICCQDGtiGKgI9AXjANBgkqhkiG9w0BAQUFADBTMQswCQYDVQQGEwJD\nTDELMAkGA1UECBMCUk0xETAPBgNVBAcTCFNhbnRpYWdvMREwDwYDVQQKEwhQbGF0\nYW51czERMA8GA1UEAxMIQ3JhYnRyYXAwHhcNMTUwMTE1MjAxNzMzWhcNNDIwNjAx\nMjAxNzMzWjBTMQswCQYDVQQGEwJDTDELMAkGA1UECBMCUk0xETAPBgNVBAcTCFNh\nbnRpYWdvMREwDwYDVQQKEwhQbGF0YW51czERMA8GA1UEAxMIQ3JhYnRyYXAwXDAN\nBgkqhkiG9w0BAQEFAANLADBIAkEAr8v+Vdv+TFTO97WiWjvWzqZAu+5AKbfPbCoK\nWrvU0t6sWdpek34Ws0/Lv5HeBnWF1Zeps3k2qlYMwz19xt3unQIDAQABMA0GCSqG\nSIb3DQEBBQUAA0EAmecqIZqQ8OXSIj0V2VKaIXwz8RBnhLzU7BJwcsWJE/Bex7zB\nWP+vLv9ML5ZRLCsXjL5IOav8qAX/NZXjoN3e3Q==\n-----END CERTIFICATE-----\n'
|
15
|
-
};
|
16
|
-
|
17
|
-
var LOG = {
|
18
|
-
DEBUG: 0,
|
19
|
-
INFO: 1,
|
20
|
-
WARN: 2,
|
21
|
-
ERROR: 3
|
22
|
-
};
|
23
|
-
|
24
|
-
var STACK = [],
|
25
|
-
MODE = false,
|
26
|
-
SOURCE = null,
|
27
|
-
PORT = 4000,
|
28
|
-
LOG_LEVEL = LOG.WARN;
|
29
|
-
|
30
|
-
(function() {
|
31
|
-
if(process.argv.length < 2) throw 'Must provide a proxy mode';
|
32
|
-
MODE = process.argv[2];
|
33
|
-
var i = 3;
|
34
|
-
|
35
|
-
if(MODE != 'pass') {
|
36
|
-
if(process.argv.length < 3) throw 'Must provide a bucket path';
|
37
|
-
SOURCE = process.argv[3];
|
38
|
-
i = 4;
|
39
|
-
}
|
40
|
-
|
41
|
-
for(; i < process.argv.length; i++) {
|
42
|
-
var parts = process.argv[i].split('=');
|
43
|
-
switch(parts[0]) {
|
44
|
-
case '--port': PORT = parseInt(parts[1], 10); break;
|
45
|
-
case '--quiet': PORT = parseInt(parts[1], 10); break;
|
46
|
-
default: throw 'Invalid option ' + parts[0];
|
47
|
-
}
|
48
|
-
}
|
49
|
-
})();
|
50
|
-
|
51
|
-
// Utility methods
|
52
|
-
|
53
|
-
function log(_level, _message) {
|
54
|
-
if(_level == LOG.DEBUG) _message = '\t' + _message;
|
55
|
-
if(_level >= LOG_LEVEL) console.log(_message);
|
56
|
-
}
|
57
|
-
|
58
|
-
function forOwn(_obj, _cb) {
|
59
|
-
for(var key in _obj) {
|
60
|
-
if(_obj.hasOwnProperty(key)) {
|
61
|
-
_cb(key, _obj[key]);
|
62
|
-
}
|
63
|
-
}
|
64
|
-
}
|
65
|
-
|
66
|
-
function keysToLowerCase(_obj) {
|
67
|
-
var result = {};
|
68
|
-
forOwn(_obj, function(k,v) { result[k.toLowerCase()] = v; });
|
69
|
-
return result;
|
70
|
-
}
|
71
|
-
|
72
|
-
function pickRandomPort() {
|
73
|
-
return 0; // This could fail on Linux...
|
74
|
-
}
|
75
|
-
|
76
|
-
function matchRequestToResource(_req, _resource) {
|
77
|
-
return _resource.method.toLowerCase() == _req.method.toLowerCase() && _resource.url == _req.url;
|
78
|
-
}
|
79
|
-
|
80
|
-
function matchRequestToResourceWOQuery(_req, _resource) {
|
81
|
-
if(_resource.method.toLowerCase() == _req.method.toLowerCase()) return false;
|
82
|
-
|
83
|
-
var reqUrl = url.parse(_req.url, true),
|
84
|
-
resUrl = url.parse(_resource.url, true);
|
85
|
-
|
86
|
-
return reqUrl.hostname == resUrl.hostname && reqUrl.pathname == resUrl.pathname;
|
87
|
-
}
|
88
|
-
|
89
|
-
function findAndMoveLast(_req, _array, _matches) {
|
90
|
-
for(var i = 0, l = _array.length; i < l; i++) {
|
91
|
-
if(_matches(_req, _array[i])) {
|
92
|
-
var resource = _array.splice(i, 1)[0];
|
93
|
-
_array.push(resource);
|
94
|
-
return resource;
|
95
|
-
}
|
96
|
-
}
|
97
|
-
|
98
|
-
return null;
|
99
|
-
}
|
100
|
-
|
101
|
-
function loadStackFrom(_path, _then) {
|
102
|
-
var data = fs.readFileSync(_path);
|
103
|
-
zlib.gunzip(data, function(err, buffer) {
|
104
|
-
if (!err) STACK = JSON.parse(buffer.toString());
|
105
|
-
_then();
|
106
|
-
});
|
107
|
-
}
|
108
|
-
|
109
|
-
function saveStackTo(_path, _then) {
|
110
|
-
var data = JSON.stringify(STACK);
|
111
|
-
zlib.gzip(data, function(err, buffer) {
|
112
|
-
if (!err) fs.writeFileSync(_path, buffer);
|
113
|
-
_then();
|
114
|
-
});
|
115
|
-
}
|
116
|
-
|
117
|
-
function resolveAndServeResource(_req, _resp) {
|
118
|
-
var resource = findInStack(_req);
|
119
|
-
if(resource) {
|
120
|
-
log(LOG.INFO, "Serving: " + resource.method + ' ' + resource.url);
|
121
|
-
log(LOG.DEBUG, "HTTP " + resource.statusCode);
|
122
|
-
log(LOG.DEBUG, JSON.stringify(resource.headers));
|
123
|
-
|
124
|
-
serveResource(resource, _resp);
|
125
|
-
} else {
|
126
|
-
log(LOG.WARN, 'Not found: ' + _req.url);
|
127
|
-
_resp.statusCode = 404;
|
128
|
-
_resp.end();
|
129
|
-
}
|
130
|
-
}
|
131
|
-
|
132
|
-
function serveLastResource(_resp) {
|
133
|
-
serveResource(STACK[STACK.length-1], _resp);
|
134
|
-
}
|
135
|
-
|
136
|
-
function serveResource(_resource, _resp) {
|
137
|
-
_resp.statusCode = _resource.statusCode;
|
138
|
-
|
139
|
-
forOwn(_resource.headers, function(k, v) { _resp.setHeader(k, v); });
|
140
|
-
|
141
|
-
if(_resource.content) {
|
142
|
-
var buf = new Buffer(_resource.content, _resource.encoding);
|
143
|
-
_resp.end(buf);
|
144
|
-
} else {
|
145
|
-
_resp.end();
|
146
|
-
}
|
147
|
-
}
|
148
|
-
|
149
|
-
function findAndMoveLast(_req, _matches) {
|
150
|
-
for(var i = 0, l = STACK.length; i < l; i++) {
|
151
|
-
if(_matches(_req, STACK[i])) {
|
152
|
-
var resource = STACK.splice(i, 1)[0];
|
153
|
-
STACK.push(resource);
|
154
|
-
return resource;
|
155
|
-
}
|
156
|
-
}
|
157
|
-
|
158
|
-
return null;
|
159
|
-
}
|
160
|
-
|
161
|
-
function findInStack(_req, _partial) {
|
162
|
-
return findAndMoveLast(_req, matchRequestToResource) ||
|
163
|
-
findAndMoveLast(_req, matchRequestToResourceWOQuery);
|
164
|
-
}
|
165
|
-
|
166
|
-
function cacheResponse(_req, _resp, _cb) {
|
167
|
-
|
168
|
-
log(LOG.INFO, "Caching Response");
|
169
|
-
log(LOG.DEBUG, "HTTP " + _resp.statusCode);
|
170
|
-
log(LOG.DEBUG, JSON.stringify(keysToLowerCase(_resp.headers)));
|
171
|
-
|
172
|
-
var encoding = null,
|
173
|
-
// TODO: consider storing port and protocoll in the resource.
|
174
|
-
resource = {
|
175
|
-
url: _req.url,
|
176
|
-
statusCode: _resp.statusCode,
|
177
|
-
method: _req.method,
|
178
|
-
// inHeaders: req.headers, // store request headers to aid in recognition?
|
179
|
-
headers: keysToLowerCase(_resp.headers),
|
180
|
-
content: '',
|
181
|
-
encoding: 'base64'
|
182
|
-
},
|
183
|
-
contentEncoding = resource.headers['content-encoding'],
|
184
|
-
contentType = resource.headers['content-type'],
|
185
|
-
outStream = _resp;
|
186
|
-
|
187
|
-
// add decompression if supported encoding:
|
188
|
-
if(contentEncoding == 'gzip') {
|
189
|
-
outStream = _resp.pipe(zlib.createGunzip());
|
190
|
-
delete resource.headers['content-encoding'];
|
191
|
-
contentEncoding = null;
|
192
|
-
} else if(contentEncoding == 'deflate') {
|
193
|
-
outStream = _resp.pipe(zlib.createInflate());
|
194
|
-
delete resource.headers['content-encoding'];
|
195
|
-
contentEncoding = null;
|
196
|
-
}
|
197
|
-
|
198
|
-
// use utf8 encoding for uncompresed text:
|
199
|
-
if(!contentEncoding && contentType) {
|
200
|
-
contentType = contentType.match(/([^\/]+)\/([^\s]+)(?:\s+(.+))?/i);
|
201
|
-
if(contentType && (contentType[1] == 'text' || contentType[1] == 'application')) {
|
202
|
-
resource.encoding = 'utf-8';
|
203
|
-
}
|
204
|
-
}
|
205
|
-
|
206
|
-
// remove unwanted headers:
|
207
|
-
delete resource.headers['content-length'];
|
208
|
-
|
209
|
-
// start receiving data:
|
210
|
-
if(resource.encoding) outStream.setEncoding(resource.encoding);
|
211
|
-
outStream.on('data', function(_chunk) {
|
212
|
-
resource.content += _chunk;
|
213
|
-
});
|
214
|
-
|
215
|
-
// when all data is received, store resource (dont know how this will handle more than one request)
|
216
|
-
outStream.on('end', function() {
|
217
|
-
STACK.push(resource);
|
218
|
-
_cb();
|
219
|
-
});
|
220
|
-
}
|
221
|
-
|
222
|
-
function prepareForwardRequest(_req) {
|
223
|
-
var urlObj = url.parse(_req.url);
|
224
|
-
|
225
|
-
var options = {
|
226
|
-
method: _req.method,
|
227
|
-
host: urlObj.host,
|
228
|
-
path: urlObj.path,
|
229
|
-
rejectUnauthorized: false,
|
230
|
-
headers: keysToLowerCase(_req.headers)
|
231
|
-
};
|
232
|
-
|
233
|
-
// Rewrite headers
|
234
|
-
options.headers['accept-encoding'] = 'gzip,deflate';
|
235
|
-
return options;
|
236
|
-
}
|
237
|
-
|
238
|
-
function passRequest(_req, _resp) {
|
239
|
-
log(LOG.INFO, 'Passing through ' + _req.method + ' request for ' + _req.url);
|
240
|
-
|
241
|
-
var urlObj = url.parse(_req.url);
|
242
|
-
var forward = (urlObj.protocol == 'https:' ? https : http).request({
|
243
|
-
method: _req.method,
|
244
|
-
host: urlObj.host,
|
245
|
-
path: urlObj.path,
|
246
|
-
headers: _req.headers
|
247
|
-
}, function(_fw_resp) {
|
248
|
-
// pipe response back untouched
|
249
|
-
_resp.writeHead(_fw_resp.statusCode, _fw_resp.headers);
|
250
|
-
_fw_resp.pipe(_resp);
|
251
|
-
});
|
252
|
-
|
253
|
-
_req.pipe(forward);
|
254
|
-
}
|
255
|
-
|
256
|
-
function captureRequest(_req, _resp, _useSSL) {
|
257
|
-
log(LOG.INFO, 'Forwarding ' + _req.method + ' request for ' + _req.url);
|
258
|
-
|
259
|
-
var urlObj = url.parse(_req.url);
|
260
|
-
var options = {
|
261
|
-
method: _req.method,
|
262
|
-
host: urlObj.host,
|
263
|
-
path: urlObj.path,
|
264
|
-
rejectUnauthorized: false,
|
265
|
-
headers: keysToLowerCase(_req.headers)
|
266
|
-
};
|
267
|
-
|
268
|
-
// Rewrite headers
|
269
|
-
options.headers['accept-encoding'] = 'gzip,deflate';
|
270
|
-
log(LOG.DEBUG, JSON.stringify(options));
|
271
|
-
|
272
|
-
var forward = (urlObj.protocol == 'https:' ? https : http).request(options, function(_fw_resp) {
|
273
|
-
cacheResponse(_req, _fw_resp, function() {
|
274
|
-
serveLastResource(_resp);
|
275
|
-
});
|
276
|
-
});
|
277
|
-
|
278
|
-
_req.pipe(forward); // forward request data
|
279
|
-
}
|
280
|
-
|
281
|
-
function replayRequest(_req, _resp) {
|
282
|
-
log(LOG.INFO, 'Resolving ' + _req.method + ' request for ' + _req.url);
|
283
|
-
resolveAndServeResource(_req, _resp);
|
284
|
-
}
|
285
|
-
|
286
|
-
function selectProxy() {
|
287
|
-
switch(MODE) {
|
288
|
-
case 'pass': return passRequest;
|
289
|
-
case 'capture': return captureRequest;
|
290
|
-
case 'replay': return replayRequest;
|
291
|
-
default: throw 'Invalid proxy mode';
|
292
|
-
}
|
293
|
-
}
|
294
|
-
|
295
|
-
var PROXY_FUN = selectProxy(),
|
296
|
-
SERVER = http.createServer(PROXY_FUN);
|
297
|
-
|
298
|
-
// Special handler for HTTPS request, creates a dedicated HTTPS proxy per connection,
|
299
|
-
// that way the CONNECT tunnel can be intercepted, requires support for self signed
|
300
|
-
// certificates in the client.
|
301
|
-
SERVER.on('connect', function (_req, _sock, _head) {
|
302
|
-
|
303
|
-
var urlObj = url.parse('http://' + _req.url);
|
304
|
-
log(LOG.INFO, 'New HTTPS request: starting https intercept on ' + urlObj.hostname);
|
305
|
-
|
306
|
-
var httpsServ = https.createServer(HTTPS_OPTIONS, function(_req, _resp) {
|
307
|
-
_req.url = 'https://' + urlObj.hostname + _req.url;
|
308
|
-
PROXY_FUN(_req, _resp);
|
309
|
-
});
|
310
|
-
|
311
|
-
httpsServ.listen(pickRandomPort());
|
312
|
-
|
313
|
-
var tunnelSock = net.connect(httpsServ.address().port, function() {
|
314
|
-
_sock.write('HTTP/1.1 200 Connection Established\r\n' +
|
315
|
-
'Proxy-agent: Node-Proxy\r\n' +
|
316
|
-
'\r\n');
|
317
|
-
tunnelSock.write(_head);
|
318
|
-
tunnelSock.pipe(_sock);
|
319
|
-
_sock.pipe(tunnelSock);
|
320
|
-
});
|
321
|
-
|
322
|
-
_sock.on('close', function() {
|
323
|
-
httpsServ.close();
|
324
|
-
});
|
325
|
-
});
|
326
|
-
|
327
|
-
console.log("Starting crabtrap! mode: " + MODE);
|
328
|
-
|
329
|
-
if(MODE == 'replay') {
|
330
|
-
loadStackFrom(SOURCE, SERVER.listen.bind(SERVER, PORT));
|
331
|
-
} else {
|
332
|
-
SERVER.listen(PORT);
|
333
|
-
}
|
334
|
-
|
335
|
-
var EXITING = false;
|
336
|
-
process.on('SIGINT', function() {
|
337
|
-
if(EXITING) return;
|
338
|
-
EXITING = true;
|
339
|
-
|
340
|
-
console.log("Shutting down crabtrap!");
|
341
|
-
SERVER.close();
|
342
|
-
if(MODE == 'capture') {
|
343
|
-
saveStackTo(SOURCE, process.exit.bind(process));
|
344
|
-
} else {
|
345
|
-
process.exit();
|
346
|
-
}
|
347
|
-
});
|