iudex-httpclient-3 1.0.0-java → 1.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
data/History.rdoc CHANGED
@@ -1,2 +1,10 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Update to iudex-http ~> 1.1.0 and associated changes:
3
+ ContentTypeSet, buffer in Client, acceptedContentType,
4
+ maxContentLength from session.
5
+ * Default max-redirects 0
6
+ * Adopt iudex-http-test based tests and fix failure cases.
7
+ * Update to minitest ~> 2.3
8
+
1
9
  === 1.0.0 (2011-04-04)
2
10
  * Initial release.
data/Manifest.txt CHANGED
@@ -7,4 +7,4 @@ lib/iudex-httpclient-3/base.rb
7
7
  lib/iudex-httpclient-3.rb
8
8
  test/setup.rb
9
9
  test/test_httpclient.rb
10
- lib/iudex-httpclient-3/iudex-httpclient-3-1.0.0.jar
10
+ lib/iudex-httpclient-3/iudex-httpclient-3-1.1.0.jar
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
4
4
  require 'iudex-httpclient-3/base'
5
5
 
6
6
  require 'rubygems'
7
- gem 'rjack-tarpit', '~> 1.2'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
8
  require 'rjack-tarpit'
9
9
 
10
10
  t = RJack::TarPit.new( 'iudex-httpclient-3',
@@ -14,12 +14,13 @@ t = RJack::TarPit.new( 'iudex-httpclient-3',
14
14
  t.specify do |h|
15
15
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
16
 
17
- h.extra_deps += [ [ 'iudex-http', '~> 1.0.0' ],
17
+ h.extra_deps += [ [ 'iudex-http', '~> 1.1.0' ],
18
18
  [ 'rjack-httpclient-3', '~> 3.1.3' ],
19
19
  [ 'hooker', '~> 1.0.0' ] ]
20
20
 
21
21
  h.testlib = :minitest
22
- h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
22
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
23
+ [ 'iudex-http-test', '~> 1.1.0' ],
23
24
  [ 'rjack-logback', '~> 1.0' ] ]
24
25
  end
25
26
 
@@ -34,11 +34,14 @@ module Iudex
34
34
 
35
35
  # Sensible defaults:
36
36
  mgr.manager_params.max_total_connections = 100
37
- mgr.manager_params.default_max_connections_per_host = 2
37
+ mgr.manager_params.default_max_connections_per_host = 3
38
38
  mgr.manager_params.stale_checking_enabled = false
39
39
  mgr.client_params.connection_manager_timeout = 3_000 #ms
40
40
  mgr.client_params.so_timeout = 5_000 #ms
41
41
 
42
+ # By default, do not follow redirects
43
+ mgr.client_params.set_int_parameter( "http.protocol.max-redirects", 0 )
44
+
42
45
  mgr.client_params.set_parameter(
43
46
  RJack::HTTPClient3::HttpMethodParams::RETRY_HANDLER,
44
47
  RJack::HTTPClient3::DefaultHttpMethodRetryHandler.new( 2, false ) )
@@ -16,7 +16,7 @@
16
16
 
17
17
  module Iudex
18
18
  module HTTPClient3
19
- VERSION = '1.0.0'
19
+ VERSION = '1.1.0'
20
20
 
21
21
  LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
22
  end
data/pom.xml CHANGED
@@ -5,13 +5,13 @@
5
5
  <groupId>iudex</groupId>
6
6
  <artifactId>iudex-httpclient-3</artifactId>
7
7
  <packaging>jar</packaging>
8
- <version>1.0.0</version>
9
- <name>Iudex Core System</name>
8
+ <version>1.1.0</version>
9
+ <name>Iudex HTTP Client 3</name>
10
10
 
11
11
  <parent>
12
12
  <groupId>iudex</groupId>
13
13
  <artifactId>iudex-parent</artifactId>
14
- <version>1.0</version>
14
+ <version>1.1</version>
15
15
  <relativePath>..</relativePath>
16
16
  </parent>
17
17
 
@@ -29,7 +29,7 @@
29
29
  <dependency>
30
30
  <groupId>iudex</groupId>
31
31
  <artifactId>iudex-http</artifactId>
32
- <version>[1.0,1.1)</version>
32
+ <version>[1.1,1.2)</version>
33
33
  </dependency>
34
34
 
35
35
  <dependency>
@@ -21,8 +21,28 @@ require File.join( File.dirname( __FILE__ ), "setup" )
21
21
 
22
22
  require 'iudex-httpclient-3'
23
23
 
24
+ require 'iudex-http-test/helper'
25
+ require 'iudex-http-test/broken_server'
26
+
27
+ require 'thread'
28
+
24
29
  class TestHTTPClient < MiniTest::Unit::TestCase
25
30
  include Iudex
31
+ include Iudex::HTTP
32
+ include Iudex::HTTP::Test
33
+ include Helper
34
+
35
+ import 'java.net.ConnectException'
36
+ import 'java.net.UnknownHostException'
37
+ import 'java.net.SocketTimeoutException'
38
+ import 'java.net.SocketException'
39
+ import 'org.apache.commons.httpclient.NoHttpResponseException'
40
+
41
+ CustomUnit.register
42
+
43
+ def setup
44
+ server # make sure jetty starts, for cosmetic log output
45
+ end
26
46
 
27
47
  def test_config
28
48
 
@@ -43,4 +63,330 @@ class TestHTTPClient < MiniTest::Unit::TestCase
43
63
  mgr.shutdown
44
64
 
45
65
  end
66
+
67
+ def test_200
68
+ with_new_client do |client|
69
+
70
+ with_session_handler( client, "/index" ) do |s,x|
71
+ assert_equal( 200, s.status_code )
72
+ assert_match( /Test Index Page/, s.response_stream.to_io.read )
73
+ end
74
+
75
+ with_session_handler( client, "/atom.xml" ) do |s,x|
76
+ assert_equal( 200, s.status_code )
77
+ body = s.response_stream.to_io.read
78
+ assert_operator( body.length, :>, 10_000 )
79
+ end
80
+
81
+ end
82
+ end
83
+
84
+ def test_correct_type
85
+ with_new_client do |client|
86
+ client.accepted_content_types = ContentTypeSet.new( [ "text/html" ] )
87
+ with_session_handler( client, "/index" ) do |s,x|
88
+ assert_equal( 200, s.status_code )
89
+ assert_nil( x )
90
+ assert_match( /^text\/html/,
91
+ find_header( s.response_headers, 'Content-Type' ) )
92
+ end
93
+ end
94
+ end
95
+
96
+ def test_headers
97
+ req,rsp = nil
98
+ with_new_client do |client|
99
+ with_session_handler( client,
100
+ "/echo/header/Accept?noop=3",
101
+ { 'Accept' => 'text/plain;moo' } ) do |s,x|
102
+ assert_equal( 200, s.status_code )
103
+ assert_equal( 'GET /echo/header/Accept?noop=3',
104
+ find_header( s.request_headers, "Request-Line" ) )
105
+ assert_equal( 'text/plain;moo',
106
+ find_header( s.request_headers, 'Accept' ) )
107
+ assert_equal( 'localhost:19292',
108
+ find_header( s.request_headers, 'Host' ) )
109
+
110
+ assert_match( /^text\/plain/,
111
+ find_header( s.response_headers, 'Content-Type' ) )
112
+ assert_match( /^text\/plain;moo$/, s.response_stream.to_io.read )
113
+ end
114
+ end
115
+ end
116
+
117
+ def test_unknown_host
118
+ with_new_client do |client|
119
+ with_session_handler( client,
120
+ "http://9xa9.a7v6a7lop-9m9q-w12.com" ) do |s,x|
121
+ assert_instance_of( UnknownHostException, x )
122
+ end
123
+ end
124
+ end
125
+
126
+ def test_local_connection_refused
127
+ with_new_client do |client|
128
+ with_session_handler( client,
129
+ "http://localhost:54929/" ) do |s,x|
130
+ assert_instance_of( ConnectException, x )
131
+ end
132
+ end
133
+ end
134
+
135
+ def test_connection_timeout
136
+ bs = BrokenServer.new
137
+ bs.start
138
+
139
+ with_new_client do |client|
140
+ with_session_handler( client,
141
+ "http://localhost:19293/" ) do |s,x|
142
+ assert_instance_of( SocketTimeoutException, x )
143
+ end
144
+ end
145
+ ensure
146
+ bs.stop
147
+ end
148
+
149
+ def test_404
150
+ with_new_client do |client|
151
+ with_session_handler( client, "/not-found" ) do |s,x|
152
+ assert_equal( 404, s.status_code )
153
+ end
154
+ end
155
+ end
156
+
157
+ def test_304
158
+ with_new_client do |client|
159
+ client.accepted_content_types = ContentTypeSet.new( [ "text/html" ] )
160
+ with_session_handler( client, "/304" ) do |s,x|
161
+ assert_equal( 304, s.status_code )
162
+ end
163
+ end
164
+ end
165
+
166
+ def test_timeout
167
+ with_new_client do |client|
168
+ with_session_handler( client, "/index?sleep=1.0" ) do |s,x|
169
+ assert_instance_of( SocketTimeoutException, x )
170
+ end
171
+ end
172
+ sleep 0.65 # FIXME: Account for test server delay. Should be
173
+ # joined instead.
174
+ end
175
+
176
+ def test_redirect
177
+ with_new_client do |client|
178
+ with_session_handler( client, "/" ) do |s,x|
179
+ assert_equal( 200, s.status_code )
180
+ assert_equal( 'http://localhost:19292/index', s.url )
181
+ end
182
+ end
183
+ end
184
+
185
+ def test_redirect_with_query_string
186
+ with_new_client do |client|
187
+ with_session_handler( client, "/redirects/multi/2?sleep=0" ) do |s,x|
188
+ assert_equal( 200, s.status_code )
189
+ assert_equal( 'http://localhost:19292/redirects/multi/1?sleep=0',
190
+ s.url )
191
+ assert_equal( 'GET /redirects/multi/1?sleep=0',
192
+ find_header( s.request_headers, "Request-Line" ) )
193
+ end
194
+ end
195
+ end
196
+
197
+ def test_multi_redirect
198
+ settings = lambda do |mgr|
199
+ mgr.client_params.set_int_parameter( "http.protocol.max-redirects", 7 )
200
+ end
201
+
202
+ with_new_client( settings ) do |client|
203
+ with_session_handler( client, "/redirects/multi/6" ) do |s,x|
204
+ assert_equal( 200, s.status_code )
205
+ assert_nil x
206
+ end
207
+ end
208
+ end
209
+
210
+ def test_unfollowed_301_redirect
211
+ settings = lambda do |mgr|
212
+ mgr.client_params.set_int_parameter( "http.protocol.max-redirects", 0 )
213
+ end
214
+
215
+ with_new_client( settings ) do |client|
216
+ with_session_handler( client, "/301" ) do |s,x|
217
+ assert_nil( x )
218
+ assert_equal( 301, s.status_code )
219
+ assert_match( %r{/index$},
220
+ find_header( s.response_headers, "Location" ) )
221
+ end
222
+ end
223
+ end
224
+
225
+ def test_too_many_redirects
226
+ settings = lambda do |mgr|
227
+ mgr.client_params.set_int_parameter( "http.protocol.max-redirects", 19 )
228
+ end
229
+
230
+ with_new_client( settings ) do |client|
231
+ with_session_handler( client, "/redirects/multi/20" ) do |s,x|
232
+ assert_equal( 302, s.status_code )
233
+ end
234
+ end
235
+ end
236
+
237
+ def test_redirect_timeout
238
+ skip( "Unreliable timeout with redirects, timing dependent" )
239
+ with_new_client() do |client|
240
+ with_session_handler( client, "/redirects/multi/3?sleep=0.40" ) do |s,x|
241
+ assert_instance_of( SocketTimeoutException, x )
242
+ end
243
+ sleep 0.75
244
+ end
245
+ end
246
+
247
+ def test_bad_server_response
248
+ bs = BrokenServer.new
249
+ bs.start
250
+
251
+ sthread = Thread.new do
252
+ bs.accept { |sock| sock.write "FU Stinky\r\n" }
253
+ end
254
+
255
+ #FIXME: SocketTimeoutException on bad HTTP response line?
256
+ with_new_client do |client|
257
+ with_session_handler( client, "http://localhost:19293/" ) do |s,x|
258
+ assert_instance_of( SocketTimeoutException, x )
259
+ end
260
+ end
261
+
262
+ sthread.join
263
+
264
+ ensure
265
+ bs.stop
266
+ end
267
+
268
+ def test_empty_server_response
269
+ bs = BrokenServer.new
270
+ bs.start
271
+
272
+ sthread = Thread.new do
273
+ bs.accept { |sock| sock.close }
274
+ end
275
+
276
+ with_new_client do |client|
277
+ with_session_handler( client, "http://localhost:19293/" ) do |s,x|
278
+ assert( [ NoHttpResponseException, SocketException ].include?( x.class ) )
279
+ #FIXME: One or the other, timing dependent!?
280
+ end
281
+ end
282
+
283
+ sthread.join
284
+
285
+ ensure
286
+ bs.stop
287
+ end
288
+
289
+ def test_abort_when_too_large
290
+ with_new_client do |client|
291
+ with_session_handler( client, "/giant" ) do |s,x|
292
+ assert_nil( x )
293
+ assert_equal( HTTPSession::TOO_LARGE, s.status_code )
294
+ end
295
+ end
296
+ end
297
+
298
+ def test_abort_when_too_large_length
299
+ with_new_client do |client|
300
+ client.max_content_length = 1
301
+ with_session_handler( client, "/atom.xml" ) do |s,x|
302
+ assert_nil( x )
303
+ assert_equal( HTTPSession::TOO_LARGE_LENGTH, s.status_code )
304
+ end
305
+ end
306
+ end
307
+
308
+ def test_abort_when_wrong_type
309
+ with_new_client do |client|
310
+ client.accepted_content_types = ContentTypeSet.new( [ "gold/*" ] )
311
+ with_session_handler( client, "/giant" ) do |s,x|
312
+ assert_nil( x )
313
+ assert_equal( HTTPSession::NOT_ACCEPTED, s.status_code )
314
+ end
315
+ end
316
+ end
317
+
318
+ def with_session_handler( client, uri, headers = {}, &block )
319
+ session = client.create_session
320
+ uri = "http://localhost:#{server.port}#{uri}" unless uri =~ /^http:/
321
+ session.url = uri
322
+ headers.each do |k,v|
323
+ session.add_request_header( Java::iudex.http.Header.new( k, v ) )
324
+ end
325
+
326
+ handler = TestHandler.new( &block )
327
+ client.request( session, handler )
328
+
329
+ assert( handler.called?, "Handler should have been called!" )
330
+ session.close
331
+ session
332
+ end
333
+
334
+ def with_new_client( mgr_proc = nil )
335
+ # Default manager config
336
+ mgr = HTTPClient3.create_manager
337
+ mgr.client_params.set_parameter(
338
+ RJack::HTTPClient3::HttpMethodParams::RETRY_HANDLER,
339
+ RJack::HTTPClient3::DefaultHttpMethodRetryHandler.new( 0, false ) )
340
+ mgr.client_params.connection_manager_timeout = 500 #ms
341
+ mgr.client_params.so_timeout = 500 #ms
342
+
343
+ # For testing redirects
344
+ mgr.client_params.set_int_parameter( "http.protocol.max-redirects", 20 )
345
+
346
+ # Overrides via proc
347
+ mgr_proc.call( mgr ) if mgr_proc
348
+
349
+ mgr.start
350
+ begin
351
+ yield HTTPClient3::HTTPClient3.new( mgr.client )
352
+ ensure
353
+ mgr.shutdown
354
+ end
355
+ end
356
+
357
+ class TestHandler < BaseResponseHandler
358
+
359
+ def initialize( &block )
360
+ @block = block
361
+ @failure = nil
362
+ end
363
+
364
+ def sessionCompleted( session )
365
+ forward( session, session.error )
366
+ end
367
+
368
+ def called?
369
+ raise @failure if @failure
370
+ @block.nil?
371
+ end
372
+
373
+ def forward( s, x = nil )
374
+ b, @block = @block, nil
375
+ if b
376
+ b.call( s, x )
377
+ else
378
+ flunk "Handler called twice!"
379
+ end
380
+ rescue NativeException => x
381
+ @failure = x.cause
382
+ rescue Exception => x
383
+ @failure = x
384
+ end
385
+
386
+ end
387
+
388
+ def find_header( headers, name )
389
+ cl = headers.find { |h| h.name.to_s == name }
390
+ cl && cl.value.to_s
391
+ end
46
392
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: iudex-httpclient-3
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.0
5
+ version: 1.1.0
6
6
  platform: java
7
7
  authors:
8
8
  - David Kellum
@@ -10,8 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-04 00:00:00 -07:00
14
- default_executable:
13
+ date: 2011-11-13 00:00:00 Z
15
14
  dependencies:
16
15
  - !ruby/object:Gem::Dependency
17
16
  name: iudex-http
@@ -21,7 +20,7 @@ dependencies:
21
20
  requirements:
22
21
  - - ~>
23
22
  - !ruby/object:Gem::Version
24
- version: 1.0.0
23
+ version: 1.1.0
25
24
  type: :runtime
26
25
  version_requirements: *id001
27
26
  - !ruby/object:Gem::Dependency
@@ -52,36 +51,44 @@ dependencies:
52
51
  requirement: &id004 !ruby/object:Gem::Requirement
53
52
  none: false
54
53
  requirements:
55
- - - ">="
56
- - !ruby/object:Gem::Version
57
- version: 1.7.1
58
- - - <
54
+ - - ~>
59
55
  - !ruby/object:Gem::Version
60
- version: "2.1"
56
+ version: "2.3"
61
57
  type: :development
62
58
  version_requirements: *id004
63
59
  - !ruby/object:Gem::Dependency
64
- name: rjack-logback
60
+ name: iudex-http-test
65
61
  prerelease: false
66
62
  requirement: &id005 !ruby/object:Gem::Requirement
67
63
  none: false
68
64
  requirements:
69
65
  - - ~>
70
66
  - !ruby/object:Gem::Version
71
- version: "1.0"
67
+ version: 1.1.0
72
68
  type: :development
73
69
  version_requirements: *id005
74
70
  - !ruby/object:Gem::Dependency
75
- name: rjack-tarpit
71
+ name: rjack-logback
76
72
  prerelease: false
77
73
  requirement: &id006 !ruby/object:Gem::Requirement
78
74
  none: false
79
75
  requirements:
80
76
  - - ~>
81
77
  - !ruby/object:Gem::Version
82
- version: 1.3.0
78
+ version: "1.0"
83
79
  type: :development
84
80
  version_requirements: *id006
81
+ - !ruby/object:Gem::Dependency
82
+ name: rjack-tarpit
83
+ prerelease: false
84
+ requirement: &id007 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ~>
88
+ - !ruby/object:Gem::Version
89
+ version: 1.4.0
90
+ type: :development
91
+ version_requirements: *id007
85
92
  description: |-
86
93
  Iudex is a general purpose web crawler and feed processor in
87
94
  ruby/java. This gem is an rjack-httpclient-3 based implementation of
@@ -106,8 +113,8 @@ files:
106
113
  - lib/iudex-httpclient-3.rb
107
114
  - test/setup.rb
108
115
  - test/test_httpclient.rb
109
- - lib/iudex-httpclient-3/iudex-httpclient-3-1.0.0.jar
110
- has_rdoc: true
116
+ - lib/iudex-httpclient-3/iudex-httpclient-3-1.1.0.jar
117
+ - .gemtest
111
118
  homepage: http://github.com/dekellum/iudex
112
119
  licenses: []
113
120
 
@@ -132,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
132
139
  requirements: []
133
140
 
134
141
  rubyforge_project: iudex-httpclient-3
135
- rubygems_version: 1.5.1
142
+ rubygems_version: 1.8.9
136
143
  signing_key:
137
144
  specification_version: 3
138
145
  summary: Iudex is a general purpose web crawler and feed processor in ruby/java