iudex-httpclient-3 1.0.0-java → 1.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemtest ADDED
File without changes
data/History.rdoc CHANGED
@@ -1,2 +1,10 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Update to iudex-http ~> 1.1.0 and associated changes:
3
+ ContentTypeSet, buffer in Client, acceptedContentType
4
+ maxContentLength from session.
5
+ * Default max-redirects 0
6
+ * Adopt iudex-http-test based tests and fix failure cases.
7
+ * Update to minitest ~> 2.3
8
+
1
9
  === 1.0.0 (2011-04-04)
2
10
  * Initial release.
data/Manifest.txt CHANGED
@@ -7,4 +7,4 @@ lib/iudex-httpclient-3/base.rb
7
7
  lib/iudex-httpclient-3.rb
8
8
  test/setup.rb
9
9
  test/test_httpclient.rb
10
- lib/iudex-httpclient-3/iudex-httpclient-3-1.0.0.jar
10
+ lib/iudex-httpclient-3/iudex-httpclient-3-1.1.0.jar
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
4
4
  require 'iudex-httpclient-3/base'
5
5
 
6
6
  require 'rubygems'
7
- gem 'rjack-tarpit', '~> 1.2'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
8
  require 'rjack-tarpit'
9
9
 
10
10
  t = RJack::TarPit.new( 'iudex-httpclient-3',
@@ -14,12 +14,13 @@ t = RJack::TarPit.new( 'iudex-httpclient-3',
14
14
  t.specify do |h|
15
15
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
16
 
17
- h.extra_deps += [ [ 'iudex-http', '~> 1.0.0' ],
17
+ h.extra_deps += [ [ 'iudex-http', '~> 1.1.0' ],
18
18
  [ 'rjack-httpclient-3', '~> 3.1.3' ],
19
19
  [ 'hooker', '~> 1.0.0' ] ]
20
20
 
21
21
  h.testlib = :minitest
22
- h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
22
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
23
+ [ 'iudex-http-test', '~> 1.1.0' ],
23
24
  [ 'rjack-logback', '~> 1.0' ] ]
24
25
  end
25
26
 
@@ -34,11 +34,14 @@ module Iudex
34
34
 
35
35
  # Sensible defaults:
36
36
  mgr.manager_params.max_total_connections = 100
37
- mgr.manager_params.default_max_connections_per_host = 2
37
+ mgr.manager_params.default_max_connections_per_host = 3
38
38
  mgr.manager_params.stale_checking_enabled = false
39
39
  mgr.client_params.connection_manager_timeout = 3_000 #ms
40
40
  mgr.client_params.so_timeout = 5_000 #ms
41
41
 
42
+ # Default no redirects
43
+ mgr.client_params.set_int_parameter( "http.protocol.max-redirects", 0 )
44
+
42
45
  mgr.client_params.set_parameter(
43
46
  RJack::HTTPClient3::HttpMethodParams::RETRY_HANDLER,
44
47
  RJack::HTTPClient3::DefaultHttpMethodRetryHandler.new( 2, false ) )
@@ -16,7 +16,7 @@
16
16
 
17
17
  module Iudex
18
18
  module HTTPClient3
19
- VERSION = '1.0.0'
19
+ VERSION = '1.1.0'
20
20
 
21
21
  LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
22
  end
data/pom.xml CHANGED
@@ -5,13 +5,13 @@
5
5
  <groupId>iudex</groupId>
6
6
  <artifactId>iudex-httpclient-3</artifactId>
7
7
  <packaging>jar</packaging>
8
- <version>1.0.0</version>
9
- <name>Iudex Core System</name>
8
+ <version>1.1.0</version>
9
+ <name>Iudex HTTP Client 3</name>
10
10
 
11
11
  <parent>
12
12
  <groupId>iudex</groupId>
13
13
  <artifactId>iudex-parent</artifactId>
14
- <version>1.0</version>
14
+ <version>1.1</version>
15
15
  <relativePath>..</relativePath>
16
16
  </parent>
17
17
 
@@ -29,7 +29,7 @@
29
29
  <dependency>
30
30
  <groupId>iudex</groupId>
31
31
  <artifactId>iudex-http</artifactId>
32
- <version>[1.0,1.1)</version>
32
+ <version>[1.1,1.2)</version>
33
33
  </dependency>
34
34
 
35
35
  <dependency>
@@ -21,8 +21,28 @@ require File.join( File.dirname( __FILE__ ), "setup" )
21
21
 
22
22
  require 'iudex-httpclient-3'
23
23
 
24
+ require 'iudex-http-test/helper'
25
+ require 'iudex-http-test/broken_server'
26
+
27
+ require 'thread'
28
+
24
29
  class TestHTTPClient < MiniTest::Unit::TestCase
25
30
  include Iudex
31
+ include Iudex::HTTP
32
+ include Iudex::HTTP::Test
33
+ include Helper
34
+
35
+ import 'java.net.ConnectException'
36
+ import 'java.net.UnknownHostException'
37
+ import 'java.net.SocketTimeoutException'
38
+ import 'java.net.SocketException'
39
+ import 'org.apache.commons.httpclient.NoHttpResponseException'
40
+
41
+ CustomUnit.register
42
+
43
+ def setup
44
+ server # make sure jetty starts, for cosmetic log output
45
+ end
26
46
 
27
47
  def test_config
28
48
 
@@ -43,4 +63,330 @@ class TestHTTPClient < MiniTest::Unit::TestCase
43
63
  mgr.shutdown
44
64
 
45
65
  end
66
+
67
+ def test_200
68
+ with_new_client do |client|
69
+
70
+ with_session_handler( client, "/index" ) do |s,x|
71
+ assert_equal( 200, s.status_code )
72
+ assert_match( /Test Index Page/, s.response_stream.to_io.read )
73
+ end
74
+
75
+ with_session_handler( client, "/atom.xml" ) do |s,x|
76
+ assert_equal( 200, s.status_code )
77
+ body = s.response_stream.to_io.read
78
+ assert_operator( body.length, :>, 10_000 )
79
+ end
80
+
81
+ end
82
+ end
83
+
84
+ def test_correct_type
85
+ with_new_client do |client|
86
+ client.accepted_content_types = ContentTypeSet.new( [ "text/html" ] )
87
+ with_session_handler( client, "/index" ) do |s,x|
88
+ assert_equal( 200, s.status_code )
89
+ assert_nil( x )
90
+ assert_match( /^text\/html/,
91
+ find_header( s.response_headers, 'Content-Type' ) )
92
+ end
93
+ end
94
+ end
95
+
96
+ def test_headers
97
+ req,rsp = nil
98
+ with_new_client do |client|
99
+ with_session_handler( client,
100
+ "/echo/header/Accept?noop=3",
101
+ { 'Accept' => 'text/plain;moo' } ) do |s,x|
102
+ assert_equal( 200, s.status_code )
103
+ assert_equal( 'GET /echo/header/Accept?noop=3',
104
+ find_header( s.request_headers, "Request-Line" ) )
105
+ assert_equal( 'text/plain;moo',
106
+ find_header( s.request_headers, 'Accept' ) )
107
+ assert_equal( 'localhost:19292',
108
+ find_header( s.request_headers, 'Host' ) )
109
+
110
+ assert_match( /^text\/plain/,
111
+ find_header( s.response_headers, 'Content-Type' ) )
112
+ assert_match( /^text\/plain;moo$/, s.response_stream.to_io.read )
113
+ end
114
+ end
115
+ end
116
+
117
+ def test_unknown_host
118
+ with_new_client do |client|
119
+ with_session_handler( client,
120
+ "http://9xa9.a7v6a7lop-9m9q-w12.com" ) do |s,x|
121
+ assert_instance_of( UnknownHostException, x )
122
+ end
123
+ end
124
+ end
125
+
126
+ def test_local_connection_refused
127
+ with_new_client do |client|
128
+ with_session_handler( client,
129
+ "http://localhost:54929/" ) do |s,x|
130
+ assert_instance_of( ConnectException, x )
131
+ end
132
+ end
133
+ end
134
+
135
+ def test_connection_timeout
136
+ bs = BrokenServer.new
137
+ bs.start
138
+
139
+ with_new_client do |client|
140
+ with_session_handler( client,
141
+ "http://localhost:19293/" ) do |s,x|
142
+ assert_instance_of( SocketTimeoutException, x )
143
+ end
144
+ end
145
+ ensure
146
+ bs.stop
147
+ end
148
+
149
+ def test_404
150
+ with_new_client do |client|
151
+ with_session_handler( client, "/not-found" ) do |s,x|
152
+ assert_equal( 404, s.status_code )
153
+ end
154
+ end
155
+ end
156
+
157
+ def test_304
158
+ with_new_client do |client|
159
+ client.accepted_content_types = ContentTypeSet.new( [ "text/html" ] )
160
+ with_session_handler( client, "/304" ) do |s,x|
161
+ assert_equal( 304, s.status_code )
162
+ end
163
+ end
164
+ end
165
+
166
+ def test_timeout
167
+ with_new_client do |client|
168
+ with_session_handler( client, "/index?sleep=1.0" ) do |s,x|
169
+ assert_instance_of( SocketTimeoutException, x )
170
+ end
171
+ end
172
+ sleep 0.65 # FIXME: Account for test server delay. Should be
173
+ # joined instead.
174
+ end
175
+
176
+ def test_redirect
177
+ with_new_client do |client|
178
+ with_session_handler( client, "/" ) do |s,x|
179
+ assert_equal( 200, s.status_code )
180
+ assert_equal( 'http://localhost:19292/index', s.url )
181
+ end
182
+ end
183
+ end
184
+
185
+ def test_redirect_with_query_string
186
+ with_new_client do |client|
187
+ with_session_handler( client, "/redirects/multi/2?sleep=0" ) do |s,x|
188
+ assert_equal( 200, s.status_code )
189
+ assert_equal( 'http://localhost:19292/redirects/multi/1?sleep=0',
190
+ s.url )
191
+ assert_equal( 'GET /redirects/multi/1?sleep=0',
192
+ find_header( s.request_headers, "Request-Line" ) )
193
+ end
194
+ end
195
+ end
196
+
197
+ def test_multi_redirect
198
+ settings = lambda do |mgr|
199
+ mgr.client_params.set_int_parameter( "http.protocol.max-redirects", 7 )
200
+ end
201
+
202
+ with_new_client( settings ) do |client|
203
+ with_session_handler( client, "/redirects/multi/6" ) do |s,x|
204
+ assert_equal( 200, s.status_code )
205
+ assert_nil x
206
+ end
207
+ end
208
+ end
209
+
210
+ def test_unfollowed_301_redirect
211
+ settings = lambda do |mgr|
212
+ mgr.client_params.set_int_parameter( "http.protocol.max-redirects", 0 )
213
+ end
214
+
215
+ with_new_client( settings ) do |client|
216
+ with_session_handler( client, "/301" ) do |s,x|
217
+ assert_nil( x )
218
+ assert_equal( 301, s.status_code )
219
+ assert_match( %r{/index$},
220
+ find_header( s.response_headers, "Location" ) )
221
+ end
222
+ end
223
+ end
224
+
225
+ def test_too_many_redirects
226
+ settings = lambda do |mgr|
227
+ mgr.client_params.set_int_parameter( "http.protocol.max-redirects", 19 )
228
+ end
229
+
230
+ with_new_client( settings ) do |client|
231
+ with_session_handler( client, "/redirects/multi/20" ) do |s,x|
232
+ assert_equal( 302, s.status_code )
233
+ end
234
+ end
235
+ end
236
+
237
+ def test_redirect_timeout
238
+ skip( "Unreliable timeout with redirects, timing dependent" )
239
+ with_new_client() do |client|
240
+ with_session_handler( client, "/redirects/multi/3?sleep=0.40" ) do |s,x|
241
+ assert_instance_of( SocketTimeoutException, x )
242
+ end
243
+ sleep 0.75
244
+ end
245
+ end
246
+
247
+ def test_bad_server_response
248
+ bs = BrokenServer.new
249
+ bs.start
250
+
251
+ sthread = Thread.new do
252
+ bs.accept { |sock| sock.write "FU Stinky\r\n" }
253
+ end
254
+
255
+ #FIXME: SocketTimeoutException on bad HTTP response line?
256
+ with_new_client do |client|
257
+ with_session_handler( client, "http://localhost:19293/" ) do |s,x|
258
+ assert_instance_of( SocketTimeoutException, x )
259
+ end
260
+ end
261
+
262
+ sthread.join
263
+
264
+ ensure
265
+ bs.stop
266
+ end
267
+
268
+ def test_empty_server_response
269
+ bs = BrokenServer.new
270
+ bs.start
271
+
272
+ sthread = Thread.new do
273
+ bs.accept { |sock| sock.close }
274
+ end
275
+
276
+ with_new_client do |client|
277
+ with_session_handler( client, "http://localhost:19293/" ) do |s,x|
278
+ assert( [ NoHttpResponseException, SocketException ].include?( x.class ) )
279
+ #FIXME: One or the other, timing dependent!?
280
+ end
281
+ end
282
+
283
+ sthread.join
284
+
285
+ ensure
286
+ bs.stop
287
+ end
288
+
289
+ def test_abort_when_too_large
290
+ with_new_client do |client|
291
+ with_session_handler( client, "/giant" ) do |s,x|
292
+ assert_nil( x )
293
+ assert_equal( HTTPSession::TOO_LARGE, s.status_code )
294
+ end
295
+ end
296
+ end
297
+
298
+ def test_abort_when_too_large_length
299
+ with_new_client do |client|
300
+ client.max_content_length = 1
301
+ with_session_handler( client, "/atom.xml" ) do |s,x|
302
+ assert_nil( x )
303
+ assert_equal( HTTPSession::TOO_LARGE_LENGTH, s.status_code )
304
+ end
305
+ end
306
+ end
307
+
308
+ def test_abort_when_wrong_type
309
+ with_new_client do |client|
310
+ client.accepted_content_types = ContentTypeSet.new( [ "gold/*" ] )
311
+ with_session_handler( client, "/giant" ) do |s,x|
312
+ assert_nil( x )
313
+ assert_equal( HTTPSession::NOT_ACCEPTED, s.status_code )
314
+ end
315
+ end
316
+ end
317
+
318
+ def with_session_handler( client, uri, headers = {}, &block )
319
+ session = client.create_session
320
+ uri = "http://localhost:#{server.port}#{uri}" unless uri =~ /^http:/
321
+ session.url = uri
322
+ headers.each do |k,v|
323
+ session.add_request_header( Java::iudex.http.Header.new( k, v ) )
324
+ end
325
+
326
+ handler = TestHandler.new( &block )
327
+ client.request( session, handler )
328
+
329
+ assert( handler.called?, "Handler should have been called!" )
330
+ session.close
331
+ session
332
+ end
333
+
334
+ def with_new_client( mgr_proc = nil )
335
+ # Default manager config
336
+ mgr = HTTPClient3.create_manager
337
+ mgr.client_params.set_parameter(
338
+ RJack::HTTPClient3::HttpMethodParams::RETRY_HANDLER,
339
+ RJack::HTTPClient3::DefaultHttpMethodRetryHandler.new( 0, false ) )
340
+ mgr.client_params.connection_manager_timeout = 500 #ms
341
+ mgr.client_params.so_timeout = 500 #ms
342
+
343
+ # For testing redirects
344
+ mgr.client_params.set_int_parameter( "http.protocol.max-redirects", 20 )
345
+
346
+ # Overrides via proc
347
+ mgr_proc.call( mgr ) if mgr_proc
348
+
349
+ mgr.start
350
+ begin
351
+ yield HTTPClient3::HTTPClient3.new( mgr.client )
352
+ ensure
353
+ mgr.shutdown
354
+ end
355
+ end
356
+
357
+ class TestHandler < BaseResponseHandler
358
+
359
+ def initialize( &block )
360
+ @block = block
361
+ @failure = nil
362
+ end
363
+
364
+ def sessionCompleted( session )
365
+ forward( session, session.error )
366
+ end
367
+
368
+ def called?
369
+ raise @failure if @failure
370
+ @block.nil?
371
+ end
372
+
373
+ def forward( s, x = nil )
374
+ b, @block = @block, nil
375
+ if b
376
+ b.call( s, x )
377
+ else
378
+ flunk "Handler called twice!"
379
+ end
380
+ rescue NativeException => x
381
+ @failure = x.cause
382
+ rescue Exception => x
383
+ @failure = x
384
+ end
385
+
386
+ end
387
+
388
+ def find_header( headers, name )
389
+ cl = headers.find { |h| h.name.to_s == name }
390
+ cl && cl.value.to_s
391
+ end
46
392
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: iudex-httpclient-3
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.0
5
+ version: 1.1.0
6
6
  platform: java
7
7
  authors:
8
8
  - David Kellum
@@ -10,8 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-04 00:00:00 -07:00
14
- default_executable:
13
+ date: 2011-11-13 00:00:00 Z
15
14
  dependencies:
16
15
  - !ruby/object:Gem::Dependency
17
16
  name: iudex-http
@@ -21,7 +20,7 @@ dependencies:
21
20
  requirements:
22
21
  - - ~>
23
22
  - !ruby/object:Gem::Version
24
- version: 1.0.0
23
+ version: 1.1.0
25
24
  type: :runtime
26
25
  version_requirements: *id001
27
26
  - !ruby/object:Gem::Dependency
@@ -52,36 +51,44 @@ dependencies:
52
51
  requirement: &id004 !ruby/object:Gem::Requirement
53
52
  none: false
54
53
  requirements:
55
- - - ">="
56
- - !ruby/object:Gem::Version
57
- version: 1.7.1
58
- - - <
54
+ - - ~>
59
55
  - !ruby/object:Gem::Version
60
- version: "2.1"
56
+ version: "2.3"
61
57
  type: :development
62
58
  version_requirements: *id004
63
59
  - !ruby/object:Gem::Dependency
64
- name: rjack-logback
60
+ name: iudex-http-test
65
61
  prerelease: false
66
62
  requirement: &id005 !ruby/object:Gem::Requirement
67
63
  none: false
68
64
  requirements:
69
65
  - - ~>
70
66
  - !ruby/object:Gem::Version
71
- version: "1.0"
67
+ version: 1.1.0
72
68
  type: :development
73
69
  version_requirements: *id005
74
70
  - !ruby/object:Gem::Dependency
75
- name: rjack-tarpit
71
+ name: rjack-logback
76
72
  prerelease: false
77
73
  requirement: &id006 !ruby/object:Gem::Requirement
78
74
  none: false
79
75
  requirements:
80
76
  - - ~>
81
77
  - !ruby/object:Gem::Version
82
- version: 1.3.0
78
+ version: "1.0"
83
79
  type: :development
84
80
  version_requirements: *id006
81
+ - !ruby/object:Gem::Dependency
82
+ name: rjack-tarpit
83
+ prerelease: false
84
+ requirement: &id007 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ~>
88
+ - !ruby/object:Gem::Version
89
+ version: 1.4.0
90
+ type: :development
91
+ version_requirements: *id007
85
92
  description: |-
86
93
  Iudex is a general purpose web crawler and feed processor in
87
94
  ruby/java. This gem is an rjack-httpclient-3 based implementation of
@@ -106,8 +113,8 @@ files:
106
113
  - lib/iudex-httpclient-3.rb
107
114
  - test/setup.rb
108
115
  - test/test_httpclient.rb
109
- - lib/iudex-httpclient-3/iudex-httpclient-3-1.0.0.jar
110
- has_rdoc: true
116
+ - lib/iudex-httpclient-3/iudex-httpclient-3-1.1.0.jar
117
+ - .gemtest
111
118
  homepage: http://github.com/dekellum/iudex
112
119
  licenses: []
113
120
 
@@ -132,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
132
139
  requirements: []
133
140
 
134
141
  rubyforge_project: iudex-httpclient-3
135
- rubygems_version: 1.5.1
142
+ rubygems_version: 1.8.9
136
143
  signing_key:
137
144
  specification_version: 3
138
145
  summary: Iudex is a general purpose web crawler and feed processor in ruby/java