iudex-worker 1.0.0-java → 1.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
data/History.rdoc CHANGED
@@ -1,2 +1,14 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Update to iudex-core, -da, -rome, -html, -simhash ~> 1.1.0
3
+ * Changes for VisitManager, VisitCounter, RedirectHandler, Revisitor
4
+ * Use ContentTypeSet in ContentFetcher
5
+ * Generalize Agent for all three HTTP clients; all are now
6
+ optional/dev dependencies
7
+ * Add iudex-char-detector ~> 1.1.0 dep and use CharDetectFilter in
8
+ FCF.page_receiver
9
+ * Improved setup error logging in Agent
10
+ * Enable :main listeners in filter_chain_factory
11
+ * Update to minitest ~> 2.3
12
+
1
13
  === 1.0.0 (2011-04-04)
2
14
  * Initial release.
data/Manifest.txt CHANGED
@@ -4,6 +4,8 @@ README.rdoc
4
4
  Rakefile
5
5
  bin/iudex-worker-fg
6
6
  config/config.rb
7
+ config/config_async_http.rb
8
+ config/config_jetty_http.rb
7
9
  init/iudex-worker
8
10
  lib/iudex-worker/base.rb
9
11
  lib/iudex-worker.rb
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
4
4
  require 'iudex-worker/base'
5
5
 
6
6
  require 'rubygems'
7
- gem 'rjack-tarpit', '~> 1.2'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
8
  require 'rjack-tarpit'
9
9
 
10
10
  t = RJack::TarPit.new( 'iudex-worker', Iudex::Worker::VERSION, :java_platform )
@@ -12,16 +12,19 @@ t = RJack::TarPit.new( 'iudex-worker', Iudex::Worker::VERSION, :java_platform )
12
12
  t.specify do |h|
13
13
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
14
14
 
15
- h.extra_deps += [ [ 'iudex-core', '~> 1.0.0' ],
15
+ h.extra_deps += [ [ 'iudex-core', '~> 1.1.0' ],
16
16
  [ 'rjack-logback', '~> 1.0' ],
17
- [ 'iudex-da', '~> 1.0.0' ],
18
- [ 'iudex-rome', '~> 1.0.0' ],
19
- [ 'iudex-html', '~> 1.0.0' ],
20
- [ 'iudex-simhash', '~> 1.0.0' ],
21
- [ 'iudex-httpclient-3', '~> 1.0.0' ] ]
17
+ [ 'iudex-da', '~> 1.1.0' ],
18
+ [ 'iudex-rome', '~> 1.1.0' ],
19
+ [ 'iudex-html', '~> 1.1.0' ],
20
+ [ 'iudex-simhash', '~> 1.1.0' ],
21
+ [ 'iudex-char-detector', '~> 1.1.0' ] ]
22
22
 
23
23
  h.testlib = :minitest
24
- h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ] ]
24
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
25
+ [ 'iudex-httpclient-3', '~> 1.1.0' ],
26
+ [ 'iudex-jetty-httpclient', '~> 1.1.0' ],
27
+ [ 'iudex-async-httpclient', '~> 1.1.0' ] ]
25
28
  end
26
29
 
27
30
  task :chk_hist_vers do
data/bin/iudex-worker-fg CHANGED
@@ -35,11 +35,11 @@ module IudexBinScript
35
35
  Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
36
36
 
37
37
  OptionParser.new do |opts|
38
- opts.on( "-v", "--version", "Display version" ) do |file|
38
+ opts.on( "-v", "--version", "Display version" ) do
39
39
  puts "iudex-worker: #{ Worker::VERSION }"
40
40
  exit 1
41
41
  end
42
- opts.on( "-d", "--debug", "Enable verbose DEBUG logging" ) do |file|
42
+ opts.on( "-d", "--debug", "Enable verbose DEBUG logging" ) do
43
43
  Logback[ 'iudex' ].level = Logback::DEBUG
44
44
  end
45
45
  Hooker.register_config( opts )
data/config/config.rb CHANGED
@@ -7,7 +7,8 @@ Iudex.configure do |c|
7
7
  threads = 3
8
8
 
9
9
  c.setup_connect_props do
10
- { :ds_pool => { :max_active => threads / 3 * 2,
10
+ { :database => 'iudex_test',
11
+ :ds_pool => { :max_active => threads / 3 * 2,
11
12
  :max_idle => threads / 3 },
12
13
  :loglevel => 1 }
13
14
  end
@@ -16,9 +17,15 @@ Iudex.configure do |c|
16
17
  mgr.manager_params.max_total_connections = threads * 10
17
18
  end
18
19
 
19
- c.setup_visit_executor do |vx|
20
+ c.setup_visit_manager do |vx|
20
21
  vx.max_threads = threads
21
- vx.min_host_delay = 100 #ms
22
+ end
23
+
24
+ c.setup_visit_queue do |q|
25
+ q.default_min_host_delay = 100 #ms
26
+ q.default_max_access_per_host = 1
27
+
28
+ q.configure_host( "gravitext.com", 100, 2 ) # 100ms, 2 connections
22
29
  end
23
30
 
24
31
  c.setup_work_poller do |wp|
@@ -30,15 +37,15 @@ Iudex.configure do |c|
30
37
  c.setup_filter_factory do |ff|
31
38
 
32
39
  def ff.barc_writer
33
- bw = super
34
- bw.do_compress = false
35
- bw
40
+ super.tap do |w|
41
+ w.do_compress = false
42
+ end
36
43
  end
37
44
 
38
45
  def ff.barc_directory
39
- bdir = super
40
- bdir.target_length = 2 * ( 1024 ** 2 )
41
- bdir
46
+ super.tap do |bdir|
47
+ bdir.target_length = 2 * ( 1024 ** 2 )
48
+ end
42
49
  end
43
50
 
44
51
  end
@@ -0,0 +1,13 @@
1
+ require 'iudex-async-httpclient'
2
+
3
+ Iudex.configure do |c|
4
+
5
+ c.setup_async_httpclient do
6
+ { :connection_timeout_in_ms => 5_000,
7
+ :request_timeout_in_ms => 10_000,
8
+ :idle_connection_timeout_in_ms => 6_000,
9
+ :maximum_connections_total => 200,
10
+ :maximum_connections_per_host => 5 }
11
+ end
12
+
13
+ end
@@ -0,0 +1,11 @@
1
+ require 'iudex-jetty-httpclient'
2
+
3
+ Iudex.configure do |c|
4
+
5
+ c.setup_jetty_httpclient do
6
+ { :timeout => 20_000,
7
+ :max_connections_per_address => 2,
8
+ :max_queue_size_per_address => 20 }
9
+ end
10
+
11
+ end
data/init/iudex-worker CHANGED
@@ -24,7 +24,7 @@
24
24
 
25
25
  require 'rubygems'
26
26
 
27
- gem( "iudex-worker", "= 1.0.0" )
27
+ gem( "iudex-worker", "= 1.1.0" )
28
28
 
29
29
  module IudexInitScript
30
30
 
@@ -18,8 +18,6 @@ require 'iudex-da'
18
18
  require 'iudex-da/key_helper'
19
19
  require 'iudex-da/pool_data_source_factory'
20
20
 
21
- require 'iudex-httpclient-3'
22
-
23
21
  require 'iudex-worker'
24
22
  require 'iudex-worker/filter_chain_factory'
25
23
 
@@ -36,6 +34,8 @@ module Iudex
36
34
  include Gravitext::HTMap
37
35
 
38
36
  def initialize
37
+ @log = RJack::SLF4J[ self.class ]
38
+ @http_manager = nil
39
39
  Hooker.apply( [ :iudex, :worker ], self )
40
40
  end
41
41
 
@@ -48,28 +48,66 @@ module Iudex
48
48
  FilterChainFactory.new( 'agent' )
49
49
  end
50
50
 
51
+ def http_client( executor )
52
+ if defined?( JettyHTTPClient )
53
+ @log.info "Setting up JettyHTTPClient"
54
+ JettyHTTPClient.create_client.tap do |c|
55
+ c.executor = executor
56
+ c.start
57
+ end
58
+ elsif defined?( AsyncHTTPClient )
59
+ @log.info "Setting up AsyncHTTPClient"
60
+ AsyncHTTPClient.create_client( :executor_service => executor )
61
+ else
62
+ gem 'iudex-httpclient-3', '~> 1.1.0'
63
+ require 'iudex-httpclient-3'
64
+ @log.info "Setting up HTTPClient3"
65
+ @http_manager = HTTPClient3.create_manager
66
+ @http_manager.start
67
+ HTTPClient3::HTTPClient3.new( @http_manager.client )
68
+ end
69
+ end
70
+
71
+ def visit_manager( wpoller )
72
+ vexec = VisitManager.new( wpoller )
73
+ Hooker.apply( [ :iudex, :visit_manager ], vexec )
74
+ end
75
+
76
+ def work_poller( data_source )
77
+ cmapper = ContentMapper.new( keys( poll_keys ) )
78
+ wpoller = WorkPoller.new( data_source, cmapper )
79
+
80
+ visit_q = Hooker.apply( [ :iudex, :visit_queue ], VisitQueue.new )
81
+
82
+ wpoller.visit_queue_factory = VisitQueueFactory.new( visit_q )
83
+
84
+ Hooker.apply( [ :iudex, :work_poller ], wpoller )
85
+ end
86
+
51
87
  def run
52
88
  Hooker.with( :iudex ) do
53
89
  dsf = PoolDataSourceFactory.new
54
90
  data_source = dsf.create
55
91
 
56
- cmapper = ContentMapper.new( keys( poll_keys ) )
57
- wpoller = WorkPoller.new( data_source, cmapper )
58
- Hooker.apply( :work_poller, wpoller )
92
+ wpoller = work_poller( data_source )
93
+ vexec = visit_manager( wpoller )
94
+ vexec.start_executor
59
95
 
60
- mgr = HTTPClient3.create_manager
61
- mgr.start
62
- http_client = HTTPClient3::HTTPClient3.new( mgr.client )
96
+ hclient = http_client( vexec.executor )
63
97
 
64
98
  fcf = filter_chain_factory
65
- fcf.http_client = http_client
99
+ fcf.http_client = hclient
66
100
  fcf.data_source = data_source
101
+ fcf.visit_counter = vexec
102
+
103
+ # FilterChain's executor is the same executor, unless using
104
+ # HTTPClient3, where executor is best not used
105
+ fcf.executor = vexec.executor unless @http_manager
67
106
 
68
107
  Hooker.apply( :filter_factory, fcf )
69
108
 
70
109
  fcf.filter do |chain|
71
- vexec = VisitExecutor.new( chain, wpoller )
72
- Hooker.apply( :visit_executor, vexec )
110
+ vexec.filter_chain = chain
73
111
 
74
112
  Hooker.log_not_applied # All hooks should be used by now
75
113
 
@@ -77,10 +115,15 @@ module Iudex
77
115
  vexec.join #Run until interrupted
78
116
  end # fcf closes
79
117
 
80
- mgr.shutdown
118
+ hclient.close if hclient.respond_to?( :close )
119
+ @http_manager.shutdown if @http_manager
120
+
81
121
  dsf.close
82
122
  end
123
+ rescue => e
124
+ @log.error( "On run: ", e )
83
125
  end
126
+
84
127
  end
85
128
 
86
129
  end
@@ -16,6 +16,6 @@
16
16
 
17
17
  module Iudex
18
18
  module Worker
19
- VERSION = '1.0.0'
19
+ VERSION = '1.1.0'
20
20
  end
21
21
  end
@@ -23,11 +23,17 @@ module Iudex
23
23
  include Iudex::HTTP
24
24
  include Iudex::Core::Filters
25
25
 
26
- def create_content_fetcher( accept_types, receiver_sym )
27
- cf = ContentFetcher.new( http_client, create_chain( receiver_sym ) )
26
+ def create_content_fetcher( accept_types, receiver, listener = nil )
27
+ cf = ContentFetcher.new( http_client,
28
+ visit_counter,
29
+ create_chain( receiver, nil, listener ) )
30
+
31
+ cf.executor = executor if executor
28
32
 
29
33
  alist = accept_list( accept_types )
30
- cf.accepted_content_types = alist unless alist.include?( '*/*' )
34
+ unless alist.include?( '*/*' )
35
+ cf.accepted_content_types = ContentTypeSet.new( alist )
36
+ end
31
37
 
32
38
  headers = [ [ 'User-Agent', http_user_agent ],
33
39
  [ 'Accept', accept_header( accept_types ) ] ]
@@ -26,6 +26,8 @@ require 'iudex-da/factory_helper'
26
26
 
27
27
  require 'iudex-rome'
28
28
 
29
+ require 'iudex-char-detector'
30
+
29
31
  require 'iudex-html'
30
32
  require 'iudex-html/factory_helper'
31
33
 
@@ -45,6 +47,7 @@ module Iudex
45
47
  include Iudex::Core
46
48
  include Iudex::Core::Filters
47
49
  include Iudex::ROME
50
+ include Iudex::CharDetector
48
51
 
49
52
  include Iudex::DA::Filters::FactoryHelper
50
53
  include Iudex::HTML::Filters::FactoryHelper
@@ -53,6 +56,8 @@ module Iudex
53
56
 
54
57
  attr_accessor :http_client
55
58
  attr_accessor :data_source
59
+ attr_accessor :visit_counter
60
+ attr_accessor :executor
56
61
 
57
62
  def initialize( name )
58
63
  super
@@ -60,15 +65,14 @@ module Iudex
60
65
  end
61
66
 
62
67
  def setup_reporters
63
- add_summary_reporter
64
- add_by_filter_reporter
68
+ # Use default, preserved for overrides
65
69
  end
66
70
 
67
71
  def filters
68
72
  [ UHashMDCSetter.new,
69
73
  DefaultFilter.new,
70
74
  super,
71
- type_switch ].flatten
75
+ type_switch ]
72
76
  end
73
77
 
74
78
  def listeners
@@ -76,8 +80,8 @@ module Iudex
76
80
  end
77
81
 
78
82
  def type_map
79
- { "FEED" => feed_fetcher,
80
- "PAGE" => page_fetcher }
83
+ { "FEED" => [ feed_fetcher, :main ],
84
+ "PAGE" => [ page_fetcher, :main ] }
81
85
  end
82
86
 
83
87
  def type_switch( tmap = type_map )
@@ -85,15 +89,17 @@ module Iudex
85
89
  end
86
90
 
87
91
  def feed_fetcher
88
- [ create_content_fetcher( feed_mime_types, :feed_receiver ) ]
92
+ [ create_content_fetcher( feed_mime_types, :feed_receiver, :main ) ]
89
93
  end
90
94
 
91
95
  def page_fetcher
92
- [ create_content_fetcher( page_mime_types, :page_receiver ) ]
96
+ [ create_content_fetcher( page_mime_types, :page_receiver, :main ) ]
93
97
  end
94
98
 
95
99
  def feed_receiver
96
- [ RomeFeedParser.new,
100
+ [ RedirectHandler.new,
101
+ Revisitor.new( visit_counter ),
102
+ RomeFeedParser.new,
97
103
  DefaultFilter.new,
98
104
  DateChangeFilter.new( false ),
99
105
  feed_updater ]
@@ -109,7 +115,7 @@ module Iudex
109
115
  ref_common_cleanup,
110
116
  Prioritizer.new( "feed-ref-new",
111
117
  :constant => 50,
112
- :min_next => 0.0 ) ].flatten
118
+ :min_next => 0.0 ) ]
113
119
  end
114
120
 
115
121
  def feed_ref_update
@@ -118,24 +124,32 @@ module Iudex
118
124
  ref_common_cleanup,
119
125
  Prioritizer.new( "feed-ref-update",
120
126
  :constant => 10,
121
- :min_next => 0.0 ) ].flatten
122
- end
123
-
124
- # Note: *_post is run possibly twice, once for both base content
125
- # map and referer map.
127
+ :min_next => 0.0 ) ]
128
+ end
129
+
130
+ # Filters to apply for feed update.
131
+ #
132
+ # Notes:
133
+ #
134
+ # * This is run possibly twice, for both base content map and
135
+ # referer map if present.
136
+ #
137
+ # * If this is an update then these filters act on a *new* map,
138
+ # thus any changes made here will not be visible after exit
139
+ # from the update_filter.
126
140
  def feed_post
127
141
  [ UHashMDCSetter.new,
128
142
  ref_common_cleanup,
129
143
  Prioritizer.new( "feed-post",
130
144
  :constant => 30,
131
145
  :visiting_now => true ),
132
- last_visit_setter ].flatten
146
+ last_visit_setter ]
133
147
  end
134
148
 
135
149
  def ref_common_cleanup
136
150
  [ ref_html_filters,
137
151
  TextCtrlWSFilter.new( :title.to_k ),
138
- FutureDateFilter.new( :pub_date.to_k ) ].flatten
152
+ FutureDateFilter.new( :pub_date.to_k ) ]
139
153
  end
140
154
 
141
155
  def ref_html_filters
@@ -143,7 +157,7 @@ module Iudex
143
157
  html_clean_filters( :summary ),
144
158
  html_clean_filters( :content ),
145
159
  html_write_filter( :summary ),
146
- html_write_filter( :content ) ].flatten
160
+ html_write_filter( :content ) ]
147
161
  end
148
162
 
149
163
  def feed_update_keys
@@ -151,9 +165,12 @@ module Iudex
151
165
  end
152
166
 
153
167
  def page_receiver
154
- [ html_clean_filters( :source ),
168
+ [ RedirectHandler.new,
169
+ Revisitor.new( visit_counter ),
170
+ CharDetectFilter.new,
171
+ html_clean_filters( :source ),
155
172
  simhash_generator,
156
- page_updater ].flatten
173
+ page_updater ]
157
174
  end
158
175
 
159
176
  def barc_writer
@@ -171,8 +188,16 @@ module Iudex
171
188
  create_update_filter( keys( page_update_keys ), :page_post )
172
189
  end
173
190
 
174
- # Note: *_post is run possibly twice, once for both base content
175
- # map and referer map.
191
+ # Filters to apply during page update
192
+ #
193
+ # Notes:
194
+ #
195
+ # * This is run possibly twice, for both base content map and
196
+ # referer map if present.
197
+ #
198
+ # * If this is an update then these filters act on a *new* map,
199
+ # thus any changes made here will not be visible after exit
200
+ # from the update_filter.
176
201
  def page_post
177
202
  [ UHashMDCSetter.new,
178
203
  barc_writer, # Not run in 302 referer case, since no SOURCE.
@@ -184,7 +209,7 @@ module Iudex
184
209
  end
185
210
 
186
211
  def page_update_keys
187
- [ :uhash, :host, :url, :type,
212
+ [ :uhash, :domain, :url, :type,
188
213
  :ref_pub_date, :pub_date,
189
214
  :priority, :last_visit, :next_visit_after,
190
215
  :status, :etag, :reason, :referer, :referent,
data/test/setup.rb CHANGED
@@ -21,7 +21,7 @@ $LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
21
21
 
22
22
  require 'rubygems'
23
23
  require 'rjack-logback'
24
- RJack::Logback.config_console( :stderr => true )
24
+ RJack::Logback.config_console( :stderr => true, :mdc => "uhash" )
25
25
 
26
26
  require 'minitest/unit'
27
27
  require 'minitest/autorun'
data/test/test_agent.rb CHANGED
@@ -27,6 +27,7 @@ class TestAgent < MiniTest::Unit::TestCase
27
27
 
28
28
  def setup
29
29
  Logback[ 'iudex.worker.FilterChainFactory' ].level = Logback::WARN
30
+ Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
30
31
  end
31
32
 
32
33
  def teardown
@@ -46,11 +47,33 @@ class TestAgent < MiniTest::Unit::TestCase
46
47
  assert_agent
47
48
  end
48
49
 
50
+ def test_agent_with_sample_config_jetty_http
51
+ Hooker.load_file( File.join( File.dirname( __FILE__ ),
52
+ '..', 'config', 'config_jetty_http.rb' ) )
53
+
54
+ assert_agent
55
+ ensure
56
+ # Hack to avoid interference in test of other configs, given
57
+ # require iudex-jetty-httpclient. This only works once, but
58
+ # that's enough for testing.
59
+ Iudex.send( :remove_const, :JettyHTTPClient )
60
+ end
61
+
62
+ def test_agent_with_sample_config_async_http
63
+ Hooker.load_file( File.join( File.dirname( __FILE__ ),
64
+ '..', 'config', 'config_async_http.rb' ) )
65
+
66
+ assert_agent
67
+ ensure
68
+ # Same hack as above.
69
+ Iudex.send( :remove_const, :AsyncHTTPClient )
70
+ end
71
+
49
72
  def assert_agent
50
73
 
51
- # Stub VisitExecutor.start to allow agent.run to return early.
52
- Hooker.add( [ :iudex, :visit_executor ] ) do |vexec|
53
- def vexec.start
74
+ # Stub VisitManager.start to allow agent.run to return early.
75
+ Hooker.add( [ :iudex, :visit_manager ] ) do |vm|
76
+ def vm.start
54
77
  #disable
55
78
  end
56
79
  end
@@ -19,10 +19,6 @@
19
19
 
20
20
  require File.join( File.dirname( __FILE__ ), "setup" )
21
21
 
22
- RJack::Logback.config_console( :stderr => true, :mdc => "uhash" )
23
-
24
- RJack::Logback[ 'iudex' ].level = RJack::Logback::DEBUG
25
-
26
22
  require 'iudex-httpclient-3'
27
23
 
28
24
  require 'iudex-da'
@@ -30,34 +26,70 @@ require 'iudex-da/pool_data_source_factory'
30
26
 
31
27
  require 'iudex-worker'
32
28
  require 'iudex-worker/filter_chain_factory'
29
+ require 'iudex-httpclient-3'
33
30
 
34
31
  class TestFilterChainFactory < MiniTest::Unit::TestCase
35
32
  include Iudex
36
33
  include Gravitext::HTMap
37
34
 
35
+ def setup
36
+ RJack::Logback[ 'iudex' ].level = RJack::Logback::DEBUG
37
+ end
38
+
39
+ def teardown
40
+ RJack::Logback[ 'iudex' ].level = nil
41
+ end
42
+
43
+ import 'iudex.core.VisitCounter'
44
+ class TestVisitCounter
45
+ include VisitCounter
46
+
47
+ attr_reader :released
48
+
49
+ def initialize
50
+ super()
51
+ @released = []
52
+ end
53
+
54
+ def release( acquired, newOrder )
55
+ @released << acquired
56
+ end
57
+ end
58
+
38
59
  def test_filter
39
60
  fcf = Worker::FilterChainFactory.new( "test" )
40
61
 
41
62
  mgr = HTTPClient3.create_manager
42
63
  mgr.start
43
64
  fcf.http_client = HTTPClient3::HTTPClient3.new( mgr.client )
65
+ fcf.visit_counter = counter = TestVisitCounter.new
44
66
 
45
67
  dsf = DA::PoolDataSourceFactory.new
46
68
  fcf.data_source = dsf.create
47
69
 
48
70
  fcf.filter do |chain|
49
71
  # Run twice (assume new the first time, updates the second).
50
- 2.times do
51
- content = UniMap.new
52
- content.url = Core::VisitURL.normalize( "http://gravitext.com/atom.xml" )
53
- content.type = "FEED"
54
- content.priority = 1.0
55
- assert( chain.filter( content ) )
72
+
73
+ order = UniMap.new.tap do |o|
74
+ o.url = Core::VisitURL.normalize( "http://gravitext.com/atom.xml" )
75
+ o.type = "FEED"
76
+ o.priority = 1.0
56
77
  end
78
+
79
+ orders = [ order, order.clone ]
80
+
81
+ orders.each do |o|
82
+ assert( chain.filter( o ) )
83
+ end
84
+
85
+ # Note this only works timing wise because of blocking
86
+ # HTTPClient.
87
+ assert_equal( orders, counter.released )
57
88
  end
58
89
 
59
90
  mgr.shutdown
60
91
  dsf.close
92
+
61
93
  end
62
94
 
63
95
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: iudex-worker
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.0
5
+ version: 1.1.0
6
6
  platform: java
7
7
  authors:
8
8
  - David Kellum
@@ -10,8 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-04 00:00:00 -07:00
14
- default_executable:
13
+ date: 2011-11-13 00:00:00 Z
15
14
  dependencies:
16
15
  - !ruby/object:Gem::Dependency
17
16
  name: iudex-core
@@ -21,7 +20,7 @@ dependencies:
21
20
  requirements:
22
21
  - - ~>
23
22
  - !ruby/object:Gem::Version
24
- version: 1.0.0
23
+ version: 1.1.0
25
24
  type: :runtime
26
25
  version_requirements: *id001
27
26
  - !ruby/object:Gem::Dependency
@@ -43,7 +42,7 @@ dependencies:
43
42
  requirements:
44
43
  - - ~>
45
44
  - !ruby/object:Gem::Version
46
- version: 1.0.0
45
+ version: 1.1.0
47
46
  type: :runtime
48
47
  version_requirements: *id003
49
48
  - !ruby/object:Gem::Dependency
@@ -54,7 +53,7 @@ dependencies:
54
53
  requirements:
55
54
  - - ~>
56
55
  - !ruby/object:Gem::Version
57
- version: 1.0.0
56
+ version: 1.1.0
58
57
  type: :runtime
59
58
  version_requirements: *id004
60
59
  - !ruby/object:Gem::Dependency
@@ -65,7 +64,7 @@ dependencies:
65
64
  requirements:
66
65
  - - ~>
67
66
  - !ruby/object:Gem::Version
68
- version: 1.0.0
67
+ version: 1.1.0
69
68
  type: :runtime
70
69
  version_requirements: *id005
71
70
  - !ruby/object:Gem::Dependency
@@ -76,18 +75,18 @@ dependencies:
76
75
  requirements:
77
76
  - - ~>
78
77
  - !ruby/object:Gem::Version
79
- version: 1.0.0
78
+ version: 1.1.0
80
79
  type: :runtime
81
80
  version_requirements: *id006
82
81
  - !ruby/object:Gem::Dependency
83
- name: iudex-httpclient-3
82
+ name: iudex-char-detector
84
83
  prerelease: false
85
84
  requirement: &id007 !ruby/object:Gem::Requirement
86
85
  none: false
87
86
  requirements:
88
87
  - - ~>
89
88
  - !ruby/object:Gem::Version
90
- version: 1.0.0
89
+ version: 1.1.0
91
90
  type: :runtime
92
91
  version_requirements: *id007
93
92
  - !ruby/object:Gem::Dependency
@@ -96,25 +95,55 @@ dependencies:
96
95
  requirement: &id008 !ruby/object:Gem::Requirement
97
96
  none: false
98
97
  requirements:
99
- - - ">="
100
- - !ruby/object:Gem::Version
101
- version: 1.7.1
102
- - - <
98
+ - - ~>
103
99
  - !ruby/object:Gem::Version
104
- version: "2.1"
100
+ version: "2.3"
105
101
  type: :development
106
102
  version_requirements: *id008
107
103
  - !ruby/object:Gem::Dependency
108
- name: rjack-tarpit
104
+ name: iudex-httpclient-3
109
105
  prerelease: false
110
106
  requirement: &id009 !ruby/object:Gem::Requirement
111
107
  none: false
112
108
  requirements:
113
109
  - - ~>
114
110
  - !ruby/object:Gem::Version
115
- version: 1.3.0
111
+ version: 1.1.0
116
112
  type: :development
117
113
  version_requirements: *id009
114
+ - !ruby/object:Gem::Dependency
115
+ name: iudex-jetty-httpclient
116
+ prerelease: false
117
+ requirement: &id010 !ruby/object:Gem::Requirement
118
+ none: false
119
+ requirements:
120
+ - - ~>
121
+ - !ruby/object:Gem::Version
122
+ version: 1.1.0
123
+ type: :development
124
+ version_requirements: *id010
125
+ - !ruby/object:Gem::Dependency
126
+ name: iudex-async-httpclient
127
+ prerelease: false
128
+ requirement: &id011 !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: 1.1.0
134
+ type: :development
135
+ version_requirements: *id011
136
+ - !ruby/object:Gem::Dependency
137
+ name: rjack-tarpit
138
+ prerelease: false
139
+ requirement: &id012 !ruby/object:Gem::Requirement
140
+ none: false
141
+ requirements:
142
+ - - ~>
143
+ - !ruby/object:Gem::Version
144
+ version: 1.4.0
145
+ type: :development
146
+ version_requirements: *id012
118
147
  description: |-
119
148
  Iudex is a general purpose web crawler and feed processor in
120
149
  ruby/java. The iudex-worker gem provides a worker daemon for feed/page
@@ -136,6 +165,8 @@ files:
136
165
  - Rakefile
137
166
  - bin/iudex-worker-fg
138
167
  - config/config.rb
168
+ - config/config_async_http.rb
169
+ - config/config_jetty_http.rb
139
170
  - init/iudex-worker
140
171
  - lib/iudex-worker/base.rb
141
172
  - lib/iudex-worker.rb
@@ -147,7 +178,7 @@ files:
147
178
  - test/test_agent.rb
148
179
  - test/test_filter_chain_factory.rb
149
180
  - test/test_prioritizer.rb
150
- has_rdoc: true
181
+ - .gemtest
151
182
  homepage: http://github.com/dekellum/iudex
152
183
  licenses: []
153
184
 
@@ -172,7 +203,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
172
203
  requirements: []
173
204
 
174
205
  rubyforge_project: iudex-worker
175
- rubygems_version: 1.5.1
206
+ rubygems_version: 1.8.9
176
207
  signing_key:
177
208
  specification_version: 3
178
209
  summary: Iudex is a general purpose web crawler and feed processor in ruby/java