iudex-worker 1.0.0-java → 1.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemtest ADDED
File without changes
data/History.rdoc CHANGED
@@ -1,2 +1,14 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Update to iudex-core, -da, -rome, -html, -simhash ~> 1.1.0
3
+ * Changes for VisitManager, VisitCounter, RedirectHandler, Revisitor
4
+ * Use ContentTypeSet in ContentFetcher
5
+ * Generalize Agent to work with all three HTTP clients; all are now
6
+ optional/dev dependencies
7
+ * Add iudex-char-detector ~> 1.1.0 dep and use CharDetectFilter in
8
+ FCF.page_receiver
9
+ * Improved setup error logging in Agent
10
+ * Enable :main listeners in filter_chain_factory
11
+ * Update to minitest ~> 2.3
12
+
1
13
  === 1.0.0 (2011-04-04)
2
14
  * Initial release.
data/Manifest.txt CHANGED
@@ -4,6 +4,8 @@ README.rdoc
4
4
  Rakefile
5
5
  bin/iudex-worker-fg
6
6
  config/config.rb
7
+ config/config_async_http.rb
8
+ config/config_jetty_http.rb
7
9
  init/iudex-worker
8
10
  lib/iudex-worker/base.rb
9
11
  lib/iudex-worker.rb
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
4
4
  require 'iudex-worker/base'
5
5
 
6
6
  require 'rubygems'
7
- gem 'rjack-tarpit', '~> 1.2'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
8
  require 'rjack-tarpit'
9
9
 
10
10
  t = RJack::TarPit.new( 'iudex-worker', Iudex::Worker::VERSION, :java_platform )
@@ -12,16 +12,19 @@ t = RJack::TarPit.new( 'iudex-worker', Iudex::Worker::VERSION, :java_platform )
12
12
  t.specify do |h|
13
13
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
14
14
 
15
- h.extra_deps += [ [ 'iudex-core', '~> 1.0.0' ],
15
+ h.extra_deps += [ [ 'iudex-core', '~> 1.1.0' ],
16
16
  [ 'rjack-logback', '~> 1.0' ],
17
- [ 'iudex-da', '~> 1.0.0' ],
18
- [ 'iudex-rome', '~> 1.0.0' ],
19
- [ 'iudex-html', '~> 1.0.0' ],
20
- [ 'iudex-simhash', '~> 1.0.0' ],
21
- [ 'iudex-httpclient-3', '~> 1.0.0' ] ]
17
+ [ 'iudex-da', '~> 1.1.0' ],
18
+ [ 'iudex-rome', '~> 1.1.0' ],
19
+ [ 'iudex-html', '~> 1.1.0' ],
20
+ [ 'iudex-simhash', '~> 1.1.0' ],
21
+ [ 'iudex-char-detector', '~> 1.1.0' ] ]
22
22
 
23
23
  h.testlib = :minitest
24
- h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ] ]
24
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
25
+ [ 'iudex-httpclient-3', '~> 1.1.0' ],
26
+ [ 'iudex-jetty-httpclient', '~> 1.1.0' ],
27
+ [ 'iudex-async-httpclient', '~> 1.1.0' ] ]
25
28
  end
26
29
 
27
30
  task :chk_hist_vers do
data/bin/iudex-worker-fg CHANGED
@@ -35,11 +35,11 @@ module IudexBinScript
35
35
  Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
36
36
 
37
37
  OptionParser.new do |opts|
38
- opts.on( "-v", "--version", "Display version" ) do |file|
38
+ opts.on( "-v", "--version", "Display version" ) do
39
39
  puts "iudex-worker: #{ Worker::VERSION }"
40
40
  exit 1
41
41
  end
42
- opts.on( "-d", "--debug", "Enable verbose DEBUG logging" ) do |file|
42
+ opts.on( "-d", "--debug", "Enable verbose DEBUG logging" ) do
43
43
  Logback[ 'iudex' ].level = Logback::DEBUG
44
44
  end
45
45
  Hooker.register_config( opts )
data/config/config.rb CHANGED
@@ -7,7 +7,8 @@ Iudex.configure do |c|
7
7
  threads = 3
8
8
 
9
9
  c.setup_connect_props do
10
- { :ds_pool => { :max_active => threads / 3 * 2,
10
+ { :database => 'iudex_test',
11
+ :ds_pool => { :max_active => threads / 3 * 2,
11
12
  :max_idle => threads / 3 },
12
13
  :loglevel => 1 }
13
14
  end
@@ -16,9 +17,15 @@ Iudex.configure do |c|
16
17
  mgr.manager_params.max_total_connections = threads * 10
17
18
  end
18
19
 
19
- c.setup_visit_executor do |vx|
20
+ c.setup_visit_manager do |vx|
20
21
  vx.max_threads = threads
21
- vx.min_host_delay = 100 #ms
22
+ end
23
+
24
+ c.setup_visit_queue do |q|
25
+ q.default_min_host_delay = 100 #ms
26
+ q.default_max_access_per_host = 1
27
+
28
+ q.configure_host( "gravitext.com", 100, 2 ) # 100ms, 2 connections
22
29
  end
23
30
 
24
31
  c.setup_work_poller do |wp|
@@ -30,15 +37,15 @@ Iudex.configure do |c|
30
37
  c.setup_filter_factory do |ff|
31
38
 
32
39
  def ff.barc_writer
33
- bw = super
34
- bw.do_compress = false
35
- bw
40
+ super.tap do |w|
41
+ w.do_compress = false
42
+ end
36
43
  end
37
44
 
38
45
  def ff.barc_directory
39
- bdir = super
40
- bdir.target_length = 2 * ( 1024 ** 2 )
41
- bdir
46
+ super.tap do |bdir|
47
+ bdir.target_length = 2 * ( 1024 ** 2 )
48
+ end
42
49
  end
43
50
 
44
51
  end
@@ -0,0 +1,13 @@
1
+ require 'iudex-async-httpclient'
2
+
3
+ Iudex.configure do |c|
4
+
5
+ c.setup_async_httpclient do
6
+ { :connection_timeout_in_ms => 5_000,
7
+ :request_timeout_in_ms => 10_000,
8
+ :idle_connection_timeout_in_ms => 6_000,
9
+ :maximum_connections_total => 200,
10
+ :maximum_connections_per_host => 5 }
11
+ end
12
+
13
+ end
@@ -0,0 +1,11 @@
1
+ require 'iudex-jetty-httpclient'
2
+
3
+ Iudex.configure do |c|
4
+
5
+ c.setup_jetty_httpclient do
6
+ { :timeout => 20_000,
7
+ :max_connections_per_address => 2,
8
+ :max_queue_size_per_address => 20 }
9
+ end
10
+
11
+ end
data/init/iudex-worker CHANGED
@@ -24,7 +24,7 @@
24
24
 
25
25
  require 'rubygems'
26
26
 
27
- gem( "iudex-worker", "= 1.0.0" )
27
+ gem( "iudex-worker", "= 1.1.0" )
28
28
 
29
29
  module IudexInitScript
30
30
 
@@ -18,8 +18,6 @@ require 'iudex-da'
18
18
  require 'iudex-da/key_helper'
19
19
  require 'iudex-da/pool_data_source_factory'
20
20
 
21
- require 'iudex-httpclient-3'
22
-
23
21
  require 'iudex-worker'
24
22
  require 'iudex-worker/filter_chain_factory'
25
23
 
@@ -36,6 +34,8 @@ module Iudex
36
34
  include Gravitext::HTMap
37
35
 
38
36
  def initialize
37
+ @log = RJack::SLF4J[ self.class ]
38
+ @http_manager = nil
39
39
  Hooker.apply( [ :iudex, :worker ], self )
40
40
  end
41
41
 
@@ -48,28 +48,66 @@ module Iudex
48
48
  FilterChainFactory.new( 'agent' )
49
49
  end
50
50
 
51
+ def http_client( executor )
52
+ if defined?( JettyHTTPClient )
53
+ @log.info "Setting up JettyHTTPClient"
54
+ JettyHTTPClient.create_client.tap do |c|
55
+ c.executor = executor
56
+ c.start
57
+ end
58
+ elsif defined?( AsyncHTTPClient )
59
+ @log.info "Setting up AsyncHTTPClient"
60
+ AsyncHTTPClient.create_client( :executor_service => executor )
61
+ else
62
+ gem 'iudex-httpclient-3', '~> 1.1.0'
63
+ require 'iudex-httpclient-3'
64
+ @log.info "Setting up HTTPClient3"
65
+ @http_manager = HTTPClient3.create_manager
66
+ @http_manager.start
67
+ HTTPClient3::HTTPClient3.new( @http_manager.client )
68
+ end
69
+ end
70
+
71
+ def visit_manager( wpoller )
72
+ vexec = VisitManager.new( wpoller )
73
+ Hooker.apply( [ :iudex, :visit_manager ], vexec )
74
+ end
75
+
76
+ def work_poller( data_source )
77
+ cmapper = ContentMapper.new( keys( poll_keys ) )
78
+ wpoller = WorkPoller.new( data_source, cmapper )
79
+
80
+ visit_q = Hooker.apply( [ :iudex, :visit_queue ], VisitQueue.new )
81
+
82
+ wpoller.visit_queue_factory = VisitQueueFactory.new( visit_q )
83
+
84
+ Hooker.apply( [ :iudex, :work_poller ], wpoller )
85
+ end
86
+
51
87
  def run
52
88
  Hooker.with( :iudex ) do
53
89
  dsf = PoolDataSourceFactory.new
54
90
  data_source = dsf.create
55
91
 
56
- cmapper = ContentMapper.new( keys( poll_keys ) )
57
- wpoller = WorkPoller.new( data_source, cmapper )
58
- Hooker.apply( :work_poller, wpoller )
92
+ wpoller = work_poller( data_source )
93
+ vexec = visit_manager( wpoller )
94
+ vexec.start_executor
59
95
 
60
- mgr = HTTPClient3.create_manager
61
- mgr.start
62
- http_client = HTTPClient3::HTTPClient3.new( mgr.client )
96
+ hclient = http_client( vexec.executor )
63
97
 
64
98
  fcf = filter_chain_factory
65
- fcf.http_client = http_client
99
+ fcf.http_client = hclient
66
100
  fcf.data_source = data_source
101
+ fcf.visit_counter = vexec
102
+
103
+ # FilterChain's executor is the same executor, unless using
104
+ # HTTPClient3, where executor is best not used
105
+ fcf.executor = vexec.executor unless @http_manager
67
106
 
68
107
  Hooker.apply( :filter_factory, fcf )
69
108
 
70
109
  fcf.filter do |chain|
71
- vexec = VisitExecutor.new( chain, wpoller )
72
- Hooker.apply( :visit_executor, vexec )
110
+ vexec.filter_chain = chain
73
111
 
74
112
  Hooker.log_not_applied # All hooks should be used by now
75
113
 
@@ -77,10 +115,15 @@ module Iudex
77
115
  vexec.join #Run until interrupted
78
116
  end # fcf closes
79
117
 
80
- mgr.shutdown
118
+ hclient.close if hclient.respond_to?( :close )
119
+ @http_manager.shutdown if @http_manager
120
+
81
121
  dsf.close
82
122
  end
123
+ rescue => e
124
+ @log.error( "On run: ", e )
83
125
  end
126
+
84
127
  end
85
128
 
86
129
  end
@@ -16,6 +16,6 @@
16
16
 
17
17
  module Iudex
18
18
  module Worker
19
- VERSION = '1.0.0'
19
+ VERSION = '1.1.0'
20
20
  end
21
21
  end
@@ -23,11 +23,17 @@ module Iudex
23
23
  include Iudex::HTTP
24
24
  include Iudex::Core::Filters
25
25
 
26
- def create_content_fetcher( accept_types, receiver_sym )
27
- cf = ContentFetcher.new( http_client, create_chain( receiver_sym ) )
26
+ def create_content_fetcher( accept_types, receiver, listener = nil )
27
+ cf = ContentFetcher.new( http_client,
28
+ visit_counter,
29
+ create_chain( receiver, nil, listener ) )
30
+
31
+ cf.executor = executor if executor
28
32
 
29
33
  alist = accept_list( accept_types )
30
- cf.accepted_content_types = alist unless alist.include?( '*/*' )
34
+ unless alist.include?( '*/*' )
35
+ cf.accepted_content_types = ContentTypeSet.new( alist )
36
+ end
31
37
 
32
38
  headers = [ [ 'User-Agent', http_user_agent ],
33
39
  [ 'Accept', accept_header( accept_types ) ] ]
@@ -26,6 +26,8 @@ require 'iudex-da/factory_helper'
26
26
 
27
27
  require 'iudex-rome'
28
28
 
29
+ require 'iudex-char-detector'
30
+
29
31
  require 'iudex-html'
30
32
  require 'iudex-html/factory_helper'
31
33
 
@@ -45,6 +47,7 @@ module Iudex
45
47
  include Iudex::Core
46
48
  include Iudex::Core::Filters
47
49
  include Iudex::ROME
50
+ include Iudex::CharDetector
48
51
 
49
52
  include Iudex::DA::Filters::FactoryHelper
50
53
  include Iudex::HTML::Filters::FactoryHelper
@@ -53,6 +56,8 @@ module Iudex
53
56
 
54
57
  attr_accessor :http_client
55
58
  attr_accessor :data_source
59
+ attr_accessor :visit_counter
60
+ attr_accessor :executor
56
61
 
57
62
  def initialize( name )
58
63
  super
@@ -60,15 +65,14 @@ module Iudex
60
65
  end
61
66
 
62
67
  def setup_reporters
63
- add_summary_reporter
64
- add_by_filter_reporter
68
+ # Use default, preserved for overrides
65
69
  end
66
70
 
67
71
  def filters
68
72
  [ UHashMDCSetter.new,
69
73
  DefaultFilter.new,
70
74
  super,
71
- type_switch ].flatten
75
+ type_switch ]
72
76
  end
73
77
 
74
78
  def listeners
@@ -76,8 +80,8 @@ module Iudex
76
80
  end
77
81
 
78
82
  def type_map
79
- { "FEED" => feed_fetcher,
80
- "PAGE" => page_fetcher }
83
+ { "FEED" => [ feed_fetcher, :main ],
84
+ "PAGE" => [ page_fetcher, :main ] }
81
85
  end
82
86
 
83
87
  def type_switch( tmap = type_map )
@@ -85,15 +89,17 @@ module Iudex
85
89
  end
86
90
 
87
91
  def feed_fetcher
88
- [ create_content_fetcher( feed_mime_types, :feed_receiver ) ]
92
+ [ create_content_fetcher( feed_mime_types, :feed_receiver, :main ) ]
89
93
  end
90
94
 
91
95
  def page_fetcher
92
- [ create_content_fetcher( page_mime_types, :page_receiver ) ]
96
+ [ create_content_fetcher( page_mime_types, :page_receiver, :main ) ]
93
97
  end
94
98
 
95
99
  def feed_receiver
96
- [ RomeFeedParser.new,
100
+ [ RedirectHandler.new,
101
+ Revisitor.new( visit_counter ),
102
+ RomeFeedParser.new,
97
103
  DefaultFilter.new,
98
104
  DateChangeFilter.new( false ),
99
105
  feed_updater ]
@@ -109,7 +115,7 @@ module Iudex
109
115
  ref_common_cleanup,
110
116
  Prioritizer.new( "feed-ref-new",
111
117
  :constant => 50,
112
- :min_next => 0.0 ) ].flatten
118
+ :min_next => 0.0 ) ]
113
119
  end
114
120
 
115
121
  def feed_ref_update
@@ -118,24 +124,32 @@ module Iudex
118
124
  ref_common_cleanup,
119
125
  Prioritizer.new( "feed-ref-update",
120
126
  :constant => 10,
121
- :min_next => 0.0 ) ].flatten
122
- end
123
-
124
- # Note: *_post is run possibly twice, once for both base content
125
- # map and referer map.
127
+ :min_next => 0.0 ) ]
128
+ end
129
+
130
+ # Filters to apply for feed update.
131
+ #
132
+ # Notes:
133
+ #
134
+ # * This is run possibly twice, for both base content map and
135
+ # referer map if present.
136
+ #
137
+ # * If this is an update then these filters act on a *new* map,
138
+ # thus any changes made here will not be visible after exit
139
+ # from the update_filter.
126
140
  def feed_post
127
141
  [ UHashMDCSetter.new,
128
142
  ref_common_cleanup,
129
143
  Prioritizer.new( "feed-post",
130
144
  :constant => 30,
131
145
  :visiting_now => true ),
132
- last_visit_setter ].flatten
146
+ last_visit_setter ]
133
147
  end
134
148
 
135
149
  def ref_common_cleanup
136
150
  [ ref_html_filters,
137
151
  TextCtrlWSFilter.new( :title.to_k ),
138
- FutureDateFilter.new( :pub_date.to_k ) ].flatten
152
+ FutureDateFilter.new( :pub_date.to_k ) ]
139
153
  end
140
154
 
141
155
  def ref_html_filters
@@ -143,7 +157,7 @@ module Iudex
143
157
  html_clean_filters( :summary ),
144
158
  html_clean_filters( :content ),
145
159
  html_write_filter( :summary ),
146
- html_write_filter( :content ) ].flatten
160
+ html_write_filter( :content ) ]
147
161
  end
148
162
 
149
163
  def feed_update_keys
@@ -151,9 +165,12 @@ module Iudex
151
165
  end
152
166
 
153
167
  def page_receiver
154
- [ html_clean_filters( :source ),
168
+ [ RedirectHandler.new,
169
+ Revisitor.new( visit_counter ),
170
+ CharDetectFilter.new,
171
+ html_clean_filters( :source ),
155
172
  simhash_generator,
156
- page_updater ].flatten
173
+ page_updater ]
157
174
  end
158
175
 
159
176
  def barc_writer
@@ -171,8 +188,16 @@ module Iudex
171
188
  create_update_filter( keys( page_update_keys ), :page_post )
172
189
  end
173
190
 
174
- # Note: *_post is run possibly twice, once for both base content
175
- # map and referer map.
191
+ # Filters to apply during page update
192
+ #
193
+ # Notes:
194
+ #
195
+ # * This is run possibly twice, for both base content map and
196
+ # referer map if present.
197
+ #
198
+ # * If this is an update then these filters act on a *new* map,
199
+ # thus any changes made here will not be visible after exit
200
+ # from the update_filter.
176
201
  def page_post
177
202
  [ UHashMDCSetter.new,
178
203
  barc_writer, # Not run in 302 referer case, since no SOURCE.
@@ -184,7 +209,7 @@ module Iudex
184
209
  end
185
210
 
186
211
  def page_update_keys
187
- [ :uhash, :host, :url, :type,
212
+ [ :uhash, :domain, :url, :type,
188
213
  :ref_pub_date, :pub_date,
189
214
  :priority, :last_visit, :next_visit_after,
190
215
  :status, :etag, :reason, :referer, :referent,
data/test/setup.rb CHANGED
@@ -21,7 +21,7 @@ $LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
21
21
 
22
22
  require 'rubygems'
23
23
  require 'rjack-logback'
24
- RJack::Logback.config_console( :stderr => true )
24
+ RJack::Logback.config_console( :stderr => true, :mdc => "uhash" )
25
25
 
26
26
  require 'minitest/unit'
27
27
  require 'minitest/autorun'
data/test/test_agent.rb CHANGED
@@ -27,6 +27,7 @@ class TestAgent < MiniTest::Unit::TestCase
27
27
 
28
28
  def setup
29
29
  Logback[ 'iudex.worker.FilterChainFactory' ].level = Logback::WARN
30
+ Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
30
31
  end
31
32
 
32
33
  def teardown
@@ -46,11 +47,33 @@ class TestAgent < MiniTest::Unit::TestCase
46
47
  assert_agent
47
48
  end
48
49
 
50
+ def test_agent_with_sample_config_jetty_http
51
+ Hooker.load_file( File.join( File.dirname( __FILE__ ),
52
+ '..', 'config', 'config_jetty_http.rb' ) )
53
+
54
+ assert_agent
55
+ ensure
56
+ # Hack to avoid interference in test of other configs, given
57
+ # require iudex-jetty-httpclient. This only works once, but
58
+ # that's enough for testing.
59
+ Iudex.send( :remove_const, :JettyHTTPClient )
60
+ end
61
+
62
+ def test_agent_with_sample_config_async_http
63
+ Hooker.load_file( File.join( File.dirname( __FILE__ ),
64
+ '..', 'config', 'config_async_http.rb' ) )
65
+
66
+ assert_agent
67
+ ensure
68
+ # Same hack as above.
69
+ Iudex.send( :remove_const, :AsyncHTTPClient )
70
+ end
71
+
49
72
  def assert_agent
50
73
 
51
- # Stub VisitExecutor.start to allow agent.run to return early.
52
- Hooker.add( [ :iudex, :visit_executor ] ) do |vexec|
53
- def vexec.start
74
+ # Stub VisitManager.start to allow agent.run to return early.
75
+ Hooker.add( [ :iudex, :visit_manager ] ) do |vm|
76
+ def vm.start
54
77
  #disable
55
78
  end
56
79
  end
@@ -19,10 +19,6 @@
19
19
 
20
20
  require File.join( File.dirname( __FILE__ ), "setup" )
21
21
 
22
- RJack::Logback.config_console( :stderr => true, :mdc => "uhash" )
23
-
24
- RJack::Logback[ 'iudex' ].level = RJack::Logback::DEBUG
25
-
26
22
  require 'iudex-httpclient-3'
27
23
 
28
24
  require 'iudex-da'
@@ -30,34 +26,70 @@ require 'iudex-da/pool_data_source_factory'
30
26
 
31
27
  require 'iudex-worker'
32
28
  require 'iudex-worker/filter_chain_factory'
29
+ require 'iudex-httpclient-3'
33
30
 
34
31
  class TestFilterChainFactory < MiniTest::Unit::TestCase
35
32
  include Iudex
36
33
  include Gravitext::HTMap
37
34
 
35
+ def setup
36
+ RJack::Logback[ 'iudex' ].level = RJack::Logback::DEBUG
37
+ end
38
+
39
+ def teardown
40
+ RJack::Logback[ 'iudex' ].level = nil
41
+ end
42
+
43
+ import 'iudex.core.VisitCounter'
44
+ class TestVisitCounter
45
+ include VisitCounter
46
+
47
+ attr_reader :released
48
+
49
+ def initialize
50
+ super()
51
+ @released = []
52
+ end
53
+
54
+ def release( acquired, newOrder )
55
+ @released << acquired
56
+ end
57
+ end
58
+
38
59
  def test_filter
39
60
  fcf = Worker::FilterChainFactory.new( "test" )
40
61
 
41
62
  mgr = HTTPClient3.create_manager
42
63
  mgr.start
43
64
  fcf.http_client = HTTPClient3::HTTPClient3.new( mgr.client )
65
+ fcf.visit_counter = counter = TestVisitCounter.new
44
66
 
45
67
  dsf = DA::PoolDataSourceFactory.new
46
68
  fcf.data_source = dsf.create
47
69
 
48
70
  fcf.filter do |chain|
49
71
  # Run twice (assume new the first time, updates the second).
50
- 2.times do
51
- content = UniMap.new
52
- content.url = Core::VisitURL.normalize( "http://gravitext.com/atom.xml" )
53
- content.type = "FEED"
54
- content.priority = 1.0
55
- assert( chain.filter( content ) )
72
+
73
+ order = UniMap.new.tap do |o|
74
+ o.url = Core::VisitURL.normalize( "http://gravitext.com/atom.xml" )
75
+ o.type = "FEED"
76
+ o.priority = 1.0
56
77
  end
78
+
79
+ orders = [ order, order.clone ]
80
+
81
+ orders.each do |o|
82
+ assert( chain.filter( o ) )
83
+ end
84
+
85
+ # Note this only works timing wise because of blocking
86
+ # HTTPClient.
87
+ assert_equal( orders, counter.released )
57
88
  end
58
89
 
59
90
  mgr.shutdown
60
91
  dsf.close
92
+
61
93
  end
62
94
 
63
95
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: iudex-worker
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.0
5
+ version: 1.1.0
6
6
  platform: java
7
7
  authors:
8
8
  - David Kellum
@@ -10,8 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-04 00:00:00 -07:00
14
- default_executable:
13
+ date: 2011-11-13 00:00:00 Z
15
14
  dependencies:
16
15
  - !ruby/object:Gem::Dependency
17
16
  name: iudex-core
@@ -21,7 +20,7 @@ dependencies:
21
20
  requirements:
22
21
  - - ~>
23
22
  - !ruby/object:Gem::Version
24
- version: 1.0.0
23
+ version: 1.1.0
25
24
  type: :runtime
26
25
  version_requirements: *id001
27
26
  - !ruby/object:Gem::Dependency
@@ -43,7 +42,7 @@ dependencies:
43
42
  requirements:
44
43
  - - ~>
45
44
  - !ruby/object:Gem::Version
46
- version: 1.0.0
45
+ version: 1.1.0
47
46
  type: :runtime
48
47
  version_requirements: *id003
49
48
  - !ruby/object:Gem::Dependency
@@ -54,7 +53,7 @@ dependencies:
54
53
  requirements:
55
54
  - - ~>
56
55
  - !ruby/object:Gem::Version
57
- version: 1.0.0
56
+ version: 1.1.0
58
57
  type: :runtime
59
58
  version_requirements: *id004
60
59
  - !ruby/object:Gem::Dependency
@@ -65,7 +64,7 @@ dependencies:
65
64
  requirements:
66
65
  - - ~>
67
66
  - !ruby/object:Gem::Version
68
- version: 1.0.0
67
+ version: 1.1.0
69
68
  type: :runtime
70
69
  version_requirements: *id005
71
70
  - !ruby/object:Gem::Dependency
@@ -76,18 +75,18 @@ dependencies:
76
75
  requirements:
77
76
  - - ~>
78
77
  - !ruby/object:Gem::Version
79
- version: 1.0.0
78
+ version: 1.1.0
80
79
  type: :runtime
81
80
  version_requirements: *id006
82
81
  - !ruby/object:Gem::Dependency
83
- name: iudex-httpclient-3
82
+ name: iudex-char-detector
84
83
  prerelease: false
85
84
  requirement: &id007 !ruby/object:Gem::Requirement
86
85
  none: false
87
86
  requirements:
88
87
  - - ~>
89
88
  - !ruby/object:Gem::Version
90
- version: 1.0.0
89
+ version: 1.1.0
91
90
  type: :runtime
92
91
  version_requirements: *id007
93
92
  - !ruby/object:Gem::Dependency
@@ -96,25 +95,55 @@ dependencies:
96
95
  requirement: &id008 !ruby/object:Gem::Requirement
97
96
  none: false
98
97
  requirements:
99
- - - ">="
100
- - !ruby/object:Gem::Version
101
- version: 1.7.1
102
- - - <
98
+ - - ~>
103
99
  - !ruby/object:Gem::Version
104
- version: "2.1"
100
+ version: "2.3"
105
101
  type: :development
106
102
  version_requirements: *id008
107
103
  - !ruby/object:Gem::Dependency
108
- name: rjack-tarpit
104
+ name: iudex-httpclient-3
109
105
  prerelease: false
110
106
  requirement: &id009 !ruby/object:Gem::Requirement
111
107
  none: false
112
108
  requirements:
113
109
  - - ~>
114
110
  - !ruby/object:Gem::Version
115
- version: 1.3.0
111
+ version: 1.1.0
116
112
  type: :development
117
113
  version_requirements: *id009
114
+ - !ruby/object:Gem::Dependency
115
+ name: iudex-jetty-httpclient
116
+ prerelease: false
117
+ requirement: &id010 !ruby/object:Gem::Requirement
118
+ none: false
119
+ requirements:
120
+ - - ~>
121
+ - !ruby/object:Gem::Version
122
+ version: 1.1.0
123
+ type: :development
124
+ version_requirements: *id010
125
+ - !ruby/object:Gem::Dependency
126
+ name: iudex-async-httpclient
127
+ prerelease: false
128
+ requirement: &id011 !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: 1.1.0
134
+ type: :development
135
+ version_requirements: *id011
136
+ - !ruby/object:Gem::Dependency
137
+ name: rjack-tarpit
138
+ prerelease: false
139
+ requirement: &id012 !ruby/object:Gem::Requirement
140
+ none: false
141
+ requirements:
142
+ - - ~>
143
+ - !ruby/object:Gem::Version
144
+ version: 1.4.0
145
+ type: :development
146
+ version_requirements: *id012
118
147
  description: |-
119
148
  Iudex is a general purpose web crawler and feed processor in
120
149
  ruby/java. The iudex-worker gem provides a worker daemon for feed/page
@@ -136,6 +165,8 @@ files:
136
165
  - Rakefile
137
166
  - bin/iudex-worker-fg
138
167
  - config/config.rb
168
+ - config/config_async_http.rb
169
+ - config/config_jetty_http.rb
139
170
  - init/iudex-worker
140
171
  - lib/iudex-worker/base.rb
141
172
  - lib/iudex-worker.rb
@@ -147,7 +178,7 @@ files:
147
178
  - test/test_agent.rb
148
179
  - test/test_filter_chain_factory.rb
149
180
  - test/test_prioritizer.rb
150
- has_rdoc: true
181
+ - .gemtest
151
182
  homepage: http://github.com/dekellum/iudex
152
183
  licenses: []
153
184
 
@@ -172,7 +203,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
172
203
  requirements: []
173
204
 
174
205
  rubyforge_project: iudex-worker
175
- rubygems_version: 1.5.1
206
+ rubygems_version: 1.8.9
176
207
  signing_key:
177
208
  specification_version: 3
178
209
  summary: Iudex is a general purpose web crawler and feed processor in ruby/java