iudex-worker 1.3.2-java → 1.4.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.rdoc +14 -0
- data/README.rdoc +1 -1
- data/bin/iudex-worker-fg +1 -1
- data/config/config.rb +4 -0
- data/config/config_jetty_http.rb +6 -3
- data/init/iudex-worker +2 -2
- data/lib/iudex-worker.rb +1 -1
- data/lib/iudex-worker/agent.rb +21 -4
- data/lib/iudex-worker/base.rb +2 -2
- data/lib/iudex-worker/fetch_helper.rb +1 -1
- data/lib/iudex-worker/filter_chain_factory.rb +10 -3
- data/lib/iudex-worker/prioritizer.rb +1 -1
- data/test/setup.rb +1 -1
- data/test/test_agent.rb +15 -5
- data/test/test_filter_chain_factory.rb +1 -1
- data/test/test_prioritizer.rb +1 -1
- metadata +22 -22
data/History.rdoc
CHANGED
@@ -1,3 +1,17 @@
|
|
1
|
+
=== 1.4.0 (2013-10-29)
|
2
|
+
* Add Agent.raise_on_run flag for testing
|
3
|
+
* Ensure executor/VisitManager shutdown on failed Agent.run_safe (for
|
4
|
+
example on filter chain factory errors.) Jetty 9 uses executor
|
5
|
+
threads at startup so this is now important.
|
6
|
+
* FilterChainFactory.last_visit_setter and page/feed keys extended to
|
7
|
+
conditionally include handling of reserved, instance fields based on
|
8
|
+
set (by Agent) WorkPoller reserve?, instance properties as configured.
|
9
|
+
* Add config example of WorkPoller do_reserve and instance settings.
|
10
|
+
* Add VisitQueue.max_access_total config example.
|
11
|
+
* Update config_jetty_http.rb sample for client 1.4.0 (jetty 9.x)
|
12
|
+
* Upgrade to iudex-* ~> 1.4.0 dependencies
|
13
|
+
* Upgrade to minitest ~> 4.7.4 (dev)
|
14
|
+
|
1
15
|
=== 1.3.2 (2012-11-8)
|
2
16
|
* Upgrade/narrow to iudex-core, -da, -rome, -html, -simhash ~> 1.3.0
|
3
17
|
* FetchHelper.create_content_fetcher now prefers an options Hash,
|
data/README.rdoc
CHANGED
data/bin/iudex-worker-fg
CHANGED
data/config/config.rb
CHANGED
@@ -22,6 +22,7 @@ Iudex.configure do |c|
|
|
22
22
|
end
|
23
23
|
|
24
24
|
c.setup_visit_queue do |q|
|
25
|
+
q.max_access_total = threads * 8
|
25
26
|
q.config( :rate => 5.0, :cons => 1 )
|
26
27
|
q.config( :domain => "gravitext.com", :rate => 10.0, :cons => 2 )
|
27
28
|
end
|
@@ -30,6 +31,9 @@ Iudex.configure do |c|
|
|
30
31
|
wp.min_order_remaining_ratio = 0.30
|
31
32
|
wp.max_check_interval = 100 #ms
|
32
33
|
wp.min_poll_interval = 2_000 #ms
|
34
|
+
wp.do_reserve = true
|
35
|
+
wp.instance = 'solo'
|
36
|
+
wp.instance_unreserve
|
33
37
|
end
|
34
38
|
|
35
39
|
c.setup_filter_factory do |ff|
|
data/config/config_jetty_http.rb
CHANGED
@@ -3,9 +3,12 @@ require 'iudex-jetty-httpclient'
|
|
3
3
|
Iudex.configure do |c|
|
4
4
|
|
5
5
|
c.setup_jetty_httpclient do
|
6
|
-
{ :timeout
|
7
|
-
:
|
8
|
-
:
|
6
|
+
{ :timeout => 35_000,
|
7
|
+
:connect_timeout => 12_000,
|
8
|
+
:idle_timeout => 20_000,
|
9
|
+
:max_connections_per_destination => 2,
|
10
|
+
:max_requests_queued_per_destination => 20,
|
11
|
+
:follow_redirects => false }
|
9
12
|
end
|
10
13
|
|
11
14
|
end
|
data/init/iudex-worker
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
#. hashdot.vm.options += -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled
|
8
8
|
|
9
9
|
#--
|
10
|
-
# Copyright (c) 2008-
|
10
|
+
# Copyright (c) 2008-2013 David Kellum
|
11
11
|
#
|
12
12
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
13
13
|
# may not use this file except in compliance with the License. You
|
@@ -24,7 +24,7 @@
|
|
24
24
|
|
25
25
|
require 'rubygems'
|
26
26
|
|
27
|
-
gem( "iudex-worker", "= 1.
|
27
|
+
gem( "iudex-worker", "= 1.4.0" )
|
28
28
|
|
29
29
|
module IudexInitScript
|
30
30
|
|
data/lib/iudex-worker.rb
CHANGED
data/lib/iudex-worker/agent.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c) 2008-
|
2
|
+
# Copyright (c) 2008-2013 David Kellum
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
5
|
# may not use this file except in compliance with the License. You
|
@@ -33,9 +33,12 @@ module Iudex
|
|
33
33
|
include Iudex::Worker
|
34
34
|
include Gravitext::HTMap
|
35
35
|
|
36
|
+
attr_accessor :raise_on_run
|
37
|
+
|
36
38
|
def initialize
|
37
39
|
@log = RJack::SLF4J[ self.class ]
|
38
40
|
@http_manager = nil
|
41
|
+
@raise_on_run = false
|
39
42
|
Hooker.apply( [ :iudex, :worker ], self )
|
40
43
|
end
|
41
44
|
|
@@ -57,7 +60,9 @@ module Iudex
|
|
57
60
|
end
|
58
61
|
elsif defined?( AsyncHTTPClient.create_client )
|
59
62
|
@log.info "Setting up AsyncHTTPClient"
|
60
|
-
|
63
|
+
opts = {}
|
64
|
+
opts[ :executor_service ] = executor if executor
|
65
|
+
AsyncHTTPClient.create_client( opts )
|
61
66
|
else
|
62
67
|
gem 'iudex-httpclient-3', '~> 1.2.b'
|
63
68
|
require 'iudex-httpclient-3'
|
@@ -104,6 +109,7 @@ module Iudex
|
|
104
109
|
fcf.http_client = hclient
|
105
110
|
fcf.data_source = data_source
|
106
111
|
fcf.visit_counter = vexec
|
112
|
+
fcf.work_poller = wpoller
|
107
113
|
|
108
114
|
# FilterChain's executor is the same executor, unless using
|
109
115
|
# HTTPClient3, where executor is best not used
|
@@ -117,14 +123,25 @@ module Iudex
|
|
117
123
|
Hooker.log_not_applied # All hooks should be used by now
|
118
124
|
|
119
125
|
vexec.start
|
120
|
-
vexec
|
126
|
+
veref, vexec = vexec, nil
|
127
|
+
veref.join # Run until interrupted
|
121
128
|
end # fcf closes
|
122
129
|
|
123
130
|
rescue => e
|
124
|
-
@
|
131
|
+
if @raise_on_run
|
132
|
+
raise e
|
133
|
+
else
|
134
|
+
@log.error( "On run: ", e )
|
135
|
+
end
|
125
136
|
ensure
|
126
137
|
hclient.close if hclient && hclient.respond_to?( :close )
|
127
138
|
@http_manager.shutdown if @http_manager
|
139
|
+
|
140
|
+
# Jetty 9 for example, uses executor thread on start, so
|
141
|
+
# executor shutdown is required for early termination
|
142
|
+
# (i.e. when fcf.filter raises)
|
143
|
+
vexec.shutdown if vexec
|
144
|
+
|
128
145
|
dsf.close if dsf
|
129
146
|
end
|
130
147
|
|
data/lib/iudex-worker/base.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c) 2008-
|
2
|
+
# Copyright (c) 2008-2013 David Kellum
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
5
|
# may not use this file except in compliance with the License. You
|
@@ -16,6 +16,6 @@
|
|
16
16
|
|
17
17
|
module Iudex
|
18
18
|
module Worker
|
19
|
-
VERSION = '1.
|
19
|
+
VERSION = '1.4.0'
|
20
20
|
end
|
21
21
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (c) 2008-
|
2
|
+
# Copyright (c) 2008-2013 David Kellum
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
5
|
# may not use this file except in compliance with the License. You may
|
@@ -58,6 +58,7 @@ module Iudex
|
|
58
58
|
attr_accessor :data_source
|
59
59
|
attr_accessor :visit_counter
|
60
60
|
attr_accessor :executor
|
61
|
+
attr_accessor :work_poller
|
61
62
|
|
62
63
|
def initialize( name )
|
63
64
|
super
|
@@ -197,14 +198,20 @@ module Iudex
|
|
197
198
|
|
198
199
|
def page_update_keys
|
199
200
|
[ :uhash, :domain, :url, :type,
|
201
|
+
( :reserved if work_poller && work_poller.reserve? ),
|
202
|
+
( :instance if work_poller && work_poller.instance ),
|
200
203
|
:ref_pub_date, :pub_date,
|
201
204
|
:priority, :last_visit, :next_visit_after,
|
202
205
|
:status, :etag, :reason, :referer, :referent,
|
203
|
-
:cache_file, :cache_file_offset, :simhash ]
|
206
|
+
:cache_file, :cache_file_offset, :simhash ].compact
|
204
207
|
end
|
205
208
|
|
206
209
|
def last_visit_setter
|
207
|
-
|
210
|
+
resv = work_poller && work_poller.reserve?
|
211
|
+
inst = work_poller && work_poller.instance
|
212
|
+
[ Copier.new( *keys( :visit_start, :last_visit ) ),
|
213
|
+
( Setter.new( :reserved.to_k, nil ) if resv ),
|
214
|
+
( Setter.new( :instance.to_k, inst ) if inst ) ]
|
208
215
|
end
|
209
216
|
|
210
217
|
end
|
data/test/setup.rb
CHANGED
data/test/test_agent.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
3
3
|
|
4
4
|
#--
|
5
|
-
# Copyright (c) 2008-
|
5
|
+
# Copyright (c) 2008-2013 David Kellum
|
6
6
|
#
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
8
|
# may not use this file except in compliance with the License. You
|
@@ -69,29 +69,39 @@ class TestAgent < MiniTest::Unit::TestCase
|
|
69
69
|
Iudex.send( :remove_const, :AsyncHTTPClient )
|
70
70
|
end
|
71
71
|
|
72
|
+
class TestBadness < StandardError
|
73
|
+
end
|
74
|
+
|
72
75
|
def test_agent_graceful_shutdown_on_fcf_error
|
73
76
|
Hooker.add( [ :iudex, :filter_factory ] ) do |fcf|
|
74
77
|
def fcf.filters
|
75
78
|
super
|
76
|
-
raise "Test Badness in FCF"
|
79
|
+
raise TestBadness, "Test Badness in FCF"
|
77
80
|
end
|
78
81
|
end
|
79
82
|
|
80
83
|
agent = Agent.new
|
81
|
-
agent.
|
82
|
-
|
84
|
+
agent.raise_on_run = true
|
85
|
+
assert_raises( TestBadness ) do
|
86
|
+
agent.run
|
87
|
+
end
|
83
88
|
end
|
84
89
|
|
85
90
|
def assert_agent
|
86
91
|
|
87
|
-
# Stub VisitManager.start to allow agent.run to
|
92
|
+
# Stub VisitManager.start/start_executor to allow agent.run to
|
93
|
+
# return early.
|
88
94
|
Hooker.add( [ :iudex, :visit_manager ] ) do |vm|
|
89
95
|
def vm.start
|
90
96
|
#disable
|
91
97
|
end
|
98
|
+
def vm.start_executor
|
99
|
+
#disable
|
100
|
+
end
|
92
101
|
end
|
93
102
|
|
94
103
|
agent = Agent.new
|
104
|
+
agent.raise_on_run = true
|
95
105
|
agent.run
|
96
106
|
pass
|
97
107
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
3
3
|
|
4
4
|
#--
|
5
|
-
# Copyright (c) 2008-
|
5
|
+
# Copyright (c) 2008-2013 David Kellum
|
6
6
|
#
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
8
|
# may not use this file except in compliance with the License. You
|
data/test/test_prioritizer.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
3
3
|
|
4
4
|
#--
|
5
|
-
# Copyright (c) 2008-
|
5
|
+
# Copyright (c) 2008-2013 David Kellum
|
6
6
|
#
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
8
|
# may not use this file except in compliance with the License. You
|
metadata
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
name: iudex-worker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.
|
5
|
+
version: 1.4.0
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- David Kellum
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-10-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: iudex-core
|
@@ -17,13 +17,13 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - ~>
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 1.
|
20
|
+
version: 1.4.0
|
21
21
|
none: false
|
22
22
|
requirement: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.
|
26
|
+
version: 1.4.0
|
27
27
|
none: false
|
28
28
|
prerelease: false
|
29
29
|
type: :runtime
|
@@ -33,13 +33,13 @@ dependencies:
|
|
33
33
|
requirements:
|
34
34
|
- - ~>
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version: 1.
|
36
|
+
version: 1.4.0
|
37
37
|
none: false
|
38
38
|
requirement: !ruby/object:Gem::Requirement
|
39
39
|
requirements:
|
40
40
|
- - ~>
|
41
41
|
- !ruby/object:Gem::Version
|
42
|
-
version: 1.
|
42
|
+
version: 1.4.0
|
43
43
|
none: false
|
44
44
|
prerelease: false
|
45
45
|
type: :runtime
|
@@ -49,13 +49,13 @@ dependencies:
|
|
49
49
|
requirements:
|
50
50
|
- - ~>
|
51
51
|
- !ruby/object:Gem::Version
|
52
|
-
version: 1.
|
52
|
+
version: 1.4.0
|
53
53
|
none: false
|
54
54
|
requirement: !ruby/object:Gem::Requirement
|
55
55
|
requirements:
|
56
56
|
- - ~>
|
57
57
|
- !ruby/object:Gem::Version
|
58
|
-
version: 1.
|
58
|
+
version: 1.4.0
|
59
59
|
none: false
|
60
60
|
prerelease: false
|
61
61
|
type: :runtime
|
@@ -65,13 +65,13 @@ dependencies:
|
|
65
65
|
requirements:
|
66
66
|
- - ~>
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 1.
|
68
|
+
version: 1.4.0
|
69
69
|
none: false
|
70
70
|
requirement: !ruby/object:Gem::Requirement
|
71
71
|
requirements:
|
72
72
|
- - ~>
|
73
73
|
- !ruby/object:Gem::Version
|
74
|
-
version: 1.
|
74
|
+
version: 1.4.0
|
75
75
|
none: false
|
76
76
|
prerelease: false
|
77
77
|
type: :runtime
|
@@ -81,13 +81,13 @@ dependencies:
|
|
81
81
|
requirements:
|
82
82
|
- - ~>
|
83
83
|
- !ruby/object:Gem::Version
|
84
|
-
version: 1.
|
84
|
+
version: 1.4.0
|
85
85
|
none: false
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|
87
87
|
requirements:
|
88
88
|
- - ~>
|
89
89
|
- !ruby/object:Gem::Version
|
90
|
-
version: 1.
|
90
|
+
version: 1.4.0
|
91
91
|
none: false
|
92
92
|
prerelease: false
|
93
93
|
type: :runtime
|
@@ -97,13 +97,13 @@ dependencies:
|
|
97
97
|
requirements:
|
98
98
|
- - ~>
|
99
99
|
- !ruby/object:Gem::Version
|
100
|
-
version: 1.
|
100
|
+
version: 1.4.0
|
101
101
|
none: false
|
102
102
|
requirement: !ruby/object:Gem::Requirement
|
103
103
|
requirements:
|
104
104
|
- - ~>
|
105
105
|
- !ruby/object:Gem::Version
|
106
|
-
version: 1.
|
106
|
+
version: 1.4.0
|
107
107
|
none: false
|
108
108
|
prerelease: false
|
109
109
|
type: :runtime
|
@@ -129,13 +129,13 @@ dependencies:
|
|
129
129
|
requirements:
|
130
130
|
- - ~>
|
131
131
|
- !ruby/object:Gem::Version
|
132
|
-
version:
|
132
|
+
version: 4.7.4
|
133
133
|
none: false
|
134
134
|
requirement: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - ~>
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version:
|
138
|
+
version: 4.7.4
|
139
139
|
none: false
|
140
140
|
prerelease: false
|
141
141
|
type: :development
|
@@ -145,13 +145,13 @@ dependencies:
|
|
145
145
|
requirements:
|
146
146
|
- - ~>
|
147
147
|
- !ruby/object:Gem::Version
|
148
|
-
version: 1.
|
148
|
+
version: 1.4.0
|
149
149
|
none: false
|
150
150
|
requirement: !ruby/object:Gem::Requirement
|
151
151
|
requirements:
|
152
152
|
- - ~>
|
153
153
|
- !ruby/object:Gem::Version
|
154
|
-
version: 1.
|
154
|
+
version: 1.4.0
|
155
155
|
none: false
|
156
156
|
prerelease: false
|
157
157
|
type: :development
|
@@ -161,13 +161,13 @@ dependencies:
|
|
161
161
|
requirements:
|
162
162
|
- - ~>
|
163
163
|
- !ruby/object:Gem::Version
|
164
|
-
version: 1.
|
164
|
+
version: 1.4.0
|
165
165
|
none: false
|
166
166
|
requirement: !ruby/object:Gem::Requirement
|
167
167
|
requirements:
|
168
168
|
- - ~>
|
169
169
|
- !ruby/object:Gem::Version
|
170
|
-
version: 1.
|
170
|
+
version: 1.4.0
|
171
171
|
none: false
|
172
172
|
prerelease: false
|
173
173
|
type: :development
|
@@ -177,13 +177,13 @@ dependencies:
|
|
177
177
|
requirements:
|
178
178
|
- - ~>
|
179
179
|
- !ruby/object:Gem::Version
|
180
|
-
version: 1.
|
180
|
+
version: 1.4.0
|
181
181
|
none: false
|
182
182
|
requirement: !ruby/object:Gem::Requirement
|
183
183
|
requirements:
|
184
184
|
- - ~>
|
185
185
|
- !ruby/object:Gem::Version
|
186
|
-
version: 1.
|
186
|
+
version: 1.4.0
|
187
187
|
none: false
|
188
188
|
prerelease: false
|
189
189
|
type: :development
|