iudex-core 1.0.0-java → 1.1.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/History.rdoc +21 -0
- data/Manifest.txt +9 -1
- data/Rakefile +6 -6
- data/bin/iudex-test-config +1 -1
- data/bin/iudex-url-norm +4 -4
- data/build/effective_tld_name.dat +432 -29
- data/config/mojibake +268 -0
- data/lib/iudex-core/base.rb +1 -1
- data/lib/iudex-core/iudex-core-1.1.0.jar +0 -0
- data/lib/iudex-core/mojibake.rb +73 -0
- data/lib/iudex-core.rb +8 -2
- data/pom.xml +5 -5
- data/test/test_content_fetcher.rb +37 -39
- data/test/test_content_source.rb +75 -0
- data/test/test_mojibake.rb +58 -0
- data/test/test_redirect_handler.rb +170 -0
- data/test/test_visit_manager.rb +107 -0
- data/test/test_visit_queue.rb +268 -0
- data/test/test_visit_url.rb +150 -0
- metadata +26 -16
- data/lib/iudex-core/iudex-core-1.0.0.jar +0 -0
@@ -0,0 +1,170 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
require 'iudex-core'
|
22
|
+
|
23
|
+
class TestRedirectHandler < MiniTest::Unit::TestCase
|
24
|
+
include Iudex::HTTP
|
25
|
+
include Iudex::Core
|
26
|
+
include Iudex::Core::Filters
|
27
|
+
include Gravitext::HTMap
|
28
|
+
|
29
|
+
UniMap.define_accessors
|
30
|
+
|
31
|
+
# One redirect: the filtered order keeps its URL, status and priority,
# and the revisit order it yields points at the redirect target.
def test_first_redirect
  origin = new_order
  redirect( origin )
  do_filter( origin )

  assert_equal( "http://www/1", origin.url.to_s )
  assert_equal( 301, origin.status )
  assert_equal( 1.0, origin.priority )

  followup = revisit( origin )
  assert_nil( followup.status )
  assert_equal( 1.5, followup.priority )

  assert_equal( "http://www/2", followup.url.to_s )
  assert_equal( "http://www/1", followup.last.url.to_s )

  # referer is the first URL visited; its referent is the newest target.
  assert_equal( "http://www/1", followup.referer.url.to_s )
  assert_equal( "http://www/2", followup.referer.referent.url.to_s )
end
|
50
|
+
|
51
|
+
# Two chained redirects (/1 -> /2 -> /3), following the revisit order
# after each hop, then checking the final revisit order.
def test_second_redirect
  hops = [ [ 301, 2 ], [ 302, 3 ] ]

  final = hops.inject( new_order ) do |current, ( status, target )|
    redirect( current, status, target )
    do_filter( current )
    revisit( current )
  end

  assert_equal( 2.0, final.priority )

  assert_equal( "http://www/3", final.url.to_s )
  assert_equal( "http://www/2", final.last.url.to_s )
  assert_equal( "http://www/1", final.referer.url.to_s )
  assert_equal( "http://www/3", final.referer.referent.url.to_s )
end
|
69
|
+
|
70
|
+
# Three chained redirects (/1 -> /2 -> /3 -> /4), following the revisit
# order after each hop, then checking the final revisit order.
def test_third_redirect
  hops = [ [ 301, 2 ], [ 302, 3 ], [ 303, 4 ] ]

  final = hops.inject( new_order ) do |current, ( status, target )|
    redirect( current, status, target )
    do_filter( current )
    revisit( current )
  end

  assert_equal( 2.5, final.priority )

  assert_equal( "http://www/4", final.url.to_s )
  assert_equal( "http://www/3", final.last.url.to_s )
  assert_equal( "http://www/2", final.last.last.url.to_s )
  assert_equal( "http://www/1", final.referer.url.to_s )
  assert_equal( "http://www/4", final.referer.referent.url.to_s )
end
|
93
|
+
|
94
|
+
# A redirect whose Location is the order's own URL is expected to be
# flagged REDIRECT_LOOP, with no revisit order produced.
def test_self_redirect
  looped = new_order.tap do |o|
    redirect( o, 307, 1 )
    do_filter( o )
  end

  assert_equal( "http://www/1", looped.url.to_s )
  assert_equal( HTTPSession::REDIRECT_LOOP, looped.status )
  assert_equal( 1.0, looped.priority )
  assert_nil( looped.revisit_order )
end
|
104
|
+
|
105
|
+
# A 307 status with no response headers set is expected to be flagged
# MISSING_REDIRECT_LOCATION, with no revisit order produced.
def test_missing_location
  headerless = new_order
  headerless.status = 307
  do_filter( headerless )

  assert_equal( "http://www/1", headerless.url.to_s )
  assert_equal( HTTPSession::MISSING_REDIRECT_LOCATION, headerless.status )
  assert_equal( 1.0, headerless.priority )
  assert_nil( headerless.revisit_order )
end
|
115
|
+
|
116
|
+
# Follow /1 -> /2 -> /3, then have /3 redirect back to /1. The last
# filtered order is expected to be flagged REDIRECT_LOOP.
def test_redirect_loop
  tail = [ [ 301, 2 ], [ 302, 3 ] ].inject( new_order ) do |current, ( status, target )|
    redirect( current, status, target )
    do_filter( current )
    revisit( current )
  end

  redirect( tail, 303, 1 ) #Eat our tail
  do_filter( tail )

  assert_equal( "http://www/3", tail.url.to_s )
  assert_equal( HTTPSession::REDIRECT_LOOP, tail.status )
  assert_nil( tail.revisit_order )
end
|
133
|
+
|
134
|
+
# Keep redirecting with the handler's max_path set to 3 until no
# revisit order comes back; the order that stopped the chain is
# expected to carry MAX_REDIRECTS_EXCEEDED.
def test_max_path
  order = new_order

  (2..5).each do |target|
    redirect( order, 302, target )
    do_filter( order, 3 )

    # Stop on the first hop without a revisit order, keeping the last
    # order that was actually filtered.
    successor = revisit( order )
    break unless successor
    order = successor
  end

  assert_equal( "http://www/3", order.url.to_s )
  assert_equal( HTTPSession::MAX_REDIRECTS_EXCEEDED, order.status )
  assert_nil( order.revisit_order )
end
|
146
|
+
|
147
|
+
# Run a freshly constructed RedirectHandler over the given order.
#
# order::    the order map handed to RedirectHandler#filter; the tests
#            read the outcome back from this same object afterwards
# max_path:: value assigned to the handler's max_path (default 4)
#
# Returns whatever RedirectHandler#filter returns.
def do_filter( order, max_path = 4 )
  handler = RedirectHandler.new
  handler.max_path = max_path
  handler.filter( order )
end
|
153
|
+
|
154
|
+
# Remove the entry stored under ContentKeys::REVISIT_ORDER from the
# order and return whatever order.remove hands back.
def revisit( order )
  key = ContentKeys::REVISIT_ORDER
  order.remove( key )
end
|
157
|
+
|
158
|
+
# Build a UniMap order for http://www/<i> (normalized through
# VisitURL.normalize) with priority 1.0.
def new_order( i = 1 )
  fresh = UniMap.new
  fresh.url = VisitURL.normalize( "http://www/#{i}" )
  fresh.priority = 1.0
  fresh
end
|
164
|
+
|
165
|
+
# Make the order look like a fetched redirect response.
#
# o:: the order to mutate
# s:: HTTP status to record on the order (default 301)
# r:: number of the target page; the Location header is set to
#     "http://WWW/<r>" (upper-case host, as in the original helper)
#
# Fix: the status argument used to be ignored and 301 was always
# recorded, so callers passing 302/303/307 were silently tested as 301.
def redirect( o, s = 301, r = 2 )
  o.status = s
  o.response_headers = [ Header.new( "Location", "http://WWW/#{r}" ) ]
end
|
169
|
+
|
170
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
require 'iudex-core'
|
22
|
+
|
23
|
+
class TestVisitManager < MiniTest::Unit::TestCase
|
24
|
+
include Gravitext::HTMap
|
25
|
+
include Iudex::Filter
|
26
|
+
include Iudex::Filter::Core
|
27
|
+
include Iudex::Core
|
28
|
+
|
29
|
+
import 'iudex.core.GenericWorkPollStrategy'
|
30
|
+
|
31
|
+
import 'java.util.concurrent.Executors'
|
32
|
+
import 'java.util.concurrent.TimeUnit'
|
33
|
+
import 'java.util.concurrent.CountDownLatch'
|
34
|
+
|
35
|
+
UniMap.define_accessors
|
36
|
+
|
37
|
+
# Wire a VisitManager to the TestWorkPoller and a single-filter chain.
# For each order the filter schedules a release after a random delay
# below 20,000 microseconds and counts the latch down once.
def setup
  @latch = CountDownLatch.new( 20 )

  poller = TestWorkPoller.new
  @manager = VisitManager.new( poller )

  releasing_filter = fltr do |order|
    # @scheduler is read when the filter runs, so assigning it at the
    # end of setup is early enough.
    release_job = proc { @manager.release( order, nil ) }
    delay_us = rand( 20_000 )
    @scheduler.schedule( release_job, delay_us, TimeUnit::MICROSECONDS )
    @latch.countDown
  end

  @manager.filter_chain = FilterChain.new( "test", [ releasing_filter ] )

  @scheduler = Executors.new_scheduled_thread_pool( 1 )
end
|
52
|
+
|
53
|
+
# Stop the scheduler and then the manager, skipping either one that
# was never created.
def teardown
  if @scheduler
    @scheduler.shutdown_now
  end

  if @manager
    @manager.shutdown
  end
end
|
57
|
+
|
58
|
+
# Start the manager, wait up to 5 seconds for the latch created in
# setup to reach zero, then shut the manager down.
def test
  @manager.start
  pass

  drained = @latch.await( 5, TimeUnit::SECONDS )
  assert( drained )

  @manager.shutdown
  pass
end
|
65
|
+
|
66
|
+
# Poll strategy used by the test: every poll adds the same eight
# orders (hosts h1/h2/h3 under .com) to the visit queue, with the
# batch number embedded in each URL path.
class TestWorkPoller < GenericWorkPollStrategy
  include Gravitext::HTMap
  include Iudex::Core

  # Set the poll/check intervals and start the batch counter at zero.
  def initialize
    super()
    self.min_poll_interval = 5
    self.max_check_interval = 21
    self.max_poll_interval = 130 #ms
    @batch = 0
  end

  # Bump the batch counter and add one order per spec row
  # (host, item, priority) to visit_q.
  def pollWorkImpl( visit_q )
    @batch += 1

    specs = [ %w[ h2 a 2.2 ],
              %w[ h2 b 2.1 ],
              %w[ h2 c 2.0 ],
              %w[ h3 a 3.2 ],
              %w[ h3 b 3.1 ],
              %w[ h3 c 3.0 ],
              %w[ h1 a 1.2 ],
              %w[ h1 b 1.1 ] ]

    specs.each do |host, item, priority|
      visit_q.add( order( host, @batch, item, priority ) )
    end
  end

  # Build a UniMap order for http://<host>.com/<batch>/<i> with
  # priority p converted to a Float.
  def order( host, batch, i, p )
    entry = UniMap.new
    entry.url = visit_url( "http://#{host}.com/#{batch}/#{i}" )
    entry.priority = p.to_f
    entry
  end

  # Normalize a raw URL string via VisitURL.normalize.
  def visit_url( url )
    VisitURL.normalize( url )
  end

end
|
106
|
+
|
107
|
+
end
|
@@ -0,0 +1,268 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
require 'iudex-core'
|
22
|
+
|
23
|
+
class TestVisitQueue < MiniTest::Unit::TestCase
|
24
|
+
include Iudex::Core
|
25
|
+
include Gravitext::HTMap
|
26
|
+
|
27
|
+
import 'java.util.concurrent.Executors'
|
28
|
+
import 'java.util.concurrent.TimeUnit'
|
29
|
+
import 'java.lang.Runnable'
|
30
|
+
|
31
|
+
UniMap.create_key( 'vtest_input' )
|
32
|
+
|
33
|
+
UniMap.define_accessors
|
34
|
+
|
35
|
+
# Create the queue under test with a 50 ms default minimum host delay,
# plus a two-thread scheduler used to release orders later.
def setup
  @visit_q = VisitQueue.new.tap do |q|
    q.default_min_host_delay = 50 #ms
  end
  @scheduler = Executors.new_scheduled_thread_pool( 2 )
end
|
40
|
+
|
41
|
+
# Stop the scheduler if it is still set (assert_queue_empty clears it).
def teardown
  return unless @scheduler
  @scheduler.shutdown_now
end
|
44
|
+
|
45
|
+
# Three orders on one host must be acquired in descending priority.
def test_priority
  inputs = [ %w[ h a 1.3 ],
             %w[ h b 1.1 ],
             %w[ h c 1.2 ] ]
  orders = inputs.map { |oinp| order( oinp ) }

  @visit_q.add_all( orders )

  # Priorities are distinct, so ordering by negated priority yields
  # the same sequence as a descending comparison sort.
  by_priority = orders.sort_by { |o| -o.priority }
  by_priority.each do |o|
    assert_equal( o.vtest_input, acquire_order )
  end

  assert_queue_empty
end
|
60
|
+
|
61
|
+
# Add the shared fixture of eight orders to the queue and check the
# resulting host and order counts. An optional fourth column is read
# by acquire_order as the release delay in milliseconds.
def add_common_orders
  inputs = [ %w[ h2 a 2.2 100 ],
             %w[ w.h2 b 2.1 ],
             %w[ h2 c 2.0 ],
             %w[ h3 a 3.2 130 ],
             %w[ h3 b 3.1 130 ],
             %w[ m.h3 c 3.0 ],
             %w[ h1 a 1.2 ],
             %w[ h1 b 1.1 ] ]

  inputs.each do |oinp|
    @visit_q.add( order( oinp ) )
  end

  assert_equal( 3, @visit_q.host_count, "host count" )
  assert_equal( 8, @visit_q.order_count, "order count" )
end
|
77
|
+
|
78
|
+
# With default settings, the common orders must come out in this
# exact sequence. The assertion message is the 1-based position.
def test_hosts_acquire
  add_common_orders

  expected = [ %w[ h3 a 3.2 ],
               %w[ h2 a 2.2 ],
               %w[ h1 a 1.2 ],
               %w[ h1 b 1.1 ],
               %w[ w.h2 b 2.1 ],
               %w[ h3 b 3.1 ],
               %w[ h2 c 2.0 ],
               %w[ m.h3 c 3.0 ] ]

  expected.each_with_index do |want, idx|
    assert_equal( want, acquire_order, idx + 1 )
  end

  assert_queue_empty
end
|
97
|
+
|
98
|
+
# Configure h2.com explicitly (arguments 75 and 2), add six orders
# and check the acquisition sequence. The assertion message is the
# 1-based position.
def test_configure
  @visit_q.configure_host( 'h2.com', 75, 2 )

  inputs = [ %w[ h2 a 2.2 ],
             %w[ w.h2 b 2.1 ],
             %w[ h3 a 3.2 ],
             %w[ h3 b 3.1 ],
             %w[ h1 a 1.2 ],
             %w[ h1 b 1.1 ] ]

  inputs.each do |oinp|
    @visit_q.add( order( oinp ) )
  end
  assert_equal( 3, @visit_q.host_count, "host count" )

  expected = [ %w[ h3 a 3.2 ],
               %w[ h2 a 2.2 ],
               %w[ h1 a 1.2 ],
               %w[ h3 b 3.1 ],
               %w[ h1 b 1.1 ],
               %w[ w.h2 b 2.1 ] ]

  expected.each_with_index do |want, idx|
    assert_equal( want, acquire_order, idx + 1 )
  end

  assert_queue_empty
end
|
127
|
+
|
128
|
+
# With default_max_access_per_host set to 2, the common orders must
# come out in this sequence. The assertion message is the 1-based
# position.
def test_multi_access_2
  @visit_q.default_max_access_per_host = 2
  add_common_orders

  expected = [ %w[ h3 a 3.2 ],
               %w[ h2 a 2.2 ],
               %w[ h1 a 1.2 ],
               %w[ h3 b 3.1 ],
               %w[ w.h2 b 2.1 ],
               %w[ h1 b 1.1 ],
               %w[ h2 c 2.0 ],
               %w[ m.h3 c 3.0 ] ]

  expected.each_with_index do |want, idx|
    assert_equal( want, acquire_order, idx + 1 )
  end

  assert_queue_empty
end
|
148
|
+
|
149
|
+
# With default_max_access_per_host set to 3, the common orders must
# come out in this sequence. The assertion message is the 1-based
# position.
def test_multi_access_3
  @visit_q.default_max_access_per_host = 3
  add_common_orders

  expected = [ %w[ h3 a 3.2 ],
               %w[ h2 a 2.2 ],
               %w[ h1 a 1.2 ],
               %w[ h3 b 3.1 ],
               %w[ w.h2 b 2.1 ],
               %w[ h1 b 1.1 ],
               %w[ m.h3 c 3.0 ],
               %w[ h2 c 2.0 ] ]

  expected.each_with_index do |want, idx|
    assert_equal( want, acquire_order, idx + 1 )
  end

  assert_queue_empty
end
|
169
|
+
|
170
|
+
# Stress test: seed the queue with 512 random orders on h1/h2, then
# repeatedly acquire an order, schedule its release after a random
# delay, and randomly schedule up to 1024 additional adds. The queue
# must end up empty.
def test_interleaved
  @visit_q.default_max_access_per_host = 2
  @visit_q.default_min_host_delay = 3 #ms
  @visit_q.configure_host( 'h2.com', 1, 4 )

  # Single definition of "add one random order", shared by the seeding
  # loop and the scheduled jobs. The block previously passed to Job.new
  # repeated this expression and declared an unused second parameter
  # named p, which shadowed Kernel#p.
  add_random = lambda do |i|
    @visit_q.add( order( [ %w[ h1 h2 ][ rand( 2 ) ], i, 5 * rand ] ) )
  end

  512.times { |i| add_random.call( i ) }

  # Orders still expected from the queue: those already added plus
  # those whose add has been scheduled.
  pending = @visit_q.order_count
  added = 0

  while pending > 0
    o = @visit_q.acquire( 300 )
    flunk( "acquire returned null" ) unless o
    pending -= 1
    @scheduler.schedule( ReleaseJob.new( @visit_q, o ),
                         rand( 20_000 ), TimeUnit::MICROSECONDS )

    # Schedule a random number of further adds, capped at 1024 overall.
    while ( added < 1024 ) && ( rand(3) != 1 )
      added += 1
      pending += 1
      @scheduler.schedule( Job.new( added, &add_random ),
                           rand( 20_000 ), TimeUnit::MICROSECONDS )
    end

  end

  assert_queue_empty
end
|
202
|
+
|
203
|
+
# Shut the scheduler down, wait up to 2 seconds for it to terminate,
# clear @scheduler so teardown skips shutdown_now, then assert that
# the queue reports no orders and no hosts.
def assert_queue_empty
  @scheduler.shutdown
  @scheduler.await_termination( 2, TimeUnit::SECONDS )
  @scheduler = nil

  remaining_orders = @visit_q.order_count
  assert_equal( 0, remaining_orders, "order count" )

  remaining_hosts = @visit_q.host_count
  assert_equal( 0, remaining_hosts, "host count" )
end
|
210
|
+
|
211
|
+
# Acquire the next order (passing 200 to VisitQueue#acquire), schedule
# its release, and return the first three fields of the input it was
# built from. Returns nil when nothing was acquired.
#
# The release delay in milliseconds is the input's optional fourth
# field, defaulting to 20.
def acquire_order
  acquired = @visit_q.acquire( 200 )
  return nil unless acquired

  input = acquired.vtest_input
  hold_ms = ( input[3] || 20 ).to_i
  @scheduler.schedule( ReleaseJob.new( @visit_q, acquired ),
                       hold_ms,
                       TimeUnit::MILLISECONDS )
  input.slice( 0..2 )
end
|
222
|
+
|
223
|
+
# Build a UniMap order from [ host, path, priority, ... ].
# The URL is http://<host>.com/<path>, the priority is converted with
# to_f, and the untouched args array is kept in vtest_input.
def order( args )
  host, path, priority = args

  entry = UniMap.new
  entry.url = visit_url( "http://#{host}.com/#{path}" )
  entry.priority = priority.to_f
  entry.vtest_input = args
  entry
end
|
231
|
+
|
232
|
+
# Normalize a raw URL string via VisitURL.normalize and return the
# result.
def visit_url( url )
  normalized = VisitURL.normalize( url )
  normalized
end
|
235
|
+
|
236
|
+
LOG = RJack::SLF4J[ self ]
|
237
|
+
|
238
|
+
# Runnable that releases one order back to the visit queue; any
# StandardError raised by the release is logged rather than propagated.
class ReleaseJob
  include Runnable

  # visit_q:: queue to release to
  # order::   the order to release
  def initialize( visit_q, order )
    super()
    @visit_q, @order = visit_q, order
  end

  # Release the order, passing nil as the second argument.
  def run
    begin
      @visit_q.release( @order, nil )
    rescue StandardError => err
      LOG.error( err )
    end
  end
end
|
253
|
+
|
254
|
+
# Runnable that calls a stored block with stored arguments; any
# StandardError raised by the block is logged rather than propagated.
class Job
  include Runnable

  # args::  arguments passed to the block when the job runs
  # block:: the work to perform
  def initialize( *args, &block )
    @block, @args = block, args
  end

  # Call the block with the captured arguments.
  def run
    begin
      @block.call( *@args )
    rescue StandardError => err
      LOG.error( err )
    end
  end
end
|
267
|
+
|
268
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
#.hashdot.profile += jruby-shortlived
|
4
|
+
|
5
|
+
#--
|
6
|
+
# Copyright (c) 2011 David Kellum
|
7
|
+
#
|
8
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
9
|
+
# may not use this file except in compliance with the License. You
|
10
|
+
# may obtain a copy of the License at
|
11
|
+
#
|
12
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
13
|
+
#
|
14
|
+
# Unless required by applicable law or agreed to in writing, software
|
15
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
16
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
17
|
+
# implied. See the License for the specific language governing
|
18
|
+
# permissions and limitations under the License.
|
19
|
+
#++
|
20
|
+
|
21
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
22
|
+
require 'iudex-core'
|
23
|
+
|
24
|
+
class TestVisitURL < MiniTest::Unit::TestCase
|
25
|
+
include Iudex::Core
|
26
|
+
|
27
|
+
# def setup; end
|
28
|
+
# def teardown end
|
29
|
+
|
30
|
+
# Each set lists a canonical URL first, followed by raw variants that
# must normalize to the same string as the canonical one.
def test_normalize_basic

  sets = [ %w[ http://h.c/foo http://h.c/foo
               http://h.c//foo
               http://h.c/foo#anchor
               HTTP://H.C:80/foo
               HTTP://h.c/bar/../foo
               http://h.c/./foo
               http://h.c./foo
               http://h.c/foo? ],

           %w[ http://h.c/ http://h.c ],

           %w[ http://h.c/?x=a%26b http://h.c/?x=a%26b ],

           [ "http://h.c/foo", " \thttp://h.c/foo\n\r\t" ],
           [ "http://h.c/foo?q=a+b", "http://h.c/foo?q=a+b" ],
           [ "http://h.c/foo?q=a%20b", "http://h.c/foo?q=a b",
             "http://h.c/foo?q=a b",
             "HTTP://h.c/foo?q=a%20b#anchor",
             "http://h.c/foo?q=a\t b#anchor\t" ] ]

  sets.each do |canonical, *variants|
    want = VisitURL.normalize( canonical )
    variants.each do |raw|
      assert_equal( want.to_s, VisitURL.normalize( raw ).to_s )
    end
  end
end
|
59
|
+
|
60
|
+
# A raw non-ASCII path character must normalize to the same string as
# its percent-encoded form.
def test_normalize_utf8

  sets = [ %w[ http://h.c/f%C5%8Do HTTP://h.c/fōo ] ]

  sets.each do |canonical, *variants|
    want = VisitURL.normalize( canonical )
    variants.each do |raw|
      assert_equal( want.to_s, VisitURL.normalize( raw ).to_s )
    end
  end
end
|
71
|
+
|
72
|
+
# Escape-sequence normalization cases. Skipped up front, so the
# assertions below document intended behavior only.
def test_normalize_escape_case
  skip( "Escape normalizations not implemented" )

  sets = [ %w[ http://h.c/?x=a%3Ab http://h.c/?x=a%3ab ],
           %w[ http://h.c/%C2 http://h.c/%C2
               http://h.c/%c2 ],
           %w[ http://h.c/foo%20bar HTTP://h.c/%66oo%20bar ],
           %w[ http://h.c/a%5Bb%5D http://h.c/a[b] ] ]

  sets.each do |canonical, *variants|
    want = VisitURL.normalize( canonical )
    variants.each do |raw|
      assert_equal( want.to_s, VisitURL.normalize( raw ).to_s )
    end
  end
end
|
88
|
+
|
89
|
+
# Internationalized domain name case. Skipped up front, so the
# assertions below document intended behavior only.
def test_normalize_idn
  skip( "IDN normalization not implemented" )

  sets = [ %w[ http://xn--bcher-kva.ch/ http://Bücher.ch ] ]

  sets.each do |canonical, *variants|
    want = VisitURL.normalize( canonical )
    variants.each do |raw|
      assert_equal( want.to_s, VisitURL.normalize( raw ).to_s )
    end
  end
end
|
101
|
+
|
102
|
+
# The uhash of a normalized URL must render as these fixed strings.
def test_uhash
  cases = [ [ "http://gravitext.com/",    "8dOml647JKxoA1vSNdi3WAK" ],
            [ "http://gravitext.com/x/y", "0pRfQvGEzGRMQ-RgFbytf7l" ] ]

  cases.each do |url, want|
    digest = VisitURL.normalize( url ).uhash
    assert_equal( want, digest.to_s )
  end
end
|
109
|
+
|
110
|
+
# VisitURL.hash_domain must render as these fixed strings.
def test_domain_hash
  cases = [ [ "gravitext.com", "VdYKPM" ],
            [ "other.com",     "ZleSiQ" ] ]

  cases.each do |domain, want|
    digest = VisitURL.hash_domain( domain )
    assert_equal( want, digest.to_s )
  end
end
|
117
|
+
|
118
|
+
# Each row is [ expected, base, relative ]: resolving the relative
# reference against the normalized base must give the same string as
# normalizing the expected URL. The row itself is the failure message.
def test_resolve

  sets = [ %w[ http://h.c/ http://h.c/foo ] << "",
           %w[ http://h.c/ http://h.c/ ] << "",
           %w[ http://h.c/ http://h.c/foo ] << " ",

           %w[ http://h.c/ http://h.c/foo . ],
           %w[ http://h.c/bar http://h.c/foo /bar ],
           %w[ http://h.c/bar http://h.c/foo bar ],
           %w[ http://h.c/bar http://h.c/foo?q=1 bar ],
           %w[ http://h.c/bar http://h.c/foo/x/y /bar ],
           %w[ http://h.c/foo/bar http://h.c/foo/x/y ../bar ],
           %w[ http://h.c/foo/bar http://h.c/foo/ bar ],

           %w[ http://h.c/a%20b/c%20d http://h.c/a%20b/f ] << "c d",

           %w[ http://h.c/bar?q=1 http://h.c/foo bar?q=1 ],
           %w[ http://h.c/bar?q=1 http://h.c/foo/ /bar?q=1 ],
           %w[ http://h.c/bar?q=1 http://h.c/foo?x=2 bar?q=1 ],
           %w[ http://h.c/foo/bar?q=1 http://h.c/foo/ bar?q=1 ],
           %w[ http://h.c/foo/bar?q=1 http://h.c/foo/ ./bar?q=1 ] ]

  sets.each do |want_raw, base_raw, relative|
    want = VisitURL.normalize( want_raw )
    origin = VisitURL.normalize( base_raw )
    actual = origin.resolve( relative )

    assert_equal( want.to_s, actual.to_s,
                  [ want_raw, base_raw, relative ].inspect )
  end

end
|
149
|
+
|
150
|
+
end
|