iudex-core 1.1.0-java → 1.2.b.0-java
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- data/History.rdoc +7 -0
- data/Manifest.txt +1 -1
- data/README.rdoc +1 -1
- data/Rakefile +2 -39
- data/bin/iudex-test-config +1 -1
- data/bin/iudex-url-norm +1 -1
- data/build/TLDSets.java.erb +1 -1
- data/build/effective_tld_name.dat +1 -1
- data/build/tld_set_generator.rb +1 -1
- data/lib/iudex-core/base.rb +2 -2
- data/lib/iudex-core/config.rb +1 -1
- data/lib/iudex-core/{iudex-core-1.1.0.jar → iudex-core-1.2.b.0.jar} +0 -0
- data/lib/iudex-core/mojibake.rb +1 -1
- data/lib/iudex-core.rb +1 -1
- data/pom.xml +5 -5
- data/test/setup.rb +20 -14
- data/test/test_content_fetcher.rb +1 -1
- data/test/test_content_source.rb +1 -1
- data/test/test_log_writer.rb +1 -1
- data/test/test_mojibake.rb +1 -1
- data/test/test_redirect_handler.rb +1 -1
- data/test/test_visit_manager.rb +1 -1
- data/test/test_visit_queue.rb +1 -1
- data/test/test_visit_url.rb +84 -12
- metadata +47 -56
- data/.gemtest +0 -0
data/History.rdoc
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
=== 1.2.b.0 (2012-3-4)
|
|
2
|
+
* VisitURL now throws SyntaxException on missing scheme, host, bad
|
|
3
|
+
ports and some other URL malformations.
|
|
4
|
+
* MojiBakeMapper optimization, no copy when nothing found.
|
|
5
|
+
* Upgrade to gravitext-util ~> 1.6.b (beta)
|
|
6
|
+
* Upgrade to tarpit ~> 2.0, bundler Gemfile, gemspec (dev)
|
|
7
|
+
|
|
1
8
|
=== 1.1.0 (2011-11-13)
|
|
2
9
|
* Update to iudex-filter,http,barc ~> 1.1.0
|
|
3
10
|
* ContentFetcher updates for iudex-http changes
|
data/Manifest.txt
CHANGED
data/README.rdoc
CHANGED
|
@@ -10,7 +10,7 @@ does not contain such facilities as database-backed state management.
|
|
|
10
10
|
|
|
11
11
|
== License
|
|
12
12
|
|
|
13
|
-
Copyright (c) 2008-
|
|
13
|
+
Copyright (c) 2008-2012 David Kellum
|
|
14
14
|
|
|
15
15
|
Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
16
16
|
may not use this file except in compliance with the License. You
|
data/Rakefile
CHANGED
|
@@ -1,45 +1,10 @@
|
|
|
1
1
|
# -*- ruby -*-
|
|
2
2
|
|
|
3
|
-
$LOAD_PATH << './lib'
|
|
4
|
-
require 'iudex-core/base'
|
|
5
|
-
|
|
6
3
|
require 'rubygems'
|
|
7
|
-
|
|
4
|
+
require 'bundler/setup'
|
|
8
5
|
require 'rjack-tarpit'
|
|
9
6
|
|
|
10
|
-
|
|
11
|
-
Iudex::Core::VERSION,
|
|
12
|
-
:no_assembly, :java_platform )
|
|
13
|
-
|
|
14
|
-
t.specify do |h|
|
|
15
|
-
h.developer( "David Kellum", "dek-oss@gravitext.com" )
|
|
16
|
-
h.extra_deps += [ [ 'rjack-slf4j', '~> 1.6.1' ],
|
|
17
|
-
[ 'hooker', '~> 1.0.0' ],
|
|
18
|
-
[ 'gravitext-util', '~> 1.5.1' ],
|
|
19
|
-
[ 'iudex-filter', '~> 1.1.0' ],
|
|
20
|
-
[ 'iudex-http', '~> 1.1.0' ],
|
|
21
|
-
[ 'iudex-barc', '~> 1.1.0' ] ]
|
|
22
|
-
|
|
23
|
-
h.testlib = :minitest
|
|
24
|
-
h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
|
|
25
|
-
[ 'rjack-logback', '~> 1.0' ] ]
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
file 'Manifest.txt' => "lib/#{t.name}/base.rb"
|
|
29
|
-
|
|
30
|
-
task :check_pom_version do
|
|
31
|
-
t.test_line_match( 'pom.xml', /<version>/, /#{t.version}/ )
|
|
32
|
-
end
|
|
33
|
-
task :check_history_version do
|
|
34
|
-
t.test_line_match( 'History.rdoc', /^==/, / #{t.version} / )
|
|
35
|
-
end
|
|
36
|
-
task :check_history_date do
|
|
37
|
-
t.test_line_match( 'History.rdoc', /^==/, /\([0-9\-]+\)$/ )
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
task :gem => [ :check_pom_version, :check_history_version ]
|
|
41
|
-
task :tag => [ :check_pom_version, :check_history_version, :check_history_date ]
|
|
42
|
-
task :push => [ :check_history_date ]
|
|
7
|
+
RJack::TarPit.new( 'iudex-core' ).define_tasks
|
|
43
8
|
|
|
44
9
|
file 'src/main/java/iudex/core/TLDSets.java' => FileList.new( "build/*" ) do
|
|
45
10
|
require 'build/tld_set_generator'
|
|
@@ -57,5 +22,3 @@ task :refresh_tld_dat do
|
|
|
57
22
|
sh( "curl http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1" +
|
|
58
23
|
" -o build/effective_tld_name.dat" )
|
|
59
24
|
end
|
|
60
|
-
|
|
61
|
-
t.define_tasks
|
data/bin/iudex-test-config
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env jruby
|
|
2
2
|
# -*- ruby -*-
|
|
3
3
|
#--
|
|
4
|
-
# Copyright (c) 2008-
|
|
4
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
7
7
|
# may not use this file except in compliance with the License. You
|
data/bin/iudex-url-norm
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env jruby
|
|
2
2
|
# -*- ruby -*-
|
|
3
3
|
#--
|
|
4
|
-
# Copyright (c) 2008-
|
|
4
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
7
7
|
# may not use this file except in compliance with the License. You
|
data/build/TLDSets.java.erb
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
//
|
|
16
16
|
// The Initial Developer of the Original Code is
|
|
17
17
|
// Jo Hermans <jo.hermans@gmail.com>.
|
|
18
|
-
// Portions created by the Initial Developer are Copyright (
|
|
18
|
+
// Portions created by the Initial Developer are Copyright (c) 2008-2012 David Kellum
|
|
19
19
|
// the Initial Developer. All Rights Reserved.
|
|
20
20
|
//
|
|
21
21
|
// Contributor(s):
|
data/build/tld_set_generator.rb
CHANGED
data/lib/iudex-core/base.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#--
|
|
2
|
-
# Copyright (c) 2008-
|
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
5
5
|
# may not use this file except in compliance with the License. You
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
module Iudex
|
|
18
18
|
module Core
|
|
19
|
-
VERSION = '1.
|
|
19
|
+
VERSION = '1.2.b.0'
|
|
20
20
|
|
|
21
21
|
LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
|
|
22
22
|
end
|
data/lib/iudex-core/config.rb
CHANGED
|
Binary file
|
data/lib/iudex-core/mojibake.rb
CHANGED
data/lib/iudex-core.rb
CHANGED
data/pom.xml
CHANGED
|
@@ -5,13 +5,13 @@
|
|
|
5
5
|
<groupId>iudex</groupId>
|
|
6
6
|
<artifactId>iudex-core</artifactId>
|
|
7
7
|
<packaging>jar</packaging>
|
|
8
|
-
<version>1.
|
|
8
|
+
<version>1.2.b.0</version>
|
|
9
9
|
<name>Iudex Core System</name>
|
|
10
10
|
|
|
11
11
|
<parent>
|
|
12
12
|
<groupId>iudex</groupId>
|
|
13
13
|
<artifactId>iudex-parent</artifactId>
|
|
14
|
-
<version>1.
|
|
14
|
+
<version>1.2.b.0</version>
|
|
15
15
|
<relativePath>..</relativePath>
|
|
16
16
|
</parent>
|
|
17
17
|
|
|
@@ -30,19 +30,19 @@
|
|
|
30
30
|
<dependency>
|
|
31
31
|
<groupId>iudex</groupId>
|
|
32
32
|
<artifactId>iudex-filter</artifactId>
|
|
33
|
-
<version>[1.
|
|
33
|
+
<version>[1.2,1.2.9999)</version>
|
|
34
34
|
</dependency>
|
|
35
35
|
|
|
36
36
|
<dependency>
|
|
37
37
|
<groupId>iudex</groupId>
|
|
38
38
|
<artifactId>iudex-http</artifactId>
|
|
39
|
-
<version>[1.
|
|
39
|
+
<version>[1.2,1.2.9999)</version>
|
|
40
40
|
</dependency>
|
|
41
41
|
|
|
42
42
|
<dependency>
|
|
43
43
|
<groupId>iudex</groupId>
|
|
44
44
|
<artifactId>iudex-barc</artifactId>
|
|
45
|
-
<version>[1.
|
|
45
|
+
<version>[1.2,1.2.9999)</version>
|
|
46
46
|
</dependency>
|
|
47
47
|
|
|
48
48
|
<dependency>
|
data/test/setup.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#--
|
|
2
|
-
# Copyright (c) 2008-
|
|
2
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
3
3
|
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
5
5
|
# may not use this file except in compliance with the License. You
|
|
@@ -14,27 +14,33 @@
|
|
|
14
14
|
# permissions and limitations under the License.
|
|
15
15
|
#++
|
|
16
16
|
|
|
17
|
-
#### General test setup
|
|
18
|
-
|
|
19
|
-
ldir = File.join( File.dirname( __FILE__ ), "..", "lib" )
|
|
20
|
-
$LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
|
|
17
|
+
#### General test setup, logging, console output ####
|
|
21
18
|
|
|
22
19
|
require 'rubygems'
|
|
23
|
-
require '
|
|
20
|
+
require 'bundler/setup'
|
|
21
|
+
|
|
24
22
|
require 'minitest/unit'
|
|
25
23
|
require 'minitest/autorun'
|
|
26
24
|
|
|
25
|
+
require 'rjack-logback'
|
|
26
|
+
|
|
27
27
|
module TestSetup
|
|
28
28
|
include RJack
|
|
29
|
-
Logback.config_console( :stderr => true )
|
|
30
|
-
|
|
29
|
+
Logback.config_console( :stderr => true, :thread => true )
|
|
30
|
+
|
|
31
|
+
if ( ARGV & %w[ -v --verbose --debug ] ).empty?
|
|
32
|
+
|
|
33
|
+
# Make test output logging compatible: no partial lines.
|
|
34
|
+
class TestOut
|
|
35
|
+
def print( *a ); $stdout.puts( *a ); end
|
|
36
|
+
def puts( *a ); $stdout.puts( *a ); end
|
|
37
|
+
end
|
|
38
|
+
MiniTest::Unit.output = TestOut.new
|
|
39
|
+
|
|
40
|
+
else
|
|
31
41
|
Logback.root.level = Logback::DEBUG
|
|
32
42
|
end
|
|
33
|
-
end
|
|
34
43
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def print( *a ); $stdout.puts( *a ); end
|
|
38
|
-
def puts( *a ); $stdout.puts( *a ); end
|
|
44
|
+
ARGV.delete( '--debug' )
|
|
45
|
+
|
|
39
46
|
end
|
|
40
|
-
MiniTest::Unit.output = TestOut.new
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
|
3
3
|
|
|
4
4
|
#--
|
|
5
|
-
# Copyright (c) 2008-
|
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
8
8
|
# may not use this file except in compliance with the License. You
|
data/test/test_content_source.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
|
3
3
|
|
|
4
4
|
#--
|
|
5
|
-
# Copyright (c)
|
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
8
8
|
# may not use this file except in compliance with the License. You
|
data/test/test_log_writer.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
|
3
3
|
|
|
4
4
|
#--
|
|
5
|
-
# Copyright (c) 2008-
|
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
8
8
|
# may not use this file except in compliance with the License. You
|
data/test/test_mojibake.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#.hashdot.profile += jruby-shortlived
|
|
4
4
|
|
|
5
5
|
#--
|
|
6
|
-
# Copyright (c)
|
|
6
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
7
7
|
#
|
|
8
8
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
9
9
|
# may not use this file except in compliance with the License. You
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
|
3
3
|
|
|
4
4
|
#--
|
|
5
|
-
# Copyright (c)
|
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
8
8
|
# may not use this file except in compliance with the License. You
|
data/test/test_visit_manager.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
|
3
3
|
|
|
4
4
|
#--
|
|
5
|
-
# Copyright (c)
|
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
8
8
|
# may not use this file except in compliance with the License. You
|
data/test/test_visit_queue.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#.hashdot.profile += jruby-shortlived
|
|
3
3
|
|
|
4
4
|
#--
|
|
5
|
-
# Copyright (c)
|
|
5
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
6
6
|
#
|
|
7
7
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
8
8
|
# may not use this file except in compliance with the License. You
|
data/test/test_visit_url.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
#.hashdot.profile += jruby-shortlived
|
|
4
4
|
|
|
5
5
|
#--
|
|
6
|
-
# Copyright (c)
|
|
6
|
+
# Copyright (c) 2008-2012 David Kellum
|
|
7
7
|
#
|
|
8
8
|
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
|
9
9
|
# may not use this file except in compliance with the License. You
|
|
@@ -40,6 +40,11 @@ class TestVisitURL < MiniTest::Unit::TestCase
|
|
|
40
40
|
|
|
41
41
|
%w[ http://h.c/ http://h.c ],
|
|
42
42
|
|
|
43
|
+
%w[ http://127.0.0.1/ http://127.0.0.1:80 ],
|
|
44
|
+
|
|
45
|
+
%w[ https://h.c/ httpS://h.c:443/
|
|
46
|
+
httpS://h.c:443 ],
|
|
47
|
+
|
|
43
48
|
%w[ http://h.c/?x=a%26b http://h.c/?x=a%26b ],
|
|
44
49
|
|
|
45
50
|
[ "http://h.c/foo", " \thttp://h.c/foo\n\r\t" ],
|
|
@@ -50,9 +55,9 @@ class TestVisitURL < MiniTest::Unit::TestCase
|
|
|
50
55
|
"http://h.c/foo?q=a\t b#anchor\t" ] ]
|
|
51
56
|
|
|
52
57
|
sets.each do |tset|
|
|
53
|
-
expected =
|
|
58
|
+
expected = tset.shift
|
|
54
59
|
tset.each do |raw|
|
|
55
|
-
assert_equal( expected
|
|
60
|
+
assert_equal( expected, VisitURL.normalize( raw ).to_s )
|
|
56
61
|
end
|
|
57
62
|
end
|
|
58
63
|
end
|
|
@@ -62,11 +67,80 @@ class TestVisitURL < MiniTest::Unit::TestCase
|
|
|
62
67
|
sets = [ %w[ http://h.c/f%C5%8Do HTTP://h.c/fōo ] ]
|
|
63
68
|
|
|
64
69
|
sets.each do |tset|
|
|
65
|
-
expected =
|
|
70
|
+
expected = tset.shift
|
|
71
|
+
tset.each do |raw|
|
|
72
|
+
assert_equal( expected, VisitURL.normalize( raw ).to_s )
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def test_bad_urls
|
|
78
|
+
bads = [ '',
|
|
79
|
+
' ',
|
|
80
|
+
'.',
|
|
81
|
+
':',
|
|
82
|
+
'\\',
|
|
83
|
+
'\/' ] +
|
|
84
|
+
%w[ bogus
|
|
85
|
+
bogus:
|
|
86
|
+
bogus:/
|
|
87
|
+
bogus:/bar
|
|
88
|
+
http
|
|
89
|
+
http:
|
|
90
|
+
http:/
|
|
91
|
+
http://
|
|
92
|
+
http:///
|
|
93
|
+
http:///path/
|
|
94
|
+
http://\[h/
|
|
95
|
+
http://h\]/
|
|
96
|
+
http://::/
|
|
97
|
+
http://[:]/
|
|
98
|
+
http://wonkie\biz
|
|
99
|
+
http://wonkie/biz\ness
|
|
100
|
+
https://h.c:-33/
|
|
101
|
+
https://h.c:0/
|
|
102
|
+
https://h.c:65537/ ]
|
|
103
|
+
|
|
104
|
+
bads.each do |raw|
|
|
105
|
+
begin
|
|
106
|
+
flunk "[#{raw}] normalized to [#{VisitURL.normalize( raw )}]"
|
|
107
|
+
rescue NativeException => e
|
|
108
|
+
if e.cause.is_a?( VisitURL::SyntaxException )
|
|
109
|
+
pass
|
|
110
|
+
else
|
|
111
|
+
raise e
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def test_ipv6
|
|
119
|
+
# Demonstrate validity from a URI perspective, but likely want to
|
|
120
|
+
# post-filter these.
|
|
121
|
+
# http://www.ietf.org/rfc/rfc2732.txt
|
|
122
|
+
sets = [
|
|
123
|
+
%w[ http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/index.html
|
|
124
|
+
http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html ],
|
|
125
|
+
%w[ http://[1080:0:0:0:8:800:200c:417a]/index.html ],
|
|
126
|
+
%w[ http://[3ffe:2a00:100:7031::1]/
|
|
127
|
+
http://[3ffe:2a00:100:7031::1] ],
|
|
128
|
+
%w[ http://[1080::8:800:200c:417a]/foo ],
|
|
129
|
+
%w[ http://[::]/ ], #FIXME: Unspecified or multicast
|
|
130
|
+
%w[ http://[::192.9.5.5]/ipng ],
|
|
131
|
+
%w[ http://[::ffff:129.144.52.38]/index.html
|
|
132
|
+
http://[::FFFF:129.144.52.38]:80/index.html ],
|
|
133
|
+
%w[ http://[2010:836b:4179::836b:4179]/
|
|
134
|
+
http://[2010:836B:4179::836B:4179] ] ]
|
|
135
|
+
|
|
136
|
+
sets.each do |tset|
|
|
137
|
+
expected = tset.shift
|
|
138
|
+
tset = [ expected ] if tset.empty? #identity test
|
|
66
139
|
tset.each do |raw|
|
|
67
|
-
assert_equal( expected
|
|
140
|
+
assert_equal( expected, VisitURL.normalize( raw ).to_s )
|
|
68
141
|
end
|
|
69
142
|
end
|
|
143
|
+
|
|
70
144
|
end
|
|
71
145
|
|
|
72
146
|
def test_normalize_escape_case
|
|
@@ -79,9 +153,9 @@ class TestVisitURL < MiniTest::Unit::TestCase
|
|
|
79
153
|
%w[ http://h.c/a%5Bb%5D http://h.c/a[b] ] ]
|
|
80
154
|
|
|
81
155
|
sets.each do |tset|
|
|
82
|
-
expected =
|
|
156
|
+
expected = tset.shift
|
|
83
157
|
tset.each do |raw|
|
|
84
|
-
assert_equal( expected
|
|
158
|
+
assert_equal( expected, VisitURL.normalize( raw ).to_s )
|
|
85
159
|
end
|
|
86
160
|
end
|
|
87
161
|
end
|
|
@@ -92,9 +166,9 @@ class TestVisitURL < MiniTest::Unit::TestCase
|
|
|
92
166
|
sets = [ %w[ http://xn--bcher-kva.ch/ http://Bücher.ch ] ]
|
|
93
167
|
|
|
94
168
|
sets.each do |tset|
|
|
95
|
-
expected =
|
|
169
|
+
expected = tset.shift
|
|
96
170
|
tset.each do |raw|
|
|
97
|
-
assert_equal( expected
|
|
171
|
+
assert_equal( expected, VisitURL.normalize( raw ).to_s )
|
|
98
172
|
end
|
|
99
173
|
end
|
|
100
174
|
end
|
|
@@ -138,11 +212,9 @@ class TestVisitURL < MiniTest::Unit::TestCase
|
|
|
138
212
|
%w[ http://h.c/foo/bar?q=1 http://h.c/foo/ ./bar?q=1 ] ]
|
|
139
213
|
|
|
140
214
|
sets.each do |e,b,r|
|
|
141
|
-
expected = VisitURL.normalize( e )
|
|
142
215
|
base = VisitURL.normalize( b )
|
|
143
216
|
resolved = base.resolve( r )
|
|
144
|
-
|
|
145
|
-
assert_equal( expected.to_s, resolved.to_s, [ e,b,r ].inspect )
|
|
217
|
+
assert_equal( e, resolved.to_s, [ e,b,r ].inspect )
|
|
146
218
|
end
|
|
147
219
|
|
|
148
220
|
end
|
metadata
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iudex-core
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
prerelease:
|
|
5
|
-
version: 1.
|
|
4
|
+
prerelease: 4
|
|
5
|
+
version: 1.2.b.0
|
|
6
6
|
platform: java
|
|
7
7
|
authors:
|
|
8
8
|
- David Kellum
|
|
@@ -10,111 +10,108 @@ autorequire:
|
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
12
|
|
|
13
|
-
date:
|
|
13
|
+
date: 2012-03-05 00:00:00 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: rjack-slf4j
|
|
17
|
-
|
|
18
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
|
17
|
+
version_requirements: &id001 !ruby/object:Gem::Requirement
|
|
19
18
|
none: false
|
|
20
19
|
requirements:
|
|
21
20
|
- - ~>
|
|
22
21
|
- !ruby/object:Gem::Version
|
|
23
22
|
version: 1.6.1
|
|
23
|
+
requirement: *id001
|
|
24
|
+
prerelease: false
|
|
24
25
|
type: :runtime
|
|
25
|
-
version_requirements: *id001
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
27
|
name: hooker
|
|
28
|
-
|
|
29
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
|
28
|
+
version_requirements: &id002 !ruby/object:Gem::Requirement
|
|
30
29
|
none: false
|
|
31
30
|
requirements:
|
|
32
31
|
- - ~>
|
|
33
32
|
- !ruby/object:Gem::Version
|
|
34
33
|
version: 1.0.0
|
|
34
|
+
requirement: *id002
|
|
35
|
+
prerelease: false
|
|
35
36
|
type: :runtime
|
|
36
|
-
version_requirements: *id002
|
|
37
37
|
- !ruby/object:Gem::Dependency
|
|
38
38
|
name: gravitext-util
|
|
39
|
-
|
|
40
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
|
39
|
+
version_requirements: &id003 !ruby/object:Gem::Requirement
|
|
41
40
|
none: false
|
|
42
41
|
requirements:
|
|
43
42
|
- - ~>
|
|
44
43
|
- !ruby/object:Gem::Version
|
|
45
|
-
version: 1.
|
|
44
|
+
version: 1.6.b
|
|
45
|
+
requirement: *id003
|
|
46
|
+
prerelease: false
|
|
46
47
|
type: :runtime
|
|
47
|
-
version_requirements: *id003
|
|
48
48
|
- !ruby/object:Gem::Dependency
|
|
49
49
|
name: iudex-filter
|
|
50
|
-
|
|
51
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
|
50
|
+
version_requirements: &id004 !ruby/object:Gem::Requirement
|
|
52
51
|
none: false
|
|
53
52
|
requirements:
|
|
54
53
|
- - ~>
|
|
55
54
|
- !ruby/object:Gem::Version
|
|
56
|
-
version: 1.
|
|
55
|
+
version: 1.2.b
|
|
56
|
+
requirement: *id004
|
|
57
|
+
prerelease: false
|
|
57
58
|
type: :runtime
|
|
58
|
-
version_requirements: *id004
|
|
59
59
|
- !ruby/object:Gem::Dependency
|
|
60
60
|
name: iudex-http
|
|
61
|
-
|
|
62
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
|
61
|
+
version_requirements: &id005 !ruby/object:Gem::Requirement
|
|
63
62
|
none: false
|
|
64
63
|
requirements:
|
|
65
64
|
- - ~>
|
|
66
65
|
- !ruby/object:Gem::Version
|
|
67
|
-
version: 1.
|
|
66
|
+
version: 1.2.b
|
|
67
|
+
requirement: *id005
|
|
68
|
+
prerelease: false
|
|
68
69
|
type: :runtime
|
|
69
|
-
version_requirements: *id005
|
|
70
70
|
- !ruby/object:Gem::Dependency
|
|
71
71
|
name: iudex-barc
|
|
72
|
-
|
|
73
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
|
72
|
+
version_requirements: &id006 !ruby/object:Gem::Requirement
|
|
74
73
|
none: false
|
|
75
74
|
requirements:
|
|
76
75
|
- - ~>
|
|
77
76
|
- !ruby/object:Gem::Version
|
|
78
|
-
version: 1.
|
|
77
|
+
version: 1.2.b
|
|
78
|
+
requirement: *id006
|
|
79
|
+
prerelease: false
|
|
79
80
|
type: :runtime
|
|
80
|
-
version_requirements: *id006
|
|
81
81
|
- !ruby/object:Gem::Dependency
|
|
82
82
|
name: minitest
|
|
83
|
-
|
|
84
|
-
requirement: &id007 !ruby/object:Gem::Requirement
|
|
83
|
+
version_requirements: &id007 !ruby/object:Gem::Requirement
|
|
85
84
|
none: false
|
|
86
85
|
requirements:
|
|
87
86
|
- - ~>
|
|
88
87
|
- !ruby/object:Gem::Version
|
|
89
88
|
version: "2.3"
|
|
89
|
+
requirement: *id007
|
|
90
|
+
prerelease: false
|
|
90
91
|
type: :development
|
|
91
|
-
version_requirements: *id007
|
|
92
92
|
- !ruby/object:Gem::Dependency
|
|
93
93
|
name: rjack-logback
|
|
94
|
-
|
|
95
|
-
requirement: &id008 !ruby/object:Gem::Requirement
|
|
94
|
+
version_requirements: &id008 !ruby/object:Gem::Requirement
|
|
96
95
|
none: false
|
|
97
96
|
requirements:
|
|
98
97
|
- - ~>
|
|
99
98
|
- !ruby/object:Gem::Version
|
|
100
99
|
version: "1.0"
|
|
100
|
+
requirement: *id008
|
|
101
|
+
prerelease: false
|
|
101
102
|
type: :development
|
|
102
|
-
version_requirements: *id008
|
|
103
103
|
- !ruby/object:Gem::Dependency
|
|
104
104
|
name: rjack-tarpit
|
|
105
|
-
|
|
106
|
-
requirement: &id009 !ruby/object:Gem::Requirement
|
|
105
|
+
version_requirements: &id009 !ruby/object:Gem::Requirement
|
|
107
106
|
none: false
|
|
108
107
|
requirements:
|
|
109
108
|
- - ~>
|
|
110
109
|
- !ruby/object:Gem::Version
|
|
111
|
-
version:
|
|
110
|
+
version: "2.0"
|
|
111
|
+
requirement: *id009
|
|
112
|
+
prerelease: false
|
|
112
113
|
type: :development
|
|
113
|
-
|
|
114
|
-
description: |-
|
|
115
|
-
Iudex is a general purpose web crawler and feed processor in
|
|
116
|
-
ruby/java. The iudex-core gem contains core facilities and notably,
|
|
117
|
-
does not contain such facilities as database-backed state management.
|
|
114
|
+
description: Iudex is a general purpose web crawler and feed processor in ruby/java. The iudex-core gem contains core facilities and notably, does not contain such facilities as database-backed state management.
|
|
118
115
|
email:
|
|
119
116
|
- dek-oss@gravitext.com
|
|
120
117
|
executables:
|
|
@@ -123,7 +120,6 @@ executables:
|
|
|
123
120
|
extensions: []
|
|
124
121
|
|
|
125
122
|
extra_rdoc_files:
|
|
126
|
-
- Manifest.txt
|
|
127
123
|
- History.rdoc
|
|
128
124
|
- README.rdoc
|
|
129
125
|
files:
|
|
@@ -152,8 +148,7 @@ files:
|
|
|
152
148
|
- test/test_visit_manager.rb
|
|
153
149
|
- test/test_visit_queue.rb
|
|
154
150
|
- test/test_visit_url.rb
|
|
155
|
-
- lib/iudex-core/iudex-core-1.
|
|
156
|
-
- .gemtest
|
|
151
|
+
- lib/iudex-core/iudex-core-1.2.b.0.jar
|
|
157
152
|
homepage: http://github.com/dekellum/iudex
|
|
158
153
|
licenses: []
|
|
159
154
|
|
|
@@ -168,26 +163,22 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
168
163
|
requirements:
|
|
169
164
|
- - ">="
|
|
170
165
|
- !ruby/object:Gem::Version
|
|
166
|
+
hash: 2
|
|
167
|
+
segments:
|
|
168
|
+
- 0
|
|
171
169
|
version: "0"
|
|
172
170
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
173
171
|
none: false
|
|
174
172
|
requirements:
|
|
175
|
-
- - "
|
|
173
|
+
- - ">"
|
|
176
174
|
- !ruby/object:Gem::Version
|
|
177
|
-
version:
|
|
175
|
+
version: 1.3.1
|
|
178
176
|
requirements: []
|
|
179
177
|
|
|
180
|
-
rubyforge_project:
|
|
181
|
-
rubygems_version: 1.8.
|
|
178
|
+
rubyforge_project:
|
|
179
|
+
rubygems_version: 1.8.15
|
|
182
180
|
signing_key:
|
|
183
181
|
specification_version: 3
|
|
184
|
-
summary: Iudex is a general purpose web crawler and feed processor in ruby/java
|
|
185
|
-
test_files:
|
|
186
|
-
|
|
187
|
-
- test/test_content_fetcher.rb
|
|
188
|
-
- test/test_visit_url.rb
|
|
189
|
-
- test/test_mojibake.rb
|
|
190
|
-
- test/test_log_writer.rb
|
|
191
|
-
- test/test_visit_queue.rb
|
|
192
|
-
- test/test_redirect_handler.rb
|
|
193
|
-
- test/test_content_source.rb
|
|
182
|
+
summary: Iudex is a general purpose web crawler and feed processor in ruby/java.
|
|
183
|
+
test_files: []
|
|
184
|
+
|
data/.gemtest
DELETED
|
File without changes
|