iudex-core 1.2.b.1-java → 1.2.1-java
Sign up to get free protection for your applications and to get access to all the features.
- data/History.rdoc +10 -2
- data/Manifest.txt +2 -1
- data/README.rdoc +1 -0
- data/build/effective_tld_name.dat +1934 -255
- data/lib/iudex-core/base.rb +1 -1
- data/lib/iudex-core/iudex-core-1.2.1.jar +0 -0
- data/lib/iudex-core/mojibake.rb +1 -1
- data/pom.xml +5 -5
- data/test/test_charsets.rb +56 -0
- data/test/test_content_source.rb +1 -1
- metadata +22 -15
- data/lib/iudex-core/iudex-core-1.2.b.1.jar +0 -0
data/lib/iudex-core/base.rb
CHANGED
Binary file
|
data/lib/iudex-core/mojibake.rb
CHANGED
data/pom.xml
CHANGED
@@ -5,13 +5,13 @@
|
|
5
5
|
<groupId>iudex</groupId>
|
6
6
|
<artifactId>iudex-core</artifactId>
|
7
7
|
<packaging>jar</packaging>
|
8
|
-
<version>1.2.
|
8
|
+
<version>1.2.1</version>
|
9
9
|
<name>Iudex Core System</name>
|
10
10
|
|
11
11
|
<parent>
|
12
12
|
<groupId>iudex</groupId>
|
13
13
|
<artifactId>iudex-parent</artifactId>
|
14
|
-
<version>1.2.
|
14
|
+
<version>1.2.1</version>
|
15
15
|
<relativePath>..</relativePath>
|
16
16
|
</parent>
|
17
17
|
|
@@ -30,19 +30,19 @@
|
|
30
30
|
<dependency>
|
31
31
|
<groupId>iudex</groupId>
|
32
32
|
<artifactId>iudex-filter</artifactId>
|
33
|
-
<version>[1.2,1.2.
|
33
|
+
<version>[1.2.1,1.2.999)</version>
|
34
34
|
</dependency>
|
35
35
|
|
36
36
|
<dependency>
|
37
37
|
<groupId>iudex</groupId>
|
38
38
|
<artifactId>iudex-http</artifactId>
|
39
|
-
<version>[1.2,1.2.
|
39
|
+
<version>[1.2.1,1.2.999)</version>
|
40
40
|
</dependency>
|
41
41
|
|
42
42
|
<dependency>
|
43
43
|
<groupId>iudex</groupId>
|
44
44
|
<artifactId>iudex-barc</artifactId>
|
45
|
-
<version>[1.2,1.2.
|
45
|
+
<version>[1.2.1,1.2.999)</version>
|
46
46
|
</dependency>
|
47
47
|
|
48
48
|
<dependency>
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
#.hashdot.profile += jruby-shortlived
|
4
|
+
|
5
|
+
#--
|
6
|
+
# Copyright (c) 2008-2012 David Kellum
|
7
|
+
#
|
8
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
9
|
+
# may not use this file except in compliance with the License. You
|
10
|
+
# may obtain a copy of the License at
|
11
|
+
#
|
12
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
13
|
+
#
|
14
|
+
# Unless required by applicable law or agreed to in writing, software
|
15
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
16
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
17
|
+
# implied. See the License for the specific language governing
|
18
|
+
# permissions and limitations under the License.
|
19
|
+
#++
|
20
|
+
|
21
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
22
|
+
require 'iudex-core'
|
23
|
+
|
24
|
+
class TestCharsets < MiniTest::Unit::TestCase
|
25
|
+
include Gravitext::HTMap
|
26
|
+
|
27
|
+
import 'iudex.util.Charsets'
|
28
|
+
|
29
|
+
UniMap.define_accessors
|
30
|
+
|
31
|
+
def test_default
|
32
|
+
assert_equal( Charsets::WINDOWS_1252, Charsets.default_charset )
|
33
|
+
end
|
34
|
+
|
35
|
+
# Test all mappings from the whatwg guidelines that are supported by the JVM.
|
36
|
+
#
|
37
|
+
# @see http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#character-encodings-0
|
38
|
+
def test_expand
|
39
|
+
mapping = {
|
40
|
+
Charsets::EUC_KR => Charsets::WINDOWS_949,
|
41
|
+
Charsets::GB2312 => Charsets::GBK,
|
42
|
+
Charsets::ISO_8859_1 => Charsets::WINDOWS_1252,
|
43
|
+
Charsets::ISO_8859_9 => Charsets::WINDOWS_1254,
|
44
|
+
Charsets::ISO_8859_11 => Charsets::WINDOWS_874,
|
45
|
+
Charsets::KS_C_5601_1987 => Charsets::WINDOWS_949,
|
46
|
+
Charsets::SHIFT_JIS => Charsets::WINDOWS_31J,
|
47
|
+
Charsets::TIS_620 => Charsets::WINDOWS_874,
|
48
|
+
Charsets::ASCII => Charsets::WINDOWS_1252,
|
49
|
+
}
|
50
|
+
|
51
|
+
mapping.each_pair do |map_from, map_to|
|
52
|
+
assert_equal( map_to, Charsets.expand(map_from), "#{map_from} should map to #{map_to}" )
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
data/test/test_content_source.rb
CHANGED
metadata
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iudex-core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
version: 1.2.
|
4
|
+
prerelease:
|
5
|
+
version: 1.2.1
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- David Kellum
|
@@ -10,16 +10,19 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-09-15 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rjack-slf4j
|
17
17
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
|
-
- -
|
20
|
+
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: 1.6.
|
22
|
+
version: 1.6.5
|
23
|
+
- - <
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: "1.8"
|
23
26
|
requirement: *id001
|
24
27
|
prerelease: false
|
25
28
|
type: :runtime
|
@@ -41,7 +44,7 @@ dependencies:
|
|
41
44
|
requirements:
|
42
45
|
- - ~>
|
43
46
|
- !ruby/object:Gem::Version
|
44
|
-
version: 1.6.
|
47
|
+
version: 1.6.1
|
45
48
|
requirement: *id003
|
46
49
|
prerelease: false
|
47
50
|
type: :runtime
|
@@ -52,7 +55,7 @@ dependencies:
|
|
52
55
|
requirements:
|
53
56
|
- - ~>
|
54
57
|
- !ruby/object:Gem::Version
|
55
|
-
version: 1.2.
|
58
|
+
version: 1.2.1
|
56
59
|
requirement: *id004
|
57
60
|
prerelease: false
|
58
61
|
type: :runtime
|
@@ -63,7 +66,7 @@ dependencies:
|
|
63
66
|
requirements:
|
64
67
|
- - ~>
|
65
68
|
- !ruby/object:Gem::Version
|
66
|
-
version: 1.2.
|
69
|
+
version: 1.2.1
|
67
70
|
requirement: *id005
|
68
71
|
prerelease: false
|
69
72
|
type: :runtime
|
@@ -74,7 +77,7 @@ dependencies:
|
|
74
77
|
requirements:
|
75
78
|
- - ~>
|
76
79
|
- !ruby/object:Gem::Version
|
77
|
-
version: 1.2.
|
80
|
+
version: 1.2.1
|
78
81
|
requirement: *id006
|
79
82
|
prerelease: false
|
80
83
|
type: :runtime
|
@@ -96,7 +99,7 @@ dependencies:
|
|
96
99
|
requirements:
|
97
100
|
- - ~>
|
98
101
|
- !ruby/object:Gem::Version
|
99
|
-
version: "1.
|
102
|
+
version: "1.2"
|
100
103
|
requirement: *id008
|
101
104
|
prerelease: false
|
102
105
|
type: :development
|
@@ -140,6 +143,7 @@ files:
|
|
140
143
|
- lib/iudex-core/config.rb
|
141
144
|
- lib/iudex-core/mojibake.rb
|
142
145
|
- test/setup.rb
|
146
|
+
- test/test_charsets.rb
|
143
147
|
- test/test_content_fetcher.rb
|
144
148
|
- test/test_content_source.rb
|
145
149
|
- test/test_log_writer.rb
|
@@ -148,8 +152,8 @@ files:
|
|
148
152
|
- test/test_visit_manager.rb
|
149
153
|
- test/test_visit_queue.rb
|
150
154
|
- test/test_visit_url.rb
|
151
|
-
- lib/iudex-core/iudex-core-1.2.
|
152
|
-
homepage: http://
|
155
|
+
- lib/iudex-core/iudex-core-1.2.1.jar
|
156
|
+
homepage: http://iudex.gravitext.com
|
153
157
|
licenses: []
|
154
158
|
|
155
159
|
post_install_message:
|
@@ -170,13 +174,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
170
174
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
175
|
none: false
|
172
176
|
requirements:
|
173
|
-
- - "
|
177
|
+
- - ">="
|
174
178
|
- !ruby/object:Gem::Version
|
175
|
-
|
179
|
+
hash: 2
|
180
|
+
segments:
|
181
|
+
- 0
|
182
|
+
version: "0"
|
176
183
|
requirements: []
|
177
184
|
|
178
185
|
rubyforge_project:
|
179
|
-
rubygems_version: 1.8.
|
186
|
+
rubygems_version: 1.8.15
|
180
187
|
signing_key:
|
181
188
|
specification_version: 3
|
182
189
|
summary: Iudex is a general purpose web crawler and feed processor in ruby/java.
|
Binary file
|