iudex-core 1.2.b.1-java → 1.2.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.rdoc +10 -2
- data/Manifest.txt +2 -1
- data/README.rdoc +1 -0
- data/build/effective_tld_name.dat +1934 -255
- data/lib/iudex-core/base.rb +1 -1
- data/lib/iudex-core/iudex-core-1.2.1.jar +0 -0
- data/lib/iudex-core/mojibake.rb +1 -1
- data/pom.xml +5 -5
- data/test/test_charsets.rb +56 -0
- data/test/test_content_source.rb +1 -1
- metadata +22 -15
- data/lib/iudex-core/iudex-core-1.2.b.1.jar +0 -0
data/lib/iudex-core/base.rb
CHANGED
Binary file
|
data/lib/iudex-core/mojibake.rb
CHANGED
data/pom.xml
CHANGED
@@ -5,13 +5,13 @@
|
|
5
5
|
<groupId>iudex</groupId>
|
6
6
|
<artifactId>iudex-core</artifactId>
|
7
7
|
<packaging>jar</packaging>
|
8
|
-
<version>1.2.
|
8
|
+
<version>1.2.1</version>
|
9
9
|
<name>Iudex Core System</name>
|
10
10
|
|
11
11
|
<parent>
|
12
12
|
<groupId>iudex</groupId>
|
13
13
|
<artifactId>iudex-parent</artifactId>
|
14
|
-
<version>1.2.
|
14
|
+
<version>1.2.1</version>
|
15
15
|
<relativePath>..</relativePath>
|
16
16
|
</parent>
|
17
17
|
|
@@ -30,19 +30,19 @@
|
|
30
30
|
<dependency>
|
31
31
|
<groupId>iudex</groupId>
|
32
32
|
<artifactId>iudex-filter</artifactId>
|
33
|
-
<version>[1.2,1.2.
|
33
|
+
<version>[1.2.1,1.2.999)</version>
|
34
34
|
</dependency>
|
35
35
|
|
36
36
|
<dependency>
|
37
37
|
<groupId>iudex</groupId>
|
38
38
|
<artifactId>iudex-http</artifactId>
|
39
|
-
<version>[1.2,1.2.
|
39
|
+
<version>[1.2.1,1.2.999)</version>
|
40
40
|
</dependency>
|
41
41
|
|
42
42
|
<dependency>
|
43
43
|
<groupId>iudex</groupId>
|
44
44
|
<artifactId>iudex-barc</artifactId>
|
45
|
-
<version>[1.2,1.2.
|
45
|
+
<version>[1.2.1,1.2.999)</version>
|
46
46
|
</dependency>
|
47
47
|
|
48
48
|
<dependency>
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
#.hashdot.profile += jruby-shortlived
|
4
|
+
|
5
|
+
#--
|
6
|
+
# Copyright (c) 2008-2012 David Kellum
|
7
|
+
#
|
8
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
9
|
+
# may not use this file except in compliance with the License. You
|
10
|
+
# may obtain a copy of the License at
|
11
|
+
#
|
12
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
13
|
+
#
|
14
|
+
# Unless required by applicable law or agreed to in writing, software
|
15
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
16
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
17
|
+
# implied. See the License for the specific language governing
|
18
|
+
# permissions and limitations under the License.
|
19
|
+
#++
|
20
|
+
|
21
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
22
|
+
require 'iudex-core'
|
23
|
+
|
24
|
+
class TestCharsets < MiniTest::Unit::TestCase
|
25
|
+
include Gravitext::HTMap
|
26
|
+
|
27
|
+
import 'iudex.util.Charsets'
|
28
|
+
|
29
|
+
UniMap.define_accessors
|
30
|
+
|
31
|
+
def test_default
|
32
|
+
assert_equal( Charsets::WINDOWS_1252, Charsets.default_charset )
|
33
|
+
end
|
34
|
+
|
35
|
+
# Test all mappings from the whatwg guidelines that are supported by the JVM.
|
36
|
+
#
|
37
|
+
# @see http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#character-encodings-0
|
38
|
+
def test_expand
|
39
|
+
mapping = {
|
40
|
+
Charsets::EUC_KR => Charsets::WINDOWS_949,
|
41
|
+
Charsets::GB2312 => Charsets::GBK,
|
42
|
+
Charsets::ISO_8859_1 => Charsets::WINDOWS_1252,
|
43
|
+
Charsets::ISO_8859_9 => Charsets::WINDOWS_1254,
|
44
|
+
Charsets::ISO_8859_11 => Charsets::WINDOWS_874,
|
45
|
+
Charsets::KS_C_5601_1987 => Charsets::WINDOWS_949,
|
46
|
+
Charsets::SHIFT_JIS => Charsets::WINDOWS_31J,
|
47
|
+
Charsets::TIS_620 => Charsets::WINDOWS_874,
|
48
|
+
Charsets::ASCII => Charsets::WINDOWS_1252,
|
49
|
+
}
|
50
|
+
|
51
|
+
mapping.each_pair do |map_from, map_to|
|
52
|
+
assert_equal( map_to, Charsets.expand(map_from), "#{map_from} should map to #{map_to}" )
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
data/test/test_content_source.rb
CHANGED
metadata
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iudex-core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
version: 1.2.
|
4
|
+
prerelease:
|
5
|
+
version: 1.2.1
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- David Kellum
|
@@ -10,16 +10,19 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-09-15 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rjack-slf4j
|
17
17
|
version_requirements: &id001 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
|
-
- -
|
20
|
+
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: 1.6.
|
22
|
+
version: 1.6.5
|
23
|
+
- - <
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: "1.8"
|
23
26
|
requirement: *id001
|
24
27
|
prerelease: false
|
25
28
|
type: :runtime
|
@@ -41,7 +44,7 @@ dependencies:
|
|
41
44
|
requirements:
|
42
45
|
- - ~>
|
43
46
|
- !ruby/object:Gem::Version
|
44
|
-
version: 1.6.
|
47
|
+
version: 1.6.1
|
45
48
|
requirement: *id003
|
46
49
|
prerelease: false
|
47
50
|
type: :runtime
|
@@ -52,7 +55,7 @@ dependencies:
|
|
52
55
|
requirements:
|
53
56
|
- - ~>
|
54
57
|
- !ruby/object:Gem::Version
|
55
|
-
version: 1.2.
|
58
|
+
version: 1.2.1
|
56
59
|
requirement: *id004
|
57
60
|
prerelease: false
|
58
61
|
type: :runtime
|
@@ -63,7 +66,7 @@ dependencies:
|
|
63
66
|
requirements:
|
64
67
|
- - ~>
|
65
68
|
- !ruby/object:Gem::Version
|
66
|
-
version: 1.2.
|
69
|
+
version: 1.2.1
|
67
70
|
requirement: *id005
|
68
71
|
prerelease: false
|
69
72
|
type: :runtime
|
@@ -74,7 +77,7 @@ dependencies:
|
|
74
77
|
requirements:
|
75
78
|
- - ~>
|
76
79
|
- !ruby/object:Gem::Version
|
77
|
-
version: 1.2.
|
80
|
+
version: 1.2.1
|
78
81
|
requirement: *id006
|
79
82
|
prerelease: false
|
80
83
|
type: :runtime
|
@@ -96,7 +99,7 @@ dependencies:
|
|
96
99
|
requirements:
|
97
100
|
- - ~>
|
98
101
|
- !ruby/object:Gem::Version
|
99
|
-
version: "1.
|
102
|
+
version: "1.2"
|
100
103
|
requirement: *id008
|
101
104
|
prerelease: false
|
102
105
|
type: :development
|
@@ -140,6 +143,7 @@ files:
|
|
140
143
|
- lib/iudex-core/config.rb
|
141
144
|
- lib/iudex-core/mojibake.rb
|
142
145
|
- test/setup.rb
|
146
|
+
- test/test_charsets.rb
|
143
147
|
- test/test_content_fetcher.rb
|
144
148
|
- test/test_content_source.rb
|
145
149
|
- test/test_log_writer.rb
|
@@ -148,8 +152,8 @@ files:
|
|
148
152
|
- test/test_visit_manager.rb
|
149
153
|
- test/test_visit_queue.rb
|
150
154
|
- test/test_visit_url.rb
|
151
|
-
- lib/iudex-core/iudex-core-1.2.
|
152
|
-
homepage: http://
|
155
|
+
- lib/iudex-core/iudex-core-1.2.1.jar
|
156
|
+
homepage: http://iudex.gravitext.com
|
153
157
|
licenses: []
|
154
158
|
|
155
159
|
post_install_message:
|
@@ -170,13 +174,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
170
174
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
175
|
none: false
|
172
176
|
requirements:
|
173
|
-
- - "
|
177
|
+
- - ">="
|
174
178
|
- !ruby/object:Gem::Version
|
175
|
-
|
179
|
+
hash: 2
|
180
|
+
segments:
|
181
|
+
- 0
|
182
|
+
version: "0"
|
176
183
|
requirements: []
|
177
184
|
|
178
185
|
rubyforge_project:
|
179
|
-
rubygems_version: 1.8.
|
186
|
+
rubygems_version: 1.8.15
|
180
187
|
signing_key:
|
181
188
|
specification_version: 3
|
182
189
|
summary: Iudex is a general purpose web crawler and feed processor in ruby/java.
|
Binary file
|