iudex-char-detector 1.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemtest ADDED
File without changes
data/History.rdoc ADDED
@@ -0,0 +1,2 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Initial release with Iudex 1.1.x.
data/Manifest.txt ADDED
@@ -0,0 +1,16 @@
1
+ History.rdoc
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ pom.xml
6
+ bin/iudex-char-detect
7
+ lib/iudex-char-detector/base.rb
8
+ lib/iudex-char-detector.rb
9
+ test/sample.html.ascii
10
+ test/sample.html.iso
11
+ test/sample.html.utf16
12
+ test/sample.html.utf16le
13
+ test/sample.html.utf8
14
+ test/setup.rb
15
+ test/test_char_detector.rb
16
+ lib/iudex-char-detector/iudex-char-detector-1.1.0.jar
data/README.rdoc ADDED
@@ -0,0 +1,25 @@
1
+ = iudex-char-detector
2
+
3
+ * http://github.com/dekellum/iudex
4
+
5
+ == Description
6
+
7
+ Iudex is a general purpose web crawler and feed processor in
8
+ ruby/java. The iudex-char-detector gem provides charset detection
9
+ support.
10
+
11
+ == License
12
+
13
+ Copyright (c) 2011 David Kellum
14
+
15
+ Licensed under the Apache License, Version 2.0 (the "License"); you
16
+ may not use this file except in compliance with the License. You
17
+ may obtain a copy of the License at:
18
+
19
+ http://www.apache.org/licenses/LICENSE-2.0
20
+
21
+ Unless required by applicable law or agreed to in writing, software
22
+ distributed under the License is distributed on an "AS IS" BASIS,
23
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
24
+ implied. See the License for the specific language governing
25
+ permissions and limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,40 @@
1
+ # -*- ruby -*-
2
+
3
+ $LOAD_PATH << './lib'
4
+ require 'iudex-char-detector/base'
5
+ # base.rb defines only Iudex::CharDetector::VERSION, which is passed to TarPit below.
6
+ require 'rubygems'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
+ require 'rjack-tarpit'
9
+ # Configure the gem build: name, version, and the :no_assembly / :java_platform options.
10
+ t = RJack::TarPit.new( 'iudex-char-detector',
11
+ Iudex::CharDetector::VERSION,
12
+ :no_assembly, :java_platform )
13
+ # Gem specification details: developer, runtime dependencies, test library, development dependencies.
14
+ t.specify do |h|
15
+ h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
+ h.extra_deps += [ [ 'iudex-core', '~> 1.1.0' ],
17
+ [ 'rjack-icu', '~> 4.8.0' ] ]
18
+ # Tests use minitest; the gems listed next are development-only dependencies.
19
+ h.testlib = :minitest
20
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
21
+ [ 'rjack-logback', '~> 1.0' ] ]
22
+ end
23
+ # Declare pom.xml as a prerequisite of the Manifest.txt file task.
24
+ file 'Manifest.txt' => [ 'pom.xml' ]
25
+ # Release sanity checks. NOTE(review): test_line_match presumably fails unless the first line matching the 2nd argument also matches the 3rd -- confirm against rjack-tarpit.
26
+ task :check_pom_version do
27
+ t.test_line_match( 'pom.xml', /<version>/, /#{t.version}/ )
28
+ end
29
+ task :check_history_version do
30
+ t.test_line_match( 'History.rdoc', /^==/, / #{t.version} / )
31
+ end
32
+ task :check_history_date do
33
+ t.test_line_match( 'History.rdoc', /^==/, /\([0-9\-]+\)$/ )
34
+ end
35
+ # Attach the checks above as prerequisites of the gem, tag and push tasks.
36
+ task :gem => [ :check_pom_version, :check_history_version ]
37
+ task :tag => [ :check_pom_version, :check_history_version, :check_history_date ]
38
+ task :push => [ :check_history_date ]
39
+ # Let TarPit define its tasks.
40
+ t.define_tasks
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env jruby
2
+ # -*- ruby -*-
3
+
4
+ #--
5
+ # Copyright (c) 2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You
9
+ # may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+
20
+ $LOAD_PATH.unshift File.join( File.dirname(__FILE__), "..", "lib" )
21
+
22
+ require 'rubygems'
23
+ require 'optparse'
24
+
25
+ class ChartDetectUtil
26
+
27
+ require 'rjack-logback'
28
+ include RJack
29
+
30
+ Logback.config_console( :level => Logback::INFO, :stderr => true )
31
+
32
+ require 'iudex-char-detector'
33
+ include Iudex
34
+
35
+ include Gravitext::HTMap
36
+ UniMap.define_accessors
37
+
38
+ include Iudex::Core
39
+ include Iudex::CharDetector
40
+
41
+ import 'java.nio.ByteBuffer'
42
+ import 'java.nio.charset.Charset'
43
+
44
+ def initialize
45
+ @default_encode = "windows-1252"
46
+ end
47
+
48
+ def run( args = ARGV )
49
+
50
+ p = OptionParser.new do |opts|
51
+ opts.banner = "Usage: iudex-char-detect [options] [TestFile]"
52
+ opts.on( "-v", "--version", "Display version" ) do
53
+ puts "iudex-char-detector: #{CharDetector::VERSION}"
54
+ exit 1
55
+ end
56
+ opts.on( "-d", "--debug" ) do
57
+ Logback[ 'iudex' ].level = Logback::DEBUG
58
+ end
59
+ opts.on_tail( "-e", "--encoding ENCODING", String,
60
+ "Set default encoding (#{@default_encode})" ) do |enc|
61
+ @default_encode = enc
62
+ end
63
+ opts.on_tail( "-h", "--help", "Show help and exit" ) do
64
+ puts opts
65
+ puts
66
+ puts( "Detect charset of File and report encoding, confidence " +
67
+ "(independent of HTML features)" )
68
+ exit 1
69
+ end
70
+ end
71
+
72
+ p.parse!( args )
73
+
74
+ if args.empty?
75
+ show_detail( detect( $stdin ) )
76
+ else
77
+ max_name = args.map { |fn| fn.length }.max
78
+ args.each do |fname|
79
+ res = open( fname, "r" ) { |fin| detect( fin ) }
80
+ if ARGV.length > 1
81
+ res.unshift( fname )
82
+ puts "%-#{ max_name }s : %-12s (%5.4f)" % res
83
+ else
84
+ show_detail( res )
85
+ end
86
+ end
87
+ end
88
+
89
+ end
90
+
91
+ def detect( fin )
92
+ map = UniMap.new
93
+ bytes = ByteBuffer::wrap( fin.read.to_java_bytes )
94
+ map.source = ContentSource.new( bytes )
95
+ map.source.set_default_encoding( Charset::lookup( @default_encode ) )
96
+
97
+ df = CharDetectFilter.new
98
+ #FIXME: Option? df.max_detect_length =
99
+ df.filter( map )
100
+ s = map.source
101
+
102
+ [ s.default_encoding.name,
103
+ s.encoding_confidence,
104
+ s.encoding_confidences ]
105
+ end
106
+
107
+ def show_detail( res )
108
+ puts "%-12s (%.4f)" % res
109
+ puts "====================="
110
+ res[2].each do |enc,conf|
111
+ puts "%-12s (%.4f)" % [ enc, conf ]
112
+ end
113
+ end
114
+
115
+ end
116
+
117
+ ChartDetectUtil.new.run # script entry point: run with the default args (ARGV)
@@ -0,0 +1,21 @@
1
+ #--
2
+ # Copyright (c) 2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You may
6
+ # obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ module Iudex
18
+ module CharDetector
19
+ VERSION = '1.1.0' # gem version; also interpolated into the jar file name required by lib/iudex-char-detector.rb
20
+ end
21
+ end
@@ -0,0 +1,30 @@
1
+ #--
2
+ # Copyright (c) 2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You may
6
+ # obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-core'
18
+ require 'rjack-icu'
19
+
20
+ require 'iudex-char-detector/base.rb'
21
+
22
+ require 'java'
23
+
24
+ module Iudex
24
+ module CharDetector
25
+ require "iudex-char-detector/iudex-char-detector-#{VERSION}.jar"
26
+ # With the jar loaded, import the Java filter class so it is available as CharDetectFilter in this module.
27
+ import 'iudex.chardetector.CharDetectFilter'
28
+ end
29
+ end
data/pom.xml ADDED
@@ -0,0 +1,45 @@
1
+ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
2
+ <modelVersion>4.0.0</modelVersion>
3
+ <groupId>iudex</groupId>
4
+ <artifactId>iudex-char-detector</artifactId>
5
+ <packaging>jar</packaging>
6
+ <version>1.1.0</version>
7
+ <name>Iudex charset detection support</name>
8
+
9
+ <parent>
10
+ <groupId>iudex</groupId>
11
+ <artifactId>iudex-parent</artifactId>
12
+ <version>1.1</version>
13
+ <relativePath>..</relativePath>
14
+ </parent>
15
+
16
+ <dependencies>
17
+
18
+ <dependency>
19
+ <groupId>iudex</groupId>
20
+ <artifactId>iudex-core</artifactId>
21
+ <version>[1.1,1.2)</version>
22
+ </dependency>
23
+
24
+ <dependency>
25
+ <groupId>com.ibm.icu</groupId>
26
+ <artifactId>icu4j</artifactId>
27
+ <version>[4.8,4.8.9999]</version>
28
+ </dependency>
29
+
30
+ </dependencies>
31
+
32
+ <build>
33
+ <plugins>
34
+ <plugin>
35
+ <!-- Parent settings -->
36
+ <artifactId>maven-compiler-plugin</artifactId>
37
+ </plugin>
38
+ <plugin>
39
+ <!-- Parent settings -->
40
+ <artifactId>maven-source-plugin</artifactId>
41
+ </plugin>
42
+ </plugins>
43
+ </build>
44
+
45
+ </project>
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <title>Un documento electronica (titulo en ASCII)</title>
4
+ </head>
5
+ <body>
6
+ <p>De donde eres tu?</p>
7
+ </body>
8
+ </html>
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <title>Un documento electronica (titulo en ASCII)</title>
4
+ </head>
5
+ <body>
6
+ <p>�De donde eres t�?</p>
7
+ </body>
8
+ </html>
Binary file
Binary file
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <title>Un documento electronica (titulo en ASCII)</title>
4
+ </head>
5
+ <body>
6
+ <p>¿De donde eres tú?</p>
7
+ </body>
8
+ </html>
data/test/setup.rb ADDED
@@ -0,0 +1,39 @@
1
+ #--
2
+ # Copyright (c) 2010-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ #### General test setup: LOAD_PATH, logging, console output ####
18
+
19
+ test_dir = File.dirname( __FILE__ )
20
+ # Put ../lib (relative to this test directory) at the front of the load path, once.
21
+ ldir = File.join( test_dir, "..", "lib" )
22
+ $LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
23
+ # Log to stderr; raise the root logger to DEBUG when --verbose or -v is among the arguments.
24
+ require 'rubygems'
25
+ require 'rjack-logback'
26
+ RJack::Logback.config_console( :stderr => true )
27
+ if ARGV.include?( '--verbose' ) || ARGV.include?( '-v' )
28
+ RJack::Logback.root.level = RJack::Logback::DEBUG
29
+ end
30
+ # Load minitest and its autorun hook.
31
+ require 'minitest/unit'
32
+ require 'minitest/autorun'
34
+ # Make test output logging compatible: no partial lines.
35
+ # class TestOut
36
+ # def print( *a ); $stdout.puts( *a ); end
37
+ # def puts( *a ); $stdout.puts( *a ); end
38
+ # end
39
+ # MiniTest::Unit.output = TestOut.new
@@ -0,0 +1,156 @@
1
+ #!/usr/bin/env jruby
2
+ # -*- coding: utf-8 -*-
3
+ #.hashdot.profile += jruby-shortlived
4
+
5
+ #--
6
+ # Copyright (c) 2011 David Kellum
7
+ #
8
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
9
+ # may not use this file except in compliance with the License. You
10
+ # may obtain a copy of the License at
11
+ #
12
+ # http://www.apache.org/licenses/LICENSE-2.0
13
+ #
14
+ # Unless required by applicable law or agreed to in writing, software
15
+ # distributed under the License is distributed on an "AS IS" BASIS,
16
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
17
+ # implied. See the License for the specific language governing
18
+ # permissions and limitations under the License.
19
+ #++
20
+
21
+ require File.join( File.dirname( __FILE__ ), "setup" )
22
+ require 'iudex-char-detector'
23
+
24
+ class TestCharDetector < MiniTest::Unit::TestCase
25
+ include Gravitext::HTMap
26
+ UniMap.define_accessors
27
+ # Tests for CharDetectFilter: find_detect_buffer on raw buffers, and filter on UniMap content with a claimed default encoding.
28
+ include Iudex::Core
29
+ include Iudex::CharDetector
30
+
31
+ import 'java.nio.ByteBuffer'
32
+ import 'java.nio.charset.Charset'
33
+ JString = Java::java.lang.String
34
+ # Sample document; its only non-ASCII characters are the two in the <p> element.
35
+ SHORT_HTML = <<HTML
36
+ <html>
37
+ <head>
38
+ <title>Un documento electronica (titulo en ASCII)</title>
39
+ </head>
40
+ <body>
41
+ <p>¿De donde eres tú?</p>
42
+ </body>
43
+ </html>
44
+ HTML
45
+ # With max_detect_length = 3, empty and ASCII-only input must yield no detect buffer (nil).
46
+ def test_find_nothing
47
+ df = CharDetectFilter.new
48
+ df.max_detect_length = 3
49
+
50
+ [ "", "a", "ascii" ].each do |ib|
51
+ assert_nil( df.find_detect_buffer( wrap( ib ) ), ib )
52
+ end
53
+ end
54
+ # Each trial is [ input, expected ]: the buffer found in the ISO-8859-1 encoded input must decode back to expected.
55
+ def test_find_something
56
+ df = CharDetectFilter.new
57
+ df.max_detect_length = 3
58
+
59
+ trials = [ %w[ á á ],
60
+ %w[ é. é. ],
61
+ %w[ ..ü ..ü ],
62
+ %w[ ..ü0 ..ü ],
63
+ %w[ 0..í ..í ],
64
+ %w[ 0..ó0 ..ó ] ]
65
+
66
+ trials.each do |ib,ob|
67
+ out = df.find_detect_buffer( wrap( encode_as( ib, "ISO-8859-1" ) ) )
68
+ assert( out, ob )
69
+ assert_equal( ob, JString.new( out, "ISO-8859-1" ).to_s, ob )
70
+ end
71
+
72
+ end
73
+ # Empty and ASCII-only input: the UTF-8 default must be kept; only a confidence >= 0.0 is asserted.
74
+ def test_ascii
75
+ map = detect_from( "", "UTF-8" )
76
+ assert_encoding( map.source, "UTF-8", 0.0 )
77
+
78
+ map = detect_from( "ascii", "UTF-8" )
79
+ assert_encoding( map.source, "UTF-8", 0.0 )
80
+ end
81
+ # UTF-8 encoded SHORT_HTML claimed as UTF-8 must be reported as UTF-8 with confidence >= 0.80.
82
+ def test_html_utf8_as_default
83
+ map = detect_from( SHORT_HTML, "UTF-8" )
84
+ assert_encoding( map.source, "UTF-8", 0.80 )
85
+ end
86
+ # Same content wrongly claimed as ISO-8859-1 must still be reported as UTF-8.
87
+ def test_html_utf8_wrong_default
88
+ map = detect_from( SHORT_HTML, "UTF-8", "ISO-8859-1" )
89
+ assert_encoding( map.source, "UTF-8", 0.80 )
90
+ end
91
+ # ISO-8859-1 encoded SHORT_HTML claimed as ISO-8859-1 must be reported as ISO-8859-1 with confidence >= 0.40.
92
+ def test_html_iso_as_default
93
+ map = detect_from( SHORT_HTML, "ISO-8859-1" )
94
+ assert_encoding( map.source, "ISO-8859-1", 0.40 )
95
+ end
96
+ # Same content wrongly claimed as UTF-8 must still be reported as ISO-8859-1.
97
+ def test_html_iso_wrong_default
98
+ map = detect_from( SHORT_HTML, "ISO-8859-1", "UTF-8" )
99
+ assert_encoding( map.source, "ISO-8859-1", 0.40 )
100
+ end
101
+ # SHORT_HTML encoded and claimed as windows-1252 is expected to be reported as ISO-8859-1.
102
+ def test_html_iso_from_windows
103
+ map = detect_from( SHORT_HTML, "windows-1252" )
104
+ assert_encoding( map.source, "ISO-8859-1", 0.40 )
105
+ end
106
+ # Text wrapped in curly quotes, encoded and claimed as windows-1252, must be reported as windows-1252 with confidence >= 0.90.
107
+ def test_windows_default
108
+ map = detect_from( '“¿De donde eres tú?”', "windows-1252" )
109
+ assert_encoding( map.source, "windows-1252", 0.90 )
110
+ end
111
+ # Same windows-1252 bytes wrongly claimed as UTF-8 must still be reported as windows-1252.
112
+ def test_windows_wrong_default
113
+ map = detect_from( '“¿De donde eres tú?”', "windows-1252", "UTF-8" )
114
+ assert_encoding( map.source, "windows-1252", 0.90 )
115
+ end
116
+ # Curly-quoted text encoded and claimed as UTF-8 must be reported as UTF-8 with confidence >= 0.99.
117
+ def test_mojibaked_utf8
118
+ map = detect_from( '“quoted”', "UTF-8" )
119
+ assert_encoding( map.source, "UTF-8", 0.99 )
120
+ end
121
+ # Helper: encode bytes as enc, label the content with claimed_enc (or enc when nil), run CharDetectFilter (asserting filter returns true) and return the map.
122
+ def detect_from( bytes, enc, claimed_enc = nil )
123
+ map = content( encode_as( bytes, enc ), claimed_enc || enc )
124
+ df = CharDetectFilter.new
125
+ df.max_detect_length = SHORT_HTML.length - 20
126
+ assert( df.filter( map ) )
127
+ map
128
+ end
129
+ # Helper: assert the source's default encoding name equals enc and its encoding_confidence is at least min_confidence.
130
+ def assert_encoding( source, enc, min_confidence = 0.10 )
131
+ assert_equal( enc, source.default_encoding.name )
132
+ assert_operator( source.encoding_confidence, :>=, min_confidence )
133
+ end
134
+ # Helper: return the input unchanged for "UTF-8"; otherwise build a Java String from its UTF-8 bytes and return that string's bytes in the target encoding.
135
+ def encode_as( bytes, encoding )
136
+ if encoding == "UTF-8"
137
+ bytes
138
+ else
139
+ bytes = bytes.to_java_bytes if bytes.respond_to?( :to_java_bytes )
140
+ JString.new( bytes, "UTF-8" ).bytes( encoding )
141
+ end
142
+ end
143
+ # Helper: build a UniMap whose source is a ContentSource over the wrapped bytes, with the default encoding looked up from charset.
144
+ def content( bytes, charset = "UTF-8" )
145
+ map = UniMap.new
146
+ map.source = ContentSource.new( wrap( bytes ) )
147
+ map.source.set_default_encoding( Charset::lookup( charset ) )
148
+ map
149
+ end
150
+ # Helper: convert to Java bytes when the argument responds to to_java_bytes, then wrap in a ByteBuffer.
151
+ def wrap( bytes )
152
+ bytes = bytes.to_java_bytes if bytes.respond_to?( :to_java_bytes )
153
+ ByteBuffer::wrap( bytes )
154
+ end
155
+
156
+ end
metadata ADDED
@@ -0,0 +1,131 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iudex-char-detector
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 1.1.0
6
+ platform: java
7
+ authors:
8
+ - David Kellum
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-11-13 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: iudex-core
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ~>
22
+ - !ruby/object:Gem::Version
23
+ version: 1.1.0
24
+ type: :runtime
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: rjack-icu
28
+ prerelease: false
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ~>
33
+ - !ruby/object:Gem::Version
34
+ version: 4.8.0
35
+ type: :runtime
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: minitest
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: "2.3"
46
+ type: :development
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: rjack-logback
50
+ prerelease: false
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ version: "1.0"
57
+ type: :development
58
+ version_requirements: *id004
59
+ - !ruby/object:Gem::Dependency
60
+ name: rjack-tarpit
61
+ prerelease: false
62
+ requirement: &id005 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ~>
66
+ - !ruby/object:Gem::Version
67
+ version: 1.4.0
68
+ type: :development
69
+ version_requirements: *id005
70
+ description: |-
71
+ Iudex is a general purpose web crawler and feed processor in
72
+ ruby/java. The iudex-char-detector gem provides charset detection
73
+ support.
74
+ email:
75
+ - dek-oss@gravitext.com
76
+ executables:
77
+ - iudex-char-detect
78
+ extensions: []
79
+
80
+ extra_rdoc_files:
81
+ - Manifest.txt
82
+ - History.rdoc
83
+ - README.rdoc
84
+ files:
85
+ - History.rdoc
86
+ - Manifest.txt
87
+ - README.rdoc
88
+ - Rakefile
89
+ - pom.xml
90
+ - bin/iudex-char-detect
91
+ - lib/iudex-char-detector/base.rb
92
+ - lib/iudex-char-detector.rb
93
+ - test/sample.html.ascii
94
+ - test/sample.html.iso
95
+ - test/sample.html.utf16
96
+ - test/sample.html.utf16le
97
+ - test/sample.html.utf8
98
+ - test/setup.rb
99
+ - test/test_char_detector.rb
100
+ - lib/iudex-char-detector/iudex-char-detector-1.1.0.jar
101
+ - .gemtest
102
+ homepage: http://github.com/dekellum/iudex
103
+ licenses: []
104
+
105
+ post_install_message:
106
+ rdoc_options:
107
+ - --main
108
+ - README.rdoc
109
+ require_paths:
110
+ - lib
111
+ required_ruby_version: !ruby/object:Gem::Requirement
112
+ none: false
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: "0"
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ none: false
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: "0"
123
+ requirements: []
124
+
125
+ rubyforge_project: iudex-char-detector
126
+ rubygems_version: 1.8.9
127
+ signing_key:
128
+ specification_version: 3
129
+ summary: Iudex is a general purpose web crawler and feed processor in ruby/java
130
+ test_files:
131
+ - test/test_char_detector.rb