iudex-core 1.0.0-java → 1.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
data/History.rdoc CHANGED
@@ -1,2 +1,23 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Update to iudex-filter,http,barc ~> 1.1.0
3
+ * ContentFetcher updates for iudex-http changes
4
+ * New MojiBakeFilter, MojiBakeMapper with config table loading support
5
+ * Replaced VisitExecutor with asynchronous client compatible
6
+ VisitManager
7
+ * Visit/HostQueue acquire/release for concurrency and per-host settings
8
+ * VisitQueue uses VisitURL.domain (registration level) host keys
9
+ * New VisitCounter interface
10
+ * New RedirectHandler and Revisitor filters for direct redirect
11
+ handling with filter access
12
+ * VisitURL.resolve for redirect support
13
+ * Drop now redundant RLDomainFilter and RL_DOMAIN key
14
+ * Add domain to iudex-url-norm output
15
+ * Add encoding confidence map to ContentSource (supports encoding
16
+ detection)
17
+ * Add U+2060 WORD JOINER to Characters.ctrlWS list
18
+ * Update to minitest ~> 2.3
19
+ * Update to gravitext-util ~> 1.5.1 (for UniMap.toString)
20
+ * Update TLDSets based on upstream 9411dffc948b (2011-09-02)
21
+
1
22
  === 1.0.0 (2011-04-04)
2
23
  * Initial release.
data/Manifest.txt CHANGED
@@ -9,10 +9,18 @@ build/TLDSets.java.erb
9
9
  build/effective_tld_name.dat
10
10
  build/tld_set_generator.rb
11
11
  config/config.rb
12
+ config/mojibake
12
13
  lib/iudex-core/base.rb
13
14
  lib/iudex-core.rb
14
15
  lib/iudex-core/config.rb
16
+ lib/iudex-core/mojibake.rb
15
17
  test/setup.rb
16
18
  test/test_content_fetcher.rb
19
+ test/test_content_source.rb
17
20
  test/test_log_writer.rb
18
- lib/iudex-core/iudex-core-1.0.0.jar
21
+ test/test_mojibake.rb
22
+ test/test_redirect_handler.rb
23
+ test/test_visit_manager.rb
24
+ test/test_visit_queue.rb
25
+ test/test_visit_url.rb
26
+ lib/iudex-core/iudex-core-1.1.0.jar
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
4
4
  require 'iudex-core/base'
5
5
 
6
6
  require 'rubygems'
7
- gem 'rjack-tarpit', '~> 1.2'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
8
  require 'rjack-tarpit'
9
9
 
10
10
  t = RJack::TarPit.new( 'iudex-core',
@@ -15,13 +15,13 @@ t.specify do |h|
15
15
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
16
  h.extra_deps += [ [ 'rjack-slf4j', '~> 1.6.1' ],
17
17
  [ 'hooker', '~> 1.0.0' ],
18
- [ 'gravitext-util', '~> 1.5.0' ],
19
- [ 'iudex-filter', '~> 1.0.0' ],
20
- [ 'iudex-http', '~> 1.0.0' ],
21
- [ 'iudex-barc', '~> 1.0.0' ] ]
18
+ [ 'gravitext-util', '~> 1.5.1' ],
19
+ [ 'iudex-filter', '~> 1.1.0' ],
20
+ [ 'iudex-http', '~> 1.1.0' ],
21
+ [ 'iudex-barc', '~> 1.1.0' ] ]
22
22
 
23
23
  h.testlib = :minitest
24
- h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
24
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
25
25
  [ 'rjack-logback', '~> 1.0' ] ]
26
26
  end
27
27
 
@@ -34,7 +34,7 @@ module IudexBinScript
34
34
  Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
35
35
 
36
36
  OptionParser.new do |opts|
37
- opts.on( "-v", "--version", "Display version" ) do |file|
37
+ opts.on( "-v", "--version", "Display version" ) do
38
38
  puts "iudex-core: #{ Core::VERSION }"
39
39
  end
40
40
  Hooker.register_config( opts )
data/bin/iudex-url-norm CHANGED
@@ -34,14 +34,14 @@ module IudexBinScript
34
34
 
35
35
  OptionParser.new do |opts|
36
36
  opts.banner = "Usage: iudex-url-norm [options] [UrlsFile]..."
37
- opts.on( "-v", "--version", "Display version" ) do |file|
37
+ opts.on( "-v", "--version", "Display version" ) do
38
38
  puts "iudex-core: #{Core::VERSION}"
39
39
  exit 1
40
40
  end
41
41
  opts.on_tail( "-h", "--help", "Show help and exit" ) do
42
42
  puts opts
43
43
  puts
44
- puts( "Write uhash and normalized URLs to STDOUT, " +
44
+ puts( "Write uhash, domain, and normalized URLs to STDOUT, " +
45
45
  "from UrlsFile(s) or STDIN." )
46
46
  exit 1
47
47
  end
@@ -62,8 +62,8 @@ module IudexBinScript
62
62
 
63
63
  def self.process( fin )
64
64
  fin.each do |url|
65
- vurl = Core::VisitURL.normalize( url.chomp )
66
- puts vurl.uhash + ' ' + vurl.to_s
65
+ vurl = Core::VisitURL.normalize( url )
66
+ puts '%23s %24s %s' % [ vurl.uhash, vurl.domain, vurl ]
67
67
  end
68
68
  end
69
69