iudex-core 1.0.0-java → 1.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemtest ADDED
File without changes
data/History.rdoc CHANGED
@@ -1,2 +1,23 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Update to iudex-filter,http,barc ~> 1.1.0
3
+ * ContentFetcher updates for iudex-http changes
4
+ * New MojiBakeFilter, MojiBakeMapper with config table loading support
5
+ * Replaced VisitExecutor with asynchronous client compatible
6
+ VisitManager
7
+ * Visit/HostQueue acquire/release for concurrency and per-host settings
8
+ * VisitQueue uses VisitURL.domain (registration level) host keys
9
+ * New VisitCounter interface
10
+ * New RedirectHandler and Revisitor filters for direct redirect
11
+ handling with filter access
12
+ * VisitURL.resolve for redirect support
13
+ * Drop now redundant RLDomainFilter and RL_DOMAIN key
14
+ * Add domain to iudex-url-norm output
15
+ * Add encoding confidence map to ContentSource (supports encoding
16
+ detection)
17
+ * Add U+2060 WORD JOINER to Characters.ctrlWS list
18
+ * Update to minitest ~> 2.3
19
+ * Update to gravitext-util ~> 1.5.1 (for UniMap.toString)
20
+ * Update TLDSets based on upstream 9411dffc948b (2011-09-02)
21
+
1
22
  === 1.0.0 (2011-04-04)
2
23
  * Initial release.
data/Manifest.txt CHANGED
@@ -9,10 +9,18 @@ build/TLDSets.java.erb
9
9
  build/effective_tld_name.dat
10
10
  build/tld_set_generator.rb
11
11
  config/config.rb
12
+ config/mojibake
12
13
  lib/iudex-core/base.rb
13
14
  lib/iudex-core.rb
14
15
  lib/iudex-core/config.rb
16
+ lib/iudex-core/mojibake.rb
15
17
  test/setup.rb
16
18
  test/test_content_fetcher.rb
19
+ test/test_content_source.rb
17
20
  test/test_log_writer.rb
18
- lib/iudex-core/iudex-core-1.0.0.jar
21
+ test/test_mojibake.rb
22
+ test/test_redirect_handler.rb
23
+ test/test_visit_manager.rb
24
+ test/test_visit_queue.rb
25
+ test/test_visit_url.rb
26
+ lib/iudex-core/iudex-core-1.1.0.jar
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
4
4
  require 'iudex-core/base'
5
5
 
6
6
  require 'rubygems'
7
- gem 'rjack-tarpit', '~> 1.2'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
8
  require 'rjack-tarpit'
9
9
 
10
10
  t = RJack::TarPit.new( 'iudex-core',
@@ -15,13 +15,13 @@ t.specify do |h|
15
15
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
16
  h.extra_deps += [ [ 'rjack-slf4j', '~> 1.6.1' ],
17
17
  [ 'hooker', '~> 1.0.0' ],
18
- [ 'gravitext-util', '~> 1.5.0' ],
19
- [ 'iudex-filter', '~> 1.0.0' ],
20
- [ 'iudex-http', '~> 1.0.0' ],
21
- [ 'iudex-barc', '~> 1.0.0' ] ]
18
+ [ 'gravitext-util', '~> 1.5.1' ],
19
+ [ 'iudex-filter', '~> 1.1.0' ],
20
+ [ 'iudex-http', '~> 1.1.0' ],
21
+ [ 'iudex-barc', '~> 1.1.0' ] ]
22
22
 
23
23
  h.testlib = :minitest
24
- h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
24
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
25
25
  [ 'rjack-logback', '~> 1.0' ] ]
26
26
  end
27
27
 
@@ -34,7 +34,7 @@ module IudexBinScript
34
34
  Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
35
35
 
36
36
  OptionParser.new do |opts|
37
- opts.on( "-v", "--version", "Display version" ) do |file|
37
+ opts.on( "-v", "--version", "Display version" ) do
38
38
  puts "iudex-core: #{ Core::VERSION }"
39
39
  end
40
40
  Hooker.register_config( opts )
data/bin/iudex-url-norm CHANGED
@@ -34,14 +34,14 @@ module IudexBinScript
34
34
 
35
35
  OptionParser.new do |opts|
36
36
  opts.banner = "Usage: iudex-url-norm [options] [UrlsFile]..."
37
- opts.on( "-v", "--version", "Display version" ) do |file|
37
+ opts.on( "-v", "--version", "Display version" ) do
38
38
  puts "iudex-core: #{Core::VERSION}"
39
39
  exit 1
40
40
  end
41
41
  opts.on_tail( "-h", "--help", "Show help and exit" ) do
42
42
  puts opts
43
43
  puts
44
- puts( "Write uhash and normalized URLs to STDOUT, " +
44
+ puts( "Write uhash, domain, and normalized URLs to STDOUT, " +
45
45
  "from UrlsFile(s) or STDIN." )
46
46
  exit 1
47
47
  end
@@ -62,8 +62,8 @@ module IudexBinScript
62
62
 
63
63
  def self.process( fin )
64
64
  fin.each do |url|
65
- vurl = Core::VisitURL.normalize( url.chomp )
66
- puts vurl.uhash + ' ' + vurl.to_s
65
+ vurl = Core::VisitURL.normalize( url )
66
+ puts '%23s %24s %s' % [ vurl.uhash, vurl.domain, vurl ]
67
67
  end
68
68
  end
69
69