iudex-core 1.2.1-java → 1.3.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/History.rdoc CHANGED
@@ -1,3 +1,14 @@
1
+ === 1.3.0 (2012-11-8)
2
+ * Add DomainKey with optional :type to support configuring of
3
+ a :domain,:type specific HostQueue
4
+ * Add VisitQueue.config( options ) extensions for cleaner
5
+ configuration of HostQueue with type, rate, etc.
6
+ * Update TLDSets based on upstream c61f326ad19f 2012-10-25
7
+ * Upgrade to gravitext-util ~> 1.7.0
8
+ * Upgrade to slf4j ~> 1.7.0, logback ~> 1.5 (dev)
9
+ * Misc java logging simplifications with slf4j 1.7 varargs
10
+ * Expose GenericWorkPollStrategy.log()
11
+
1
12
  === 1.2.1 (2012-9-15)
2
13
  * Upgrade/narrow to gravitext-util ~> 1.6.1
3
14
  * Upgrade to slf4j [1.6.5,1.8), logback ~> 1.2 (dev)
data/Manifest.txt CHANGED
@@ -14,6 +14,7 @@ lib/iudex-core/base.rb
14
14
  lib/iudex-core.rb
15
15
  lib/iudex-core/config.rb
16
16
  lib/iudex-core/mojibake.rb
17
+ lib/iudex-core/visit_queue.rb
17
18
  test/setup.rb
18
19
  test/test_charsets.rb
19
20
  test/test_content_fetcher.rb
@@ -24,4 +25,4 @@ test/test_redirect_handler.rb
24
25
  test/test_visit_manager.rb
25
26
  test/test_visit_queue.rb
26
27
  test/test_visit_url.rb
27
- lib/iudex-core/iudex-core-1.2.1.jar
28
+ lib/iudex-core/iudex-core-1.3.0.jar
data/Rakefile CHANGED
@@ -18,6 +18,7 @@ task :clean do
18
18
  rm_f 'src/main/java/iudex/core/TLDSets.java'
19
19
  end
20
20
 
21
+ desc "Download and install latest effective_tld_name.dat"
21
22
  task :refresh_tld_dat do
22
23
  sh( "curl http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1" +
23
24
  " -o build/effective_tld_name.dat" )
@@ -219,7 +219,6 @@ net.au
219
219
  org.au
220
220
  edu.au
221
221
  gov.au
222
- csiro.au
223
222
  asn.au
224
223
  id.au
225
224
  // Historic 2LDs (closed to new registration, but sites still exist)
@@ -950,9 +949,15 @@ gov.gr
950
949
  // gs : http://en.wikipedia.org/wiki/.gs
951
950
  gs
952
951
 
953
- // gt : http://www.gt/politicas.html
954
- *.gt
955
- !www.gt
952
+ // gt : http://www.gt/politicas_de_registro.html
953
+ gt
954
+ com.gt
955
+ edu.gt
956
+ gob.gt
957
+ ind.gt
958
+ mil.gt
959
+ net.gt
960
+ org.gt
956
961
 
957
962
  // gu : http://gadao.gov.gu/registration.txt
958
963
  *.gu
@@ -4166,6 +4171,7 @@ name.my
4166
4171
 
4167
4172
  // mz : http://www.gobin.info/domainname/mz-template.doc
4168
4173
  *.mz
4174
+ !teledata.mz
4169
4175
 
4170
4176
  // na : http://www.na-nic.com.na/
4171
4177
  // http://www.info.na/domain/
@@ -5359,8 +5365,17 @@ ed.pw
5359
5365
  go.pw
5360
5366
  belau.pw
5361
5367
 
5362
- // py : http://www.nic.py/faq_a.html#faq_b
5363
- *.py
5368
+ // py : http://www.nic.py/pautas.html#seccion_9
5369
+ // Confirmed by registry 2012-10-03
5370
+ com.py
5371
+ coop.py
5372
+ edu.py
5373
+ gov.py
5374
+ mil.py
5375
+ net.py
5376
+ org.py
5377
+ !nic.py
5378
+ !una.py
5364
5379
 
5365
5380
  // qa : http://domains.qa/en/
5366
5381
  qa
@@ -5999,20 +6014,20 @@ com.ug
5999
6014
  org.ug
6000
6015
 
6001
6016
  // uk : http://en.wikipedia.org/wiki/.uk
6017
+ // Submitted by registry <noc@nominet.org.uk> 2012-10-02
6002
6018
  *.uk
6019
+ *.nhs.uk
6020
+ *.police.uk
6003
6021
  *.sch.uk
6004
6022
  !bl.uk
6005
6023
  !british-library.uk
6006
- !icnet.uk
6007
6024
  !jet.uk
6008
6025
  !mod.uk
6026
+ !national-library-scotland.uk
6009
6027
  !nel.uk
6010
- !nhs.uk
6011
6028
  !nic.uk
6012
6029
  !nls.uk
6013
- !national-library-scotland.uk
6014
6030
  !parliament.uk
6015
- !police.uk
6016
6031
 
6017
6032
  // us : http://en.wikipedia.org/wiki/.us
6018
6033
  us
@@ -6288,8 +6303,19 @@ gov.vc
6288
6303
  mil.vc
6289
6304
  edu.vc
6290
6305
 
6291
- // ve : http://registro.nic.ve/nicve/registro/index.html
6292
- *.ve
6306
+ // ve : https://registro.nic.ve/
6307
+ // Confirmed by registry 2012-10-04
6308
+ ve
6309
+ co.ve
6310
+ com.ve
6311
+ e12.ve
6312
+ edu.ve
6313
+ gov.ve
6314
+ info.ve
6315
+ mil.ve
6316
+ net.ve
6317
+ org.ve
6318
+ web.ve
6293
6319
 
6294
6320
  // vg : http://en.wikipedia.org/wiki/.vg
6295
6321
  vg
@@ -6529,15 +6555,20 @@ priv.at
6529
6555
  co.ca
6530
6556
 
6531
6557
  // CentralNic : http://www.centralnic.com/names/domains
6532
- // Confirmed by registry <gavin.brown@centralnic.com> 2008-06-09
6558
+ // Confirmed by registry <gavin.brown@centralnic.com> 2012-09-27
6559
+ ae.org
6533
6560
  ar.com
6534
6561
  br.com
6535
6562
  cn.com
6563
+ com.de
6536
6564
  de.com
6537
6565
  eu.com
6538
6566
  gb.com
6567
+ gb.net
6539
6568
  gr.com
6540
6569
  hu.com
6570
+ hu.net
6571
+ jp.net
6541
6572
  jpn.com
6542
6573
  kr.com
6543
6574
  no.com
@@ -6545,25 +6576,68 @@ qc.com
6545
6576
  ru.com
6546
6577
  sa.com
6547
6578
  se.com
6579
+ se.net
6548
6580
  uk.com
6581
+ uk.net
6549
6582
  us.com
6583
+ us.org
6550
6584
  uy.com
6551
6585
  za.com
6552
- gb.net
6553
- jp.net
6554
- se.net
6555
- uk.net
6556
- ae.org
6557
- us.org
6558
- com.de
6559
6586
 
6560
6587
  // Opera Software, A.S.A.
6561
6588
  // Requested by Yngve Pettersen <yngve@opera.com> 2009-11-26
6562
6589
  operaunite.com
6563
6590
 
6564
6591
  // Google, Inc.
6565
- // Requested by Eduardo Vela <evn@google.com> 2010-09-06
6592
+ // Requested by Eduardo Vela <evn@google.com> 2012-10-24
6566
6593
  appspot.com
6594
+ blogspot.be
6595
+ blogspot.bj
6596
+ blogspot.ca
6597
+ blogspot.cf
6598
+ blogspot.ch
6599
+ blogspot.co.at
6600
+ blogspot.co.il
6601
+ blogspot.co.nz
6602
+ blogspot.co.uk
6603
+ blogspot.com
6604
+ blogspot.com.ar
6605
+ blogspot.com.au
6606
+ blogspot.com.br
6607
+ blogspot.com.es
6608
+ blogspot.cv
6609
+ blogspot.cz
6610
+ blogspot.de
6611
+ blogspot.dk
6612
+ blogspot.fi
6613
+ blogspot.fr
6614
+ blogspot.gr
6615
+ blogspot.hk
6616
+ blogspot.hu
6617
+ blogspot.ie
6618
+ blogspot.in
6619
+ blogspot.it
6620
+ blogspot.jp
6621
+ blogspot.kr
6622
+ blogspot.mr
6623
+ blogspot.mx
6624
+ blogspot.nl
6625
+ blogspot.no
6626
+ blogspot.pt
6627
+ blogspot.re
6628
+ blogspot.ro
6629
+ blogspot.se
6630
+ blogspot.sg
6631
+ blogspot.sk
6632
+ blogspot.td
6633
+ blogspot.tw
6634
+ codespot.com
6635
+ googleapis.com
6636
+ googlecode.com
6637
+
6638
+ // DreamHost : http://www.dreamhost.com/
6639
+ // Requested by Andrew Farmer <andrew.farmer@dreamhost.com> 2012-10-02
6640
+ dreamhosters.com
6567
6641
 
6568
6642
  // iki.fi : Submitted by Hannu Aronsson <haa@iki.fi> 2009-11-05
6569
6643
  iki.fi
@@ -6865,4 +6939,12 @@ webhop.org
6865
6939
  worse-than.tv
6866
6940
  writesthisblog.com
6867
6941
 
6942
+ // BetaInABox
6943
+ // Requested by adrian@betainabox.com 2012-09-13
6944
+ betainabox.com
6945
+
6946
+ // Red Hat, Inc. OpenShift : https://openshift.redhat.com/
6947
+ // Requested by Tim Kramer <tkramer@rhcloud.com> 2012-10-24
6948
+ rhcloud.com
6949
+
6868
6950
  // ===END PRIVATE DOMAINS===
@@ -16,7 +16,7 @@
16
16
 
17
17
  module Iudex
18
18
  module Core
19
- VERSION = '1.2.1'
19
+ VERSION = '1.3.0'
20
20
 
21
21
  LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
22
  end
Binary file
@@ -0,0 +1,86 @@
1
+ #--
2
+ # Copyright (c) 2008-2012 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-core'
18
+
19
+ module Iudex::Core
20
+
21
+ # Configuration extensions for Java::iudex.core.VisitQueue.
22
+ class VisitQueue
23
+
24
+ # Configure defaults, a specific domain or domain,type pair via an
25
+ # options Hash.
26
+ #
27
+ # ==== Options
28
+ #
29
+ # :domain:: Registration level domain String. If not specified,
30
+ # :type is ignored and other options apply as general
31
+ # defaults for all (otherwise un-configured
32
+ # domains/types).
33
+ #
34
+ # :type:: An optional type (i.e. PAGE). If specified, this
35
+ # :domain,:type pair will be given its own HostQueue with
36
+ # the other options applying exclusively to it.
37
+ #
38
+ # :rate:: Target maximum rate of crawl as a Float requests/second
39
+ # for this :domain(,:type) or the default for any not
40
+ # otherwise configured. Resource limits including :cons
41
+ # and HTTP client connections may further inhibit rate
42
+ # below this value. (Initial default is 2.0 req/second)
43
+ #
44
+ # :delay:: Alternative inverse to :rate as Integer milliseconds to
45
+ # delay between scheduling visits. If specified, takes
46
+ # precedence over rate.
47
+ #
48
+ # :cons:: Maximum number of concurrent requests for this
49
+ # :domain(,:type) or the default for any not otherwise
50
+ # configured. Note that the HTTP clients have their own
51
+ # per *host:port* destination connection limit which
52
+ # should generally be set higher than this value.
53
+ # (Initial default: 1)
54
+ #
55
+ def config( opts = {} )
56
+
57
+ if opts[ :domain ]
58
+ opts = { :rate => delay_to_rate( default_min_host_delay ),
59
+ :cons => default_max_access_per_host }.merge( opts )
60
+ configure_host( opts[ :domain ],
61
+ opts[ :type ], # includes nil
62
+ opts[ :delay ] || rate_to_delay( opts[ :rate ] ),
63
+ opts[ :cons ] )
64
+ else
65
+ if opts[ :rate ]
66
+ self.default_min_host_delay = rate_to_delay( opts[ :rate ] )
67
+ end
68
+ self.default_min_host_delay = opts[ :delay ] if opts[ :delay ]
69
+ self.default_max_access_per_host = opts[ :cons ] if opts[ :cons ]
70
+ end
71
+
72
+ end
73
+
74
+ private
75
+
76
+ def rate_to_delay( r )
77
+ ( 1_000.0 / r ).round
78
+ end
79
+
80
+ def delay_to_rate( d )
81
+ ( 1_000.0 / d )
82
+ end
83
+
84
+ end
85
+
86
+ end
data/lib/iudex-core.rb CHANGED
@@ -53,3 +53,4 @@ module Iudex
53
53
  end
54
54
 
55
55
  require 'iudex-core/mojibake'
56
+ require 'iudex-core/visit_queue'
data/pom.xml CHANGED
@@ -5,13 +5,13 @@
5
5
  <groupId>iudex</groupId>
6
6
  <artifactId>iudex-core</artifactId>
7
7
  <packaging>jar</packaging>
8
- <version>1.2.1</version>
8
+ <version>1.3.0</version>
9
9
  <name>Iudex Core System</name>
10
10
 
11
11
  <parent>
12
12
  <groupId>iudex</groupId>
13
13
  <artifactId>iudex-parent</artifactId>
14
- <version>1.2.1</version>
14
+ <version>1.3.0</version>
15
15
  <relativePath>..</relativePath>
16
16
  </parent>
17
17
 
@@ -30,19 +30,19 @@
30
30
  <dependency>
31
31
  <groupId>iudex</groupId>
32
32
  <artifactId>iudex-filter</artifactId>
33
- <version>[1.2.1,1.2.999)</version>
33
+ <version>[1.3.0,1.3.999)</version>
34
34
  </dependency>
35
35
 
36
36
  <dependency>
37
37
  <groupId>iudex</groupId>
38
38
  <artifactId>iudex-http</artifactId>
39
- <version>[1.2.1,1.2.999)</version>
39
+ <version>[1.3.0,1.3.999)</version>
40
40
  </dependency>
41
41
 
42
42
  <dependency>
43
43
  <groupId>iudex</groupId>
44
44
  <artifactId>iudex-barc</artifactId>
45
- <version>[1.2.1,1.2.999)</version>
45
+ <version>[1.3.0,1.3.999)</version>
46
46
  </dependency>
47
47
 
48
48
  <dependency>
@@ -73,6 +73,11 @@ class TestVisitManager < MiniTest::Unit::TestCase
73
73
  self.max_check_interval = 21
74
74
  self.max_poll_interval = 130 #ms
75
75
  @batch = 0
76
+ @log = RJack::SLF4J[ self.class ]
77
+ end
78
+
79
+ def log
80
+ @log.java_logger
76
81
  end
77
82
 
78
83
  def pollWorkImpl( visit_q )
@@ -34,7 +34,7 @@ class TestVisitQueue < MiniTest::Unit::TestCase
34
34
 
35
35
  def setup
36
36
  @visit_q = VisitQueue.new
37
- @visit_q.default_min_host_delay = 50 #ms
37
+ @visit_q.config( :delay => 50 ) #ms
38
38
  @scheduler = Executors::new_scheduled_thread_pool( 2 )
39
39
  end
40
40
 
@@ -96,7 +96,7 @@ class TestVisitQueue < MiniTest::Unit::TestCase
96
96
  end
97
97
 
98
98
  def test_configure
99
- @visit_q.configure_host( 'h2.com', 75, 2 )
99
+ @visit_q.config( :domain => 'h2.com', :delay => 75, :cons => 2 )
100
100
 
101
101
  [ %w[ h2 a 2.2 ],
102
102
  %w[ w.h2 b 2.1 ],
@@ -125,6 +125,39 @@ class TestVisitQueue < MiniTest::Unit::TestCase
125
125
  assert_queue_empty
126
126
  end
127
127
 
128
+ def test_configure_type
129
+ @visit_q.config( :domain => 'h2.com',
130
+ :delay => 75, :cons => 2 )
131
+ @visit_q.config( :domain => 'h2.com', :type => 'ALT',
132
+ :delay => 50, :cons => 1 )
133
+
134
+ [ %w[ h2 a 2.2 ],
135
+ %w[ w.h2 b 2.1 ],
136
+ %w[ h2:ALT c 3.2 ],
137
+ %w[ h2:ALT d 3.1 ],
138
+ %w[ h1 a 1.2 ],
139
+ %w[ h1 b 1.1 ] ].each do |oinp|
140
+
141
+ @visit_q.add( order( oinp ) )
142
+
143
+ end
144
+ assert_equal( 3, @visit_q.host_count, "host count" )
145
+
146
+ expected = [ %w[ h2:ALT c 3.2 ],
147
+ %w[ h2 a 2.2 ],
148
+ %w[ h1 a 1.2 ],
149
+ %w[ h2:ALT d 3.1 ],
150
+ %w[ h1 b 1.1 ],
151
+ %w[ w.h2 b 2.1 ] ]
152
+
153
+ p = 0
154
+ expected.each do |o|
155
+ assert_equal( o, acquire_order, p += 1 )
156
+ end
157
+
158
+ assert_queue_empty
159
+ end
160
+
128
161
  def test_multi_access_2
129
162
  @visit_q.default_max_access_per_host = 2
130
163
  add_common_orders
@@ -170,7 +203,7 @@ class TestVisitQueue < MiniTest::Unit::TestCase
170
203
  def test_interleaved
171
204
  @visit_q.default_max_access_per_host = 2
172
205
  @visit_q.default_min_host_delay = 3 #ms
173
- @visit_q.configure_host( 'h2.com', 1, 4 )
206
+ @visit_q.config( :domain => 'h2.com', :delay => 1, :cons => 4 )
174
207
 
175
208
  512.times do |i|
176
209
  @visit_q.add( order( [ %w[ h1 h2 ][rand( 2 )], i, 5 * rand ] ) )
@@ -222,10 +255,13 @@ class TestVisitQueue < MiniTest::Unit::TestCase
222
255
 
223
256
  def order( args )
224
257
  host, c, p = args
258
+ host, t = host.split( ':' )
259
+
225
260
  UniMap.new.tap do |o|
226
261
  o.url = visit_url( "http://#{host}.com/#{c}" )
227
262
  o.priority = p.to_f
228
263
  o.vtest_input = args
264
+ o.type = t || 'PAGE'
229
265
  end
230
266
  end
231
267
 
metadata CHANGED
@@ -1,191 +1,230 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: iudex-core
3
- version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 1.2.1
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 1.3.0
6
6
  platform: java
7
- authors:
8
- - David Kellum
9
- autorequire:
7
+ authors:
8
+ - David Kellum
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
-
13
- date: 2012-09-15 00:00:00 Z
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
16
- name: rjack-slf4j
17
- version_requirements: &id001 !ruby/object:Gem::Requirement
18
- none: false
19
- requirements:
20
- - - ">="
21
- - !ruby/object:Gem::Version
22
- version: 1.6.5
23
- - - <
24
- - !ruby/object:Gem::Version
25
- version: "1.8"
26
- requirement: *id001
27
- prerelease: false
28
- type: :runtime
29
- - !ruby/object:Gem::Dependency
30
- name: hooker
31
- version_requirements: &id002 !ruby/object:Gem::Requirement
32
- none: false
33
- requirements:
34
- - - ~>
35
- - !ruby/object:Gem::Version
36
- version: 1.0.0
37
- requirement: *id002
38
- prerelease: false
39
- type: :runtime
40
- - !ruby/object:Gem::Dependency
41
- name: gravitext-util
42
- version_requirements: &id003 !ruby/object:Gem::Requirement
43
- none: false
44
- requirements:
45
- - - ~>
46
- - !ruby/object:Gem::Version
47
- version: 1.6.1
48
- requirement: *id003
49
- prerelease: false
50
- type: :runtime
51
- - !ruby/object:Gem::Dependency
52
- name: iudex-filter
53
- version_requirements: &id004 !ruby/object:Gem::Requirement
54
- none: false
55
- requirements:
56
- - - ~>
57
- - !ruby/object:Gem::Version
58
- version: 1.2.1
59
- requirement: *id004
60
- prerelease: false
61
- type: :runtime
62
- - !ruby/object:Gem::Dependency
63
- name: iudex-http
64
- version_requirements: &id005 !ruby/object:Gem::Requirement
65
- none: false
66
- requirements:
67
- - - ~>
68
- - !ruby/object:Gem::Version
69
- version: 1.2.1
70
- requirement: *id005
71
- prerelease: false
72
- type: :runtime
73
- - !ruby/object:Gem::Dependency
74
- name: iudex-barc
75
- version_requirements: &id006 !ruby/object:Gem::Requirement
76
- none: false
77
- requirements:
78
- - - ~>
79
- - !ruby/object:Gem::Version
80
- version: 1.2.1
81
- requirement: *id006
82
- prerelease: false
83
- type: :runtime
84
- - !ruby/object:Gem::Dependency
85
- name: minitest
86
- version_requirements: &id007 !ruby/object:Gem::Requirement
87
- none: false
88
- requirements:
89
- - - ~>
90
- - !ruby/object:Gem::Version
91
- version: "2.3"
92
- requirement: *id007
93
- prerelease: false
94
- type: :development
95
- - !ruby/object:Gem::Dependency
96
- name: rjack-logback
97
- version_requirements: &id008 !ruby/object:Gem::Requirement
98
- none: false
99
- requirements:
100
- - - ~>
101
- - !ruby/object:Gem::Version
102
- version: "1.2"
103
- requirement: *id008
104
- prerelease: false
105
- type: :development
106
- - !ruby/object:Gem::Dependency
107
- name: rjack-tarpit
108
- version_requirements: &id009 !ruby/object:Gem::Requirement
109
- none: false
110
- requirements:
111
- - - ~>
112
- - !ruby/object:Gem::Version
113
- version: "2.0"
114
- requirement: *id009
115
- prerelease: false
116
- type: :development
12
+ date: 2012-11-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rjack-slf4j
16
+ version_requirements: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ~>
19
+ - !ruby/object:Gem::Version
20
+ version: 1.7.0
21
+ none: false
22
+ requirement: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.7.0
27
+ none: false
28
+ prerelease: false
29
+ type: :runtime
30
+ - !ruby/object:Gem::Dependency
31
+ name: hooker
32
+ version_requirements: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ~>
35
+ - !ruby/object:Gem::Version
36
+ version: 1.0.0
37
+ none: false
38
+ requirement: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ version: 1.0.0
43
+ none: false
44
+ prerelease: false
45
+ type: :runtime
46
+ - !ruby/object:Gem::Dependency
47
+ name: gravitext-util
48
+ version_requirements: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ~>
51
+ - !ruby/object:Gem::Version
52
+ version: 1.7.0
53
+ none: false
54
+ requirement: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ~>
57
+ - !ruby/object:Gem::Version
58
+ version: 1.7.0
59
+ none: false
60
+ prerelease: false
61
+ type: :runtime
62
+ - !ruby/object:Gem::Dependency
63
+ name: iudex-filter
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 1.3.0
69
+ none: false
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ~>
73
+ - !ruby/object:Gem::Version
74
+ version: 1.3.0
75
+ none: false
76
+ prerelease: false
77
+ type: :runtime
78
+ - !ruby/object:Gem::Dependency
79
+ name: iudex-http
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ~>
83
+ - !ruby/object:Gem::Version
84
+ version: 1.3.0
85
+ none: false
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ~>
89
+ - !ruby/object:Gem::Version
90
+ version: 1.3.0
91
+ none: false
92
+ prerelease: false
93
+ type: :runtime
94
+ - !ruby/object:Gem::Dependency
95
+ name: iudex-barc
96
+ version_requirements: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ~>
99
+ - !ruby/object:Gem::Version
100
+ version: 1.3.0
101
+ none: false
102
+ requirement: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ~>
105
+ - !ruby/object:Gem::Version
106
+ version: 1.3.0
107
+ none: false
108
+ prerelease: false
109
+ type: :runtime
110
+ - !ruby/object:Gem::Dependency
111
+ name: minitest
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ~>
115
+ - !ruby/object:Gem::Version
116
+ version: '2.3'
117
+ none: false
118
+ requirement: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ~>
121
+ - !ruby/object:Gem::Version
122
+ version: '2.3'
123
+ none: false
124
+ prerelease: false
125
+ type: :development
126
+ - !ruby/object:Gem::Dependency
127
+ name: rjack-logback
128
+ version_requirements: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ~>
131
+ - !ruby/object:Gem::Version
132
+ version: '1.5'
133
+ none: false
134
+ requirement: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ~>
137
+ - !ruby/object:Gem::Version
138
+ version: '1.5'
139
+ none: false
140
+ prerelease: false
141
+ type: :development
142
+ - !ruby/object:Gem::Dependency
143
+ name: rjack-tarpit
144
+ version_requirements: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ~>
147
+ - !ruby/object:Gem::Version
148
+ version: '2.0'
149
+ none: false
150
+ requirement: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ~>
153
+ - !ruby/object:Gem::Version
154
+ version: '2.0'
155
+ none: false
156
+ prerelease: false
157
+ type: :development
117
158
  description: Iudex is a general purpose web crawler and feed processor in ruby/java. The iudex-core gem contains core facilities and notably, does not contain such facilities as database-backed state management.
118
- email:
119
- - dek-oss@gravitext.com
120
- executables:
121
- - iudex-test-config
122
- - iudex-url-norm
159
+ email:
160
+ - dek-oss@gravitext.com
161
+ executables:
162
+ - iudex-test-config
163
+ - iudex-url-norm
123
164
  extensions: []
124
-
125
- extra_rdoc_files:
126
- - History.rdoc
127
- - README.rdoc
128
- files:
129
- - History.rdoc
130
- - Manifest.txt
131
- - README.rdoc
132
- - Rakefile
133
- - pom.xml
134
- - bin/iudex-test-config
135
- - bin/iudex-url-norm
136
- - build/TLDSets.java.erb
137
- - build/effective_tld_name.dat
138
- - build/tld_set_generator.rb
139
- - config/config.rb
140
- - config/mojibake
141
- - lib/iudex-core/base.rb
142
- - lib/iudex-core.rb
143
- - lib/iudex-core/config.rb
144
- - lib/iudex-core/mojibake.rb
145
- - test/setup.rb
146
- - test/test_charsets.rb
147
- - test/test_content_fetcher.rb
148
- - test/test_content_source.rb
149
- - test/test_log_writer.rb
150
- - test/test_mojibake.rb
151
- - test/test_redirect_handler.rb
152
- - test/test_visit_manager.rb
153
- - test/test_visit_queue.rb
154
- - test/test_visit_url.rb
155
- - lib/iudex-core/iudex-core-1.2.1.jar
165
+ extra_rdoc_files:
166
+ - History.rdoc
167
+ - README.rdoc
168
+ files:
169
+ - History.rdoc
170
+ - Manifest.txt
171
+ - README.rdoc
172
+ - Rakefile
173
+ - pom.xml
174
+ - bin/iudex-test-config
175
+ - bin/iudex-url-norm
176
+ - build/TLDSets.java.erb
177
+ - build/effective_tld_name.dat
178
+ - build/tld_set_generator.rb
179
+ - config/config.rb
180
+ - config/mojibake
181
+ - lib/iudex-core/base.rb
182
+ - lib/iudex-core.rb
183
+ - lib/iudex-core/config.rb
184
+ - lib/iudex-core/mojibake.rb
185
+ - lib/iudex-core/visit_queue.rb
186
+ - test/setup.rb
187
+ - test/test_charsets.rb
188
+ - test/test_content_fetcher.rb
189
+ - test/test_content_source.rb
190
+ - test/test_log_writer.rb
191
+ - test/test_mojibake.rb
192
+ - test/test_redirect_handler.rb
193
+ - test/test_visit_manager.rb
194
+ - test/test_visit_queue.rb
195
+ - test/test_visit_url.rb
196
+ - lib/iudex-core/iudex-core-1.3.0.jar
156
197
  homepage: http://iudex.gravitext.com
157
198
  licenses: []
158
-
159
- post_install_message:
160
- rdoc_options:
161
- - --main
162
- - README.rdoc
163
- require_paths:
164
- - lib
165
- required_ruby_version: !ruby/object:Gem::Requirement
199
+ post_install_message:
200
+ rdoc_options:
201
+ - --main
202
+ - README.rdoc
203
+ require_paths:
204
+ - lib
205
+ required_ruby_version: !ruby/object:Gem::Requirement
206
+ requirements:
207
+ - - ! '>='
208
+ - !ruby/object:Gem::Version
209
+ version: '0'
210
+ segments:
211
+ - 0
212
+ hash: 2
166
213
  none: false
167
- requirements:
168
- - - ">="
169
- - !ruby/object:Gem::Version
170
- hash: 2
171
- segments:
172
- - 0
173
- version: "0"
174
- required_rubygems_version: !ruby/object:Gem::Requirement
214
+ required_rubygems_version: !ruby/object:Gem::Requirement
215
+ requirements:
216
+ - - ! '>='
217
+ - !ruby/object:Gem::Version
218
+ version: '0'
219
+ segments:
220
+ - 0
221
+ hash: 2
175
222
  none: false
176
- requirements:
177
- - - ">="
178
- - !ruby/object:Gem::Version
179
- hash: 2
180
- segments:
181
- - 0
182
- version: "0"
183
223
  requirements: []
184
-
185
- rubyforge_project:
186
- rubygems_version: 1.8.15
187
- signing_key:
224
+ rubyforge_project:
225
+ rubygems_version: 1.8.24
226
+ signing_key:
188
227
  specification_version: 3
189
228
  summary: Iudex is a general purpose web crawler and feed processor in ruby/java.
190
229
  test_files: []
191
-
230
+ ...
Binary file