iudex-core 1.2.1-java → 1.3.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.rdoc CHANGED
@@ -1,3 +1,14 @@
1
+ === 1.3.0 (2012-11-8)
2
+ * Add DomainKey with optional :type to support configuring of
3
+ a :domain,:type specific HostQueue
4
+ * Add VisitQueue.config( options ) extensions for cleaner
5
+ configuration of HostQueue with type, rate, etc.
6
+ * Update TLDSets based on upstream c61f326ad19f 2012-10-25
7
+ * Upgrade to gravitext-util ~> 1.7.0
8
+ * Upgrade to slf4j ~> 1.7.0, logback ~> 1.5 (dev)
9
+ * Misc java logging simplifications with slf4j 1.7 varargs
10
+ * Expose GenericWorkPollStrategy.log()
11
+
1
12
  === 1.2.1 (2012-9-15)
2
13
  * Upgrade/narrow to gravitext-util ~> 1.6.1
3
14
  * Upgrade to slf4j [1.6.5,1.8), logback ~> 1.2 (dev)
data/Manifest.txt CHANGED
@@ -14,6 +14,7 @@ lib/iudex-core/base.rb
14
14
  lib/iudex-core.rb
15
15
  lib/iudex-core/config.rb
16
16
  lib/iudex-core/mojibake.rb
17
+ lib/iudex-core/visit_queue.rb
17
18
  test/setup.rb
18
19
  test/test_charsets.rb
19
20
  test/test_content_fetcher.rb
@@ -24,4 +25,4 @@ test/test_redirect_handler.rb
24
25
  test/test_visit_manager.rb
25
26
  test/test_visit_queue.rb
26
27
  test/test_visit_url.rb
27
- lib/iudex-core/iudex-core-1.2.1.jar
28
+ lib/iudex-core/iudex-core-1.3.0.jar
data/Rakefile CHANGED
@@ -18,6 +18,7 @@ task :clean do
18
18
  rm_f 'src/main/java/iudex/core/TLDSets.java'
19
19
  end
20
20
 
21
+ desc "Download and install latest effective_tld_name.dat"
21
22
  task :refresh_tld_dat do
22
23
  sh( "curl http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1" +
23
24
  " -o build/effective_tld_name.dat" )
@@ -219,7 +219,6 @@ net.au
219
219
  org.au
220
220
  edu.au
221
221
  gov.au
222
- csiro.au
223
222
  asn.au
224
223
  id.au
225
224
  // Historic 2LDs (closed to new registration, but sites still exist)
@@ -950,9 +949,15 @@ gov.gr
950
949
  // gs : http://en.wikipedia.org/wiki/.gs
951
950
  gs
952
951
 
953
- // gt : http://www.gt/politicas.html
954
- *.gt
955
- !www.gt
952
+ // gt : http://www.gt/politicas_de_registro.html
953
+ gt
954
+ com.gt
955
+ edu.gt
956
+ gob.gt
957
+ ind.gt
958
+ mil.gt
959
+ net.gt
960
+ org.gt
956
961
 
957
962
  // gu : http://gadao.gov.gu/registration.txt
958
963
  *.gu
@@ -4166,6 +4171,7 @@ name.my
4166
4171
 
4167
4172
  // mz : http://www.gobin.info/domainname/mz-template.doc
4168
4173
  *.mz
4174
+ !teledata.mz
4169
4175
 
4170
4176
  // na : http://www.na-nic.com.na/
4171
4177
  // http://www.info.na/domain/
@@ -5359,8 +5365,17 @@ ed.pw
5359
5365
  go.pw
5360
5366
  belau.pw
5361
5367
 
5362
- // py : http://www.nic.py/faq_a.html#faq_b
5363
- *.py
5368
+ // py : http://www.nic.py/pautas.html#seccion_9
5369
+ // Confirmed by registry 2012-10-03
5370
+ com.py
5371
+ coop.py
5372
+ edu.py
5373
+ gov.py
5374
+ mil.py
5375
+ net.py
5376
+ org.py
5377
+ !nic.py
5378
+ !una.py
5364
5379
 
5365
5380
  // qa : http://domains.qa/en/
5366
5381
  qa
@@ -5999,20 +6014,20 @@ com.ug
5999
6014
  org.ug
6000
6015
 
6001
6016
  // uk : http://en.wikipedia.org/wiki/.uk
6017
+ // Submitted by registry <noc@nominet.org.uk> 2012-10-02
6002
6018
  *.uk
6019
+ *.nhs.uk
6020
+ *.police.uk
6003
6021
  *.sch.uk
6004
6022
  !bl.uk
6005
6023
  !british-library.uk
6006
- !icnet.uk
6007
6024
  !jet.uk
6008
6025
  !mod.uk
6026
+ !national-library-scotland.uk
6009
6027
  !nel.uk
6010
- !nhs.uk
6011
6028
  !nic.uk
6012
6029
  !nls.uk
6013
- !national-library-scotland.uk
6014
6030
  !parliament.uk
6015
- !police.uk
6016
6031
 
6017
6032
  // us : http://en.wikipedia.org/wiki/.us
6018
6033
  us
@@ -6288,8 +6303,19 @@ gov.vc
6288
6303
  mil.vc
6289
6304
  edu.vc
6290
6305
 
6291
- // ve : http://registro.nic.ve/nicve/registro/index.html
6292
- *.ve
6306
+ // ve : https://registro.nic.ve/
6307
+ // Confirmed by registry 2012-10-04
6308
+ ve
6309
+ co.ve
6310
+ com.ve
6311
+ e12.ve
6312
+ edu.ve
6313
+ gov.ve
6314
+ info.ve
6315
+ mil.ve
6316
+ net.ve
6317
+ org.ve
6318
+ web.ve
6293
6319
 
6294
6320
  // vg : http://en.wikipedia.org/wiki/.vg
6295
6321
  vg
@@ -6529,15 +6555,20 @@ priv.at
6529
6555
  co.ca
6530
6556
 
6531
6557
  // CentralNic : http://www.centralnic.com/names/domains
6532
- // Confirmed by registry <gavin.brown@centralnic.com> 2008-06-09
6558
+ // Confirmed by registry <gavin.brown@centralnic.com> 2012-09-27
6559
+ ae.org
6533
6560
  ar.com
6534
6561
  br.com
6535
6562
  cn.com
6563
+ com.de
6536
6564
  de.com
6537
6565
  eu.com
6538
6566
  gb.com
6567
+ gb.net
6539
6568
  gr.com
6540
6569
  hu.com
6570
+ hu.net
6571
+ jp.net
6541
6572
  jpn.com
6542
6573
  kr.com
6543
6574
  no.com
@@ -6545,25 +6576,68 @@ qc.com
6545
6576
  ru.com
6546
6577
  sa.com
6547
6578
  se.com
6579
+ se.net
6548
6580
  uk.com
6581
+ uk.net
6549
6582
  us.com
6583
+ us.org
6550
6584
  uy.com
6551
6585
  za.com
6552
- gb.net
6553
- jp.net
6554
- se.net
6555
- uk.net
6556
- ae.org
6557
- us.org
6558
- com.de
6559
6586
 
6560
6587
  // Opera Software, A.S.A.
6561
6588
  // Requested by Yngve Pettersen <yngve@opera.com> 2009-11-26
6562
6589
  operaunite.com
6563
6590
 
6564
6591
  // Google, Inc.
6565
- // Requested by Eduardo Vela <evn@google.com> 2010-09-06
6592
+ // Requested by Eduardo Vela <evn@google.com> 2012-10-24
6566
6593
  appspot.com
6594
+ blogspot.be
6595
+ blogspot.bj
6596
+ blogspot.ca
6597
+ blogspot.cf
6598
+ blogspot.ch
6599
+ blogspot.co.at
6600
+ blogspot.co.il
6601
+ blogspot.co.nz
6602
+ blogspot.co.uk
6603
+ blogspot.com
6604
+ blogspot.com.ar
6605
+ blogspot.com.au
6606
+ blogspot.com.br
6607
+ blogspot.com.es
6608
+ blogspot.cv
6609
+ blogspot.cz
6610
+ blogspot.de
6611
+ blogspot.dk
6612
+ blogspot.fi
6613
+ blogspot.fr
6614
+ blogspot.gr
6615
+ blogspot.hk
6616
+ blogspot.hu
6617
+ blogspot.ie
6618
+ blogspot.in
6619
+ blogspot.it
6620
+ blogspot.jp
6621
+ blogspot.kr
6622
+ blogspot.mr
6623
+ blogspot.mx
6624
+ blogspot.nl
6625
+ blogspot.no
6626
+ blogspot.pt
6627
+ blogspot.re
6628
+ blogspot.ro
6629
+ blogspot.se
6630
+ blogspot.sg
6631
+ blogspot.sk
6632
+ blogspot.td
6633
+ blogspot.tw
6634
+ codespot.com
6635
+ googleapis.com
6636
+ googlecode.com
6637
+
6638
+ // DreamHost : http://www.dreamhost.com/
6639
+ // Requested by Andrew Farmer <andrew.farmer@dreamhost.com> 2012-10-02
6640
+ dreamhosters.com
6567
6641
 
6568
6642
  // iki.fi : Submitted by Hannu Aronsson <haa@iki.fi> 2009-11-05
6569
6643
  iki.fi
@@ -6865,4 +6939,12 @@ webhop.org
6865
6939
  worse-than.tv
6866
6940
  writesthisblog.com
6867
6941
 
6942
+ // BetaInABox
6943
+ // Requested by adrian@betainabox.com 2012-09-13
6944
+ betainabox.com
6945
+
6946
+ // Red Hat, Inc. OpenShift : https://openshift.redhat.com/
6947
+ // Requested by Tim Kramer <tkramer@rhcloud.com> 2012-10-24
6948
+ rhcloud.com
6949
+
6868
6950
  // ===END PRIVATE DOMAINS===
@@ -16,7 +16,7 @@
16
16
 
17
17
  module Iudex
18
18
  module Core
19
- VERSION = '1.2.1'
19
+ VERSION = '1.3.0'
20
20
 
21
21
  LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
22
  end
Binary file
@@ -0,0 +1,86 @@
1
+ #--
2
+ # Copyright (c) 2008-2012 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-core'
18
+
19
+ module Iudex::Core
20
+
21
+ # Configuration extensions for Java::iudex.core.VisitQueue.
22
+ class VisitQueue
23
+
24
+ # Configure defaults, a specific domain or domain,type pair via an
25
+ # options Hash.
26
+ #
27
+ # ==== Options
28
+ #
29
+ # :domain:: Registration level domain String. If not specified,
30
+ # :type is ignored and other options apply as general
31
+ # defaults for all (otherwise un-configured
32
+ # domains/types).
33
+ #
34
+ # :type:: An optional type (i.e. PAGE). If specified, this
35
+ # :domain,:type pair will be given its own HostQueue with
36
+ # the other options applying exclusively to it.
37
+ #
38
+ # :rate:: Target maximum rate of crawl as a Float requests/second
39
+ # for this :domain(,:type) or the default for any not
40
+ # otherwise configured. Resource limits including :cons
41
+ # and HTTP client connections may further inhibit rate
42
+ # below this value. (Initial default is 2.0 req/second)
43
+ #
44
+ # :delay:: Alternative inverse to :rate as Integer milliseconds to
45
+ # delay between scheduling visits. If specified, takes
46
+ # precedence over rate.
47
+ #
48
+ # :cons:: Maximum number of concurrent requests for this
49
+ # :domain(,:type) or the default for any not otherwise
50
+ # configured. Note that the HTTP clients have their own
51
+ # per *host:port* destination connection limit which
52
+ # should generally be set higher than this value.
53
+ # (Initial default: 1)
54
+ #
55
+ def config( opts = {} )
56
+
57
+ if opts[ :domain ]
58
+ opts = { :rate => delay_to_rate( default_min_host_delay ),
59
+ :cons => default_max_access_per_host }.merge( opts )
60
+ configure_host( opts[ :domain ],
61
+ opts[ :type ], # includes nil
62
+ opts[ :delay ] || rate_to_delay( opts[ :rate ] ),
63
+ opts[ :cons ] )
64
+ else
65
+ if opts[ :rate ]
66
+ self.default_min_host_delay = rate_to_delay( opts[ :rate ] )
67
+ end
68
+ self.default_min_host_delay = opts[ :delay ] if opts[ :delay ]
69
+ self.default_max_access_per_host = opts[ :cons ] if opts[ :cons ]
70
+ end
71
+
72
+ end
73
+
74
+ private
75
+
76
+ def rate_to_delay( r )
77
+ ( 1_000.0 / r ).round
78
+ end
79
+
80
+ def delay_to_rate( d )
81
+ ( 1_000.0 / d )
82
+ end
83
+
84
+ end
85
+
86
+ end
data/lib/iudex-core.rb CHANGED
@@ -53,3 +53,4 @@ module Iudex
53
53
  end
54
54
 
55
55
  require 'iudex-core/mojibake'
56
+ require 'iudex-core/visit_queue'
data/pom.xml CHANGED
@@ -5,13 +5,13 @@
5
5
  <groupId>iudex</groupId>
6
6
  <artifactId>iudex-core</artifactId>
7
7
  <packaging>jar</packaging>
8
- <version>1.2.1</version>
8
+ <version>1.3.0</version>
9
9
  <name>Iudex Core System</name>
10
10
 
11
11
  <parent>
12
12
  <groupId>iudex</groupId>
13
13
  <artifactId>iudex-parent</artifactId>
14
- <version>1.2.1</version>
14
+ <version>1.3.0</version>
15
15
  <relativePath>..</relativePath>
16
16
  </parent>
17
17
 
@@ -30,19 +30,19 @@
30
30
  <dependency>
31
31
  <groupId>iudex</groupId>
32
32
  <artifactId>iudex-filter</artifactId>
33
- <version>[1.2.1,1.2.999)</version>
33
+ <version>[1.3.0,1.3.999)</version>
34
34
  </dependency>
35
35
 
36
36
  <dependency>
37
37
  <groupId>iudex</groupId>
38
38
  <artifactId>iudex-http</artifactId>
39
- <version>[1.2.1,1.2.999)</version>
39
+ <version>[1.3.0,1.3.999)</version>
40
40
  </dependency>
41
41
 
42
42
  <dependency>
43
43
  <groupId>iudex</groupId>
44
44
  <artifactId>iudex-barc</artifactId>
45
- <version>[1.2.1,1.2.999)</version>
45
+ <version>[1.3.0,1.3.999)</version>
46
46
  </dependency>
47
47
 
48
48
  <dependency>
@@ -73,6 +73,11 @@ class TestVisitManager < MiniTest::Unit::TestCase
73
73
  self.max_check_interval = 21
74
74
  self.max_poll_interval = 130 #ms
75
75
  @batch = 0
76
+ @log = RJack::SLF4J[ self.class ]
77
+ end
78
+
79
+ def log
80
+ @log.java_logger
76
81
  end
77
82
 
78
83
  def pollWorkImpl( visit_q )
@@ -34,7 +34,7 @@ class TestVisitQueue < MiniTest::Unit::TestCase
34
34
 
35
35
  def setup
36
36
  @visit_q = VisitQueue.new
37
- @visit_q.default_min_host_delay = 50 #ms
37
+ @visit_q.config( :delay => 50 ) #ms
38
38
  @scheduler = Executors::new_scheduled_thread_pool( 2 )
39
39
  end
40
40
 
@@ -96,7 +96,7 @@ class TestVisitQueue < MiniTest::Unit::TestCase
96
96
  end
97
97
 
98
98
  def test_configure
99
- @visit_q.configure_host( 'h2.com', 75, 2 )
99
+ @visit_q.config( :domain => 'h2.com', :delay => 75, :cons => 2 )
100
100
 
101
101
  [ %w[ h2 a 2.2 ],
102
102
  %w[ w.h2 b 2.1 ],
@@ -125,6 +125,39 @@ class TestVisitQueue < MiniTest::Unit::TestCase
125
125
  assert_queue_empty
126
126
  end
127
127
 
128
+ def test_configure_type
129
+ @visit_q.config( :domain => 'h2.com',
130
+ :delay => 75, :cons => 2 )
131
+ @visit_q.config( :domain => 'h2.com', :type => 'ALT',
132
+ :delay => 50, :cons => 1 )
133
+
134
+ [ %w[ h2 a 2.2 ],
135
+ %w[ w.h2 b 2.1 ],
136
+ %w[ h2:ALT c 3.2 ],
137
+ %w[ h2:ALT d 3.1 ],
138
+ %w[ h1 a 1.2 ],
139
+ %w[ h1 b 1.1 ] ].each do |oinp|
140
+
141
+ @visit_q.add( order( oinp ) )
142
+
143
+ end
144
+ assert_equal( 3, @visit_q.host_count, "host count" )
145
+
146
+ expected = [ %w[ h2:ALT c 3.2 ],
147
+ %w[ h2 a 2.2 ],
148
+ %w[ h1 a 1.2 ],
149
+ %w[ h2:ALT d 3.1 ],
150
+ %w[ h1 b 1.1 ],
151
+ %w[ w.h2 b 2.1 ] ]
152
+
153
+ p = 0
154
+ expected.each do |o|
155
+ assert_equal( o, acquire_order, p += 1 )
156
+ end
157
+
158
+ assert_queue_empty
159
+ end
160
+
128
161
  def test_multi_access_2
129
162
  @visit_q.default_max_access_per_host = 2
130
163
  add_common_orders
@@ -170,7 +203,7 @@ class TestVisitQueue < MiniTest::Unit::TestCase
170
203
  def test_interleaved
171
204
  @visit_q.default_max_access_per_host = 2
172
205
  @visit_q.default_min_host_delay = 3 #ms
173
- @visit_q.configure_host( 'h2.com', 1, 4 )
206
+ @visit_q.config( :domain => 'h2.com', :delay => 1, :cons => 4 )
174
207
 
175
208
  512.times do |i|
176
209
  @visit_q.add( order( [ %w[ h1 h2 ][rand( 2 )], i, 5 * rand ] ) )
@@ -222,10 +255,13 @@ class TestVisitQueue < MiniTest::Unit::TestCase
222
255
 
223
256
  def order( args )
224
257
  host, c, p = args
258
+ host, t = host.split( ':' )
259
+
225
260
  UniMap.new.tap do |o|
226
261
  o.url = visit_url( "http://#{host}.com/#{c}" )
227
262
  o.priority = p.to_f
228
263
  o.vtest_input = args
264
+ o.type = t || 'PAGE'
229
265
  end
230
266
  end
231
267
 
metadata CHANGED
@@ -1,191 +1,230 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: iudex-core
3
- version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 1.2.1
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 1.3.0
6
6
  platform: java
7
- authors:
8
- - David Kellum
9
- autorequire:
7
+ authors:
8
+ - David Kellum
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
-
13
- date: 2012-09-15 00:00:00 Z
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
16
- name: rjack-slf4j
17
- version_requirements: &id001 !ruby/object:Gem::Requirement
18
- none: false
19
- requirements:
20
- - - ">="
21
- - !ruby/object:Gem::Version
22
- version: 1.6.5
23
- - - <
24
- - !ruby/object:Gem::Version
25
- version: "1.8"
26
- requirement: *id001
27
- prerelease: false
28
- type: :runtime
29
- - !ruby/object:Gem::Dependency
30
- name: hooker
31
- version_requirements: &id002 !ruby/object:Gem::Requirement
32
- none: false
33
- requirements:
34
- - - ~>
35
- - !ruby/object:Gem::Version
36
- version: 1.0.0
37
- requirement: *id002
38
- prerelease: false
39
- type: :runtime
40
- - !ruby/object:Gem::Dependency
41
- name: gravitext-util
42
- version_requirements: &id003 !ruby/object:Gem::Requirement
43
- none: false
44
- requirements:
45
- - - ~>
46
- - !ruby/object:Gem::Version
47
- version: 1.6.1
48
- requirement: *id003
49
- prerelease: false
50
- type: :runtime
51
- - !ruby/object:Gem::Dependency
52
- name: iudex-filter
53
- version_requirements: &id004 !ruby/object:Gem::Requirement
54
- none: false
55
- requirements:
56
- - - ~>
57
- - !ruby/object:Gem::Version
58
- version: 1.2.1
59
- requirement: *id004
60
- prerelease: false
61
- type: :runtime
62
- - !ruby/object:Gem::Dependency
63
- name: iudex-http
64
- version_requirements: &id005 !ruby/object:Gem::Requirement
65
- none: false
66
- requirements:
67
- - - ~>
68
- - !ruby/object:Gem::Version
69
- version: 1.2.1
70
- requirement: *id005
71
- prerelease: false
72
- type: :runtime
73
- - !ruby/object:Gem::Dependency
74
- name: iudex-barc
75
- version_requirements: &id006 !ruby/object:Gem::Requirement
76
- none: false
77
- requirements:
78
- - - ~>
79
- - !ruby/object:Gem::Version
80
- version: 1.2.1
81
- requirement: *id006
82
- prerelease: false
83
- type: :runtime
84
- - !ruby/object:Gem::Dependency
85
- name: minitest
86
- version_requirements: &id007 !ruby/object:Gem::Requirement
87
- none: false
88
- requirements:
89
- - - ~>
90
- - !ruby/object:Gem::Version
91
- version: "2.3"
92
- requirement: *id007
93
- prerelease: false
94
- type: :development
95
- - !ruby/object:Gem::Dependency
96
- name: rjack-logback
97
- version_requirements: &id008 !ruby/object:Gem::Requirement
98
- none: false
99
- requirements:
100
- - - ~>
101
- - !ruby/object:Gem::Version
102
- version: "1.2"
103
- requirement: *id008
104
- prerelease: false
105
- type: :development
106
- - !ruby/object:Gem::Dependency
107
- name: rjack-tarpit
108
- version_requirements: &id009 !ruby/object:Gem::Requirement
109
- none: false
110
- requirements:
111
- - - ~>
112
- - !ruby/object:Gem::Version
113
- version: "2.0"
114
- requirement: *id009
115
- prerelease: false
116
- type: :development
12
+ date: 2012-11-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rjack-slf4j
16
+ version_requirements: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ~>
19
+ - !ruby/object:Gem::Version
20
+ version: 1.7.0
21
+ none: false
22
+ requirement: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.7.0
27
+ none: false
28
+ prerelease: false
29
+ type: :runtime
30
+ - !ruby/object:Gem::Dependency
31
+ name: hooker
32
+ version_requirements: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ~>
35
+ - !ruby/object:Gem::Version
36
+ version: 1.0.0
37
+ none: false
38
+ requirement: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ version: 1.0.0
43
+ none: false
44
+ prerelease: false
45
+ type: :runtime
46
+ - !ruby/object:Gem::Dependency
47
+ name: gravitext-util
48
+ version_requirements: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ~>
51
+ - !ruby/object:Gem::Version
52
+ version: 1.7.0
53
+ none: false
54
+ requirement: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ~>
57
+ - !ruby/object:Gem::Version
58
+ version: 1.7.0
59
+ none: false
60
+ prerelease: false
61
+ type: :runtime
62
+ - !ruby/object:Gem::Dependency
63
+ name: iudex-filter
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 1.3.0
69
+ none: false
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ~>
73
+ - !ruby/object:Gem::Version
74
+ version: 1.3.0
75
+ none: false
76
+ prerelease: false
77
+ type: :runtime
78
+ - !ruby/object:Gem::Dependency
79
+ name: iudex-http
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ~>
83
+ - !ruby/object:Gem::Version
84
+ version: 1.3.0
85
+ none: false
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ~>
89
+ - !ruby/object:Gem::Version
90
+ version: 1.3.0
91
+ none: false
92
+ prerelease: false
93
+ type: :runtime
94
+ - !ruby/object:Gem::Dependency
95
+ name: iudex-barc
96
+ version_requirements: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ~>
99
+ - !ruby/object:Gem::Version
100
+ version: 1.3.0
101
+ none: false
102
+ requirement: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ~>
105
+ - !ruby/object:Gem::Version
106
+ version: 1.3.0
107
+ none: false
108
+ prerelease: false
109
+ type: :runtime
110
+ - !ruby/object:Gem::Dependency
111
+ name: minitest
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ~>
115
+ - !ruby/object:Gem::Version
116
+ version: '2.3'
117
+ none: false
118
+ requirement: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ~>
121
+ - !ruby/object:Gem::Version
122
+ version: '2.3'
123
+ none: false
124
+ prerelease: false
125
+ type: :development
126
+ - !ruby/object:Gem::Dependency
127
+ name: rjack-logback
128
+ version_requirements: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ~>
131
+ - !ruby/object:Gem::Version
132
+ version: '1.5'
133
+ none: false
134
+ requirement: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ~>
137
+ - !ruby/object:Gem::Version
138
+ version: '1.5'
139
+ none: false
140
+ prerelease: false
141
+ type: :development
142
+ - !ruby/object:Gem::Dependency
143
+ name: rjack-tarpit
144
+ version_requirements: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ~>
147
+ - !ruby/object:Gem::Version
148
+ version: '2.0'
149
+ none: false
150
+ requirement: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ~>
153
+ - !ruby/object:Gem::Version
154
+ version: '2.0'
155
+ none: false
156
+ prerelease: false
157
+ type: :development
117
158
  description: Iudex is a general purpose web crawler and feed processor in ruby/java. The iudex-core gem contains core facilities and notably, does not contain such facilities as database-backed state management.
118
- email:
119
- - dek-oss@gravitext.com
120
- executables:
121
- - iudex-test-config
122
- - iudex-url-norm
159
+ email:
160
+ - dek-oss@gravitext.com
161
+ executables:
162
+ - iudex-test-config
163
+ - iudex-url-norm
123
164
  extensions: []
124
-
125
- extra_rdoc_files:
126
- - History.rdoc
127
- - README.rdoc
128
- files:
129
- - History.rdoc
130
- - Manifest.txt
131
- - README.rdoc
132
- - Rakefile
133
- - pom.xml
134
- - bin/iudex-test-config
135
- - bin/iudex-url-norm
136
- - build/TLDSets.java.erb
137
- - build/effective_tld_name.dat
138
- - build/tld_set_generator.rb
139
- - config/config.rb
140
- - config/mojibake
141
- - lib/iudex-core/base.rb
142
- - lib/iudex-core.rb
143
- - lib/iudex-core/config.rb
144
- - lib/iudex-core/mojibake.rb
145
- - test/setup.rb
146
- - test/test_charsets.rb
147
- - test/test_content_fetcher.rb
148
- - test/test_content_source.rb
149
- - test/test_log_writer.rb
150
- - test/test_mojibake.rb
151
- - test/test_redirect_handler.rb
152
- - test/test_visit_manager.rb
153
- - test/test_visit_queue.rb
154
- - test/test_visit_url.rb
155
- - lib/iudex-core/iudex-core-1.2.1.jar
165
+ extra_rdoc_files:
166
+ - History.rdoc
167
+ - README.rdoc
168
+ files:
169
+ - History.rdoc
170
+ - Manifest.txt
171
+ - README.rdoc
172
+ - Rakefile
173
+ - pom.xml
174
+ - bin/iudex-test-config
175
+ - bin/iudex-url-norm
176
+ - build/TLDSets.java.erb
177
+ - build/effective_tld_name.dat
178
+ - build/tld_set_generator.rb
179
+ - config/config.rb
180
+ - config/mojibake
181
+ - lib/iudex-core/base.rb
182
+ - lib/iudex-core.rb
183
+ - lib/iudex-core/config.rb
184
+ - lib/iudex-core/mojibake.rb
185
+ - lib/iudex-core/visit_queue.rb
186
+ - test/setup.rb
187
+ - test/test_charsets.rb
188
+ - test/test_content_fetcher.rb
189
+ - test/test_content_source.rb
190
+ - test/test_log_writer.rb
191
+ - test/test_mojibake.rb
192
+ - test/test_redirect_handler.rb
193
+ - test/test_visit_manager.rb
194
+ - test/test_visit_queue.rb
195
+ - test/test_visit_url.rb
196
+ - lib/iudex-core/iudex-core-1.3.0.jar
156
197
  homepage: http://iudex.gravitext.com
157
198
  licenses: []
158
-
159
- post_install_message:
160
- rdoc_options:
161
- - --main
162
- - README.rdoc
163
- require_paths:
164
- - lib
165
- required_ruby_version: !ruby/object:Gem::Requirement
199
+ post_install_message:
200
+ rdoc_options:
201
+ - --main
202
+ - README.rdoc
203
+ require_paths:
204
+ - lib
205
+ required_ruby_version: !ruby/object:Gem::Requirement
206
+ requirements:
207
+ - - ! '>='
208
+ - !ruby/object:Gem::Version
209
+ version: '0'
210
+ segments:
211
+ - 0
212
+ hash: 2
166
213
  none: false
167
- requirements:
168
- - - ">="
169
- - !ruby/object:Gem::Version
170
- hash: 2
171
- segments:
172
- - 0
173
- version: "0"
174
- required_rubygems_version: !ruby/object:Gem::Requirement
214
+ required_rubygems_version: !ruby/object:Gem::Requirement
215
+ requirements:
216
+ - - ! '>='
217
+ - !ruby/object:Gem::Version
218
+ version: '0'
219
+ segments:
220
+ - 0
221
+ hash: 2
175
222
  none: false
176
- requirements:
177
- - - ">="
178
- - !ruby/object:Gem::Version
179
- hash: 2
180
- segments:
181
- - 0
182
- version: "0"
183
223
  requirements: []
184
-
185
- rubyforge_project:
186
- rubygems_version: 1.8.15
187
- signing_key:
224
+ rubyforge_project:
225
+ rubygems_version: 1.8.24
226
+ signing_key:
188
227
  specification_version: 3
189
228
  summary: Iudex is a general purpose web crawler and feed processor in ruby/java.
190
229
  test_files: []
191
-
230
+ ...
Binary file