iudex-da 1.3.3-java → 1.4.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/History.rdoc CHANGED
@@ -1,3 +1,31 @@
1
+ === 1.4.0 (2013-10-29)
2
+ * Add experimental WorkPoller.do_reserve support, which utilizes
3
+ PostgreSQL 9.1+ writable Common Table Expressions (CTEs) to
4
+ efficiently(?) set a reserved column and instance (identifier) on
5
+ poll that guards against polling by another or the same iudex-worker
6
+ process. This enables multiple workers to cover the same urls/orders
7
+ as well as concurrent polling. Previously only an instance unique,
8
+ uhash_slice single-tenant partitioning scheme was supported for
9
+ multi-worker distribution.
10
+ * Add experimental support for concurrent (no discard) WorkPoller (in
11
+ collaboration with iudex-core 1.4.0 VisitManager), enabled by setting
12
+ do_reserve = true, do_discard = false.
13
+ * When enabled, polled max_urls is adjusted by current order_count
14
+ for subsequent polls.
15
+ * New max_reserved_time will force discard of possibly long reserved
16
+ tenure based on time since the queue was last empty.
17
+ * New max_discard_ratio will also force a discard if
18
+ order_count/max_urls exceeds this ratio.
19
+ * Add iudex-da-unreserve utility for cleaning reservations left due to
20
+ failure, based on age.
21
+ * Add DAKeys INSTANCE (String identifier) and RESERVED (date)
22
+ * Add migrations for instance and reserved columns; removal of
23
+ reserved from index_next_visit (profile)
24
+ * Upgrade to dbutils ~> 1.5.0, rjack-jdbc-postgres ~> 9.2.1002,
25
+ sequel ~> 3.46 (but not upcoming 4.x)
26
+ * Upgrade to iudex-* ~> 1.4.0 dependencies
27
+ * Upgrade to minitest ~> 4.7.4 (dev)
28
+
1
29
  === 1.3.3 (2012-11-8)
2
30
  * FactoryHelper.create_update_filter now prefers an options Hash
3
31
  exposing greater control over what is updated and how. In
data/Manifest.txt CHANGED
@@ -6,12 +6,16 @@ pom.xml
6
6
  bin/iudex-da-generate-test-data
7
7
  bin/iudex-da-import
8
8
  bin/iudex-da-simhash-dump
9
+ bin/iudex-da-unreserve
9
10
  bin/iudex-migrate
10
11
  config/config.rb
11
12
  db/20111012173757_base.rb
12
13
  db/20120930173600_uhash_collation_order.rb
14
+ db/20130419090000_instance_column.rb
15
+ db/20130419095500_reserved_column.rb
13
16
  db/index_next_visit/21500000000101_add_index_next_visit.rb
14
17
  db/index_next_visit/21500000000110_index_next_visit_partial.rb
18
+ db/index_next_visit/21500000000120_index_next_visit_not_reserved.rb
15
19
  db/simhash/21500000000001_add_simhash_index.rb
16
20
  lib/iudex-da/base.rb
17
21
  lib/iudex-da.rb
@@ -28,4 +32,4 @@ test/test_migrate.rb
28
32
  test/test_pool_factory.rb
29
33
  test/test_url_model.rb
30
34
  test/test_work_poller.rb
31
- lib/iudex-da/iudex-da-1.3.3.jar
35
+ lib/iudex-da/iudex-da-1.4.0.jar
data/README.rdoc CHANGED
@@ -16,7 +16,7 @@ meta-data store and work priority queue.
16
16
 
17
17
  == License
18
18
 
19
- Copyright (c) 2008-2012 David Kellum
19
+ Copyright (c) 2008-2013 David Kellum
20
20
 
21
21
  Licensed under the Apache License, Version 2.0 (the "License"); you
22
22
  may not use this file except in compliance with the License. You may
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env jruby
2
2
  # -*- ruby -*-
3
3
  #--
4
- # Copyright (c) 2008-2012 David Kellum
4
+ # Copyright (c) 2008-2013 David Kellum
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License"); you
7
7
  # may not use this file except in compliance with the License. You
data/bin/iudex-da-import CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env jruby
2
2
  # -*- ruby -*-
3
3
  #--
4
- # Copyright (c) 2008-2012 David Kellum
4
+ # Copyright (c) 2008-2013 David Kellum
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License"); you
7
7
  # may not use this file except in compliance with the License. You
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env jruby
2
2
  # -*- ruby -*-
3
3
  #--
4
- # Copyright (c) 2008-2012 David Kellum
4
+ # Copyright (c) 2008-2013 David Kellum
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License"); you
7
7
  # may not use this file except in compliance with the License. You
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env jruby
2
+ # -*- ruby -*-
3
+ #--
4
+ # Copyright (c) 2008-2013 David Kellum
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
7
+ # may not use this file except in compliance with the License. You
8
+ # may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
15
+ # implied. See the License for the specific language governing
16
+ # permissions and limitations under the License.
17
+ #++
18
+
19
+ $LOAD_PATH.unshift File.join( File.dirname(__FILE__), "..", "lib" )
20
+
21
+ require 'rubygems'
22
+ require 'optparse'
23
+
24
+ module IudexBinScript
25
+
26
+ require 'rjack-logback'
27
+ include RJack
28
+ Logback.config_console
29
+
30
+ require 'iudex-core'
31
+ require 'iudex-da/base'
32
+ require 'iudex-da/config'
33
+ include Iudex
34
+
35
+ Hooker.log_with { |m| SLF4J[ 'iudex' ].info( m.rstrip ) }
36
+
37
+ interval = '60 minutes'
38
+
39
+ # Apply all config before including orm
40
+ OptionParser.new do |opts|
41
+ opts.banner = <<END
42
+ Usage: iudex-da-unreserve [options]
43
+ END
44
+ opts.on( "-s", "--set name=value", String,
45
+ "Set connect prop (ex: database=iudex)" ) do |nv|
46
+ name,value = nv.split('=').map { |t| t.strip }
47
+ Hooker.add( [ :iudex, :connect_props ] ) do
48
+ { name.to_sym => value }
49
+ end
50
+ end
51
+ opts.on( "-p", "--profile NAME", String,
52
+ "Add a migration profile (ex: simhash)" ) do |p|
53
+ Hooker.add( [ :iudex, :migration_profiles ] ) do |profiles|
54
+ profiles << p
55
+ end
56
+ end
57
+ opts.on( "-d", "--debug" ) do
58
+ Logback[ 'iudex.da' ].level = :debug
59
+ Hooker.add( [ :iudex, :connect_props ] ) { { :log => true } }
60
+ end
61
+ opts.on( "-v", "--version", "Display version and exit" ) do
62
+ puts "iudex-da: #{DA::VERSION}"
63
+ exit 1
64
+ end
65
+ opts.on( "-l", "--long", "Log in long form, to STDERR" ) do
66
+ Logback.config_console( :full => true, :stderr => true )
67
+ end
68
+ opts.on( "-a", "--age AGE",
69
+ "Age to unreserve, in PG interval syntax " +
70
+ "(default: '60 minutes')" ) do |age|
71
+ interval = age
72
+ end
73
+ Hooker.register_config( opts )
74
+ end.parse!
75
+
76
+ require 'iudex-da/orm'
77
+
78
+ class Runner
79
+ include Iudex::DA
80
+
81
+ def unreserve( age )
82
+ ORM.db.transaction( :isolation => :repeatable,
83
+ :retry_on => [ Sequel::SerializationFailure ] ) do
84
+ ds = ORM::db.fetch <<-SQL
85
+ WITH unreserve AS (
86
+ UPDATE urls
87
+ SET reserved = NULL
88
+ WHERE reserved < ( now() - interval '#{age}' )
89
+ RETURNING instance )
90
+ SELECT count(*),instance FROM unreserve
91
+ GROUP BY instance
92
+ ORDER BY count DESC
93
+ SQL
94
+
95
+ total = ds.inject( 0 ) do |m,row|
96
+ puts( "%30s %7d" % [ row[:instance], row[:count] ] )
97
+ m + row[:count]
98
+ end
99
+ puts( "%30s %7d" % [ 'TOTAL', total ] ) if total > 0
100
+
101
+ end
102
+ end
103
+
104
+ end
105
+
106
+ Runner.new.unreserve( interval )
107
+ end
data/bin/iudex-migrate CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env jruby
2
2
  # -*- ruby -*-
3
3
  #--
4
- # Copyright (c) 2008-2012 David Kellum
4
+ # Copyright (c) 2008-2013 David Kellum
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License"); you
7
7
  # may not use this file except in compliance with the License. You
@@ -29,7 +29,7 @@ module IudexBinScript
29
29
 
30
30
  # Note: Avoid loading iudex-da with its jar dependency which would
31
31
  # make it hard to bootstrap the db from source alone.
32
- # Instead load only nessary core, base, config, and ar (post config):
32
+ # Instead load only necessary core, base, config, and orm (post config):
33
33
  require 'iudex-core'
34
34
  require 'iudex-da/base'
35
35
  require 'iudex-da/config'
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
@@ -0,0 +1,27 @@
1
+ #--
2
+ # Copyright (c) 2008-2013 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ Sequel.migration do
18
+ change do
19
+ alter_table( :urls ) do
20
+ # A short string uniquely identifying the iudex worker instance
21
+ # that has last operated on this URL (for example processed, or
22
+ # in future: reserved). A hostname or IP may be a suitable
23
+ # value.
24
+ add_column :instance, String
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,23 @@
1
+ #--
2
+ # Copyright (c) 2008-2013 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ Sequel.migration do
18
+ change do
19
+ alter_table( :urls ) do
20
+ add_column :reserved, "timestamp with time zone"
21
+ end
22
+ end
23
+ end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
@@ -0,0 +1,35 @@
1
+ #--
2
+ # Copyright (c) 2008-2013 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ Sequel.migration do
18
+
19
+ up do
20
+ alter_table( :urls ) do
21
+ drop_index( :next_visit_after )
22
+ add_index( :next_visit_after,
23
+ :where => 'next_visit_after IS NOT NULL AND reserved IS NULL' )
24
+ end
25
+ end
26
+
27
+ down do
28
+ alter_table( :urls ) do
29
+ drop_index( :next_visit_after )
30
+ add_index( :next_visit_after,
31
+ :where => 'next_visit_after is not null' )
32
+ end
33
+ end
34
+
35
+ end
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
data/lib/iudex-da.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
data/lib/iudex-da/base.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
@@ -16,7 +16,7 @@
16
16
 
17
17
  module Iudex
18
18
  module DA
19
- VERSION = '1.3.3'
19
+ VERSION = '1.4.0'
20
20
 
21
21
  LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
22
  end
@@ -1,6 +1,6 @@
1
1
 
2
2
  #--
3
- # Copyright (c) 2008-2012 David Kellum
3
+ # Copyright (c) 2008-2013 David Kellum
4
4
  #
5
5
  # Licensed under the Apache License, Version 2.0 (the "License"); you
6
6
  # may not use this file except in compliance with the License. You
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You may
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You may
Binary file
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You may
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
data/lib/iudex-da/orm.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
@@ -17,7 +17,7 @@
17
17
  require 'rjack-slf4j'
18
18
  require 'iudex-da/config'
19
19
  require 'sequel'
20
- require 'jdbc/postgres'
20
+ require 'rjack-jdbc-postgres'
21
21
  require 'hooker'
22
22
 
23
23
  Sequel.extension :migration
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
@@ -17,7 +17,7 @@
17
17
  require 'iudex-da'
18
18
  require 'rjack-slf4j'
19
19
  require 'java'
20
- require 'jdbc/postgres'
20
+ require 'rjack-jdbc-postgres'
21
21
 
22
22
  module Iudex::DA
23
23
 
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You may
@@ -70,10 +70,53 @@ module Iudex::DA
70
70
  # priority order (default: false)
71
71
  attr_writer :do_domain_group
72
72
 
73
+ # If set true, UPDATE reserved date (and instance, if specified)
74
+ # (Default: false)
75
+ attr_writer :do_reserve
76
+
77
+ # If set true, discards old queue at every poll, even if
78
+ # do_reserve could make queue re-fill a safe operation.
79
+ # (Default: true)
80
+ attr_writer :do_discard
81
+
82
+ # The maximum ratio of current to max_urls where the old queue
83
+ # will be discarded as a safety to avoid starvation (Default: 0.667)
84
+ attr_accessor :max_discard_ratio
85
+
86
+ # The maximum amount of time in milliseconds that the oldest order
87
+ # can remain reserved before a discard is required. This is only
88
+ # relevant when do_reserve is true and do_discard is set false,
89
+ # and typically would be set as a multiple of max_poll_interval
90
+ # (ms). Note that max_poll_interval is interpreted as the worst
91
+ # case next discard opportunity for this purpose. The next poll
92
+ # made to an empty queue, either by prior discard or completion,
93
+ # resets the time tracking. (Default: nil, off)
94
+ def max_reserved_time
95
+ @max_reserved_time_s && ( @max_reserved_time_s * 1000.0 ).round
96
+ end
97
+
98
+ def max_reserved_time=( ms )
99
+ @max_reserved_time_s = ms / 1000.0
100
+ end
101
+
102
+ attr_reader :max_reserved_time_s
103
+
73
104
  def domain_group?
74
105
  @do_domain_group
75
106
  end
76
107
 
108
+ def reserve?
109
+ @do_reserve
110
+ end
111
+
112
+ def discard?
113
+ @do_discard
114
+ end
115
+
116
+ # String uniquely identifying this worker instance. Only used here
117
+ # with do_reserve.
118
+ attr_accessor :instance
119
+
77
120
  # First age coefficient. If set > 0.0, adjust priority by the
78
121
  # equation:
79
122
  #
@@ -125,10 +168,16 @@ module Iudex::DA
125
168
 
126
169
  @domain_depth_coef = nil
127
170
  @do_domain_group = false
171
+ @do_reserve = false
172
+ @do_discard = true
173
+ @instance = nil
128
174
 
129
175
  @max_priority_urls = nil
130
176
  @max_domain_urls = 10_000
131
177
  @max_urls = 50_000
178
+ @max_discard_ratio = 2.0/3.0
179
+ @max_reserved_time_s = nil
180
+ @last_none_reserved = Time.now
132
181
 
133
182
  @age_coef_1 = 0.2
134
183
  @age_coef_2 = 0.1
@@ -138,7 +187,6 @@ module Iudex::DA
138
187
  @uhash_slice = nil
139
188
 
140
189
  @log = RJack::SLF4J[ self.class ]
141
- #FIXME: Add accessor for log in GenericWorkPollStrategy
142
190
 
143
191
  keys( :url, :priority, :next_visit_after ).each do |k|
144
192
  unless mapper.fields.include?( k )
@@ -157,22 +205,68 @@ module Iudex::DA
157
205
 
158
206
  # Override GenericWorkPollStrategy
159
207
  def pollWorkImpl( visit_queue )
160
- visit_queue.add_all( poll )
208
+ visit_queue.add_all( poll( visit_queue.order_count ) )
161
209
  rescue SQLException => x
162
210
  @log.error( "On poll: ", x )
163
211
  end
164
212
 
165
213
  # Poll work and return as List<UniMap>
166
214
  # Raises SQLException
167
- def poll
168
- query, params = generate_query
169
- @log.debug { "Poll query: #{query}; #{params.inspect}" }
170
- reader.select( query, *params )
215
+ def poll( current_urls = 0 )
216
+ @last_none_reserved = Time.now if max_reserved_time_s && current_urls == 0
217
+ query = generate_query( current_urls )
218
+ @log.debug { "Poll query: #{query}" }
219
+ reader.select_with_retry( query )
220
+ end
221
+
222
+ # Override GenericWorkPollStrategy
223
+ def shouldReplaceQueue( visit_queue )
224
+ ( !reserve? || discard? ||
225
+ ( ( visit_queue.order_count.to_f / max_urls ) > max_discard_ratio ) ||
226
+ ( max_reserved_time_s && next_reserve_time > max_reserved_time_s ) )
227
+ end
228
+
229
+ def next_reserve_time( now = Time.now )
230
+ now - @last_none_reserved + ( max_poll_interval / 1000.0 )
231
+ end
232
+
233
+ # Override GenericWorkPollStrategy to discard old VisitQueue
234
+ # contents when do_reserve is enabled.
235
+ def discard( visit_queue )
236
+ if reserve? && visit_queue.order_count > 0
237
+ orders = visit_queue.hosts.inject( [] ) do |a, hq|
238
+ a.concat( hq.orders.to_a )
239
+ end
240
+ if orders.length > 0
241
+ n = reader.unreserve( orders )
242
+ @log.info { "Unreserved #{n} orders on discard" }
243
+ end
244
+ end
245
+ rescue SQLException => x
246
+ @log.error( "On discard: ", x )
247
+ end
248
+
249
+ # Unreserve any orders that are reserved by the current instance.
250
+ # No-op unless do_reserve and instance are set.
251
+ def instance_unreserve
252
+ if reserve? && instance
253
+ n = reader.update( <<-SQL )
254
+ UPDATE urls
255
+ SET reserved = NULL
256
+ WHERE reserved IS NOT NULL AND
257
+ instance = '#{instance}'
258
+ SQL
259
+ @log.info { "Unreserved #{n} orders for instance #{instance}" }
260
+ n
261
+ end
262
+ rescue SQLException => x
263
+ @log.error( "On instance_unreserve: ", x )
171
264
  end
172
265
 
173
266
  def reader
174
267
  @reader ||= ContentReader.new( @data_source, @mapper ).tap do |r|
175
268
  r.priority_adjusted = aged_priority?
269
+ r.max_retries = 10
176
270
  end
177
271
  end
178
272
 
@@ -186,25 +280,33 @@ module Iudex::DA
186
280
  end
187
281
  end
188
282
 
189
- def generate_query
283
+ def domain_union?
284
+ !@domain_union.empty?
285
+ end
286
+
287
+ def generate_query( current_urls )
190
288
  criteria = [ "next_visit_after <= now()" ]
191
289
 
290
+ criteria << "reserved IS NULL" if reserve?
291
+
192
292
  if uhash_slice
193
293
  min, max = url64_range( *uhash_slice )
194
294
  criteria << "uhash > ( '#{min}' COLLATE \"C\" )" if min
195
295
  criteria << "uhash < ( '#{max}' COLLATE \"C\" )" if max
196
296
  end
197
297
 
198
- params = []
199
-
200
- if @domain_union.empty?
201
- query = generate_query_inner( criteria )
202
- params = [ max_urls ]
298
+ unless domain_union?
299
+ query = generate_query_inner( criteria, ( max_urls - current_urls ) )
203
300
  else
204
301
  subqueries = []
205
302
  @domain_union.each do | opts |
206
303
  opts = opts.dup
207
- opts[ :max ] ||= @max_urls
304
+ if opts[ :max ]
305
+ opts[ :max ] = ( opts[ :max ] * ( max_urls - current_urls ) /
306
+ max_urls.to_f ).floor
307
+ else
308
+ opts[ :max ] = ( max_urls - current_urls )
309
+ end
208
310
 
209
311
  next if opts[ :max ] == 0
210
312
 
@@ -228,8 +330,7 @@ module Iudex::DA
228
330
  c << "type = '#{opts[ :type ]}'"
229
331
  end
230
332
 
231
- subqueries << generate_query_inner( c )
232
- params << opts[ :max ]
333
+ subqueries << generate_query_inner( c, opts[ :max ] )
233
334
  end
234
335
  if subqueries.size == 1
235
336
  query = subqueries.first
@@ -238,17 +339,20 @@ module Iudex::DA
238
339
  end
239
340
  end
240
341
 
342
+ query = wrap_with_update( fields, query ) if reserve?
343
+
241
344
  query = wrap_domain_group_query( fields, query ) if domain_group?
242
345
 
243
346
  query = query.gsub( /\s+/, ' ').strip
244
347
 
245
- [ query, params ]
348
+ query
246
349
  end
247
350
 
248
- def generate_query_inner( criteria )
351
+ def generate_query_inner( criteria, max_urls )
249
352
 
250
353
  query = filter_query(
251
- fields( ( :domain if domain_depth? || domain_group? ) ),
354
+ fields( ( :domain if domain_depth? || domain_group? ),
355
+ ( :uhash if reserve? ) ),
252
356
  ( max_priority_urls if domain_depth? ),
253
357
  criteria )
254
358
 
@@ -260,7 +364,7 @@ module Iudex::DA
260
364
  limit_priority = domain_depth? ? :adj_priority : :priority
261
365
  query += <<-SQL
262
366
  ORDER BY #{limit_priority} DESC
263
- LIMIT ?
367
+ LIMIT #{max_urls}
264
368
  SQL
265
369
 
266
370
  query
@@ -288,7 +392,7 @@ module Iudex::DA
288
392
  if aged_priority?
289
393
  flds = flds.dup
290
394
  i = flds.index( :priority ) || flds.size
291
- flds[ i ] = <<-SQL
395
+ flds[ i ] = <<-SQL.strip
292
396
  ( priority +
293
397
  #{age_coef_1}::REAL *
294
398
  SQRT( #{age_coef_2}::REAL *
@@ -311,6 +415,24 @@ module Iudex::DA
311
415
  sql
312
416
  end
313
417
 
418
+ def wrap_with_update( flds, sub )
419
+ sflds = [ "reserved = now()" ]
420
+ sflds << "instance = '#{instance}'" if instance
421
+
422
+ # Use ..FOR UPDATE unless not supported by query specific
423
+ # options with PostgreSQL <= 9.1
424
+ sub += " FOR UPDATE" unless domain_depth? || domain_union?
425
+
426
+ <<-SQL
427
+ WITH work AS ( #{sub} ),
428
+ reserve AS (
429
+ UPDATE urls
430
+ SET #{clist sflds}
431
+ WHERE uhash IN ( SELECT uhash FROM work ) )
432
+ SELECT #{clist flds} FROM work
433
+ SQL
434
+ end
435
+
314
436
  def wrap_domain_group_query( flds, sub )
315
437
  <<-SQL
316
438
  SELECT #{clist flds}
data/pom.xml CHANGED
@@ -5,13 +5,13 @@
5
5
  <groupId>iudex</groupId>
6
6
  <artifactId>iudex-da</artifactId>
7
7
  <packaging>jar</packaging>
8
- <version>1.3.3</version>
8
+ <version>1.4.0</version>
9
9
  <name>Iudex Data Access</name>
10
10
 
11
11
  <parent>
12
12
  <groupId>iudex</groupId>
13
13
  <artifactId>iudex-parent</artifactId>
14
- <version>1.3.0</version>
14
+ <version>1.4.0</version>
15
15
  <relativePath>..</relativePath>
16
16
  </parent>
17
17
 
@@ -20,13 +20,13 @@
20
20
  <dependency>
21
21
  <groupId>iudex</groupId>
22
22
  <artifactId>iudex-core</artifactId>
23
- <version>[1.3.0,1.3.999)</version>
23
+ <version>[1.4.0,1.4.999)</version>
24
24
  </dependency>
25
25
 
26
26
  <dependency>
27
27
  <groupId>commons-dbutils</groupId>
28
28
  <artifactId>commons-dbutils</artifactId>
29
- <version>1.4</version>
29
+ <version>1.5</version>
30
30
  </dependency>
31
31
 
32
32
  <dependency>
@@ -38,7 +38,7 @@
38
38
  <dependency>
39
39
  <groupId>commons-pool</groupId>
40
40
  <artifactId>commons-pool</artifactId>
41
- <version>[1.5.4,1.5.6]</version>
41
+ <version>[1.5.7,1.5.999]</version>
42
42
  </dependency>
43
43
 
44
44
  <dependency>
data/test/setup.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2012 David Kellum
2
+ # Copyright (c) 2008-2013 David Kellum
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License"); you
5
5
  # may not use this file except in compliance with the License. You
data/test/test_migrate.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  #.hashdot.profile += jruby-shortlived
3
3
 
4
4
  #--
5
- # Copyright (c) 2008-2012 David Kellum
5
+ # Copyright (c) 2008-2013 David Kellum
6
6
  #
7
7
  # Licensed under the Apache License, Version 2.0 (the "License"); you
8
8
  # may not use this file except in compliance with the License. You may
@@ -2,7 +2,7 @@
2
2
  #.hashdot.profile += jruby-shortlived
3
3
 
4
4
  #--
5
- # Copyright (c) 2008-2012 David Kellum
5
+ # Copyright (c) 2008-2013 David Kellum
6
6
  #
7
7
  # Licensed under the Apache License, Version 2.0 (the "License"); you
8
8
  # may not use this file except in compliance with the License. You
@@ -2,7 +2,7 @@
2
2
  #.hashdot.profile += jruby-shortlived
3
3
 
4
4
  #--
5
- # Copyright (c) 2008-2012 David Kellum
5
+ # Copyright (c) 2008-2013 David Kellum
6
6
  #
7
7
  # Licensed under the Apache License, Version 2.0 (the "License"); you
8
8
  # may not use this file except in compliance with the License. You may
@@ -2,7 +2,7 @@
2
2
  #.hashdot.profile += jruby-shortlived
3
3
 
4
4
  #--
5
- # Copyright (c) 2008-2012 David Kellum
5
+ # Copyright (c) 2008-2013 David Kellum
6
6
  #
7
7
  # Licensed under the Apache License, Version 2.0 (the "License"); you
8
8
  # may not use this file except in compliance with the License. You may
@@ -25,6 +25,7 @@ require 'iudex-da/pool_data_source_factory'
25
25
  require 'iudex-da/models'
26
26
 
27
27
  class TestWorkPoller < MiniTest::Unit::TestCase
28
+ include Iudex::Core
28
29
  include Iudex::Filter::KeyHelper
29
30
  include Iudex::DA
30
31
  include Iudex::DA::ORM
@@ -66,6 +67,31 @@ class TestWorkPoller < MiniTest::Unit::TestCase
66
67
  assert_equal( 3, pos )
67
68
  end
68
69
 
70
+ def test_poll_with_reserve
71
+ poller.do_reserve = true
72
+ poller.max_urls = 2
73
+ poller.instance = 'test'
74
+
75
+ polled = poller.poll
76
+ polled.each_with_index do |map,i|
77
+ assert_equal( URLS[ i ][ 0 ], map.url.url )
78
+ end
79
+ assert_equal( 2, polled.size )
80
+ reserved = polled
81
+
82
+ polled = poller.poll
83
+ assert_equal( 1, polled.size )
84
+ assert_equal( URLS[2][0], polled.first.url.url )
85
+ reserved += polled
86
+
87
+ RJack::Logback[ 'iudex.da.WorkPoller' ].with_level( :warn ) do
88
+ poller.discard( VisitQueue.new.tap { |q| q.add_all( reserved ) } )
89
+ end
90
+ poller.max_urls = 3
91
+
92
+ assert_equal( 3, poller.poll.size )
93
+ end
94
+
69
95
  def test_poll_with_max_priority_urls
70
96
  poller.max_priority_urls = 4
71
97
 
@@ -89,6 +115,25 @@ class TestWorkPoller < MiniTest::Unit::TestCase
89
115
  assert_equal( 3, pos )
90
116
  end
91
117
 
118
+ def test_poll_with_domain_depth_reserve
119
+ poller.domain_depth_coef = 0.125
120
+ poller.max_priority_urls = 4
121
+ poller.do_reserve = true
122
+ poller.instance = 'test'
123
+
124
+ pos = 0
125
+ poller.poll.each do |map|
126
+ assert_equal( URLS[ pos ][ 0 ], map.url.url )
127
+ pos += 1
128
+ end
129
+ assert_equal( 3, pos )
130
+ RJack::Logback[ 'iudex.da.WorkPoller' ].with_level( :warn ) do
131
+ assert_equal( 3, poller.instance_unreserve )
132
+ end
133
+ assert_equal( 3, poller.poll.size )
134
+ assert_equal( 0, poller.poll.size )
135
+ end
136
+
92
137
  def test_poll_with_domain_depth_only
93
138
  poller.domain_depth_coef = 0.125
94
139
  poller.age_coef_1 = 0.0
@@ -131,6 +176,15 @@ class TestWorkPoller < MiniTest::Unit::TestCase
131
176
  assert_equal( 3, result.size )
132
177
  end
133
178
 
179
+ def test_poll_domain_union_2_reserve
180
+ poller.do_reserve = true
181
+ poller.domain_union = [ { :domain => 'gravitext.com', :max => 15000 },
182
+ { :max => 10000 } ]
183
+
184
+ assert_equal( 3, poller.poll.size )
185
+ assert_equal( 0, poller.poll.size )
186
+ end
187
+
134
188
  def test_poll_domain_union_3
135
189
  poller.domain_union = [ { :domain => 'gravitext.com', :max => 1 },
136
190
  { :domain => 'hometown.com', :max => 1 },
metadata CHANGED
@@ -2,14 +2,14 @@
2
2
  name: iudex-da
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.3.3
5
+ version: 1.4.0
6
6
  platform: java
7
7
  authors:
8
8
  - David Kellum
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-08 00:00:00.000000000 Z
12
+ date: 2013-10-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: iudex-core
@@ -17,13 +17,13 @@ dependencies:
17
17
  requirements:
18
18
  - - ~>
19
19
  - !ruby/object:Gem::Version
20
- version: 1.3.0
20
+ version: 1.4.0
21
21
  none: false
22
22
  requirement: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
- version: 1.3.0
26
+ version: 1.4.0
27
27
  none: false
28
28
  prerelease: false
29
29
  type: :runtime
@@ -33,29 +33,29 @@ dependencies:
33
33
  requirements:
34
34
  - - ~>
35
35
  - !ruby/object:Gem::Version
36
- version: 3.40.0
36
+ version: '3.46'
37
37
  none: false
38
38
  requirement: !ruby/object:Gem::Requirement
39
39
  requirements:
40
40
  - - ~>
41
41
  - !ruby/object:Gem::Version
42
- version: 3.40.0
42
+ version: '3.46'
43
43
  none: false
44
44
  prerelease: false
45
45
  type: :runtime
46
46
  - !ruby/object:Gem::Dependency
47
- name: jdbc-postgres
47
+ name: rjack-jdbc-postgres
48
48
  version_requirements: !ruby/object:Gem::Requirement
49
49
  requirements:
50
50
  - - ~>
51
51
  - !ruby/object:Gem::Version
52
- version: 9.1.901
52
+ version: 9.2.1002
53
53
  none: false
54
54
  requirement: !ruby/object:Gem::Requirement
55
55
  requirements:
56
56
  - - ~>
57
57
  - !ruby/object:Gem::Version
58
- version: 9.1.901
58
+ version: 9.2.1002
59
59
  none: false
60
60
  prerelease: false
61
61
  type: :runtime
@@ -81,13 +81,13 @@ dependencies:
81
81
  requirements:
82
82
  - - ~>
83
83
  - !ruby/object:Gem::Version
84
- version: 1.4.0
84
+ version: 1.5.0
85
85
  none: false
86
86
  requirement: !ruby/object:Gem::Requirement
87
87
  requirements:
88
88
  - - ~>
89
89
  - !ruby/object:Gem::Version
90
- version: 1.4.0
90
+ version: 1.5.0
91
91
  none: false
92
92
  prerelease: false
93
93
  type: :runtime
@@ -97,13 +97,13 @@ dependencies:
97
97
  requirements:
98
98
  - - ~>
99
99
  - !ruby/object:Gem::Version
100
- version: '2.3'
100
+ version: 4.7.4
101
101
  none: false
102
102
  requirement: !ruby/object:Gem::Requirement
103
103
  requirements:
104
104
  - - ~>
105
105
  - !ruby/object:Gem::Version
106
- version: '2.3'
106
+ version: 4.7.4
107
107
  none: false
108
108
  prerelease: false
109
109
  type: :development
@@ -146,6 +146,7 @@ executables:
146
146
  - iudex-da-generate-test-data
147
147
  - iudex-da-import
148
148
  - iudex-da-simhash-dump
149
+ - iudex-da-unreserve
149
150
  - iudex-migrate
150
151
  extensions: []
151
152
  extra_rdoc_files:
@@ -160,12 +161,16 @@ files:
160
161
  - bin/iudex-da-generate-test-data
161
162
  - bin/iudex-da-import
162
163
  - bin/iudex-da-simhash-dump
164
+ - bin/iudex-da-unreserve
163
165
  - bin/iudex-migrate
164
166
  - config/config.rb
165
167
  - db/20111012173757_base.rb
166
168
  - db/20120930173600_uhash_collation_order.rb
169
+ - db/20130419090000_instance_column.rb
170
+ - db/20130419095500_reserved_column.rb
167
171
  - db/index_next_visit/21500000000101_add_index_next_visit.rb
168
172
  - db/index_next_visit/21500000000110_index_next_visit_partial.rb
173
+ - db/index_next_visit/21500000000120_index_next_visit_not_reserved.rb
169
174
  - db/simhash/21500000000001_add_simhash_index.rb
170
175
  - lib/iudex-da/base.rb
171
176
  - lib/iudex-da.rb
@@ -182,7 +187,7 @@ files:
182
187
  - test/test_pool_factory.rb
183
188
  - test/test_url_model.rb
184
189
  - test/test_work_poller.rb
185
- - lib/iudex-da/iudex-da-1.3.3.jar
190
+ - lib/iudex-da/iudex-da-1.4.0.jar
186
191
  homepage: http://iudex.gravitext.com
187
192
  licenses: []
188
193
  post_install_message:
Binary file