iudex-da 1.0.0-java → 1.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
data/History.rdoc CHANGED
@@ -1,2 +1,12 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Update to iudex-core ~> 1.1.0
3
+ * Update to activerecord ~> 3.0.10, pg ~> 9.0.801, adapter ~> 1.1.3
4
+ * Expand commons-pool pom dep to [1.5.4, 1.5.6]
5
+ * Migrate host field to domain (registration level), mapper/writer
6
+ domain support, :domain in WorkPoller, drop :host key.
7
+ * Add migration profile support, move simhash index to simhash
8
+ profile, add index_next_visit migration profile
9
+ * Update to minitest ~> 2.3
10
+
1
11
  === 1.0.0 (2011-04-04)
2
12
  * Initial release.
data/Manifest.txt CHANGED
@@ -17,6 +17,9 @@ db/0050_add_cache_location.rb
17
17
  db/0060_url_indexes.rb
18
18
  db/0070_add_created_at.rb
19
19
  db/0080_add_simhash.rb
20
+ db/0081_remove_simhash_index.rb
21
+ db/index_next_visit/0100_add_index_next_visit.rb
22
+ db/simhash/0085_add_simhash_index.rb
20
23
  lib/iudex-da/base.rb
21
24
  lib/iudex-da.rb
22
25
  lib/iudex-da/ar.rb
@@ -29,4 +32,4 @@ test/setup.rb
29
32
  test/test_migrate.rb
30
33
  test/test_poll_work.rb
31
34
  test/test_pool_factory.rb
32
- lib/iudex-da/iudex-da-1.0.0.jar
35
+ lib/iudex-da/iudex-da-1.1.0.jar
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
4
4
  require 'iudex-da/base'
5
5
 
6
6
  require 'rubygems'
7
- gem 'rjack-tarpit', '~> 1.2'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
8
  require 'rjack-tarpit'
9
9
 
10
10
  t = RJack::TarPit.new( 'iudex-da',
@@ -14,15 +14,15 @@ t = RJack::TarPit.new( 'iudex-da',
14
14
  t.specify do |h|
15
15
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
16
 
17
- h.extra_deps += [ [ 'iudex-core', '~> 1.0.0' ],
18
- [ 'activerecord', '~> 2.3.10' ],
19
- [ 'jdbc-postgres', '>= 8.4.702', '< 9.1' ],
20
- [ 'activerecord-jdbcpostgresql-adapter', '~> 1.1.0' ],
17
+ h.extra_deps += [ [ 'iudex-core', '~> 1.1.0' ],
18
+ [ 'activerecord', '~> 3.0.10' ],
19
+ [ 'jdbc-postgres', '~> 9.0.801' ],
20
+ [ 'activerecord-jdbcpostgresql-adapter', '~> 1.1.3' ],
21
21
  [ 'rjack-commons-dbcp', '~> 1.4.0' ],
22
22
  [ 'rjack-commons-dbutils', '~> 1.3.0' ] ]
23
23
 
24
24
  h.testlib = :minitest
25
- h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
25
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
26
26
  [ 'rjack-logback', '~> 1.0' ] ]
27
27
  end
28
28
 
@@ -42,8 +42,4 @@ task :gem => [ :check_pom_version, :check_history_version
42
42
  task :tag => [ :check_pom_version, :check_history_version, :check_history_date ]
43
43
  task :push => [ :check_history_date ]
44
44
 
45
- # Disable verbose warnings, which are a bit much with ActiveRecord
46
- # 2.3.x at least.
47
- Hoe::RUBY_FLAGS.sub!( /\-w(\s|$)/, '-W1\1' )
48
-
49
45
  t.define_tasks
@@ -115,7 +115,7 @@ OptionParser.new do |opts|
115
115
  opts.on( "-d", "--debug" ) do
116
116
  RJack::Logback[ 'iudex.da' ].level = RJack::Logback::DEBUG
117
117
  end
118
- opts.on( "-v", "--version", "Display version and exit" ) do |file|
118
+ opts.on( "-v", "--version", "Display version and exit" ) do
119
119
  puts "iudex-da: #{Iudex::DA::VERSION}"
120
120
  exit 1
121
121
  end
data/bin/iudex-da-import CHANGED
@@ -49,7 +49,7 @@ END
49
49
  opts.on( "-d", "--debug" ) do
50
50
  Logback[ 'iudex' ].level = Logback::DEBUG
51
51
  end
52
- opts.on( "-v", "--version", "Display version and exit" ) do |file|
52
+ opts.on( "-v", "--version", "Display version and exit" ) do
53
53
  puts "iudex-da: #{Iudex::DA::VERSION}"
54
54
  exit 1
55
55
  end
@@ -54,7 +54,7 @@ END
54
54
  opts.on( "-d", "--debug" ) do
55
55
  Logback[ 'iudex.da' ].level = Logback::DEBUG
56
56
  end
57
- opts.on( "-v", "--version", "Display version and exit" ) do |file|
57
+ opts.on( "-v", "--version", "Display version and exit" ) do
58
58
  puts "iudex-da: #{Iudex::DA::VERSION}"
59
59
  exit 1
60
60
  end
data/bin/iudex-migrate CHANGED
@@ -49,10 +49,16 @@ END
49
49
  { name.to_sym => value }
50
50
  end
51
51
  end
52
+ opts.on( "-p", "--profile NAME", String,
53
+ "Add a migration profile (ex: simhash)" ) do |p|
54
+ Hooker.add( [ :iudex, :migration_profiles ] ) do |profiles|
55
+ profiles << p
56
+ end
57
+ end
52
58
  opts.on( "-d", "--debug" ) do
53
59
  Logback[ 'iudex.da' ].level = Logback::DEBUG
54
60
  end
55
- opts.on( "-v", "--version", "Display version and exit" ) do |file|
61
+ opts.on( "-v", "--version", "Display version and exit" ) do
56
62
  puts "iudex-da: #{DA::VERSION}"
57
63
  exit 1
58
64
  end
data/config/config.rb CHANGED
@@ -11,4 +11,9 @@ Iudex.configure do |c|
11
11
  :loglevel => 2 }
12
12
  end
13
13
 
14
+ # Add optional migration profiles
15
+ c.setup_migration_profiles do |profiles|
16
+ profiles += [ :simhash :index_next_visit ]
17
+ end
18
+
14
19
  end
@@ -0,0 +1,27 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ class RemoveSimhashIndex < ActiveRecord::Migration
18
+
19
+ def self.up
20
+ remove_index( 'urls', 'simhash' )
21
+ end
22
+
23
+ def self.down
24
+ add_index( 'urls', [ 'simhash' ] )
25
+ end
26
+
27
+ end
@@ -0,0 +1,27 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ class AddIndexNextVisit < ActiveRecord::Migration
18
+
19
+ def self.up
20
+ add_index( 'urls', 'next_visit_after' )
21
+ end
22
+
23
+ def self.down
24
+ remove_index( 'urls', 'next_visit_after' )
25
+ end
26
+
27
+ end
@@ -0,0 +1,27 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ class AddSimhashIndex < ActiveRecord::Migration
18
+
19
+ def self.up
20
+ add_index( 'urls', [ 'simhash' ] )
21
+ end
22
+
23
+ def self.down
24
+ remove_index( 'urls', 'simhash' )
25
+ end
26
+
27
+ end
data/lib/iudex-da/ar.rb CHANGED
@@ -33,9 +33,13 @@ module Iudex::DA
33
33
  setup #FIXME: Require explicit setup for use?
34
34
 
35
35
  def migrate( target_version = nil )
36
- ActiveRecord::Migrator.migrate( File.join( LIB_DIR, '..', '..', 'db' ),
37
- target_version )
38
- #FIXME: Support additional migration directories?
36
+ base = File.join( LIB_DIR, '..', '..', 'db' )
37
+
38
+ profiles = Hooker.apply( [ :iudex, :migration_profiles ], [] )
39
+
40
+ ext = profiles.compact.map { |p| "/#{p}" }.join(',')
41
+ base += "{#{ext},}" unless ext.empty?
42
+ ActiveRecord::Migrator.migrate( base, target_version )
39
43
  end
40
44
 
41
45
  module_function :migrate
data/lib/iudex-da/base.rb CHANGED
@@ -16,7 +16,7 @@
16
16
 
17
17
  module Iudex
18
18
  module DA
19
- VERSION = '1.0.0'
19
+ VERSION = '1.1.0'
20
20
 
21
21
  LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
22
  end
@@ -66,7 +66,7 @@ module Iudex::DA
66
66
  end
67
67
 
68
68
  def import_keys
69
- [ :uhash, :host, :url, :type, :priority, :next_visit_after ]
69
+ [ :uhash, :domain, :url, :type, :priority, :next_visit_after ]
70
70
  end
71
71
 
72
72
  def template_map
Binary file
data/pom.xml CHANGED
@@ -5,13 +5,13 @@
5
5
  <groupId>iudex</groupId>
6
6
  <artifactId>iudex-da</artifactId>
7
7
  <packaging>jar</packaging>
8
- <version>1.0.0</version>
8
+ <version>1.1.0</version>
9
9
  <name>Iudex Data Access</name>
10
10
 
11
11
  <parent>
12
12
  <groupId>iudex</groupId>
13
13
  <artifactId>iudex-parent</artifactId>
14
- <version>1.0</version>
14
+ <version>1.1</version>
15
15
  <relativePath>..</relativePath>
16
16
  </parent>
17
17
 
@@ -29,7 +29,7 @@
29
29
  <dependency>
30
30
  <groupId>iudex</groupId>
31
31
  <artifactId>iudex-core</artifactId>
32
- <version>[1.0,1.1)</version>
32
+ <version>[1.1,1.2)</version>
33
33
  </dependency>
34
34
 
35
35
  <dependency>
@@ -47,13 +47,13 @@
47
47
  <dependency>
48
48
  <groupId>commons-pool</groupId>
49
49
  <artifactId>commons-pool</artifactId>
50
- <version>[1.5.4,1.5.5]</version>
50
+ <version>[1.5.4,1.5.6]</version>
51
51
  </dependency>
52
52
 
53
53
  <dependency>
54
54
  <groupId>org.postgresql</groupId>
55
55
  <artifactId>postgresql-jdbc4</artifactId>
56
- <version>[8.4.702,9.1)</version>
56
+ <version>[8.4.702,9.0.9999]</version>
57
57
  <scope>test</scope>
58
58
  </dependency>
59
59
 
data/test/test_migrate.rb CHANGED
@@ -26,16 +26,49 @@ class TestMigrate < MiniTest::Unit::TestCase
26
26
  include Iudex::DA
27
27
  include RJack
28
28
 
29
- def test_up_down_up
30
- Logback[ 'iudex.da.ActiveRecord' ].level = Logback::WARN
29
+ VERBOSE = ! ( ARGV & %w[ -v --verbose ] ).empty?
31
30
 
32
- ActiveRecord::Migration.suppress_messages do
31
+ def setup
32
+ unless VERBOSE
33
+ Logback[ 'iudex.da.ActiveRecord' ].level = Logback::WARN
34
+ end
35
+ end
36
+
37
+ def teardown
38
+ Hooker.send( :clear )
39
+ suppress_messages? { migrate }
40
+ Logback[ 'iudex.da.ActiveRecord' ].level = nil
41
+ end
42
+
43
+ def test_default
44
+ check_up_down
45
+ end
46
+
47
+ def test_simhash_profile
48
+ Hooker.add( [ :iudex, :migration_profiles ] ) { |p| p << :simhash }
49
+ check_up_down
50
+ end
51
+
52
+ def test_next_visit_profile
53
+ Hooker.add( [ :iudex, :migration_profiles ] ) { |p| p << :index_next_visit }
54
+ check_up_down
55
+ end
56
+
57
+ def check_up_down
58
+ suppress_messages? do
33
59
  migrate
60
+ pass
34
61
  migrate( 0 )
35
- migrate
62
+ pass
36
63
  end
64
+ end
37
65
 
38
- Logback[ 'iudex.da.ActiveRecord' ].level = nil
66
+ def suppress_messages?( &block )
67
+ if VERBOSE
68
+ block.call
69
+ else
70
+ ActiveRecord::Migration.suppress_messages( &block )
71
+ end
39
72
  end
40
73
 
41
74
  end
@@ -29,15 +29,15 @@ class TestPollWork < MiniTest::Unit::TestCase
29
29
  def setup
30
30
  Url.delete_all
31
31
 
32
- hosts = [ 'foo.org', 'other.net', 'gravitext.com', 'one.at' ]
32
+ domains = [ 'foo.org', 'other.net', 'gravitext.com', 'one.at' ]
33
33
  count = 0
34
- hosts.each do |host|
34
+ domains.each do |domain|
35
35
  (5..15).each do |val|
36
36
  url = Url.create! do |u|
37
37
  u.priority = ( val.to_f / 10.0 ) + (count.to_f / 50.0)
38
- vurl = VisitURL.normalize( "http://#{host}/#{u.priority}" )
38
+ vurl = VisitURL.normalize( "http://#{domain}/#{u.priority}" )
39
39
  u.type = "FEED"
40
- u.host = vurl.host
40
+ u.domain = vurl.domain
41
41
  u.url = vurl.to_s
42
42
  u.uhash = vurl.uhash
43
43
  u.next_visit_after = Time.now
@@ -51,38 +51,38 @@ class TestPollWork < MiniTest::Unit::TestCase
51
51
  Url.delete_all
52
52
  end
53
53
 
54
- # Query to get new work, with limits on work per host, and total
54
+ # Query to get new work, with limits on work per domain, and total
55
55
  # work (in descending piority order)
56
56
  def test_poll
57
57
  query = <<END
58
- SELECT url, host, type, priority
58
+ SELECT url, domain, type, priority
59
59
  FROM ( SELECT *, row_number() OVER ( ORDER BY priority DESC ) as ppos
60
- FROM ( SELECT *, row_number() OVER ( PARTITION BY host
60
+ FROM ( SELECT *, row_number() OVER ( PARTITION BY domain
61
61
  ORDER BY priority DESC ) AS hpos
62
62
  FROM urls
63
63
  WHERE next_visit_after <= now() ) AS subh
64
64
  WHERE hpos <= ? ) AS subp
65
65
  WHERE ppos <= ?
66
- ORDER BY host, priority DESC;
66
+ ORDER BY domain, priority DESC;
67
67
  END
68
68
  res = Url.find_by_sql( [ query, 5, 18 ] )
69
69
 
70
- def check_host_subset( byhost )
71
- assert( byhost.length <= 5 )
72
- byhost.each_cons(2) { |p,n| assert( p.priority >= n.priority ) }
70
+ def check_domain_subset( bydomain )
71
+ assert( bydomain.length <= 5 )
72
+ bydomain.each_cons(2) { |p,n| assert( p.priority >= n.priority ) }
73
73
  end
74
74
 
75
75
  assert( res.length <= 18 )
76
- byhost = []
76
+ bydomain = []
77
77
  res.each do |u|
78
- if byhost.empty? || byhost.last.host == u.host
79
- byhost << u
78
+ if bydomain.empty? || bydomain.last.domain == u.domain
79
+ bydomain << u
80
80
  else
81
- check_host_subset( byhost )
82
- byhost = []
81
+ check_domain_subset( bydomain )
82
+ bydomain = []
83
83
  end
84
84
  end
85
- check_host_subset( byhost ) unless byhost.empty?
85
+ check_domain_subset( bydomain ) unless bydomain.empty?
86
86
 
87
87
  end
88
88
 
@@ -93,7 +93,7 @@ END
93
93
  CREATE TEMPORARY TABLE mod_urls
94
94
  ( uhash text,
95
95
  url text,
96
- host text );
96
+ domain text );
97
97
  END
98
98
  # ON COMMIT DROP;
99
99
 
@@ -110,14 +110,14 @@ END
110
110
  vurl = VisitURL.normalize( "http://gravitext.com/#{priority}" )
111
111
 
112
112
  sql = "INSERT into mod_urls VALUES ('%s','%s','%s')" %
113
- [ vurl.uhash, vurl.to_s, vurl.host ]
113
+ [ vurl.uhash, vurl.to_s, vurl.domain ]
114
114
  Url.connection.execute( sql )
115
115
  # u.next_visit_after = Time.now
116
116
  count += 1
117
117
  end
118
118
  insert_query = <<END
119
- INSERT INTO urls (uhash,url,host,type,priority)
120
- ( SELECT uhash,url,host,'FEEDX',4.78 FROM mod_urls
119
+ INSERT INTO urls (uhash,url,domain,type,priority)
120
+ ( SELECT uhash,url,domain,'FEEDX',4.78 FROM mod_urls
121
121
  WHERE uhash NOT IN ( SELECT uhash FROM urls ) );
122
122
  END
123
123
  Url.connection.execute( insert_query )
@@ -50,7 +50,7 @@ class TestPoolFactory < MiniTest::Unit::TestCase
50
50
  end
51
51
 
52
52
  def test_query
53
- assert( ! @data_source.nil? )
53
+ refute( @data_source.nil? )
54
54
  qrun = QueryRunner.new( @data_source )
55
55
  qrun.query( "SELECT url FROM urls WHERE uhash IN ('uRlU1h_YL-NvooSv2i98Rd3', 'notthere' );",
56
56
  TestHandler.new )
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: iudex-da
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.0
5
+ version: 1.1.0
6
6
  platform: java
7
7
  authors:
8
8
  - David Kellum
@@ -10,8 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-04 00:00:00 -07:00
14
- default_executable:
13
+ date: 2011-11-13 00:00:00 Z
15
14
  dependencies:
16
15
  - !ruby/object:Gem::Dependency
17
16
  name: iudex-core
@@ -21,7 +20,7 @@ dependencies:
21
20
  requirements:
22
21
  - - ~>
23
22
  - !ruby/object:Gem::Version
24
- version: 1.0.0
23
+ version: 1.1.0
25
24
  type: :runtime
26
25
  version_requirements: *id001
27
26
  - !ruby/object:Gem::Dependency
@@ -32,7 +31,7 @@ dependencies:
32
31
  requirements:
33
32
  - - ~>
34
33
  - !ruby/object:Gem::Version
35
- version: 2.3.10
34
+ version: 3.0.10
36
35
  type: :runtime
37
36
  version_requirements: *id002
38
37
  - !ruby/object:Gem::Dependency
@@ -41,12 +40,9 @@ dependencies:
41
40
  requirement: &id003 !ruby/object:Gem::Requirement
42
41
  none: false
43
42
  requirements:
44
- - - ">="
45
- - !ruby/object:Gem::Version
46
- version: 8.4.702
47
- - - <
43
+ - - ~>
48
44
  - !ruby/object:Gem::Version
49
- version: "9.1"
45
+ version: 9.0.801
50
46
  type: :runtime
51
47
  version_requirements: *id003
52
48
  - !ruby/object:Gem::Dependency
@@ -57,7 +53,7 @@ dependencies:
57
53
  requirements:
58
54
  - - ~>
59
55
  - !ruby/object:Gem::Version
60
- version: 1.1.0
56
+ version: 1.1.3
61
57
  type: :runtime
62
58
  version_requirements: *id004
63
59
  - !ruby/object:Gem::Dependency
@@ -88,12 +84,9 @@ dependencies:
88
84
  requirement: &id007 !ruby/object:Gem::Requirement
89
85
  none: false
90
86
  requirements:
91
- - - ">="
92
- - !ruby/object:Gem::Version
93
- version: 1.7.1
94
- - - <
87
+ - - ~>
95
88
  - !ruby/object:Gem::Version
96
- version: "2.1"
89
+ version: "2.3"
97
90
  type: :development
98
91
  version_requirements: *id007
99
92
  - !ruby/object:Gem::Dependency
@@ -115,7 +108,7 @@ dependencies:
115
108
  requirements:
116
109
  - - ~>
117
110
  - !ruby/object:Gem::Version
118
- version: 1.3.0
111
+ version: 1.4.0
119
112
  type: :development
120
113
  version_requirements: *id009
121
114
  description: |-
@@ -155,6 +148,9 @@ files:
155
148
  - db/0060_url_indexes.rb
156
149
  - db/0070_add_created_at.rb
157
150
  - db/0080_add_simhash.rb
151
+ - db/0081_remove_simhash_index.rb
152
+ - db/index_next_visit/0100_add_index_next_visit.rb
153
+ - db/simhash/0085_add_simhash_index.rb
158
154
  - lib/iudex-da/base.rb
159
155
  - lib/iudex-da.rb
160
156
  - lib/iudex-da/ar.rb
@@ -167,8 +163,8 @@ files:
167
163
  - test/test_migrate.rb
168
164
  - test/test_poll_work.rb
169
165
  - test/test_pool_factory.rb
170
- - lib/iudex-da/iudex-da-1.0.0.jar
171
- has_rdoc: true
166
+ - lib/iudex-da/iudex-da-1.1.0.jar
167
+ - .gemtest
172
168
  homepage: http://github.com/dekellum/iudex
173
169
  licenses: []
174
170
 
@@ -193,7 +189,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
193
189
  requirements: []
194
190
 
195
191
  rubyforge_project: iudex-da
196
- rubygems_version: 1.5.1
192
+ rubygems_version: 1.8.9
197
193
  signing_key:
198
194
  specification_version: 3
199
195
  summary: Iudex is a general purpose web crawler and feed processor in ruby/java
Binary file