iudex-da 1.0.0-java → 1.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemtest ADDED
File without changes
data/History.rdoc CHANGED
@@ -1,2 +1,12 @@
1
+ === 1.1.0 (2011-11-13)
2
+ * Update to iudex-core ~> 1.1.0
3
+ * Update to activerecord ~> 3.0.10, pg ~> 9.0.801, adapter ~> 1.1.3
4
+ * Expand commons-pool pom dep to [1.5.4, 1.5.6]
5
+ * Migrate host field to domain (registration level), mapper/writer
6
+ domain support, :domain in WorkPoller, drop :host key.
7
+ * Add migration profile support, move simhash index to simhash
8
+ profile, add index_next_visit migration profile
9
+ * Update to minitest ~> 2.3
10
+
1
11
  === 1.0.0 (2011-04-04)
2
12
  * Initial release.
data/Manifest.txt CHANGED
@@ -17,6 +17,9 @@ db/0050_add_cache_location.rb
17
17
  db/0060_url_indexes.rb
18
18
  db/0070_add_created_at.rb
19
19
  db/0080_add_simhash.rb
20
+ db/0081_remove_simhash_index.rb
21
+ db/index_next_visit/0100_add_index_next_visit.rb
22
+ db/simhash/0085_add_simhash_index.rb
20
23
  lib/iudex-da/base.rb
21
24
  lib/iudex-da.rb
22
25
  lib/iudex-da/ar.rb
@@ -29,4 +32,4 @@ test/setup.rb
29
32
  test/test_migrate.rb
30
33
  test/test_poll_work.rb
31
34
  test/test_pool_factory.rb
32
- lib/iudex-da/iudex-da-1.0.0.jar
35
+ lib/iudex-da/iudex-da-1.1.0.jar
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
4
4
  require 'iudex-da/base'
5
5
 
6
6
  require 'rubygems'
7
- gem 'rjack-tarpit', '~> 1.2'
7
+ gem 'rjack-tarpit', '~> 1.4'
8
8
  require 'rjack-tarpit'
9
9
 
10
10
  t = RJack::TarPit.new( 'iudex-da',
@@ -14,15 +14,15 @@ t = RJack::TarPit.new( 'iudex-da',
14
14
  t.specify do |h|
15
15
  h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
16
 
17
- h.extra_deps += [ [ 'iudex-core', '~> 1.0.0' ],
18
- [ 'activerecord', '~> 2.3.10' ],
19
- [ 'jdbc-postgres', '>= 8.4.702', '< 9.1' ],
20
- [ 'activerecord-jdbcpostgresql-adapter', '~> 1.1.0' ],
17
+ h.extra_deps += [ [ 'iudex-core', '~> 1.1.0' ],
18
+ [ 'activerecord', '~> 3.0.10' ],
19
+ [ 'jdbc-postgres', '~> 9.0.801' ],
20
+ [ 'activerecord-jdbcpostgresql-adapter', '~> 1.1.3' ],
21
21
  [ 'rjack-commons-dbcp', '~> 1.4.0' ],
22
22
  [ 'rjack-commons-dbutils', '~> 1.3.0' ] ]
23
23
 
24
24
  h.testlib = :minitest
25
- h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
25
+ h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
26
26
  [ 'rjack-logback', '~> 1.0' ] ]
27
27
  end
28
28
 
@@ -42,8 +42,4 @@ task :gem => [ :check_pom_version, :check_history_version
42
42
  task :tag => [ :check_pom_version, :check_history_version, :check_history_date ]
43
43
  task :push => [ :check_history_date ]
44
44
 
45
- # Disable verbose warnings, which are a bit much with ActiveRecord
46
- # 2.3.x at least.
47
- Hoe::RUBY_FLAGS.sub!( /\-w(\s|$)/, '-W1\1' )
48
-
49
45
  t.define_tasks
@@ -115,7 +115,7 @@ OptionParser.new do |opts|
115
115
  opts.on( "-d", "--debug" ) do
116
116
  RJack::Logback[ 'iudex.da' ].level = RJack::Logback::DEBUG
117
117
  end
118
- opts.on( "-v", "--version", "Display version and exit" ) do |file|
118
+ opts.on( "-v", "--version", "Display version and exit" ) do
119
119
  puts "iudex-da: #{Iudex::DA::VERSION}"
120
120
  exit 1
121
121
  end
data/bin/iudex-da-import CHANGED
@@ -49,7 +49,7 @@ END
49
49
  opts.on( "-d", "--debug" ) do
50
50
  Logback[ 'iudex' ].level = Logback::DEBUG
51
51
  end
52
- opts.on( "-v", "--version", "Display version and exit" ) do |file|
52
+ opts.on( "-v", "--version", "Display version and exit" ) do
53
53
  puts "iudex-da: #{Iudex::DA::VERSION}"
54
54
  exit 1
55
55
  end
@@ -54,7 +54,7 @@ END
54
54
  opts.on( "-d", "--debug" ) do
55
55
  Logback[ 'iudex.da' ].level = Logback::DEBUG
56
56
  end
57
- opts.on( "-v", "--version", "Display version and exit" ) do |file|
57
+ opts.on( "-v", "--version", "Display version and exit" ) do
58
58
  puts "iudex-da: #{Iudex::DA::VERSION}"
59
59
  exit 1
60
60
  end
data/bin/iudex-migrate CHANGED
@@ -49,10 +49,16 @@ END
49
49
  { name.to_sym => value }
50
50
  end
51
51
  end
52
+ opts.on( "-p", "--profile NAME", String,
53
+ "Add a migration profile (ex: simhash)" ) do |p|
54
+ Hooker.add( [ :iudex, :migration_profiles ] ) do |profiles|
55
+ profiles << p
56
+ end
57
+ end
52
58
  opts.on( "-d", "--debug" ) do
53
59
  Logback[ 'iudex.da' ].level = Logback::DEBUG
54
60
  end
55
- opts.on( "-v", "--version", "Display version and exit" ) do |file|
61
+ opts.on( "-v", "--version", "Display version and exit" ) do
56
62
  puts "iudex-da: #{DA::VERSION}"
57
63
  exit 1
58
64
  end
data/config/config.rb CHANGED
@@ -11,4 +11,9 @@ Iudex.configure do |c|
11
11
  :loglevel => 2 }
12
12
  end
13
13
 
14
+ # Add optional migration profiles
15
+ c.setup_migration_profiles do |profiles|
16
+ profiles += [ :simhash, :index_next_visit ]
17
+ end
18
+
14
19
  end
@@ -0,0 +1,27 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ class RemoveSimhashIndex < ActiveRecord::Migration
18
+
19
+ def self.up
20
+ remove_index( 'urls', 'simhash' )
21
+ end
22
+
23
+ def self.down
24
+ add_index( 'urls', [ 'simhash' ] )
25
+ end
26
+
27
+ end
@@ -0,0 +1,27 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ class AddIndexNextVisit < ActiveRecord::Migration
18
+
19
+ def self.up
20
+ add_index( 'urls', 'next_visit_after' )
21
+ end
22
+
23
+ def self.down
24
+ remove_index( 'urls', 'next_visit_after' )
25
+ end
26
+
27
+ end
@@ -0,0 +1,27 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ class AddSimhashIndex < ActiveRecord::Migration
18
+
19
+ def self.up
20
+ add_index( 'urls', [ 'simhash' ] )
21
+ end
22
+
23
+ def self.down
24
+ remove_index( 'urls', 'simhash' )
25
+ end
26
+
27
+ end
data/lib/iudex-da/ar.rb CHANGED
@@ -33,9 +33,13 @@ module Iudex::DA
33
33
  setup #FIXME: Require explicit setup for use?
34
34
 
35
35
  def migrate( target_version = nil )
36
- ActiveRecord::Migrator.migrate( File.join( LIB_DIR, '..', '..', 'db' ),
37
- target_version )
38
- #FIXME: Support additional migration directories?
36
+ base = File.join( LIB_DIR, '..', '..', 'db' )
37
+
38
+ profiles = Hooker.apply( [ :iudex, :migration_profiles ], [] )
39
+
40
+ ext = profiles.compact.map { |p| "/#{p}" }.join(',')
41
+ base += "{#{ext},}" unless ext.empty?
42
+ ActiveRecord::Migrator.migrate( base, target_version )
39
43
  end
40
44
 
41
45
  module_function :migrate
data/lib/iudex-da/base.rb CHANGED
@@ -16,7 +16,7 @@
16
16
 
17
17
  module Iudex
18
18
  module DA
19
- VERSION = '1.0.0'
19
+ VERSION = '1.1.0'
20
20
 
21
21
  LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
22
  end
@@ -66,7 +66,7 @@ module Iudex::DA
66
66
  end
67
67
 
68
68
  def import_keys
69
- [ :uhash, :host, :url, :type, :priority, :next_visit_after ]
69
+ [ :uhash, :domain, :url, :type, :priority, :next_visit_after ]
70
70
  end
71
71
 
72
72
  def template_map
Binary file
data/pom.xml CHANGED
@@ -5,13 +5,13 @@
5
5
  <groupId>iudex</groupId>
6
6
  <artifactId>iudex-da</artifactId>
7
7
  <packaging>jar</packaging>
8
- <version>1.0.0</version>
8
+ <version>1.1.0</version>
9
9
  <name>Iudex Data Access</name>
10
10
 
11
11
  <parent>
12
12
  <groupId>iudex</groupId>
13
13
  <artifactId>iudex-parent</artifactId>
14
- <version>1.0</version>
14
+ <version>1.1</version>
15
15
  <relativePath>..</relativePath>
16
16
  </parent>
17
17
 
@@ -29,7 +29,7 @@
29
29
  <dependency>
30
30
  <groupId>iudex</groupId>
31
31
  <artifactId>iudex-core</artifactId>
32
- <version>[1.0,1.1)</version>
32
+ <version>[1.1,1.2)</version>
33
33
  </dependency>
34
34
 
35
35
  <dependency>
@@ -47,13 +47,13 @@
47
47
  <dependency>
48
48
  <groupId>commons-pool</groupId>
49
49
  <artifactId>commons-pool</artifactId>
50
- <version>[1.5.4,1.5.5]</version>
50
+ <version>[1.5.4,1.5.6]</version>
51
51
  </dependency>
52
52
 
53
53
  <dependency>
54
54
  <groupId>org.postgresql</groupId>
55
55
  <artifactId>postgresql-jdbc4</artifactId>
56
- <version>[8.4.702,9.1)</version>
56
+ <version>[8.4.702,9.0.9999]</version>
57
57
  <scope>test</scope>
58
58
  </dependency>
59
59
 
data/test/test_migrate.rb CHANGED
@@ -26,16 +26,49 @@ class TestMigrate < MiniTest::Unit::TestCase
26
26
  include Iudex::DA
27
27
  include RJack
28
28
 
29
- def test_up_down_up
30
- Logback[ 'iudex.da.ActiveRecord' ].level = Logback::WARN
29
+ VERBOSE = ! ( ARGV & %w[ -v --verbose ] ).empty?
31
30
 
32
- ActiveRecord::Migration.suppress_messages do
31
+ def setup
32
+ unless VERBOSE
33
+ Logback[ 'iudex.da.ActiveRecord' ].level = Logback::WARN
34
+ end
35
+ end
36
+
37
+ def teardown
38
+ Hooker.send( :clear )
39
+ suppress_messages? { migrate }
40
+ Logback[ 'iudex.da.ActiveRecord' ].level = nil
41
+ end
42
+
43
+ def test_default
44
+ check_up_down
45
+ end
46
+
47
+ def test_simhash_profile
48
+ Hooker.add( [ :iudex, :migration_profiles ] ) { |p| p << :simhash }
49
+ check_up_down
50
+ end
51
+
52
+ def test_next_visit_profile
53
+ Hooker.add( [ :iudex, :migration_profiles ] ) { |p| p << :index_next_visit }
54
+ check_up_down
55
+ end
56
+
57
+ def check_up_down
58
+ suppress_messages? do
33
59
  migrate
60
+ pass
34
61
  migrate( 0 )
35
- migrate
62
+ pass
36
63
  end
64
+ end
37
65
 
38
- Logback[ 'iudex.da.ActiveRecord' ].level = nil
66
+ def suppress_messages?( &block )
67
+ if VERBOSE
68
+ block.call
69
+ else
70
+ ActiveRecord::Migration.suppress_messages( &block )
71
+ end
39
72
  end
40
73
 
41
74
  end
@@ -29,15 +29,15 @@ class TestPollWork < MiniTest::Unit::TestCase
29
29
  def setup
30
30
  Url.delete_all
31
31
 
32
- hosts = [ 'foo.org', 'other.net', 'gravitext.com', 'one.at' ]
32
+ domains = [ 'foo.org', 'other.net', 'gravitext.com', 'one.at' ]
33
33
  count = 0
34
- hosts.each do |host|
34
+ domains.each do |domain|
35
35
  (5..15).each do |val|
36
36
  url = Url.create! do |u|
37
37
  u.priority = ( val.to_f / 10.0 ) + (count.to_f / 50.0)
38
- vurl = VisitURL.normalize( "http://#{host}/#{u.priority}" )
38
+ vurl = VisitURL.normalize( "http://#{domain}/#{u.priority}" )
39
39
  u.type = "FEED"
40
- u.host = vurl.host
40
+ u.domain = vurl.domain
41
41
  u.url = vurl.to_s
42
42
  u.uhash = vurl.uhash
43
43
  u.next_visit_after = Time.now
@@ -51,38 +51,38 @@ class TestPollWork < MiniTest::Unit::TestCase
51
51
  Url.delete_all
52
52
  end
53
53
 
54
- # Query to get new work, with limits on work per host, and total
54
+ # Query to get new work, with limits on work per domain, and total
55
55
  # work (in descending priority order)
56
56
  def test_poll
57
57
  query = <<END
58
- SELECT url, host, type, priority
58
+ SELECT url, domain, type, priority
59
59
  FROM ( SELECT *, row_number() OVER ( ORDER BY priority DESC ) as ppos
60
- FROM ( SELECT *, row_number() OVER ( PARTITION BY host
60
+ FROM ( SELECT *, row_number() OVER ( PARTITION BY domain
61
61
  ORDER BY priority DESC ) AS hpos
62
62
  FROM urls
63
63
  WHERE next_visit_after <= now() ) AS subh
64
64
  WHERE hpos <= ? ) AS subp
65
65
  WHERE ppos <= ?
66
- ORDER BY host, priority DESC;
66
+ ORDER BY domain, priority DESC;
67
67
  END
68
68
  res = Url.find_by_sql( [ query, 5, 18 ] )
69
69
 
70
- def check_host_subset( byhost )
71
- assert( byhost.length <= 5 )
72
- byhost.each_cons(2) { |p,n| assert( p.priority >= n.priority ) }
70
+ def check_domain_subset( bydomain )
71
+ assert( bydomain.length <= 5 )
72
+ bydomain.each_cons(2) { |p,n| assert( p.priority >= n.priority ) }
73
73
  end
74
74
 
75
75
  assert( res.length <= 18 )
76
- byhost = []
76
+ bydomain = []
77
77
  res.each do |u|
78
- if byhost.empty? || byhost.last.host == u.host
79
- byhost << u
78
+ if bydomain.empty? || bydomain.last.domain == u.domain
79
+ bydomain << u
80
80
  else
81
- check_host_subset( byhost )
82
- byhost = []
81
+ check_domain_subset( bydomain )
82
+ bydomain = []
83
83
  end
84
84
  end
85
- check_host_subset( byhost ) unless byhost.empty?
85
+ check_domain_subset( bydomain ) unless bydomain.empty?
86
86
 
87
87
  end
88
88
 
@@ -93,7 +93,7 @@ END
93
93
  CREATE TEMPORARY TABLE mod_urls
94
94
  ( uhash text,
95
95
  url text,
96
- host text );
96
+ domain text );
97
97
  END
98
98
  # ON COMMIT DROP;
99
99
 
@@ -110,14 +110,14 @@ END
110
110
  vurl = VisitURL.normalize( "http://gravitext.com/#{priority}" )
111
111
 
112
112
  sql = "INSERT into mod_urls VALUES ('%s','%s','%s')" %
113
- [ vurl.uhash, vurl.to_s, vurl.host ]
113
+ [ vurl.uhash, vurl.to_s, vurl.domain ]
114
114
  Url.connection.execute( sql )
115
115
  # u.next_visit_after = Time.now
116
116
  count += 1
117
117
  end
118
118
  insert_query = <<END
119
- INSERT INTO urls (uhash,url,host,type,priority)
120
- ( SELECT uhash,url,host,'FEEDX',4.78 FROM mod_urls
119
+ INSERT INTO urls (uhash,url,domain,type,priority)
120
+ ( SELECT uhash,url,domain,'FEEDX',4.78 FROM mod_urls
121
121
  WHERE uhash NOT IN ( SELECT uhash FROM urls ) );
122
122
  END
123
123
  Url.connection.execute( insert_query )
@@ -50,7 +50,7 @@ class TestPoolFactory < MiniTest::Unit::TestCase
50
50
  end
51
51
 
52
52
  def test_query
53
- assert( ! @data_source.nil? )
53
+ refute( @data_source.nil? )
54
54
  qrun = QueryRunner.new( @data_source )
55
55
  qrun.query( "SELECT url FROM urls WHERE uhash IN ('uRlU1h_YL-NvooSv2i98Rd3', 'notthere' );",
56
56
  TestHandler.new )
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: iudex-da
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.0
5
+ version: 1.1.0
6
6
  platform: java
7
7
  authors:
8
8
  - David Kellum
@@ -10,8 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-04 00:00:00 -07:00
14
- default_executable:
13
+ date: 2011-11-13 00:00:00 Z
15
14
  dependencies:
16
15
  - !ruby/object:Gem::Dependency
17
16
  name: iudex-core
@@ -21,7 +20,7 @@ dependencies:
21
20
  requirements:
22
21
  - - ~>
23
22
  - !ruby/object:Gem::Version
24
- version: 1.0.0
23
+ version: 1.1.0
25
24
  type: :runtime
26
25
  version_requirements: *id001
27
26
  - !ruby/object:Gem::Dependency
@@ -32,7 +31,7 @@ dependencies:
32
31
  requirements:
33
32
  - - ~>
34
33
  - !ruby/object:Gem::Version
35
- version: 2.3.10
34
+ version: 3.0.10
36
35
  type: :runtime
37
36
  version_requirements: *id002
38
37
  - !ruby/object:Gem::Dependency
@@ -41,12 +40,9 @@ dependencies:
41
40
  requirement: &id003 !ruby/object:Gem::Requirement
42
41
  none: false
43
42
  requirements:
44
- - - ">="
45
- - !ruby/object:Gem::Version
46
- version: 8.4.702
47
- - - <
43
+ - - ~>
48
44
  - !ruby/object:Gem::Version
49
- version: "9.1"
45
+ version: 9.0.801
50
46
  type: :runtime
51
47
  version_requirements: *id003
52
48
  - !ruby/object:Gem::Dependency
@@ -57,7 +53,7 @@ dependencies:
57
53
  requirements:
58
54
  - - ~>
59
55
  - !ruby/object:Gem::Version
60
- version: 1.1.0
56
+ version: 1.1.3
61
57
  type: :runtime
62
58
  version_requirements: *id004
63
59
  - !ruby/object:Gem::Dependency
@@ -88,12 +84,9 @@ dependencies:
88
84
  requirement: &id007 !ruby/object:Gem::Requirement
89
85
  none: false
90
86
  requirements:
91
- - - ">="
92
- - !ruby/object:Gem::Version
93
- version: 1.7.1
94
- - - <
87
+ - - ~>
95
88
  - !ruby/object:Gem::Version
96
- version: "2.1"
89
+ version: "2.3"
97
90
  type: :development
98
91
  version_requirements: *id007
99
92
  - !ruby/object:Gem::Dependency
@@ -115,7 +108,7 @@ dependencies:
115
108
  requirements:
116
109
  - - ~>
117
110
  - !ruby/object:Gem::Version
118
- version: 1.3.0
111
+ version: 1.4.0
119
112
  type: :development
120
113
  version_requirements: *id009
121
114
  description: |-
@@ -155,6 +148,9 @@ files:
155
148
  - db/0060_url_indexes.rb
156
149
  - db/0070_add_created_at.rb
157
150
  - db/0080_add_simhash.rb
151
+ - db/0081_remove_simhash_index.rb
152
+ - db/index_next_visit/0100_add_index_next_visit.rb
153
+ - db/simhash/0085_add_simhash_index.rb
158
154
  - lib/iudex-da/base.rb
159
155
  - lib/iudex-da.rb
160
156
  - lib/iudex-da/ar.rb
@@ -167,8 +163,8 @@ files:
167
163
  - test/test_migrate.rb
168
164
  - test/test_poll_work.rb
169
165
  - test/test_pool_factory.rb
170
- - lib/iudex-da/iudex-da-1.0.0.jar
171
- has_rdoc: true
166
+ - lib/iudex-da/iudex-da-1.1.0.jar
167
+ - .gemtest
172
168
  homepage: http://github.com/dekellum/iudex
173
169
  licenses: []
174
170
 
@@ -193,7 +189,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
193
189
  requirements: []
194
190
 
195
191
  rubyforge_project: iudex-da
196
- rubygems_version: 1.5.1
192
+ rubygems_version: 1.8.9
197
193
  signing_key:
198
194
  specification_version: 3
199
195
  summary: Iudex is a general purpose web crawler and feed processor in ruby/java
Binary file