iudex-da 1.0.0-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env jruby
2
+ #.hashdot.profile += jruby-shortlived
3
+
4
+ #--
5
+ # Copyright (c) 2008-2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You may
9
+ # obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+
20
+ require File.join( File.dirname( __FILE__ ), "setup" )
21
+
22
+ require 'iudex-da'
23
+ require 'iudex-da/ar'
24
+
25
+ class TestMigrate < MiniTest::Unit::TestCase # Calls the migrate helper three times: default target, target 0, default target again.
26
+ include Iudex::DA
27
+ include RJack
28
+ # migrate is not defined in this file; presumably it comes from the included Iudex::DA module (TODO confirm).
29
+ def test_up_down_up
30
+ Logback[ 'iudex.da.ActiveRecord' ].level = Logback::WARN # set this logger to WARN while the migrations run
31
+
32
+ ActiveRecord::Migration.suppress_messages do
33
+ migrate
34
+ migrate( 0 ) # presumably migrates down to schema version 0 (per the test name); verify against migrate
35
+ migrate
36
+ end
37
+
38
+ Logback[ 'iudex.da.ActiveRecord' ].level = nil # NOTE(review): not inside an ensure, so this reset is skipped if a migrate call raises
39
+ end
40
+
41
+ end
@@ -0,0 +1,132 @@
1
+ #!/usr/bin/env jruby
2
+ #.hashdot.profile += jruby-shortlived
3
+
4
+ #--
5
+ # Copyright (c) 2008-2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You
9
+ # may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+
20
+ require File.join( File.dirname( __FILE__ ), "setup" )
21
+
22
+ require 'iudex-core'
23
+ require 'iudex-da/ar'
24
+
25
+ class TestPollWork < MiniTest::Unit::TestCase
26
+ include Iudex::DA
27
+ import 'iudex.core.VisitURL'
28
+
29
+ def setup
30
+ Url.delete_all
31
+
32
+ hosts = [ 'foo.org', 'other.net', 'gravitext.com', 'one.at' ]
33
+ count = 0
34
+ hosts.each do |host|
35
+ (5..15).each do |val|
36
+ url = Url.create! do |u|
37
+ u.priority = ( val.to_f / 10.0 ) + (count.to_f / 50.0)
38
+ vurl = VisitURL.normalize( "http://#{host}/#{u.priority}" )
39
+ u.type = "FEED"
40
+ u.host = vurl.host
41
+ u.url = vurl.to_s
42
+ u.uhash = vurl.uhash
43
+ u.next_visit_after = Time.now
44
+ count += 1
45
+ end
46
+ end
47
+ end
48
+ end
49
+
50
+ def teardown
51
+ Url.delete_all
52
+ end
53
+
54
+ # Query to get new work, with limits on work per host, and total
55
+ # work (in descending piority order)
56
+ def test_poll
57
+ query = <<END
58
+ SELECT url, host, type, priority
59
+ FROM ( SELECT *, row_number() OVER ( ORDER BY priority DESC ) as ppos
60
+ FROM ( SELECT *, row_number() OVER ( PARTITION BY host
61
+ ORDER BY priority DESC ) AS hpos
62
+ FROM urls
63
+ WHERE next_visit_after <= now() ) AS subh
64
+ WHERE hpos <= ? ) AS subp
65
+ WHERE ppos <= ?
66
+ ORDER BY host, priority DESC;
67
+ END
68
+ res = Url.find_by_sql( [ query, 5, 18 ] )
69
+
70
+ def check_host_subset( byhost )
71
+ assert( byhost.length <= 5 )
72
+ byhost.each_cons(2) { |p,n| assert( p.priority >= n.priority ) }
73
+ end
74
+
75
+ assert( res.length <= 18 )
76
+ byhost = []
77
+ res.each do |u|
78
+ if byhost.empty? || byhost.last.host == u.host
79
+ byhost << u
80
+ else
81
+ check_host_subset( byhost )
82
+ byhost = []
83
+ end
84
+ end
85
+ check_host_subset( byhost ) unless byhost.empty?
86
+
87
+ end
88
+
89
+ def test_insert
90
+
91
+ Url.transaction do
92
+ sql = <<END
93
+ CREATE TEMPORARY TABLE mod_urls
94
+ ( uhash text,
95
+ url text,
96
+ host text );
97
+ END
98
+ # ON COMMIT DROP;
99
+
100
+ Url.connection.execute( sql ) #FIXME: auto-commit mode?
101
+
102
+ # Url.set_table_name "mod_urls"
103
+
104
+ count = ( 11 * 2 )
105
+ (5..20).each do |val|
106
+ # url = Url.create! do |u|
107
+ priority = ( val.to_f / 10.0 ) + (count.to_f / 50.0)
108
+ # u.priority =
109
+ # u.type = "FEEDX"
110
+ vurl = VisitURL.normalize( "http://gravitext.com/#{priority}" )
111
+
112
+ sql = "INSERT into mod_urls VALUES ('%s','%s','%s')" %
113
+ [ vurl.uhash, vurl.to_s, vurl.host ]
114
+ Url.connection.execute( sql )
115
+ # u.next_visit_after = Time.now
116
+ count += 1
117
+ end
118
+ insert_query = <<END
119
+ INSERT INTO urls (uhash,url,host,type,priority)
120
+ ( SELECT uhash,url,host,'FEEDX',4.78 FROM mod_urls
121
+ WHERE uhash NOT IN ( SELECT uhash FROM urls ) );
122
+ END
123
+ Url.connection.execute( insert_query )
124
+
125
+ Url.connection.execute( "DROP TABLE mod_urls;" )
126
+
127
+ # Url.set_table_name "urls"
128
+ end
129
+
130
+ end
131
+
132
+ end
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env jruby
2
+ #.hashdot.profile += jruby-shortlived
3
+
4
+ #--
5
+ # Copyright (c) 2008-2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You
9
+ # may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+ require File.join( File.dirname( __FILE__ ), "setup" )
20
+
21
+ require 'iudex-core'
22
+ require 'iudex-da/ar'
23
+
24
+ require 'iudex-da'
25
+ require 'iudex-da/pool_data_source_factory'
26
+
27
+ class TestPoolFactory < MiniTest::Unit::TestCase # Smoke test: build a data source from PoolDataSourceFactory and run one query through it.
28
+ include Iudex::DA
29
+ import 'org.apache.commons.dbutils.ResultSetHandler'
30
+ import 'org.apache.commons.dbutils.QueryRunner'
31
+ # Create a fresh factory and data source before each test.
32
+ def setup
33
+ @factory = PoolDataSourceFactory.new( :loglevel => 2 )
34
+ @data_source = @factory.create
35
+ end
36
+ # Close the factory and drop the data source reference after each test.
37
+ def teardown
38
+ @factory.close
39
+ @data_source = nil
40
+ end
41
+ # ResultSetHandler implementation that prints each row's 'url' column and returns nil.
42
+ class TestHandler
43
+ include ResultSetHandler
44
+ def handle( rs )
45
+ while rs.next
46
+ p [ rs.string( 'url' ) ]
47
+ end
48
+ nil
49
+ end
50
+ end
51
+ # Only the data source's presence is asserted; query rows are printed by TestHandler, not checked.
52
+ def test_query
53
+ assert( ! @data_source.nil? )
54
+ qrun = QueryRunner.new( @data_source )
55
+ qrun.query( "SELECT url FROM urls WHERE uhash IN ('uRlU1h_YL-NvooSv2i98Rd3', 'notthere' );",
56
+ TestHandler.new )
57
+ end
58
+
59
+ end
metadata ADDED
@@ -0,0 +1,203 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iudex-da
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 1.0.0
6
+ platform: java
7
+ authors:
8
+ - David Kellum
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-04-04 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: iudex-core
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: 1.0.0
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: activerecord
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ~>
34
+ - !ruby/object:Gem::Version
35
+ version: 2.3.10
36
+ type: :runtime
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: jdbc-postgres
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 8.4.702
47
+ - - <
48
+ - !ruby/object:Gem::Version
49
+ version: "9.1"
50
+ type: :runtime
51
+ version_requirements: *id003
52
+ - !ruby/object:Gem::Dependency
53
+ name: activerecord-jdbcpostgresql-adapter
54
+ prerelease: false
55
+ requirement: &id004 !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ~>
59
+ - !ruby/object:Gem::Version
60
+ version: 1.1.0
61
+ type: :runtime
62
+ version_requirements: *id004
63
+ - !ruby/object:Gem::Dependency
64
+ name: rjack-commons-dbcp
65
+ prerelease: false
66
+ requirement: &id005 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ~>
70
+ - !ruby/object:Gem::Version
71
+ version: 1.4.0
72
+ type: :runtime
73
+ version_requirements: *id005
74
+ - !ruby/object:Gem::Dependency
75
+ name: rjack-commons-dbutils
76
+ prerelease: false
77
+ requirement: &id006 !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: 1.3.0
83
+ type: :runtime
84
+ version_requirements: *id006
85
+ - !ruby/object:Gem::Dependency
86
+ name: minitest
87
+ prerelease: false
88
+ requirement: &id007 !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: 1.7.1
94
+ - - <
95
+ - !ruby/object:Gem::Version
96
+ version: "2.1"
97
+ type: :development
98
+ version_requirements: *id007
99
+ - !ruby/object:Gem::Dependency
100
+ name: rjack-logback
101
+ prerelease: false
102
+ requirement: &id008 !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ~>
106
+ - !ruby/object:Gem::Version
107
+ version: "1.0"
108
+ type: :development
109
+ version_requirements: *id008
110
+ - !ruby/object:Gem::Dependency
111
+ name: rjack-tarpit
112
+ prerelease: false
113
+ requirement: &id009 !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ~>
117
+ - !ruby/object:Gem::Version
118
+ version: 1.3.0
119
+ type: :development
120
+ version_requirements: *id009
121
+ description: |-
122
+ Iudex is a general purpose web crawler and feed processor in
123
+ ruby/java. The iudex-da gem provides a PostgreSQL-based content
124
+ meta-data store and work priority queue.
125
+ email:
126
+ - dek-oss@gravitext.com
127
+ executables:
128
+ - iudex-da-generate-test-data
129
+ - iudex-da-import
130
+ - iudex-da-simhash-dump
131
+ - iudex-migrate
132
+ extensions: []
133
+
134
+ extra_rdoc_files:
135
+ - Manifest.txt
136
+ - History.rdoc
137
+ - README.rdoc
138
+ files:
139
+ - History.rdoc
140
+ - Manifest.txt
141
+ - README.rdoc
142
+ - Rakefile
143
+ - pom.xml
144
+ - bin/iudex-da-generate-test-data
145
+ - bin/iudex-da-import
146
+ - bin/iudex-da-simhash-dump
147
+ - bin/iudex-migrate
148
+ - config/config.rb
149
+ - db/0010_base_urls.rb
150
+ - db/0020_add_feed_metadata.rb
151
+ - db/0021_more_feed_text.rb
152
+ - db/0030_add_priority.rb
153
+ - db/0040_add_visit_after.rb
154
+ - db/0050_add_cache_location.rb
155
+ - db/0060_url_indexes.rb
156
+ - db/0070_add_created_at.rb
157
+ - db/0080_add_simhash.rb
158
+ - lib/iudex-da/base.rb
159
+ - lib/iudex-da.rb
160
+ - lib/iudex-da/ar.rb
161
+ - lib/iudex-da/config.rb
162
+ - lib/iudex-da/factory_helper.rb
163
+ - lib/iudex-da/importer.rb
164
+ - lib/iudex-da/key_helper.rb
165
+ - lib/iudex-da/pool_data_source_factory.rb
166
+ - test/setup.rb
167
+ - test/test_migrate.rb
168
+ - test/test_poll_work.rb
169
+ - test/test_pool_factory.rb
170
+ - lib/iudex-da/iudex-da-1.0.0.jar
171
+ has_rdoc: true
172
+ homepage: http://github.com/dekellum/iudex
173
+ licenses: []
174
+
175
+ post_install_message:
176
+ rdoc_options:
177
+ - --main
178
+ - README.rdoc
179
+ require_paths:
180
+ - lib
181
+ required_ruby_version: !ruby/object:Gem::Requirement
182
+ none: false
183
+ requirements:
184
+ - - ">="
185
+ - !ruby/object:Gem::Version
186
+ version: "0"
187
+ required_rubygems_version: !ruby/object:Gem::Requirement
188
+ none: false
189
+ requirements:
190
+ - - ">="
191
+ - !ruby/object:Gem::Version
192
+ version: "0"
193
+ requirements: []
194
+
195
+ rubyforge_project: iudex-da
196
+ rubygems_version: 1.5.1
197
+ signing_key:
198
+ specification_version: 3
199
+ summary: Iudex is a general purpose web crawler and feed processor in ruby/java
200
+ test_files:
201
+ - test/test_migrate.rb
202
+ - test/test_poll_work.rb
203
+ - test/test_pool_factory.rb