iudex-da 1.0.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/History.rdoc +2 -0
- data/Manifest.txt +32 -0
- data/README.rdoc +30 -0
- data/Rakefile +49 -0
- data/bin/iudex-da-generate-test-data +127 -0
- data/bin/iudex-da-import +62 -0
- data/bin/iudex-da-simhash-dump +112 -0
- data/bin/iudex-migrate +66 -0
- data/config/config.rb +14 -0
- data/db/0010_base_urls.rb +84 -0
- data/db/0020_add_feed_metadata.rb +37 -0
- data/db/0021_more_feed_text.rb +29 -0
- data/db/0030_add_priority.rb +28 -0
- data/db/0040_add_visit_after.rb +30 -0
- data/db/0050_add_cache_location.rb +32 -0
- data/db/0060_url_indexes.rb +41 -0
- data/db/0070_add_created_at.rb +28 -0
- data/db/0080_add_simhash.rb +33 -0
- data/lib/iudex-da.rb +40 -0
- data/lib/iudex-da/ar.rb +48 -0
- data/lib/iudex-da/base.rb +23 -0
- data/lib/iudex-da/config.rb +31 -0
- data/lib/iudex-da/factory_helper.rb +53 -0
- data/lib/iudex-da/importer.rb +91 -0
- data/lib/iudex-da/iudex-da-1.0.0.jar +0 -0
- data/lib/iudex-da/key_helper.rb +33 -0
- data/lib/iudex-da/pool_data_source_factory.rb +108 -0
- data/pom.xml +86 -0
- data/test/setup.rb +34 -0
- data/test/test_migrate.rb +41 -0
- data/test/test_poll_work.rb +132 -0
- data/test/test_pool_factory.rb +59 -0
- metadata +203 -0
@@ -0,0 +1,23 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
module Iudex
|
18
|
+
module DA
|
19
|
+
VERSION = '1.0.0'
|
20
|
+
|
21
|
+
LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2008-2011 David Kellum
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
6
|
+
# may not use this file except in compliance with the License. You
|
7
|
+
# may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
14
|
+
# implied. See the License for the specific language governing
|
15
|
+
# permissions and limitations under the License.
|
16
|
+
#++
|
17
|
+
|
18
|
+
module Iudex
|
19
|
+
|
20
|
+
module DA
|
21
|
+
# Default database connection configuration for both ActiveRecord
|
22
|
+
# (migrations, testing) and PoolDataSourceFactory.
|
23
|
+
CONFIG = {
|
24
|
+
:adapter => 'jdbcpostgresql',
|
25
|
+
:host => 'localhost',
|
26
|
+
:database => 'iudex_test',
|
27
|
+
:username => 'iudex',
|
28
|
+
:pool => 10 }
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-da'
|
18
|
+
require 'iudex-da/key_helper'
|
19
|
+
require 'iudex-da/pool_data_source_factory'
|
20
|
+
|
21
|
+
module Iudex
|
22
|
+
module DA
|
23
|
+
module Filters
|
24
|
+
|
25
|
+
# Mixin FilterChainFactory helper methods
|
26
|
+
module FactoryHelper
|
27
|
+
include Iudex::Filter::KeyHelper
|
28
|
+
|
29
|
+
# Lazily initialize the DataSource
|
30
|
+
def data_source
|
31
|
+
@data_source ||= PoolDataSourceFactory.new.create
|
32
|
+
end
|
33
|
+
|
34
|
+
# Create UpdateFilter given fields and filter list factory
|
35
|
+
# methods
|
36
|
+
def create_update_filter( fields = [],
|
37
|
+
post_sym = nil,
|
38
|
+
update_sym = nil,
|
39
|
+
new_sym = nil )
|
40
|
+
fields = ( keys( :uhash ) + fields ).uniq
|
41
|
+
|
42
|
+
f = UpdateFilter.new( data_source, ContentMapper.new( fields ) )
|
43
|
+
create_chain( update_sym ) { |c| f.update_ref_filter = c }
|
44
|
+
create_chain( new_sym ) { |c| f.new_ref_filter = c }
|
45
|
+
create_chain( post_sym ) { |c| f.content_filter = c }
|
46
|
+
f
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-da'
|
18
|
+
require 'iudex-da/key_helper'
|
19
|
+
require 'iudex-da/pool_data_source_factory'
|
20
|
+
|
21
|
+
module Iudex::DA
|
22
|
+
|
23
|
+
class Importer
|
24
|
+
include Iudex::Core
|
25
|
+
include Gravitext::HTMap
|
26
|
+
|
27
|
+
include Iudex::Filter::KeyHelper
|
28
|
+
|
29
|
+
import 'iudex.da.BaseTransformer'
|
30
|
+
import 'iudex.da.ContentUpdater'
|
31
|
+
|
32
|
+
def initialize()
|
33
|
+
@dsf = PoolDataSourceFactory.new
|
34
|
+
UniMap.define_accessors
|
35
|
+
|
36
|
+
Hooker.apply( [ :iudex, :importer ], self )
|
37
|
+
end
|
38
|
+
|
39
|
+
def import_files( files = ARGV )
|
40
|
+
files.each do |fname|
|
41
|
+
open( fname, "r" ) do |fin|
|
42
|
+
import( fin )
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def import( input )
|
48
|
+
|
49
|
+
cmapper = ContentMapper.new( keys( import_keys ) )
|
50
|
+
transformer = BaseTransformer.new
|
51
|
+
updater = ContentUpdater.new( @dsf.create, cmapper, transformer )
|
52
|
+
|
53
|
+
tmpl = template_map
|
54
|
+
batch = []
|
55
|
+
|
56
|
+
input.each do |line|
|
57
|
+
umap = tmpl.clone
|
58
|
+
parse_to( line, umap )
|
59
|
+
batch << umap
|
60
|
+
if batch.length >= 1_000
|
61
|
+
updater.update( batch )
|
62
|
+
batch.clear
|
63
|
+
end
|
64
|
+
end
|
65
|
+
updater.update( batch ) unless batch.empty?
|
66
|
+
end
|
67
|
+
|
68
|
+
def import_keys
|
69
|
+
[ :uhash, :host, :url, :type, :priority, :next_visit_after ]
|
70
|
+
end
|
71
|
+
|
72
|
+
def template_map
|
73
|
+
umap = UniMap.new
|
74
|
+
umap.type = "FEED"
|
75
|
+
umap.next_visit_after = Time.now
|
76
|
+
umap.priority = 1.0
|
77
|
+
umap
|
78
|
+
end
|
79
|
+
|
80
|
+
def parse_to( line, umap )
|
81
|
+
fields = line.split( ',' )
|
82
|
+
umap.url = VisitURL.normalize( fields[0] )
|
83
|
+
umap
|
84
|
+
end
|
85
|
+
|
86
|
+
def close
|
87
|
+
@dsf.close if @dsf
|
88
|
+
@dsf = nil
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
Binary file
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-da'
|
18
|
+
require 'iudex-filter/key_helper'
|
19
|
+
|
20
|
+
module Iudex
|
21
|
+
module Filter
|
22
|
+
module KeyHelper
|
23
|
+
|
24
|
+
# Override to look up the matching Key in ContentMapper::LOGICAL_KEYS
|
25
|
+
# or, failing that, in the normal UniMap::KEY_SPACE
|
26
|
+
def self.lookup_key( name )
|
27
|
+
Iudex::DA::ContentMapper::LOGICAL_KEYS.get( name ) or
|
28
|
+
lookup_key_space( name )
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-da'
|
18
|
+
require 'rjack-slf4j'
|
19
|
+
require 'java'
|
20
|
+
require 'jdbc/postgres'
|
21
|
+
|
22
|
+
module Iudex::DA
|
23
|
+
|
24
|
+
# Factory for a DataSource using commons-dbcp and postgres driver
|
25
|
+
class PoolDataSourceFactory
|
26
|
+
import 'java.io.PrintWriter'
|
27
|
+
import 'java.sql.DriverManager'
|
28
|
+
import 'java.util.Properties'
|
29
|
+
import 'java.util.regex.Pattern'
|
30
|
+
import 'org.apache.commons.dbcp.DriverManagerConnectionFactory'
|
31
|
+
import 'org.apache.commons.dbcp.PoolableConnectionFactory'
|
32
|
+
import 'org.apache.commons.dbcp.PoolingDataSource'
|
33
|
+
import 'org.apache.commons.pool.impl.GenericObjectPool'
|
34
|
+
import 'iudex.util.LogWriter'
|
35
|
+
|
36
|
+
attr_accessor :data_source
|
37
|
+
|
38
|
+
def initialize( in_props = {} )
|
39
|
+
@props = Hooker.merge( [ :iudex, :connect_props ],
|
40
|
+
CONFIG.merge( in_props ) )
|
41
|
+
|
42
|
+
# Tweaks specific to the Java datasource/pool
|
43
|
+
@props[ :user ] ||= @props[ :username ]
|
44
|
+
@props.delete( :username )
|
45
|
+
|
46
|
+
@props[ :loglevel ] ||= 1
|
47
|
+
|
48
|
+
RJack::SLF4J[ 'iudex.da.PoolDataSourceFactory' ].info do
|
49
|
+
"Init properties: #{@props.inspect}"
|
50
|
+
end
|
51
|
+
load_driver
|
52
|
+
end
|
53
|
+
|
54
|
+
def create
|
55
|
+
con_factory = create_connection_factory
|
56
|
+
@con_pool = create_connection_pool( con_factory )
|
57
|
+
@data_source = PoolingDataSource.new( @con_pool )
|
58
|
+
end
|
59
|
+
|
60
|
+
def close
|
61
|
+
@con_pool.close
|
62
|
+
@con_pool = @data_source = nil
|
63
|
+
end
|
64
|
+
|
65
|
+
def load_driver
|
66
|
+
import 'org.postgresql.Driver'
|
67
|
+
lw = LogWriter.new( 'iudex.da.Driver' )
|
68
|
+
# Remove postgres time stamp, trailing whitespace.
|
69
|
+
lw.remove_pattern =
|
70
|
+
Pattern.compile( '(^\d\d:\d\d:\d\d\.\d\d\d\s\(\d\)\s)|(\s+$)' )
|
71
|
+
DriverManager::set_log_writer( PrintWriter.new( lw, true ) )
|
72
|
+
end
|
73
|
+
|
74
|
+
def create_connection_factory
|
75
|
+
uri = "jdbc:postgresql://%s/%s" % [ @props[ :host ], @props[ :database ] ]
|
76
|
+
|
77
|
+
jprops = Properties.new
|
78
|
+
@props.each { |k,v| jprops.set_property( k.to_s, v.to_s ) }
|
79
|
+
|
80
|
+
DriverManagerConnectionFactory.new( uri, jprops )
|
81
|
+
end
|
82
|
+
|
83
|
+
def create_connection_pool( con_factory )
|
84
|
+
con_pool = GenericObjectPool.new( nil )
|
85
|
+
|
86
|
+
con_count = @props[ :pool ]
|
87
|
+
if con_count
|
88
|
+
con_pool.max_active = con_count
|
89
|
+
con_pool.max_idle = con_count
|
90
|
+
end
|
91
|
+
|
92
|
+
props = @props[ :ds_pool ]
|
93
|
+
if props
|
94
|
+
props.each { |k,v| con_pool.send( k.to_s + '=', v ) }
|
95
|
+
end
|
96
|
+
|
97
|
+
# Constructing the PoolableConnectionFactory sets itself on con_pool
|
98
|
+
PoolableConnectionFactory.new( con_factory,
|
99
|
+
con_pool,
|
100
|
+
nil, #stmtPoolFactory
|
101
|
+
nil, #validationQuery
|
102
|
+
false, #read_only_default
|
103
|
+
true ) #auto_commit_default
|
104
|
+
con_pool
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
data/pom.xml
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
3
|
+
|
4
|
+
<modelVersion>4.0.0</modelVersion>
|
5
|
+
<groupId>iudex</groupId>
|
6
|
+
<artifactId>iudex-da</artifactId>
|
7
|
+
<packaging>jar</packaging>
|
8
|
+
<version>1.0.0</version>
|
9
|
+
<name>Iudex Data Access</name>
|
10
|
+
|
11
|
+
<parent>
|
12
|
+
<groupId>iudex</groupId>
|
13
|
+
<artifactId>iudex-parent</artifactId>
|
14
|
+
<version>1.0</version>
|
15
|
+
<relativePath>..</relativePath>
|
16
|
+
</parent>
|
17
|
+
|
18
|
+
<repositories>
|
19
|
+
<repository>
|
20
|
+
<id>maven2-repository.dev.java.net</id>
|
21
|
+
<name>Java.net Repository for Maven</name>
|
22
|
+
<url>http://download.java.net/maven/2/</url>
|
23
|
+
<layout>default</layout>
|
24
|
+
</repository>
|
25
|
+
</repositories>
|
26
|
+
|
27
|
+
<dependencies>
|
28
|
+
|
29
|
+
<dependency>
|
30
|
+
<groupId>iudex</groupId>
|
31
|
+
<artifactId>iudex-core</artifactId>
|
32
|
+
<version>[1.0,1.1)</version>
|
33
|
+
</dependency>
|
34
|
+
|
35
|
+
<dependency>
|
36
|
+
<groupId>commons-dbutils</groupId>
|
37
|
+
<artifactId>commons-dbutils</artifactId>
|
38
|
+
<version>1.3</version>
|
39
|
+
</dependency>
|
40
|
+
|
41
|
+
<dependency>
|
42
|
+
<groupId>commons-dbcp</groupId>
|
43
|
+
<artifactId>commons-dbcp</artifactId>
|
44
|
+
<version>1.4</version>
|
45
|
+
</dependency>
|
46
|
+
|
47
|
+
<dependency>
|
48
|
+
<groupId>commons-pool</groupId>
|
49
|
+
<artifactId>commons-pool</artifactId>
|
50
|
+
<version>[1.5.4,1.5.5]</version>
|
51
|
+
</dependency>
|
52
|
+
|
53
|
+
<dependency>
|
54
|
+
<groupId>org.postgresql</groupId>
|
55
|
+
<artifactId>postgresql-jdbc4</artifactId>
|
56
|
+
<version>[8.4.702,9.1)</version>
|
57
|
+
<scope>test</scope>
|
58
|
+
</dependency>
|
59
|
+
|
60
|
+
<dependency>
|
61
|
+
<groupId>junit</groupId>
|
62
|
+
<artifactId>junit</artifactId>
|
63
|
+
</dependency>
|
64
|
+
|
65
|
+
<dependency>
|
66
|
+
<groupId>ch.qos.logback</groupId>
|
67
|
+
<artifactId>logback-classic</artifactId>
|
68
|
+
<scope>test</scope>
|
69
|
+
</dependency>
|
70
|
+
|
71
|
+
</dependencies>
|
72
|
+
|
73
|
+
<build>
|
74
|
+
<plugins>
|
75
|
+
<plugin>
|
76
|
+
<!-- Parent settings -->
|
77
|
+
<artifactId>maven-compiler-plugin</artifactId>
|
78
|
+
</plugin>
|
79
|
+
<plugin>
|
80
|
+
<!-- Parent settings -->
|
81
|
+
<artifactId>maven-source-plugin</artifactId>
|
82
|
+
</plugin>
|
83
|
+
</plugins>
|
84
|
+
</build>
|
85
|
+
|
86
|
+
</project>
|
data/test/setup.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
#### General test setup: LOAD_PATH, logging, console output ####
|
18
|
+
|
19
|
+
ldir = File.join( File.dirname( __FILE__ ), "..", "lib" )
|
20
|
+
$LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
|
21
|
+
|
22
|
+
require 'rubygems'
|
23
|
+
require 'rjack-logback'
|
24
|
+
RJack::Logback.config_console( :stderr => true )
|
25
|
+
|
26
|
+
require 'minitest/unit'
|
27
|
+
require 'minitest/autorun'
|
28
|
+
|
29
|
+
# Make test output logging compatible: no partial lines.
|
30
|
+
class TestOut
|
31
|
+
def print( *a ); $stdout.puts( *a ); end
|
32
|
+
def puts( *a ); $stdout.puts( *a ); end
|
33
|
+
end
|
34
|
+
MiniTest::Unit.output = TestOut.new
|