iudex-da 1.0.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.rdoc +2 -0
- data/Manifest.txt +32 -0
- data/README.rdoc +30 -0
- data/Rakefile +49 -0
- data/bin/iudex-da-generate-test-data +127 -0
- data/bin/iudex-da-import +62 -0
- data/bin/iudex-da-simhash-dump +112 -0
- data/bin/iudex-migrate +66 -0
- data/config/config.rb +14 -0
- data/db/0010_base_urls.rb +84 -0
- data/db/0020_add_feed_metadata.rb +37 -0
- data/db/0021_more_feed_text.rb +29 -0
- data/db/0030_add_priority.rb +28 -0
- data/db/0040_add_visit_after.rb +30 -0
- data/db/0050_add_cache_location.rb +32 -0
- data/db/0060_url_indexes.rb +41 -0
- data/db/0070_add_created_at.rb +28 -0
- data/db/0080_add_simhash.rb +33 -0
- data/lib/iudex-da.rb +40 -0
- data/lib/iudex-da/ar.rb +48 -0
- data/lib/iudex-da/base.rb +23 -0
- data/lib/iudex-da/config.rb +31 -0
- data/lib/iudex-da/factory_helper.rb +53 -0
- data/lib/iudex-da/importer.rb +91 -0
- data/lib/iudex-da/iudex-da-1.0.0.jar +0 -0
- data/lib/iudex-da/key_helper.rb +33 -0
- data/lib/iudex-da/pool_data_source_factory.rb +108 -0
- data/pom.xml +86 -0
- data/test/setup.rb +34 -0
- data/test/test_migrate.rb +41 -0
- data/test/test_poll_work.rb +132 -0
- data/test/test_pool_factory.rb +59 -0
- metadata +203 -0
@@ -0,0 +1,23 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
module Iudex
  module DA
    # Version string of the iudex-da gem.
    VERSION = '1.0.0'

    # Directory holding this source file.
    LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2008-2011 David Kellum
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
6
|
+
# may not use this file except in compliance with the License. You
|
7
|
+
# may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
14
|
+
# implied. See the License for the specific language governing
|
15
|
+
# permissions and limitations under the License.
|
16
|
+
#++
|
17
|
+
|
18
|
+
module Iudex
  module DA
    # Baseline database connection settings, shared by ActiveRecord
    # (migrations, testing) and by PoolDataSourceFactory.
    CONFIG = {
      :adapter  => 'jdbcpostgresql',
      :host     => 'localhost',
      :database => 'iudex_test',
      :username => 'iudex',
      :pool     => 10
    }
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-da'
|
18
|
+
require 'iudex-da/key_helper'
|
19
|
+
require 'iudex-da/pool_data_source_factory'
|
20
|
+
|
21
|
+
module Iudex
  module DA
    module Filters

      # Helper methods meant to be mixed into a FilterChainFactory.
      module FactoryHelper
        include Iudex::Filter::KeyHelper

        # Returns the DataSource, creating it through a new
        # PoolDataSourceFactory on first use and memoizing the result.
        def data_source
          @data_source ||= begin
            factory = PoolDataSourceFactory.new
            factory.create
          end
        end

        # Builds an UpdateFilter over the given fields. The :uhash key is
        # always placed first and duplicate keys are dropped. Each of the
        # symbols is passed to create_chain (not defined in this module);
        # the chain it yields is assigned to the matching slot:
        # update_sym to update_ref_filter, new_sym to new_ref_filter and
        # post_sym to content_filter.
        def create_update_filter( fields = [],
                                  post_sym = nil,
                                  update_sym = nil,
                                  new_sym = nil )
          mapped_keys = ( keys( :uhash ) + fields ).uniq

          source = data_source
          mapper = ContentMapper.new( mapped_keys )
          filter = UpdateFilter.new( source, mapper )

          create_chain( update_sym ) { |chain| filter.update_ref_filter = chain }
          create_chain( new_sym )    { |chain| filter.new_ref_filter    = chain }
          create_chain( post_sym )   { |chain| filter.content_filter    = chain }

          filter
        end

      end

    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-da'
|
18
|
+
require 'iudex-da/key_helper'
|
19
|
+
require 'iudex-da/pool_data_source_factory'
|
20
|
+
|
21
|
+
module Iudex::DA

  # Imports URLs from line-oriented input into the database through a
  # ContentUpdater. Each input line is parsed into a UniMap cloned from
  # template_map and the maps are written in batches.
  #
  # import_keys, template_map and parse_to are separate methods so they
  # can be replaced to change what gets imported.
  class Importer
    include Iudex::Core
    include Gravitext::HTMap

    include Iudex::Filter::KeyHelper

    import 'iudex.da.BaseTransformer'
    import 'iudex.da.ContentUpdater'

    # Creates the PoolDataSourceFactory, defines the UniMap accessors
    # and then applies any [ :iudex, :importer ] Hooker configuration
    # to this instance.
    def initialize
      @dsf = PoolDataSourceFactory.new
      @data_source = nil
      UniMap.define_accessors

      Hooker.apply( [ :iudex, :importer ], self )
    end

    # Imports every named file (default: ARGV), one after another.
    def import_files( files = ARGV )
      files.each do |fname|
        # File.open, not Kernel#open: the latter treats a name starting
        # with "|" as a command to run, and these names default to ARGV.
        File.open( fname, "r" ) do |fin|
          import( fin )
        end
      end
    end

    # Imports all lines yielded by input.each. Every line is parsed
    # with parse_to into a clone of a single template_map result, and
    # the maps are passed to the updater in batches of 1,000.
    def import( input )

      cmapper = ContentMapper.new( keys( import_keys ) )
      transformer = BaseTransformer.new

      # Create the pooled DataSource once and reuse it for later calls.
      # Each @dsf.create replaces the factory's reference to its pool,
      # so creating one per import would leave all but the last pool
      # unclosed.
      @data_source ||= @dsf.create
      updater = ContentUpdater.new( @data_source, cmapper, transformer )

      tmpl = template_map
      batch = []

      input.each do |line|
        umap = tmpl.clone
        parse_to( line, umap )
        batch << umap
        if batch.length >= 1_000
          updater.update( batch )
          batch.clear
        end
      end

      # Flush the final, partially filled batch.
      updater.update( batch ) unless batch.empty?
    end

    # Names of the keys mapped for each imported row.
    def import_keys
      [ :uhash, :host, :url, :type, :priority, :next_visit_after ]
    end

    # Returns a new UniMap carrying the defaults cloned for every
    # imported line: type "FEED", next_visit_after of the current time
    # and priority 1.0.
    def template_map
      umap = UniMap.new
      umap.type = "FEED"
      umap.next_visit_after = Time.now
      umap.priority = 1.0
      umap
    end

    # Parses one input line into umap and returns umap. The line is
    # split on ',' and only the first field is used, as the URL, after
    # VisitURL.normalize.
    def parse_to( line, umap )
      fields = line.split( ',' )
      umap.url = VisitURL.normalize( fields[0] )
      umap
    end

    # Closes the connection pool, if one was created, and drops the
    # factory. The factory is only closed after a DataSource has been
    # created, since its close assumes an existing pool.
    def close
      @dsf.close if @dsf && @data_source
      @dsf = @data_source = nil
    end
  end
end
|
Binary file
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-da'
|
18
|
+
require 'iudex-filter/key_helper'
|
19
|
+
|
20
|
+
module Iudex
  module Filter
    module KeyHelper

      # Override: look the name up in ContentMapper::LOGICAL_KEYS first
      # and fall back to the normal UniMap::KEY_SPACE lookup
      # (lookup_key_space) when no logical key matches.
      def self.lookup_key( name )
        logical = Iudex::DA::ContentMapper::LOGICAL_KEYS.get( name )
        logical || lookup_key_space( name )
      end

    end
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-da'
|
18
|
+
require 'rjack-slf4j'
|
19
|
+
require 'java'
|
20
|
+
require 'jdbc/postgres'
|
21
|
+
|
22
|
+
module Iudex::DA

  # Factory for a DataSource using commons-dbcp and postgres driver
  class PoolDataSourceFactory
    import 'java.io.PrintWriter'
    import 'java.sql.DriverManager'
    import 'java.util.Properties'
    import 'java.util.regex.Pattern'
    import 'org.apache.commons.dbcp.DriverManagerConnectionFactory'
    import 'org.apache.commons.dbcp.PoolableConnectionFactory'
    import 'org.apache.commons.dbcp.PoolingDataSource'
    import 'org.apache.commons.pool.impl.GenericObjectPool'
    import 'iudex.util.LogWriter'

    # The DataSource built by the last call to create (nil before
    # create and after close).
    attr_accessor :data_source

    # Builds the connection properties: CONFIG overlaid with in_props,
    # then merged with any [ :iudex, :connect_props ] Hooker
    # configuration. Logs the result and installs the driver log
    # writer.
    def initialize( in_props = {} )
      @props = Hooker.merge( [ :iudex, :connect_props ],
                             CONFIG.merge( in_props ) )

      # Tweaks specific for Java datasource/pool: the :username setting
      # is passed on under the :user key instead.
      @props[ :user ] ||= @props[ :username ]
      @props.delete( :username )

      @props[ :loglevel ] ||= 1

      # NOTE(review): this logs every property, which would include a
      # :password entry if one is supplied -- confirm this is acceptable.
      RJack::SLF4J[ 'iudex.da.PoolDataSourceFactory' ].info do
        "Init properties: #{@props.inspect}"
      end
      load_driver
    end

    # Creates the connection pool and returns a new PoolingDataSource
    # backed by it. Both are retained so that close can release them.
    def create
      con_factory = create_connection_factory
      @con_pool = create_connection_pool( con_factory )
      @data_source = PoolingDataSource.new( @con_pool )
    end

    # Closes the connection pool and forgets the pool and data source.
    # Safe to call when create was never called, or more than once.
    def close
      @con_pool.close if @con_pool
      @con_pool = @data_source = nil
    end

    # Imports the postgres driver class and routes DriverManager log
    # output to a LogWriter named 'iudex.da.Driver'.
    def load_driver
      import 'org.postgresql.Driver'
      lw = LogWriter.new( 'iudex.da.Driver' )
      # Remove postgres time stamp, trailing whitespace.
      lw.remove_pattern =
        Pattern.compile( '(^\d\d:\d\d:\d\d\.\d\d\d\s\(\d\)\s)|(\s+$)' )
      DriverManager::set_log_writer( PrintWriter.new( lw, true ) )
    end

    # Returns a DriverManagerConnectionFactory for the configured
    # :host and :database. Every entry of the properties hash is passed
    # along as a string-valued Java property.
    def create_connection_factory
      uri = "jdbc:postgresql://%s/%s" % [ @props[ :host ], @props[ :database ] ]

      jprops = Properties.new
      @props.each { |k,v| jprops.set_property( k.to_s, v.to_s ) }

      DriverManagerConnectionFactory.new( uri, jprops )
    end

    # Returns a GenericObjectPool wired to con_factory. The :pool
    # setting, when present, is used for both max_active and max_idle;
    # each :ds_pool entry is applied through the pool setter of the
    # same name.
    def create_connection_pool( con_factory )
      con_pool = GenericObjectPool.new( nil )

      con_count = @props[ :pool ]
      if con_count
        con_pool.max_active = con_count
        con_pool.max_idle = con_count
      end

      props = @props[ :ds_pool ]
      if props
        props.each { |k,v| con_pool.send( k.to_s + '=', v ) }
      end

      # This sets self on con_pool, so the returned factory object does
      # not need to be kept.
      PoolableConnectionFactory.new( con_factory,
                                     con_pool,
                                     nil, #stmtPoolFactory
                                     nil, #validationQuery
                                     false, #read_only_default
                                     true ) #auto_commit_default
      con_pool
    end
  end

end
|
data/pom.xml
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
3
|
+
|
4
|
+
<modelVersion>4.0.0</modelVersion>
|
5
|
+
<groupId>iudex</groupId>
|
6
|
+
<artifactId>iudex-da</artifactId>
|
7
|
+
<packaging>jar</packaging>
|
8
|
+
<version>1.0.0</version>
|
9
|
+
<name>Iudex Data Access</name>
|
10
|
+
|
11
|
+
<parent>
|
12
|
+
<groupId>iudex</groupId>
|
13
|
+
<artifactId>iudex-parent</artifactId>
|
14
|
+
<version>1.0</version>
|
15
|
+
<relativePath>..</relativePath>
|
16
|
+
</parent>
|
17
|
+
|
18
|
+
<repositories>
|
19
|
+
<repository>
|
20
|
+
<id>maven2-repository.dev.java.net</id>
|
21
|
+
<name>Java.net Repository for Maven</name>
|
22
|
+
<url>http://download.java.net/maven/2/</url>
|
23
|
+
<layout>default</layout>
|
24
|
+
</repository>
|
25
|
+
</repositories>
|
26
|
+
|
27
|
+
<dependencies>
|
28
|
+
|
29
|
+
<dependency>
|
30
|
+
<groupId>iudex</groupId>
|
31
|
+
<artifactId>iudex-core</artifactId>
|
32
|
+
<version>[1.0,1.1)</version>
|
33
|
+
</dependency>
|
34
|
+
|
35
|
+
<dependency>
|
36
|
+
<groupId>commons-dbutils</groupId>
|
37
|
+
<artifactId>commons-dbutils</artifactId>
|
38
|
+
<version>1.3</version>
|
39
|
+
</dependency>
|
40
|
+
|
41
|
+
<dependency>
|
42
|
+
<groupId>commons-dbcp</groupId>
|
43
|
+
<artifactId>commons-dbcp</artifactId>
|
44
|
+
<version>1.4</version>
|
45
|
+
</dependency>
|
46
|
+
|
47
|
+
<dependency>
|
48
|
+
<groupId>commons-pool</groupId>
|
49
|
+
<artifactId>commons-pool</artifactId>
|
50
|
+
<version>[1.5.4,1.5.5]</version>
|
51
|
+
</dependency>
|
52
|
+
|
53
|
+
<dependency>
|
54
|
+
<groupId>org.postgresql</groupId>
|
55
|
+
<artifactId>postgresql-jdbc4</artifactId>
|
56
|
+
<version>[8.4.702,9.1)</version>
|
57
|
+
<scope>test</scope>
|
58
|
+
</dependency>
|
59
|
+
|
60
|
+
<dependency>
|
61
|
+
<groupId>junit</groupId>
|
62
|
+
<artifactId>junit</artifactId>
|
63
|
+
</dependency>
|
64
|
+
|
65
|
+
<dependency>
|
66
|
+
<groupId>ch.qos.logback</groupId>
|
67
|
+
<artifactId>logback-classic</artifactId>
|
68
|
+
<scope>test</scope>
|
69
|
+
</dependency>
|
70
|
+
|
71
|
+
</dependencies>
|
72
|
+
|
73
|
+
<build>
|
74
|
+
<plugins>
|
75
|
+
<plugin>
|
76
|
+
<!-- Parent settings -->
|
77
|
+
<artifactId>maven-compiler-plugin</artifactId>
|
78
|
+
</plugin>
|
79
|
+
<plugin>
|
80
|
+
<!-- Parent settings -->
|
81
|
+
<artifactId>maven-source-plugin</artifactId>
|
82
|
+
</plugin>
|
83
|
+
</plugins>
|
84
|
+
</build>
|
85
|
+
|
86
|
+
</project>
|
data/test/setup.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
#### General test setup: LOAD_PATH, logging, console output ####
|
18
|
+
|
19
|
+
# Put the sibling "lib" directory at the front of the load path,
# unless it is already listed.
lib_dir = File.join( File.dirname( __FILE__ ), "..", "lib" )
unless $LOAD_PATH.include?( lib_dir )
  $LOAD_PATH.unshift( lib_dir )
end
|
21
|
+
|
22
|
+
require 'rubygems'
|
23
|
+
require 'rjack-logback'
|
24
|
+
# Configure Logback console logging, passing the :stderr option as true.
RJack::Logback.config_console( :stderr => true )
|
25
|
+
|
26
|
+
require 'minitest/unit'
|
27
|
+
require 'minitest/autorun'
|
28
|
+
|
29
|
+
# Make test output logging compatible: no partial lines.
|
30
|
+
class TestOut
  # Writes the arguments with $stdout.puts, so output from print also
  # ends in a newline.
  def print( *args )
    $stdout.puts( *args )
  end

  # Passes the arguments straight to $stdout.puts.
  def puts( *args )
    $stdout.puts( *args )
  end
end
|
34
|
+
# Use a TestOut instance as MiniTest's output object.
MiniTest::Unit.output = TestOut.new
|