iudex-filter 1.0.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/History.rdoc +2 -0
- data/Manifest.txt +17 -0
- data/README.rdoc +25 -0
- data/Rakefile +40 -0
- data/lib/iudex-filter/base.rb +23 -0
- data/lib/iudex-filter/by_filter_logger.rb +74 -0
- data/lib/iudex-filter/filter_base.rb +46 -0
- data/lib/iudex-filter/filter_chain_factory.rb +171 -0
- data/lib/iudex-filter/iudex-filter-1.0.0.jar +0 -0
- data/lib/iudex-filter/key_helper.rb +56 -0
- data/lib/iudex-filter/proc_filter.rb +50 -0
- data/lib/iudex-filter.rb +52 -0
- data/pom.xml +56 -0
- data/test/setup.rb +34 -0
- data/test/test_filter_base.rb +42 -0
- data/test/test_filter_chain_factory.rb +86 -0
- data/test/test_proc_filter.rb +65 -0
- metadata +138 -0
data/History.rdoc
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
History.rdoc
|
2
|
+
Manifest.txt
|
3
|
+
README.rdoc
|
4
|
+
Rakefile
|
5
|
+
pom.xml
|
6
|
+
lib/iudex-filter/base.rb
|
7
|
+
lib/iudex-filter.rb
|
8
|
+
lib/iudex-filter/by_filter_logger.rb
|
9
|
+
lib/iudex-filter/filter_base.rb
|
10
|
+
lib/iudex-filter/filter_chain_factory.rb
|
11
|
+
lib/iudex-filter/key_helper.rb
|
12
|
+
lib/iudex-filter/proc_filter.rb
|
13
|
+
test/setup.rb
|
14
|
+
test/test_filter_base.rb
|
15
|
+
test/test_filter_chain_factory.rb
|
16
|
+
test/test_proc_filter.rb
|
17
|
+
lib/iudex-filter/iudex-filter-1.0.0.jar
|
data/README.rdoc
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
= iudex-filter
|
2
|
+
|
3
|
+
* http://github.com/dekellum/iudex
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Iudex is a general purpose web crawler and feed processor in
|
8
|
+
ruby/java. The iudex-filter gem contains a fundamental filtering/chain
|
9
|
+
of responsibility sub-system.
|
10
|
+
|
11
|
+
== License
|
12
|
+
|
13
|
+
Copyright (c) 2008-2011 David Kellum
|
14
|
+
|
15
|
+
Licensed under the Apache License, Version 2.0 (the "License"); you
|
16
|
+
may not use this file except in compliance with the License. You
|
17
|
+
may obtain a copy of the License at:
|
18
|
+
|
19
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
20
|
+
|
21
|
+
Unless required by applicable law or agreed to in writing, software
|
22
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
23
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
24
|
+
implied. See the License for the specific language governing
|
25
|
+
permissions and limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
$LOAD_PATH << './lib'
|
4
|
+
require 'iudex-filter/base'
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
gem 'rjack-tarpit', '~> 1.2'
|
8
|
+
require 'rjack-tarpit'
|
9
|
+
|
10
|
+
t = RJack::TarPit.new( 'iudex-filter',
|
11
|
+
Iudex::Filter::VERSION,
|
12
|
+
:no_assembly, :java_platform )
|
13
|
+
|
14
|
+
t.specify do |h|
|
15
|
+
h.developer( "David Kellum", "dek-oss@gravitext.com" )
|
16
|
+
h.extra_deps += [ [ 'rjack-slf4j', '~> 1.6.1' ],
|
17
|
+
[ 'gravitext-util', '~> 1.5.0' ] ]
|
18
|
+
|
19
|
+
h.testlib = :minitest
|
20
|
+
h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
|
21
|
+
[ 'rjack-logback', '>= 1.0' ] ]
|
22
|
+
end
|
23
|
+
|
24
|
+
file 'Manifest.txt' => "lib/#{t.name}/base.rb"
|
25
|
+
|
26
|
+
task :check_pom_version do
|
27
|
+
t.test_line_match( 'pom.xml', /<version>/, /#{t.version}/ )
|
28
|
+
end
|
29
|
+
task :check_history_version do
|
30
|
+
t.test_line_match( 'History.rdoc', /^==/, / #{t.version} / )
|
31
|
+
end
|
32
|
+
task :check_history_date do
|
33
|
+
t.test_line_match( 'History.rdoc', /^==/, /\([0-9\-]+\)$/ )
|
34
|
+
end
|
35
|
+
|
36
|
+
task :gem => [ :check_pom_version, :check_history_version ]
|
37
|
+
task :tag => [ :check_pom_version, :check_history_version, :check_history_date ]
|
38
|
+
task :push => [ :check_history_date ]
|
39
|
+
|
40
|
+
t.define_tasks
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
module Iudex
|
18
|
+
module Filter
|
19
|
+
VERSION = '1.0.0'
|
20
|
+
|
21
|
+
LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-filter'
|
18
|
+
require 'stringio'
|
19
|
+
|
20
|
+
module Iudex
|
21
|
+
module Filter
|
22
|
+
module Core
|
23
|
+
|
24
|
+
class ByFilterLogger
|
25
|
+
include ByFilterReporter::ReportWriter
|
26
|
+
|
27
|
+
import 'com.gravitext.util.Metric'
|
28
|
+
|
29
|
+
def initialize( desc, index )
|
30
|
+
@log = RJack::SLF4J[ "iudex.filter.core.ByFilterLogger.#{desc}" ]
|
31
|
+
@index = index
|
32
|
+
@nlength = index.filters.map { |f| index.name( f ).length }.max
|
33
|
+
end
|
34
|
+
|
35
|
+
def report( total, delta, duration_ns, counters )
|
36
|
+
out = StringIO.new
|
37
|
+
|
38
|
+
out << "Report total: %s ::\n" % [ fmt( total ) ]
|
39
|
+
out << ( " %-#{@nlength}s %6s %5s %6s %6s" %
|
40
|
+
%w{ Filter Accept % Reject Failed } )
|
41
|
+
|
42
|
+
accepted = total
|
43
|
+
@index.filters.each do |f|
|
44
|
+
c = counters[ f ]
|
45
|
+
d = dropped( c )
|
46
|
+
if d > 0
|
47
|
+
p = prc( -d, accepted )
|
48
|
+
accepted -= d
|
49
|
+
out << ( "\n %-#{@nlength}s %6s %4.0f%% %6s %6s" %
|
50
|
+
[ @index.name( f ),
|
51
|
+
fmt( accepted ), p,
|
52
|
+
fmt( c.rejected ), fmt( c.failed ) ] )
|
53
|
+
end
|
54
|
+
end
|
55
|
+
@log.info( out.string )
|
56
|
+
end
|
57
|
+
|
58
|
+
def dropped( c )
|
59
|
+
c.rejected + c.failed
|
60
|
+
end
|
61
|
+
|
62
|
+
def fmt( v )
|
63
|
+
Metric::format( v )
|
64
|
+
end
|
65
|
+
|
66
|
+
def prc( v, t )
|
67
|
+
( t > 0 ) ? v.to_f / t * 100.0 : 0.0
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-filter'
|
18
|
+
|
19
|
+
module Iudex::Filter
|
20
|
+
|
21
|
+
# Default implementation of Filter, Described, and Named interfaces
|
22
|
+
class FilterBase
|
23
|
+
include Filter
|
24
|
+
include Described
|
25
|
+
include Named
|
26
|
+
|
27
|
+
# Returns empty list
|
28
|
+
def describe
|
29
|
+
[]
|
30
|
+
end
|
31
|
+
|
32
|
+
# Returns abbreviated/lower case module names plus class name, in
# dot notation (e.g. Iudex::Filter::FilterBase => "i.f.FilterBase").
# A top-level class name is returned unchanged.
def name
  # Use the non-destructive gsub: the String returned by
  # Module#name is not ours to modify (it is frozen on newer rubies,
  # where gsub! would raise).
  dotted = self.class.name.gsub( /::/, '.' )

  # Collapse each "Module." prefix to its lower-cased first letter.
  dotted.gsub( /(\w)\w+\./ ) { $1.downcase + '.' }
end
|
39
|
+
|
40
|
+
# Returns true
|
41
|
+
def filter( map )
|
42
|
+
true
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-filter'
|
18
|
+
require 'iudex-filter/key_helper'
|
19
|
+
require 'iudex-filter/by_filter_logger'
|
20
|
+
|
21
|
+
module Iudex
|
22
|
+
module Filter
|
23
|
+
module Core
|
24
|
+
|
25
|
+
class FilterChainFactory
|
26
|
+
attr_reader :description
|
27
|
+
|
28
|
+
include KeyHelper
|
29
|
+
|
30
|
+
def initialize( description = "default" )
|
31
|
+
@description = description
|
32
|
+
|
33
|
+
@log = RJack::SLF4J[ [ RJack::SLF4J.to_log_name( self.class ),
|
34
|
+
description ].join('.') ]
|
35
|
+
|
36
|
+
@summary_period = nil
|
37
|
+
@by_filter_period = nil
|
38
|
+
|
39
|
+
@index = nil
|
40
|
+
@chain = nil
|
41
|
+
@listener = nil
|
42
|
+
end
|
43
|
+
|
44
|
+
def add_summary_reporter( period_s = 10.0 )
|
45
|
+
@summary_period = period_s
|
46
|
+
end
|
47
|
+
|
48
|
+
def add_by_filter_reporter( period_s = 60 * 10.0 )
|
49
|
+
@by_filter_period = period_s
|
50
|
+
end
|
51
|
+
|
52
|
+
def open
|
53
|
+
close if open?
|
54
|
+
|
55
|
+
@index = FilterIndex.new
|
56
|
+
|
57
|
+
flts = filters
|
58
|
+
log_and_register( flts )
|
59
|
+
|
60
|
+
@listener = ListenerChain.new( listeners )
|
61
|
+
@chain = create_chain( @description, flts )
|
62
|
+
@chain.listener = @listener
|
63
|
+
|
64
|
+
# With all filters loaded and thus key references, make sure
|
65
|
+
# UniMap accessors are defined (for ruby filters)
|
66
|
+
Gravitext::HTMap::UniMap.define_accessors
|
67
|
+
|
68
|
+
nil
|
69
|
+
end
|
70
|
+
|
71
|
+
def open?
|
72
|
+
@chain != nil
|
73
|
+
end
|
74
|
+
|
75
|
+
# Close the chain and then the listener (each only if present) and
# reset both, so that open? returns false afterwards. Always
# returns nil.
#
# The listener is closed and both references are cleared even if
# closing the chain raises; the error is still propagated to the
# caller. If both close calls raise, the listener's error is the
# one propagated.
def close
  begin
    @chain.close if @chain
  ensure
    @chain = nil
    begin
      @listener.close if @listener
    ensure
      @listener = nil
    end
  end
  nil
end
|
86
|
+
|
87
|
+
# Yields chain to block, bounded by open/close if not already open.
# Returns the result of the block. If the factory was already open
# on entry, it is left open.
def filter
  # Record the intent to open *before* calling open, so that a
  # failure part way through open still triggers close below and
  # releases whatever had already been created.
  opened = !open?
  open if opened

  yield @chain
ensure
  close if opened
end
|
99
|
+
|
100
|
+
def filters
|
101
|
+
[]
|
102
|
+
end
|
103
|
+
|
104
|
+
def log_listener( desc )
|
105
|
+
LogListener.new( "iudex.filter.core.FilterChain.#{desc}", @index )
|
106
|
+
end
|
107
|
+
|
108
|
+
def listeners
|
109
|
+
ll = [ log_listener( @description ) ]
|
110
|
+
|
111
|
+
if @summary_period
|
112
|
+
ll << SummaryReporter.new( @description, @summary_period )
|
113
|
+
end
|
114
|
+
|
115
|
+
if @by_filter_period
|
116
|
+
ll << ByFilterReporter.new( @index,
|
117
|
+
ByFilterLogger.new( @description, @index ),
|
118
|
+
@by_filter_period )
|
119
|
+
end
|
120
|
+
ll
|
121
|
+
end
|
122
|
+
|
123
|
+
# Create, yield to optional block, and return FilterChain if
|
124
|
+
# flts is not empty. Otherwise return a NoOpFilter and don't
|
125
|
+
# yield. If passed a single Symbol argument, will use both
|
126
|
+
# as description and method to obtain flts array from.
|
127
|
+
def create_chain( desc, flts = nil )
|
128
|
+
|
129
|
+
if desc.is_a?( Symbol )
|
130
|
+
flts = send( desc ) unless flts
|
131
|
+
desc = desc.to_s.gsub( /_/, '-' )
|
132
|
+
end
|
133
|
+
|
134
|
+
if flts.nil? || flts.empty?
|
135
|
+
NoOpFilter.new
|
136
|
+
else
|
137
|
+
c = FilterChain.new( desc, flts )
|
138
|
+
c.listener = log_listener( desc )
|
139
|
+
yield c if block_given?
|
140
|
+
c
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
# Create a new Switch given selector key and map of values to
|
145
|
+
# filters.
|
146
|
+
def create_switch( key, value_filters_map )
|
147
|
+
switch = Switch.new
|
148
|
+
value_filters_map.each do |value, filters|
|
149
|
+
create_chain( value.to_s.downcase, filters ) do |chain|
|
150
|
+
switch.add_proposition( Selector.new( key, value ), chain )
|
151
|
+
end
|
152
|
+
end
|
153
|
+
switch
|
154
|
+
end
|
155
|
+
|
156
|
+
private
|
157
|
+
|
158
|
+
def log_and_register( filters, depth = 0 )
|
159
|
+
filters.each do |filter|
|
160
|
+
name = @index.register( filter )
|
161
|
+
@log.info { "<< " + " " * depth + name }
|
162
|
+
if filter.kind_of?( FilterContainer )
|
163
|
+
log_and_register( filter.children, depth + 1 )
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
Binary file
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-filter'
|
18
|
+
|
19
|
+
class Symbol
|
20
|
+
def to_k
|
21
|
+
Iudex::Filter::KeyHelper.lookup_key( self.to_s )
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
class Gravitext::HTMap::Key
|
26
|
+
def to_k
|
27
|
+
self
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
module Iudex
|
32
|
+
module Filter
|
33
|
+
|
34
|
+
# Mixin module support for UniMap Keys
|
35
|
+
module KeyHelper
|
36
|
+
|
37
|
+
# Lookup matching Key in UniMap::KEY_SPACE
|
38
|
+
def self.lookup_key( name )
|
39
|
+
lookup_key_space( name )
|
40
|
+
end
|
41
|
+
|
42
|
+
# Lookup matching Key in UniMap::KEY_SPACE
|
43
|
+
def self.lookup_key_space( name )
|
44
|
+
Gravitext::HTMap::UniMap::KEY_SPACE.get( name ) or
|
45
|
+
raise( "Key #{name} not found" )
|
46
|
+
end
|
47
|
+
|
48
|
+
# Map Symbols to Keys
|
49
|
+
def keys( *syms )
|
50
|
+
syms = syms[0] if ( syms[0] && syms[0].respond_to?( :each ) )
|
51
|
+
syms.map { |s| s.to_k }.uniq
|
52
|
+
end
|
53
|
+
module_function :keys
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-filter/filter_base'
|
18
|
+
|
19
|
+
module Iudex::Filter
|
20
|
+
|
21
|
+
# Short hand for ProcFilter.new
|
22
|
+
def fltr( &block )
|
23
|
+
ProcFilter.new( caller.first, &block )
|
24
|
+
end
|
25
|
+
|
26
|
+
class ProcFilter < FilterBase
|
27
|
+
|
28
|
+
# New ProcFilter using block as implementation of
# Filter.filter( map ). The created filter will only return false
# (reject map, stop chain) if the block returns the :reject
# symbol.
#
# clr:: Optional caller location String ("file:line[:in ...]") from
#       which the description [ file base name, line ] is derived.
#       Defaults to the caller of this method.
def initialize( clr = nil, &block )
  @block = block

  clr ||= caller.first

  # Anchor on the ":<line>" suffix instead of splitting on every
  # ':', so that paths which themselves contain a colon (for
  # example a drive letter, "C:/lib/foo.rb:12") keep their full
  # file name and correct line number.
  if clr =~ /\A(.+?):(\d+)(?::|\z)/
    file, line = $1, $2
  else
    # Unrecognized format: keep the simple split as a fallback.
    file, line = clr.split( /:/ )
  end

  @description = [ File.basename( file, ".rb" ), line.to_i ]
  #FIXME: When ruby 1.9, can use Proc.source_location instead.
end
|
40
|
+
|
41
|
+
def describe
|
42
|
+
@description
|
43
|
+
end
|
44
|
+
|
45
|
+
def filter( map )
|
46
|
+
( @block.call( map ) != :reject )
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
data/lib/iudex-filter.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'rjack-slf4j'
|
18
|
+
require 'gravitext-util'
|
19
|
+
|
20
|
+
require 'iudex-filter/base'
|
21
|
+
|
22
|
+
require 'java'
|
23
|
+
|
24
|
+
module Iudex
|
25
|
+
module Filter
|
26
|
+
require "#{LIB_DIR}/iudex-filter-#{VERSION}.jar"
|
27
|
+
|
28
|
+
import 'iudex.filter.Filter'
|
29
|
+
import 'iudex.filter.FilterContainer'
|
30
|
+
import 'iudex.filter.Described'
|
31
|
+
import 'iudex.filter.Named'
|
32
|
+
import 'iudex.filter.NoOpFilter'
|
33
|
+
|
34
|
+
module Core
|
35
|
+
import 'iudex.filter.core.ByFilterReporter'
|
36
|
+
import 'iudex.filter.core.Copier'
|
37
|
+
import 'iudex.filter.core.FilterChain'
|
38
|
+
import 'iudex.filter.core.FilterIndex'
|
39
|
+
import 'iudex.filter.core.ListenerChain'
|
40
|
+
import 'iudex.filter.core.LogListener'
|
41
|
+
import 'iudex.filter.core.MDCSetter'
|
42
|
+
import 'iudex.filter.core.MDCUnsetter'
|
43
|
+
import 'iudex.filter.core.Selector'
|
44
|
+
import 'iudex.filter.core.Setter'
|
45
|
+
import 'iudex.filter.core.SummaryReporter'
|
46
|
+
import 'iudex.filter.core.Switch'
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
require 'iudex-filter/filter_base'
|
52
|
+
require 'iudex-filter/proc_filter'
|
data/pom.xml
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
3
|
+
|
4
|
+
<modelVersion>4.0.0</modelVersion>
|
5
|
+
<groupId>iudex</groupId>
|
6
|
+
<artifactId>iudex-filter</artifactId>
|
7
|
+
<packaging>jar</packaging>
|
8
|
+
<version>1.0.0</version>
|
9
|
+
<name>Iudex Filter</name>
|
10
|
+
|
11
|
+
<parent>
|
12
|
+
<groupId>iudex</groupId>
|
13
|
+
<artifactId>iudex-parent</artifactId>
|
14
|
+
<version>1.0</version>
|
15
|
+
<relativePath>..</relativePath>
|
16
|
+
</parent>
|
17
|
+
|
18
|
+
<dependencies>
|
19
|
+
|
20
|
+
<dependency>
|
21
|
+
<groupId>org.slf4j</groupId>
|
22
|
+
<artifactId>slf4j-api</artifactId>
|
23
|
+
</dependency>
|
24
|
+
|
25
|
+
<dependency>
|
26
|
+
<groupId>com.gravitext</groupId>
|
27
|
+
<artifactId>gravitext-util</artifactId>
|
28
|
+
</dependency>
|
29
|
+
|
30
|
+
<dependency>
|
31
|
+
<groupId>junit</groupId>
|
32
|
+
<artifactId>junit</artifactId>
|
33
|
+
</dependency>
|
34
|
+
|
35
|
+
<dependency>
|
36
|
+
<groupId>ch.qos.logback</groupId>
|
37
|
+
<artifactId>logback-classic</artifactId>
|
38
|
+
<scope>test</scope>
|
39
|
+
</dependency>
|
40
|
+
|
41
|
+
</dependencies>
|
42
|
+
|
43
|
+
<build>
|
44
|
+
<plugins>
|
45
|
+
<plugin>
|
46
|
+
<!-- Parent settings -->
|
47
|
+
<artifactId>maven-compiler-plugin</artifactId>
|
48
|
+
</plugin>
|
49
|
+
<plugin>
|
50
|
+
<!-- Parent settings -->
|
51
|
+
<artifactId>maven-source-plugin</artifactId>
|
52
|
+
</plugin>
|
53
|
+
</plugins>
|
54
|
+
</build>
|
55
|
+
|
56
|
+
</project>
|
data/test/setup.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
#### General test setup: LOAD_PATH, logging, console output ####
|
18
|
+
|
19
|
+
ldir = File.join( File.dirname( __FILE__ ), "..", "lib" )
|
20
|
+
$LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
|
21
|
+
|
22
|
+
require 'rubygems'
|
23
|
+
require 'rjack-logback'
|
24
|
+
RJack::Logback.config_console( :stderr => true )
|
25
|
+
|
26
|
+
require 'minitest/unit'
|
27
|
+
require 'minitest/autorun'
|
28
|
+
|
29
|
+
# Make test output logging compatible: no partial lines.
|
30
|
+
class TestOut
|
31
|
+
def print( *a ); $stdout.puts( *a ); end
|
32
|
+
def puts( *a ); $stdout.puts( *a ); end
|
33
|
+
end
|
34
|
+
MiniTest::Unit.output = TestOut.new
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'gravitext-util'
|
23
|
+
require 'iudex-filter/proc_filter'
|
24
|
+
|
25
|
+
class TestFilter < Iudex::Filter::FilterBase
|
26
|
+
end
|
27
|
+
|
28
|
+
class TestFilterBase < MiniTest::Unit::TestCase
|
29
|
+
include Iudex::Filter
|
30
|
+
include Gravitext::HTMap
|
31
|
+
|
32
|
+
def test_base_name
|
33
|
+
f = FilterBase.new
|
34
|
+
assert_equal( "i.f.FilterBase", f.name )
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_top_level_name
|
38
|
+
f = TestFilter.new
|
39
|
+
assert_equal( "TestFilter", f.name )
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
RJack::Logback.config_console( :stderr => true, :mdc => "tkey" )
|
23
|
+
|
24
|
+
# RJack::Logback[ "iudex.filter.core.FilterChain.test.reject" ].level = RJack::Logback::DEBUG
|
25
|
+
|
26
|
+
require 'gravitext-util'
|
27
|
+
require 'iudex-filter/filter_chain_factory'
|
28
|
+
|
29
|
+
class TestFilterChainFactory < MiniTest::Unit::TestCase
|
30
|
+
include Iudex::Filter
|
31
|
+
include Iudex::Filter::Core
|
32
|
+
|
33
|
+
include Gravitext::HTMap
|
34
|
+
|
35
|
+
import 'iudex.filter.core.MDCSetter'
|
36
|
+
import 'iudex.filter.core.MDCUnsetter'
|
37
|
+
|
38
|
+
TKEY = UniMap.create_key( 'tkey' );
|
39
|
+
|
40
|
+
class RandomFilter < FilterBase
|
41
|
+
|
42
|
+
def initialize( odds = 2 )
|
43
|
+
@odds = odds
|
44
|
+
end
|
45
|
+
|
46
|
+
def describe
|
47
|
+
[ @odds ]
|
48
|
+
end
|
49
|
+
|
50
|
+
def filter( map )
|
51
|
+
rand( @odds ) != 0
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_filter_chain
|
56
|
+
fcf = FilterChainFactory.new( "test" )
|
57
|
+
fcf.add_summary_reporter( 1.0 )
|
58
|
+
fcf.add_by_filter_reporter( 2.5 )
|
59
|
+
|
60
|
+
def fcf.filters
|
61
|
+
[ MDCSetter.new( TKEY ) ] + super +
|
62
|
+
[ 6, 4, 6, 6 ].map { |p| RandomFilter.new( p ) }
|
63
|
+
end
|
64
|
+
|
65
|
+
def fcf.listeners
|
66
|
+
super + [ MDCUnsetter.new( TKEY ) ]
|
67
|
+
end
|
68
|
+
|
69
|
+
2.times do |r|
|
70
|
+
assert( ! fcf.open? )
|
71
|
+
|
72
|
+
fcf.filter do |chain|
|
73
|
+
1000.times do |t|
|
74
|
+
sleep( rand( 10 ) / 1000.0 / ( r + 1 ) )
|
75
|
+
map = UniMap.new
|
76
|
+
map.tkey = t
|
77
|
+
chain.filter( map )
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
assert( ! fcf.open? )
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'gravitext-util'
|
23
|
+
require 'iudex-filter/proc_filter'
|
24
|
+
|
25
|
+
class TestProcFilter < MiniTest::Unit::TestCase
|
26
|
+
include Iudex::Filter
|
27
|
+
include Gravitext::HTMap
|
28
|
+
|
29
|
+
UniMap.create_key( 'mkey' )
|
30
|
+
UniMap.define_accessors
|
31
|
+
|
32
|
+
def test_describe
|
33
|
+
assert_equal( [ 'test_proc_filter', __LINE__ ], fltr {}.describe )
|
34
|
+
assert_equal( [ 'test_proc_filter', __LINE__ ], ProcFilter.new {}.describe )
|
35
|
+
assert_equal( [ 'test_proc_filter', __LINE__ ], ProcFilter.new {}.describe )
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_name
|
39
|
+
index = Core::FilterIndex.new
|
40
|
+
name, line = index.register( fltr {} ), __LINE__
|
41
|
+
assert_equal( "i.f.ProcFilter-test_proc_filter-#{line}", name )
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_return
|
45
|
+
assert do_f( fltr { } )
|
46
|
+
assert do_f( fltr { nil } )
|
47
|
+
assert do_f( fltr { true } )
|
48
|
+
assert do_f( fltr { :other_sym } )
|
49
|
+
assert do_f( fltr { false } ) # Consequence
|
50
|
+
|
51
|
+
refute do_f( fltr { :reject } )
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_mutate
|
55
|
+
map = UniMap.new
|
56
|
+
map.mkey = :initial
|
57
|
+
assert do_f( fltr { |m| m.mkey = :mutated }, map )
|
58
|
+
assert_equal( :mutated, map.mkey )
|
59
|
+
end
|
60
|
+
|
61
|
+
def do_f( f, m = UniMap.new )
|
62
|
+
f.filter( m )
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
metadata
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: iudex-filter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 1.0.0
|
6
|
+
platform: java
|
7
|
+
authors:
|
8
|
+
- David Kellum
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-04-04 00:00:00 -07:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rjack-slf4j
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 1.6.1
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: gravitext-util
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ~>
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 1.5.0
|
36
|
+
type: :runtime
|
37
|
+
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: minitest
|
40
|
+
prerelease: false
|
41
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 1.7.1
|
47
|
+
- - <
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: "2.1"
|
50
|
+
type: :development
|
51
|
+
version_requirements: *id003
|
52
|
+
- !ruby/object:Gem::Dependency
|
53
|
+
name: rjack-logback
|
54
|
+
prerelease: false
|
55
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "1.0"
|
61
|
+
type: :development
|
62
|
+
version_requirements: *id004
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: rjack-tarpit
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ~>
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: 1.3.0
|
72
|
+
type: :development
|
73
|
+
version_requirements: *id005
|
74
|
+
description: |-
|
75
|
+
Iudex is a general purpose web crawler and feed processor in
|
76
|
+
ruby/java. The iudex-filter gem contains a fundamental filtering/chain
|
77
|
+
of responsibility sub-system.
|
78
|
+
email:
|
79
|
+
- dek-oss@gravitext.com
|
80
|
+
executables: []
|
81
|
+
|
82
|
+
extensions: []
|
83
|
+
|
84
|
+
extra_rdoc_files:
|
85
|
+
- Manifest.txt
|
86
|
+
- History.rdoc
|
87
|
+
- README.rdoc
|
88
|
+
files:
|
89
|
+
- History.rdoc
|
90
|
+
- Manifest.txt
|
91
|
+
- README.rdoc
|
92
|
+
- Rakefile
|
93
|
+
- pom.xml
|
94
|
+
- lib/iudex-filter/base.rb
|
95
|
+
- lib/iudex-filter.rb
|
96
|
+
- lib/iudex-filter/by_filter_logger.rb
|
97
|
+
- lib/iudex-filter/filter_base.rb
|
98
|
+
- lib/iudex-filter/filter_chain_factory.rb
|
99
|
+
- lib/iudex-filter/key_helper.rb
|
100
|
+
- lib/iudex-filter/proc_filter.rb
|
101
|
+
- test/setup.rb
|
102
|
+
- test/test_filter_base.rb
|
103
|
+
- test/test_filter_chain_factory.rb
|
104
|
+
- test/test_proc_filter.rb
|
105
|
+
- lib/iudex-filter/iudex-filter-1.0.0.jar
|
106
|
+
has_rdoc: true
|
107
|
+
homepage: http://github.com/dekellum/iudex
|
108
|
+
licenses: []
|
109
|
+
|
110
|
+
post_install_message:
|
111
|
+
rdoc_options:
|
112
|
+
- --main
|
113
|
+
- README.rdoc
|
114
|
+
require_paths:
|
115
|
+
- lib
|
116
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
117
|
+
none: false
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: "0"
|
122
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
124
|
+
requirements:
|
125
|
+
- - ">="
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: "0"
|
128
|
+
requirements: []
|
129
|
+
|
130
|
+
rubyforge_project: iudex-filter
|
131
|
+
rubygems_version: 1.5.1
|
132
|
+
signing_key:
|
133
|
+
specification_version: 3
|
134
|
+
summary: Iudex is a general purpose web crawler and feed processor in ruby/java
|
135
|
+
test_files:
|
136
|
+
- test/test_proc_filter.rb
|
137
|
+
- test/test_filter_base.rb
|
138
|
+
- test/test_filter_chain_factory.rb
|