iudex-filter 1.0.0-java → 1.1.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/History.rdoc +10 -0
- data/Manifest.txt +2 -1
- data/Rakefile +3 -3
- data/lib/iudex-filter/base.rb +1 -1
- data/lib/iudex-filter/filter_chain_factory.rb +64 -29
- data/lib/iudex-filter/{iudex-filter-1.0.0.jar → iudex-filter-1.1.0.jar} +0 -0
- data/lib/iudex-filter.rb +3 -0
- data/pom.xml +2 -2
- data/test/test_filter_base.rb +30 -0
- data/test/test_filter_chain_factory.rb +34 -4
- data/test/test_sublist_filter.rb +90 -0
- metadata +11 -13
data/.gemtest
ADDED
File without changes
|
data/History.rdoc
CHANGED
@@ -1,2 +1,12 @@
|
|
1
|
+
=== 1.1.0 (2011-11-13)
|
2
|
+
* Filter chain factory ergonomics:
|
3
|
+
* flatten.compact filters in create_chain, top level filters
|
4
|
+
* main_summary_period, main_by_filter_period on by default
|
5
|
+
* Add listener param to create_chain, create_switch for nested listener
|
6
|
+
* New SubListFilter
|
7
|
+
* Add FilterException to imports (Siddharth Raghavan)
|
8
|
+
* Update to minitest ~> 2.3
|
9
|
+
* Update to gravitext-util ~> 1.5.1 (for UniMap.toString)
|
10
|
+
|
1
11
|
=== 1.0.0 (2011-04-04)
|
2
12
|
* Initial release.
|
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
|
|
4
4
|
require 'iudex-filter/base'
|
5
5
|
|
6
6
|
require 'rubygems'
|
7
|
-
gem 'rjack-tarpit', '~> 1.
|
7
|
+
gem 'rjack-tarpit', '~> 1.4'
|
8
8
|
require 'rjack-tarpit'
|
9
9
|
|
10
10
|
t = RJack::TarPit.new( 'iudex-filter',
|
@@ -14,10 +14,10 @@ t = RJack::TarPit.new( 'iudex-filter',
|
|
14
14
|
t.specify do |h|
|
15
15
|
h.developer( "David Kellum", "dek-oss@gravitext.com" )
|
16
16
|
h.extra_deps += [ [ 'rjack-slf4j', '~> 1.6.1' ],
|
17
|
-
[ 'gravitext-util', '~> 1.5.
|
17
|
+
[ 'gravitext-util', '~> 1.5.1' ] ]
|
18
18
|
|
19
19
|
h.testlib = :minitest
|
20
|
-
h.extra_dev_deps += [ [ 'minitest', '
|
20
|
+
h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
|
21
21
|
[ 'rjack-logback', '>= 1.0' ] ]
|
22
22
|
end
|
23
23
|
|
data/lib/iudex-filter/base.rb
CHANGED
@@ -25,6 +25,9 @@ module Iudex
|
|
25
25
|
class FilterChainFactory
|
26
26
|
attr_reader :description
|
27
27
|
|
28
|
+
attr_accessor :main_summary_period
|
29
|
+
attr_accessor :main_by_filter_period
|
30
|
+
|
28
31
|
include KeyHelper
|
29
32
|
|
30
33
|
def initialize( description = "default" )
|
@@ -33,20 +36,22 @@ module Iudex
|
|
33
36
|
@log = RJack::SLF4J[ [ RJack::SLF4J.to_log_name( self.class ),
|
34
37
|
description ].join('.') ]
|
35
38
|
|
36
|
-
@summary_period = nil
|
37
|
-
@by_filter_period = nil
|
39
|
+
@main_summary_period = 10.0
|
40
|
+
@main_by_filter_period = 60.0
|
38
41
|
|
39
42
|
@index = nil
|
40
43
|
@chain = nil
|
41
44
|
@listener = nil
|
42
45
|
end
|
43
46
|
|
47
|
+
# Deprecated: Use main_summary_period accessor
|
44
48
|
def add_summary_reporter( period_s = 10.0 )
|
45
|
-
@summary_period = period_s
|
49
|
+
@main_summary_period = period_s
|
46
50
|
end
|
47
51
|
|
52
|
+
# Deprecated: Use main_by_filter_period accessor
|
48
53
|
def add_by_filter_reporter( period_s = 60 * 10.0 )
|
49
|
-
@by_filter_period = period_s
|
54
|
+
@main_by_filter_period = period_s
|
50
55
|
end
|
51
56
|
|
52
57
|
def open
|
@@ -54,12 +59,20 @@ module Iudex
|
|
54
59
|
|
55
60
|
@index = FilterIndex.new
|
56
61
|
|
57
|
-
|
62
|
+
# Temp setup of empty listener, since full listeners setup
|
63
|
+
# requires filters, log_and_register which itself requires
|
64
|
+
# listeners via create_chain
|
65
|
+
@listener = place_holder = NoOpListener.new
|
66
|
+
|
67
|
+
flts = filters.flatten.compact
|
58
68
|
log_and_register( flts )
|
59
69
|
|
70
|
+
@chain = create_chain( @description, flts, :main )
|
60
71
|
@listener = ListenerChain.new( listeners )
|
61
|
-
|
62
|
-
|
72
|
+
|
73
|
+
# Now replace the temp listener with the final listener
|
74
|
+
# chain
|
75
|
+
replace_listeners( @chain, place_holder, @listener )
|
63
76
|
|
64
77
|
# With all filters loaded and thus key references, make sure
|
65
78
|
# UniMap accessors are defined (for ruby filters)
|
@@ -101,52 +114,47 @@ module Iudex
|
|
101
114
|
[]
|
102
115
|
end
|
103
116
|
|
104
|
-
def log_listener( desc )
|
105
|
-
LogListener.new( "iudex.filter.core.FilterChain.#{desc}", @index )
|
106
|
-
end
|
107
|
-
|
108
117
|
def listeners
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
ll << SummaryReporter.new( @description, @summary_period )
|
113
|
-
end
|
114
|
-
|
115
|
-
if @by_filter_period
|
116
|
-
ll << ByFilterReporter.new( @index,
|
117
|
-
ByFilterLogger.new( @description, @index ),
|
118
|
-
@by_filter_period )
|
119
|
-
end
|
120
|
-
ll
|
118
|
+
create_listeners( @description,
|
119
|
+
@main_summary_period,
|
120
|
+
@main_by_filter_period )
|
121
121
|
end
|
122
122
|
|
123
123
|
# Create, yield to optional block, and return FilterChain if
|
124
124
|
# flts is not empty. Otherwise return a NoOpFilter and don't
|
125
|
-
# yield. If passed a
|
125
|
+
# yield. If passed a Symbol desc and nil flts, will use both
|
126
126
|
# as description and method to obtain flts array from.
|
127
|
-
def create_chain( desc, flts = nil )
|
127
|
+
def create_chain( desc, flts = nil, listener = nil )
|
128
128
|
|
129
129
|
if desc.is_a?( Symbol )
|
130
130
|
flts = send( desc ) unless flts
|
131
131
|
desc = desc.to_s.gsub( /_/, '-' )
|
132
132
|
end
|
133
133
|
|
134
|
+
flts = flts.flatten.compact if flts
|
135
|
+
|
134
136
|
if flts.nil? || flts.empty?
|
135
137
|
NoOpFilter.new
|
136
138
|
else
|
137
139
|
c = FilterChain.new( desc, flts )
|
138
|
-
|
140
|
+
if listener.nil?
|
141
|
+
c.listener = log_listener( desc )
|
142
|
+
elsif listener == :main
|
143
|
+
c.listener = @listener
|
144
|
+
else
|
145
|
+
c.listener = listener
|
146
|
+
end
|
139
147
|
yield c if block_given?
|
140
148
|
c
|
141
149
|
end
|
142
150
|
end
|
143
151
|
|
144
152
|
# Create a new Switch given selector key and map of values to
|
145
|
-
# filters
|
153
|
+
# filters, or values to [filters,listener]
|
146
154
|
def create_switch( key, value_filters_map )
|
147
155
|
switch = Switch.new
|
148
|
-
value_filters_map.each do |value, filters|
|
149
|
-
create_chain( value.to_s.downcase, filters ) do |chain|
|
156
|
+
value_filters_map.each do |value, (filters, listener)|
|
157
|
+
create_chain( value.to_s.downcase, filters, listener ) do |chain|
|
150
158
|
switch.add_proposition( Selector.new( key, value ), chain )
|
151
159
|
end
|
152
160
|
end
|
@@ -155,6 +163,19 @@ module Iudex
|
|
155
163
|
|
156
164
|
private
|
157
165
|
|
166
|
+
def create_listeners( desc, summary_period, by_filter_period )
|
167
|
+
[ log_listener( desc ),
|
168
|
+
( SummaryReporter.new( desc, summary_period ) if summary_period ),
|
169
|
+
( ByFilterReporter.new( @index,
|
170
|
+
ByFilterLogger.new( desc, @index ),
|
171
|
+
by_filter_period ) if by_filter_period )
|
172
|
+
].compact
|
173
|
+
end
|
174
|
+
|
175
|
+
def log_listener( desc )
|
176
|
+
LogListener.new( "iudex.filter.core.FilterChain.#{desc}", @index )
|
177
|
+
end
|
178
|
+
|
158
179
|
def log_and_register( filters, depth = 0 )
|
159
180
|
filters.each do |filter|
|
160
181
|
name = @index.register( filter )
|
@@ -164,6 +185,20 @@ module Iudex
|
|
164
185
|
end
|
165
186
|
end
|
166
187
|
end
|
188
|
+
|
189
|
+
def replace_listeners( filter, place_holder, listener )
|
190
|
+
if filter.kind_of?( FilterContainer )
|
191
|
+
if filter.kind_of?( FilterChain )
|
192
|
+
if filter.listener == place_holder
|
193
|
+
filter.listener = listener
|
194
|
+
end
|
195
|
+
end
|
196
|
+
filter.children.each do |c|
|
197
|
+
replace_listeners( c, place_holder, listener )
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
167
202
|
end
|
168
203
|
|
169
204
|
end
|
Binary file
|
data/lib/iudex-filter.rb
CHANGED
@@ -27,9 +27,11 @@ module Iudex
|
|
27
27
|
|
28
28
|
import 'iudex.filter.Filter'
|
29
29
|
import 'iudex.filter.FilterContainer'
|
30
|
+
import 'iudex.filter.FilterException'
|
30
31
|
import 'iudex.filter.Described'
|
31
32
|
import 'iudex.filter.Named'
|
32
33
|
import 'iudex.filter.NoOpFilter'
|
34
|
+
import 'iudex.filter.NoOpListener'
|
33
35
|
|
34
36
|
module Core
|
35
37
|
import 'iudex.filter.core.ByFilterReporter'
|
@@ -42,6 +44,7 @@ module Iudex
|
|
42
44
|
import 'iudex.filter.core.MDCUnsetter'
|
43
45
|
import 'iudex.filter.core.Selector'
|
44
46
|
import 'iudex.filter.core.Setter'
|
47
|
+
import 'iudex.filter.core.SubListFilter'
|
45
48
|
import 'iudex.filter.core.SummaryReporter'
|
46
49
|
import 'iudex.filter.core.Switch'
|
47
50
|
end
|
data/pom.xml
CHANGED
@@ -5,13 +5,13 @@
|
|
5
5
|
<groupId>iudex</groupId>
|
6
6
|
<artifactId>iudex-filter</artifactId>
|
7
7
|
<packaging>jar</packaging>
|
8
|
-
<version>1.0.0</version>
|
8
|
+
<version>1.1.0</version>
|
9
9
|
<name>Iudex Filter</name>
|
10
10
|
|
11
11
|
<parent>
|
12
12
|
<groupId>iudex</groupId>
|
13
13
|
<artifactId>iudex-parent</artifactId>
|
14
|
-
<version>1.
|
14
|
+
<version>1.1</version>
|
15
15
|
<relativePath>..</relativePath>
|
16
16
|
</parent>
|
17
17
|
|
data/test/test_filter_base.rb
CHANGED
@@ -29,6 +29,8 @@ class TestFilterBase < MiniTest::Unit::TestCase
|
|
29
29
|
include Iudex::Filter
|
30
30
|
include Gravitext::HTMap
|
31
31
|
|
32
|
+
import 'iudex.filter.FilterListener'
|
33
|
+
|
32
34
|
def test_base_name
|
33
35
|
f = FilterBase.new
|
34
36
|
assert_equal( "i.f.FilterBase", f.name )
|
@@ -39,4 +41,32 @@ class TestFilterBase < MiniTest::Unit::TestCase
|
|
39
41
|
assert_equal( "TestFilter", f.name )
|
40
42
|
end
|
41
43
|
|
44
|
+
# Filter exception may be raised from Ruby, and is handled as per
|
45
|
+
# Java: caught by chain, forwarded to listener as failure
|
46
|
+
def test_raise_filter_exception
|
47
|
+
|
48
|
+
test_filter = FilterBase.new
|
49
|
+
def test_filter.filter( map )
|
50
|
+
raise FilterException.new( "Expected" )
|
51
|
+
end
|
52
|
+
|
53
|
+
listener = FilterListener.new
|
54
|
+
class << listener
|
55
|
+
attr_accessor :fail
|
56
|
+
def failed( filter, map, x )
|
57
|
+
@fail = [ filter, map, x ]
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
fc = Core::FilterChain.new( "test", [ test_filter ] )
|
62
|
+
fc.listener = listener
|
63
|
+
|
64
|
+
map = UniMap.new
|
65
|
+
refute( fc.filter( map ) )
|
66
|
+
|
67
|
+
assert_equal( test_filter, listener.fail[0] )
|
68
|
+
assert_equal( map, listener.fail[1] )
|
69
|
+
assert_instance_of( FilterException, listener.fail[2] )
|
70
|
+
end
|
71
|
+
|
42
72
|
end
|
@@ -54,8 +54,8 @@ class TestFilterChainFactory < MiniTest::Unit::TestCase
|
|
54
54
|
|
55
55
|
def test_filter_chain
|
56
56
|
fcf = FilterChainFactory.new( "test" )
|
57
|
-
fcf.
|
58
|
-
fcf.
|
57
|
+
fcf.main_summary_period = 1.0
|
58
|
+
fcf.main_by_filter_period = 2.5
|
59
59
|
|
60
60
|
def fcf.filters
|
61
61
|
[ MDCSetter.new( TKEY ) ] + super +
|
@@ -67,7 +67,7 @@ class TestFilterChainFactory < MiniTest::Unit::TestCase
|
|
67
67
|
end
|
68
68
|
|
69
69
|
2.times do |r|
|
70
|
-
|
70
|
+
refute( fcf.open? )
|
71
71
|
|
72
72
|
fcf.filter do |chain|
|
73
73
|
1000.times do |t|
|
@@ -78,9 +78,39 @@ class TestFilterChainFactory < MiniTest::Unit::TestCase
|
|
78
78
|
end
|
79
79
|
end
|
80
80
|
|
81
|
-
|
81
|
+
refute( fcf.open? )
|
82
82
|
end
|
83
83
|
|
84
84
|
end
|
85
85
|
|
86
|
+
def test_nested_reporting
|
87
|
+
fcf = FilterChainFactory.new( "test" )
|
88
|
+
class << fcf
|
89
|
+
attr_accessor :summary_reporter
|
90
|
+
def filters
|
91
|
+
[ create_chain( :sub_filters, nil, :main ) ]
|
92
|
+
end
|
93
|
+
def listeners
|
94
|
+
super.tap do |ll|
|
95
|
+
@summary_reporter = ll[1] #FIXME: Brittle
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
def sub_filters
|
100
|
+
[ 6, 4, 6, 6 ].map { |p| RandomFilter.new( p ) }
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
fcf.filter do |chain|
|
105
|
+
100.times do |t|
|
106
|
+
map = UniMap.new
|
107
|
+
map.tkey = t
|
108
|
+
chain.filter( map )
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
assert_equal( 100, fcf.summary_reporter.total_count )
|
113
|
+
|
114
|
+
end
|
115
|
+
|
86
116
|
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'iudex-filter'
|
23
|
+
require 'iudex-filter/proc_filter'
|
24
|
+
|
25
|
+
class TestSubListFilter < MiniTest::Unit::TestCase
|
26
|
+
include Iudex::Filter
|
27
|
+
include Iudex::Filter::Core
|
28
|
+
include Gravitext::HTMap
|
29
|
+
|
30
|
+
SLIST = UniMap.create_key( 'slist', Java::java.util.List )
|
31
|
+
SVAL = UniMap.create_key( 'sval' )
|
32
|
+
|
33
|
+
UniMap.define_accessors
|
34
|
+
|
35
|
+
def test_remove_rejects
|
36
|
+
( -1 .. 2 ).each do |c|
|
37
|
+
map = UniMap.new
|
38
|
+
map.slist = ( [ UniMap.new ] * c if c >= 0 )
|
39
|
+
f = sublist_filter( [ fltr { :reject } ] )
|
40
|
+
f.filter( map )
|
41
|
+
|
42
|
+
refute map.slist
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_do_not_remove_rejects_when_set
|
47
|
+
[ -1, 1, 2 ].each do |c|
|
48
|
+
map = UniMap.new
|
49
|
+
map.slist = pre = ( [ UniMap.new ] * c if c >= 0 )
|
50
|
+
f = sublist_filter( [ fltr { :reject } ], false )
|
51
|
+
f.filter( map )
|
52
|
+
|
53
|
+
assert_equal( pre, map.slist )
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_do_not_remove_accepted
|
58
|
+
[ -1, 1, 2 ].each do |c|
|
59
|
+
map = UniMap.new
|
60
|
+
map.slist = pre = ( [ UniMap.new ] * c if c >= 0 )
|
61
|
+
f = sublist_filter( [] ) #accept all
|
62
|
+
f.filter( map )
|
63
|
+
|
64
|
+
assert_equal( pre, map.slist )
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def test_modify
|
69
|
+
( -1 .. 2 ).each do |c|
|
70
|
+
map = UniMap.new
|
71
|
+
map.slist = pre = ( [ UniMap.new ] * c if c >= 0 )
|
72
|
+
f = sublist_filter( [ fltr { |s| s.sval = :mod } ] )
|
73
|
+
f.filter( map )
|
74
|
+
|
75
|
+
assert_equal( Array( pre ).length,
|
76
|
+
Array( map.slist ).length )
|
77
|
+
|
78
|
+
Array( map.slist ).each do |s|
|
79
|
+
assert_equal( :mod, s.sval )
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def sublist_filter( sfilters, remove_rejects = true )
|
85
|
+
SubListFilter.new( SLIST,
|
86
|
+
FilterChain.new( "sub", sfilters ),
|
87
|
+
remove_rejects )
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: iudex-filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.0.0
|
5
|
+
version: 1.1.0
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- David Kellum
|
@@ -10,8 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
14
|
-
default_executable:
|
13
|
+
date: 2011-11-13 00:00:00 Z
|
15
14
|
dependencies:
|
16
15
|
- !ruby/object:Gem::Dependency
|
17
16
|
name: rjack-slf4j
|
@@ -32,7 +31,7 @@ dependencies:
|
|
32
31
|
requirements:
|
33
32
|
- - ~>
|
34
33
|
- !ruby/object:Gem::Version
|
35
|
-
version: 1.5.
|
34
|
+
version: 1.5.1
|
36
35
|
type: :runtime
|
37
36
|
version_requirements: *id002
|
38
37
|
- !ruby/object:Gem::Dependency
|
@@ -41,12 +40,9 @@ dependencies:
|
|
41
40
|
requirement: &id003 !ruby/object:Gem::Requirement
|
42
41
|
none: false
|
43
42
|
requirements:
|
44
|
-
- -
|
45
|
-
- !ruby/object:Gem::Version
|
46
|
-
version: 1.7.1
|
47
|
-
- - <
|
43
|
+
- - ~>
|
48
44
|
- !ruby/object:Gem::Version
|
49
|
-
version: "2.
|
45
|
+
version: "2.3"
|
50
46
|
type: :development
|
51
47
|
version_requirements: *id003
|
52
48
|
- !ruby/object:Gem::Dependency
|
@@ -68,7 +64,7 @@ dependencies:
|
|
68
64
|
requirements:
|
69
65
|
- - ~>
|
70
66
|
- !ruby/object:Gem::Version
|
71
|
-
version: 1.
|
67
|
+
version: 1.4.0
|
72
68
|
type: :development
|
73
69
|
version_requirements: *id005
|
74
70
|
description: |-
|
@@ -102,8 +98,9 @@ files:
|
|
102
98
|
- test/test_filter_base.rb
|
103
99
|
- test/test_filter_chain_factory.rb
|
104
100
|
- test/test_proc_filter.rb
|
105
|
-
-
|
106
|
-
|
101
|
+
- test/test_sublist_filter.rb
|
102
|
+
- lib/iudex-filter/iudex-filter-1.1.0.jar
|
103
|
+
- .gemtest
|
107
104
|
homepage: http://github.com/dekellum/iudex
|
108
105
|
licenses: []
|
109
106
|
|
@@ -128,11 +125,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
125
|
requirements: []
|
129
126
|
|
130
127
|
rubyforge_project: iudex-filter
|
131
|
-
rubygems_version: 1.
|
128
|
+
rubygems_version: 1.8.9
|
132
129
|
signing_key:
|
133
130
|
specification_version: 3
|
134
131
|
summary: Iudex is a general purpose web crawler and feed processor in ruby/java
|
135
132
|
test_files:
|
136
133
|
- test/test_proc_filter.rb
|
134
|
+
- test/test_sublist_filter.rb
|
137
135
|
- test/test_filter_base.rb
|
138
136
|
- test/test_filter_chain_factory.rb
|