iudex-filter 1.0.0-java → 1.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/History.rdoc +10 -0
- data/Manifest.txt +2 -1
- data/Rakefile +3 -3
- data/lib/iudex-filter/base.rb +1 -1
- data/lib/iudex-filter/filter_chain_factory.rb +64 -29
- data/lib/iudex-filter/{iudex-filter-1.0.0.jar → iudex-filter-1.1.0.jar} +0 -0
- data/lib/iudex-filter.rb +3 -0
- data/pom.xml +2 -2
- data/test/test_filter_base.rb +30 -0
- data/test/test_filter_chain_factory.rb +34 -4
- data/test/test_sublist_filter.rb +90 -0
- metadata +11 -13
data/.gemtest
ADDED
File without changes
|
data/History.rdoc
CHANGED
@@ -1,2 +1,12 @@
|
|
1
|
+
=== 1.1.0 (2011-11-13)
|
2
|
+
* Filter chain factory ergonomics:
|
3
|
+
* flatten.compact filters in create_chain, top level filters
|
4
|
+
* main_summary_period, main_by_filter_period on by default
|
5
|
+
* Add listener param to create_chain, create_switch for nested listener
|
6
|
+
* New SubListFilter
|
7
|
+
* Add FilterException to imports (Siddharth Raghavan)
|
8
|
+
* Update to minitest ~> 2.3
|
9
|
+
* Update to gravitext-util ~> 1.5.1 (for UniMap.toString)
|
10
|
+
|
1
11
|
=== 1.0.0 (2011-04-04)
|
2
12
|
* Initial release.
|
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH << './lib'
|
|
4
4
|
require 'iudex-filter/base'
|
5
5
|
|
6
6
|
require 'rubygems'
|
7
|
-
gem 'rjack-tarpit', '~> 1.
|
7
|
+
gem 'rjack-tarpit', '~> 1.4'
|
8
8
|
require 'rjack-tarpit'
|
9
9
|
|
10
10
|
t = RJack::TarPit.new( 'iudex-filter',
|
@@ -14,10 +14,10 @@ t = RJack::TarPit.new( 'iudex-filter',
|
|
14
14
|
t.specify do |h|
|
15
15
|
h.developer( "David Kellum", "dek-oss@gravitext.com" )
|
16
16
|
h.extra_deps += [ [ 'rjack-slf4j', '~> 1.6.1' ],
|
17
|
-
[ 'gravitext-util', '~> 1.5.
|
17
|
+
[ 'gravitext-util', '~> 1.5.1' ] ]
|
18
18
|
|
19
19
|
h.testlib = :minitest
|
20
|
-
h.extra_dev_deps += [ [ 'minitest', '
|
20
|
+
h.extra_dev_deps += [ [ 'minitest', '~> 2.3' ],
|
21
21
|
[ 'rjack-logback', '>= 1.0' ] ]
|
22
22
|
end
|
23
23
|
|
data/lib/iudex-filter/base.rb
CHANGED
@@ -25,6 +25,9 @@ module Iudex
|
|
25
25
|
class FilterChainFactory
|
26
26
|
attr_reader :description
|
27
27
|
|
28
|
+
attr_accessor :main_summary_period
|
29
|
+
attr_accessor :main_by_filter_period
|
30
|
+
|
28
31
|
include KeyHelper
|
29
32
|
|
30
33
|
def initialize( description = "default" )
|
@@ -33,20 +36,22 @@ module Iudex
|
|
33
36
|
@log = RJack::SLF4J[ [ RJack::SLF4J.to_log_name( self.class ),
|
34
37
|
description ].join('.') ]
|
35
38
|
|
36
|
-
@
|
37
|
-
@
|
39
|
+
@main_summary_period = 10.0
|
40
|
+
@main_by_filter_period = 60.0
|
38
41
|
|
39
42
|
@index = nil
|
40
43
|
@chain = nil
|
41
44
|
@listener = nil
|
42
45
|
end
|
43
46
|
|
47
|
+
# Deprecated: Use main_summary_period accessor
|
44
48
|
def add_summary_reporter( period_s = 10.0 )
|
45
|
-
@
|
49
|
+
@main_summary_period = period_s
|
46
50
|
end
|
47
51
|
|
52
|
+
# Deprecated: Use main_by_filter_period accessor
|
48
53
|
def add_by_filter_reporter( period_s = 60 * 10.0 )
|
49
|
-
@
|
54
|
+
@main_by_filter_period = period_s
|
50
55
|
end
|
51
56
|
|
52
57
|
def open
|
@@ -54,12 +59,20 @@ module Iudex
|
|
54
59
|
|
55
60
|
@index = FilterIndex.new
|
56
61
|
|
57
|
-
|
62
|
+
# Temp setup of empty listener, since full listeners setup
|
63
|
+
# requires filters, log_and_register which itself requires
|
64
|
+
# listeners via create_chain
|
65
|
+
@listener = place_holder = NoOpListener.new
|
66
|
+
|
67
|
+
flts = filters.flatten.compact
|
58
68
|
log_and_register( flts )
|
59
69
|
|
70
|
+
@chain = create_chain( @description, flts, :main )
|
60
71
|
@listener = ListenerChain.new( listeners )
|
61
|
-
|
62
|
-
|
72
|
+
|
73
|
+
# Now replace the temp listener with the final listener
|
74
|
+
# chain
|
75
|
+
replace_listeners( @chain, place_holder, @listener )
|
63
76
|
|
64
77
|
# With all filters loaded and thus key references, make sure
|
65
78
|
# UniMap accessors are defined (for ruby filters)
|
@@ -101,52 +114,47 @@ module Iudex
|
|
101
114
|
[]
|
102
115
|
end
|
103
116
|
|
104
|
-
def log_listener( desc )
|
105
|
-
LogListener.new( "iudex.filter.core.FilterChain.#{desc}", @index )
|
106
|
-
end
|
107
|
-
|
108
117
|
def listeners
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
ll << SummaryReporter.new( @description, @summary_period )
|
113
|
-
end
|
114
|
-
|
115
|
-
if @by_filter_period
|
116
|
-
ll << ByFilterReporter.new( @index,
|
117
|
-
ByFilterLogger.new( @description, @index ),
|
118
|
-
@by_filter_period )
|
119
|
-
end
|
120
|
-
ll
|
118
|
+
create_listeners( @description,
|
119
|
+
@main_summary_period,
|
120
|
+
@main_by_filter_period )
|
121
121
|
end
|
122
122
|
|
123
123
|
# Create, yield to optional block, and return FilterChain if
|
124
124
|
# flts is not empty. Otherwise return a NoOpFilter and don't
|
125
|
-
# yield. If passed a
|
125
|
+
# yield. If passed a Symbol desc and nil flts, will use both
|
126
126
|
# as description and method to obtain flts array from.
|
127
|
-
def create_chain( desc, flts = nil )
|
127
|
+
def create_chain( desc, flts = nil, listener = nil )
|
128
128
|
|
129
129
|
if desc.is_a?( Symbol )
|
130
130
|
flts = send( desc ) unless flts
|
131
131
|
desc = desc.to_s.gsub( /_/, '-' )
|
132
132
|
end
|
133
133
|
|
134
|
+
flts = flts.flatten.compact if flts
|
135
|
+
|
134
136
|
if flts.nil? || flts.empty?
|
135
137
|
NoOpFilter.new
|
136
138
|
else
|
137
139
|
c = FilterChain.new( desc, flts )
|
138
|
-
|
140
|
+
if listener.nil?
|
141
|
+
c.listener = log_listener( desc )
|
142
|
+
elsif listener == :main
|
143
|
+
c.listener = @listener
|
144
|
+
else
|
145
|
+
c.listener = listener
|
146
|
+
end
|
139
147
|
yield c if block_given?
|
140
148
|
c
|
141
149
|
end
|
142
150
|
end
|
143
151
|
|
144
152
|
# Create a new Switch given selector key and map of values to
|
145
|
-
# filters
|
153
|
+
# filters, or values to [filters,listener]
|
146
154
|
def create_switch( key, value_filters_map )
|
147
155
|
switch = Switch.new
|
148
|
-
value_filters_map.each do |value, filters|
|
149
|
-
create_chain( value.to_s.downcase, filters ) do |chain|
|
156
|
+
value_filters_map.each do |value, (filters, listener)|
|
157
|
+
create_chain( value.to_s.downcase, filters, listener ) do |chain|
|
150
158
|
switch.add_proposition( Selector.new( key, value ), chain )
|
151
159
|
end
|
152
160
|
end
|
@@ -155,6 +163,19 @@ module Iudex
|
|
155
163
|
|
156
164
|
private
|
157
165
|
|
166
|
+
def create_listeners( desc, summary_period, by_filter_period )
|
167
|
+
[ log_listener( desc ),
|
168
|
+
( SummaryReporter.new( desc, summary_period ) if summary_period ),
|
169
|
+
( ByFilterReporter.new( @index,
|
170
|
+
ByFilterLogger.new( desc, @index ),
|
171
|
+
by_filter_period ) if by_filter_period )
|
172
|
+
].compact
|
173
|
+
end
|
174
|
+
|
175
|
+
def log_listener( desc )
|
176
|
+
LogListener.new( "iudex.filter.core.FilterChain.#{desc}", @index )
|
177
|
+
end
|
178
|
+
|
158
179
|
def log_and_register( filters, depth = 0 )
|
159
180
|
filters.each do |filter|
|
160
181
|
name = @index.register( filter )
|
@@ -164,6 +185,20 @@ module Iudex
|
|
164
185
|
end
|
165
186
|
end
|
166
187
|
end
|
188
|
+
|
189
|
+
def replace_listeners( filter, place_holder, listener )
|
190
|
+
if filter.kind_of?( FilterContainer )
|
191
|
+
if filter.kind_of?( FilterChain )
|
192
|
+
if filter.listener == place_holder
|
193
|
+
filter.listener = listener
|
194
|
+
end
|
195
|
+
end
|
196
|
+
filter.children.each do |c|
|
197
|
+
replace_listeners( c, place_holder, listener )
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
167
202
|
end
|
168
203
|
|
169
204
|
end
|
Binary file
|
data/lib/iudex-filter.rb
CHANGED
@@ -27,9 +27,11 @@ module Iudex
|
|
27
27
|
|
28
28
|
import 'iudex.filter.Filter'
|
29
29
|
import 'iudex.filter.FilterContainer'
|
30
|
+
import 'iudex.filter.FilterException'
|
30
31
|
import 'iudex.filter.Described'
|
31
32
|
import 'iudex.filter.Named'
|
32
33
|
import 'iudex.filter.NoOpFilter'
|
34
|
+
import 'iudex.filter.NoOpListener'
|
33
35
|
|
34
36
|
module Core
|
35
37
|
import 'iudex.filter.core.ByFilterReporter'
|
@@ -42,6 +44,7 @@ module Iudex
|
|
42
44
|
import 'iudex.filter.core.MDCUnsetter'
|
43
45
|
import 'iudex.filter.core.Selector'
|
44
46
|
import 'iudex.filter.core.Setter'
|
47
|
+
import 'iudex.filter.core.SubListFilter'
|
45
48
|
import 'iudex.filter.core.SummaryReporter'
|
46
49
|
import 'iudex.filter.core.Switch'
|
47
50
|
end
|
data/pom.xml
CHANGED
@@ -5,13 +5,13 @@
|
|
5
5
|
<groupId>iudex</groupId>
|
6
6
|
<artifactId>iudex-filter</artifactId>
|
7
7
|
<packaging>jar</packaging>
|
8
|
-
<version>1.
|
8
|
+
<version>1.1.0</version>
|
9
9
|
<name>Iudex Filter</name>
|
10
10
|
|
11
11
|
<parent>
|
12
12
|
<groupId>iudex</groupId>
|
13
13
|
<artifactId>iudex-parent</artifactId>
|
14
|
-
<version>1.
|
14
|
+
<version>1.1</version>
|
15
15
|
<relativePath>..</relativePath>
|
16
16
|
</parent>
|
17
17
|
|
data/test/test_filter_base.rb
CHANGED
@@ -29,6 +29,8 @@ class TestFilterBase < MiniTest::Unit::TestCase
|
|
29
29
|
include Iudex::Filter
|
30
30
|
include Gravitext::HTMap
|
31
31
|
|
32
|
+
import 'iudex.filter.FilterListener'
|
33
|
+
|
32
34
|
def test_base_name
|
33
35
|
f = FilterBase.new
|
34
36
|
assert_equal( "i.f.FilterBase", f.name )
|
@@ -39,4 +41,32 @@ class TestFilterBase < MiniTest::Unit::TestCase
|
|
39
41
|
assert_equal( "TestFilter", f.name )
|
40
42
|
end
|
41
43
|
|
44
|
+
# Filter exception may be raised from Ruby, and is handled as per
|
45
|
+
# Java: caught by chain, forwarded to listener as failure
|
46
|
+
def test_raise_filter_exception
|
47
|
+
|
48
|
+
test_filter = FilterBase.new
|
49
|
+
def test_filter.filter( map )
|
50
|
+
raise FilterException.new( "Expected" )
|
51
|
+
end
|
52
|
+
|
53
|
+
listener = FilterListener.new
|
54
|
+
class << listener
|
55
|
+
attr_accessor :fail
|
56
|
+
def failed( filter, map, x )
|
57
|
+
@fail = [ filter, map, x ]
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
fc = Core::FilterChain.new( "test", [ test_filter ] )
|
62
|
+
fc.listener = listener
|
63
|
+
|
64
|
+
map = UniMap.new
|
65
|
+
refute( fc.filter( map ) )
|
66
|
+
|
67
|
+
assert_equal( test_filter, listener.fail[0] )
|
68
|
+
assert_equal( map, listener.fail[1] )
|
69
|
+
assert_instance_of( FilterException, listener.fail[2] )
|
70
|
+
end
|
71
|
+
|
42
72
|
end
|
@@ -54,8 +54,8 @@ class TestFilterChainFactory < MiniTest::Unit::TestCase
|
|
54
54
|
|
55
55
|
def test_filter_chain
|
56
56
|
fcf = FilterChainFactory.new( "test" )
|
57
|
-
fcf.
|
58
|
-
fcf.
|
57
|
+
fcf.main_summary_period = 1.0
|
58
|
+
fcf.main_by_filter_period = 2.5
|
59
59
|
|
60
60
|
def fcf.filters
|
61
61
|
[ MDCSetter.new( TKEY ) ] + super +
|
@@ -67,7 +67,7 @@ class TestFilterChainFactory < MiniTest::Unit::TestCase
|
|
67
67
|
end
|
68
68
|
|
69
69
|
2.times do |r|
|
70
|
-
|
70
|
+
refute( fcf.open? )
|
71
71
|
|
72
72
|
fcf.filter do |chain|
|
73
73
|
1000.times do |t|
|
@@ -78,9 +78,39 @@ class TestFilterChainFactory < MiniTest::Unit::TestCase
|
|
78
78
|
end
|
79
79
|
end
|
80
80
|
|
81
|
-
|
81
|
+
refute( fcf.open? )
|
82
82
|
end
|
83
83
|
|
84
84
|
end
|
85
85
|
|
86
|
+
def test_nested_reporting
|
87
|
+
fcf = FilterChainFactory.new( "test" )
|
88
|
+
class << fcf
|
89
|
+
attr_accessor :summary_reporter
|
90
|
+
def filters
|
91
|
+
[ create_chain( :sub_filters, nil, :main ) ]
|
92
|
+
end
|
93
|
+
def listeners
|
94
|
+
super.tap do |ll|
|
95
|
+
@summary_reporter = ll[1] #FIXME: Brittle
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
def sub_filters
|
100
|
+
[ 6, 4, 6, 6 ].map { |p| RandomFilter.new( p ) }
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
fcf.filter do |chain|
|
105
|
+
100.times do |t|
|
106
|
+
map = UniMap.new
|
107
|
+
map.tkey = t
|
108
|
+
chain.filter( map )
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
assert_equal( 100, fcf.summary_reporter.total_count )
|
113
|
+
|
114
|
+
end
|
115
|
+
|
86
116
|
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'iudex-filter'
|
23
|
+
require 'iudex-filter/proc_filter'
|
24
|
+
|
25
|
+
class TestSubListFilter < MiniTest::Unit::TestCase
|
26
|
+
include Iudex::Filter
|
27
|
+
include Iudex::Filter::Core
|
28
|
+
include Gravitext::HTMap
|
29
|
+
|
30
|
+
SLIST = UniMap.create_key( 'slist', Java::java.util.List )
|
31
|
+
SVAL = UniMap.create_key( 'sval' )
|
32
|
+
|
33
|
+
UniMap.define_accessors
|
34
|
+
|
35
|
+
def test_remove_rejects
|
36
|
+
( -1 .. 2 ).each do |c|
|
37
|
+
map = UniMap.new
|
38
|
+
map.slist = ( [ UniMap.new ] * c if c >= 0 )
|
39
|
+
f = sublist_filter( [ fltr { :reject } ] )
|
40
|
+
f.filter( map )
|
41
|
+
|
42
|
+
refute map.slist
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_do_not_remove_rejects_when_set
|
47
|
+
[ -1, 1, 2 ].each do |c|
|
48
|
+
map = UniMap.new
|
49
|
+
map.slist = pre = ( [ UniMap.new ] * c if c >= 0 )
|
50
|
+
f = sublist_filter( [ fltr { :reject } ], false )
|
51
|
+
f.filter( map )
|
52
|
+
|
53
|
+
assert_equal( pre, map.slist )
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_do_not_remove_accepted
|
58
|
+
[ -1, 1, 2 ].each do |c|
|
59
|
+
map = UniMap.new
|
60
|
+
map.slist = pre = ( [ UniMap.new ] * c if c >= 0 )
|
61
|
+
f = sublist_filter( [] ) #accept all
|
62
|
+
f.filter( map )
|
63
|
+
|
64
|
+
assert_equal( pre, map.slist )
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def test_modify
|
69
|
+
( -1 .. 2 ).each do |c|
|
70
|
+
map = UniMap.new
|
71
|
+
map.slist = pre = ( [ UniMap.new ] * c if c >= 0 )
|
72
|
+
f = sublist_filter( [ fltr { |s| s.sval = :mod } ] )
|
73
|
+
f.filter( map )
|
74
|
+
|
75
|
+
assert_equal( Array( pre ).length,
|
76
|
+
Array( map.slist ).length )
|
77
|
+
|
78
|
+
Array( map.slist ).each do |s|
|
79
|
+
assert_equal( :mod, s.sval )
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def sublist_filter( sfilters, remove_rejects = true )
|
85
|
+
SubListFilter.new( SLIST,
|
86
|
+
FilterChain.new( "sub", sfilters ),
|
87
|
+
remove_rejects )
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: iudex-filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.
|
5
|
+
version: 1.1.0
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- David Kellum
|
@@ -10,8 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
14
|
-
default_executable:
|
13
|
+
date: 2011-11-13 00:00:00 Z
|
15
14
|
dependencies:
|
16
15
|
- !ruby/object:Gem::Dependency
|
17
16
|
name: rjack-slf4j
|
@@ -32,7 +31,7 @@ dependencies:
|
|
32
31
|
requirements:
|
33
32
|
- - ~>
|
34
33
|
- !ruby/object:Gem::Version
|
35
|
-
version: 1.5.
|
34
|
+
version: 1.5.1
|
36
35
|
type: :runtime
|
37
36
|
version_requirements: *id002
|
38
37
|
- !ruby/object:Gem::Dependency
|
@@ -41,12 +40,9 @@ dependencies:
|
|
41
40
|
requirement: &id003 !ruby/object:Gem::Requirement
|
42
41
|
none: false
|
43
42
|
requirements:
|
44
|
-
- -
|
45
|
-
- !ruby/object:Gem::Version
|
46
|
-
version: 1.7.1
|
47
|
-
- - <
|
43
|
+
- - ~>
|
48
44
|
- !ruby/object:Gem::Version
|
49
|
-
version: "2.
|
45
|
+
version: "2.3"
|
50
46
|
type: :development
|
51
47
|
version_requirements: *id003
|
52
48
|
- !ruby/object:Gem::Dependency
|
@@ -68,7 +64,7 @@ dependencies:
|
|
68
64
|
requirements:
|
69
65
|
- - ~>
|
70
66
|
- !ruby/object:Gem::Version
|
71
|
-
version: 1.
|
67
|
+
version: 1.4.0
|
72
68
|
type: :development
|
73
69
|
version_requirements: *id005
|
74
70
|
description: |-
|
@@ -102,8 +98,9 @@ files:
|
|
102
98
|
- test/test_filter_base.rb
|
103
99
|
- test/test_filter_chain_factory.rb
|
104
100
|
- test/test_proc_filter.rb
|
105
|
-
-
|
106
|
-
|
101
|
+
- test/test_sublist_filter.rb
|
102
|
+
- lib/iudex-filter/iudex-filter-1.1.0.jar
|
103
|
+
- .gemtest
|
107
104
|
homepage: http://github.com/dekellum/iudex
|
108
105
|
licenses: []
|
109
106
|
|
@@ -128,11 +125,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
125
|
requirements: []
|
129
126
|
|
130
127
|
rubyforge_project: iudex-filter
|
131
|
-
rubygems_version: 1.
|
128
|
+
rubygems_version: 1.8.9
|
132
129
|
signing_key:
|
133
130
|
specification_version: 3
|
134
131
|
summary: Iudex is a general purpose web crawler and feed processor in ruby/java
|
135
132
|
test_files:
|
136
133
|
- test/test_proc_filter.rb
|
134
|
+
- test/test_sublist_filter.rb
|
137
135
|
- test/test_filter_base.rb
|
138
136
|
- test/test_filter_chain_factory.rb
|