iudex-filter 1.0.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.rdoc +2 -0
- data/Manifest.txt +17 -0
- data/README.rdoc +25 -0
- data/Rakefile +40 -0
- data/lib/iudex-filter/base.rb +23 -0
- data/lib/iudex-filter/by_filter_logger.rb +74 -0
- data/lib/iudex-filter/filter_base.rb +46 -0
- data/lib/iudex-filter/filter_chain_factory.rb +171 -0
- data/lib/iudex-filter/iudex-filter-1.0.0.jar +0 -0
- data/lib/iudex-filter/key_helper.rb +56 -0
- data/lib/iudex-filter/proc_filter.rb +50 -0
- data/lib/iudex-filter.rb +52 -0
- data/pom.xml +56 -0
- data/test/setup.rb +34 -0
- data/test/test_filter_base.rb +42 -0
- data/test/test_filter_chain_factory.rb +86 -0
- data/test/test_proc_filter.rb +65 -0
- metadata +138 -0
data/History.rdoc
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
History.rdoc
|
2
|
+
Manifest.txt
|
3
|
+
README.rdoc
|
4
|
+
Rakefile
|
5
|
+
pom.xml
|
6
|
+
lib/iudex-filter/base.rb
|
7
|
+
lib/iudex-filter.rb
|
8
|
+
lib/iudex-filter/by_filter_logger.rb
|
9
|
+
lib/iudex-filter/filter_base.rb
|
10
|
+
lib/iudex-filter/filter_chain_factory.rb
|
11
|
+
lib/iudex-filter/key_helper.rb
|
12
|
+
lib/iudex-filter/proc_filter.rb
|
13
|
+
test/setup.rb
|
14
|
+
test/test_filter_base.rb
|
15
|
+
test/test_filter_chain_factory.rb
|
16
|
+
test/test_proc_filter.rb
|
17
|
+
lib/iudex-filter/iudex-filter-1.0.0.jar
|
data/README.rdoc
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
= iudex-filter
|
2
|
+
|
3
|
+
* http://github.com/dekellum/iudex
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Iudex is a general purpose web crawler and feed processor in
|
8
|
+
ruby/java. The iudex-filter gem contains a fundamental filtering/chain
|
9
|
+
of responsibility sub-system.
|
10
|
+
|
11
|
+
== License
|
12
|
+
|
13
|
+
Copyright (c) 2008-2011 David Kellum
|
14
|
+
|
15
|
+
Licensed under the Apache License, Version 2.0 (the "License"); you
|
16
|
+
may not use this file except in compliance with the License. You
|
17
|
+
may obtain a copy of the License at:
|
18
|
+
|
19
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
20
|
+
|
21
|
+
Unless required by applicable law or agreed to in writing, software
|
22
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
23
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
24
|
+
implied. See the License for the specific language governing
|
25
|
+
permissions and limitations under the License.
|
data/Rakefile
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
$LOAD_PATH << './lib'
|
4
|
+
require 'iudex-filter/base'
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
gem 'rjack-tarpit', '~> 1.2'
|
8
|
+
require 'rjack-tarpit'
|
9
|
+
|
10
|
+
t = RJack::TarPit.new( 'iudex-filter',
|
11
|
+
Iudex::Filter::VERSION,
|
12
|
+
:no_assembly, :java_platform )
|
13
|
+
|
14
|
+
t.specify do |h|
|
15
|
+
h.developer( "David Kellum", "dek-oss@gravitext.com" )
|
16
|
+
h.extra_deps += [ [ 'rjack-slf4j', '~> 1.6.1' ],
|
17
|
+
[ 'gravitext-util', '~> 1.5.0' ] ]
|
18
|
+
|
19
|
+
h.testlib = :minitest
|
20
|
+
h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
|
21
|
+
[ 'rjack-logback', '>= 1.0' ] ]
|
22
|
+
end
|
23
|
+
|
24
|
+
file 'Manifest.txt' => "lib/#{t.name}/base.rb"
|
25
|
+
|
26
|
+
task :check_pom_version do
|
27
|
+
t.test_line_match( 'pom.xml', /<version>/, /#{t.version}/ )
|
28
|
+
end
|
29
|
+
task :check_history_version do
|
30
|
+
t.test_line_match( 'History.rdoc', /^==/, / #{t.version} / )
|
31
|
+
end
|
32
|
+
task :check_history_date do
|
33
|
+
t.test_line_match( 'History.rdoc', /^==/, /\([0-9\-]+\)$/ )
|
34
|
+
end
|
35
|
+
|
36
|
+
task :gem => [ :check_pom_version, :check_history_version ]
|
37
|
+
task :tag => [ :check_pom_version, :check_history_version, :check_history_date ]
|
38
|
+
task :push => [ :check_history_date ]
|
39
|
+
|
40
|
+
t.define_tasks
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
module Iudex
|
18
|
+
module Filter
|
19
|
+
VERSION = '1.0.0'
|
20
|
+
|
21
|
+
LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-filter'
|
18
|
+
require 'stringio'
|
19
|
+
|
20
|
+
module Iudex
|
21
|
+
module Filter
|
22
|
+
module Core
|
23
|
+
|
24
|
+
class ByFilterLogger
|
25
|
+
include ByFilterReporter::ReportWriter
|
26
|
+
|
27
|
+
import 'com.gravitext.util.Metric'
|
28
|
+
|
29
|
+
def initialize( desc, index )
|
30
|
+
@log = RJack::SLF4J[ "iudex.filter.core.ByFilterLogger.#{desc}" ]
|
31
|
+
@index = index
|
32
|
+
@nlength = index.filters.map { |f| index.name( f ).length }.max
|
33
|
+
end
|
34
|
+
|
35
|
+
def report( total, delta, duration_ns, counters )
|
36
|
+
out = StringIO.new
|
37
|
+
|
38
|
+
out << "Report total: %s ::\n" % [ fmt( total ) ]
|
39
|
+
out << ( " %-#{@nlength}s %6s %5s %6s %6s" %
|
40
|
+
%w{ Filter Accept % Reject Failed } )
|
41
|
+
|
42
|
+
accepted = total
|
43
|
+
@index.filters.each do |f|
|
44
|
+
c = counters[ f ]
|
45
|
+
d = dropped( c )
|
46
|
+
if d > 0
|
47
|
+
p = prc( -d, accepted )
|
48
|
+
accepted -= d
|
49
|
+
out << ( "\n %-#{@nlength}s %6s %4.0f%% %6s %6s" %
|
50
|
+
[ @index.name( f ),
|
51
|
+
fmt( accepted ), p,
|
52
|
+
fmt( c.rejected ), fmt( c.failed ) ] )
|
53
|
+
end
|
54
|
+
end
|
55
|
+
@log.info( out.string )
|
56
|
+
end
|
57
|
+
|
58
|
+
def dropped( c )
|
59
|
+
c.rejected + c.failed
|
60
|
+
end
|
61
|
+
|
62
|
+
def fmt( v )
|
63
|
+
Metric::format( v )
|
64
|
+
end
|
65
|
+
|
66
|
+
# Percentage of v relative to t, as a Float. Yields 0.0 whenever t is
# not positive, which also avoids dividing by zero.
def prc( v, t )
  return 0.0 unless t > 0

  v.to_f / t * 100.0
end
|
69
|
+
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-filter'
|
18
|
+
|
19
|
+
module Iudex::Filter
|
20
|
+
|
21
|
+
# Default implementation of Filter, Described, and Named interfaces
|
22
|
+
class FilterBase
|
23
|
+
include Filter
|
24
|
+
include Described
|
25
|
+
include Named
|
26
|
+
|
27
|
+
# Returns empty list
|
28
|
+
def describe
|
29
|
+
[]
|
30
|
+
end
|
31
|
+
|
32
|
+
# Returns abbreviated/lower case module names plus class name, in
# dot notation (e.g. Iudex::Filter::FilterBase => "i.f.FilterBase").
# The name of a top level class is returned unchanged.
def name
  # Module#name may hand back a frozen (Ruby 2.7+) or shared String,
  # so it must never be modified in place: use non-mutating gsub only.
  dotted = self.class.name.gsub( /::/, '.' )
  dotted.gsub( /(\w)\w+\./ ) { $1.downcase + '.' }
end
|
39
|
+
|
40
|
+
# Returns true
|
41
|
+
def filter( map )
|
42
|
+
true
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-filter'
|
18
|
+
require 'iudex-filter/key_helper'
|
19
|
+
require 'iudex-filter/by_filter_logger'
|
20
|
+
|
21
|
+
module Iudex
|
22
|
+
module Filter
|
23
|
+
module Core
|
24
|
+
|
25
|
+
class FilterChainFactory
|
26
|
+
attr_reader :description
|
27
|
+
|
28
|
+
include KeyHelper
|
29
|
+
|
30
|
+
def initialize( description = "default" )
|
31
|
+
@description = description
|
32
|
+
|
33
|
+
@log = RJack::SLF4J[ [ RJack::SLF4J.to_log_name( self.class ),
|
34
|
+
description ].join('.') ]
|
35
|
+
|
36
|
+
@summary_period = nil
|
37
|
+
@by_filter_period = nil
|
38
|
+
|
39
|
+
@index = nil
|
40
|
+
@chain = nil
|
41
|
+
@listener = nil
|
42
|
+
end
|
43
|
+
|
44
|
+
def add_summary_reporter( period_s = 10.0 )
|
45
|
+
@summary_period = period_s
|
46
|
+
end
|
47
|
+
|
48
|
+
def add_by_filter_reporter( period_s = 60 * 10.0 )
|
49
|
+
@by_filter_period = period_s
|
50
|
+
end
|
51
|
+
|
52
|
+
def open
|
53
|
+
close if open?
|
54
|
+
|
55
|
+
@index = FilterIndex.new
|
56
|
+
|
57
|
+
flts = filters
|
58
|
+
log_and_register( flts )
|
59
|
+
|
60
|
+
@listener = ListenerChain.new( listeners )
|
61
|
+
@chain = create_chain( @description, flts )
|
62
|
+
@chain.listener = @listener
|
63
|
+
|
64
|
+
# With all filters loaded and thus key references, make sure
|
65
|
+
# UniMap accessors are defined (for ruby filters)
|
66
|
+
Gravitext::HTMap::UniMap.define_accessors
|
67
|
+
|
68
|
+
nil
|
69
|
+
end
|
70
|
+
|
71
|
+
def open?
|
72
|
+
@chain != nil
|
73
|
+
end
|
74
|
+
|
75
|
+
# Closes the chain and then the listener, if present, and clears both
# references. Safe to call repeatedly or when never opened.
# Always returns nil.
#
# The references are cleared before closing, and the listener is
# closed from an ensure clause, so an error raised by the chain's
# close neither leaks the listener nor leaves this factory
# reporting open?. Any such error still propagates to the caller.
def close
  chain, @chain = @chain, nil
  listener, @listener = @listener, nil

  begin
    chain.close if chain
  ensure
    listener.close if listener
  end

  nil
end
|
86
|
+
|
87
|
+
# Yields chain to block, bounded by open/close if not already open.
# Returns the value of the block.
#
# When this call is the one performing the open, close is always run
# afterwards: both when the block raises and when open itself fails
# part way, so that partially opened state is released. A factory
# that was already open is left open.
def filter
  opened = false
  unless open?
    opened = true # flag first, so a failing open is still closed
    open
  end

  yield @chain
ensure
  close if opened
end
|
99
|
+
|
100
|
+
def filters
|
101
|
+
[]
|
102
|
+
end
|
103
|
+
|
104
|
+
def log_listener( desc )
|
105
|
+
LogListener.new( "iudex.filter.core.FilterChain.#{desc}", @index )
|
106
|
+
end
|
107
|
+
|
108
|
+
def listeners
|
109
|
+
ll = [ log_listener( @description ) ]
|
110
|
+
|
111
|
+
if @summary_period
|
112
|
+
ll << SummaryReporter.new( @description, @summary_period )
|
113
|
+
end
|
114
|
+
|
115
|
+
if @by_filter_period
|
116
|
+
ll << ByFilterReporter.new( @index,
|
117
|
+
ByFilterLogger.new( @description, @index ),
|
118
|
+
@by_filter_period )
|
119
|
+
end
|
120
|
+
ll
|
121
|
+
end
|
122
|
+
|
123
|
+
# Create, yield to optional block, and return FilterChain if
|
124
|
+
# flts is not empty. Otherwise return a NoOpFilter and don't
|
125
|
+
# yield. If passed a single Symbol argument, will use both
|
126
|
+
# as description and method to obtain flts array from.
|
127
|
+
def create_chain( desc, flts = nil )
|
128
|
+
|
129
|
+
if desc.is_a?( Symbol )
|
130
|
+
flts = send( desc ) unless flts
|
131
|
+
desc = desc.to_s.gsub( /_/, '-' )
|
132
|
+
end
|
133
|
+
|
134
|
+
if flts.nil? || flts.empty?
|
135
|
+
NoOpFilter.new
|
136
|
+
else
|
137
|
+
c = FilterChain.new( desc, flts )
|
138
|
+
c.listener = log_listener( desc )
|
139
|
+
yield c if block_given?
|
140
|
+
c
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
# Create a new Switch given selector key and map of values to
|
145
|
+
# filters.
|
146
|
+
def create_switch( key, value_filters_map )
|
147
|
+
switch = Switch.new
|
148
|
+
value_filters_map.each do |value, filters|
|
149
|
+
create_chain( value.to_s.downcase, filters ) do |chain|
|
150
|
+
switch.add_proposition( Selector.new( key, value ), chain )
|
151
|
+
end
|
152
|
+
end
|
153
|
+
switch
|
154
|
+
end
|
155
|
+
|
156
|
+
private
|
157
|
+
|
158
|
+
def log_and_register( filters, depth = 0 )
|
159
|
+
filters.each do |filter|
|
160
|
+
name = @index.register( filter )
|
161
|
+
@log.info { "<< " + " " * depth + name }
|
162
|
+
if filter.kind_of?( FilterContainer )
|
163
|
+
log_and_register( filter.children, depth + 1 )
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
Binary file
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-filter'
|
18
|
+
|
19
|
+
class Symbol
|
20
|
+
def to_k
|
21
|
+
Iudex::Filter::KeyHelper.lookup_key( self.to_s )
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
class Gravitext::HTMap::Key
|
26
|
+
def to_k
|
27
|
+
self
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
module Iudex
|
32
|
+
module Filter
|
33
|
+
|
34
|
+
# Mixin module support for UniMap Keys
|
35
|
+
module KeyHelper
|
36
|
+
|
37
|
+
# Lookup matching Key in UniMap::KEY_SPACE
|
38
|
+
def self.lookup_key( name )
|
39
|
+
lookup_key_space( name )
|
40
|
+
end
|
41
|
+
|
42
|
+
# Lookup matching Key in UniMap::KEY_SPACE
|
43
|
+
def self.lookup_key_space( name )
|
44
|
+
Gravitext::HTMap::UniMap::KEY_SPACE.get( name ) or
|
45
|
+
raise( "Key #{name} not found" )
|
46
|
+
end
|
47
|
+
|
48
|
+
# Convert the given items to Keys via their to_k method and return
# the distinct results. Accepts either a variable argument list or a
# single enumerable (anything responding to each) as first argument.
def keys( *syms )
  head = syms[0]
  syms = head if head && head.respond_to?( :each )
  syms.map { |item| item.to_k }.uniq
end
|
53
|
+
module_function :keys
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You may
|
6
|
+
# obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'iudex-filter/filter_base'
|
18
|
+
|
19
|
+
module Iudex::Filter
|
20
|
+
|
21
|
+
# Short hand for ProcFilter.new
|
22
|
+
def fltr( &block )
|
23
|
+
ProcFilter.new( caller.first, &block )
|
24
|
+
end
|
25
|
+
|
26
|
+
class ProcFilter < FilterBase
|
27
|
+
|
28
|
+
# New ProcFilter using block as implementation of
# Filter.filter( map ). The created filter will only return false
# (reject map, stop chain) if the block returns the :reject
# symbol.
#
# clr:: Optional caller entry ("path:line" or "path:line:in ...")
#       used to build the description returned by describe, as
#       [ file basename without ".rb", line number ]. Defaults to
#       the first entry of Kernel#caller.
def initialize( clr = nil, &block )
  @block = block

  clr ||= caller.first

  # Match the "path:<digits>" prefix instead of splitting on every
  # ':', so a path that itself contains a colon (e.g. "C:/x.rb:12")
  # keeps its full path and real line number. Without a line number
  # the whole entry is taken as the path and the line becomes 0.
  path, line = ( clr =~ /\A(.*?):(\d+)(?::|\z)/ ) ? [ $1, $2 ] : [ clr, nil ]
  @description = [ File.basename( path, ".rb" ), line.to_i ]
  #FIXME: When ruby 1.9, can use Proc.source_location instead.
end
|
40
|
+
|
41
|
+
def describe
|
42
|
+
@description
|
43
|
+
end
|
44
|
+
|
45
|
+
# Run the wrapped block against map. Any block result other than the
# :reject symbol (including nil and false) counts as acceptance.
def filter( map )
  outcome = @block.call( map )
  outcome != :reject
end
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
data/lib/iudex-filter.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'rjack-slf4j'
|
18
|
+
require 'gravitext-util'
|
19
|
+
|
20
|
+
require 'iudex-filter/base'
|
21
|
+
|
22
|
+
require 'java'
|
23
|
+
|
24
|
+
module Iudex
|
25
|
+
module Filter
|
26
|
+
require "#{LIB_DIR}/iudex-filter-#{VERSION}.jar"
|
27
|
+
|
28
|
+
import 'iudex.filter.Filter'
|
29
|
+
import 'iudex.filter.FilterContainer'
|
30
|
+
import 'iudex.filter.Described'
|
31
|
+
import 'iudex.filter.Named'
|
32
|
+
import 'iudex.filter.NoOpFilter'
|
33
|
+
|
34
|
+
module Core
|
35
|
+
import 'iudex.filter.core.ByFilterReporter'
|
36
|
+
import 'iudex.filter.core.Copier'
|
37
|
+
import 'iudex.filter.core.FilterChain'
|
38
|
+
import 'iudex.filter.core.FilterIndex'
|
39
|
+
import 'iudex.filter.core.ListenerChain'
|
40
|
+
import 'iudex.filter.core.LogListener'
|
41
|
+
import 'iudex.filter.core.MDCSetter'
|
42
|
+
import 'iudex.filter.core.MDCUnsetter'
|
43
|
+
import 'iudex.filter.core.Selector'
|
44
|
+
import 'iudex.filter.core.Setter'
|
45
|
+
import 'iudex.filter.core.SummaryReporter'
|
46
|
+
import 'iudex.filter.core.Switch'
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
require 'iudex-filter/filter_base'
|
52
|
+
require 'iudex-filter/proc_filter'
|
data/pom.xml
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
3
|
+
|
4
|
+
<modelVersion>4.0.0</modelVersion>
|
5
|
+
<groupId>iudex</groupId>
|
6
|
+
<artifactId>iudex-filter</artifactId>
|
7
|
+
<packaging>jar</packaging>
|
8
|
+
<version>1.0.0</version>
|
9
|
+
<name>Iudex Filter</name>
|
10
|
+
|
11
|
+
<parent>
|
12
|
+
<groupId>iudex</groupId>
|
13
|
+
<artifactId>iudex-parent</artifactId>
|
14
|
+
<version>1.0</version>
|
15
|
+
<relativePath>..</relativePath>
|
16
|
+
</parent>
|
17
|
+
|
18
|
+
<dependencies>
|
19
|
+
|
20
|
+
<dependency>
|
21
|
+
<groupId>org.slf4j</groupId>
|
22
|
+
<artifactId>slf4j-api</artifactId>
|
23
|
+
</dependency>
|
24
|
+
|
25
|
+
<dependency>
|
26
|
+
<groupId>com.gravitext</groupId>
|
27
|
+
<artifactId>gravitext-util</artifactId>
|
28
|
+
</dependency>
|
29
|
+
|
30
|
+
<dependency>
|
31
|
+
<groupId>junit</groupId>
|
32
|
+
<artifactId>junit</artifactId>
|
33
|
+
</dependency>
|
34
|
+
|
35
|
+
<dependency>
|
36
|
+
<groupId>ch.qos.logback</groupId>
|
37
|
+
<artifactId>logback-classic</artifactId>
|
38
|
+
<scope>test</scope>
|
39
|
+
</dependency>
|
40
|
+
|
41
|
+
</dependencies>
|
42
|
+
|
43
|
+
<build>
|
44
|
+
<plugins>
|
45
|
+
<plugin>
|
46
|
+
<!-- Parent settings -->
|
47
|
+
<artifactId>maven-compiler-plugin</artifactId>
|
48
|
+
</plugin>
|
49
|
+
<plugin>
|
50
|
+
<!-- Parent settings -->
|
51
|
+
<artifactId>maven-source-plugin</artifactId>
|
52
|
+
</plugin>
|
53
|
+
</plugins>
|
54
|
+
</build>
|
55
|
+
|
56
|
+
</project>
|
data/test/setup.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008-2011 David Kellum
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
5
|
+
# may not use this file except in compliance with the License. You
|
6
|
+
# may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
13
|
+
# implied. See the License for the specific language governing
|
14
|
+
# permissions and limitations under the License.
|
15
|
+
#++
|
16
|
+
|
17
|
+
#### General test setup: LOAD_PATH, logging, console output ####
|
18
|
+
|
19
|
+
ldir = File.join( File.dirname( __FILE__ ), "..", "lib" )
|
20
|
+
$LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
|
21
|
+
|
22
|
+
require 'rubygems'
|
23
|
+
require 'rjack-logback'
|
24
|
+
RJack::Logback.config_console( :stderr => true )
|
25
|
+
|
26
|
+
require 'minitest/unit'
|
27
|
+
require 'minitest/autorun'
|
28
|
+
|
29
|
+
# Make test output logging compatible: no partial lines.
|
30
|
+
class TestOut
|
31
|
+
def print( *a ); $stdout.puts( *a ); end
|
32
|
+
def puts( *a ); $stdout.puts( *a ); end
|
33
|
+
end
|
34
|
+
MiniTest::Unit.output = TestOut.new
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'gravitext-util'
|
23
|
+
require 'iudex-filter/proc_filter'
|
24
|
+
|
25
|
+
class TestFilter < Iudex::Filter::FilterBase
|
26
|
+
end
|
27
|
+
|
28
|
+
class TestFilterBase < MiniTest::Unit::TestCase
|
29
|
+
include Iudex::Filter
|
30
|
+
include Gravitext::HTMap
|
31
|
+
|
32
|
+
def test_base_name
|
33
|
+
f = FilterBase.new
|
34
|
+
assert_equal( "i.f.FilterBase", f.name )
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_top_level_name
|
38
|
+
f = TestFilter.new
|
39
|
+
assert_equal( "TestFilter", f.name )
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
RJack::Logback.config_console( :stderr => true, :mdc => "tkey" )
|
23
|
+
|
24
|
+
# RJack::Logback[ "iudex.filter.core.FilterChain.test.reject" ].level = RJack::Logback::DEBUG
|
25
|
+
|
26
|
+
require 'gravitext-util'
|
27
|
+
require 'iudex-filter/filter_chain_factory'
|
28
|
+
|
29
|
+
class TestFilterChainFactory < MiniTest::Unit::TestCase
|
30
|
+
include Iudex::Filter
|
31
|
+
include Iudex::Filter::Core
|
32
|
+
|
33
|
+
include Gravitext::HTMap
|
34
|
+
|
35
|
+
import 'iudex.filter.core.MDCSetter'
|
36
|
+
import 'iudex.filter.core.MDCUnsetter'
|
37
|
+
|
38
|
+
TKEY = UniMap.create_key( 'tkey' );
|
39
|
+
|
40
|
+
class RandomFilter < FilterBase
|
41
|
+
|
42
|
+
def initialize( odds = 2 )
|
43
|
+
@odds = odds
|
44
|
+
end
|
45
|
+
|
46
|
+
def describe
|
47
|
+
[ @odds ]
|
48
|
+
end
|
49
|
+
|
50
|
+
def filter( map )
|
51
|
+
rand( @odds ) != 0
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_filter_chain
|
56
|
+
fcf = FilterChainFactory.new( "test" )
|
57
|
+
fcf.add_summary_reporter( 1.0 )
|
58
|
+
fcf.add_by_filter_reporter( 2.5 )
|
59
|
+
|
60
|
+
def fcf.filters
|
61
|
+
[ MDCSetter.new( TKEY ) ] + super +
|
62
|
+
[ 6, 4, 6, 6 ].map { |p| RandomFilter.new( p ) }
|
63
|
+
end
|
64
|
+
|
65
|
+
def fcf.listeners
|
66
|
+
super + [ MDCUnsetter.new( TKEY ) ]
|
67
|
+
end
|
68
|
+
|
69
|
+
2.times do |r|
|
70
|
+
assert( ! fcf.open? )
|
71
|
+
|
72
|
+
fcf.filter do |chain|
|
73
|
+
1000.times do |t|
|
74
|
+
sleep( rand( 10 ) / 1000.0 / ( r + 1 ) )
|
75
|
+
map = UniMap.new
|
76
|
+
map.tkey = t
|
77
|
+
chain.filter( map )
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
assert( ! fcf.open? )
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
#.hashdot.profile += jruby-shortlived
|
3
|
+
|
4
|
+
#--
|
5
|
+
# Copyright (c) 2008-2011 David Kellum
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you
|
8
|
+
# may not use this file except in compliance with the License. You
|
9
|
+
# may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
16
|
+
# implied. See the License for the specific language governing
|
17
|
+
# permissions and limitations under the License.
|
18
|
+
#++
|
19
|
+
|
20
|
+
require File.join( File.dirname( __FILE__ ), "setup" )
|
21
|
+
|
22
|
+
require 'gravitext-util'
|
23
|
+
require 'iudex-filter/proc_filter'
|
24
|
+
|
25
|
+
class TestProcFilter < MiniTest::Unit::TestCase
|
26
|
+
include Iudex::Filter
|
27
|
+
include Gravitext::HTMap
|
28
|
+
|
29
|
+
UniMap.create_key( 'mkey' )
|
30
|
+
UniMap.define_accessors
|
31
|
+
|
32
|
+
def test_describe
|
33
|
+
assert_equal( [ 'test_proc_filter', __LINE__ ], fltr {}.describe )
|
34
|
+
assert_equal( [ 'test_proc_filter', __LINE__ ], ProcFilter.new {}.describe )
|
35
|
+
assert_equal( [ 'test_proc_filter', __LINE__ ], ProcFilter.new {}.describe )
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_name
|
39
|
+
index = Core::FilterIndex.new
|
40
|
+
name, line = index.register( fltr {} ), __LINE__
|
41
|
+
assert_equal( "i.f.ProcFilter-test_proc_filter-#{line}", name )
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_return
|
45
|
+
assert do_f( fltr { } )
|
46
|
+
assert do_f( fltr { nil } )
|
47
|
+
assert do_f( fltr { true } )
|
48
|
+
assert do_f( fltr { :other_sym } )
|
49
|
+
assert do_f( fltr { false } ) # Consequence
|
50
|
+
|
51
|
+
refute do_f( fltr { :reject } )
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_mutate
|
55
|
+
map = UniMap.new
|
56
|
+
map.mkey = :initial
|
57
|
+
assert do_f( fltr { |m| m.mkey = :mutated }, map )
|
58
|
+
assert_equal( :mutated, map.mkey )
|
59
|
+
end
|
60
|
+
|
61
|
+
def do_f( f, m = UniMap.new )
|
62
|
+
f.filter( m )
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
metadata
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: iudex-filter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 1.0.0
|
6
|
+
platform: java
|
7
|
+
authors:
|
8
|
+
- David Kellum
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-04-04 00:00:00 -07:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rjack-slf4j
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 1.6.1
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: gravitext-util
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ~>
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 1.5.0
|
36
|
+
type: :runtime
|
37
|
+
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: minitest
|
40
|
+
prerelease: false
|
41
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 1.7.1
|
47
|
+
- - <
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: "2.1"
|
50
|
+
type: :development
|
51
|
+
version_requirements: *id003
|
52
|
+
- !ruby/object:Gem::Dependency
|
53
|
+
name: rjack-logback
|
54
|
+
prerelease: false
|
55
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "1.0"
|
61
|
+
type: :development
|
62
|
+
version_requirements: *id004
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: rjack-tarpit
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ~>
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: 1.3.0
|
72
|
+
type: :development
|
73
|
+
version_requirements: *id005
|
74
|
+
description: |-
|
75
|
+
Iudex is a general purpose web crawler and feed processor in
|
76
|
+
ruby/java. The iudex-filter gem contains a fundamental filtering/chain
|
77
|
+
of responsibility sub-system.
|
78
|
+
email:
|
79
|
+
- dek-oss@gravitext.com
|
80
|
+
executables: []
|
81
|
+
|
82
|
+
extensions: []
|
83
|
+
|
84
|
+
extra_rdoc_files:
|
85
|
+
- Manifest.txt
|
86
|
+
- History.rdoc
|
87
|
+
- README.rdoc
|
88
|
+
files:
|
89
|
+
- History.rdoc
|
90
|
+
- Manifest.txt
|
91
|
+
- README.rdoc
|
92
|
+
- Rakefile
|
93
|
+
- pom.xml
|
94
|
+
- lib/iudex-filter/base.rb
|
95
|
+
- lib/iudex-filter.rb
|
96
|
+
- lib/iudex-filter/by_filter_logger.rb
|
97
|
+
- lib/iudex-filter/filter_base.rb
|
98
|
+
- lib/iudex-filter/filter_chain_factory.rb
|
99
|
+
- lib/iudex-filter/key_helper.rb
|
100
|
+
- lib/iudex-filter/proc_filter.rb
|
101
|
+
- test/setup.rb
|
102
|
+
- test/test_filter_base.rb
|
103
|
+
- test/test_filter_chain_factory.rb
|
104
|
+
- test/test_proc_filter.rb
|
105
|
+
- lib/iudex-filter/iudex-filter-1.0.0.jar
|
106
|
+
has_rdoc: true
|
107
|
+
homepage: http://github.com/dekellum/iudex
|
108
|
+
licenses: []
|
109
|
+
|
110
|
+
post_install_message:
|
111
|
+
rdoc_options:
|
112
|
+
- --main
|
113
|
+
- README.rdoc
|
114
|
+
require_paths:
|
115
|
+
- lib
|
116
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
117
|
+
none: false
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: "0"
|
122
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
124
|
+
requirements:
|
125
|
+
- - ">="
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: "0"
|
128
|
+
requirements: []
|
129
|
+
|
130
|
+
rubyforge_project: iudex-filter
|
131
|
+
rubygems_version: 1.5.1
|
132
|
+
signing_key:
|
133
|
+
specification_version: 3
|
134
|
+
summary: Iudex is a general purpose web crawler and feed processor in ruby/java
|
135
|
+
test_files:
|
136
|
+
- test/test_proc_filter.rb
|
137
|
+
- test/test_filter_base.rb
|
138
|
+
- test/test_filter_chain_factory.rb
|