iudex-filter 1.0.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.rdoc ADDED
@@ -0,0 +1,2 @@
1
+ === 1.0.0 (2011-04-04)
2
+ * Initial release.
data/Manifest.txt ADDED
@@ -0,0 +1,17 @@
1
+ History.rdoc
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ pom.xml
6
+ lib/iudex-filter/base.rb
7
+ lib/iudex-filter.rb
8
+ lib/iudex-filter/by_filter_logger.rb
9
+ lib/iudex-filter/filter_base.rb
10
+ lib/iudex-filter/filter_chain_factory.rb
11
+ lib/iudex-filter/key_helper.rb
12
+ lib/iudex-filter/proc_filter.rb
13
+ test/setup.rb
14
+ test/test_filter_base.rb
15
+ test/test_filter_chain_factory.rb
16
+ test/test_proc_filter.rb
17
+ lib/iudex-filter/iudex-filter-1.0.0.jar
data/README.rdoc ADDED
@@ -0,0 +1,25 @@
1
+ = iudex-filter
2
+
3
+ * http://github.com/dekellum/iudex
4
+
5
+ == Description
6
+
7
+ Iudex is a general purpose web crawler and feed processor in
8
+ ruby/java. The iudex-filter gem contains a fundamental filtering/chain
9
+ of responsibility sub-system.
10
+
11
+ == License
12
+
13
+ Copyright (c) 2008-2011 David Kellum
14
+
15
+ Licensed under the Apache License, Version 2.0 (the "License"); you
16
+ may not use this file except in compliance with the License. You
17
+ may obtain a copy of the License at:
18
+
19
+ http://www.apache.org/licenses/LICENSE-2.0
20
+
21
+ Unless required by applicable law or agreed to in writing, software
22
+ distributed under the License is distributed on an "AS IS" BASIS,
23
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
24
+ implied. See the License for the specific language governing
25
+ permissions and limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,40 @@
1
+ # -*- ruby -*-
2
+
3
+ $LOAD_PATH << './lib'
4
+ require 'iudex-filter/base'
5
+
6
+ require 'rubygems'
7
+ gem 'rjack-tarpit', '~> 1.2'
8
+ require 'rjack-tarpit'
9
+
10
+ t = RJack::TarPit.new( 'iudex-filter',
11
+ Iudex::Filter::VERSION,
12
+ :no_assembly, :java_platform )
13
+
14
+ t.specify do |h|
15
+ h.developer( "David Kellum", "dek-oss@gravitext.com" )
16
+ h.extra_deps += [ [ 'rjack-slf4j', '~> 1.6.1' ],
17
+ [ 'gravitext-util', '~> 1.5.0' ] ]
18
+
19
+ h.testlib = :minitest
20
+ h.extra_dev_deps += [ [ 'minitest', '>= 1.7.1', '< 2.1' ],
21
+ [ 'rjack-logback', '>= 1.0' ] ]
22
+ end
23
+
24
+ file 'Manifest.txt' => "lib/#{t.name}/base.rb"
25
+
26
# Release sanity checks run before packaging, tagging and pushing.
# The version is passed through Regexp.escape so that the dots in a
# version such as "1.0.0" match only literal dots (unescaped, "1.0.0"
# would also match e.g. "1x0y0").
task :check_pom_version do
  t.test_line_match( 'pom.xml', /<version>/,
                     /#{ Regexp.escape( t.version.to_s ) }/ )
end
task :check_history_version do
  t.test_line_match( 'History.rdoc', /^==/,
                     / #{ Regexp.escape( t.version.to_s ) } / )
end
# The History.rdoc heading must end with a parenthesized run of digits
# and hyphens (the release date).
task :check_history_date do
  t.test_line_match( 'History.rdoc', /^==/, /\([0-9\-]+\)$/ )
end
35
+
36
+ task :gem => [ :check_pom_version, :check_history_version ]
37
+ task :tag => [ :check_pom_version, :check_history_version, :check_history_date ]
38
+ task :push => [ :check_history_date ]
39
+
40
+ t.define_tasks
@@ -0,0 +1,23 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ module Iudex
18
+ module Filter
19
+ VERSION = '1.0.0' # Gem version; also interpolated into the name of the jar that iudex-filter.rb requires
20
+
21
+ LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
+ end
23
+ end
@@ -0,0 +1,74 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-filter'
18
+ require 'stringio'
19
+
20
+ module Iudex
21
+ module Filter
22
+ module Core
23
+
24
+ class ByFilterLogger # ReportWriter that logs a per-filter table of accepted/percent/rejected/failed counts at info level
25
+ include ByFilterReporter::ReportWriter
26
+
27
+ import 'com.gravitext.util.Metric'
28
+
29
+ def initialize( desc, index ) # desc becomes the logger name suffix; index supplies the filters and their names
30
+ @log = RJack::SLF4J[ "iudex.filter.core.ByFilterLogger.#{desc}" ]
31
+ @index = index
32
+ @nlength = index.filters.map { |f| index.name( f ).length }.max # longest filter name; used as the name column width
33
+ end
34
+
35
+ def report( total, delta, duration_ns, counters ) # builds the table and logs it once; delta and duration_ns are not used here
36
+ out = StringIO.new
37
+
38
+ out << "Report total: %s ::\n" % [ fmt( total ) ]
39
+ out << ( " %-#{@nlength}s %6s %5s %6s %6s" %
40
+ %w{ Filter Accept % Reject Failed } )
41
+
42
+ accepted = total # running count of what is still accepted after each filter's drops
43
+ @index.filters.each do |f|
44
+ c = counters[ f ]
45
+ d = dropped( c )
46
+ if d > 0 # only filters that dropped something get a row
47
+ p = prc( -d, accepted ) # negative percentage: share of the remaining count dropped by this filter
48
+ accepted -= d
49
+ out << ( "\n %-#{@nlength}s %6s %4.0f%% %6s %6s" %
50
+ [ @index.name( f ),
51
+ fmt( accepted ), p,
52
+ fmt( c.rejected ), fmt( c.failed ) ] )
53
+ end
54
+ end
55
+ @log.info( out.string )
56
+ end
57
+
58
+ def dropped( c ) # rejected plus failed for one filter's counter
59
+ c.rejected + c.failed
60
+ end
61
+
62
+ def fmt( v ) # number formatting is delegated to the imported Metric class
63
+ Metric::format( v )
64
+ end
65
+
66
+ def prc( v, t ) # v as a percentage of t; 0.0 when t is not positive (avoids dividing by zero)
67
+ ( t > 0 ) ? v.to_f / t * 100.0 : 0.0
68
+ end
69
+
70
+ end
71
+ end
72
+
73
+ end
74
+ end
@@ -0,0 +1,46 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-filter'
18
+
19
+ module Iudex::Filter
20
+
21
+ # Default implementation of Filter, Described, and Named interfaces
22
+ class FilterBase
23
+ include Filter
24
+ include Described
25
+ include Named
26
+
27
+ # Returns an empty list, i.e. no descriptive values; subclasses may override
28
+ def describe
29
+ []
30
+ end
31
+
32
# Returns abbreviated/lower case module names plus class name, in
# dot notation (e.g. Iudex::Filter::FilterBase => "i.f.FilterBase").
def name
  # Non-mutating gsub: Class#name can return a frozen String (newer
  # rubies), so it must not be modified in place with gsub!.
  dotted = self.class.name.gsub( /::/, '.' )

  # Reduce every module segment (two or more word characters followed
  # by a dot) to its lower-cased first letter; the trailing class name
  # has no dot after it and is kept as is.
  dotted.gsub( /(\w)\w+\./ ) { $1.downcase + '.' }
end
39
+
40
+ # Returns true unconditionally (map is not inspected); subclasses override this
41
+ def filter( map )
42
+ true
43
+ end
44
+ end
45
+
46
+ end
@@ -0,0 +1,171 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-filter'
18
+ require 'iudex-filter/key_helper'
19
+ require 'iudex-filter/by_filter_logger'
20
+
21
+ module Iudex
22
+ module Filter
23
+ module Core
24
+
25
+ class FilterChainFactory
26
+ attr_reader :description
27
+
28
+ include KeyHelper
29
+
30
+ def initialize( description = "default" ) # description is appended to the logger name and later names the main chain and reporters
31
+ @description = description
32
+
33
+ @log = RJack::SLF4J[ [ RJack::SLF4J.to_log_name( self.class ),
34
+ description ].join('.') ]
35
+
36
+ @summary_period = nil # nil until add_summary_reporter is called
37
+ @by_filter_period = nil # nil until add_by_filter_reporter is called
38
+
39
+ @index = nil # the remaining state is created by open
40
+ @chain = nil
41
+ @listener = nil
42
+ end
43
+
44
+ def add_summary_reporter( period_s = 10.0 ) # records the period; listeners then adds a SummaryReporter (period_s presumably seconds - confirm)
45
+ @summary_period = period_s
46
+ end
47
+
48
+ def add_by_filter_reporter( period_s = 60 * 10.0 ) # records the period; listeners then adds a ByFilterReporter (period_s presumably seconds - confirm)
49
+ @by_filter_period = period_s
50
+ end
51
+
52
+ def open # (re)creates the filter index, the listener chain and the main chain; returns nil
53
+ close if open? # reopening: release the previous chain and listener first
54
+
55
+ @index = FilterIndex.new
56
+
57
+ flts = filters
58
+ log_and_register( flts ) # must precede listeners, which hand @index to the reporters
59
+
60
+ @listener = ListenerChain.new( listeners )
61
+ @chain = create_chain( @description, flts )
62
+ @chain.listener = @listener # replaces the log listener set inside create_chain; NOTE(review): with empty flts create_chain returns a NoOpFilter - confirm it accepts listener=
63
+
64
+ # With all filters loaded and thus key references, make sure
65
+ # UniMap accessors are defined (for ruby filters)
66
+ Gravitext::HTMap::UniMap.define_accessors
67
+
68
+ nil
69
+ end
70
+
71
+ def open? # true from the time open assigns a chain until close clears it
72
+ @chain != nil
73
+ end
74
+
75
+ def close # closes and clears the chain and the listener chain, each only if set; a no-op when not open
76
+ if @chain
77
+ @chain.close
78
+ @chain = nil
79
+ end
80
+
81
+ if @listener
82
+ @listener.close
83
+ @listener = nil
84
+ end
85
+ end
86
+
87
+ # Yields chain to block, bounded by open/close if not already open (an already open factory is left open)
88
+ def filter
89
+ opened = unless open? # true only when this call performs the open; nil otherwise
90
+ open
91
+ true
92
+ end
93
+
94
+ yield @chain
95
+
96
+ ensure
97
+ close if opened # close only what this call opened, even if the block raised
98
+ end
99
+
100
+ def filters # filters for the main chain, read by open; empty in this base implementation
101
+ []
102
+ end
103
+
104
+ def log_listener( desc ) # new LogListener whose name ends in desc, sharing this factory's filter index
105
+ LogListener.new( "iudex.filter.core.FilterChain.#{desc}", @index )
106
+ end
107
+
108
+ def listeners # listeners for the main chain: always a LogListener, plus whichever reporters were enabled
109
+ ll = [ log_listener( @description ) ]
110
+
111
+ if @summary_period # set by add_summary_reporter
112
+ ll << SummaryReporter.new( @description, @summary_period )
113
+ end
114
+
115
+ if @by_filter_period # set by add_by_filter_reporter
116
+ ll << ByFilterReporter.new( @index,
117
+ ByFilterLogger.new( @description, @index ),
118
+ @by_filter_period )
119
+ end
120
+ ll
121
+ end
122
+
123
+ # Create a FilterChain from flts, give it a log listener, yield it to the
124
+ # optional block, and return it. If flts is nil or empty, return a
125
+ # NoOpFilter instead and don't yield. If desc is a Symbol and flts is not
126
+ # given, desc is also sent to self as the method that returns the flts array.
127
+ def create_chain( desc, flts = nil )
128
+
129
+ if desc.is_a?( Symbol )
130
+ flts = send( desc ) unless flts
131
+ desc = desc.to_s.gsub( /_/, '-' ) # a Symbol description is turned into a String with '_' replaced by '-'
132
+ end
133
+
134
+ if flts.nil? || flts.empty?
135
+ NoOpFilter.new
136
+ else
137
+ c = FilterChain.new( desc, flts )
138
+ c.listener = log_listener( desc )
139
+ yield c if block_given?
140
+ c
141
+ end
142
+ end
143
+
144
+ # Create a new Switch given selector key and map of values to
145
+ # filters. Each value is paired with a chain built from its filters; an empty filter list adds no proposition.
146
+ def create_switch( key, value_filters_map )
147
+ switch = Switch.new
148
+ value_filters_map.each do |value, filters|
149
+ create_chain( value.to_s.downcase, filters ) do |chain| # the block only runs when create_chain built a real chain
150
+ switch.add_proposition( Selector.new( key, value ), chain )
151
+ end
152
+ end
153
+ switch
154
+ end
155
+
156
+ private
157
+
158
+ def log_and_register( filters, depth = 0 ) # registers each filter with @index and logs the returned name, indented by depth
159
+ filters.each do |filter|
160
+ name = @index.register( filter )
161
+ @log.info { "<< " + " " * depth + name }
162
+ if filter.kind_of?( FilterContainer ) # containers are walked recursively, one level deeper
163
+ log_and_register( filter.children, depth + 1 )
164
+ end
165
+ end
166
+ end
167
+ end
168
+
169
+ end
170
+ end
171
+ end
@@ -0,0 +1,56 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You may
6
+ # obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-filter'
18
+
19
+ class Symbol # core-class extension: resolve a Symbol to the Key of the same name
20
+ def to_k
21
+ Iudex::Filter::KeyHelper.lookup_key( self.to_s ) # raises if no Key with this name is found
22
+ end
23
+ end
24
+
25
+ class Gravitext::HTMap::Key # a Key converts to itself, so to_k works uniformly on Symbols and Keys
26
+ def to_k
27
+ self
28
+ end
29
+ end
30
+
31
+ module Iudex
32
+ module Filter
33
+
34
+ # Mixin module support for UniMap Keys
35
+ module KeyHelper
36
+
37
+ # Lookup matching Key by name; delegates to lookup_key_space
38
+ def self.lookup_key( name )
39
+ lookup_key_space( name )
40
+ end
41
+
42
+ # Lookup matching Key in UniMap::KEY_SPACE, raising a RuntimeError if none is found
43
+ def self.lookup_key_space( name )
44
+ Gravitext::HTMap::UniMap::KEY_SPACE.get( name ) or
45
+ raise( "Key #{name} not found" )
46
+ end
47
+
48
+ # Map Symbols (anything responding to to_k) to Keys, without duplicates
49
+ def keys( *syms )
50
+ syms = syms[0] if ( syms[0] && syms[0].respond_to?( :each ) ) # a single collection argument is used in place of the argument list
51
+ syms.map { |s| s.to_k }.uniq
52
+ end
53
+ module_function :keys # also callable as KeyHelper.keys
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,50 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You may
6
+ # obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-filter/filter_base'
18
+
19
+ module Iudex::Filter
20
+
21
+ # Shorthand for ProcFilter.new; passes the call site of fltr (caller.first) so the description points at the caller
22
+ def fltr( &block )
23
+ ProcFilter.new( caller.first, &block )
24
+ end
25
+
26
+ class ProcFilter < FilterBase
27
+
28
# New ProcFilter using block as implementation of
# Filter.filter( map ). The created filter will only return false
# (reject map, stop chain) if the block returns the :reject
# symbol.
#
# clr is an optional caller entry ("file:line" or "file:line:in ...")
# from which the description [ file basename without ".rb", line ] is
# derived. It defaults to the immediate caller of this method.
def initialize( clr = nil, &block )
  @block = block

  clr ||= caller.first

  # Anchor on the ":<digits>" line field instead of splitting on every
  # colon, so a path that itself contains a colon (e.g. a Windows drive
  # letter, "C:/x/foo.rb:12") keeps its file name. Entries that do not
  # have that shape fall back to the plain split.
  if clr =~ /\A(.*?):(\d+)(?::|\z)/
    file, line = $1, $2
  else
    file, line = clr.split( /:/ )
  end

  @description = [ File.basename( file, ".rb" ), line.to_i ]
  #FIXME: When ruby 1.9, can use Proc.source_location instead.
end
40
+
41
+ def describe # the [ file basename, line ] pair captured in initialize
42
+ @description
43
+ end
44
+
45
+ def filter( map ) # true unless the block returns exactly :reject (nil and false also accept)
46
+ ( @block.call( map ) != :reject )
47
+ end
48
+ end
49
+
50
+ end
@@ -0,0 +1,52 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'rjack-slf4j'
18
+ require 'gravitext-util'
19
+
20
+ require 'iudex-filter/base'
21
+
22
+ require 'java'
23
+
24
+ module Iudex
25
+ module Filter
26
+ require "#{LIB_DIR}/iudex-filter-#{VERSION}.jar"
27
+
28
+ import 'iudex.filter.Filter'
29
+ import 'iudex.filter.FilterContainer'
30
+ import 'iudex.filter.Described'
31
+ import 'iudex.filter.Named'
32
+ import 'iudex.filter.NoOpFilter'
33
+
34
+ module Core
35
+ import 'iudex.filter.core.ByFilterReporter'
36
+ import 'iudex.filter.core.Copier'
37
+ import 'iudex.filter.core.FilterChain'
38
+ import 'iudex.filter.core.FilterIndex'
39
+ import 'iudex.filter.core.ListenerChain'
40
+ import 'iudex.filter.core.LogListener'
41
+ import 'iudex.filter.core.MDCSetter'
42
+ import 'iudex.filter.core.MDCUnsetter'
43
+ import 'iudex.filter.core.Selector'
44
+ import 'iudex.filter.core.Setter'
45
+ import 'iudex.filter.core.SummaryReporter'
46
+ import 'iudex.filter.core.Switch'
47
+ end
48
+ end
49
+ end
50
+
51
+ require 'iudex-filter/filter_base'
52
+ require 'iudex-filter/proc_filter'
data/pom.xml ADDED
@@ -0,0 +1,56 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
+
4
+ <modelVersion>4.0.0</modelVersion>
5
+ <groupId>iudex</groupId>
6
+ <artifactId>iudex-filter</artifactId>
7
+ <packaging>jar</packaging>
8
+ <version>1.0.0</version>
9
+ <name>Iudex Filter</name>
10
+
11
+ <parent>
12
+ <groupId>iudex</groupId>
13
+ <artifactId>iudex-parent</artifactId>
14
+ <version>1.0</version>
15
+ <relativePath>..</relativePath>
16
+ </parent>
17
+
18
+ <dependencies>
19
+
20
+ <dependency>
21
+ <groupId>org.slf4j</groupId>
22
+ <artifactId>slf4j-api</artifactId>
23
+ </dependency>
24
+
25
+ <dependency>
26
+ <groupId>com.gravitext</groupId>
27
+ <artifactId>gravitext-util</artifactId>
28
+ </dependency>
29
+
30
+ <dependency>
31
+ <groupId>junit</groupId>
32
+ <artifactId>junit</artifactId>
33
+ </dependency>
34
+
35
+ <dependency>
36
+ <groupId>ch.qos.logback</groupId>
37
+ <artifactId>logback-classic</artifactId>
38
+ <scope>test</scope>
39
+ </dependency>
40
+
41
+ </dependencies>
42
+
43
+ <build>
44
+ <plugins>
45
+ <plugin>
46
+ <!-- Parent settings -->
47
+ <artifactId>maven-compiler-plugin</artifactId>
48
+ </plugin>
49
+ <plugin>
50
+ <!-- Parent settings -->
51
+ <artifactId>maven-source-plugin</artifactId>
52
+ </plugin>
53
+ </plugins>
54
+ </build>
55
+
56
+ </project>
data/test/setup.rb ADDED
@@ -0,0 +1,34 @@
1
+ #--
2
+ # Copyright (c) 2008-2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ #### General test setup: LOAD_PATH, logging, console output ####
18
+
19
+ ldir = File.join( File.dirname( __FILE__ ), "..", "lib" )
20
+ $LOAD_PATH.unshift( ldir ) unless $LOAD_PATH.include?( ldir )
21
+
22
+ require 'rubygems'
23
+ require 'rjack-logback'
24
+ RJack::Logback.config_console( :stderr => true )
25
+
26
+ require 'minitest/unit'
27
+ require 'minitest/autorun'
28
+
29
+ # Make test output logging compatible: no partial lines.
30
+ class TestOut
31
+ def print( *a ); $stdout.puts( *a ); end # print is routed through puts as well, so each write ends its line
32
+ def puts( *a ); $stdout.puts( *a ); end
33
+ end
34
+ MiniTest::Unit.output = TestOut.new
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env jruby
2
+ #.hashdot.profile += jruby-shortlived
3
+
4
+ #--
5
+ # Copyright (c) 2008-2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You
9
+ # may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+
20
+ require File.join( File.dirname( __FILE__ ), "setup" )
21
+
22
+ require 'gravitext-util'
23
+ require 'iudex-filter/proc_filter'
24
+
25
+ class TestFilter < Iudex::Filter::FilterBase
26
+ end
27
+
28
+ class TestFilterBase < MiniTest::Unit::TestCase # checks FilterBase#name for namespaced and top-level classes
29
+ include Iudex::Filter
30
+ include Gravitext::HTMap
31
+
32
+ def test_base_name # module segments are abbreviated to lower-case initials
33
+ f = FilterBase.new
34
+ assert_equal( "i.f.FilterBase", f.name )
35
+ end
36
+
37
+ def test_top_level_name # a class outside any module keeps its bare name
38
+ f = TestFilter.new
39
+ assert_equal( "TestFilter", f.name )
40
+ end
41
+
42
+ end
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env jruby
2
+ #.hashdot.profile += jruby-shortlived
3
+
4
+ #--
5
+ # Copyright (c) 2008-2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You
9
+ # may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+
20
+ require File.join( File.dirname( __FILE__ ), "setup" )
21
+
22
+ RJack::Logback.config_console( :stderr => true, :mdc => "tkey" )
23
+
24
+ # RJack::Logback[ "iudex.filter.core.FilterChain.test.reject" ].level = RJack::Logback::DEBUG
25
+
26
+ require 'gravitext-util'
27
+ require 'iudex-filter/filter_chain_factory'
28
+
29
+ class TestFilterChainFactory < MiniTest::Unit::TestCase
30
+ include Iudex::Filter
31
+ include Iudex::Filter::Core
32
+
33
+ include Gravitext::HTMap
34
+
35
+ import 'iudex.filter.core.MDCSetter'
36
+ import 'iudex.filter.core.MDCUnsetter'
37
+
38
+ TKEY = UniMap.create_key( 'tkey' );
39
+
40
+ class RandomFilter < FilterBase # test filter that rejects at random
41
+
42
+ def initialize( odds = 2 )
43
+ @odds = odds
44
+ end
45
+
46
+ def describe
47
+ [ @odds ]
48
+ end
49
+
50
+ def filter( map ) # map is ignored
51
+ rand( @odds ) != 0 # false (reject) only when the random draw is 0
52
+ end
53
+ end
54
+
55
+ def test_filter_chain # runs two open/close rounds through FilterChainFactory#filter, 1000 maps each, with both reporters enabled
56
+ fcf = FilterChainFactory.new( "test" )
57
+ fcf.add_summary_reporter( 1.0 )
58
+ fcf.add_by_filter_reporter( 2.5 )
59
+
60
+ def fcf.filters # singleton override: an MDCSetter, the base filters, then four RandomFilters
61
+ [ MDCSetter.new( TKEY ) ] + super +
62
+ [ 6, 4, 6, 6 ].map { |p| RandomFilter.new( p ) }
63
+ end
64
+
65
+ def fcf.listeners # singleton override: the base listeners plus an MDCUnsetter
66
+ super + [ MDCUnsetter.new( TKEY ) ]
67
+ end
68
+
69
+ 2.times do |r|
70
+ assert( ! fcf.open? )
71
+
72
+ fcf.filter do |chain|
73
+ 1000.times do |t|
74
+ sleep( rand( 10 ) / 1000.0 / ( r + 1 ) ) # random pause of 0 to 9 ms, halved in the second round
75
+ map = UniMap.new
76
+ map.tkey = t
77
+ chain.filter( map )
78
+ end
79
+ end
80
+
81
+ assert( ! fcf.open? ) # filter must have closed the chain it opened
82
+ end
83
+
84
+ end
85
+
86
+ end
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env jruby
2
+ #.hashdot.profile += jruby-shortlived
3
+
4
+ #--
5
+ # Copyright (c) 2008-2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You
9
+ # may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+
20
+ require File.join( File.dirname( __FILE__ ), "setup" )
21
+
22
+ require 'gravitext-util'
23
+ require 'iudex-filter/proc_filter'
24
+
25
+ class TestProcFilter < MiniTest::Unit::TestCase # exercises fltr / ProcFilter: description, registered name, return handling, map mutation
26
+ include Iudex::Filter
27
+ include Gravitext::HTMap
28
+
29
+ UniMap.create_key( 'mkey' ) # key used by test_mutate
30
+ UniMap.define_accessors
31
+
32
+ def test_describe # each expectation uses __LINE__, so it must stay on the same line as the filter it creates
33
+ assert_equal( [ 'test_proc_filter', __LINE__ ], fltr {}.describe )
34
+ assert_equal( [ 'test_proc_filter', __LINE__ ], ProcFilter.new {}.describe )
35
+ assert_equal( [ 'test_proc_filter', __LINE__ ], ProcFilter.new {}.describe )
36
+ end
37
+
38
+ def test_name # the registered name is expected to be the abbreviated class name plus the description parts, joined by '-'
39
+ index = Core::FilterIndex.new
40
+ name, line = index.register( fltr {} ), __LINE__
41
+ assert_equal( "i.f.ProcFilter-test_proc_filter-#{line}", name )
42
+ end
43
+
44
+ def test_return # any block result other than :reject accepts the map
45
+ assert do_f( fltr { } )
46
+ assert do_f( fltr { nil } )
47
+ assert do_f( fltr { true } )
48
+ assert do_f( fltr { :other_sym } )
49
+ assert do_f( fltr { false } ) # Consequence: false is not :reject, so it is accepted too
50
+
51
+ refute do_f( fltr { :reject } )
52
+ end
53
+
54
+ def test_mutate # the block receives the very map passed to filter and may modify it
55
+ map = UniMap.new
56
+ map.mkey = :initial
57
+ assert do_f( fltr { |m| m.mkey = :mutated }, map )
58
+ assert_equal( :mutated, map.mkey )
59
+ end
60
+
61
+ def do_f( f, m = UniMap.new ) # helper: run filter f on map m (a fresh UniMap by default)
62
+ f.filter( m )
63
+ end
64
+
65
+ end
metadata ADDED
@@ -0,0 +1,138 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iudex-filter
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 1.0.0
6
+ platform: java
7
+ authors:
8
+ - David Kellum
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-04-04 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: rjack-slf4j
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: 1.6.1
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: gravitext-util
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ~>
34
+ - !ruby/object:Gem::Version
35
+ version: 1.5.0
36
+ type: :runtime
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: minitest
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.7.1
47
+ - - <
48
+ - !ruby/object:Gem::Version
49
+ version: "2.1"
50
+ type: :development
51
+ version_requirements: *id003
52
+ - !ruby/object:Gem::Dependency
53
+ name: rjack-logback
54
+ prerelease: false
55
+ requirement: &id004 !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "1.0"
61
+ type: :development
62
+ version_requirements: *id004
63
+ - !ruby/object:Gem::Dependency
64
+ name: rjack-tarpit
65
+ prerelease: false
66
+ requirement: &id005 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ~>
70
+ - !ruby/object:Gem::Version
71
+ version: 1.3.0
72
+ type: :development
73
+ version_requirements: *id005
74
+ description: |-
75
+ Iudex is a general purpose web crawler and feed processor in
76
+ ruby/java. The iudex-filter gem contains a fundamental filtering/chain
77
+ of responsibility sub-system.
78
+ email:
79
+ - dek-oss@gravitext.com
80
+ executables: []
81
+
82
+ extensions: []
83
+
84
+ extra_rdoc_files:
85
+ - Manifest.txt
86
+ - History.rdoc
87
+ - README.rdoc
88
+ files:
89
+ - History.rdoc
90
+ - Manifest.txt
91
+ - README.rdoc
92
+ - Rakefile
93
+ - pom.xml
94
+ - lib/iudex-filter/base.rb
95
+ - lib/iudex-filter.rb
96
+ - lib/iudex-filter/by_filter_logger.rb
97
+ - lib/iudex-filter/filter_base.rb
98
+ - lib/iudex-filter/filter_chain_factory.rb
99
+ - lib/iudex-filter/key_helper.rb
100
+ - lib/iudex-filter/proc_filter.rb
101
+ - test/setup.rb
102
+ - test/test_filter_base.rb
103
+ - test/test_filter_chain_factory.rb
104
+ - test/test_proc_filter.rb
105
+ - lib/iudex-filter/iudex-filter-1.0.0.jar
106
+ has_rdoc: true
107
+ homepage: http://github.com/dekellum/iudex
108
+ licenses: []
109
+
110
+ post_install_message:
111
+ rdoc_options:
112
+ - --main
113
+ - README.rdoc
114
+ require_paths:
115
+ - lib
116
+ required_ruby_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: "0"
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ">="
126
+ - !ruby/object:Gem::Version
127
+ version: "0"
128
+ requirements: []
129
+
130
+ rubyforge_project: iudex-filter
131
+ rubygems_version: 1.5.1
132
+ signing_key:
133
+ specification_version: 3
134
+ summary: Iudex is a general purpose web crawler and feed processor in ruby/java
135
+ test_files:
136
+ - test/test_proc_filter.rb
137
+ - test/test_filter_base.rb
138
+ - test/test_filter_chain_factory.rb