feedfilter 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +8 -0
- data/config/feedburner.txt +21 -0
- data/config/feedflare.txt +17 -0
- data/lib/feedfilter.rb +3 -1
- data/lib/feedfilter/ads.rb +62 -0
- data/lib/feedfilter/block_reader.rb +67 -0
- data/lib/feedfilter/version.rb +1 -1
- data/test/helper.rb +14 -0
- data/test/test_ads.rb +12 -3
- data/test/test_ads_all.rb +29 -0
- data/test/test_block_reader.rb +25 -0
- metadata +12 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81bab08453558167699203ea2a2ee6159a0fbaae
|
4
|
+
data.tar.gz: 83d6db5270e1ee8d8444b8334b96d364faba4bfe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59cd7df83d51380bfc8aa9bb524b086d05c24137f83dd1be29b89db9be885c1366b0fea95a4e132e0b251a169c37ba662a42fc89fbaff6bcc20524c0c402634b
|
7
|
+
data.tar.gz: 11ce28d9351a39e2e9a5ad736f7594194ebf7a07fc8417b240418871d3b96ef3487655f684e23fcdc69df08b6e88d28459026bf31f052c3f7c4f8b2260ac3d51
|
data/Manifest.txt
CHANGED
@@ -2,5 +2,13 @@ HISTORY.md
|
|
2
2
|
Manifest.txt
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
|
+
config/feedburner.txt
|
6
|
+
config/feedflare.txt
|
5
7
|
lib/feedfilter.rb
|
8
|
+
lib/feedfilter/ads.rb
|
9
|
+
lib/feedfilter/block_reader.rb
|
6
10
|
lib/feedfilter/version.rb
|
11
|
+
test/helper.rb
|
12
|
+
test/test_ads.rb
|
13
|
+
test/test_ads_all.rb
|
14
|
+
test/test_block_reader.rb
|
@@ -0,0 +1,21 @@
|
|
1
|
+
####################################
|
2
|
+
# feedburner text pattern (regex)
|
3
|
+
#
|
4
|
+
# pattern (regex)
|
5
|
+
# ---
|
6
|
+
# test1
|
7
|
+
# ---
|
8
|
+
# test2
|
9
|
+
# ---
|
10
|
+
# etc.
|
11
|
+
|
12
|
+
|
13
|
+
<img[^>]*?
|
14
|
+
src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1
|
15
|
+
.*?>
|
16
|
+
|
17
|
+
---
|
18
|
+
|
19
|
+
<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>
|
20
|
+
|
21
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
###################################
|
2
|
+
# feedflare text pattern (regex)
|
3
|
+
|
4
|
+
<div[^>]*?
|
5
|
+
class=("|')feedflare\1
|
6
|
+
[^>]*?>
|
7
|
+
.*?
|
8
|
+
<\/div>
|
9
|
+
|
10
|
+
---
|
11
|
+
|
12
|
+
<div class="feedflare">
|
13
|
+
<a href="http://feeds.feedburner.com/~ff/Rubyflow?a=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U">
|
14
|
+
<img src="http://feeds.feedburner.com/~ff/Rubyflow?i=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U" border="0"></img>
|
15
|
+
</a>
|
16
|
+
</div>
|
17
|
+
|
data/lib/feedfilter.rb
CHANGED
@@ -6,14 +6,16 @@
|
|
6
6
|
|
7
7
|
# 3rd party gems/libs
|
8
8
|
|
9
|
-
require '
|
9
|
+
require 'textutils'
|
10
10
|
|
11
11
|
# our own code
|
12
12
|
|
13
13
|
require 'feedfilter/version' # let it always go first
|
14
14
|
require 'feedfilter/ads'
|
15
15
|
|
16
|
+
require 'feedfilter/block_reader' ## todo/fix: move to textutils!!!!
|
16
17
|
|
17
18
|
|
18
19
|
# say hello
|
19
20
|
puts FeedFilter.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
|
21
|
+
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module FeedFilter
|
5
|
+
|
6
|
+
|
7
|
+
class AdsFilters
|
8
|
+
|
9
|
+
include LogUtils::Logging
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@filters=[]
|
13
|
+
|
14
|
+
names=[
|
15
|
+
'feedburner',
|
16
|
+
'feedflare'
|
17
|
+
]
|
18
|
+
|
19
|
+
names.each do |name|
|
20
|
+
logger.debug " add ads filter #{name}"
|
21
|
+
|
22
|
+
b = BlockReader.from_file( "#{FeedFilter.root}/config/#{name}.txt").read
|
23
|
+
## Note: replace newline and space in string for regex (w/o spaces)
|
24
|
+
## Note: add multiline option and ignore case
|
25
|
+
regexp = Regexp.new( b[0].gsub( /[\n ]/, '' ), Regexp::MULTILINE|Regexp::IGNORECASE )
|
26
|
+
@filters << [name, regexp]
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def filter( text )
|
31
|
+
@filters.each do |f|
|
32
|
+
name = f[0]
|
33
|
+
pattern = f[1]
|
34
|
+
|
35
|
+
text = text.gsub( pattern ) do |m|
|
36
|
+
# Note: m - match is just a regular string
|
37
|
+
## double check if it's true also if regex contains capture groups ???
|
38
|
+
puts "strip #{name}:"
|
39
|
+
pp m
|
40
|
+
''
|
41
|
+
end
|
42
|
+
end # each filter
|
43
|
+
text
|
44
|
+
end # filter
|
45
|
+
|
46
|
+
end # AdsFilters
|
47
|
+
|
48
|
+
|
49
|
+
def self.strip_ads( text )
|
50
|
+
@@ads_filters ||= FeedFilter::AdsFilters.new
|
51
|
+
@@ads_filters.filter( text )
|
52
|
+
end
|
53
|
+
|
54
|
+
|
55
|
+
module AdsFilter
|
56
|
+
def strip_ads( text )
|
57
|
+
FeedFilter.strip_ads( text )
|
58
|
+
end
|
59
|
+
end # module AdsFilter
|
60
|
+
|
61
|
+
end # module FeedFilter
|
62
|
+
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
####
|
4
|
+
## todo: move to textutils for re(use) ?????
|
5
|
+
|
6
|
+
class BlockReader
|
7
|
+
|
8
|
+
include LogUtils::Logging
|
9
|
+
|
10
|
+
def self.from_file( path )
|
11
|
+
## nb: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
|
12
|
+
## - see textutils/utils.rb
|
13
|
+
text = File.read_utf8( path )
|
14
|
+
self.from_string( text )
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.from_string( text )
|
18
|
+
self.new( text )
|
19
|
+
end
|
20
|
+
|
21
|
+
def initialize( text )
|
22
|
+
@text = text
|
23
|
+
end
|
24
|
+
|
25
|
+
def read
|
26
|
+
## note: returns an array of (line) strings e.g.
|
27
|
+
## [
|
28
|
+
## "line1\nline2", ## -- block1
|
29
|
+
## "line1\nline2\nline3" ## -- block2
|
30
|
+
## ]
|
31
|
+
|
32
|
+
blocks = []
|
33
|
+
buf = ""
|
34
|
+
|
35
|
+
@text.each_line do |line|
|
36
|
+
# comments allowed:
|
37
|
+
# 1) ##### (shell/ruby style)
|
38
|
+
if line =~ /^\s*#/
|
39
|
+
# skip comments and do NOT copy to result (keep comments secret!)
|
40
|
+
logger.debug 'skipping comment line'
|
41
|
+
next
|
42
|
+
end
|
43
|
+
|
44
|
+
# if line =~ /^\s*$/
|
45
|
+
# # skip comment or blank line
|
46
|
+
# logger.debug 'skipping blank line'
|
47
|
+
# next
|
48
|
+
# end
|
49
|
+
|
50
|
+
# pass 2) remove leading and trailing whitespace
|
51
|
+
line = line.strip
|
52
|
+
|
53
|
+
if line =~ /^-{3,}$/ ## three or more dashes
|
54
|
+
logger.debug 'block separator'
|
55
|
+
blocks << buf.strip ## note: strip leading and trailing whitespace
|
56
|
+
buf = ""
|
57
|
+
else
|
58
|
+
buf << "#{line}\n"
|
59
|
+
end
|
60
|
+
end # each lines
|
61
|
+
|
62
|
+
blocks << buf.strip ## note: strip leading and trailing whitespace
|
63
|
+
blocks
|
64
|
+
end # method read
|
65
|
+
|
66
|
+
end # class BlockReader
|
67
|
+
|
data/lib/feedfilter/version.rb
CHANGED
data/test/helper.rb
ADDED
data/test/test_ads.rb
CHANGED
@@ -11,19 +11,28 @@ class TestAds < MiniTest::Test
|
|
11
11
|
|
12
12
|
include FeedFilter::AdsFilter
|
13
13
|
|
14
|
+
|
14
15
|
def test_feedflare_ads
|
15
|
-
text
|
16
|
+
text =<<EOS
|
17
|
+
<div class="feedflare">
|
18
|
+
<a href="http://feeds.feedburner.com/~ff/Rubyflow?a=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U">
|
19
|
+
<img src="http://feeds.feedburner.com/~ff/Rubyflow?i=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U" border="0"></img>
|
20
|
+
</a>
|
21
|
+
</div>
|
22
|
+
EOS
|
16
23
|
text = strip_ads( text ).strip
|
17
24
|
|
18
25
|
assert_equal '', text
|
19
26
|
end
|
20
27
|
|
28
|
+
|
21
29
|
def test_feedburner_bugs
|
22
|
-
text
|
30
|
+
text =<<EOS
|
31
|
+
<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>
|
32
|
+
EOS
|
23
33
|
text = strip_ads( text ).strip
|
24
34
|
|
25
35
|
assert_equal '', text
|
26
36
|
end
|
27
37
|
|
28
|
-
|
29
38
|
end # class TestAds
|
@@ -0,0 +1,29 @@
|
|
1
|
+
###
|
2
|
+
# to run use
|
3
|
+
# ruby -I ./lib -I ./test test/test_ads_all.rb
|
4
|
+
# or better
|
5
|
+
# rake test
|
6
|
+
|
7
|
+
require 'helper'
|
8
|
+
|
9
|
+
|
10
|
+
class TestAdsAll < MiniTest::Test
|
11
|
+
|
12
|
+
def test_all
|
13
|
+
names=[
|
14
|
+
'feedburner',
|
15
|
+
'feedflare'
|
16
|
+
]
|
17
|
+
|
18
|
+
names.each do |name|
|
19
|
+
b = BlockReader.from_file( "#{FeedFilter.root}/config/#{name}.txt").read
|
20
|
+
## Note: replace newline and space in string for regex (w/o spaces)
|
21
|
+
## Note: add multiline option and ignore case
|
22
|
+
regexp = Regexp.new( b[0].gsub( /[\n ]/, '' ), Regexp::MULTILINE|Regexp::IGNORECASE )
|
23
|
+
test1 = b[1]
|
24
|
+
|
25
|
+
assert_equal '', test1.gsub( regexp, '' ).strip
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
end # class TestAdsAll
|
@@ -0,0 +1,25 @@
|
|
1
|
+
###
|
2
|
+
# to run use
|
3
|
+
# ruby -I ./lib -I ./test test/test_block_reader.rb
|
4
|
+
# or better
|
5
|
+
# rake test
|
6
|
+
|
7
|
+
require 'helper'
|
8
|
+
|
9
|
+
|
10
|
+
class TestBlockReader < MiniTest::Test
|
11
|
+
|
12
|
+
def test_feedburner
|
13
|
+
blocks = BlockReader.from_file( "#{FeedFilter.root}/config/feedburner.txt" ).read
|
14
|
+
|
15
|
+
## note: regex - use %q - do NOT escape \. or \1 etc.
|
16
|
+
pattern = %q{<img[^>]*?src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1.*?>}
|
17
|
+
|
18
|
+
test1 = %q{<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>}
|
19
|
+
|
20
|
+
assert_equal 2, blocks.size
|
21
|
+
assert_equal pattern, blocks[0].gsub( /[\n ]/, '' ) ## note: need to remove newlines and spaces
|
22
|
+
assert_equal test1, blocks[1]
|
23
|
+
end
|
24
|
+
|
25
|
+
end # class TestBlockReader
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedfilter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
@@ -60,15 +60,24 @@ extra_rdoc_files:
|
|
60
60
|
- HISTORY.md
|
61
61
|
- Manifest.txt
|
62
62
|
- README.md
|
63
|
+
- config/feedburner.txt
|
64
|
+
- config/feedflare.txt
|
63
65
|
files:
|
64
66
|
- ".gemtest"
|
65
67
|
- HISTORY.md
|
66
68
|
- Manifest.txt
|
67
69
|
- README.md
|
68
70
|
- Rakefile
|
71
|
+
- config/feedburner.txt
|
72
|
+
- config/feedflare.txt
|
69
73
|
- lib/feedfilter.rb
|
74
|
+
- lib/feedfilter/ads.rb
|
75
|
+
- lib/feedfilter/block_reader.rb
|
70
76
|
- lib/feedfilter/version.rb
|
77
|
+
- test/helper.rb
|
71
78
|
- test/test_ads.rb
|
79
|
+
- test/test_ads_all.rb
|
80
|
+
- test/test_block_reader.rb
|
72
81
|
homepage: https://github.com/feedreader/feed.filter
|
73
82
|
licenses:
|
74
83
|
- Public Domain
|
@@ -96,4 +105,6 @@ signing_key:
|
|
96
105
|
specification_version: 4
|
97
106
|
summary: feedfilter - feed filter and rules for easy (re)use
|
98
107
|
test_files:
|
108
|
+
- test/test_ads_all.rb
|
99
109
|
- test/test_ads.rb
|
110
|
+
- test/test_block_reader.rb
|