feedfilter 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +8 -0
- data/config/feedburner.txt +21 -0
- data/config/feedflare.txt +17 -0
- data/lib/feedfilter.rb +3 -1
- data/lib/feedfilter/ads.rb +62 -0
- data/lib/feedfilter/block_reader.rb +67 -0
- data/lib/feedfilter/version.rb +1 -1
- data/test/helper.rb +14 -0
- data/test/test_ads.rb +12 -3
- data/test/test_ads_all.rb +29 -0
- data/test/test_block_reader.rb +25 -0
- metadata +12 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81bab08453558167699203ea2a2ee6159a0fbaae
|
4
|
+
data.tar.gz: 83d6db5270e1ee8d8444b8334b96d364faba4bfe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59cd7df83d51380bfc8aa9bb524b086d05c24137f83dd1be29b89db9be885c1366b0fea95a4e132e0b251a169c37ba662a42fc89fbaff6bcc20524c0c402634b
|
7
|
+
data.tar.gz: 11ce28d9351a39e2e9a5ad736f7594194ebf7a07fc8417b240418871d3b96ef3487655f684e23fcdc69df08b6e88d28459026bf31f052c3f7c4f8b2260ac3d51
|
data/Manifest.txt
CHANGED
@@ -2,5 +2,13 @@ HISTORY.md
|
|
2
2
|
Manifest.txt
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
|
+
config/feedburner.txt
|
6
|
+
config/feedflare.txt
|
5
7
|
lib/feedfilter.rb
|
8
|
+
lib/feedfilter/ads.rb
|
9
|
+
lib/feedfilter/block_reader.rb
|
6
10
|
lib/feedfilter/version.rb
|
11
|
+
test/helper.rb
|
12
|
+
test/test_ads.rb
|
13
|
+
test/test_ads_all.rb
|
14
|
+
test/test_block_reader.rb
|
@@ -0,0 +1,21 @@
|
|
1
|
+
####################################
|
2
|
+
# feedburner text pattern (regex)
|
3
|
+
#
|
4
|
+
# pattern (regex)
|
5
|
+
# ---
|
6
|
+
# test1
|
7
|
+
# ---
|
8
|
+
# test2
|
9
|
+
# ---
|
10
|
+
# etc.
|
11
|
+
|
12
|
+
|
13
|
+
<img[^>]*?
|
14
|
+
src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1
|
15
|
+
.*?>
|
16
|
+
|
17
|
+
---
|
18
|
+
|
19
|
+
<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>
|
20
|
+
|
21
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
###################################
|
2
|
+
# feedflare text pattern (regex)
|
3
|
+
|
4
|
+
<div[^>]*?
|
5
|
+
class=("|')feedflare\1
|
6
|
+
[^>]*?>
|
7
|
+
.*?
|
8
|
+
<\/div>
|
9
|
+
|
10
|
+
---
|
11
|
+
|
12
|
+
<div class="feedflare">
|
13
|
+
<a href="http://feeds.feedburner.com/~ff/Rubyflow?a=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U">
|
14
|
+
<img src="http://feeds.feedburner.com/~ff/Rubyflow?i=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U" border="0"></img>
|
15
|
+
</a>
|
16
|
+
</div>
|
17
|
+
|
data/lib/feedfilter.rb
CHANGED
@@ -6,14 +6,16 @@
|
|
6
6
|
|
7
7
|
# 3rd party gems/libs
|
8
8
|
|
9
|
-
require '
|
9
|
+
require 'textutils'
|
10
10
|
|
11
11
|
# our own code
|
12
12
|
|
13
13
|
require 'feedfilter/version' # let it always go first
|
14
14
|
require 'feedfilter/ads'
|
15
15
|
|
16
|
+
require 'feedfilter/block_reader' ## todo/fix: move to textutils!!!!
|
16
17
|
|
17
18
|
|
18
19
|
# say hello
|
19
20
|
puts FeedFilter.banner if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
|
21
|
+
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module FeedFilter


## Holds the regex based ads filters; one pattern gets loaded per name
##  from config/<name>.txt (the first block in the file is the pattern).
class AdsFilters

  include LogUtils::Logging

  ## names of the filter definitions read from config/<name>.txt
  NAMES = %w[feedburner feedflare].freeze

  ## Reads and compiles the pattern for every name in NAMES;
  ##  filters get stored as [name, regexp] pairs in load order.
  def initialize
    @filters = NAMES.map do |name|
      logger.debug " add ads filter #{name}"

      blocks = BlockReader.from_file( "#{FeedFilter.root}/config/#{name}.txt" ).read
      ## Note: the pattern is spread over several lines in the config file;
      ##   remove newlines and spaces before compiling (regex w/o spaces)
      ## Note: add multiline option and ignore case
      source = blocks[0].gsub( /[\n ]/, '' )
      [name, Regexp.new( source, Regexp::MULTILINE|Regexp::IGNORECASE )]
    end
  end

  ## Runs all filters (in load order) over text and removes every match.
  ##
  ##  text    - string to clean up (gets NOT changed in place)
  ##  returns - new string with all matches replaced by the empty string
  def filter( text )
    @filters.each do |name, pattern|
      text = text.gsub( pattern ) do |m|
        ## Note: m is the complete matched text (a regular string)
        ## Note: report the stripped text via the logger (debug level)
        ##   instead of printing to stdout - a library should not write
        ##   to the output of the program using it
        logger.debug "strip #{name}: #{m.inspect}"
        ''
      end
    end # each filter
    text
  end # filter

end # AdsFilters


## Strips all known ads from text; the filters get built on first use
##  and are reused for all following calls.
##
##  Note: kept in an instance variable of the FeedFilter module itself
##   (instead of a @@ class variable that would be shared with everything
##   that includes the module)
def self.strip_ads( text )
  @ads_filters ||= FeedFilter::AdsFilters.new
  @ads_filters.filter( text )
end


## Mixin - adds strip_ads as an instance method (forwards to FeedFilter.strip_ads)
module AdsFilter
  def strip_ads( text )
    FeedFilter.strip_ads( text )
  end
end # module AdsFilter

end # module FeedFilter
|
62
|
+
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
####
|
4
|
+
## todo: move to textutils for re(use) ?????
|
5
|
+
|
6
|
+
class BlockReader

  include LogUtils::Logging

  ## Creates a reader for the text stored in the file at path.
  def self.from_file( path )
    ## nb: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ##   - see textutils/utils.rb
    from_string( File.read_utf8( path ) )
  end

  ## Creates a reader for the passed in text.
  def self.from_string( text )
    new( text )
  end

  def initialize( text )
    @text = text
  end

  ## Splits the text into blocks; blocks are separated by a line made up
  ##  of three or more dashes (---).
  ##
  ## returns an array of strings (one string per block) e.g.
  ##  [
  ##   "line1\nline2",          ## -- block1
  ##   "line1\nline2\nline3"    ## -- block2
  ##  ]
  def read
    result  = []
    current = ""

    @text.each_line do |raw|
      # comments allowed:
      #  1) #####  (shell/ruby style)
      #  - comment lines get skipped and are NOT copied to the result
      if raw =~ /^\s*#/
        logger.debug 'skipping comment line'
        next
      end

      # Note: blank lines are NOT skipped; they end up as empty lines in the block

      # remove leading and trailing whitespace
      stripped = raw.strip

      if stripped =~ /^-{3,}$/    ## three or more dashes
        logger.debug 'block separator'
        result << current.strip   ## note: strip leading and trailing whitespace
        current = ""
        next
      end

      current << "#{stripped}\n"
    end # each line

    ## whatever follows the last separator is the last block
    result << current.strip       ## note: strip leading and trailing whitespace
    result
  end # method read

end # class BlockReader
|
67
|
+
|
data/lib/feedfilter/version.rb
CHANGED
data/test/helper.rb
ADDED
data/test/test_ads.rb
CHANGED
@@ -11,19 +11,28 @@ class TestAds < MiniTest::Test
|
|
11
11
|
|
12
12
|
include FeedFilter::AdsFilter
|
13
13
|
|
14
|
+
|
14
15
|
def test_feedflare_ads
|
15
|
-
text
|
16
|
+
text =<<EOS
|
17
|
+
<div class="feedflare">
|
18
|
+
<a href="http://feeds.feedburner.com/~ff/Rubyflow?a=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U">
|
19
|
+
<img src="http://feeds.feedburner.com/~ff/Rubyflow?i=1wUDnBztAJY:fzqBvTOGB9M:3H-1DwQop_U" border="0"></img>
|
20
|
+
</a>
|
21
|
+
</div>
|
22
|
+
EOS
|
16
23
|
text = strip_ads( text ).strip
|
17
24
|
|
18
25
|
assert_equal '', text
|
19
26
|
end
|
20
27
|
|
28
|
+
|
21
29
|
def test_feedburner_bugs
|
22
|
-
text
|
30
|
+
text =<<EOS
|
31
|
+
<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>
|
32
|
+
EOS
|
23
33
|
text = strip_ads( text ).strip
|
24
34
|
|
25
35
|
assert_equal '', text
|
26
36
|
end
|
27
37
|
|
28
|
-
|
29
38
|
end # class TestAds
|
@@ -0,0 +1,29 @@
|
|
1
|
+
###
|
2
|
+
# to run use
|
3
|
+
# ruby -I ./lib -I ./test test/test_ads_all.rb
|
4
|
+
# or better
|
5
|
+
# rake test
|
6
|
+
|
7
|
+
require 'helper'
|
8
|
+
|
9
|
+
|
10
|
+
class TestAdsAll < MiniTest::Test

  ## For every config file: compile the first block into a pattern and
  ##  check that it removes the second block (the sample) completely.
  def test_all
    %w[feedburner feedflare].each do |name|
      path   = "#{FeedFilter.root}/config/#{name}.txt"
      blocks = BlockReader.from_file( path ).read

      ## Note: replace newline and space in string for regex (w/o spaces)
      source = blocks[0].gsub( /[\n ]/, '' )
      ## Note: add multiline option and ignore case
      pattern = Regexp.new( source, Regexp::MULTILINE|Regexp::IGNORECASE )
      sample  = blocks[1]

      assert_equal '', sample.gsub( pattern, '' ).strip
    end
  end

end # class TestAdsAll
|
@@ -0,0 +1,25 @@
|
|
1
|
+
###
|
2
|
+
# to run use
|
3
|
+
# ruby -I ./lib -I ./test test/test_block_reader.rb
|
4
|
+
# or better
|
5
|
+
# rake test
|
6
|
+
|
7
|
+
require 'helper'
|
8
|
+
|
9
|
+
|
10
|
+
class TestBlockReader < MiniTest::Test

  ## config/feedburner.txt must split into exactly two blocks:
  ##  the pattern (first block) and the sample text (second block).
  def test_feedburner
    path   = "#{FeedFilter.root}/config/feedburner.txt"
    blocks = BlockReader.from_file( path ).read

    ## note: regex - use %q - do NOT escape \. or \1 etc.
    expected_pattern = %q{<img[^>]*?src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1.*?>}

    expected_sample = %q{<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>}

    assert_equal 2, blocks.size

    ## note: the pattern block spans several lines; remove newlines and spaces before comparing
    actual_pattern = blocks[0].gsub( /[\n ]/, '' )
    assert_equal expected_pattern, actual_pattern
    assert_equal expected_sample, blocks[1]
  end

end # class TestBlockReader
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedfilter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
@@ -60,15 +60,24 @@ extra_rdoc_files:
|
|
60
60
|
- HISTORY.md
|
61
61
|
- Manifest.txt
|
62
62
|
- README.md
|
63
|
+
- config/feedburner.txt
|
64
|
+
- config/feedflare.txt
|
63
65
|
files:
|
64
66
|
- ".gemtest"
|
65
67
|
- HISTORY.md
|
66
68
|
- Manifest.txt
|
67
69
|
- README.md
|
68
70
|
- Rakefile
|
71
|
+
- config/feedburner.txt
|
72
|
+
- config/feedflare.txt
|
69
73
|
- lib/feedfilter.rb
|
74
|
+
- lib/feedfilter/ads.rb
|
75
|
+
- lib/feedfilter/block_reader.rb
|
70
76
|
- lib/feedfilter/version.rb
|
77
|
+
- test/helper.rb
|
71
78
|
- test/test_ads.rb
|
79
|
+
- test/test_ads_all.rb
|
80
|
+
- test/test_block_reader.rb
|
72
81
|
homepage: https://github.com/feedreader/feed.filter
|
73
82
|
licenses:
|
74
83
|
- Public Domain
|
@@ -96,4 +105,6 @@ signing_key:
|
|
96
105
|
specification_version: 4
|
97
106
|
summary: feedfilter - feed filter and rules for easy (re)use
|
98
107
|
test_files:
|
108
|
+
- test/test_ads_all.rb
|
99
109
|
- test/test_ads.rb
|
110
|
+
- test/test_block_reader.rb
|