tag_remover 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/tag_remover.rb +83 -29
- data/lib/tag_remover/version.rb +1 -1
- data/spec/{tag_helper_spec.rb → tag_remover_spec.rb} +72 -58
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e5782c529dc1431ea66b7f8c1f23b986b7898c24
|
4
|
+
data.tar.gz: 151094b85405708da5f121e724329742c2b2c44b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 061fae588623198bd1be710c32d65e4ea983934d2d7ef0b2a7d698f29e0b95074501071c98cc128a591dd35db69eb48feebb7c891a3e028f8597a19526a4196b
|
7
|
+
data.tar.gz: 63afd88c539c6c24bbce653c79ada29977cf18340ced56de75baef26940d1dfb51e0a74dc5957c90933a3405ce43016a5d1912c84b17ae3fbc0281968396f3c9
|
data/lib/tag_remover.rb
CHANGED
@@ -2,43 +2,97 @@ require "tag_remover/version"
|
|
2
2
|
|
3
3
|
module TagRemover
  # Streams markup character by character from an input to an output,
  # dropping every element whose tag name is listed in +opts[:remove_tags]+
  # (including everything nested inside it). Each surviving tag or text
  # chunk is written on its own line.
  class Worker
    # Zero or more +name="value"+ / +name='value'+ attributes. Values may be
    # empty and whitespace is allowed around the equals sign.
    ATTRIBUTES = /(?:\s+[^\s=<>\/]+\s*=\s*(?:"[^"]*"|'[^']*'))*/

    # Pre-compiled matchers for one removable tag name.
    Patterns = Struct.new(:opening, :closing, :single)

    # @param input  [#each_char] source of markup (IO, StringIO, String, ...)
    # @param output [#write] destination for the filtered markup
    # @param opts   [Hash] +:remove_tags+ (Array of tag names) and
    #   +:close_streams+ (close both streams when done)
    def initialize(input, output, opts = {})
      @input = input
      @output = output
      @opts = opts
    end

    # Runs the filter over the whole input.
    #
    # @return [nil]
    def perform
      @tags_to_remove = @opts[:remove_tags] || []
      # Build the regexes once per run instead of once per chunk.
      @patterns = @tags_to_remove.map do |name|
        Patterns.new(opening_tag(name), closing_tag(name), single_tag(name))
      end
      @in_tag = nil
      @depth = 0

      each_tag do |tag, type|
        process_tag tag, type
      end

      nil
    ensure
      # Close in +ensure+ so the streams are released even if reading or
      # writing raised part-way through.
      if @opts[:close_streams]
        @input.close
        @output.close
      end
    end

    private

    # Handles one chunk: tracks nesting while inside a removed element,
    # otherwise writes the chunk unless it starts or is a removable tag.
    def process_tag(tag, type)
      if @in_tag
        track_depth tag
      elsif type != :tag || !consume_removable_tag(tag)
        @output.write "#{tag}\n" unless tag.empty?
      end
    end

    # While inside a removed element, counts nested open/close tags of the
    # same name and leaves removal mode once the outermost one is closed.
    def track_depth(tag)
      patterns = @patterns[@in_tag]

      if tag =~ patterns.opening
        @depth += 1
      elsif tag =~ patterns.closing
        @depth -= 1
        @in_tag = nil if @depth.zero?
      end
    end

    # Returns true when +tag+ opens a removable element (and enters removal
    # mode) or is a self-closing removable tag; false otherwise.
    def consume_removable_tag(tag)
      @patterns.each_with_index do |patterns, index|
        if tag =~ patterns.opening
          @in_tag = index
          @depth = 1
          return true
        elsif tag =~ patterns.single
          return true
        end
      end

      false
    end

    # Matches +<tag ...>+. The name is escaped so regex metacharacters in a
    # tag name (e.g. the dot in +a.b+) are matched literally.
    def opening_tag(tag)
      /\A<#{Regexp.escape(tag.to_s)}#{ATTRIBUTES}\s*>\z/
    end

    # Matches +</tag>+, allowing whitespace before the closing bracket.
    def closing_tag(tag)
      /\A<\/#{Regexp.escape(tag.to_s)}\s*>\z/
    end

    # Matches the self-closing form +<tag ... />+.
    def single_tag(tag)
      /\A<#{Regexp.escape(tag.to_s)}#{ATTRIBUTES}\s*\/\s*>\z/
    end

    # Splits the input into chunks and yields +(stripped_chunk, type)+ where
    # type is +:tag+ (from '<' up to and including '>') or +:text+.
    def each_tag
      acc = "".dup
      type = nil

      @input.each_char do |c|
        case c
        when '<'
          yield acc.strip, type if type == :text
          type = :tag
          acc = c.dup
        when '>'
          acc << c # append in place; += would copy the buffer per character
          yield acc.strip, type
          type = nil
          acc = "".dup
        else
          type ||= :text
          acc << c
        end
      end

      # Flush whatever is left after the last '>' (or an unterminated tag).
      yield acc.strip, type if type
    end
  end

  # Copies +input+ to +output+, omitting the elements named in
  # +opts[:remove_tags]+.
  #
  # @param input  [#each_char] markup source
  # @param output [#write] destination
  # @param opts   [Hash] +:remove_tags+ and +:close_streams+ (see Worker)
  # @return [nil]
  def self.process(input, output, opts = {})
    Worker.new(input, output, opts).perform
  end
end
|
data/lib/tag_remover/version.rb
CHANGED
@@ -3,72 +3,89 @@ require 'spec_helper'
|
|
3
3
|
describe TagRemover do
|
4
4
|
describe ".process" do
|
5
5
|
it "removes elements" do
|
6
|
-
input =
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
6
|
+
input = """
|
7
|
+
<root>
|
8
|
+
<remove>
|
9
|
+
Some contents
|
10
|
+
</remove>
|
11
|
+
<remove >
|
12
|
+
</remove >
|
13
|
+
</root>"""
|
14
|
+
|
15
15
|
output = StringIO.new
|
16
16
|
tags_to_remove = ['remove']
|
17
17
|
|
18
18
|
TagRemover.process input, output, remove_tags: tags_to_remove
|
19
19
|
|
20
|
-
expect(output.string).to eq """
|
21
|
-
|
22
|
-
|
23
|
-
"""
|
20
|
+
expect(output.string).to eq """<root>
|
21
|
+
</root>
|
22
|
+
"""
|
24
23
|
end
|
25
24
|
|
26
25
|
it "removes single tags" do
|
27
|
-
input = StringIO.new """
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
"""
|
26
|
+
input = StringIO.new """<root>
|
27
|
+
<remove/>
|
28
|
+
<remove />
|
29
|
+
<remove/ >
|
30
|
+
<remove / >
|
31
|
+
</root>
|
32
|
+
"""
|
35
33
|
output = StringIO.new
|
36
34
|
tags_to_remove = ['remove']
|
37
35
|
|
38
36
|
TagRemover.process input, output, remove_tags: tags_to_remove
|
39
37
|
|
40
|
-
expect(output.string).to eq """
|
41
|
-
|
42
|
-
|
43
|
-
"""
|
38
|
+
expect(output.string).to eq """<root>
|
39
|
+
</root>
|
40
|
+
"""
|
44
41
|
end
|
45
42
|
|
46
43
|
it "removes tags with attributes" do
|
47
|
-
input = StringIO.new '''
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
44
|
+
input = StringIO.new '''<root>
|
45
|
+
<remove x="y" a="b" >
|
46
|
+
</remove>
|
47
|
+
<remove x="y" a="b" />
|
48
|
+
</root>
|
49
|
+
'''
|
50
|
+
output = StringIO.new
|
51
|
+
tags_to_remove = ['remove']
|
52
|
+
|
53
|
+
TagRemover.process input, output, remove_tags: tags_to_remove
|
54
|
+
|
55
|
+
expect(output.string).to eq """<root>
|
56
|
+
</root>
|
57
|
+
"""
|
58
|
+
end
|
59
|
+
|
60
|
+
it "deals with multiple tags on one line" do
|
61
|
+
input = StringIO.new '''<root><remove>
|
62
|
+
</remove>
|
63
|
+
<remove/><keep>
|
64
|
+
</keep>
|
65
|
+
<remove>Bad Stuff!
|
66
|
+
</remove>Happy Stuff :)<keep/>
|
67
|
+
</root>
|
68
|
+
'''
|
54
69
|
output = StringIO.new
|
55
70
|
tags_to_remove = ['remove']
|
56
71
|
|
57
72
|
TagRemover.process input, output, remove_tags: tags_to_remove
|
58
73
|
|
59
|
-
expect(output.string).to eq """
|
60
|
-
|
61
|
-
|
62
|
-
|
74
|
+
expect(output.string).to eq """<root>
|
75
|
+
<keep>
|
76
|
+
</keep>
|
77
|
+
Happy Stuff :)
|
78
|
+
<keep/>
|
79
|
+
</root>
|
80
|
+
"""
|
63
81
|
end
|
64
82
|
|
65
83
|
it "keeps elements" do
|
66
|
-
input = StringIO.new """
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
"""
|
84
|
+
input = StringIO.new """<root>
|
85
|
+
<keep>
|
86
|
+
</keep>
|
87
|
+
</root>
|
88
|
+
"""
|
72
89
|
tags_to_remove = ['remove']
|
73
90
|
|
74
91
|
output = StringIO.new
|
@@ -79,10 +96,9 @@ describe TagRemover do
|
|
79
96
|
end
|
80
97
|
|
81
98
|
it "is ok with doing nothing" do
|
82
|
-
input = StringIO.new """
|
83
|
-
|
84
|
-
|
85
|
-
"""
|
99
|
+
input = StringIO.new """<root>
|
100
|
+
</root>
|
101
|
+
"""
|
86
102
|
output = StringIO.new
|
87
103
|
|
88
104
|
TagRemover.process input, output
|
@@ -91,23 +107,21 @@ describe TagRemover do
|
|
91
107
|
end
|
92
108
|
|
93
109
|
it "removes nested tags" do
|
94
|
-
input = StringIO.new """
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
"""
|
110
|
+
input = StringIO.new """<root>
|
111
|
+
<remove>
|
112
|
+
<remove>
|
113
|
+
</remove>
|
114
|
+
</remove>
|
115
|
+
</root>
|
116
|
+
"""
|
102
117
|
tags_to_remove = ['remove']
|
103
118
|
output = StringIO.new
|
104
119
|
|
105
120
|
TagRemover.process input, output, remove_tags: tags_to_remove
|
106
121
|
|
107
|
-
expect(output.string).to eq """
|
108
|
-
|
109
|
-
|
110
|
-
"""
|
122
|
+
expect(output.string).to eq """<root>
|
123
|
+
</root>
|
124
|
+
"""
|
111
125
|
end
|
112
126
|
|
113
127
|
it "closes the streams" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tag_remover
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Smith
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -72,7 +72,7 @@ files:
|
|
72
72
|
- lib/tag_remover.rb
|
73
73
|
- lib/tag_remover/version.rb
|
74
74
|
- spec/spec_helper.rb
|
75
|
-
- spec/tag_helper_spec.rb
|
75
|
+
- spec/tag_remover_spec.rb
|
76
76
|
- tag_remover.gemspec
|
77
77
|
homepage: https://github.com/jellymann/tag_remover
|
78
78
|
licenses:
|
@@ -100,4 +100,4 @@ specification_version: 4
|
|
100
100
|
summary: Remove elements from large XML documents.
|
101
101
|
test_files:
|
102
102
|
- spec/spec_helper.rb
|
103
|
-
- spec/tag_helper_spec.rb
|
103
|
+
- spec/tag_remover_spec.rb
|