tag_remover 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tag_remover.rb +83 -29
- data/lib/tag_remover/version.rb +1 -1
- data/spec/{tag_helper_spec.rb → tag_remover_spec.rb} +72 -58
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e5782c529dc1431ea66b7f8c1f23b986b7898c24
|
4
|
+
data.tar.gz: 151094b85405708da5f121e724329742c2b2c44b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 061fae588623198bd1be710c32d65e4ea983934d2d7ef0b2a7d698f29e0b95074501071c98cc128a591dd35db69eb48feebb7c891a3e028f8597a19526a4196b
|
7
|
+
data.tar.gz: 63afd88c539c6c24bbce653c79ada29977cf18340ced56de75baef26940d1dfb51e0a74dc5957c90933a3405ce43016a5d1912c84b17ae3fbc0281968396f3c9
|
data/lib/tag_remover.rb
CHANGED
@@ -2,43 +2,97 @@ require "tag_remover/version"
|
|
2
2
|
|
3
3
|
module TagRemover
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
class Worker
|
6
|
+
def initialize input, output, opts
|
7
|
+
@input = input
|
8
|
+
@output = output
|
9
|
+
@opts = opts
|
10
|
+
end
|
7
11
|
|
8
|
-
|
9
|
-
|
12
|
+
def perform
|
13
|
+
@tags_to_remove = @opts[:remove_tags] || []
|
10
14
|
|
11
|
-
|
12
|
-
|
13
|
-
|
15
|
+
@in_tag = nil
|
16
|
+
@depth = 0
|
17
|
+
|
18
|
+
each_tag do |tag, type|
|
19
|
+
process_tag tag, type
|
20
|
+
end
|
21
|
+
|
22
|
+
if @opts[:close_streams]
|
23
|
+
@input.close
|
24
|
+
@output.close
|
25
|
+
end
|
26
|
+
end
|
14
27
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
28
|
+
private
|
29
|
+
def process_tag tag, type
|
30
|
+
if @in_tag
|
31
|
+
in_tag_str = @tags_to_remove[@in_tag]
|
32
|
+
|
33
|
+
if tag =~ opening_tag(in_tag_str)
|
34
|
+
@depth += 1
|
35
|
+
elsif tag =~ closing_tag(in_tag_str)
|
36
|
+
@depth -= 1
|
37
|
+
@in_tag = nil if @depth == 0
|
38
|
+
end
|
39
|
+
else
|
40
|
+
found_tag = false
|
41
|
+
@tags_to_remove.each_with_index do |tag_str,index|
|
42
|
+
if tag =~ opening_tag(tag_str)
|
43
|
+
@in_tag = index
|
44
|
+
@depth = 1
|
45
|
+
found_tag = true
|
46
|
+
break
|
47
|
+
elsif tag =~ single_tag(tag_str)
|
48
|
+
found_tag = true
|
49
|
+
break
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
@output.write "#{tag}\n" unless found_tag || tag.empty?
|
20
54
|
end
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
55
|
+
end
|
56
|
+
|
57
|
+
def opening_tag tag
|
58
|
+
/<#{tag}(\s|(\s+.+?=(".+?"|'.+?')))*?>/
|
59
|
+
end
|
60
|
+
|
61
|
+
def closing_tag tag
|
62
|
+
/<\/#{tag}\s*>/
|
63
|
+
end
|
64
|
+
|
65
|
+
def single_tag tag
|
66
|
+
/<#{tag}(\s|(\s+.+?=(".+?"|'.+?')))*?\/\s*>/
|
67
|
+
end
|
68
|
+
|
69
|
+
def each_tag &block
|
70
|
+
acc = ""
|
71
|
+
type = nil
|
72
|
+
|
73
|
+
@input.each_char do |c|
|
74
|
+
case c
|
75
|
+
when '<'
|
76
|
+
yield acc.strip, type if type == :text
|
77
|
+
type = :tag
|
78
|
+
acc = c
|
79
|
+
when '>'
|
80
|
+
acc += c
|
81
|
+
yield acc.strip, type
|
82
|
+
type = nil
|
83
|
+
acc = ""
|
84
|
+
else
|
85
|
+
type ||= :text
|
86
|
+
acc += c
|
32
87
|
end
|
33
88
|
end
|
34
|
-
|
89
|
+
|
90
|
+
yield acc.strip, type if type
|
35
91
|
end
|
36
|
-
|
92
|
+
end
|
37
93
|
|
38
|
-
|
39
|
-
|
40
|
-
output.close
|
41
|
-
end
|
94
|
+
def self.process input, output, opts = {}
|
95
|
+
Worker.new(input, output, opts).perform
|
42
96
|
end
|
43
97
|
|
44
98
|
end
|
data/lib/tag_remover/version.rb
CHANGED
data/spec/{tag_helper_spec.rb → tag_remover_spec.rb}
CHANGED
@@ -3,72 +3,89 @@ require 'spec_helper'
|
|
3
3
|
describe TagRemover do
|
4
4
|
describe ".process" do
|
5
5
|
it "removes elements" do
|
6
|
-
input =
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
6
|
+
input = """
|
7
|
+
<root>
|
8
|
+
<remove>
|
9
|
+
Some contents
|
10
|
+
</remove>
|
11
|
+
<remove >
|
12
|
+
</remove >
|
13
|
+
</root>"""
|
14
|
+
|
15
15
|
output = StringIO.new
|
16
16
|
tags_to_remove = ['remove']
|
17
17
|
|
18
18
|
TagRemover.process input, output, remove_tags: tags_to_remove
|
19
19
|
|
20
|
-
expect(output.string).to eq """
|
21
|
-
|
22
|
-
|
23
|
-
"""
|
20
|
+
expect(output.string).to eq """<root>
|
21
|
+
</root>
|
22
|
+
"""
|
24
23
|
end
|
25
24
|
|
26
25
|
it "removes single tags" do
|
27
|
-
input = StringIO.new """
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
"""
|
26
|
+
input = StringIO.new """<root>
|
27
|
+
<remove/>
|
28
|
+
<remove />
|
29
|
+
<remove/ >
|
30
|
+
<remove / >
|
31
|
+
</root>
|
32
|
+
"""
|
35
33
|
output = StringIO.new
|
36
34
|
tags_to_remove = ['remove']
|
37
35
|
|
38
36
|
TagRemover.process input, output, remove_tags: tags_to_remove
|
39
37
|
|
40
|
-
expect(output.string).to eq """
|
41
|
-
|
42
|
-
|
43
|
-
"""
|
38
|
+
expect(output.string).to eq """<root>
|
39
|
+
</root>
|
40
|
+
"""
|
44
41
|
end
|
45
42
|
|
46
43
|
it "removes tags with attributes" do
|
47
|
-
input = StringIO.new '''
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
44
|
+
input = StringIO.new '''<root>
|
45
|
+
<remove x="y" a="b" >
|
46
|
+
</remove>
|
47
|
+
<remove x="y" a="b" />
|
48
|
+
</root>
|
49
|
+
'''
|
50
|
+
output = StringIO.new
|
51
|
+
tags_to_remove = ['remove']
|
52
|
+
|
53
|
+
TagRemover.process input, output, remove_tags: tags_to_remove
|
54
|
+
|
55
|
+
expect(output.string).to eq """<root>
|
56
|
+
</root>
|
57
|
+
"""
|
58
|
+
end
|
59
|
+
|
60
|
+
it "deals with multiple tags on one line" do
|
61
|
+
input = StringIO.new '''<root><remove>
|
62
|
+
</remove>
|
63
|
+
<remove/><keep>
|
64
|
+
</keep>
|
65
|
+
<remove>Bad Stuff!
|
66
|
+
</remove>Happy Stuff :)<keep/>
|
67
|
+
</root>
|
68
|
+
'''
|
54
69
|
output = StringIO.new
|
55
70
|
tags_to_remove = ['remove']
|
56
71
|
|
57
72
|
TagRemover.process input, output, remove_tags: tags_to_remove
|
58
73
|
|
59
|
-
expect(output.string).to eq """
|
60
|
-
|
61
|
-
|
62
|
-
|
74
|
+
expect(output.string).to eq """<root>
|
75
|
+
<keep>
|
76
|
+
</keep>
|
77
|
+
Happy Stuff :)
|
78
|
+
<keep/>
|
79
|
+
</root>
|
80
|
+
"""
|
63
81
|
end
|
64
82
|
|
65
83
|
it "keeps elements" do
|
66
|
-
input = StringIO.new """
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
"""
|
84
|
+
input = StringIO.new """<root>
|
85
|
+
<keep>
|
86
|
+
</keep>
|
87
|
+
</root>
|
88
|
+
"""
|
72
89
|
tags_to_remove = ['remove']
|
73
90
|
|
74
91
|
output = StringIO.new
|
@@ -79,10 +96,9 @@ describe TagRemover do
|
|
79
96
|
end
|
80
97
|
|
81
98
|
it "is ok with doing nothing" do
|
82
|
-
input = StringIO.new """
|
83
|
-
|
84
|
-
|
85
|
-
"""
|
99
|
+
input = StringIO.new """<root>
|
100
|
+
</root>
|
101
|
+
"""
|
86
102
|
output = StringIO.new
|
87
103
|
|
88
104
|
TagRemover.process input, output
|
@@ -91,23 +107,21 @@ describe TagRemover do
|
|
91
107
|
end
|
92
108
|
|
93
109
|
it "removes nested tags" do
|
94
|
-
input = StringIO.new """
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
"""
|
110
|
+
input = StringIO.new """<root>
|
111
|
+
<remove>
|
112
|
+
<remove>
|
113
|
+
</remove>
|
114
|
+
</remove>
|
115
|
+
</root>
|
116
|
+
"""
|
102
117
|
tags_to_remove = ['remove']
|
103
118
|
output = StringIO.new
|
104
119
|
|
105
120
|
TagRemover.process input, output, remove_tags: tags_to_remove
|
106
121
|
|
107
|
-
expect(output.string).to eq """
|
108
|
-
|
109
|
-
|
110
|
-
"""
|
122
|
+
expect(output.string).to eq """<root>
|
123
|
+
</root>
|
124
|
+
"""
|
111
125
|
end
|
112
126
|
|
113
127
|
it "closes the streams" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tag_remover
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Smith
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -72,7 +72,7 @@ files:
|
|
72
72
|
- lib/tag_remover.rb
|
73
73
|
- lib/tag_remover/version.rb
|
74
74
|
- spec/spec_helper.rb
|
75
|
-
- spec/tag_helper_spec.rb
|
75
|
+
- spec/tag_remover_spec.rb
|
76
76
|
- tag_remover.gemspec
|
77
77
|
homepage: https://github.com/jellymann/tag_remover
|
78
78
|
licenses:
|
@@ -100,4 +100,4 @@ specification_version: 4
|
|
100
100
|
summary: Remove elements from large XML documents.
|
101
101
|
test_files:
|
102
102
|
- spec/spec_helper.rb
|
103
|
-
- spec/tag_helper_spec.rb
|
103
|
+
- spec/tag_remover_spec.rb
|