tag_remover 0.0.1 → 0.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3f1f0436eab08ba2850e4b36e2450e59154ddfe7
4
- data.tar.gz: 872a58291e7ff4973a1436d5b2699e7c06328ea9
3
+ metadata.gz: e5782c529dc1431ea66b7f8c1f23b986b7898c24
4
+ data.tar.gz: 151094b85405708da5f121e724329742c2b2c44b
5
5
  SHA512:
6
- metadata.gz: d5fbf8487101e5668118050531c040a44174de61c055386250d5aabf696ed947be0078713673fd8407f87716e8a1c203b342a15180c23a69e286a78e90dca037
7
- data.tar.gz: a48ed26ae553d43d973975ffe35e7e090a92c4a38fff206d9d3bb87c57d9221a2026531a4726f3078a60bb0c224d0ccb66660b3ce4bb4c3dd7639306cabad04c
6
+ metadata.gz: 061fae588623198bd1be710c32d65e4ea983934d2d7ef0b2a7d698f29e0b95074501071c98cc128a591dd35db69eb48feebb7c891a3e028f8597a19526a4196b
7
+ data.tar.gz: 63afd88c539c6c24bbce653c79ada29977cf18340ced56de75baef26940d1dfb51e0a74dc5957c90933a3405ce43016a5d1912c84b17ae3fbc0281968396f3c9
data/lib/tag_remover.rb CHANGED
@@ -2,43 +2,97 @@ require "tag_remover/version"
2
2
 
3
3
  module TagRemover
4
4
 
5
- def self.process input, output, opts = {}
6
- tags_to_remove = opts[:remove_tags] || []
5
+ class Worker
6
+ def initialize input, output, opts
7
+ @input = input
8
+ @output = output
9
+ @opts = opts
10
+ end
7
11
 
8
- in_tag = nil
9
- depth = 0
12
+ def perform
13
+ @tags_to_remove = @opts[:remove_tags] || []
10
14
 
11
- input.each_line.each do |line|
12
- if in_tag
13
- in_tag_str = tags_to_remove[in_tag]
15
+ @in_tag = nil
16
+ @depth = 0
17
+
18
+ each_tag do |tag, type|
19
+ process_tag tag, type
20
+ end
21
+
22
+ if @opts[:close_streams]
23
+ @input.close
24
+ @output.close
25
+ end
26
+ end
14
27
 
15
- if line =~ /<#{in_tag_str}(\s|(\s+.+?=(".+?"|'.+?')))*?>/
16
- depth += 1
17
- elsif line =~ /<\/#{in_tag_str}\s*>/
18
- depth -= 1
19
- in_tag = nil if depth == 0
28
+ private
29
+ def process_tag tag, type
30
+ if @in_tag
31
+ in_tag_str = @tags_to_remove[@in_tag]
32
+
33
+ if tag =~ opening_tag(in_tag_str)
34
+ @depth += 1
35
+ elsif tag =~ closing_tag(in_tag_str)
36
+ @depth -= 1
37
+ @in_tag = nil if @depth == 0
38
+ end
39
+ else
40
+ found_tag = false
41
+ @tags_to_remove.each_with_index do |tag_str,index|
42
+ if tag =~ opening_tag(tag_str)
43
+ @in_tag = index
44
+ @depth = 1
45
+ found_tag = true
46
+ break
47
+ elsif tag =~ single_tag(tag_str)
48
+ found_tag = true
49
+ break
50
+ end
51
+ end
52
+
53
+ @output.write "#{tag}\n" unless found_tag || tag.empty?
20
54
  end
21
- else
22
- found_tag = false
23
- tags_to_remove.each_with_index do |tag,index|
24
- if line =~ /<#{tag}(\s|(\s+.+?=(".+?"|'.+?')))*?>/
25
- in_tag = index
26
- depth = 1
27
- found_tag = true
28
- break
29
- elsif line =~ /<#{tag}(\s|(\s+.+?=(".+?"|'.+?')))*?\/\s*>/
30
- found_tag = true
31
- break
55
+ end
56
+
57
+ def opening_tag tag
58
+ /<#{tag}(\s|(\s+.+?=(".+?"|'.+?')))*?>/
59
+ end
60
+
61
+ def closing_tag tag
62
+ /<\/#{tag}\s*>/
63
+ end
64
+
65
+ def single_tag tag
66
+ /<#{tag}(\s|(\s+.+?=(".+?"|'.+?')))*?\/\s*>/
67
+ end
68
+
69
+ def each_tag &block
70
+ acc = ""
71
+ type = nil
72
+
73
+ @input.each_char do |c|
74
+ case c
75
+ when '<'
76
+ yield acc.strip, type if type == :text
77
+ type = :tag
78
+ acc = c
79
+ when '>'
80
+ acc += c
81
+ yield acc.strip, type
82
+ type = nil
83
+ acc = ""
84
+ else
85
+ type ||= :text
86
+ acc += c
32
87
  end
33
88
  end
34
- output.write line unless found_tag
89
+
90
+ yield acc.strip, type if type
35
91
  end
36
- end
92
+ end
37
93
 
38
- if opts[:close_streams]
39
- input.close
40
- output.close
41
- end
94
+ def self.process input, output, opts = {}
95
+ Worker.new(input, output, opts).perform
42
96
  end
43
97
 
44
98
  end
@@ -1,3 +1,3 @@
1
1
  module TagRemover
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -3,72 +3,89 @@ require 'spec_helper'
3
3
  describe TagRemover do
4
4
  describe ".process" do
5
5
  it "removes elements" do
6
- input = StringIO.new """
7
- <root>
8
- <remove>
9
- Some contents
10
- </remove>
11
- <remove >
12
- </remove >
13
- </root>
14
- """
6
+ input = """
7
+ <root>
8
+ <remove>
9
+ Some contents
10
+ </remove>
11
+ <remove >
12
+ </remove >
13
+ </root>"""
14
+
15
15
  output = StringIO.new
16
16
  tags_to_remove = ['remove']
17
17
 
18
18
  TagRemover.process input, output, remove_tags: tags_to_remove
19
19
 
20
- expect(output.string).to eq """
21
- <root>
22
- </root>
23
- """
20
+ expect(output.string).to eq """<root>
21
+ </root>
22
+ """
24
23
  end
25
24
 
26
25
  it "removes single tags" do
27
- input = StringIO.new """
28
- <root>
29
- <remove/>
30
- <remove />
31
- <remove/ >
32
- <remove / >
33
- </root>
34
- """
26
+ input = StringIO.new """<root>
27
+ <remove/>
28
+ <remove />
29
+ <remove/ >
30
+ <remove / >
31
+ </root>
32
+ """
35
33
  output = StringIO.new
36
34
  tags_to_remove = ['remove']
37
35
 
38
36
  TagRemover.process input, output, remove_tags: tags_to_remove
39
37
 
40
- expect(output.string).to eq """
41
- <root>
42
- </root>
43
- """
38
+ expect(output.string).to eq """<root>
39
+ </root>
40
+ """
44
41
  end
45
42
 
46
43
  it "removes tags with attributes" do
47
- input = StringIO.new '''
48
- <root>
49
- <remove x="y" a="b" >
50
- </remove>
51
- <remove x="y" a="b" />
52
- </root>
53
- '''
44
+ input = StringIO.new '''<root>
45
+ <remove x="y" a="b" >
46
+ </remove>
47
+ <remove x="y" a="b" />
48
+ </root>
49
+ '''
50
+ output = StringIO.new
51
+ tags_to_remove = ['remove']
52
+
53
+ TagRemover.process input, output, remove_tags: tags_to_remove
54
+
55
+ expect(output.string).to eq """<root>
56
+ </root>
57
+ """
58
+ end
59
+
60
+ it "deals with multiple tags on one line" do
61
+ input = StringIO.new '''<root><remove>
62
+ </remove>
63
+ <remove/><keep>
64
+ </keep>
65
+ <remove>Bad Stuff!
66
+ </remove>Happy Stuff :)<keep/>
67
+ </root>
68
+ '''
54
69
  output = StringIO.new
55
70
  tags_to_remove = ['remove']
56
71
 
57
72
  TagRemover.process input, output, remove_tags: tags_to_remove
58
73
 
59
- expect(output.string).to eq """
60
- <root>
61
- </root>
62
- """
74
+ expect(output.string).to eq """<root>
75
+ <keep>
76
+ </keep>
77
+ Happy Stuff :)
78
+ <keep/>
79
+ </root>
80
+ """
63
81
  end
64
82
 
65
83
  it "keeps elements" do
66
- input = StringIO.new """
67
- <root>
68
- <keep>
69
- </keep>
70
- </root>
71
- """
84
+ input = StringIO.new """<root>
85
+ <keep>
86
+ </keep>
87
+ </root>
88
+ """
72
89
  tags_to_remove = ['remove']
73
90
 
74
91
  output = StringIO.new
@@ -79,10 +96,9 @@ describe TagRemover do
79
96
  end
80
97
 
81
98
  it "is ok with doing nothing" do
82
- input = StringIO.new """
83
- <root>
84
- </root>
85
- """
99
+ input = StringIO.new """<root>
100
+ </root>
101
+ """
86
102
  output = StringIO.new
87
103
 
88
104
  TagRemover.process input, output
@@ -91,23 +107,21 @@ describe TagRemover do
91
107
  end
92
108
 
93
109
  it "removes nested tags" do
94
- input = StringIO.new """
95
- <root>
96
- <remove>
97
- <remove>
98
- </remove>
99
- </remove>
100
- </root>
101
- """
110
+ input = StringIO.new """<root>
111
+ <remove>
112
+ <remove>
113
+ </remove>
114
+ </remove>
115
+ </root>
116
+ """
102
117
  tags_to_remove = ['remove']
103
118
  output = StringIO.new
104
119
 
105
120
  TagRemover.process input, output, remove_tags: tags_to_remove
106
121
 
107
- expect(output.string).to eq """
108
- <root>
109
- </root>
110
- """
122
+ expect(output.string).to eq """<root>
123
+ </root>
124
+ """
111
125
  end
112
126
 
113
127
  it "closes the streams" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tag_remover
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Smith
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-27 00:00:00.000000000 Z
11
+ date: 2014-10-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -72,7 +72,7 @@ files:
72
72
  - lib/tag_remover.rb
73
73
  - lib/tag_remover/version.rb
74
74
  - spec/spec_helper.rb
75
- - spec/tag_helper_spec.rb
75
+ - spec/tag_remover_spec.rb
76
76
  - tag_remover.gemspec
77
77
  homepage: https://github.com/jellymann/tag_remover
78
78
  licenses:
@@ -100,4 +100,4 @@ specification_version: 4
100
100
  summary: Remove elements from large XML documents.
101
101
  test_files:
102
102
  - spec/spec_helper.rb
103
- - spec/tag_helper_spec.rb
103
+ - spec/tag_remover_spec.rb