tag_remover 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3f1f0436eab08ba2850e4b36e2450e59154ddfe7
4
+ data.tar.gz: 872a58291e7ff4973a1436d5b2699e7c06328ea9
5
+ SHA512:
6
+ metadata.gz: d5fbf8487101e5668118050531c040a44174de61c055386250d5aabf696ed947be0078713673fd8407f87716e8a1c203b342a15180c23a69e286a78e90dca037
7
+ data.tar.gz: a48ed26ae553d43d973975ffe35e7e090a92c4a38fff206d9d3bb87c57d9221a2026531a4726f3078a60bb0c224d0ccb66660b3ce4bb4c3dd7639306cabad04c
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ *.gem
15
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in tag_remover.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Daniel Smith
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,49 @@
1
+ # TagRemover
2
+
3
+ Tag remover lets you remove all elements of specified tags from extremely large XML documents without parsing them or loading the whole thing into memory. This is useful for processing unreasonably large documents without making your server fall over.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'tag_remover'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install tag_remover
20
+
21
+ ## Usage
22
+
23
+ The following line will read XML from `input_stream`, and write it out to `output_stream` with all `div` and `img` elements removed.
24
+
25
+ ```ruby
26
+ TagRemover.process input_stream, output_stream, remove_tags: ['div', 'img']
27
+ ```
28
+
29
+ Options include:
30
+
31
+ * `remove_tags`: List of tags to remove from the XML file.
32
+ * `close_streams`: (`true`|`false`) If set, TagRemover will close `input_stream` and `output_stream` once processing is finished.
33
+ * [NOT IMPLEMENTED] `format`: (`true`|`false`) If set, then the contents of `output_stream` will be formatted.
34
+
35
+ TagRemover can be used from the command line with the `rmtags` command. The following is an example that reads input.xml and writes the output to output.xml, removing all `div` and `img` elements:
36
+
37
+ $ rmtags input.xml output.xml div img
38
+
39
+ ## Limitations
40
+
41
+ Tag remover currently only works correctly if the XML is formatted with only one tag per line.
42
+
43
+ ## Contributing
44
+
45
+ 1. Fork it ( https://github.com/[my-github-username]/tag_remover/fork )
46
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
47
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
48
+ 4. Push to the branch (`git push origin my-new-feature`)
49
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/bin/rmtags ADDED
@@ -0,0 +1,20 @@
1
#!/usr/bin/env ruby

# Command-line front end for TagRemover.
#
#   rmtags INPUT_FILE OUTPUT_FILE TAG [TAG ...]
#
# Copies INPUT_FILE to OUTPUT_FILE, leaving out every element whose tag name
# is listed after the two file names.

require 'tag_remover'

# All three positional arguments are mandatory, so there are no fallback
# file names; report misuse on stderr and exit with a failure status.
if ARGV.length < 3
  warn "Usage: rmtags INPUT_FILE OUTPUT_FILE TAG [TAG, TAG, ...]"
  exit 1
end

input_filename, output_filename, *tags_to_remove = ARGV

# Block-form File.open closes each handle even when opening the second file
# or the processing itself raises.
File.open(input_filename, 'r') do |input_file|
  File.open(output_filename, 'w') do |output_file|
    TagRemover.process input_file, output_file, remove_tags: tags_to_remove
  end
end
20
+
@@ -0,0 +1,44 @@
1
+ require "tag_remover/version"
2
+
3
# Filters XML one line at a time, dropping every element whose tag name was
# listed for removal. Nothing is parsed and only the current line is examined,
# so whole lines are either copied or dropped.
module TagRemover

  # Regexp source for the part of a tag between its name and the closing
  # bracket: any mix of whitespace and name="value" / name='value' pairs.
  ATTRIBUTES = %q{(?:\s|(?:\s+.+?=(?:".+?"|'.+?')))*?}.freeze
  private_constant :ATTRIBUTES

  # Copies +input+ to +output+ line by line, skipping removed elements.
  #
  # A line is dropped when it contains an opening or self-closing tag of a
  # removed element, and so is every following line up to and including the
  # one holding the matching closing tag (nesting of the same tag is counted).
  #
  # input  - object responding to +each_line+ (and +close+ if close_streams).
  # output - object responding to +write+ (and +close+ if close_streams).
  # opts   - Hash of options:
  #          :remove_tags   - tag name or Array of tag names to remove
  #                           (default: none).
  #          :close_streams - when truthy, both streams are closed before
  #                           returning, even if processing raised.
  #
  # Returns nil.
  def self.process(input, output, opts = {})
    # Build the three patterns per tag once instead of once per line.
    patterns = Array(opts[:remove_tags]).map { |tag| tag_patterns(tag) }

    skipping = nil # patterns of the element currently being dropped
    depth = 0      # how many of those elements are open

    input.each_line do |line|
      if skipping
        # Count every open/close on the line so several tags sharing a line
        # cannot leave the depth out of sync.
        depth += line.scan(skipping[:open]).length
        depth -= line.scan(skipping[:close]).length
        skipping = nil if depth <= 0
        next
      end

      drop_line = false
      patterns.each do |pattern|
        if (opened = pattern[:open].match(line))
          # The element may be closed again later on the same line; only
          # keep skipping when it is still open at the end of the line.
          rest = opened.post_match
          depth = 1 + rest.scan(pattern[:open]).length -
                  rest.scan(pattern[:close]).length
          skipping = pattern if depth > 0
          drop_line = true
          break
        elsif line =~ pattern[:empty]
          drop_line = true
          break
        end
      end
      output.write line unless drop_line
    end

    nil
  ensure
    if opts[:close_streams]
      input.close
      output.close
    end
  end

  # Builds the opening, closing and self-closing patterns for one tag name.
  # The name is escaped so regexp metacharacters in it are matched literally.
  def self.tag_patterns(tag)
    name = Regexp.escape(tag.to_s)
    {
      open:  /<#{name}#{ATTRIBUTES}>/,
      close: /<\/#{name}\s*>/,
      empty: /<#{name}#{ATTRIBUTES}\/\s*>/
    }
  end
  private_class_method :tag_patterns

end
@@ -0,0 +1,3 @@
1
module TagRemover
  # Release version of the gem; frozen so it cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,3 @@
1
+ require 'tag_remover'
2
+
3
+
@@ -0,0 +1,123 @@
1
+ require 'spec_helper'
2
+
3
describe TagRemover do
  describe ".process" do
    # Builds a document from the given lines, with a leading and a trailing
    # newline around them.
    def document(*lines)
      ([""] + lines + [""]).join("\n")
    end

    let(:output)    { StringIO.new }
    let(:only_root) { document("<root>", "</root>") }

    it "removes elements" do
      input = StringIO.new document(
        "<root>",
        "<remove>",
        "Some contents",
        "</remove>",
        "<remove >",
        "</remove >",
        "</root>"
      )

      TagRemover.process input, output, remove_tags: ['remove']

      expect(output.string).to eq only_root
    end

    it "removes single tags" do
      input = StringIO.new document(
        "<root>",
        "<remove/>",
        "<remove />",
        "<remove/ >",
        "<remove / >",
        "</root>"
      )

      TagRemover.process input, output, remove_tags: ['remove']

      expect(output.string).to eq only_root
    end

    it "removes tags with attributes" do
      input = StringIO.new document(
        '<root>',
        '<remove x="y" a="b" >',
        '</remove>',
        '<remove x="y" a="b" />',
        '</root>'
      )

      TagRemover.process input, output, remove_tags: ['remove']

      expect(output.string).to eq only_root
    end

    it "keeps elements" do
      input = StringIO.new document("<root>", "<keep>", "</keep>", "</root>")

      TagRemover.process input, output, remove_tags: ['remove']

      expect(output.string).to eq input.string
    end

    it "is ok with doing nothing" do
      input = StringIO.new only_root

      TagRemover.process input, output

      expect(output.string).to eq input.string
    end

    it "removes nested tags" do
      input = StringIO.new document(
        "<root>",
        "<remove>",
        "<remove>",
        "</remove>",
        "</remove>",
        "</root>"
      )

      TagRemover.process input, output, remove_tags: ['remove']

      expect(output.string).to eq only_root
    end

    it "closes the streams" do
      input = StringIO.new "<root></root>"

      TagRemover.process input, output, close_streams: true

      expect(input).to be_closed
      expect(output).to be_closed
    end
  end
end
@@ -0,0 +1,24 @@
1
# coding: utf-8
# Gem specification for tag_remover. The version is read from
# lib/tag_remover/version.rb, so lib/ is put on the load path first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'tag_remover/version'

Gem::Specification.new do |spec|
  spec.name          = "tag_remover"
  spec.version       = TagRemover::VERSION
  spec.authors       = ["Daniel Smith"]
  spec.email         = ["jellymann@gmail.com"]
  spec.summary       = %q{Remove elements from large XML documents.}
  spec.description   = "Tag remover lets you remove all elements of specified tags from " \
                       "extremely large XML documents without parsing or loading the whole " \
                       "thing in memory, useful for processing unreasonably large documents " \
                       "without making your server fall over."
  spec.homepage      = "https://github.com/jellymann/tag_remover"
  spec.license       = "MIT"

  # Package exactly the files tracked by git; executables and test files are
  # picked out of that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.1"
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tag_remover
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Daniel Smith
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-10-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.1'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.1'
55
+ description: Tag remover let's you remove all elements of specified tags from extremely
56
+ large XML documents without parsing or loading the whole thing in memory, useful
57
+ for processing unreasonably large documents without making your server fall over.
58
+ email:
59
+ - jellymann@gmail.com
60
+ executables:
61
+ - rmtags
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - ".gitignore"
66
+ - ".rspec"
67
+ - Gemfile
68
+ - LICENSE.txt
69
+ - README.md
70
+ - Rakefile
71
+ - bin/rmtags
72
+ - lib/tag_remover.rb
73
+ - lib/tag_remover/version.rb
74
+ - spec/spec_helper.rb
75
+ - spec/tag_helper_spec.rb
76
+ - tag_remover.gemspec
77
+ homepage: https://github.com/jellymann/tag_remover
78
+ licenses:
79
+ - MIT
80
+ metadata: {}
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.2.2
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: Remove elements from large XML documents.
101
+ test_files:
102
+ - spec/spec_helper.rb
103
+ - spec/tag_helper_spec.rb