onigiri 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/Gemfile +4 -0
- data/README.md +6 -0
- data/Rakefile +1 -0
- data/autotest/discover.rb +1 -0
- data/lib/onigiri.rb +51 -0
- data/lib/onigiri/drop_empty_paras.rb +13 -0
- data/lib/onigiri/enclose_block_text.rb +21 -0
- data/lib/onigiri/enclose_text.rb +19 -0
- data/lib/onigiri/fix_backslash.rb +16 -0
- data/lib/onigiri/hide_comments.rb +11 -0
- data/lib/onigiri/merge_by_tag.rb +60 -0
- data/lib/onigiri/show_body_only.rb +13 -0
- data/lib/onigiri/version.rb +3 -0
- data/onigiri.gemspec +23 -0
- data/spec/onigiri_spec.rb +214 -0
- data/spec/spec_helper.rb +5 -0
- metadata +99 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
@@ -0,0 +1 @@
|
|
1
|
+
Autotest.add_discovery { "rspec2" }
|
data/lib/onigiri.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "nokogiri"
|
3
|
+
|
4
|
+
# Onigiri applies a chain of registered clean-up handlers to an HTML fragment.
# Handlers are instance methods of Onigiri::Document that return a new
# document; each handler file announces itself through register_handler.
module Onigiri
  extend self
  # Handler name => true for every handler that Onigiri.clean may invoke.
  @@registry ||= {}

  # Raised when a handler name is registered a second time.
  class OnigiriHandlerTaken < StandardError
    # Uses #description as the default message so that a bare
    # `raise OnigiriHandlerTaken` reports something more useful than the
    # class name.
    def initialize(message = nil)
      super(message || description)
    end

    def description
      "There was an attempt to override registered handler. This usually indicates a bug in Onigiri."
    end
  end

  # Parses +data+ and runs the requested handlers over it, in the given order.
  #
  # data   - markup accepted by Onigiri::Document.parse.
  # params - handler names (nested arrays are flattened). Names that are not
  #          in the registry are skipped silently.
  #
  # Returns the resulting markup as produced by #to_html.
  def clean(data, *params)
    document = params.flatten.inject(Onigiri::Document.parse(data)) do |current, handler|
      # Only registered names are ever dispatched, which keeps `send` bounded.
      @@registry[handler] ? current.send(handler) : current
    end
    document.to_html
  end

  class Document < Nokogiri::HTML::DocumentFragment
    class << self
      # Strips formatting whitespace before handing the markup to Nokogiri:
      # every CR/LF is deleted and runs of spaces between a closing ">" and an
      # opening "<" are removed, so they do not show up as text nodes.
      #
      # NOTE(review): deleting line breaks outright also glues together words
      # that were separated only by a newline; confirm that this is intended.
      def parse(tags)
        tags = tags.gsub(/(\r|\n)/, '').gsub(/> *</, '><') if tags.respond_to? :gsub
        # Bare super forwards the (possibly rewritten) +tags+ local.
        super
      end
    end
  end

  private

  # Marks +name+ as an allowed handler.
  #
  # Raises OnigiriHandlerTaken if the name has been registered before.
  def register_handler(name)
    raise OnigiriHandlerTaken if @@registry[name]
    @@registry[name] = true
  end

end
|
44
|
+
|
45
|
+
require "onigiri/drop_empty_paras"
|
46
|
+
require "onigiri/enclose_block_text"
|
47
|
+
require "onigiri/enclose_text"
|
48
|
+
require "onigiri/fix_backslash"
|
49
|
+
require "onigiri/show_body_only"
|
50
|
+
require "onigiri/merge_by_tag"
|
51
|
+
require "onigiri/hide_comments"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Onigiri
  register_handler :enclose_block_text
  class Document
    # Returns a copy of the document in which every text node that is a direct
    # child of <noscript>, <form> or <blockquote> is replaced by a <p> holding
    # the same text with surrounding whitespace stripped.
    def enclose_block_text
      dupe = dup
      strict_tags = {"noscript" => 1, "form" => 1, "blockquote" => 1}
      dupe.traverse do |elem|
        next unless strict_tags[elem.name]
        elem.children.each do |target|
          next unless target.text?
          # Build the paragraph as a node and assign the text through
          # #content= so it is escaped. Interpolating the decoded text into a
          # markup string would let "&lt;b&gt;" come back as a real <b> tag.
          paragraph = Nokogiri::XML::Node.new('p', dupe.document)
          paragraph.content = target.content.strip
          target.add_previous_sibling paragraph
          target.unlink
        end
      end
      dupe
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Onigiri
  register_handler :enclose_text
  class Document
    # Returns a copy of the document in which text nodes and inline elements
    # found directly inside <body> (or at the top level when there is no
    # <body>) are wrapped in <p> elements. Adjacent text/inline nodes share one
    # paragraph; a new paragraph is started after every other kind of node.
    def enclose_text
      dupe = dup
      body = dupe.css('body').children
      body = dupe.children if body.empty?
      paragraph = nil
      body.each do |target|
        # Nodes that were detached in the meantime are left alone.
        next unless target.parent
        # #description is nil for comments and unknown tags, so guard it.
        description = target.text? ? nil : target.description
        if target.text? || (description && description.inline?)
          # A fresh wrapper per run: reusing a single <p> would move the
          # already wrapped content along to the position of the next run.
          unless paragraph
            paragraph = Nokogiri::XML::Node.new('p', dupe.document)
            target.add_previous_sibling(paragraph)
          end
          paragraph << target.unlink
        else
          paragraph = nil
        end
      end
      dupe
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Onigiri
  register_handler :fix_backslash
  class Document
    # Returns a copy of the document where backslashes in the src, longdesc,
    # href and action attributes have been turned into forward slashes.
    def fix_backslash
      url_attributes = ['src', 'longdesc', 'href', 'action']
      # Selects every element carrying at least one of the attributes.
      selector = url_attributes.map { |name| "[#{name}]" }.join(', ')
      copy = dup
      copy.css(selector).each do |element|
        url_attributes.each do |name|
          value = element[name]
          next unless value
          element[name] = value.gsub("\\", "/")
        end
      end
      copy
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Onigiri
  register_handler :merge_divs
  register_handler :merge_spans
  class Document

    # Merges chains of singly nested <div> elements; see #merge_by_tag.
    def merge_divs
      merge_by_tag('div')
    end

    # Merges chains of singly nested <span> elements; see #merge_by_tag.
    def merge_spans
      merge_by_tag('span')
    end

    # Returns a copy of the document in which every +tag_name+ element that is
    # the only child of another +tag_name+ element is folded into the topmost
    # element of its chain. The surviving element receives the concatenated
    # "class" and "style" values of the chain; other attributes of the removed
    # elements are dropped.
    def merge_by_tag(tag_name)
      dupe = dup
      # First pass: every element that is the sole child of a same-named parent.
      mergers = dupe.find_merger_elements(tag_name)
      # Second pass: walk up from each merger, gathering attributes on the way.
      mergers.each do |merger|
        data = singular_upverse(merger)
        merger.children.each { |survivor| data['root'] << survivor }
        data['deletion_node'].remove
        data['root']['class'] = data['class'] if data['class']
        data['root']['style'] = data['style'] if data['style']
      end
      dupe
    end

    # Returns an Array of the +tag_name+ elements whose parent is also a
    # +tag_name+ element and has no other children.
    def find_merger_elements(tag_name)
      css(tag_name).select do |elem|
        elem.parent.children.size == 1 && elem.parent.name == tag_name
      end
    end

    # Climbs from +node+ through same-named, sibling-less ancestors.
    #
    # Returns a Hash with:
    #   'root'          - the topmost element of the chain,
    #   'deletion_node' - the direct child of the root on the chain (absent
    #                     when +node+ itself is the root),
    #   'class'/'style' - the values collected from the root down to +node+.
    def singular_upverse(node)
      if node.parent.name == node.name && !(node.next_sibling || node.previous_sibling)
        data = singular_upverse(node.parent)
        # The first node to see an unset deletion point is the root's child.
        data['deletion_node'] ||= node if data['root']
      else
        data = { 'root' => node }
      end

      # Declarations need a ";" between them; add one unless the collected
      # value is empty or already ends with it.
      style_separator = data['style'].to_s.strip =~ /(\A|;)\z/ ? ' ' : '; '
      data['style'] = join_attribute_values(data['style'], node['style'], style_separator)
      data['class'] = join_attribute_values(data['class'], node['class'], ' ')

      data
    end

    private

    # Joins two attribute values with +separator+; when either side is nil the
    # other one is returned unchanged.
    def join_attribute_values(outer, inner, separator)
      return inner unless outer
      return outer unless inner
      "#{outer}#{separator}#{inner}"
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Onigiri
  register_handler :show_body_only
  class Document
    # Returns a document made of the children of every <body> element, or a
    # plain copy of the receiver when it contains no <body>.
    #
    # The nodes are taken from a copy, so the receiver itself is not emptied.
    def show_body_only
      source = dup
      bodies = source.css('body')
      return source if bodies.empty?

      result = Onigiri::Document.parse("")
      bodies.children.each { |child| result << child }
      result
    end
  end
end
|
data/onigiri.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "onigiri/version"
|
4
|
+
|
5
|
+
# Gem specification: identity, packaged files (read from the git index) and
# runtime/development dependencies.
Gem::Specification.new do |spec|
  # Identity
  spec.name    = "onigiri"
  spec.version = Onigiri::VERSION
  spec.date    = "2011-07-18"

  # Authorship
  spec.authors  = ["Dmitrii Soltis"]
  spec.email    = ["slotos@gmail.com"]
  spec.homepage = ""

  # Descriptions
  spec.summary     = %q{Attempt to replicate (at least some) functions of tidy utility}
  spec.description = %q{This gem is supposed to replace a tidy-ext in one of our projects. Tidy-ext has nasty memory leaks, tends to crash and is incompatible with Ruby 1.9. So here I am trying to use a japanese saw to make some rice balls.}

  # Packaged files
  spec.require_paths = ["lib"]
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }

  # Dependencies
  spec.add_dependency "nokogiri"
  spec.add_development_dependency "rspec", ">= 2.0.0"
  spec.add_development_dependency "autotest"
end
|
@@ -0,0 +1,214 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
include Onigiri
|
4
|
+
|
5
|
+
describe Onigiri do
|
6
|
+
it 'should throw exception when registering two handlers with the same name' do
|
7
|
+
lambda do
|
8
|
+
module Onigiri
|
9
|
+
register_handler :drop_empty_paras
|
10
|
+
end
|
11
|
+
end.should raise_error(OnigiriHandlerTaken)
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'should define "drop_empty_paras" method that removes empty paragraphs from argument string' do
|
15
|
+
input = 'this is text with <p>some </p><p></p> <p>emptyness inside</p>'
|
16
|
+
expectation = 'this is text with <p>some </p><p>emptyness inside</p>'
|
17
|
+
Onigiri::clean(input, :drop_empty_paras).should == expectation
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should define "enclose_block_text" method that encloses any text inside <form>, <blockquote>, <noscript> in <p> tag with trimmed spaces' do
|
21
|
+
input = '<NOSCRIPT>hello <form>there <blockquote>pretty</blockquote> world</form></NOSCRIPT>'
|
22
|
+
expectation = '<noscript><p>hello</p><form><p>there</p><blockquote><p>pretty</p></blockquote><p>world</p></form></noscript>'
|
23
|
+
Onigiri::clean(input, :enclose_block_text).gsub(/(\r|\n| )/, '').should == expectation
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should define "enclose_text" method that encloses any text in <body> using <p> tag' do
|
27
|
+
input = '<body>some text<form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div></body>'
|
28
|
+
expectation = '<body><p>some text</p><form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div></body>'
|
29
|
+
Onigiri::clean(input, :enclose_text).gsub(/(\r|\n)/, '').gsub(/> *</, '><').should == expectation
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should wrap inline elements with "enclose_text" method' do
|
33
|
+
input = '<body><span>some inline text</span><form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div></body>'
|
34
|
+
expectation = '<body><p><span>some inline text</span></p><form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div></body>'
|
35
|
+
Onigiri::clean(input, :enclose_text).gsub(/(\r|\n)/, '').gsub(/> *</, '><').should == expectation
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'should work with root element when "enclose_text" method was called on <body>less fragment' do
|
39
|
+
input = '<span>some inline text</span><form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div>'
|
40
|
+
expectation = '<p><span>some inline text</span></p><form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div>'
|
41
|
+
Onigiri::clean(input, :enclose_text).gsub(/(\r|\n)/, '').gsub(/> *</, '><').should == expectation
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'should work around issue #407 (https://github.com/tenderlove/nokogiri/issues/407)' do
  input = "<p class='red'><span class='capital'>о</span>тветственный редактор легендарного журнала The Strand, на страницах которого впервые увидели свет рассказы Артура Конан Дойля, Эдит Несбит, Агаты Кристи и Редьярда Киплинга\r\n– Эндрю Ф. Гулли обнаружил 15 ранее не публиковавшихся рассказов классика детектива – Дэшила Хэммета.</p>\nСвое открытие Гулли сделал, изучая онлайн-библиотеку и архивы центра Рэнсома при Техасском университете. «Я знал, что Лиллиан Хеллман, бывшая в тесных отношениях с Хэмметом, передала Центру большое количество его бумаг, и надеялся найти среди них что-то интересное, - рассказывает редактор The Strand. – Каково же было мое удивление, когда после тщательной проверки, на которую ушло около 100 часов работы, я обнаружил 15 совершенно новых, неизвестных читателю рассказов»\r\n\r\n<div>\r\n<br/></div>\r\n\r\n<div>Среди рассказов есть как детективы, так и «психологические» истории. Неизвестно, почему автор отказался публиковать их при жизни, ведь, по словам Эндрю Гулли, написаны они идеально: «Некоторые писатели неспособны адекватно оценивать свои произведения: им кажется, что они недоработаны или неталантливо написаны. Возможно, Хэммет тоже так посчитал, потому что, несмотря на отличный слог и композицию, найденные его рассказы отличаются от «традиционных» его произведений. Хотя как раз это и поможет читателям оценитьмногогранность таланта автора».</div>\r\n\r\n<div>\r\n<br/></div>\r\n\r\n<div>Вначале рассказы выйдут в журнале The Strand, а потом будут изданы отдельной книгой. Ранее в The Strand были напечатаны неизвестные произведения других классиков литературы - Марка Твена, П.Дж. Вудхауза, Агаты Кристи и Грэма Грина.</div>"
  # raise_error needs something callable to run. Applying it to the String
  # returned by Onigiri.clean is what produced the "undefined method `call'"
  # failure, so the call is wrapped in a lambda instead of being marked pending.
  lambda do
    Onigiri.clean(input, :enclose_text)
  end.should_not raise_error
end
|
49
|
+
|
50
|
+
describe 'should define "fix_backslash" method that fixes "\" for "/" in urls' do
|
51
|
+
it 'is fixing href attributes' do
|
52
|
+
input = '<a href="http:\\\\google.com/">http:\\\\google.com/</a><link rel="stylesheet" type="text/css" href="http:\\\\bing.com\\">'
|
53
|
+
expectation = '<a href="http://google.com/">http:\\\\google.com/</a><link rel="stylesheet" type="text/css" href="http://bing.com/">'
|
54
|
+
Onigiri::clean(input, :fix_backslash).should == expectation
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'is fixing src attributes' do
|
58
|
+
input = '<img src="http:\\/imagehosting.com/3\\image.png">'
|
59
|
+
expectation = '<img src="http://imagehosting.com/3/image.png">'
|
60
|
+
Onigiri::clean(input, :fix_backslash).should == expectation
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'is fixing longdesc attributes' do
|
64
|
+
input = '<img src="http://imagehosting.com/3/image.png" longdesc="http:\\/alt.com\\desc.txt">'
|
65
|
+
expectation = '<img src="http://imagehosting.com/3/image.png" longdesc="http://alt.com/desc.txt">'
|
66
|
+
Onigiri::clean(input, :fix_backslash).should == expectation
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'is fixing form action attributes' do
|
70
|
+
input = '<form action="\\application.php">\\application.php</form>'
|
71
|
+
expectation = '<form action="/application.php">\\application.php</form>'
|
72
|
+
Onigiri::clean(input, :fix_backslash).should == expectation
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'should all work together' do
|
76
|
+
input = '<a href="http:\\\\google.com/">link</a><link rel="stylesheet" type="text/css" href="http:\\\\bing.com\\"><img src="http://imagehosting.com\\3/image.png" longdesc="http:\\/alt.com\\desc.txt"><form action="\\application.php">русский текст</form>'
|
77
|
+
expectation = '<a href="http://google.com/">link</a><link rel="stylesheet" type="text/css" href="http://bing.com/"><img src="http://imagehosting.com/3/image.png" longdesc="http://alt.com/desc.txt"><form action="/application.php">русский текст</form>'
|
78
|
+
Onigiri::clean(input, :fix_backslash).should == expectation
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'should provide a "show_body_only" method that extracts contents of a <body> element for incorporation' do
|
83
|
+
input = '<body>some text<form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div></body>'
|
84
|
+
expectation = 'some text<form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div>'
|
85
|
+
Onigiri::clean(input, :show_body_only).gsub(/(\r|\n)/, '').gsub(/> *</, '><').should == expectation
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'should not do anything with "show_body_only" if there is not body' do
|
89
|
+
input = 'some text<form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div>'
|
90
|
+
expectation = 'some text<form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div>'
|
91
|
+
Onigiri::clean(input, :show_body_only).gsub(/(\r|\n)/, '').gsub(/> *</, '><').should == expectation
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'should provide a "merge_divs" method that will merge nested <div> such as "<div><div>...</div></div>" into top-level div discarding inner <div>s attributes except for "class" and "style"' do
|
95
|
+
input = <<HTML
|
96
|
+
<div class="first">
|
97
|
+
<div class="top">
|
98
|
+
<div id ="!hoho" class="test">
|
99
|
+
<div data-remote="true" style="color: black;" class="tost">
|
100
|
+
<p>data</p>
|
101
|
+
<div>
|
102
|
+
<div class="yopo">
|
103
|
+
another text
|
104
|
+
</div>
|
105
|
+
</div>
|
106
|
+
</div>
|
107
|
+
</div>
|
108
|
+
</div>
|
109
|
+
</div>
|
110
|
+
HTML
|
111
|
+
expectation = <<HTML
|
112
|
+
<div class="first top test tost" style="color: black;">
|
113
|
+
<p>data</p>
|
114
|
+
<div class="yopo">another text</div>
|
115
|
+
</div>
|
116
|
+
HTML
|
117
|
+
Onigiri::clean(input, :merge_divs).gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2').should == expectation.gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2')
|
118
|
+
end
|
119
|
+
|
120
|
+
it 'should provide a "merge_spans" method that replicates "merge_divs" for <span> tag' do
|
121
|
+
input = <<HTML
|
122
|
+
<span class="first">
|
123
|
+
<span class="top">
|
124
|
+
<span id ="!hoho" class="test">
|
125
|
+
<span data-remote="true" style="color: black;" class="tost">
|
126
|
+
data
|
127
|
+
<span>
|
128
|
+
<span class="yopo">
|
129
|
+
another text
|
130
|
+
</span>
|
131
|
+
</span>
|
132
|
+
</span>
|
133
|
+
</span>
|
134
|
+
</span>
|
135
|
+
</span>
|
136
|
+
HTML
|
137
|
+
expectation = <<HTML
|
138
|
+
<span class="first top test tost" style="color: black;">
|
139
|
+
data
|
140
|
+
<span class="yopo">another text</span>
|
141
|
+
</span>
|
142
|
+
HTML
|
143
|
+
Onigiri::clean(input, :merge_spans).gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2').should == expectation.gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2')
|
144
|
+
end
|
145
|
+
|
146
|
+
it 'should provide "hide_comments" method that will remove all comments from the string' do
|
147
|
+
input = <<HTML
|
148
|
+
<span class="first">
|
149
|
+
<span class="top">
|
150
|
+
<span id="!hoho" class="test">
|
151
|
+
<!------ hello world! -->
|
152
|
+
<span data-remote="true" style="color: black;" class="tost">
|
153
|
+
data
|
154
|
+
<span>
|
155
|
+
<span class="yopo">
|
156
|
+
another text
|
157
|
+
</span>
|
158
|
+
</span>
|
159
|
+
</span>
|
160
|
+
<!-- another comment -->
|
161
|
+
</span>
|
162
|
+
</span>
|
163
|
+
</span>
|
164
|
+
HTML
|
165
|
+
expectation = <<HTML
|
166
|
+
<span class="first">
|
167
|
+
<span class="top">
|
168
|
+
<span id="!hoho" class="test">
|
169
|
+
<span data-remote="true" style="color: black;" class="tost">
|
170
|
+
data
|
171
|
+
<span>
|
172
|
+
<span class="yopo">
|
173
|
+
another text
|
174
|
+
</span>
|
175
|
+
</span>
|
176
|
+
</span>
|
177
|
+
</span>
|
178
|
+
</span>
|
179
|
+
</span>
|
180
|
+
HTML
|
181
|
+
Onigiri::clean(input, :hide_comments).gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2').should == expectation.gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2')
|
182
|
+
end
|
183
|
+
|
184
|
+
# Noted pending jobs.
|
185
|
+
it 'should provide a "automerge_divs" method that will merge nested <div> such as "<div><div>...</div></div>" into top-level div moving inner <div>s attributes into outer one; however it shouldnt merge together <div>s that have valid id attributes (id attribute serves as a down-top merge breakpoint)' do
|
186
|
+
input = <<HTML
|
187
|
+
<div class="first">
|
188
|
+
<div class="top" id="fff">
|
189
|
+
<div id ="!hoho" class="test">
|
190
|
+
<div data-remote="true" style="color: black;" class="tost">
|
191
|
+
<p>data</p>
|
192
|
+
<div>
|
193
|
+
<div id="yopo">
|
194
|
+
another text
|
195
|
+
</div>
|
196
|
+
</div>
|
197
|
+
</div>
|
198
|
+
</div>
|
199
|
+
</div>
|
200
|
+
</div>
|
201
|
+
HTML
|
202
|
+
expectation = <<HTML
|
203
|
+
<div class="first top" id="fff">
|
204
|
+
<div id="!hoho" class="test tost c1" data-remote="true">
|
205
|
+
<p>data</p>
|
206
|
+
<div id="yopo">another text</div>
|
207
|
+
</div>
|
208
|
+
</div>
|
209
|
+
HTML
|
210
|
+
pending('Noted the difference between merge-divs: yes/auto, but the latter one doesn\'t get priority')
|
211
|
+
end
|
212
|
+
|
213
|
+
it 'should provide "drop-proprietary-attributes" method that will drop all attributes, not defined in W3C standard or applied to mismatched element'
|
214
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: onigiri
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.6
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Dmitrii Soltis
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-07-18 00:00:00.000000000 +03:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
requirement: &72106630 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *72106630
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rspec
|
28
|
+
requirement: &72106380 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.0.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *72106380
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: autotest
|
39
|
+
requirement: &72106170 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
type: :development
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *72106170
|
48
|
+
description: This gem is supposed to replace a tidy-ext in one of our projects. Tidy-ext
|
49
|
+
has nasty memory leaks, tends to crash and is incompatible with Ruby 1.9. So here
|
50
|
+
I am trying to use a japanese saw to make some rice balls.
|
51
|
+
email:
|
52
|
+
- slotos@gmail.com
|
53
|
+
executables: []
|
54
|
+
extensions: []
|
55
|
+
extra_rdoc_files: []
|
56
|
+
files:
|
57
|
+
- .gitignore
|
58
|
+
- Gemfile
|
59
|
+
- README.md
|
60
|
+
- Rakefile
|
61
|
+
- autotest/discover.rb
|
62
|
+
- lib/onigiri.rb
|
63
|
+
- lib/onigiri/drop_empty_paras.rb
|
64
|
+
- lib/onigiri/enclose_block_text.rb
|
65
|
+
- lib/onigiri/enclose_text.rb
|
66
|
+
- lib/onigiri/fix_backslash.rb
|
67
|
+
- lib/onigiri/hide_comments.rb
|
68
|
+
- lib/onigiri/merge_by_tag.rb
|
69
|
+
- lib/onigiri/show_body_only.rb
|
70
|
+
- lib/onigiri/version.rb
|
71
|
+
- onigiri.gemspec
|
72
|
+
- spec/onigiri_spec.rb
|
73
|
+
- spec/spec_helper.rb
|
74
|
+
has_rdoc: true
|
75
|
+
homepage: ''
|
76
|
+
licenses: []
|
77
|
+
post_install_message:
|
78
|
+
rdoc_options: []
|
79
|
+
require_paths:
|
80
|
+
- lib
|
81
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ! '>='
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0'
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
requirements: []
|
94
|
+
rubyforge_project:
|
95
|
+
rubygems_version: 1.6.2
|
96
|
+
signing_key:
|
97
|
+
specification_version: 3
|
98
|
+
summary: Attempt to replicate (at least some) functions of tidy utility
|
99
|
+
test_files: []
|