onigiri 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/Gemfile +4 -0
- data/README.md +6 -0
- data/Rakefile +1 -0
- data/autotest/discover.rb +1 -0
- data/lib/onigiri.rb +51 -0
- data/lib/onigiri/drop_empty_paras.rb +13 -0
- data/lib/onigiri/enclose_block_text.rb +21 -0
- data/lib/onigiri/enclose_text.rb +19 -0
- data/lib/onigiri/fix_backslash.rb +16 -0
- data/lib/onigiri/hide_comments.rb +11 -0
- data/lib/onigiri/merge_by_tag.rb +60 -0
- data/lib/onigiri/show_body_only.rb +13 -0
- data/lib/onigiri/version.rb +3 -0
- data/onigiri.gemspec +23 -0
- data/spec/onigiri_spec.rb +214 -0
- data/spec/spec_helper.rb +5 -0
- metadata +99 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
@@ -0,0 +1 @@
|
|
1
|
+
Autotest.add_discovery { "rspec2" }
|
data/lib/onigiri.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "nokogiri"
|
3
|
+
|
4
|
+
# Onigiri applies a list of named clean-up handlers to an HTML fragment.
# Handlers are instance methods of Onigiri::Document whose names have been
# recorded with +register_handler+; +clean+ runs the requested ones in order.
module Onigiri
  extend self
  # Registered handler names, stored as name => true.
  @@registry ||= {}

  # Raised by +register_handler+ when a handler name is registered twice.
  class OnigiriHandlerTaken < StandardError
    # message - optional custom message; defaults to #description so that a
    #           bare `raise OnigiriHandlerTaken` carries a useful explanation.
    def initialize(message = nil)
      super(message || description)
    end

    # Returns the human-readable explanation of this error.
    def description
      "There was an attempt to override registered handler. This usually indicates a bug in Onigiri."
    end
  end

  # Parses +data+ and applies every requested handler in the given order.
  #
  # data   - markup to clean (anything Onigiri::Document.parse accepts).
  # params - handler names; nested arrays are flattened. Names that are not
  #          present in the registry are skipped silently.
  #
  # Returns the resulting markup as produced by +to_html+.
  def clean(data, *params)
    dupe = Onigiri::Document.parse data
    params.flatten.each do |method|
      # Only registered names are ever sent, so arbitrary methods of the
      # document cannot be invoked through +params+.
      dupe = dupe.send(method) if @@registry[method]
    end
    dupe.to_html
  end

  # HTML fragment that the handler files extend with one method per handler.
  class Document < Nokogiri::HTML::DocumentFragment
    class << self
      # Strips formatting whitespace from +tags+ (when it responds to +gsub+)
      # and delegates to the superclass parser with the cleaned value.
      def parse(tags)
        # Remove formatting whitespaces
        # Those do not represent any data while messing up the tree
        # NOTE(review): line breaks are deleted, not replaced by a space, so
        # words separated only by a newline end up joined - confirm intended.
        tags = tags.gsub(/(\r|\n)/, '').gsub(/> *</, '><') if tags.respond_to? :gsub
        super
      end
    end
  end

  private

  # Records +name+ as an available handler.
  #
  # Returns true on success.
  # Raises OnigiriHandlerTaken if +name+ has already been registered.
  def register_handler(name)
    raise OnigiriHandlerTaken if @@registry[name]
    @@registry[name] = true
  end

end
|
44
|
+
|
45
|
+
require "onigiri/drop_empty_paras"
|
46
|
+
require "onigiri/enclose_block_text"
|
47
|
+
require "onigiri/enclose_text"
|
48
|
+
require "onigiri/fix_backslash"
|
49
|
+
require "onigiri/show_body_only"
|
50
|
+
require "onigiri/merge_by_tag"
|
51
|
+
require "onigiri/hide_comments"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Onigiri
  register_handler :enclose_block_text
  class Document
    # Wraps every text node that is a direct child of <noscript>, <form> or
    # <blockquote> in a <p> element, stripping surrounding whitespace.
    #
    # Returns a modified copy; the receiver is left untouched.
    def enclose_block_text
      dupe = dup
      strict_tags = {"noscript" => 1, "form" => 1, "blockquote" => 1}
      dupe.traverse do |elem|
        next unless strict_tags[elem.name]
        elem.children.each do |target|
          next unless target.text?
          # Build the paragraph as a node and assign the text through
          # content= so it is escaped. Interpolating the decoded text into a
          # markup string would re-parse "&lt;b&gt;" as a real <b> element.
          paragraph = Nokogiri::XML::Node.new('p', target.document)
          paragraph.content = target.content.strip
          target.add_previous_sibling paragraph
          target.unlink
        end
      end
      dupe
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Onigiri
  register_handler :enclose_text
  class Document
    # Wraps text nodes and inline elements found directly under <body> (or
    # directly under the fragment when there is no <body>) in a <p> element.
    #
    # Returns a modified copy; the receiver is left untouched.
    def enclose_text
      dupe = dup
      # NOTE(review): a single wrapper node is created and reused for every
      # match; verify the result when several separate inline runs are present.
      wrapper = Onigiri::Document.parse('<p>').child
      body = dupe.css('body').children
      body = dupe.children if body.empty?
      body.each do |target|
        next unless target.parent
        unless target.text?
          # description is nil for nodes without an HTML element description
          # (e.g. comments or unknown tags); treat those as not inline
          # instead of failing with NoMethodError on nil.
          description = target.description
          next unless description && description.inline?
        end
        wrap = target.add_previous_sibling(wrapper)
        wrap << target.unlink
      end
      dupe
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Onigiri
  register_handler :fix_backslash
  class Document
    # Replaces every backslash with a forward slash in the src, longdesc,
    # href and action attributes of all elements carrying any of them.
    #
    # Returns a modified copy; the receiver is left untouched.
    def fix_backslash
      copy = dup
      url_attributes = ['src', 'longdesc', 'href', 'action']
      # Builds "[src], [longdesc], [href], [action]".
      selector = url_attributes.map { |name| "[#{name}]" }.join(', ')
      copy.css(selector).each do |node|
        url_attributes.each do |name|
          value = node[name]
          next unless value
          node[name] = value.gsub("\\", "/")
        end
      end
      copy
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Onigiri
  register_handler :merge_divs
  register_handler :merge_spans
  class Document

    # Collapses chains of singly nested <div> elements. See #merge_by_tag.
    def merge_divs
      merge_by_tag('div')
    end

    # Collapses chains of singly nested <span> elements. See #merge_by_tag.
    def merge_spans
      merge_by_tag('span')
    end

    # Merges elements named +tag_name+ that are the only child of a parent
    # with the same name into the topmost element of that chain. The "class"
    # and "style" values gathered along the chain are written to that element.
    #
    # Returns a modified copy; the receiver is left untouched.
    def merge_by_tag(tag_name)
      copy = dup
      # Pass one: collect the nested elements that need merging upwards.
      # Pass two: for each of them walk up the chain gathering attributes.
      copy.find_merger_elements(tag_name).each do |merger|
        collected = singular_upverse(merger)
        root = collected['root']
        merger.children.each { |survivor| root << survivor }
        collected['deletion_node'].remove
        root['class'] = collected['class'] if collected['class']
        root['style'] = collected['style'] if collected['style']
      end
      copy
    end

    # Returns an Array of the +tag_name+ elements whose parent has exactly
    # one child and is itself named +tag_name+.
    def find_merger_elements(tag_name)
      css(tag_name).select do |elem|
        container = elem.parent
        container.children.size == 1 && container.name == tag_name
      end
    end

    # Walks upwards from +node+ while each node is the sole child of a
    # same-named parent.
    #
    # Returns a Hash with:
    #   'root'          - the topmost node of the chain
    #   'deletion_node' - the chain node directly below the root
    #   'style'/'class' - attribute values joined with spaces, root first
    def singular_upverse(node)
      if node.parent.name != node.name || node.next_sibling || node.previous_sibling
        # Top of the chain reached: this node is the root.
        data = { 'root' => node }
      else
        data = singular_upverse(node.parent)
        # Only the first node below the root becomes the deletion point;
        # deeper nodes must not overwrite it.
        data['deletion_node'] ||= node if data['root']
      end

      ['style', 'class'].each do |attribute|
        value = node[attribute]
        if data[attribute]
          data[attribute] += " #{value}" if value
        else
          data[attribute] = value
        end
      end

      data
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Onigiri
  register_handler :show_body_only
  class Document
    # Extracts the contents of <body> into a new fragment. When there is no
    # <body>, a plain copy of the receiver is returned.
    #
    # Works on a copy, like the other handlers, so the receiver keeps its
    # body children instead of having them moved into the result.
    def show_body_only
      source = dup
      body = source.css('body')
      return source if body.empty?

      result = Onigiri::Document.parse("")
      body.children.each { |child| result << child }
      result
    end
  end
end
|
data/onigiri.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "onigiri/version"
|
4
|
+
|
5
|
+
# Gem specification for onigiri; the file lists are taken from git.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = "onigiri"
  spec.version     = Onigiri::VERSION
  spec.authors     = ["Dmitrii Soltis"]
  spec.email       = ["slotos@gmail.com"]
  spec.homepage    = ""
  spec.summary     = %q{Attempt to replicate (at least some) functions of tidy utility}
  spec.description = %q{This gem is supposed to replace a tidy-ext in one of our projects. Tidy-ext has nasty memory leaks, tends to crash and is incompatible with Ruby 1.9. So here I am trying to use a japanese saw to make some rice balls.}
  spec.date        = "2011-07-18"

  # Packaged files, as tracked by git
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime and development dependencies
  spec.add_dependency "nokogiri"
  spec.add_development_dependency "rspec", ">= 2.0.0"
  spec.add_development_dependency "autotest"
end
|
@@ -0,0 +1,214 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
include Onigiri
|
4
|
+
|
5
|
+
describe Onigiri do
|
6
|
+
it 'should throw exception when registering two handlers with the same name' do
|
7
|
+
lambda do
|
8
|
+
module Onigiri
|
9
|
+
register_handler :drop_empty_paras
|
10
|
+
end
|
11
|
+
end.should raise_error(OnigiriHandlerTaken)
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'should define "drop_empty_paras" method that removes empty paragraphs from argument string' do
|
15
|
+
input = 'this is text with <p>some </p><p></p> <p>emptyness inside</p>'
|
16
|
+
expectation = 'this is text with <p>some </p><p>emptyness inside</p>'
|
17
|
+
Onigiri::clean(input, :drop_empty_paras).should == expectation
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should define "enclose_block_text" method that encloses any text inside <form>, <blockquote>, <noscript> in <p> tag with trimmed spaces' do
|
21
|
+
input = '<NOSCRIPT>hello <form>there <blockquote>pretty</blockquote> world</form></NOSCRIPT>'
|
22
|
+
expectation = '<noscript><p>hello</p><form><p>there</p><blockquote><p>pretty</p></blockquote><p>world</p></form></noscript>'
|
23
|
+
Onigiri::clean(input, :enclose_block_text).gsub(/(\r|\n| )/, '').should == expectation
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should define "enclose_text" method that encloses any text in <body> using <p> tag' do
|
27
|
+
input = '<body>some text<form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div></body>'
|
28
|
+
expectation = '<body><p>some text</p><form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div></body>'
|
29
|
+
Onigiri::clean(input, :enclose_text).gsub(/(\r|\n)/, '').gsub(/> *</, '><').should == expectation
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should wrap inline elements with "enclose_text" method' do
|
33
|
+
input = '<body><span>some inline text</span><form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div></body>'
|
34
|
+
expectation = '<body><p><span>some inline text</span></p><form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div></body>'
|
35
|
+
Onigiri::clean(input, :enclose_text).gsub(/(\r|\n)/, '').gsub(/> *</, '><').should == expectation
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'should work with root element when "enclose_text" method was called on <body>less fragment' do
|
39
|
+
input = '<span>some inline text</span><form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div>'
|
40
|
+
expectation = '<p><span>some inline text</span></p><form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div>'
|
41
|
+
Onigiri::clean(input, :enclose_text).gsub(/(\r|\n)/, '').gsub(/> *</, '><').should == expectation
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'should work around issue #407 (https://github.com/tenderlove/nokogiri/issues/407)' do
  input = "<p class='red'><span class='capital'>о</span>тветственный редактор легендарного журнала The Strand, на страницах которого впервые увидели свет рассказы Артура Конан Дойля, Эдит Несбит, Агаты Кристи и Редьярда Киплинга\r\n– Эндрю Ф. Гулли обнаружил 15 ранее не публиковавшихся рассказов классика детектива – Дэшила Хэммета.</p>\nСвое открытие Гулли сделал, изучая онлайн-библиотеку и архивы центра Рэнсома при Техасском университете. «Я знал, что Лиллиан Хеллман, бывшая в тесных отношениях с Хэмметом, передала Центру большое количество его бумаг, и надеялся найти среди них что-то интересное, - рассказывает редактор The Strand. – Каково же было мое удивление, когда после тщательной проверки, на которую ушло около 100 часов работы, я обнаружил 15 совершенно новых, неизвестных читателю рассказов»\r\n\r\n<div>\r\n<br/></div>\r\n\r\n<div>Среди рассказов есть как детективы, так и «психологические» истории. Неизвестно, почему автор отказался публиковать их при жизни, ведь, по словам Эндрю Гулли, написаны они идеально: «Некоторые писатели неспособны адекватно оценивать свои произведения: им кажется, что они недоработаны или неталантливо написаны. Возможно, Хэммет тоже так посчитал, потому что, несмотря на отличный слог и композицию, найденные его рассказы отличаются от «традиционных» его произведений. Хотя как раз это и поможет читателям оценитьмногогранность таланта автора».</div>\r\n\r\n<div>\r\n<br/></div>\r\n\r\n<div>Вначале рассказы выйдут в журнале The Strand, а потом будут изданы отдельной книгой. Ранее в The Strand были напечатаны неизвестные произведения других классиков литературы - Марка Твена, П.Дж. Вудхауза, Агаты Кристи и Грэма Грина.</div>"
  # raise_error must be applied to a callable: the call is wrapped in a
  # lambda so the matcher can invoke it. Applying the matcher to the String
  # returned by clean was what produced "undefined method `call'".
  lambda { Onigiri.clean(input, :enclose_text) }.should_not raise_error
end
|
49
|
+
|
50
|
+
describe 'should define "fix_backslash" method that fixes "\" for "/" in urls' do
|
51
|
+
it 'is fixing href attributes' do
|
52
|
+
input = '<a href="http:\\\\google.com/">http:\\\\google.com/</a><link rel="stylesheet" type="text/css" href="http:\\\\bing.com\\">'
|
53
|
+
expectation = '<a href="http://google.com/">http:\\\\google.com/</a><link rel="stylesheet" type="text/css" href="http://bing.com/">'
|
54
|
+
Onigiri::clean(input, :fix_backslash).should == expectation
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'is fixing src attributes' do
|
58
|
+
input = '<img src="http:\\/imagehosting.com/3\\image.png">'
|
59
|
+
expectation = '<img src="http://imagehosting.com/3/image.png">'
|
60
|
+
Onigiri::clean(input, :fix_backslash).should == expectation
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'is fixing longdesc attributes' do
|
64
|
+
input = '<img src="http://imagehosting.com/3/image.png" longdesc="http:\\/alt.com\\desc.txt">'
|
65
|
+
expectation = '<img src="http://imagehosting.com/3/image.png" longdesc="http://alt.com/desc.txt">'
|
66
|
+
Onigiri::clean(input, :fix_backslash).should == expectation
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'is fixing form action attributes' do
|
70
|
+
input = '<form action="\\application.php">\\application.php</form>'
|
71
|
+
expectation = '<form action="/application.php">\\application.php</form>'
|
72
|
+
Onigiri::clean(input, :fix_backslash).should == expectation
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'should all work together' do
|
76
|
+
input = '<a href="http:\\\\google.com/">link</a><link rel="stylesheet" type="text/css" href="http:\\\\bing.com\\"><img src="http://imagehosting.com\\3/image.png" longdesc="http:\\/alt.com\\desc.txt"><form action="\\application.php">русский текст</form>'
|
77
|
+
expectation = '<a href="http://google.com/">link</a><link rel="stylesheet" type="text/css" href="http://bing.com/"><img src="http://imagehosting.com/3/image.png" longdesc="http://alt.com/desc.txt"><form action="/application.php">русский текст</form>'
|
78
|
+
Onigiri::clean(input, :fix_backslash).should == expectation
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'should provide a "show_body_only" method that extracts contents of a <body> element for incorporation' do
|
83
|
+
input = '<body>some text<form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div></body>'
|
84
|
+
expectation = 'some text<form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div>'
|
85
|
+
Onigiri::clean(input, :show_body_only).gsub(/(\r|\n)/, '').gsub(/> *</, '><').should == expectation
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'should not do anything with "show_body_only" if there is not body' do
|
89
|
+
input = 'some text<form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div>'
|
90
|
+
expectation = 'some text<form>some text in form</form><p>some text in p</p><div><blockquote>some text in third level element</blockquote></div>'
|
91
|
+
Onigiri::clean(input, :show_body_only).gsub(/(\r|\n)/, '').gsub(/> *</, '><').should == expectation
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'should provide a "merge_divs" method that will merge nested <div> such as "<div><div>...</div></div>" into top-level div discarding inner <div>s attributes except for "class" and "style"' do
|
95
|
+
input = <<HTML
|
96
|
+
<div class="first">
|
97
|
+
<div class="top">
|
98
|
+
<div id ="!hoho" class="test">
|
99
|
+
<div data-remote="true" style="color: black;" class="tost">
|
100
|
+
<p>data</p>
|
101
|
+
<div>
|
102
|
+
<div class="yopo">
|
103
|
+
another text
|
104
|
+
</div>
|
105
|
+
</div>
|
106
|
+
</div>
|
107
|
+
</div>
|
108
|
+
</div>
|
109
|
+
</div>
|
110
|
+
HTML
|
111
|
+
expectation = <<HTML
|
112
|
+
<div class="first top test tost" style="color: black;">
|
113
|
+
<p>data</p>
|
114
|
+
<div class="yopo">another text</div>
|
115
|
+
</div>
|
116
|
+
HTML
|
117
|
+
Onigiri::clean(input, :merge_divs).gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2').should == expectation.gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2')
|
118
|
+
end
|
119
|
+
|
120
|
+
it 'should provide a "merge_spans" method that replicates "merge_divs" for <span> tag' do
|
121
|
+
input = <<HTML
|
122
|
+
<span class="first">
|
123
|
+
<span class="top">
|
124
|
+
<span id ="!hoho" class="test">
|
125
|
+
<span data-remote="true" style="color: black;" class="tost">
|
126
|
+
data
|
127
|
+
<span>
|
128
|
+
<span class="yopo">
|
129
|
+
another text
|
130
|
+
</span>
|
131
|
+
</span>
|
132
|
+
</span>
|
133
|
+
</span>
|
134
|
+
</span>
|
135
|
+
</span>
|
136
|
+
HTML
|
137
|
+
expectation = <<HTML
|
138
|
+
<span class="first top test tost" style="color: black;">
|
139
|
+
data
|
140
|
+
<span class="yopo">another text</span>
|
141
|
+
</span>
|
142
|
+
HTML
|
143
|
+
Onigiri::clean(input, :merge_spans).gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2').should == expectation.gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2')
|
144
|
+
end
|
145
|
+
|
146
|
+
it 'should provide "hide_comments" method that will remove all comments from the string' do
|
147
|
+
input = <<HTML
|
148
|
+
<span class="first">
|
149
|
+
<span class="top">
|
150
|
+
<span id="!hoho" class="test">
|
151
|
+
<!------ hello world! -->
|
152
|
+
<span data-remote="true" style="color: black;" class="tost">
|
153
|
+
data
|
154
|
+
<span>
|
155
|
+
<span class="yopo">
|
156
|
+
another text
|
157
|
+
</span>
|
158
|
+
</span>
|
159
|
+
</span>
|
160
|
+
<!-- another comment -->
|
161
|
+
</span>
|
162
|
+
</span>
|
163
|
+
</span>
|
164
|
+
HTML
|
165
|
+
expectation = <<HTML
|
166
|
+
<span class="first">
|
167
|
+
<span class="top">
|
168
|
+
<span id="!hoho" class="test">
|
169
|
+
<span data-remote="true" style="color: black;" class="tost">
|
170
|
+
data
|
171
|
+
<span>
|
172
|
+
<span class="yopo">
|
173
|
+
another text
|
174
|
+
</span>
|
175
|
+
</span>
|
176
|
+
</span>
|
177
|
+
</span>
|
178
|
+
</span>
|
179
|
+
</span>
|
180
|
+
HTML
|
181
|
+
Onigiri::clean(input, :hide_comments).gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2').should == expectation.gsub(/(\r|\n)/, '').gsub(/> *</, '><').gsub(/(>) +| +(<)/, '\1\2')
|
182
|
+
end
|
183
|
+
|
184
|
+
# Noted pending jobs.
|
185
|
+
it 'should provide a "automerge_divs" method that will merge nested <div> such as "<div><div>...</div></div>" into top-level div moving inner <div>s attributes into outer one; however it shouldnt merge together <div>s that have valid id attributes (id attribute serves as a down-top merge breakpoint)' do
|
186
|
+
input = <<HTML
|
187
|
+
<div class="first">
|
188
|
+
<div class="top" id="fff">
|
189
|
+
<div id ="!hoho" class="test">
|
190
|
+
<div data-remote="true" style="color: black;" class="tost">
|
191
|
+
<p>data</p>
|
192
|
+
<div>
|
193
|
+
<div id="yopo">
|
194
|
+
another text
|
195
|
+
</div>
|
196
|
+
</div>
|
197
|
+
</div>
|
198
|
+
</div>
|
199
|
+
</div>
|
200
|
+
</div>
|
201
|
+
HTML
|
202
|
+
expectation = <<HTML
|
203
|
+
<div class="first top" id="fff">
|
204
|
+
<div id="!hoho" class="test tost c1" data-remote="true">
|
205
|
+
<p>data</p>
|
206
|
+
<div id="yopo">another text</div>
|
207
|
+
</div>
|
208
|
+
</div>
|
209
|
+
HTML
|
210
|
+
pending('Noted the difference between merge-divs: yes/auto, but the latter one doesn\'t get priority')
|
211
|
+
end
|
212
|
+
|
213
|
+
it 'should provide "drop-proprietary-attributes" method that will drop all attributes, not defined in W3C standard or applied to mismatched element'
|
214
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: onigiri
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.6
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Dmitrii Soltis
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-07-18 00:00:00.000000000 +03:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
requirement: &72106630 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *72106630
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rspec
|
28
|
+
requirement: &72106380 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.0.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *72106380
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: autotest
|
39
|
+
requirement: &72106170 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
type: :development
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *72106170
|
48
|
+
description: This gem is supposed to replace a tidy-ext in one of our projects. Tidy-ext
|
49
|
+
has nasty memory leaks, tends to crash and is incompatible with Ruby 1.9. So here
|
50
|
+
I am trying to use a japanese saw to make some rice balls.
|
51
|
+
email:
|
52
|
+
- slotos@gmail.com
|
53
|
+
executables: []
|
54
|
+
extensions: []
|
55
|
+
extra_rdoc_files: []
|
56
|
+
files:
|
57
|
+
- .gitignore
|
58
|
+
- Gemfile
|
59
|
+
- README.md
|
60
|
+
- Rakefile
|
61
|
+
- autotest/discover.rb
|
62
|
+
- lib/onigiri.rb
|
63
|
+
- lib/onigiri/drop_empty_paras.rb
|
64
|
+
- lib/onigiri/enclose_block_text.rb
|
65
|
+
- lib/onigiri/enclose_text.rb
|
66
|
+
- lib/onigiri/fix_backslash.rb
|
67
|
+
- lib/onigiri/hide_comments.rb
|
68
|
+
- lib/onigiri/merge_by_tag.rb
|
69
|
+
- lib/onigiri/show_body_only.rb
|
70
|
+
- lib/onigiri/version.rb
|
71
|
+
- onigiri.gemspec
|
72
|
+
- spec/onigiri_spec.rb
|
73
|
+
- spec/spec_helper.rb
|
74
|
+
has_rdoc: true
|
75
|
+
homepage: ''
|
76
|
+
licenses: []
|
77
|
+
post_install_message:
|
78
|
+
rdoc_options: []
|
79
|
+
require_paths:
|
80
|
+
- lib
|
81
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ! '>='
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0'
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
requirements: []
|
94
|
+
rubyforge_project:
|
95
|
+
rubygems_version: 1.6.2
|
96
|
+
signing_key:
|
97
|
+
specification_version: 3
|
98
|
+
summary: Attempt to replicate (at least some) functions of tidy utility
|
99
|
+
test_files: []
|