taggie 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/MIT-LICENSE +20 -0
- data/README.rdoc +55 -0
- data/Rakefile +22 -0
- data/lib/taggie.rb +25 -0
- data/lib/taggie_unabridged.rb +1 -0
- data/test/taggie_test.rb +10 -0
- data/test/test_helper.rb +15 -0
- metadata +83 -0
data/.gitignore
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Sean Huber - shuber@huberry.com
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
= taggie
|
2
|
+
|
3
|
+
The tiniest little HTML/XML parser...using regex
|
4
|
+
|
5
|
+
gem install taggie --pre
|
6
|
+
|
7
|
+
|
8
|
+
== WTF, why regex?!?
|
9
|
+
|
10
|
+
Curiosity, regex practice, and proof that it could be done. If you're interested, here's the beast of a regex that parses arbitrarily nested tags:
|
11
|
+
|
12
|
+
/(<(\w+)[^>]*(?:\/>|>((?:<(\w+)[^>]*(?:\/>|>.*<\/\4>)|<!--.*?-->|<\?.*?\?>|[^>])*)<\/\2>)|<!--.*?-->|<\?.*?\?>|[^>]*)/m
|
13
|
+
|
14
|
+
|
15
|
+
== Examples (these may not all work yet - work in progress)
|
16
|
+
|
17
|
+
html = '<div id="header"><img src="logo.png" /><h1>Your Company</h1></div><div id="body"><p class="content">some <span>content</span> here</p></div>'.to_taggie
|
18
|
+
puts html.type # div
|
19
|
+
puts html.tag # <div id="header">
|
20
|
+
puts html.inner_html # <img src="logo.png" /><h1>Your Company</h1>
|
21
|
+
|
22
|
+
puts html.children.first.src # logo.png
|
23
|
+
html.children.first.src = '/images/logo.png'
|
24
|
+
puts html.inner_html # <img src="/images/logo.png" /><h1>Your Company</h1>
|
25
|
+
|
26
|
+
p = html.siblings.first.children.first
|
27
|
+
puts p.tag # <p class="content">
|
28
|
+
|
29
|
+
p.id = 'content'
|
30
|
+
puts html.siblings.first.children.first # <p class="content" id="content">some <span>content</span> here</p>
|
31
|
+
|
32
|
+
p.class = nil
|
33
|
+
puts html.siblings.first.children.first # <p id="content">some <span>content</span> here</p>
|
34
|
+
|
35
|
+
p.class = ''
|
36
|
+
puts html.siblings.first.children.first # <p id="content" class="">some <span>content</span> here</p>
|
37
|
+
|
38
|
+
|
39
|
+
== TODO
|
40
|
+
|
41
|
+
* attribute writer is broken
|
42
|
+
* lib/taggie_unabridged.rb
|
43
|
+
* tests
|
44
|
+
|
45
|
+
|
46
|
+
== Note on Patches/Pull Requests
|
47
|
+
|
48
|
+
* Fork the project.
|
49
|
+
* Make your feature addition or bug fix.
|
50
|
+
* Add tests for it. This is important so I don't break it in a
|
51
|
+
future version unintentionally.
|
52
|
+
* Commit, do not mess with rakefile, version, or history.
|
53
|
+
(if you want to have your own version, that is fine but
|
54
|
+
bump version in a commit by itself I can ignore when I pull)
|
55
|
+
* Send me a pull request. Bonus points for topic branches.
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
|
5
|
+
desc 'Default: run unit tests.'
|
6
|
+
task :default => :test
|
7
|
+
|
8
|
+
desc 'Test the taggie gem.'
|
9
|
+
Rake::TestTask.new(:test) do |t|
|
10
|
+
t.libs << 'lib'
|
11
|
+
t.pattern = 'test/**/*_test.rb'
|
12
|
+
t.verbose = true
|
13
|
+
end
|
14
|
+
|
15
|
+
desc 'Generate documentation for the taggie gem.'
|
16
|
+
Rake::RDocTask.new(:rdoc) do |rdoc|
|
17
|
+
rdoc.rdoc_dir = 'rdoc'
|
18
|
+
rdoc.title = 'taggie'
|
19
|
+
rdoc.options << '--line-numbers' << '--inline-source'
|
20
|
+
rdoc.rdoc_files.include('README*')
|
21
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
22
|
+
end
|
data/lib/taggie.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Core extension that lets any String hand itself to the parser.
class String
  # Returns a new Taggie built from this string.
  def to_taggie
    Taggie.new(self)
  end
end
|
2
|
+
class Taggie < String
|
3
|
+
# Regex source for an attribute value, meant to be interpolated right after
# a single capture group holding the attribute name (so that \2 refers to the
# opening quote). It adds three groups: the quote character, the quoted
# value, and the unquoted value.
# The unquoted form stops at whitespace or '>' so that a bare value at the end
# of a tag (<p class=content>) does not swallow the tag's closing bracket.
AttributeValue = '("|\')(.*?)\2|([^\s>]*)'.freeze
|
4
|
+
# Regex source for the start of a tag: '<', the tag name (captured, word
# characters only), then everything up to but not including the next '>'.
OpenTag = '<(\w+)[^>]*'
|
5
|
+
# Regex source for the end of an opening tag: self-closing '/>' or plain '>'.
# This is a bare alternation, so it must be interpolated inside a group.
CloseTag = '\/>|>'
|
6
|
+
# Regex source matching an HTML comment, a <? ... ?> block, or any single
# character other than '>'. This is a bare alternation: a quantifier appended
# after interpolation applies to the final [^>] only.
SpecialTags = '<!--.*?-->|<\?.*?\?>|[^>]'
|
7
|
+
# Matches one top-level node: an element (self-closing, or with content and a
# matching close tag), a comment, a <? ... ?> block, or a run of non-'>'
# characters.
# Capture groups after interpolation:
#   1 - the whole matched node
#   2 - the element's tag name (back-referenced by <\/\2>)
#   3 - the content between the opening and closing tag
#   4 - the name of a nested element (back-referenced by <\/\4>)
TagMatcher = /(#{OpenTag}(?:#{CloseTag}((?:#{OpenTag}(?:#{CloseTag}.*<\/\4>)|#{SpecialTags})*)<\/\2>)|#{SpecialTags}*)/m
|
8
|
+
# Remove the inherited +class+ reader (and +id+, where it exists) so that
# taggie.class / taggie.id fall through to method_missing and address the
# HTML attributes of the same name. The real class stays reachable as
# __class__; the object id stays reachable through the built-in __id__.
alias :__class__ :class
undef :class
# Object#id only exists on Ruby 1.8. Aliasing or undefining it
# unconditionally raises NameError on later Rubies and stops the file loading.
undef :id if method_defined?(:id)
|
9
|
+
# The node this one was extracted from; assigned by #children and read by
# #rebuild!. nil when the node has no parent.
attr_accessor :parent
|
10
|
+
# Element reference.
#
# a - an Integer or Range is handed to String#[] (character indexing);
#     any other key is converted with to_s and looked up in #to_h, i.e.
#     treated as an attribute name.
#
# Uses case equality rather than an exact class comparison so that Integer
# subclasses (Fixnum/Bignum on Rubies before 2.4) are also indexed as
# characters instead of being looked up as an attribute named "0".
def [](a)
  case a
  when Integer, Range then super
  else to_h[a.to_s]
  end
end
|
11
|
+
# Element assignment: writes a character range or an HTML attribute.
#
# a - when a.class is exactly Integer or Range the call is passed to
#     String#[]=; any other key is used as an attribute name and is
#     interpolated into the regexes below without escaping.
# v - a truthy value (must respond to include?) sets the attribute; nil or
#     false removes it.
#
# Steps: clear the memoized @attributes and @tag; for nodes whose type is
# 'comment' or 'string' leave the text untouched; otherwise, when setting,
# pick ' as the quote if v contains " (else "), replace an existing a=value
# in place, and append a=value before the tag's closing '>' (keeping a
# trailing '/' or '?') only when the tag does not already contain the
# attribute. Finally call rebuild! and return v.
def []= a,v;@attributes=@tag=nil;[Integer,Range].include?(a.class)?super: !%w(comment string).include?(type)? v ? (q=v.include?('"')? "'":'"';sub!(/^([^>]+#{a}=)(?:#{AttributeValue})/m,"\\1#{q}#{v}#{q}");sub!(/^([^>]+?)\s*(\/|\?)?>/m, "\\1 #{a}=#{q}#{v}#{q}\\2>") if tag !~/\s+#{a}=/m):sub!(/^([^>]+)\s+#{a}=(?:#{AttributeValue})/,'\1'):nil;rebuild!;v end
|
12
|
+
# Memoized list of [name, value] pairs scanned from #tag.
# Nodes of type 'comment', 'doctype' or 'string' have no attributes and
# yield an empty list. A quoted value wins over the unquoted capture.
def attributes
  @attributes ||=
    if %w(comment doctype string).include?(type)
      []
    else
      pattern = /([\S]+)=(?:#{AttributeValue})/m
      tag.scan(pattern).map! { |name, _quote, quoted, bare| [name, quoted || bare] }
    end
end
|
13
|
+
# Memoized child nodes: the nodes found in #inner_html, each with its
# parent set to this node.
def children
  @children ||= begin
    nodes = inner_html.siblings_and_self
    nodes.each { |node| node.parent = self }
  end
end
|
14
|
+
# Content of this node: the capture selected by #inner_html_regex, or an
# empty string when the regex does not match at all.
def inner_html
  pattern, index = inner_html_regex
  found = match(pattern)
  return '' unless found
  found.captures[index]
end
|
15
|
+
# Replaces this node's content with v, then propagates the change upwards
# through rebuild!. Returns v.
#
# The content is located by the capture that #inner_html_regex designates
# (its index is 0-based into MatchData#captures, hence group index + 1) and
# replaced by character offsets. Substituting by text would hit the first
# textual occurrence of the old content, which may lie inside the opening
# tag or, for empty content, at the very start of the string; it would also
# interpret backslash sequences in v.
# Nodes without a content capture (e.g. self-closing tags) are left as is.
def inner_html=(v)
  @children = nil
  pattern, index = inner_html_regex
  found = match(pattern)
  group = index + 1
  if found && group < found.size && found.begin(group)
    from, upto = found.offset(group)
    text = to_s
    replace(text[0...from] + v + text[upto..-1])
  end
  rebuild!
  v
end
|
16
|
+
# Returns [regex, capture_index] for extracting this node's content.
# The four special node types use their own pattern with capture index 0;
# every other type uses TagMatcher with capture index 2.
def inner_html_regex
  special = {
    'comment' => /^<!--(.*?)-->/m,
    'doctype' => //,
    'string'  => /^([^<]+)/m,
    'xml'     => /^<\?(.*?)\?>/m
  }[type]
  special.nil? ? [TagMatcher, 2] : [special, 0]
end
|
17
|
+
# Attribute accessors by name: node.src reads self['src'] and
# node.src = value writes self['src'] = value. Calls with arguments that
# are not setters go to the default method_missing.
def method_missing(m, *a)
  name = m.to_s
  if name =~ /=$/
    # $` holds the text before the trailing '=', i.e. the attribute name.
    self[$`] = a[0]
  elsif a == []
    self[name]
  else
    super
  end
end
|
18
|
+
# Pushes a change in this node up the tree: rewrites the parent's content
# from the parent's children, then asks the grandparent (if any) to rebuild.
# Does nothing when there is no parent.
def rebuild!
  return unless parent
  parent.inner_html = parent.children.join
  grandparent = parent.parent
  grandparent.rebuild! if grandparent
end
|
19
|
+
# All nodes from #siblings_and_self except the first one.
# Always returns an Array: slicing an empty list with [1..-1] yields nil,
# which would break callers such as siblings.first, so that case is mapped
# to an empty list.
def siblings
  siblings_and_self[1..-1] || []
end
|
20
|
+
# Memoized list of the nodes found in this string: every match of the
# pattern from #inner_html_regex, reduced to its first capture group.
def siblings_and_self
  @siblings_and_self ||= begin
    pattern = inner_html_regex[0]
    scan(pattern).map { |groups| groups[0] }
  end
end
|
21
|
+
# Memoized opening tag of this node. A 'comment' node is its own tag, a
# 'string' node uses its inner_html; any other node yields everything from
# the start of a line up to and including the first '>'.
def tag
  @tag ||= begin
    literal = { 'comment' => self, 'string' => inner_html }[type]
    literal || match(/^([^>]+>)/m).captures[0]
  end
end
|
22
|
+
# Attributes as a Hash keyed by attribute name (String), plus the node
# itself under the Symbol key :html.
def to_h
  pairs = attributes.flatten
  table = Hash[*pairs]
  table[:html] = self
  table
end
|
23
|
+
# Returns a new plain String holding this node's text.
def to_s
  String.new(self)
end
|
24
|
+
# Memoized kind of node this string starts with:
#   the tag name  - for an element such as <div ...>
#   'comment'     - for <!-- ... -->
#   'doctype'     - for <!doctype ...> (case-insensitive)
#   'xml'         - for <? ... ?>
#   'string'      - for anything else
#
# The tag name is read from the match data directly; no eval and no lookup
# of undefined global variables is needed to choose the result.
def type
  @type ||=
    case self
    when /^<([\w\-_:]+)[^>]*>/m then Regexp.last_match(1)
    when /^<!--/m then 'comment'
    when /^<!doctype[^>]*>/mi then 'doctype'
    when /^<\?/m then 'xml'
    else 'string'
    end
end
|
25
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# TODO
|
data/test/taggie_test.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'test_helper')
|
2
|
+
|
3
|
+
class TaggieTest < Test::Unit::TestCase
|
4
|
+
|
5
|
+
should 'test this gem' do
|
6
|
+
html = '<div id="header"><img src="logo.png" /><h1>Your Company</h1></div><div id="body"><p class="content">some <span>content</span> here</p></div>'.to_taggie
|
7
|
+
flunk
|
8
|
+
end
|
9
|
+
|
10
|
+
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'shoulda'
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
6
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
7
|
+
require 'taggie'
|
8
|
+
|
9
|
+
class Test::Unit::TestCase
|
10
|
+
|
11
|
+
def assert_all_equal(value, *others)
|
12
|
+
others.each { |other| assert_equal value, other }
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: taggie
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 0.0.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Sean Huber
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-04-02 00:00:00 -07:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: shoulda
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 2
|
29
|
+
- 10
|
30
|
+
- 3
|
31
|
+
version: 2.10.3
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
description: A tiny little HTML/XML parser...using regex
|
35
|
+
email: shuber@huberry.com
|
36
|
+
executables: []
|
37
|
+
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files:
|
41
|
+
- README.rdoc
|
42
|
+
files:
|
43
|
+
- .gitignore
|
44
|
+
- MIT-LICENSE
|
45
|
+
- README.rdoc
|
46
|
+
- Rakefile
|
47
|
+
- lib/taggie.rb
|
48
|
+
- lib/taggie_unabridged.rb
|
49
|
+
- test/taggie_test.rb
|
50
|
+
- test/test_helper.rb
|
51
|
+
has_rdoc: true
|
52
|
+
homepage: http://github.com/shuber/taggie
|
53
|
+
licenses: []
|
54
|
+
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options:
|
57
|
+
- --charset=UTF-8
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
segments:
|
65
|
+
- 0
|
66
|
+
version: "0"
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
requirements: []
|
75
|
+
|
76
|
+
rubyforge_project:
|
77
|
+
rubygems_version: 1.3.6
|
78
|
+
signing_key:
|
79
|
+
specification_version: 3
|
80
|
+
summary: A tiny little HTML/XML parser
|
81
|
+
test_files:
|
82
|
+
- test/taggie_test.rb
|
83
|
+
- test/test_helper.rb
|