taggie 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,2 @@
1
+ .DS_Store
2
+ pkg
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Sean Huber - shuber@huberry.com
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,55 @@
1
+ = taggie
2
+
3
+ The tiniest little HTML/XML parser...using regex
4
+
5
+ gem install taggie --pre
6
+
7
+
8
+ == WTF, why regex?!?
9
+
10
+ Curiosity, regex practice, and proof that it could be done. If you're interested, here's the beast of a regex that parses arbitrarily nested tags:
11
+
12
+ /(<(\w+)[^>]*(?:\/>|>((?:<(\w+)[^>]*(?:\/>|>.*<\/\4>)|<!--.*?-->|<\?.*?\?>|[^>])*)<\/\2>)|<!--.*?-->|<\?.*?\?>|[^>]*)/m
13
+
14
+
15
+ == Examples (these may not all work yet - work in progress)
16
+
17
+ html = '<div id="header"><img src="logo.png" /><h1>Your Company</h1></div><div id="body"><p class="content">some <span>content</span> here</p></div>'.to_taggie
18
+ puts html.type # div
19
+ puts html.tag # <div id="header">
20
+ puts html.inner_html # <img src="logo.png" /><h1>Your Company</h1>
21
+
22
+ puts html.children.first.src # logo.png
23
+ html.children.first.src = '/images/logo.png'
24
+ puts html.inner_html # <img src="/images/logo.png" /><h1>Your Company</h1>
25
+
26
+ p = html.siblings.first.children.first
27
+ puts p.tag # <p class="content">
28
+
29
+ p.id = 'content'
30
+ puts html.siblings.first.children.first # <p class="content" id="content">some <span>content</span> here</p>
31
+
32
+ p.class = nil
33
+ puts html.siblings.first.children.first # <p id="content">some <span>content</span> here</p>
34
+
35
+ p.class = ''
36
+ puts html.siblings.first.children.first # <p id="content" class="">some <span>content</span> here</p>
37
+
38
+
39
+ == TODO
40
+
41
+ * attribute writer is broken
42
+ * lib/taggie_unabridged.rb
43
+ * tests
44
+
45
+
46
+ == Note on Patches/Pull Requests
47
+
48
+ * Fork the project.
49
+ * Make your feature addition or bug fix.
50
+ * Add tests for it. This is important so I don't break it in a
51
+ future version unintentionally.
52
+ * Commit, but do not mess with the Rakefile, version, or history.
53
+ (If you want to have your own version, that is fine, but
54
+ bump the version in a separate commit that I can ignore when I pull.)
55
+ * Send me a pull request. Bonus points for topic branches.
@@ -0,0 +1,22 @@
1
require 'rake'
require 'rake/testtask'

# Rake used to ship the RDoc task itself as 'rake/rdoctask'; that file is no
# longer part of Rake and the task is provided by RDoc as 'rdoc/task'.
# Prefer the current location and only fall back to the legacy one, so the
# Rakefile loads with either generation of the tools.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

desc 'Default: run unit tests.'
task :default => :test

desc 'Test the taggie gem.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

desc 'Generate documentation for the taggie gem.'
rdoc_task_class.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'taggie'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
@@ -0,0 +1,25 @@
1
# Convenience conversion so any string of markup can be queried in place.
class String
  # Returns a new Taggie holding a copy of this string.
  def to_taggie
    Taggie.new(self)
  end
end

# A tiny regex-driven HTML/XML node.
#
# A Taggie *is* the markup text (it subclasses String). The node it describes
# is the first thing in that text; whatever follows is reachable through
# #siblings. Reading or writing an unknown method name is treated as reading
# or writing the attribute of that name (see #method_missing), which is why
# #class (and #id, where it exists) are removed from the public interface.
#
# Nodes returned by #children remember their #parent, and every write to a
# child is spliced back into the parent's text (see #rebuild!). Nodes returned
# by #siblings have no parent, so edits made to them are not written back to
# the string they were scanned from.
#
# The element grammar is the original regex and has its limits: the inner
# content of an element may only contain nested, properly closed tags,
# comments, processing instructions and characters other than ">".
class Taggie < String
  # Value part of an attribute: a quoted value (quote captured for the \2
  # backreference, so exactly one group must precede this fragment) or a bare
  # value that stops at whitespace or the end of the tag.
  AttributeValue = '("|\')(.*?)\2|([^\s>]*)'
  OpenTag = '<(\w+)[^>]*'
  CloseTag = '\/>|>'
  SpecialTags = '<!--.*?-->|<\?.*?\?>|[^>]'
  # Group 1: whole node, group 2: tag name, group 3: inner html.
  TagMatcher = /(#{OpenTag}(?:#{CloseTag}((?:#{OpenTag}(?:#{CloseTag}.*<\/\4>)|#{SpecialTags})*)<\/\2>)|#{SpecialTags}*)/m
  # Tokenizer used for sibling scanning. It recognises the same elements as
  # TagMatcher, then comments, processing instructions, declarations and text
  # runs, and finally falls back to a lone "<...>" fragment. One alternative
  # always consumes at least one character, so joining the tokens reproduces
  # the scanned text exactly and no empty tokens are produced.
  NodeMatcher = /(#{OpenTag}(?:#{CloseTag}((?:#{OpenTag}(?:#{CloseTag}.*<\/\4>)|#{SpecialTags})*)<\/\2>)|<!--.*?-->|<\?.*?\?>|<![^>]*>|[^<]+|<[^<>]*>?)/m
  # name=value pairs inside an opening tag; names cannot contain quotes or "=".
  AttributeMatcher = /([^\s=<>"'\/]+)=(?:#{AttributeValue})/m
  # Regexes whose first capture is the inner text of the non-element node
  # types. 'doctype' deliberately has no capture: it has no inner html.
  InnerHtmlMatchers = {
    'comment' => /\A<!--(.*?)-->/m,
    'doctype' => //,
    'string'  => /\A([^<]+)/m,
    'xml'     => /\A<\?(.*?)\?>/m
  }

  # Free the names "class" and "id" for attribute access. The real class is
  # still reachable as __class__; __id__ is provided by Ruby itself. #id is
  # only removed where it exists, because undefining a missing method raises
  # NameError and would prevent this file from loading.
  alias :__class__ :class
  undef :class
  undef_method(:id) if method_defined?(:id)

  # The Taggie this node was obtained from via #children (nil otherwise).
  attr_accessor :parent

  # Integer and Range keys index the text like String#[]; any other key is
  # looked up (as a string) in #to_h, i.e. it reads an attribute.
  def [](key)
    native_index?(key) ? super : to_h[key.to_s]
  end

  # Integer and Range keys edit the text like String#[]=. Any other key sets
  # the attribute of that name on the opening tag; a nil/false value removes
  # it. Comment and string nodes have no attributes and are left untouched.
  # The change is then propagated to the ancestors. Returns +value+.
  def []=(key, value)
    reset_cache!
    if native_index?(key)
      # A raw text edit can invalidate anything parsed from the old text.
      detach_children!
      super
    elsif !%w(comment string).include?(type)
      if value
        write_attribute(key.to_s, value.to_s)
      else
        remove_attribute(key.to_s)
      end
    end
    # The opening tag changed, so everything derived from it is stale.
    reset_cache!
    rebuild!
    value
  end

  # Array of [name, value] pairs found in the opening tag (empty for comment,
  # doctype and string nodes).
  def attributes
    @attributes ||=
      if %w(comment doctype string).include?(type)
        []
      else
        (tag || '').scan(AttributeMatcher).map do |name, _quote, quoted, bare|
          [name, quoted || bare]
        end
      end
  end

  # The nodes found in #inner_html, each with #parent set to this node.
  # Memoized so that the objects handed out keep feeding #rebuild!.
  # Nodes without inner html have no children.
  def children
    @children ||= begin
      inner = inner_html
      inner ? inner.siblings_and_self.each { |node| node.parent = self } : []
    end
  end

  # The text inside the node as a Taggie: element content, comment text,
  # processing-instruction text, or the text run of a string node. Returns nil
  # when the node has no inner region (self-closing or unparseable element,
  # doctype) and an empty Taggie when the node's regex does not match at all.
  def inner_html
    regex, index = inner_html_regex
    found = match(regex)
    return Taggie.new('') unless found
    inner = found.captures[index]
    inner && Taggie.new(inner)
  end

  # Replaces the inner region with +v+ and propagates the change upwards.
  # Previously obtained children are detached because they describe the old
  # content. Raises TypeError when the node has no inner region.
  def inner_html=(v)
    detach_children!
    replace_inner_html(v)
    rebuild!
    v
  end

  # Returns [regex, capture_index] locating the inner html for this node's
  # type; capture_index is an index into MatchData#captures.
  def inner_html_regex
    special = InnerHtmlMatchers[type]
    special ? [special, 0] : [TagMatcher, 2]
  end

  # "name = value" writes an attribute, a bare "name" reads one; anything
  # called with arguments is a genuine missing method.
  # respond_to_missing? is intentionally not overridden: Ruby's implicit
  # conversions consult it before dispatching here, and must keep getting the
  # default answer so Taggies are not mistaken for arrays, hashes, etc.
  def method_missing(name, *args)
    label = name.to_s
    if label =~ /=\z/
      self[$`] = args[0]
    elsif args.empty?
      self[label]
    else
      super
    end
  end

  # Writes this node's current text back into its ancestors: the parent's
  # inner region is replaced by the concatenation of its (cached) children,
  # then the parent does the same with its own parent. Does nothing for a
  # node without a parent. Returns nil.
  def rebuild!
    return nil unless parent
    parent.replace_inner_html(parent.children.join)
    parent.rebuild!
    nil
  end

  # The nodes that follow this one in the text.
  def siblings
    siblings_and_self.drop(1)
  end

  # Every top-level node in the text, in order, as Taggies. The first entry
  # is a copy of the node this Taggie describes.
  def siblings_and_self
    @siblings_and_self ||= scan(NodeMatcher).map { |captures| Taggie.new(captures[0]) }
  end

  # The opening tag of the node. For a comment this is the node itself and
  # for a string node its text; nil if no ">" terminates the tag.
  def tag
    @tag ||=
      case type
      when 'comment' then self
      when 'string' then inner_html
      else
        found = match(/\A([^>]+>)/m)
        found && Taggie.new(found[1])
      end
  end

  # Attributes as a Hash keyed by name, plus :html => self.
  def to_h
    hash = {}
    attributes.each { |name, value| hash[name] = value }
    hash[:html] = self
    hash
  end

  # A plain String copy of the text.
  def to_s
    String.new(self)
  end

  # The tag name for an element, or 'comment', 'doctype', 'xml' or 'string'.
  # Anchored at the very start of the text so that only the first node is
  # considered, even when the text spans several lines.
  def type
    @type ||=
      if self =~ /\A<([\w\-:]+)[^>]*>/m
        $1
      elsif self =~ /\A<!--/
        'comment'
      elsif self =~ /\A<!doctype[^>]*>/mi
        'doctype'
      elsif self =~ /\A<\?/
        'xml'
      else
        'string'
      end
  end

  protected

  # Splices +text+ over the inner region using match offsets, so that equal
  # text elsewhere in the node (for instance inside the opening tag) is never
  # touched. Keeps the children cache: callers decide whether it is stale.
  def replace_inner_html(text)
    regex, index = inner_html_regex
    found = match(regex)
    group = index + 1
    unless found && group < found.size && found.begin(group)
      raise TypeError, "#{type} node has no inner html to replace"
    end
    plain = to_s
    replace(plain[0...found.begin(group)] + text + plain[found.end(group)..-1])
    reset_cache!
    self
  end

  private

  # True for keys that address the text itself rather than an attribute.
  def native_index?(key)
    key.is_a?(Integer) || key.is_a?(Range)
  end

  # Forgets everything derived from the text (children are handled separately).
  def reset_cache!
    @attributes = @tag = @type = @siblings_and_self = nil
  end

  # Drops the children cache and cuts the old children loose so that a later
  # write to one of them cannot overwrite the new content.
  def detach_children!
    @children.each { |node| node.parent = nil } if @children
    @children = nil
  end

  # Sets +name+ to +text+ in the opening tag, replacing an existing value or
  # appending the attribute before the closing ">", "/>" or "?>". Block-form
  # substitution keeps backslashes in +text+ literal.
  def write_attribute(name, text)
    quote = text.include?('"') ? "'" : '"'
    assignment = "#{name}=#{quote}#{text}#{quote}"
    replaced = sub!(/\A([^>]+?\s)#{Regexp.escape(name)}=(?:#{AttributeValue})/m) do
      "#{$1}#{assignment}"
    end
    sub!(/\A([^>]+?)\s*(\/|\?)?>/m) { "#{$1} #{assignment}#{$2}>" } unless replaced
  end

  # Removes the attribute +name+ (and the whitespace before it) from the
  # opening tag, if present.
  def remove_attribute(name)
    sub!(/\A([^>]*?)\s+#{Regexp.escape(name)}=(?:#{AttributeValue})/m) { $1 }
  end
end
@@ -0,0 +1 @@
1
+ # TODO
@@ -0,0 +1,10 @@
1
+ require File.join(File.dirname(__FILE__), 'test_helper')
2
+
3
# Pins the read-only behaviour documented in the README examples. The
# previous placeholder called +flunk+ unconditionally, so the suite (and the
# default rake task) could never pass.
class TaggieTest < Test::Unit::TestCase

  # Sample document taken from the README.
  HTML = '<div id="header"><img src="logo.png" /><h1>Your Company</h1></div><div id="body"><p class="content">some <span>content</span> here</p></div>'

  should 'report the tag name of the first element as its type' do
    assert_equal 'div', HTML.to_taggie.type.to_s
  end

  should 'return the opening tag of the first element' do
    assert_equal '<div id="header">', HTML.to_taggie.tag.to_s
  end

  should 'return the markup nested inside the first element' do
    assert_equal '<img src="logo.png" /><h1>Your Company</h1>', HTML.to_taggie.inner_html.to_s
  end

end
@@ -0,0 +1,15 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'taggie'
8
+
9
# Shared assertion helpers for the taggie test suite.
class Test::Unit::TestCase

  # Asserts, one by one and in order, that each of +others+ equals +value+.
  # With no +others+ given, nothing is asserted.
  def assert_all_equal(value, *others)
    others.each do |candidate|
      assert_equal(value, candidate)
    end
  end

end
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: taggie
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 0
9
+ version: 0.0.0
10
+ platform: ruby
11
+ authors:
12
+ - Sean Huber
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-04-02 00:00:00 -07:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: shoulda
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 2
29
+ - 10
30
+ - 3
31
+ version: 2.10.3
32
+ type: :development
33
+ version_requirements: *id001
34
+ description: A tiny little HTML/XML parser...using regex
35
+ email: shuber@huberry.com
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files:
41
+ - README.rdoc
42
+ files:
43
+ - .gitignore
44
+ - MIT-LICENSE
45
+ - README.rdoc
46
+ - Rakefile
47
+ - lib/taggie.rb
48
+ - lib/taggie_unabridged.rb
49
+ - test/taggie_test.rb
50
+ - test/test_helper.rb
51
+ has_rdoc: true
52
+ homepage: http://github.com/shuber/taggie
53
+ licenses: []
54
+
55
+ post_install_message:
56
+ rdoc_options:
57
+ - --charset=UTF-8
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ requirements: []
75
+
76
+ rubyforge_project:
77
+ rubygems_version: 1.3.6
78
+ signing_key:
79
+ specification_version: 3
80
+ summary: A tiny little HTML/XML parser
81
+ test_files:
82
+ - test/taggie_test.rb
83
+ - test/test_helper.rb