parser_girl 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/parser_girl.rb +84 -0
- metadata +45 -0
data/lib/parser_girl.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# Minimal regex-driven scanner that extracts the inner content (and the
# attributes) of elements with a given tag name from an XML/HTML string.
#
# It is not a validating parser: tags are located with regular expressions
# and open/close tags of the requested name are paired through a stack.
class ParserGirl
  # Next tag that is not a `<!...>` construct; capture 1 is the text between
  # `<` and `>`.
  TAG = /\<([^!][^\>]*)\>/i

  # Closing script tag, used to jump over script bodies.
  SCRIPT_CLOSE = /\<\/script(\s[^\>]*)?\>/i

  # One `key=value` pair. Capture 1 is the key, 4 a double-quoted value,
  # 6 a single-quoted value and 7 an unquoted value. Quoted values may be
  # empty so that `alt=""` yields an empty string.
  ATTRIBUTE = /\s*([^=]+)=((\"([^\"]*)\")|(\'([^\']*)\')|([^\s]+))/

  # @param xml [String, nil] default document searched by #find
  def initialize(xml=nil)
    @xml = xml
  end

  # Collects every element named +needle+ from the document.
  #
  # All scan state lives in local variables, so the block may safely call
  # #find again on the same instance (e.g. to search inside the yielded
  # content).
  #
  # Script elements are skipped wholesale: once a `<script>` tag is seen the
  # scan resumes after the next `</script>`, so markup inside scripts is
  # never matched (and closing script tags are never seen by the matcher).
  #
  # @param needle [String] tag name, matched case-insensitively; it is
  #   interpolated into a regular expression without escaping
  # @param haystack_base [String, nil] text to search; defaults to the
  #   document given to the constructor
  # @yield [content, attrs] inner text of a matched element and its
  #   attribute hash (see #split_attr)
  # @return [Array] the block's return values when a block is given,
  #   otherwise the inner content strings; [] when there is nothing to search
  def find(needle, haystack_base=nil)
    haystack_base = @xml unless haystack_base
    return [] unless haystack_base

    # Built once per call. \A/\z plus a newline-aware tail keep attributes
    # that span several lines attached to their tag.
    open_tag = /\A#{needle}(\s(?m:.*))?\z/i
    close_tag = /\A\/#{needle}(\s(?m:.*))?\z/i

    results = []
    stack = []
    pos = 0

    while (tag = TAG.match(haystack_base, pos))
      content = tag[1]
      tag_start = tag.begin(0)
      resume_at = tag.end(0)

      if content[/\A\S+/].to_s.downcase == "script"
        closing = SCRIPT_CLOSE.match(haystack_base, tag_start)
        resume_at = closing.end(0) if closing
      end

      if (opened = open_tag.match(content))
        stack.push(:position => resume_at, :attrs => split_attr(opened[1]))
      elsif close_tag.match(content) && !stack.empty?
        # Pair the closing tag with the most recently opened one; a closing
        # tag without an opener is ignored.
        element = stack.pop
        element[:content] = haystack_base[element[:position],
          tag_start - element[:position]]
        element[:end_position] = tag_start
        if block_given?
          results.push(yield(element[:content], element[:attrs]))
        else
          results.push(element[:content])
        end
      end

      pos = resume_at
    end

    results
  end

  private

  # Splits the attribute part of a tag into a hash.
  #
  # Keys are downcased symbols. In the values, double and single quotes are
  # backslash-escaped; the block form of gsub is used because `\'` inside a
  # replacement string would be read as the post-match back-reference.
  #
  # @param attrs [String, nil] text following the tag name
  # @return [Hash{Symbol => String}]
  def split_attr(attrs)
    attr_hash = {}
    attrs.to_s.scan(ATTRIBUTE) do |key, _whole, _dq, dq_value, _sq, sq_value, bare|
      value = dq_value || sq_value || bare
      value = value.gsub("\"") { "\\\"" }.gsub("'") { "\\'" }
      attr_hash[key.downcase.to_sym] = value
    end
    attr_hash
  end
end
|
84
|
+
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: parser_girl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Matthias Geier
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-04-26 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description:
|
15
|
+
email:
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/parser_girl.rb
|
21
|
+
homepage: https://github.com/matthias-geier/parser_girl
|
22
|
+
licenses: []
|
23
|
+
post_install_message:
|
24
|
+
rdoc_options: []
|
25
|
+
require_paths:
|
26
|
+
- lib
|
27
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.9.1
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
none: false
|
35
|
+
requirements:
|
36
|
+
- - ! '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 1.8.23
|
42
|
+
signing_key:
|
43
|
+
specification_version: 3
|
44
|
+
summary: A minimal very fast xml/html parser
|
45
|
+
test_files: []
|