parser_girl 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/parser_girl.rb +84 -0
  2. metadata +45 -0
@@ -0,0 +1,84 @@
1
# A minimal, regex-driven tag scanner for XML/HTML-like text.
#
# It does not build a document tree. It walks the input tag by tag and
# returns the text found between matching opening and closing tags.
class ParserGirl
  # Next tag-like token; tokens starting with "<!" are not treated as tags.
  TAG_PATTERN = /<([^!][^>]*)>/

  # Closing script tag, used to skip over (or capture) a whole script body.
  SCRIPT_CLOSE_PATTERN = /<\/script(\s[^>]*)?>/i

  # One key=value attribute; the value may be double-quoted, single-quoted
  # or bare. Quoted values may be empty.
  ATTRIBUTE_PATTERN = /\s*([^=]+)=(?:"([^"]*)"|'([^']*)'|(\S+))/

  # @param xml [String, nil] default text searched by #find
  def initialize(xml = nil)
    @xml = xml
  end

  # Collects the content of every element whose tag name matches +needle+.
  #
  # All working state is local, so the block may safely call #find again on
  # the same instance (for example to search inside the yielded content).
  #
  # @param needle [#to_s] tag name; it is interpolated into a
  #   case-insensitive pattern without escaping
  # @param haystack_base [String, nil] text to search; defaults to the text
  #   given to the constructor
  # @yield [content, attrs] inner text and attribute hash of each element
  # @return [Array] element contents (or block results) in the order the
  #   elements are closed; empty when there is nothing to search
  def find(needle, haystack_base = nil)
    haystack_base ||= @xml
    return [] unless haystack_base

    # \A/\z plus the m flag let the attribute part span several lines.
    opening = /\A#{needle}(\s.*)?\z/im
    closing = /\A\/#{needle}(\s.*)?\z/im
    results = []
    stack = []
    pos = 0

    while (tag = TAG_PATTERN.match(haystack_base, pos))
      content = tag[1]
      # A script body is never scanned for tags: find its closing tag now.
      script_close = nil
      if script_open?(content)
        script_close = SCRIPT_CLOSE_PATTERN.match(haystack_base, tag.begin(0))
      end

      finished = nil
      if (opened = opening.match(content))
        entry = { :position => tag.end(0), :attrs => split_attr(opened[1]) }
        if script_close
          # The closing tag is skipped below, so complete the element here.
          finished = close_entry(entry, haystack_base, script_close.begin(0))
        else
          stack.push(entry)
        end
      elsif closing.match(content)
        entry = stack.pop
        finished = close_entry(entry, haystack_base, tag.begin(0)) if entry
      end

      if finished
        results << (block_given? ? yield(finished[:content], finished[:attrs]) : finished[:content])
      end

      pos = script_close ? script_close.end(0) : tag.end(0)
    end

    results
  end

  private

  # True when the first whitespace-delimited token of the tag is "script".
  def script_open?(content)
    content[/\A\S+/].to_s.downcase == "script"
  end

  # Fills in the content and end position of an element opened earlier.
  def close_entry(entry, haystack, end_position)
    entry[:content] = haystack[entry[:position], end_position - entry[:position]]
    entry[:end_position] = end_position
    entry
  end

  # Parses the attribute part of a tag into a hash.
  #
  # @param attrs [String, nil] text following the tag name
  # @return [Hash{Symbol => String}] lower-cased attribute names mapped to
  #   their values, with every quote character backslash-escaped
  def split_attr(attrs)
    attr_hash = {}
    rest = attrs.to_s
    while (pair = ATTRIBUTE_PATTERN.match(rest))
      value = pair[2] || pair[3] || pair[4]
      # Block form: a "\\'" replacement string would be read as the
      # post-match back-reference instead of a literal backslash + quote.
      attr_hash[pair[1].downcase.to_sym] = value.gsub(/["']/) { |quote| "\\#{quote}" }
      rest = pair.post_match
    end
    attr_hash
  end
end
84
+
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: parser_girl
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Matthias Geier
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-26 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email:
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/parser_girl.rb
21
+ homepage: https://github.com/matthias-geier/parser_girl
22
+ licenses: []
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 1.9.1
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 1.8.23
42
+ signing_key:
43
+ specification_version: 3
44
+ summary: A minimal very fast xml/html parser
45
+ test_files: []