parser_girl 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/parser_girl.rb +84 -0
  2. metadata +45 -0
@@ -0,0 +1,84 @@
1
# Minimal regex-driven XML/HTML scanner.
#
# ParserGirl walks a document tag by tag and returns the inner text of every
# element whose tag name matches a given pattern. It does not build a tree
# and does not validate the markup.
class ParserGirl
  # Any tag whose first character is not "!" (so comments/doctype are skipped).
  TAG = /\<([^!][^\>]*)\>/i
  # Closing script tag; used to jump over script bodies without scanning them.
  SCRIPT_CLOSE = /\<\/script(\s[^\>]*)?\>/i
  # One key=value pair; the value may be double-quoted, single-quoted or bare.
  ATTRIBUTE = /\s*([^=]+)=((\"([^\"]+)\")|(\'([^\']+)\')|([^\s]+))/

  # @param xml [String, nil] document searched by #find when no explicit
  #   haystack is passed
  def initialize(xml=nil)
    @xml = xml
  end

  # Collects the inner content of every element named +needle+.
  #
  # Matching elements are tracked on a stack, so nested elements of the same
  # name are reported innermost first. A closing tag without a matching
  # opening tag is ignored. All scan state is local to the call, which makes
  # it safe to call #find on the same instance from inside the block.
  #
  # @param needle [String] tag name; it is interpolated into a regular
  #   expression unescaped, so patterns such as "h[1-6]" work. Capture groups
  #   inside the needle shift the attribute capture - use (?:...) instead.
  # @param haystack_base [String, nil] document to scan; defaults to the
  #   string given to #initialize
  # @yield [content, attrs] inner text of the element and a Hash of its
  #   attributes (downcased Symbol keys); the block's return value is
  #   collected in place of the content
  # @return [Array] contents (or block results) in closing-tag order; empty
  #   when there is no document to scan
  #
  # NOTE: the body of a script element is skipped together with its closing
  # tag, so searching for "script" itself yields no results.
  def find(needle, haystack_base=nil)
    haystack_base ||= @xml
    return [] unless haystack_base

    # \A/\z plus /m: match the whole tag body, even when it spans lines.
    # Built once per call instead of once per scanned tag.
    opening = /\A#{needle}(\s.*)?\z/im
    closing = /\A\/#{needle}(\s.*)?\z/im

    result = []
    stack = []
    pos = 0 # absolute offset of +haystack+ inside +haystack_base+
    haystack = haystack_base

    while (tag = TAG.match(haystack))
      content = tag[1]
      b = tag.pre_match.length + 1 # relative offset just after "<"
      e = content.length + b + 1   # relative offset just after ">"

      # Do not look for tags inside a script body: jump past its closing tag.
      if content[/\A\S+/].to_s.downcase == "script" &&
         (script_end = SCRIPT_CLOSE.match(haystack))
        e = script_end.pre_match.length + script_end[0].length
      end

      if (open_match = opening.match(content))
        stack.push({ :position => pos + e,
                     :attrs => split_attr(open_match[1]) })
      elsif closing.match(content)
        element = close_element(stack, haystack_base, b + pos - 1)
        if element
          if block_given?
            result.push(yield(element[:content], element[:attrs]))
          else
            result.push(element[:content])
          end
        end
      end

      pos += e
      haystack = haystack_base[pos, haystack_base.length - pos]
    end

    result
  end

  private

  # Pops the most recently opened element and fills in its inner content,
  # i.e. the text between the end of its opening tag and +current_position+
  # (the offset of the "<" of the closing tag).
  #
  # @return [Hash, nil] the element, or nil when nothing is open
  def close_element(stack, haystack, current_position)
    return nil if stack.empty?

    element = stack.pop
    start = element[:position]
    element[:content] = haystack[start, current_position - start]
    element
  end

  # Parses the attribute part of a tag into a Hash.
  #
  # Keys are downcased and symbolized. Double and single quotes inside a
  # value are escaped with a backslash.
  #
  # @param attrs [String, nil] text following the tag name
  # @return [Hash{Symbol => String}]
  def split_attr(attrs)
    attr_hash = {}
    rest = attrs.to_s
    while (pair = ATTRIBUTE.match(rest))
      rest = pair.post_match
      value = pair[4] || pair[6] || pair[7]
      # Block form on purpose: in a replacement *string* "\\'" means
      # "text after the match" and would silently corrupt the value.
      value = value.gsub('"') { '\\"' }.gsub("'") { "\\'" }
      attr_hash[pair[1].downcase.to_sym] = value
    end
    attr_hash
  end
end
84
+
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: parser_girl
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Matthias Geier
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-26 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email:
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/parser_girl.rb
21
+ homepage: https://github.com/matthias-geier/parser_girl
22
+ licenses: []
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 1.9.1
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 1.8.23
42
+ signing_key:
43
+ specification_version: 3
44
+ summary: A minimal very fast xml/html parser
45
+ test_files: []