parser_girl 1.1.1 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.md +1 -1
- data/lib/parser_girl.rb +8 -87
- data/lib/parser_girl/attributes.rb +11 -0
- data/lib/parser_girl/parser.rb +78 -0
- data/lib/parser_girl/proxy.rb +16 -0
- metadata +12 -11
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7973f3cc3bca16ec83df5007be3bde0699dba758
|
4
|
+
data.tar.gz: 4c6aa3aa81f2680b622dd0e969c5bd54d512e053
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f34007bd37b7807100af1b43fd2de033a86ec362786f9c1f1a47a3150099e375b234e304392242de3e40f44c2918cf9fb8f89c6eb42cf69e03b4719fe9223509
|
7
|
+
data.tar.gz: 66934a5d2c34ff8105706f3bb0971e169767981fff30c96d8c8b4fafb5db059e7723ac0c643fe54b56a9ec0367c578efd21e7be2f0451b113ef7a243156cd6c3
|
data/LICENSE.md
CHANGED
data/lib/parser_girl.rb
CHANGED
@@ -1,92 +1,13 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
@result = nil
|
5
|
-
@stack = nil
|
6
|
-
end
|
7
|
-
|
8
|
-
def find(needle, haystack_base=nil)
|
9
|
-
haystack_base = @xml unless haystack_base
|
10
|
-
return [] unless haystack_base
|
11
|
-
|
12
|
-
haystack = haystack_base
|
13
|
-
pos = 0
|
14
|
-
@result = []
|
15
|
-
@stack = []
|
16
|
-
while true do
|
17
|
-
if haystack =~ /\<([^!][^\>]*)\>/i
|
18
|
-
content = $1 # tag-hit
|
19
|
-
b = $`.length + 1 # relative beginning in haystack
|
20
|
-
e = content.length + b + 1 # relative ending in haystack
|
21
|
-
if content =~ /^([^\s]+)/ and $1.downcase == "script" and
|
22
|
-
haystack =~ /\<\/script(\s[^\>]*)?\>/i
|
23
|
-
e = $`.length + $&.length
|
24
|
-
end
|
1
|
+
require 'parser_girl/attributes'
|
2
|
+
require 'parser_girl/parser'
|
3
|
+
require 'parser_girl/proxy'
|
25
4
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
hash = pop(haystack_base, b+pos-1)
|
30
|
-
if hash
|
31
|
-
if block_given?
|
32
|
-
@result.push(yield(hash[:content], hash[:attrs]))
|
33
|
-
else
|
34
|
-
@result.push(hash[:content])
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
pos += e
|
39
|
-
haystack = haystack_base[pos, haystack_base.length-pos]
|
40
|
-
else
|
41
|
-
break
|
42
|
-
end
|
43
|
-
end
|
44
|
-
# pop rest
|
45
|
-
while @stack.any?
|
46
|
-
hash = @stack.pop
|
47
|
-
if block_given?
|
48
|
-
@result.push(yield("", hash[:attrs]))
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
@result
|
5
|
+
module ParserGirl
|
6
|
+
def self.new(*args)
|
7
|
+
return Parser.new(*args)
|
53
8
|
end
|
54
9
|
|
55
|
-
|
56
|
-
|
57
|
-
@stack.push hash
|
58
|
-
end
|
59
|
-
|
60
|
-
def pop(haystack, current_position)
|
61
|
-
if @stack.any?
|
62
|
-
hash = @stack.pop
|
63
|
-
hash[:content] = haystack[hash[:position],
|
64
|
-
current_position-hash[:position]]
|
65
|
-
hash[:end_position] = current_position
|
66
|
-
hash
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def split_attr(attrs)
|
71
|
-
attr_hash = {}
|
72
|
-
while(1)
|
73
|
-
if attrs =~ /\s*([^=]+)=((\"([^\"]+)\")|(\'([^\']+)\')|([^\s]+))/
|
74
|
-
attrs = $'
|
75
|
-
key = $1
|
76
|
-
value = nil
|
77
|
-
value = $7 if $7
|
78
|
-
value = $6 if $6
|
79
|
-
value = $4 if $4
|
80
|
-
if value
|
81
|
-
value.gsub! "\"", "\\\""
|
82
|
-
value.gsub! "'", "\\'"
|
83
|
-
end
|
84
|
-
attr_hash[key.downcase.to_sym] = value
|
85
|
-
else
|
86
|
-
break
|
87
|
-
end
|
88
|
-
end
|
89
|
-
attr_hash
|
10
|
+
def self.find(xml, needle)
|
11
|
+
return Parser.new(xml).find(needle)
|
90
12
|
end
|
91
13
|
end
|
92
|
-
|
data/lib/parser_girl/parser.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
module ParserGirl
  # Regex-driven tag finder over an XML/HTML string.
  #
  # A Parser holds a markup fragment (@xml) together with the attribute hash
  # of the tag that enclosed it (@attrs). #find scans the fragment for one tag
  # name and returns the matching elements as nested Parser instances wrapped
  # in a Proxy.
  #
  # The Attributes mixin is defined outside this file section
  # (required as 'parser_girl/attributes').
  class Parser
    include Attributes

    # xml   - markup String to search (may be nil).
    # attrs - attribute Hash of the enclosing tag (may be nil).
    def initialize(xml=nil, attrs=nil)
      @xml = xml
      @attrs = attrs
      @stack = nil
    end

    # Returns a copy of the wrapped markup, so callers cannot mutate the
    # string this parser keeps searching.
    def content
      return @xml.dup
    end

    # Collects every element named +needle+ (matched case-insensitively).
    #
    # needle - tag name; it is interpolated verbatim into a regular
    #          expression, so regex metacharacters in it are active.
    #
    # Returns a Proxy around an Array of Parser objects: first one entry per
    # closed element (in the order the closing tags are met), then one entry
    # with empty content for every opening tag that was never closed.
    def find(needle)
      haystack_base = @xml

      haystack = haystack_base
      pos = 0 # absolute offset of haystack inside haystack_base
      result = []
      @stack = []

      # Built once; needle does not change while scanning.
      opening_tag = /^#{needle}(\s.*)?$/i
      closing_tag = /^\/#{needle}(\s.*)?$/i

      loop do
        # Next tag of any name; "<!..." constructs are skipped.
        break unless haystack =~ /\<([^!][^\>]*)\>/i

        content = $1               # text between "<" and ">"
        b = $`.length + 1          # relative index of the first char after "<"
        e = content.length + b + 1 # relative index just past ">"

        # For <script>, jump past the matching </script> (when one exists) so
        # that markup inside the script body is not scanned as tags.
        if content =~ /^([^\s]+)/ && $1.downcase == "script" &&
            haystack =~ /\<\/script(\s[^\>]*)?\>/i

          e = $`.length + $&.length
        end

        if content =~ opening_tag
          # Remember where the element body starts and what attributes it has.
          @stack << { :position => pos + e, :attrs => split_attr($1) }
        elsif content =~ closing_tag
          # b + pos - 1 is the absolute index of the closing tag's "<".
          hash = pop(haystack_base, b + pos - 1)
          result << Parser.new(hash[:content], hash[:attrs]) if hash
        end
        pos += e
        haystack = haystack_base[pos, haystack_base.length - pos]
      end

      # Opening tags left on the stack never saw a closing tag; report them
      # with empty content.
      result += @stack.map { |hash| Parser.new("", hash[:attrs]) }
      return Proxy.new(result)
    end

    protected

    # Pops the most recent open tag and fills in its body.
    #
    # haystack         - the full markup String.
    # current_position - absolute index of the closing tag's "<".
    #
    # Returns the stack entry with :content and :end_position set, or nil
    # when no tag is open.
    def pop(haystack, current_position)
      return unless @stack.any?

      hash = @stack.pop
      hash[:content] = haystack[hash[:position],
                                current_position - hash[:position]]
      hash[:end_position] = current_position
      hash
    end

    # Splits the attribute part of a tag into a Hash.
    #
    # attrs - the text following the tag name (nil when the tag has none).
    #
    # Returns a Hash of lowercased Symbol keys to String values. Values may be
    # double-quoted, single-quoted or bare; quote characters inside a value
    # are backslash-escaped.
    def split_attr(attrs)
      attr_hash = {}
      # "*" (not "+") inside the quotes so that empty values such as a=""
      # match the quoted branches instead of falling through to the bare one.
      while attrs =~ /\s*([^=]+)=((\"([^\"]*)\")|(\'([^\']*)\')|([^\s]+))/
        attrs = $'
        key = $1
        value = [$7, $6, $4].detect { |v| !v.nil? }
        if value
          # Block form on purpose: in a replacement *string*, \' is the
          # post-match backreference and would paste the rest of the value.
          value = value.gsub("\"") { "\\\"" }
          value = value.gsub("'") { "\\'" }
        end
        attr_hash[key.downcase.to_sym] = value
      end
      attr_hash
    end
  end
end
|
data/lib/parser_girl/proxy.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
module ParserGirl
  # Thin wrapper around a collection of result nodes (Parser#find hands it an
  # Array of Parser objects).
  #
  # Messages that a parser instance understands are broadcast to every
  # element; everything else is forwarded to the collection itself.
  class Proxy
    # data - the wrapped collection; it must answer #map, #size and #send.
    def initialize(data)
      @data = data
    end

    # Dispatches unknown messages.
    #
    # If a fresh ParserGirl.new instance responds to +method+, the call is
    # sent to each element and the results are returned: the bare result when
    # exactly one element is wrapped, otherwise an Array of results.
    # Any other message is forwarded to the wrapped collection.
    def method_missing(method, *args, &blk)
      if ParserGirl.new.respond_to?(method)
        new_data = @data.map { |d| d.send(method, *args, &blk) }
        return @data.size == 1 ? new_data.first : new_data
      else
        return @data.send(method, *args, &blk)
      end
    end

    # Keeps #respond_to? (and #method) in agreement with #method_missing:
    # true for anything a parser instance or the wrapped collection answers.
    def respond_to_missing?(method, include_private = false)
      ParserGirl.new.respond_to?(method) ||
        @data.respond_to?(method, include_private) ||
        super
    end
  end
end
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parser_girl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Matthias Geier
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-10-05 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description:
|
15
14
|
email:
|
@@ -17,30 +16,32 @@ executables: []
|
|
17
16
|
extensions: []
|
18
17
|
extra_rdoc_files: []
|
19
18
|
files:
|
20
|
-
- lib/parser_girl.rb
|
21
19
|
- LICENSE.md
|
20
|
+
- lib/parser_girl.rb
|
21
|
+
- lib/parser_girl/attributes.rb
|
22
|
+
- lib/parser_girl/parser.rb
|
23
|
+
- lib/parser_girl/proxy.rb
|
22
24
|
homepage: https://github.com/matthias-geier/parser_girl
|
23
25
|
licenses: []
|
26
|
+
metadata: {}
|
24
27
|
post_install_message:
|
25
28
|
rdoc_options: []
|
26
29
|
require_paths:
|
27
30
|
- lib
|
28
31
|
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
-
none: false
|
30
32
|
requirements:
|
31
|
-
- -
|
33
|
+
- - ">="
|
32
34
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
35
|
+
version: '2.0'
|
34
36
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
-
none: false
|
36
37
|
requirements:
|
37
|
-
- -
|
38
|
+
- - ">="
|
38
39
|
- !ruby/object:Gem::Version
|
39
40
|
version: '0'
|
40
41
|
requirements: []
|
41
42
|
rubyforge_project:
|
42
|
-
rubygems_version:
|
43
|
+
rubygems_version: 2.2.2
|
43
44
|
signing_key:
|
44
|
-
specification_version:
|
45
|
+
specification_version: 4
|
45
46
|
summary: A minimal very fast xml/html parser
|
46
47
|
test_files: []
|