parser_girl 1.1.1 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.md +1 -1
- data/lib/parser_girl.rb +8 -87
- data/lib/parser_girl/attributes.rb +11 -0
- data/lib/parser_girl/parser.rb +78 -0
- data/lib/parser_girl/proxy.rb +16 -0
- metadata +12 -11
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7973f3cc3bca16ec83df5007be3bde0699dba758
|
4
|
+
data.tar.gz: 4c6aa3aa81f2680b622dd0e969c5bd54d512e053
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f34007bd37b7807100af1b43fd2de033a86ec362786f9c1f1a47a3150099e375b234e304392242de3e40f44c2918cf9fb8f89c6eb42cf69e03b4719fe9223509
|
7
|
+
data.tar.gz: 66934a5d2c34ff8105706f3bb0971e169767981fff30c96d8c8b4fafb5db059e7723ac0c643fe54b56a9ec0367c578efd21e7be2f0451b113ef7a243156cd6c3
|
data/LICENSE.md
CHANGED
data/lib/parser_girl.rb
CHANGED
@@ -1,92 +1,13 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
@result = nil
|
5
|
-
@stack = nil
|
6
|
-
end
|
7
|
-
|
8
|
-
def find(needle, haystack_base=nil)
|
9
|
-
haystack_base = @xml unless haystack_base
|
10
|
-
return [] unless haystack_base
|
11
|
-
|
12
|
-
haystack = haystack_base
|
13
|
-
pos = 0
|
14
|
-
@result = []
|
15
|
-
@stack = []
|
16
|
-
while true do
|
17
|
-
if haystack =~ /\<([^!][^\>]*)\>/i
|
18
|
-
content = $1 # tag-hit
|
19
|
-
b = $`.length + 1 # relative beginning in haystack
|
20
|
-
e = content.length + b + 1 # relative ending in haystack
|
21
|
-
if content =~ /^([^\s]+)/ and $1.downcase == "script" and
|
22
|
-
haystack =~ /\<\/script(\s[^\>]*)?\>/i
|
23
|
-
e = $`.length + $&.length
|
24
|
-
end
|
1
|
+
require 'parser_girl/attributes'
|
2
|
+
require 'parser_girl/parser'
|
3
|
+
require 'parser_girl/proxy'
|
25
4
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
hash = pop(haystack_base, b+pos-1)
|
30
|
-
if hash
|
31
|
-
if block_given?
|
32
|
-
@result.push(yield(hash[:content], hash[:attrs]))
|
33
|
-
else
|
34
|
-
@result.push(hash[:content])
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
pos += e
|
39
|
-
haystack = haystack_base[pos, haystack_base.length-pos]
|
40
|
-
else
|
41
|
-
break
|
42
|
-
end
|
43
|
-
end
|
44
|
-
# pop rest
|
45
|
-
while @stack.any?
|
46
|
-
hash = @stack.pop
|
47
|
-
if block_given?
|
48
|
-
@result.push(yield("", hash[:attrs]))
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
@result
|
5
|
+
module ParserGirl
|
6
|
+
def self.new(*args)
|
7
|
+
return Parser.new(*args)
|
53
8
|
end
|
54
9
|
|
55
|
-
|
56
|
-
|
57
|
-
@stack.push hash
|
58
|
-
end
|
59
|
-
|
60
|
-
def pop(haystack, current_position)
|
61
|
-
if @stack.any?
|
62
|
-
hash = @stack.pop
|
63
|
-
hash[:content] = haystack[hash[:position],
|
64
|
-
current_position-hash[:position]]
|
65
|
-
hash[:end_position] = current_position
|
66
|
-
hash
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def split_attr(attrs)
|
71
|
-
attr_hash = {}
|
72
|
-
while(1)
|
73
|
-
if attrs =~ /\s*([^=]+)=((\"([^\"]+)\")|(\'([^\']+)\')|([^\s]+))/
|
74
|
-
attrs = $'
|
75
|
-
key = $1
|
76
|
-
value = nil
|
77
|
-
value = $7 if $7
|
78
|
-
value = $6 if $6
|
79
|
-
value = $4 if $4
|
80
|
-
if value
|
81
|
-
value.gsub! "\"", "\\\""
|
82
|
-
value.gsub! "'", "\\'"
|
83
|
-
end
|
84
|
-
attr_hash[key.downcase.to_sym] = value
|
85
|
-
else
|
86
|
-
break
|
87
|
-
end
|
88
|
-
end
|
89
|
-
attr_hash
|
10
|
+
def self.find(xml, needle)
|
11
|
+
return Parser.new(xml).find(needle)
|
90
12
|
end
|
91
13
|
end
|
92
|
-
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module ParserGirl
  # Regex-driven scanner that extracts the inner text and attributes of every
  # element whose tag name matches a given needle.
  #
  # A Parser wraps one chunk of markup (+@xml+) plus the attribute hash that
  # was parsed from the tag enclosing that chunk (+@attrs+). +@attrs+ is not
  # read anywhere in this class; presumably the included Attributes module
  # exposes it -- confirm against attributes.rb.
  class Parser
    include Attributes

    # xml::   markup to search; may be nil, in which case #find yields nothing.
    # attrs:: attribute hash belonging to the tag that enclosed +xml+.
    def initialize(xml=nil, attrs=nil)
      @xml = xml
      @attrs = attrs
      @stack = nil
    end

    # Returns a copy of the wrapped markup so callers cannot mutate the
    # parser's own string. Returns nil when no markup was given (guarded
    # explicitly because nil.dup raises on Ruby versions before 2.4).
    def content
      @xml && @xml.dup
    end

    # Collects every element named +needle+ (case-insensitive).
    #
    # needle:: tag name; it is interpolated verbatim into a regular
    #          expression, so regexp metacharacters keep their meaning.
    #
    # Returns a Proxy wrapping one Parser per matched element. Elements that
    # are closed contribute their inner markup; opening tags that are never
    # closed are appended afterwards with empty content.
    def find(needle)
      haystack_base = @xml

      haystack = haystack_base
      pos = 0
      result = []
      @stack = []

      # Built once per call instead of once per tag. \A/\z anchor the whole
      # tag body and the m flag lets "." cross line breaks, so attributes
      # spread over several lines are all captured and the needle can no
      # longer match an inner line of some other tag.
      opening = /\A#{needle}(\s.*)?\z/im
      closing = /\A\/#{needle}(\s.*)?\z/im

      while haystack =~ /\<([^!][^\>]*)\>/i
        content = $1 # tag-hit
        b = $`.length + 1 # relative beginning in haystack
        e = content.length + b + 1 # relative ending in haystack

        # Skip the whole body of a <script> element so markup-like text
        # inside scripts is never treated as a tag.
        # NOTE(review): because e then points past </script>, a needle of
        # "script" records its start after the closing tag and never sees
        # that closing tag -- confirm whether that is intended.
        if content =~ /^([^\s]+)/ && $1.downcase == "script" &&
          haystack =~ /\<\/script(\s[^\>]*)?\>/i

          e = $`.length + $&.length
        end

        if content =~ opening
          @stack << { :position => pos+e, :attrs => split_attr($1) }
        elsif content =~ closing
          hash = pop(haystack_base, b+pos-1)
          result << Parser.new(hash[:content], hash[:attrs]) if hash
        end
        pos += e
        haystack = haystack_base[pos, haystack_base.length-pos]
      end
      # pop rest and append
      result += @stack.map{ |hash| Parser.new("", hash[:attrs]) }
      return Proxy.new(result)
    end

    protected

    # Closes the most recently opened element: removes it from the stack and
    # fills in its inner markup (from its recorded start up to
    # +current_position+ in +haystack+) and its end position.
    # Returns the completed hash, or nil when no element is open.
    def pop(haystack, current_position)
      return unless @stack.any?
      hash = @stack.pop
      hash[:content] = haystack[hash[:position],
        current_position-hash[:position]]
      hash[:end_position] = current_position
      hash
    end

    # Parses the attribute portion of a tag into a hash keyed by the
    # downcased attribute name as a symbol. Values may be double-quoted,
    # single-quoted or bare; quote characters left inside a value are
    # backslash-escaped. A nil +attrs+ yields an empty hash.
    def split_attr(attrs)
      attr_hash = {}
      while attrs =~ /\s*([^=]+)=((\"([^\"]+)\")|(\'([^\']+)\')|([^\s]+))/
        attrs = $' # continue with whatever follows this attribute
        key = $1
        # Only one of the three value captures is set for a given match.
        value = [$7, $6, $4].detect{ |v| !v.nil? }
        if value
          value.gsub! "\"", "\\\""
          value.gsub! "'", "\\'"
        end
        attr_hash[key.downcase.to_sym] = value
      end
      attr_hash
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module ParserGirl
  # Wraps the collection of results produced by a search and decides, per
  # call, whether a message is meant for the individual results or for the
  # collection itself.
  class Proxy
    # data:: the wrapped collection (an Array of results in the visible
    #        callers).
    def initialize(data)
      @data = data
    end

    # Dispatches unknown messages.
    #
    # * If a freshly built ParserGirl instance responds to +method+, the call
    #   is sent to every wrapped element. A single-element collection
    #   returns that one result directly; otherwise the list of results is
    #   returned.
    # * Any other message is forwarded to the wrapped collection unchanged.
    def method_missing(method, *args, &blk)
      if parser_method?(method)
        new_data = @data.map{ |d| d.send(method, *args, &blk) }
        return @data.size == 1 ? new_data.first : new_data
      end
      @data.send(method, *args, &blk)
    end

    # Keeps respond_to? and Object#method in line with method_missing:
    # anything a parser answers, or the wrapped collection answers, is
    # reported as supported.
    def respond_to_missing?(method, include_private = false)
      parser_method?(method) ||
        @data.respond_to?(method, include_private) ||
        super
    end

    private

    # True when +method+ belongs to the per-element (parser) interface.
    def parser_method?(method)
      ParserGirl.new.respond_to?(method)
    end
  end
end
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parser_girl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Matthias Geier
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-10-05 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description:
|
15
14
|
email:
|
@@ -17,30 +16,32 @@ executables: []
|
|
17
16
|
extensions: []
|
18
17
|
extra_rdoc_files: []
|
19
18
|
files:
|
20
|
-
- lib/parser_girl.rb
|
21
19
|
- LICENSE.md
|
20
|
+
- lib/parser_girl.rb
|
21
|
+
- lib/parser_girl/attributes.rb
|
22
|
+
- lib/parser_girl/parser.rb
|
23
|
+
- lib/parser_girl/proxy.rb
|
22
24
|
homepage: https://github.com/matthias-geier/parser_girl
|
23
25
|
licenses: []
|
26
|
+
metadata: {}
|
24
27
|
post_install_message:
|
25
28
|
rdoc_options: []
|
26
29
|
require_paths:
|
27
30
|
- lib
|
28
31
|
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
-
none: false
|
30
32
|
requirements:
|
31
|
-
- -
|
33
|
+
- - ">="
|
32
34
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
35
|
+
version: '2.0'
|
34
36
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
-
none: false
|
36
37
|
requirements:
|
37
|
-
- -
|
38
|
+
- - ">="
|
38
39
|
- !ruby/object:Gem::Version
|
39
40
|
version: '0'
|
40
41
|
requirements: []
|
41
42
|
rubyforge_project:
|
42
|
-
rubygems_version:
|
43
|
+
rubygems_version: 2.2.2
|
43
44
|
signing_key:
|
44
|
-
specification_version:
|
45
|
+
specification_version: 4
|
45
46
|
summary: A minimal very fast xml/html parser
|
46
47
|
test_files: []
|