parser_girl 1.1.1 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7973f3cc3bca16ec83df5007be3bde0699dba758
4
+ data.tar.gz: 4c6aa3aa81f2680b622dd0e969c5bd54d512e053
5
+ SHA512:
6
+ metadata.gz: f34007bd37b7807100af1b43fd2de033a86ec362786f9c1f1a47a3150099e375b234e304392242de3e40f44c2918cf9fb8f89c6eb42cf69e03b4719fe9223509
7
+ data.tar.gz: 66934a5d2c34ff8105706f3bb0971e169767981fff30c96d8c8b4fafb5db059e7723ac0c643fe54b56a9ec0367c578efd21e7be2f0451b113ef7a243156cd6c3
data/LICENSE.md CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2013, Matthias Geier
1
+ Copyright (c) 2013-2014, Matthias Geier
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without modification,
@@ -1,92 +1,13 @@
1
- class ParserGirl
2
- def initialize(xml=nil)
3
- @xml = xml
4
- @result = nil
5
- @stack = nil
6
- end
7
-
8
- def find(needle, haystack_base=nil)
9
- haystack_base = @xml unless haystack_base
10
- return [] unless haystack_base
11
-
12
- haystack = haystack_base
13
- pos = 0
14
- @result = []
15
- @stack = []
16
- while true do
17
- if haystack =~ /\<([^!][^\>]*)\>/i
18
- content = $1 # tag-hit
19
- b = $`.length + 1 # relative beginning in haystack
20
- e = content.length + b + 1 # relative ending in haystack
21
- if content =~ /^([^\s]+)/ and $1.downcase == "script" and
22
- haystack =~ /\<\/script(\s[^\>]*)?\>/i
23
- e = $`.length + $&.length
24
- end
1
+ require 'parser_girl/attributes'
2
+ require 'parser_girl/parser'
3
+ require 'parser_girl/proxy'
25
4
 
26
- if content =~ /^#{needle}(\s.*)?$/i
27
- push({:position => pos+e, :attrs => split_attr($1)})
28
- elsif content =~ /^\/#{needle}(\s.*)?$/i
29
- hash = pop(haystack_base, b+pos-1)
30
- if hash
31
- if block_given?
32
- @result.push(yield(hash[:content], hash[:attrs]))
33
- else
34
- @result.push(hash[:content])
35
- end
36
- end
37
- end
38
- pos += e
39
- haystack = haystack_base[pos, haystack_base.length-pos]
40
- else
41
- break
42
- end
43
- end
44
- # pop rest
45
- while @stack.any?
46
- hash = @stack.pop
47
- if block_given?
48
- @result.push(yield("", hash[:attrs]))
49
- end
50
- end
51
-
52
- @result
5
+ module ParserGirl
6
+ def self.new(*args)
7
+ return Parser.new(*args)
53
8
  end
54
9
 
55
- private
56
- def push(hash)
57
- @stack.push hash
58
- end
59
-
60
- def pop(haystack, current_position)
61
- if @stack.any?
62
- hash = @stack.pop
63
- hash[:content] = haystack[hash[:position],
64
- current_position-hash[:position]]
65
- hash[:end_position] = current_position
66
- hash
67
- end
68
- end
69
-
70
- def split_attr(attrs)
71
- attr_hash = {}
72
- while(1)
73
- if attrs =~ /\s*([^=]+)=((\"([^\"]+)\")|(\'([^\']+)\')|([^\s]+))/
74
- attrs = $'
75
- key = $1
76
- value = nil
77
- value = $7 if $7
78
- value = $6 if $6
79
- value = $4 if $4
80
- if value
81
- value.gsub! "\"", "\\\""
82
- value.gsub! "'", "\\'"
83
- end
84
- attr_hash[key.downcase.to_sym] = value
85
- else
86
- break
87
- end
88
- end
89
- attr_hash
10
+ def self.find(xml, needle)
11
+ return Parser.new(xml).find(needle)
90
12
  end
91
13
  end
92
-
@@ -0,0 +1,11 @@
1
module ParserGirl
  # Mixin that exposes read access to the attribute hash an including class
  # keeps in @attrs. @attrs may be nil (Parser defaults it to nil for a
  # parser built without attributes), in which case the node is treated as
  # having no attributes instead of raising.
  module Attributes
    # Looks up a single attribute.
    #
    # @param key [Object] attribute key as stored in @attrs
    # @return [Object, nil] the stored value, or nil when the key is absent
    #   or no attributes were given
    def [](key)
      @attrs && @attrs[key]
    end

    # @return [Hash] a shallow copy of the attributes, so callers cannot
    #   mutate the node's own hash; an empty Hash when no attributes were given
    def to_h
      @attrs ? @attrs.dup : {}
    end
  end
end
@@ -0,0 +1,78 @@
1
module ParserGirl
  # A piece of markup together with the attributes of the tag that enclosed
  # it. #find scans the markup with regular expressions (no DOM is built) and
  # returns the matching elements as nested Parser instances wrapped in a
  # Proxy.
  class Parser
    include Attributes

    # @param xml [String, nil] markup to search
    # @param attrs [Hash, nil] attributes belonging to this node
    def initialize(xml=nil, attrs=nil)
      @xml = xml
      @attrs = attrs
      @stack = nil
    end

    # @return [String, nil] a copy of the wrapped markup, or nil when the
    #   parser was built without markup
    def content
      @xml && @xml.dup
    end

    # Collects every element whose tag name matches +needle+.
    #
    # +needle+ is interpolated into a regular expression unescaped and is
    # matched case-insensitively, so it may itself be a pattern fragment.
    # Capture groups inside +needle+ would shift $1 and break attribute
    # extraction.
    #
    # @param needle [String] tag name (or pattern fragment) to look for
    # @return [Proxy] wrapper around the matches: closed elements in the order
    #   their closing tags appear, followed by still-open elements with ""
    #   as content
    def find(needle)
      haystack_base = @xml

      haystack = haystack_base
      pos = 0 # absolute offset of haystack inside haystack_base
      result = []
      @stack = []
      while true do
        # next tag that does not start with "!" (skips <!-- and <!DOCTYPE)
        break unless haystack =~ /\<([^!][^\>]*)\>/i

        content = $1 # tag-hit
        b = $`.length + 1 # relative beginning in haystack
        e = content.length + b + 1 # relative ending in haystack

        # for a script tag, jump past its closing tag so the script body is
        # not scanned for tags
        if content =~ /^([^\s]+)/ && $1.downcase == "script" &&
          haystack =~ /\<\/script(\s[^\>]*)?\>/i

          e = $`.length + $&.length
        end

        # \A / \z with the m flag: the tag text is matched as a whole, so a
        # tag whose attributes span several lines keeps them and a needle
        # cannot match on a later line of a different tag
        if content =~ /\A#{needle}(\s.*)?\z/im
          @stack << { :position => pos + e, :attrs => split_attr($1) }
        elsif content =~ /\A\/#{needle}(\s.*)?\z/im
          hash = pop(haystack_base, b + pos - 1)
          result << Parser.new(hash[:content], hash[:attrs]) if hash
        end
        pos += e
        haystack = haystack_base[pos, haystack_base.length - pos]
      end
      # pop rest and append
      result += @stack.map { |hash| Parser.new("", hash[:attrs]) }
      Proxy.new(result)
    end

    protected

    # Removes the innermost open tag from the stack and fills in the markup
    # between its opening tag and +current_position+.
    #
    # @param haystack [String] the complete markup
    # @param current_position [Integer] offset of the "<" of the closing tag
    # @return [Hash, nil] the stack entry with :content and :end_position
    #   added, or nil when no tag is open
    def pop(haystack, current_position)
      return unless @stack.any?

      hash = @stack.pop
      hash[:content] = haystack[hash[:position],
        current_position - hash[:position]]
      hash[:end_position] = current_position
      hash
    end

    # Splits the attribute part of a tag into a Hash.
    #
    # Recognises key="value", key='value' and key=value. Keys are downcased
    # and symbolised; quotes inside a value are backslash-escaped.
    #
    # @param attrs [String, nil] text following the tag name
    # @return [Hash{Symbol => String}]
    def split_attr(attrs)
      attr_hash = {}
      # The key excludes whitespace so that a preceding value-less attribute
      # (e.g. `disabled name="x"`) is not glued onto the key.
      while attrs =~ /\s*([^=\s]+)=((\"([^\"]+)\")|(\'([^\']+)\')|([^\s]+))/
        attrs = $'
        key = $1
        value = $7 || $6 || $4
        # Block form on purpose: in a replacement *string* the sequence \'
        # means "text after the match", which would mangle the value instead
        # of escaping the quote.
        value = value.gsub("\"") { "\\\"" }.gsub("'") { "\\'" }
        attr_hash[key.downcase.to_sym] = value
      end
      attr_hash
    end
  end
end
@@ -0,0 +1,16 @@
1
module ParserGirl
  # Wraps a list of nodes. Calls naming a public Parser method are sent to
  # every wrapped node; every other call is forwarded to the list itself.
  class Proxy
    # @param data [Array] the wrapped nodes
    def initialize(data)
      @data = data
    end

    # Dispatches +method+ to the nodes or to the list.
    #
    # @return [Object] for Parser methods: the single result when exactly one
    #   node is wrapped, otherwise the Array of per-node results (empty when
    #   nothing is wrapped); for anything else: whatever the list returns
    def method_missing(method, *args, &blk)
      return @data.send(method, *args, &blk) unless parser_method?(method)

      new_data = @data.map { |d| d.send(method, *args, &blk) }
      @data.size == 1 ? new_data.first : new_data
    end

    # Keeps respond_to? in step with method_missing.
    def respond_to_missing?(method, include_private = false)
      parser_method?(method) ||
        @data.respond_to?(method, include_private) ||
        super
    end

    private

    # True when +method+ is a public instance method of Parser. Asking the
    # class avoids building a throwaway Parser on every dispatch.
    def parser_method?(method)
      Parser.public_method_defined?(method)
    end
  end
end
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parser_girl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
5
- prerelease:
4
+ version: 1.2.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - Matthias Geier
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-06-16 00:00:00.000000000 Z
11
+ date: 2014-10-05 00:00:00.000000000 Z
13
12
  dependencies: []
14
13
  description:
15
14
  email:
@@ -17,30 +16,32 @@ executables: []
17
16
  extensions: []
18
17
  extra_rdoc_files: []
19
18
  files:
20
- - lib/parser_girl.rb
21
19
  - LICENSE.md
20
+ - lib/parser_girl.rb
21
+ - lib/parser_girl/attributes.rb
22
+ - lib/parser_girl/parser.rb
23
+ - lib/parser_girl/proxy.rb
22
24
  homepage: https://github.com/matthias-geier/parser_girl
23
25
  licenses: []
26
+ metadata: {}
24
27
  post_install_message:
25
28
  rdoc_options: []
26
29
  require_paths:
27
30
  - lib
28
31
  required_ruby_version: !ruby/object:Gem::Requirement
29
- none: false
30
32
  requirements:
31
- - - ! '>='
33
+ - - ">="
32
34
  - !ruby/object:Gem::Version
33
- version: 1.9.1
35
+ version: '2.0'
34
36
  required_rubygems_version: !ruby/object:Gem::Requirement
35
- none: false
36
37
  requirements:
37
- - - ! '>='
38
+ - - ">="
38
39
  - !ruby/object:Gem::Version
39
40
  version: '0'
40
41
  requirements: []
41
42
  rubyforge_project:
42
- rubygems_version: 1.8.23
43
+ rubygems_version: 2.2.2
43
44
  signing_key:
44
- specification_version: 3
45
+ specification_version: 4
45
46
  summary: A minimal very fast xml/html parser
46
47
  test_files: []