parser_girl 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7973f3cc3bca16ec83df5007be3bde0699dba758
4
+ data.tar.gz: 4c6aa3aa81f2680b622dd0e969c5bd54d512e053
5
+ SHA512:
6
+ metadata.gz: f34007bd37b7807100af1b43fd2de033a86ec362786f9c1f1a47a3150099e375b234e304392242de3e40f44c2918cf9fb8f89c6eb42cf69e03b4719fe9223509
7
+ data.tar.gz: 66934a5d2c34ff8105706f3bb0971e169767981fff30c96d8c8b4fafb5db059e7723ac0c643fe54b56a9ec0367c578efd21e7be2f0451b113ef7a243156cd6c3
data/LICENSE.md CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2013, Matthias Geier
1
+ Copyright (c) 2013-2014, Matthias Geier
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without modification,
@@ -1,92 +1,13 @@
1
- class ParserGirl
2
- def initialize(xml=nil)
3
- @xml = xml
4
- @result = nil
5
- @stack = nil
6
- end
7
-
8
- def find(needle, haystack_base=nil)
9
- haystack_base = @xml unless haystack_base
10
- return [] unless haystack_base
11
-
12
- haystack = haystack_base
13
- pos = 0
14
- @result = []
15
- @stack = []
16
- while true do
17
- if haystack =~ /\<([^!][^\>]*)\>/i
18
- content = $1 # tag-hit
19
- b = $`.length + 1 # relative beginning in haystack
20
- e = content.length + b + 1 # relative ending in haystack
21
- if content =~ /^([^\s]+)/ and $1.downcase == "script" and
22
- haystack =~ /\<\/script(\s[^\>]*)?\>/i
23
- e = $`.length + $&.length
24
- end
1
+ require 'parser_girl/attributes'
2
+ require 'parser_girl/parser'
3
+ require 'parser_girl/proxy'
25
4
 
26
- if content =~ /^#{needle}(\s.*)?$/i
27
- push({:position => pos+e, :attrs => split_attr($1)})
28
- elsif content =~ /^\/#{needle}(\s.*)?$/i
29
- hash = pop(haystack_base, b+pos-1)
30
- if hash
31
- if block_given?
32
- @result.push(yield(hash[:content], hash[:attrs]))
33
- else
34
- @result.push(hash[:content])
35
- end
36
- end
37
- end
38
- pos += e
39
- haystack = haystack_base[pos, haystack_base.length-pos]
40
- else
41
- break
42
- end
43
- end
44
- # pop rest
45
- while @stack.any?
46
- hash = @stack.pop
47
- if block_given?
48
- @result.push(yield("", hash[:attrs]))
49
- end
50
- end
51
-
52
- @result
5
+ module ParserGirl
6
+ def self.new(*args)
7
+ return Parser.new(*args)
53
8
  end
54
9
 
55
- private
56
- def push(hash)
57
- @stack.push hash
58
- end
59
-
60
- def pop(haystack, current_position)
61
- if @stack.any?
62
- hash = @stack.pop
63
- hash[:content] = haystack[hash[:position],
64
- current_position-hash[:position]]
65
- hash[:end_position] = current_position
66
- hash
67
- end
68
- end
69
-
70
- def split_attr(attrs)
71
- attr_hash = {}
72
- while(1)
73
- if attrs =~ /\s*([^=]+)=((\"([^\"]+)\")|(\'([^\']+)\')|([^\s]+))/
74
- attrs = $'
75
- key = $1
76
- value = nil
77
- value = $7 if $7
78
- value = $6 if $6
79
- value = $4 if $4
80
- if value
81
- value.gsub! "\"", "\\\""
82
- value.gsub! "'", "\\'"
83
- end
84
- attr_hash[key.downcase.to_sym] = value
85
- else
86
- break
87
- end
88
- end
89
- attr_hash
10
+ def self.find(xml, needle)
11
+ return Parser.new(xml).find(needle)
90
12
  end
91
13
  end
92
-
@@ -0,0 +1,11 @@
1
module ParserGirl
  # Read-only access to the attribute hash that the including class keeps in
  # the @attrs instance variable.
  module Attributes
    # Looks up a single attribute.
    #
    # key - the key to look up in @attrs.
    #
    # Returns the stored value, or nil when the key is absent or when no
    # attribute hash has been set (@attrs is nil).
    def [](key)
      @attrs ? @attrs[key] : nil
    end

    # Returns a shallow copy of the attributes, so callers cannot mutate the
    # internal hash. Yields an empty hash when no attributes have been set.
    def to_h
      @attrs ? @attrs.dup : {}
    end
  end
end
@@ -0,0 +1,78 @@
1
module ParserGirl
  # Minimal regex-driven tag scanner. An instance wraps a markup string
  # (@xml) plus the attribute hash of the tag it was extracted from (@attrs).
  class Parser
    include Attributes

    # xml   - the markup string to search (may be nil).
    # attrs - attribute hash exposed through the Attributes mixin (may be nil).
    def initialize(xml=nil, attrs=nil)
      @xml = xml
      @attrs = attrs
      @stack = nil
    end

    # Returns a copy of the wrapped markup so callers cannot mutate it.
    def content
      @xml.dup
    end

    # Scans the wrapped markup for every element named +needle+.
    #
    # needle - tag name; it is interpolated into a case-insensitive regex
    #          as-is, so regex metacharacters in it are NOT escaped.
    #
    # Returns a Proxy wrapping one Parser per match. Elements whose closing
    # tag was seen carry the text between the opening and closing tag;
    # opening tags left unclosed are appended afterwards with "" as content.
    def find(needle)
      source = @xml
      remainder = source
      pos = 0 # absolute offset of +remainder+ inside +source+
      result = []
      @stack = []

      loop do
        # Next tag that does not start with "<!" (comments, doctype, CDATA).
        break unless remainder =~ /\<([^!][^\>]*)\>/i

        tag = $1                 # text between "<" and ">"
        b = $`.length + 1        # offset just past "<", relative to remainder
        e = tag.length + b + 1   # offset just past ">", relative to remainder

        # For <script> jump straight behind the closing </script>, so markup
        # inside the script body is never scanned as tags.
        # NOTE(review): this also skips the closing tag itself, so when
        # needle is "script" the element is only reported by the leftover
        # pass below, with empty content.
        if tag =~ /^([^\s]+)/ && $1.downcase == "script" &&
            remainder =~ /\<\/script(\s[^\>]*)?\>/i
          e = $`.length + $&.length
        end

        if tag =~ /^#{needle}(\s.*)?$/i
          # Opening tag: remember where its content starts and its attributes.
          @stack << { :position => pos + e, :attrs => split_attr($1) }
        elsif tag =~ /^\/#{needle}(\s.*)?$/i
          # Closing tag: pair it with the most recent opening tag, if any.
          entry = pop(source, b + pos - 1)
          result << Parser.new(entry[:content], entry[:attrs]) if entry
        end

        pos += e
        remainder = source[pos, source.length - pos]
      end

      # Opening tags that were never closed are reported with empty content.
      result += @stack.map { |entry| Parser.new("", entry[:attrs]) }
      Proxy.new(result)
    end

    protected

    # Takes the most recent opening tag off the stack and fills in its content.
    #
    # haystack         - the full markup string.
    # current_position - absolute index of the "<" of the closing tag.
    #
    # Returns the stack entry extended with :content and :end_position, or
    # nil when the stack is empty (closing tag without an opening tag).
    def pop(haystack, current_position)
      return unless @stack.any?

      entry = @stack.pop
      entry[:content] = haystack[entry[:position],
                                 current_position - entry[:position]]
      entry[:end_position] = current_position
      entry
    end

    # Parses the attribute part of a tag (everything after the tag name).
    #
    # attrs - the raw attribute string, or nil when the tag has none.
    #
    # Returns a hash mapping downcased, symbolized attribute names to their
    # values; double and single quotes inside a value are backslash-escaped.
    def split_attr(attrs)
      attr_hash = {}
      while attrs =~ /\s*([^=]+)=((\"([^\"]+)\")|(\'([^\']+)\')|([^\s]+))/
        attrs = $'
        key = $1
        # Exactly one alternative matched: unquoted ($7), single-quoted ($6)
        # or double-quoted ($4).
        value = $7 || $6 || $4
        if value
          # Block form is required: in a replacement *string* the sequence
          # \' is the post-match back-reference, which used to splice the
          # remainder of the value in instead of escaping the quote.
          value = value.gsub("\"") { "\\\"" }.gsub("'") { "\\'" }
        end
        attr_hash[key.downcase.to_sym] = value
      end
      attr_hash
    end
  end
end
@@ -0,0 +1,16 @@
1
module ParserGirl
  # Wraps the list of results returned by a search. Methods that a parser
  # understands are broadcast to every element; everything else is forwarded
  # to the underlying collection.
  class Proxy
    # data - the collection of result objects to wrap.
    def initialize(data)
      @data = data
    end

    # Dispatches unknown methods.
    #
    # When a parser instance publicly responds to +method+, it is called on
    # each element; the single result is returned directly when there is
    # exactly one element, otherwise the array of results. Any other method
    # is forwarded to the wrapped collection itself.
    #
    # public_send is used so that private Kernel methods (system, exit, ...)
    # cannot be reached through the proxy.
    def method_missing(method, *args, &blk)
      if parser_method?(method)
        new_data = @data.map { |d| d.public_send(method, *args, &blk) }
        @data.size == 1 ? new_data.first : new_data
      else
        @data.public_send(method, *args, &blk)
      end
    end

    # Keeps respond_to? truthful for everything method_missing can dispatch.
    def respond_to_missing?(method, include_private = false)
      parser_method?(method) || @data.respond_to?(method) || super
    end

    private

    # True when a freshly built parser publicly responds to +method+.
    def parser_method?(method)
      ParserGirl.new.respond_to?(method)
    end
  end
end
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parser_girl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
5
- prerelease:
4
+ version: 1.2.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - Matthias Geier
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-06-16 00:00:00.000000000 Z
11
+ date: 2014-10-05 00:00:00.000000000 Z
13
12
  dependencies: []
14
13
  description:
15
14
  email:
@@ -17,30 +16,32 @@ executables: []
17
16
  extensions: []
18
17
  extra_rdoc_files: []
19
18
  files:
20
- - lib/parser_girl.rb
21
19
  - LICENSE.md
20
+ - lib/parser_girl.rb
21
+ - lib/parser_girl/attributes.rb
22
+ - lib/parser_girl/parser.rb
23
+ - lib/parser_girl/proxy.rb
22
24
  homepage: https://github.com/matthias-geier/parser_girl
23
25
  licenses: []
26
+ metadata: {}
24
27
  post_install_message:
25
28
  rdoc_options: []
26
29
  require_paths:
27
30
  - lib
28
31
  required_ruby_version: !ruby/object:Gem::Requirement
29
- none: false
30
32
  requirements:
31
- - - ! '>='
33
+ - - ">="
32
34
  - !ruby/object:Gem::Version
33
- version: 1.9.1
35
+ version: '2.0'
34
36
  required_rubygems_version: !ruby/object:Gem::Requirement
35
- none: false
36
37
  requirements:
37
- - - ! '>='
38
+ - - ">="
38
39
  - !ruby/object:Gem::Version
39
40
  version: '0'
40
41
  requirements: []
41
42
  rubyforge_project:
42
- rubygems_version: 1.8.23
43
+ rubygems_version: 2.2.2
43
44
  signing_key:
44
- specification_version: 3
45
+ specification_version: 4
45
46
  summary: A minimal very fast xml/html parser
46
47
  test_files: []