hiroiyomi 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 16f588a0df9ea7f70429f272c27e70b3de34378959999be3c09b0a9d535c8040
4
- data.tar.gz: f5a2922e84a4b42445437a81e3e3eebf6117c044d7a25666ceeeed29c0c2350e
3
+ metadata.gz: 0661022f19059a23cf6ec690cb47bb531f2b08225197e487648d91a8f31df1d7
4
+ data.tar.gz: 813da1519fe1b3da7e41775d5ca6b63198d5846e8f1624f95d9b82df0f46b35c
5
5
  SHA512:
6
- metadata.gz: cd0b643422d1b5b0807fd872cb5c2db60e7ec20ef491c9d6c69921b10871d9f49e762bb06f249f1d65adbb2be8229fbfa184aa22ecda77f59324354aab72e1fd
7
- data.tar.gz: b4debfcfd5dbcc3519c2d85544b97d4b984d00c59eb50c5a4f54e2e1d683605c468e6c403c57bf4caf2d92622f75e8c8593d2c40d59d6302acbb9a1eca99e8b3
6
+ metadata.gz: 0bd9bc0554a39fc7ed25133be2cda01d2393268ef49ca1dee9c9dbc67cfa1354bb940a9e651c8236006df0c17f277eb0f821dfe4dbf3584ca6756044a65070c1
7
+ data.tar.gz: bc3f1e4e3ab39af9f83367a9d72fa622a801978fb840c426464cf7704ec16d9ad81f594efb360ac76c60fc0107801fbd9ce280edcdc242042404d61a11c62175
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.5.1
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # Hiroiyomi
2
2
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/hiroiyomi`. To experiment with that code, run `bin/console` for an interactive prompt.
4
-
5
- TODO: Delete this and the text above, and describe your gem
3
+ Provides features to parse and filter HTML elements.
6
4
 
7
5
  ## Installation
8
6
 
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'open-uri'
4
- require 'openssl'
3
+ require 'hiroiyomi/html/dom_parser_helper'
5
4
 
6
5
  module Hiroiyomi
7
6
  module Html
@@ -9,6 +8,53 @@ module Hiroiyomi
9
8
  class Attribute
10
9
  attr_accessor :name, :value
11
10
 
11
+ class << self
12
+ def value_of(file)
13
+ name = DOMParserHelper.extract_string(file)
14
+ return nil if name.empty?
15
+ value = extract_value(file)
16
+ Attribute.new(name, value.empty? ? nil : value)
17
+ end
18
+
19
+ private
20
+
21
+ # name=value
22
+ # An unquoted value ends at the first space or >
23
+ # name="value"
24
+ # name='value'
25
+ def extract_value(file)
26
+ value = ''
27
+ open = { "'" => false, '"' => false }
28
+ equal = false
29
+
30
+ while (c = file.getc)
31
+ case c
32
+ when "'", '"'
33
+ break if open[c]
34
+ open_keys = open.keys
35
+ open_keys.delete(c)
36
+ if open[open_keys.first]
37
+ value += c
38
+ else
39
+ open[c] = true
40
+ end
41
+ else
42
+ if open.values.any?
43
+ value += c
44
+ elsif c == '='
45
+ equal = true
46
+ elsif ['>', ' '].include?(c)
47
+ file.ungetc(c)
48
+ break
49
+ elsif equal
50
+ value += c
51
+ end
52
+ end
53
+ end
54
+ value
55
+ end
56
+ end
57
+
12
58
  def initialize(name, value = nil)
13
59
  @name = name
14
60
  @value = value
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Hiroiyomi
4
+ module Html
5
+ # Childable
6
+ module Childable
7
+ def text?
8
+ false
9
+ end
10
+ end
11
+ end
12
+ end
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'open-uri'
4
- require 'openssl'
3
+ require 'hiroiyomi/html/element'
4
+ require 'hiroiyomi/html/attribute'
5
+ require 'hiroiyomi/html/text'
6
+ require 'hiroiyomi/html/dom_parser_helper'
5
7
 
6
8
  module Hiroiyomi
7
9
  module Html
@@ -11,12 +13,18 @@ module Hiroiyomi
11
13
 
12
14
  attr_accessor :root
13
15
 
14
- def initialize
15
- @root = nil
16
+ class << self
17
+ def value_of(file)
18
+ document = new
19
+ return document if file.nil?
20
+
21
+ document.root = Element.value_of(file)
22
+ document
23
+ end
16
24
  end
17
25
 
18
- def element=(element)
19
- @root = element
26
+ def initialize
27
+ @root = nil
20
28
  end
21
29
 
22
30
  def each
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'hiroiyomi/parser'
4
+ require 'hiroiyomi/html/document'
5
+
6
+ module Hiroiyomi
7
+ module Html
8
+ # DOMParser
9
+ class DOMParser
10
+ include Parser
11
+
12
+ private
13
+
14
+ def do_parse(file)
15
+ Document.value_of(file)
16
+ end
17
+
18
+ def do_filter(document, filter:, is_deep: true)
19
+ filtered_elements = filter_element(document, filter, [])
20
+ return filtered_elements unless is_deep
21
+
22
+ filtered_elements.map { |e| e.deep_select(filter) }.flatten
23
+ end
24
+
25
+ def filter_element(element, filter, filtered_elements)
26
+ element.each do |child|
27
+ next if child.text?
28
+ if filter&.include?(child.name.downcase)
29
+ filtered_elements.push(child)
30
+ else
31
+ filter_element(child, filter, filtered_elements)
32
+ end
33
+ end
34
+ filtered_elements
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Hiroiyomi
4
+ module Html
5
+ # DOMParserHelper
6
+ class DOMParserHelper
7
+ class << self
8
+ def cur_pos(file, c)
9
+ file.ungetc(c) # In order to get current position correctly
10
+ cur_pos = file.pos
11
+ file.getc # drop <
12
+ cur_pos
13
+ end
14
+
15
+ def skip_ignore_chars(file)
16
+ while (c = file.getc)
17
+ unless /[\\t\\n\\r\s]/.match?(c)
18
+ file.ungetc(c)
19
+ return
20
+ end
21
+ end
22
+ end
23
+
24
+ # Reads a run of word characters or hyphens (e.g. a tag or attribute name)
25
+ def extract_string(file)
26
+ skip_ignore_chars(file)
27
+ string = ''
28
+ while (c = file.getc)
29
+ case c
30
+ when /[\w-]/
31
+ string += c
32
+ else
33
+ file.ungetc(c)
34
+ break
35
+ end
36
+ end
37
+ string.gsub(/[\t\r\n]/, '').strip
38
+ end
39
+
40
+ def extract_text_with_symbols(file, char_before_last_char = ']', last_char = '>')
41
+ string = ''
42
+ while (c = file.getc)
43
+ string += c
44
+ next_c = file.getc
45
+ if c == char_before_last_char && last_char == next_c
46
+ string += next_c
47
+ break
48
+ end
49
+ file.ungetc(next_c)
50
+ end
51
+ string
52
+ end
53
+
54
+ # after <!
55
+ def extract_bang_text(file)
56
+ cur_pos = file.pos
57
+ case (c = file.getc)
58
+ when '[' # CDATA
59
+ return "#{c}#{extract_text_with_symbols(file, ']')}"
60
+ when '-' # Comment
61
+ extract_text_with_symbols(file, '-')
62
+ return '' # Drop comments
63
+ end
64
+ file.pos = cur_pos
65
+ nil
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
@@ -1,25 +1,145 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'open-uri'
4
- require 'openssl'
3
+ require 'hiroiyomi/html/childable'
4
+ require 'hiroiyomi/html/attribute'
5
+ require 'hiroiyomi/html/text'
5
6
 
6
7
  module Hiroiyomi
7
8
  module Html
8
9
  # Element
9
10
  class Element
10
11
  include Enumerable
12
+ include Childable
11
13
 
12
- attr_accessor :name, :content, :attributes, :children
14
+ attr_accessor :name, :parent, :attributes, :children
13
15
 
14
- def initialize(name, content: nil, attributes: [], children: [])
16
+ class << self
17
+ EXCEPTIONAL_ELEMENT_NAME_LIST = %w[script style].freeze
18
+
19
+ def value_of(file, parent_element = nil)
20
+ # name
21
+ name = extract_element_name(file)
22
+
23
+ return parent_element if name.empty?
24
+
25
+ # element
26
+ element = Element.new(name, parent: parent_element)
27
+
28
+ if parent_element.nil?
29
+ parent_element = element
30
+ else
31
+ parent_element.element = element
32
+ end
33
+
34
+ # attributes
35
+ element.attributes = extract_attributes(file)
36
+
37
+ # exceptional elements
38
+ if EXCEPTIONAL_ELEMENT_NAME_LIST.include?(name.downcase)
39
+ element.element = extract_exceptional_element_text(file, name)
40
+ return parent_element
41
+ end
42
+
43
+ # text if >..., close if /, or open element if >...<
44
+ Text.add_text_to_element_or_parse(file, element)
45
+
46
+ # Check for the closing tag. If the element is not closed (e.g. <img ...>), move its children to the parent element.
47
+ element.move_children_to(parent_element) unless validate_closing_element?(element, file)
48
+
49
+ parent_element
50
+ end
51
+
52
+ private
53
+
54
+ def validate_closing_element?(element, file)
55
+ open = false
56
+
57
+ while (c = file.getc)
58
+ # /> or </
59
+ if c == '/'
60
+ open = false
61
+ cur_pos = DOMParserHelper.cur_pos(file, c)
62
+ next_c = file.getc
63
+ return true if next_c == '>' # case of />
64
+
65
+ # Check whether name is the same or not
66
+ file.ungetc(next_c)
67
+ close_name = DOMParserHelper.extract_string(file)
68
+
69
+ return false if close_name.empty?
70
+
71
+ is_closed = close_name == element.name
72
+ return true if is_closed
73
+
74
+ # If the closing name matches neither this element nor any of its ancestors, keep scanning for the next closing tag
75
+ next unless element.parents?(close_name)
76
+
77
+ file.pos = cur_pos
78
+ return false
79
+ elsif c == '<' # case of </
80
+ open = true
81
+ elsif open
82
+ file.ungetc(c)
83
+ return false
84
+ end
85
+ end
86
+ false
87
+ end
88
+
89
+ # Start from > after attributes
90
+ def extract_exceptional_element_text(file, name)
91
+ DOMParserHelper.skip_ignore_chars(file)
92
+ file.getc # drop >
93
+ string = ''
94
+ while (c = file.getc)
95
+ if c == '<'
96
+ cur_pos = file.pos
97
+ if file.getc == '/' && name == DOMParserHelper.extract_string(file)
98
+ DOMParserHelper.skip_ignore_chars(file)
99
+ file.getc # drop >
100
+ break
101
+ end
102
+ file.pos = cur_pos
103
+ end
104
+ string += c
105
+ end
106
+ return Text.new(string) unless string.empty?
107
+ nil
108
+ end
109
+
110
+ def extract_element_name(file)
111
+ while (c = file.getc)
112
+ next unless c == '<'
113
+ cur_pos = file.pos
114
+ if file.getc == '!'
115
+ # Skip markup that starts with <!, e.g. <!DOCTYPE html> or <!-- comments -->
116
+ DOMParserHelper.extract_bang_text(file)
117
+ next
118
+ end
119
+ file.pos = cur_pos
120
+ return DOMParserHelper.extract_string(file)
121
+ end
122
+ ''
123
+ end
124
+
125
+ def extract_attributes(file)
126
+ attributes = []
127
+ while (attribute = Attribute.value_of(file))
128
+ attributes.push(attribute)
129
+ end
130
+ attributes
131
+ end
132
+ end
133
+
134
+ def initialize(name, parent: nil, attributes: [], children: [])
15
135
  @name = name
16
- @content = content
136
+ @parent = parent
17
137
  @attributes = attributes
18
138
  @children = children
19
139
  end
20
140
 
21
141
  def element=(element)
22
- @children.push(element)
142
+ @children.push(element) unless element.nil?
23
143
  end
24
144
 
25
145
  def each
@@ -27,6 +147,44 @@ module Hiroiyomi
27
147
  yield child
28
148
  end
29
149
  end
150
+
151
+ def move_children_to(element)
152
+ each do |child|
153
+ element.element = child
154
+ end
155
+ children.clear
156
+ end
157
+
158
+ def parents?(name)
159
+ return false if parent.nil?
160
+ return true if parent.name == name
161
+ parent.parents?(name)
162
+ end
163
+
164
+ def deep_select(search_name_list = [], searched = [])
165
+ searched.push(self) if search_name_list.include?(name.downcase)
166
+ children.each do |child|
167
+ next if child.text?
168
+ if search_name_list.include?(child.name.downcase)
169
+ searched.push(child)
170
+ else
171
+ child.deep_select(search_name_list, searched)
172
+ end
173
+ end
174
+ searched
175
+ end
176
+
177
+ def to_s
178
+ attrs = attributes.map(&:to_s).join(' ')
179
+ attrs = ' ' + attrs unless attrs.empty?
180
+ "<#{name}#{attrs}>#{innerHTML}</#{name}>"
181
+ end
182
+
183
+ private
184
+
185
+ def innerHTML
186
+ children.map(&:to_s).join
187
+ end
30
188
  end
31
189
  end
32
190
  end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'hiroiyomi/html/childable'
4
+ require 'hiroiyomi/html/element'
5
+
6
+ module Hiroiyomi
7
+ module Html
8
+ # Text
9
+ class Text
10
+ include Childable
11
+ attr_accessor :value
12
+
13
+ class << self
14
+ # Start from > after attributes
15
+ def add_text_to_element_or_parse(file, element)
16
+ close = false
17
+ string = ''
18
+
19
+ append_string = lambda { |str|
20
+ string += str if close
21
+ }
22
+
23
+ add_text_to_element = lambda { |str = ''|
24
+ append_string.call str
25
+ string = string.gsub(/[\t\r\n]/, '').strip
26
+ unless string.empty?
27
+ element.element = new(string)
28
+ string = ''
29
+ end
30
+ }
31
+
32
+ while (c = file.getc)
33
+ case c
34
+ when '/' # /* */ ?
35
+ cur_pos = DOMParserHelper.cur_pos(file, c)
36
+ next_c = file.getc
37
+ if next_c == '*'
38
+ add_text_to_element.call "#{c}#{next_c}#{DOMParserHelper.extract_text_with_symbols(file, next_c, c)}"
39
+ next
40
+ end
41
+ # / is of />
42
+ file.pos = cur_pos
43
+ break
44
+ when '<'
45
+ cur_pos = DOMParserHelper.cur_pos(file, c)
46
+ next_c = file.getc
47
+ if next_c == '!'
48
+ bang_string = DOMParserHelper.extract_bang_text(file)
49
+ unless bang_string.nil?
50
+ # empty if comment
51
+ add_text_to_element.call "#{c}#{next_c}#{bang_string}" unless bang_string.empty?
52
+ next
53
+ end
54
+ end
55
+ file.pos = cur_pos
56
+
57
+ add_text_to_element.call
58
+
59
+ # Next element from < char
60
+ element = Element.value_of(file, element)
61
+
62
+ # file.getc # drop <
63
+ when '>' # > is of >...
64
+ close = true
65
+ else
66
+ append_string.call c
67
+ end
68
+ end
69
+
70
+ add_text_to_element.call
71
+ element
72
+ end
73
+ end
74
+
75
+ def initialize(value)
76
+ @value = value
77
+ end
78
+
79
+ def text?
80
+ true
81
+ end
82
+
83
+ def to_s
84
+ value
85
+ end
86
+ end
87
+ end
88
+ end
@@ -7,22 +7,14 @@ module Hiroiyomi
7
7
  # Parser
8
8
  module Parser
9
9
  def self.included(klass)
10
- # @param [String] url URL
11
- # @param [Array] filter of filtered by name list, e.g. [h1, h2, h3]
12
- #
13
- # @return [Array] of Hiroiyomi::Html::Element which has been filtered
14
- def klass.read(url, filter:)
15
- new.read(url, filter: filter)
10
+ def klass.read(url, filter:, is_deep: true)
11
+ new.read(url, filter: filter, is_deep: is_deep)
16
12
  end
17
13
  end
18
14
 
19
- # @param [String] url URL
20
- # @param [Array] filter of filtered by name list, e.g. [h1, h2, h3]
21
- #
22
- # @return [Array] of Hiroiyomi::Html::Element which has been filtered
23
- def read(url, filter:)
15
+ def read(url, filter:, is_deep: true)
24
16
  @open_file = open_url(url)
25
- do_filter(do_parse(@open_file), filter: filter)
17
+ do_filter(do_parse(@open_file), filter: filter, is_deep: is_deep)
26
18
  ensure
27
19
  @open_file&.unlink
28
20
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  # Hiroiyomi
4
4
  module Hiroiyomi
5
- VERSION = '0.1.1'
5
+ VERSION = '0.1.2'
6
6
  end
data/lib/hiroiyomi.rb CHANGED
@@ -2,16 +2,17 @@
2
2
 
3
3
  require 'hiroiyomi/version'
4
4
  require 'hiroiyomi/root'
5
- require 'hiroiyomi/html_parser'
5
+ require 'hiroiyomi/html/dom_parser'
6
6
 
7
7
  # Hiroiyomi
8
8
  module Hiroiyomi
9
9
  # @param [String] url URL
10
10
  # @param [Array] filter list of element names to filter by, e.g. [h1, h2, h3]
11
+ # @param [Boolean] is_deep Whether to also search inside matched elements for nested matches
11
12
  #
12
13
  # @return [Array] Hiroiyomi::Html::Element objects that matched the filter
13
- def read(url, filter: [])
14
- HtmlParser.read(url, filter: filter)
14
+ def read(url, filter: [], is_deep: true)
15
+ Html::DOMParser.read(url, filter: filter, is_deep: is_deep)
15
16
  end
16
17
 
17
18
  # rubocop:disable Style/AccessModifierDeclarations
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hiroiyomi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomonori Murakami
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-07-12 00:00:00.000000000 Z
11
+ date: 2018-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -76,6 +76,7 @@ files:
76
76
  - ".gitignore"
77
77
  - ".rspec"
78
78
  - ".rubocop.yml"
79
+ - ".ruby-version"
79
80
  - ".travis.yml"
80
81
  - Gemfile
81
82
  - README.md
@@ -85,9 +86,12 @@ files:
85
86
  - hiroiyomi.gemspec
86
87
  - lib/hiroiyomi.rb
87
88
  - lib/hiroiyomi/html/attribute.rb
89
+ - lib/hiroiyomi/html/childable.rb
88
90
  - lib/hiroiyomi/html/document.rb
91
+ - lib/hiroiyomi/html/dom_parser.rb
92
+ - lib/hiroiyomi/html/dom_parser_helper.rb
89
93
  - lib/hiroiyomi/html/element.rb
90
- - lib/hiroiyomi/html_parser.rb
94
+ - lib/hiroiyomi/html/text.rb
91
95
  - lib/hiroiyomi/parser.rb
92
96
  - lib/hiroiyomi/root.rb
93
97
  - lib/hiroiyomi/version.rb
@@ -1,191 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'hiroiyomi/parser'
4
- require 'hiroiyomi/html/document'
5
- require 'hiroiyomi/html/element'
6
- require 'hiroiyomi/html/attribute'
7
-
8
- module Hiroiyomi
9
- # HtmlParser
10
- # rubocop:disable Metrics/ClassLength
11
- class HtmlParser
12
- include Parser
13
-
14
- private
15
-
16
- def do_parse(file)
17
- document = Html::Document.new
18
- return document if file.nil?
19
-
20
- track_element(file, document)
21
- end
22
-
23
- # ========
24
- # Extract HTML Element
25
- # ========
26
-
27
- def track_element(file, document)
28
- while (c = file.getc)
29
- break if c == '<' && extract_element(file, document)
30
- end
31
- document
32
- end
33
-
34
- def extract_element(file, document)
35
- name = extract_name(file)
36
- return false if name.empty?
37
-
38
- attributes = extract_attributes(file)
39
- element = Html::Element.new(name, attributes: attributes)
40
- content = extract_content(file, element)
41
- element.content = content unless content.empty?
42
-
43
- document.element = element if validate_closing_element?(name, file)
44
- true
45
- end
46
-
47
- # rubocop:disable Metrics/MethodLength
48
- def extract_name(file, skip_space: false)
49
- name = ''
50
- while (c = file.getc)
51
- case c
52
- when /[\w-]/
53
- name += c
54
- else
55
- next if skip_space && c =~ /\s/
56
- file.ungetc(c)
57
- break
58
- end
59
- end
60
- name
61
- end
62
-
63
- # rubocop:enable Metrics/MethodLength
64
-
65
- def extract_attributes(file)
66
- attributes = []
67
- while (attribute = extract_attribute(file))
68
- attributes.push(attribute)
69
- end
70
- attributes
71
- end
72
-
73
- # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
74
- def extract_attribute(file)
75
- name = extract_name(file, skip_space: true)
76
- return nil if name.empty?
77
-
78
- value = ''
79
- open = false
80
- while (c = file.getc)
81
- case c
82
- when '"'
83
- break if open
84
- open = true
85
- else
86
- value += c if open
87
- end
88
- end
89
-
90
- Html::Attribute.new(name, value.empty? ? nil : value)
91
- end
92
-
93
- # rubocop:enable Metrics/MethodLength, Metrics/CyclomaticComplexity
94
-
95
- # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/AbcSize
96
- def extract_content(file, document)
97
- content = ''
98
- close = false
99
-
100
- append_content = lambda { |str|
101
- content += str if close
102
- }
103
-
104
- while (c = file.getc)
105
- case c
106
- when '/'
107
- # /*<![CDATA[*/!function(e,t,r){function ... ])/*]]>*/
108
- next_c = file.getc
109
- if next_c == '*'
110
- append_content.call(c + next_c)
111
- content += extract_content_of_cddata(file)
112
- elsif !close
113
- file.ungetc(c)
114
- break
115
- end
116
- when '<'
117
- extract_element(file, document)
118
- # file.ungetc(c)
119
- # track_element(file, document)
120
- close = false
121
- when '>'
122
- close ||= true
123
- else
124
- append_content.call(c)
125
- end
126
- end
127
- content
128
- end
129
-
130
- # /*<![CDATA[*/!function(e,t,r){function ... ])/*]]>*/
131
- def extract_content_of_cddata(file)
132
- content = ''
133
- start_cddata = false
134
-
135
- append_content = lambda { |str|
136
- content += str
137
- }
138
-
139
- while (c = file.getc)
140
- case c
141
- when '/'
142
- next_c = file.getc
143
- append_content.call(c + next_c) if next_c == '*'
144
- when '*' # /*<![CDATA[*/!function(e,t,r){function ... ])/*]]>*/
145
- next_c = file.getc
146
- unless next_c == '/'
147
- file.ungetc(next_c)
148
- next_c = ''
149
- end
150
- start_cddata = !start_cddata
151
- append_content.call(c + next_c)
152
- return content unless start_cddata
153
- else
154
- append_content.call(c)
155
- end
156
- end
157
- content
158
- end
159
-
160
- # rubocop:enable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/AbcSize
161
-
162
- def validate_closing_element?(element_name, file)
163
- open = false
164
- while (c = file.getc)
165
- return !open || extract_name(file) == element_name if c == '/'
166
- open = true if c == '<'
167
- end
168
- false
169
- end
170
-
171
- # ========
172
- # Filter HTML Element
173
- # ========
174
-
175
- def do_filter(document, filter:)
176
- filter_element(document, filter, [])
177
- end
178
-
179
- def filter_element(element, filter, extracted_elements)
180
- element.each do |child|
181
- if filter&.include?(child.name)
182
- extracted_elements.push(child)
183
- else
184
- filter_element(child, filter, extracted_elements)
185
- end
186
- end
187
- extracted_elements
188
- end
189
- end
190
- # rubocop:enable Metrics/ClassLength
191
- end