scrubyt 0.2.8 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +32 -2
- data/Rakefile +25 -20
- data/lib/scrubyt.rb +24 -5
- data/lib/scrubyt/core/navigation/fetch_action.rb +76 -42
- data/lib/scrubyt/core/navigation/navigation_actions.rb +24 -6
- data/lib/scrubyt/core/scraping/filters/base_filter.rb +5 -5
- data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +2 -2
- data/lib/scrubyt/core/scraping/filters/download_filter.rb +2 -1
- data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +7 -2
- data/lib/scrubyt/core/scraping/filters/tree_filter.rb +37 -12
- data/lib/scrubyt/core/scraping/pattern.rb +82 -90
- data/lib/scrubyt/core/scraping/pre_filter_document.rb +2 -1
- data/lib/scrubyt/core/shared/evaluation_context.rb +14 -37
- data/lib/scrubyt/core/shared/extractor.rb +55 -54
- data/lib/scrubyt/logging.rb +16 -0
- data/lib/scrubyt/output/export.rb +1 -1
- data/lib/scrubyt/output/post_processor.rb +6 -5
- data/lib/scrubyt/output/result.rb +1 -0
- data/lib/scrubyt/output/result_dumper.rb +4 -3
- data/lib/scrubyt/output/result_node.rb +73 -0
- data/lib/scrubyt/output/scrubyt_result.rb +28 -0
- data/lib/scrubyt/utils/ruby_extensions.rb +8 -0
- data/lib/scrubyt/utils/simple_example_lookup.rb +14 -1
- data/lib/scrubyt/utils/xpathutils.rb +11 -0
- metadata +7 -12
- data/test/unittests/constraint_test.rb +0 -107
- data/test/unittests/extractor_test.rb +0 -91
- data/test/unittests/filter_test.rb +0 -79
- data/test/unittests/input/constraint_test.html +0 -55
- data/test/unittests/input/test.html +0 -39
- data/test/unittests/pattern_test.rb +0 -27
- data/test/unittests/simple_example_lookup_test.rb +0 -68
- data/test/unittests/xpathutils_test.rb +0 -152
@@ -2,12 +2,13 @@ module Scrubyt
|
|
2
2
|
##
|
3
3
|
#=<tt>Apply different functions on the input document</tt>
|
4
4
|
#Before the document is passed to Hpricot for parsing, we may need
|
5
|
-
#to do different stuff with it which are clumsy/not appropriate/impossible
|
5
|
+
#to do different stuff with it which are clumsy/not appropriate/impossible
|
6
6
|
#to do once the document is loaded.
|
7
7
|
class PreFilterDocument
|
8
8
|
#Replace <br/> tags with newlines
|
9
9
|
def self.br_to_newline(doc)
|
10
10
|
doc.gsub(/<br[ \/]*>/i, "\r\n")
|
11
|
+
doc = doc.tr("\240"," ")
|
11
12
|
end #end of function br_to_newline
|
12
13
|
end #end of class PreFilterDocument
|
13
14
|
end #end of module Scrubyt
|
@@ -5,7 +5,7 @@ module Scrubyt
|
|
5
5
|
#Every kind of data that is shared among patterns during the extraction process
|
6
6
|
#is held in this class, so it can be looked up anytime.
|
7
7
|
#
|
8
|
-
#This class provides also some high-level basic functionality in navigation, like
|
8
|
+
#This class provides also some high-level basic functionality in navigation, like
|
9
9
|
#crawling to new pages, attaching document to the root pattern once arrived at the
|
10
10
|
#desired page etc.
|
11
11
|
#
|
@@ -14,7 +14,7 @@ module Scrubyt
|
|
14
14
|
#and this is accomplished through EvaluationContext.
|
15
15
|
class EvaluationContext
|
16
16
|
attr_accessor :root_pattern, :document_index, :extractor, :uri_builder, :evaluating_extractor_definition
|
17
|
-
|
17
|
+
|
18
18
|
def initialize
|
19
19
|
@root_pattern = nil
|
20
20
|
@next_page = nil
|
@@ -22,54 +22,31 @@ module Scrubyt
|
|
22
22
|
@extractor = nil
|
23
23
|
@evaluating_extractor_definition = false
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
##
|
27
27
|
#Crawl to a new page. This function should not be called from the outside - it is automatically called
|
28
28
|
#if the next_page pattern is defined
|
29
|
-
def crawl_to_new_page(
|
30
|
-
|
31
|
-
|
29
|
+
def crawl_to_new_page(uri_builder)
|
30
|
+
#puts "Crawling to new page!"
|
31
|
+
#puts "example #{uri_builder.next_page_example}"
|
32
|
+
temp_document = uri_builder.next_page_example ?
|
33
|
+
generate_next_page_link(uri_builder) :
|
32
34
|
uri_builder.generate_next_uri
|
33
|
-
return
|
34
|
-
clear_sources_and_sinks(@root_pattern)
|
35
|
+
return false if temp_document == nil
|
35
36
|
FetchAction.restore_host_name
|
36
37
|
@extractor.fetch(temp_document)
|
37
|
-
|
38
|
+
return true
|
38
39
|
end
|
39
40
|
|
40
|
-
##
|
41
|
-
#Attach document to the root pattern; This is happening automatically as the root pattern is defined or
|
42
|
-
#crawling to a new page
|
43
|
-
def attach_current_document
|
44
|
-
doc = @extractor.get_hpricot_doc
|
45
|
-
@root_pattern.filters[0].source << doc
|
46
|
-
@root_pattern.filters[0].sink << doc
|
47
|
-
@root_pattern.last_result ||= []
|
48
|
-
@root_pattern.last_result << doc
|
49
|
-
@root_pattern.result.add_result(@root_pattern.filters[0].source,
|
50
|
-
@root_pattern.filters[0].sink)
|
51
|
-
end
|
52
|
-
|
53
|
-
##
|
54
|
-
#After crawling to the new page, the sources and sinks need to be cleaned
|
55
|
-
#since they are no more valid
|
56
|
-
def clear_sources_and_sinks(pattern)
|
57
|
-
pattern.filters.each do |filter|
|
58
|
-
filter.source = []
|
59
|
-
filter.sink = []
|
60
|
-
end
|
61
|
-
pattern.children.each {|child| clear_sources_and_sinks child}
|
62
|
-
end
|
63
|
-
|
64
41
|
def generate_next_page_link(uri_builder)
|
65
|
-
uri_builder.next_page_pattern.filters[0].generate_XPath_for_example(true)
|
42
|
+
return nil unless uri_builder.next_page_pattern.filters[0].generate_XPath_for_example(true)
|
66
43
|
xpath = uri_builder.next_page_pattern.filters[0].xpath
|
67
44
|
node = (@extractor.get_hpricot_doc/xpath).map.last
|
68
45
|
node = XPathUtils.find_nearest_node_with_attribute(node, 'href')
|
69
|
-
return nil if node == nil || node.attributes['href'] == nil
|
46
|
+
return nil if node == nil || node.attributes['href'] == nil
|
70
47
|
node.attributes['href'].gsub('&') {'&'}
|
71
|
-
end
|
72
|
-
|
48
|
+
end
|
49
|
+
|
73
50
|
def setup_uri_builder(pattern,args)
|
74
51
|
if args[0] =~ /^http.+/
|
75
52
|
args.insert(0, @extractor.get_current_doc_url) if args[1] !~ /^http.+/
|
@@ -3,49 +3,56 @@ module Scrubyt
|
|
3
3
|
#=<tt>Driving the whole extraction process</tt>
|
4
4
|
#
|
5
5
|
#Extractor is a performer class - it gets an extractor definition and carries
|
6
|
-
#out the actions and evaluates the wrappers sequentially.
|
6
|
+
#out the actions and evaluates the wrappers sequentially.
|
7
7
|
#
|
8
8
|
#Originally also the navigation actions were here, but since the class got too
|
9
9
|
#big, they were factored out to an own class, NavigationAction.
|
10
|
-
class Extractor
|
10
|
+
class Extractor
|
11
11
|
#The definition of the extractor is passed through this method
|
12
12
|
def self.define(mode=nil, &extractor_definition)
|
13
13
|
backtrace = SharedUtils.get_backtrace
|
14
14
|
parts = backtrace[1].split(':')
|
15
15
|
source_file = parts[0]
|
16
|
-
|
16
|
+
|
17
17
|
@@mode = mode
|
18
18
|
#We are keeping the relations between the detail patterns and their root patterns
|
19
19
|
@@detail_extractor_to_pattern_name = {}
|
20
|
-
@@detail_pattern_relations = {}
|
20
|
+
@@detail_pattern_relations = {}
|
21
21
|
#root pattern -> URIBuilder mapping
|
22
22
|
@@next_patterns = {}
|
23
23
|
mode_name = (mode == :production ? 'Production' : 'Learning')
|
24
|
-
|
25
|
-
|
24
|
+
|
25
|
+
Scrubyt.log :MODE, mode_name
|
26
|
+
|
26
27
|
@@evaluation_context = EvaluationContext.new
|
27
|
-
#Hack up an artificial root pattern (i.e. do not return the pattern which
|
28
|
+
#Hack up an artificial root pattern (i.e. do not return the pattern which
|
28
29
|
#is the root one in the user's definition, but rather the real (invisible)
|
29
30
|
#root pattern
|
30
31
|
@@evaluation_context.evaluating_extractor_definition = true
|
31
32
|
class_eval(&extractor_definition)
|
32
33
|
@@evaluation_context.evaluating_extractor_definition = false
|
33
34
|
root_pattern = @@evaluation_context.root_pattern
|
35
|
+
|
34
36
|
if root_pattern.nil?
|
35
|
-
|
37
|
+
# TODO: this should be an exception
|
38
|
+
Scrubyt.log :ERROR, 'No extractor defined, exiting...'
|
36
39
|
exit
|
37
40
|
end
|
41
|
+
|
38
42
|
root_pattern.source_file = source_file
|
39
43
|
root_pattern.source_proc = extractor_definition
|
40
44
|
#Once all is set up, evaluate the extractor from the root pattern!
|
41
|
-
evaluate_extractor(root_pattern)
|
42
|
-
|
43
|
-
|
45
|
+
root_results = evaluate_extractor(root_pattern)
|
46
|
+
|
47
|
+
scrubyt_result = ScrubytResult.new('root')
|
48
|
+
scrubyt_result.push(*root_results)
|
49
|
+
scrubyt_result.root_pattern = root_pattern
|
50
|
+
|
44
51
|
#Return the root pattern
|
45
|
-
|
46
|
-
|
52
|
+
Scrubyt.log :INFO, 'Extraction finished succesfully!'
|
53
|
+
scrubyt_result
|
47
54
|
end
|
48
|
-
|
55
|
+
|
49
56
|
#Evaluate a subextractor (i.e. an extractor on a detail page).
|
50
57
|
#The url passed to this function is automatically loaded.
|
51
58
|
#The definition of the subextractor is passed as a block
|
@@ -53,119 +60,113 @@ module Scrubyt
|
|
53
60
|
#!!!! THIS CODE IS A MESS, IT needs to be refactored ASAP....
|
54
61
|
def self.evaluate_subextractor(url, parent_pattern, resolve)
|
55
62
|
if @@detail_pattern_relations.keys.include? @@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]
|
56
|
-
detail_root = @@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]]
|
57
|
-
detail_root.result = Result.new
|
63
|
+
detail_root = @@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]]
|
58
64
|
detail_root.last_result = nil
|
59
65
|
FetchAction.store_page
|
60
66
|
@@original_evaluation_context.push @@evaluation_context
|
61
67
|
@@host_stack.push FetchAction.get_host_name
|
62
68
|
@@evaluation_context = EvaluationContext.new
|
63
|
-
@@evaluation_context.clear_sources_and_sinks detail_root
|
64
69
|
FetchAction.restore_host_name
|
65
70
|
fetch url, :resolve => resolve
|
66
71
|
@@evaluation_context.extractor = self
|
67
|
-
@@evaluation_context.root_pattern = detail_root
|
68
|
-
|
69
|
-
evaluate_extractor detail_root
|
72
|
+
@@evaluation_context.root_pattern = detail_root
|
73
|
+
root_results = evaluate_extractor detail_root
|
70
74
|
@@evaluation_context = @@original_evaluation_context.pop
|
71
75
|
FetchAction.restore_page
|
72
76
|
FetchAction.store_host_name(@@host_stack.pop)
|
73
|
-
|
74
|
-
else
|
77
|
+
root_results
|
78
|
+
else
|
75
79
|
@@original_evaluation_context ||= []
|
76
80
|
@@host_stack ||= []
|
77
81
|
FetchAction.store_page
|
78
82
|
@@original_evaluation_context.push @@evaluation_context
|
79
83
|
@@host_stack.push FetchAction.get_host_name
|
80
84
|
@@evaluation_context = EvaluationContext.new
|
81
|
-
FetchAction.restore_host_name
|
85
|
+
FetchAction.restore_host_name
|
82
86
|
fetch url, :resolve => resolve
|
83
|
-
|
84
|
-
root_pattern =
|
85
|
-
@@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]] = root_pattern
|
86
|
-
evaluate_extractor(root_pattern)
|
87
|
-
#Apply all postprocess steps
|
88
|
-
PostProcessor.apply_post_processing(root_pattern)
|
87
|
+
class_eval(&parent_pattern.referenced_extractor)
|
88
|
+
root_pattern = @@evaluation_context.root_pattern
|
89
|
+
@@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]] = root_pattern
|
90
|
+
root_results = evaluate_extractor(root_pattern)
|
89
91
|
@@evaluation_context = @@original_evaluation_context.pop
|
90
92
|
FetchAction.restore_page
|
91
93
|
FetchAction.store_host_name(@@host_stack.pop)
|
92
|
-
|
94
|
+
root_results
|
93
95
|
end
|
94
96
|
end
|
95
|
-
|
96
|
-
#build the current wrapper
|
97
|
+
|
98
|
+
#build the current wrapper
|
97
99
|
def self.method_missing(method_name, *args, &block)
|
98
100
|
if NavigationActions::KEYWORDS.include? method_name.to_s
|
99
101
|
NavigationActions.send(method_name, *args)
|
100
102
|
return
|
101
103
|
end
|
104
|
+
|
102
105
|
if method_name.to_s == 'next_page'
|
103
106
|
pattern = Scrubyt::Pattern.new(method_name.to_s, args, @@evaluation_context)
|
104
107
|
pattern.evaluation_context = @@evaluation_context
|
105
|
-
|
108
|
+
|
106
109
|
@@evaluation_context.setup_uri_builder(pattern, args)
|
107
110
|
@@next_patterns[@@last_root_pattern] = @@evaluation_context.uri_builder
|
108
111
|
else
|
109
112
|
raise "Only one root pattern allowed" if !@@evaluation_context.root_pattern.nil?
|
110
113
|
#Create a root pattern
|
111
|
-
|
114
|
+
@@evaluation_context.extractor = self
|
115
|
+
root_pattern = Scrubyt::Pattern.new(method_name.to_s, args, @@evaluation_context, root_pattern, &block)
|
112
116
|
@@last_root_pattern = root_pattern
|
113
117
|
@@evaluation_context.root_pattern = root_pattern
|
114
|
-
|
115
|
-
#add the currently active document to the root pattern
|
116
|
-
@@evaluation_context.attach_current_document
|
117
|
-
pattern = Scrubyt::Pattern.new(method_name.to_s, args, @@evaluation_context, root_pattern, &block)
|
118
|
-
root_pattern.children << pattern
|
119
|
-
pattern
|
118
|
+
root_pattern
|
120
119
|
end
|
121
120
|
end
|
122
|
-
|
121
|
+
|
123
122
|
def self.add_detail_extractor_to_pattern_name(referenced_extractor, pattern)
|
124
123
|
@@detail_extractor_to_pattern_name[referenced_extractor] ||= [] << pattern
|
125
124
|
end
|
126
125
|
|
127
126
|
def self.get_detail_extractor(parent_pattern)
|
128
|
-
@@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]]
|
127
|
+
@@detail_pattern_relations[@@detail_extractor_to_pattern_name[parent_pattern.referenced_extractor]]
|
129
128
|
end
|
130
129
|
|
131
130
|
def self.get_hpricot_doc
|
132
131
|
NavigationActions.get_hpricot_doc
|
133
132
|
end
|
134
|
-
|
133
|
+
|
135
134
|
def self.get_current_doc_url
|
136
135
|
NavigationActions.get_current_doc_url
|
137
136
|
end
|
138
|
-
|
137
|
+
|
139
138
|
def self.get_detail_pattern_relations
|
140
139
|
@@detail_pattern_relations
|
141
140
|
end
|
142
|
-
|
141
|
+
|
143
142
|
def self.get_host_name
|
144
143
|
NavigationActions.get_host_name
|
145
144
|
end
|
146
|
-
|
145
|
+
|
147
146
|
def self.get_mode
|
148
147
|
@@mode
|
149
148
|
end
|
150
|
-
|
149
|
+
|
151
150
|
def self.get_original_host_name
|
152
151
|
@@original_host_name
|
153
152
|
end
|
154
|
-
|
153
|
+
|
155
154
|
private
|
156
|
-
|
155
|
+
|
157
156
|
def self.evaluate_extractor(root_pattern)
|
157
|
+
root_results = []
|
158
158
|
if @@next_patterns[root_pattern]
|
159
159
|
current_page_count = 1
|
160
160
|
loop do
|
161
|
-
root_pattern.evaluate(nil)
|
162
|
-
break if (@@next_patterns[root_pattern].limit == current_page_count || !@@evaluation_context.crawl_to_new_page(
|
161
|
+
root_results.push(*root_pattern.evaluate(get_hpricot_doc, nil))
|
162
|
+
break if (@@next_patterns[root_pattern].limit == current_page_count || !@@evaluation_context.crawl_to_new_page(@@next_patterns[root_pattern]))
|
163
163
|
current_page_count += 1 if @@next_patterns[root_pattern].limit != nil
|
164
164
|
end
|
165
165
|
else
|
166
|
-
root_pattern.evaluate(nil)
|
166
|
+
root_results.push(*root_pattern.evaluate(get_hpricot_doc, nil))
|
167
167
|
end
|
168
|
+
root_results
|
168
169
|
end
|
169
|
-
|
170
|
+
|
170
171
|
end #end of class Extractor
|
171
|
-
end #end of module Scrubyt
|
172
|
+
end #end of module Scrubyt
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Scrubyt
|
2
|
+
def self.log(message_type, message)
|
3
|
+
|
4
|
+
pre = "[#{message_type}] "
|
5
|
+
|
6
|
+
if message.is_a? Array
|
7
|
+
puts pre + message.first
|
8
|
+
message[1..-1].each do |line|
|
9
|
+
puts ' ' * pre.length + line
|
10
|
+
end
|
11
|
+
else
|
12
|
+
puts pre + message.to_s
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
module Scrubyt
|
2
2
|
|
3
|
+
########################################## NOT USED ANY MORE ##########################################
|
3
4
|
require 'set'
|
4
5
|
##
|
5
6
|
#=<tt>Post processing results after the extraction</tt>
|
@@ -46,11 +47,11 @@ require 'set'
|
|
46
47
|
def self.report_if_no_results(root_pattern)
|
47
48
|
results_found = false
|
48
49
|
root_pattern.children.each {|child| return if (child.result.childmap.size > 0)}
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
50
|
+
|
51
|
+
Scrubyt.log :WARNING, [
|
52
|
+
"The extractor did not find any result instances. Most probably this is wrong.",
|
53
|
+
"Check your extractor and if you are sure it should work, report a bug!"
|
54
|
+
]
|
54
55
|
end
|
55
56
|
|
56
57
|
private
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'rexml/document'
|
2
2
|
require 'rexml/xpath'
|
3
3
|
|
4
|
+
########################################## NOT USED ANY MORE ##########################################
|
4
5
|
module Scrubyt
|
5
6
|
##
|
6
7
|
#=<tt>Dumping the result in various formats and providing statistics on the results</tt>
|
@@ -45,7 +46,7 @@ module Scrubyt
|
|
45
46
|
flat_csv_inner = lambda {|e, parts|
|
46
47
|
content = e.text || ''
|
47
48
|
parts << content if ((e.is_a? REXML::Element) && content != '')
|
48
|
-
e.children.each {|c|
|
49
|
+
e.children.each {|c| flat_csv_inner.call(c, parts) if c.is_a? REXML::Element }
|
49
50
|
parts
|
50
51
|
}
|
51
52
|
to_xml(pattern).root.elements['/root'].each {|e| result << flat_csv_inner.call(e, []) }
|
@@ -55,7 +56,7 @@ module Scrubyt
|
|
55
56
|
def self.to_hash(pattern)
|
56
57
|
result = []
|
57
58
|
flat_hash_inner = lambda {|e, parts|
|
58
|
-
content = e.text
|
59
|
+
content = e.text ? REXML::Text.unnormalize(e.text) : ''
|
59
60
|
if ((e.is_a? REXML::Element) && content != '')
|
60
61
|
if parts[e.local_name]
|
61
62
|
parts[e.local_name] = parts[e.local_name] + "," + content
|
@@ -141,7 +142,7 @@ private
|
|
141
142
|
end
|
142
143
|
else
|
143
144
|
count = REXML::XPath.match(@@last_doc, "//#{pattern.name}").size
|
144
|
-
|
145
|
+
Scrubyt.log :INFO, (' ' * depth.to_i) + "#{pattern.name} extracted #{count} instances."
|
145
146
|
end
|
146
147
|
end
|
147
148
|
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Scrubyt
|
2
|
+
class ResultNode < Array
|
3
|
+
OUTPUT_OPTIONS = [:write_text]
|
4
|
+
|
5
|
+
attr_accessor :name, :result, :options, :generated_by_leaf
|
6
|
+
|
7
|
+
def initialize(name, result=nil, options={})
|
8
|
+
@name = name
|
9
|
+
@result = result
|
10
|
+
@options = options
|
11
|
+
end
|
12
|
+
|
13
|
+
def write_text
|
14
|
+
@options[:write_text].nil? ? @generated_by_leaf : @options[:write_text]
|
15
|
+
end
|
16
|
+
|
17
|
+
def has_content?
|
18
|
+
return true if result.is_a? String
|
19
|
+
write_text || (inject(false) { |one_child_has_content, child| one_child_has_content || child.has_content? })
|
20
|
+
end
|
21
|
+
|
22
|
+
def to_s
|
23
|
+
text = (@result.is_a? String) ? @result : @result.inner_text
|
24
|
+
text = SharedUtils.unescape_entities(text)
|
25
|
+
text.strip!
|
26
|
+
text
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_libxml
|
30
|
+
libxml_node = XML::Node.new(name)
|
31
|
+
self.each { |child| libxml_node << child.to_libxml if child.has_content? }
|
32
|
+
libxml_node << to_s if write_text
|
33
|
+
libxml_node
|
34
|
+
end
|
35
|
+
|
36
|
+
#note: see ruby_extensions.rb for String#write
|
37
|
+
def to_xml
|
38
|
+
to_xml_lines.join("\n")
|
39
|
+
end
|
40
|
+
|
41
|
+
def to_hash
|
42
|
+
result = []
|
43
|
+
flat_hash_inner = lambda {|e, hash|
|
44
|
+
hash[e.name.to_sym] = hash[e.name.to_sym] ? hash[e.name.to_sym] + "," + e.to_s : e.to_s if e.write_text && !e.to_s.empty?
|
45
|
+
e.each {|c| flat_hash_inner.call(c, hash) }
|
46
|
+
hash
|
47
|
+
}
|
48
|
+
self.each {|e| result << flat_hash_inner.call(e, {}) }
|
49
|
+
result
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_xml_lines
|
53
|
+
lines = []
|
54
|
+
children = self.select{ |child| child.has_content? }
|
55
|
+
if children.empty?
|
56
|
+
if result.is_a? String
|
57
|
+
lines << "<#{name}>#{result}</#{name}>"
|
58
|
+
elsif write_text && !to_s.empty?
|
59
|
+
lines << "<#{name}>#{ERB::Util.html_escape(to_s)}</#{name}>"
|
60
|
+
else
|
61
|
+
lines << "<#{name}/>"
|
62
|
+
end
|
63
|
+
else
|
64
|
+
lines << "<#{name}>"
|
65
|
+
lines << " #{ERB::Util.html_escape(to_s)}" if write_text && !to_s.empty?
|
66
|
+
children.each do |child|
|
67
|
+
lines.push(*child.to_xml_lines.map{ |line| " #{line}" })
|
68
|
+
end
|
69
|
+
lines << "</#{name}>"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|