tax_generator 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/data/input/.gitignore +1 -1
- data/data/input/destinations.xml +2168 -22
- data/data/input/taxonomy.xml +76 -0
- data/lib/tax_generator/classes/processor.rb +9 -4
- data/lib/tax_generator/classes/taxonomy_node.rb +16 -0
- data/lib/tax_generator/classes/taxonomy_tree.rb +25 -9
- data/lib/tax_generator/version.rb +1 -1
- metadata +1 -1
data/data/input/taxonomy.xml
CHANGED
@@ -75,4 +75,80 @@
|
|
75
75
|
</node>
|
76
76
|
</node>
|
77
77
|
</taxonomy>
|
78
|
+
|
79
|
+
<taxonomy>
|
80
|
+
<taxonomy_name>World2</taxonomy_name>
|
81
|
+
<node atlas_node_id = "3550641" ethyl_content_object_id="82534" geo_id = "355064">
|
82
|
+
<node_name>Africa</node_name>
|
83
|
+
<node atlas_node_id = "3556111" ethyl_content_object_id="3210" geo_id = "355611">
|
84
|
+
<node_name>South Africa</node_name>
|
85
|
+
<node atlas_node_id = "3556121" ethyl_content_object_id="35474" geo_id = "355612">
|
86
|
+
<node_name>Cape Town</node_name>
|
87
|
+
<node atlas_node_id = "3556131" ethyl_content_object_id="" geo_id = "355613">
|
88
|
+
<node_name>Table Mountain National Park</node_name>
|
89
|
+
</node>
|
90
|
+
</node>
|
91
|
+
<node atlas_node_id = "3556141" ethyl_content_object_id="" geo_id = "355614">
|
92
|
+
<node_name>Free State</node_name>
|
93
|
+
<node atlas_node_id = "3556151" ethyl_content_object_id="1000550692" geo_id = "355615">
|
94
|
+
<node_name>Bloemfontein</node_name>
|
95
|
+
</node>
|
96
|
+
</node>
|
97
|
+
<node atlas_node_id = "3556161" ethyl_content_object_id="" geo_id = "355616">
|
98
|
+
<node_name>Gauteng</node_name>
|
99
|
+
<node atlas_node_id = "3556171" ethyl_content_object_id="37710" geo_id = "355617">
|
100
|
+
<node_name>Johannesburg</node_name>
|
101
|
+
</node>
|
102
|
+
<node atlas_node_id = "3556181" ethyl_content_object_id="1000548256" geo_id = "355618">
|
103
|
+
<node_name>Pretoria</node_name>
|
104
|
+
</node>
|
105
|
+
</node>
|
106
|
+
<node atlas_node_id = "3556191" ethyl_content_object_id="" geo_id = "355619">
|
107
|
+
<node_name>KwaZulu-Natal</node_name>
|
108
|
+
<node atlas_node_id = "3556201" ethyl_content_object_id="43725" geo_id = "355620">
|
109
|
+
<node_name>Durban</node_name>
|
110
|
+
</node>
|
111
|
+
<node atlas_node_id = "3556211" ethyl_content_object_id="1000576780" geo_id = "355621">
|
112
|
+
<node_name>Pietermaritzburg</node_name>
|
113
|
+
</node>
|
114
|
+
</node>
|
115
|
+
<node atlas_node_id = "3556221" ethyl_content_object_id="" geo_id = "355622">
|
116
|
+
<node_name>Mpumalanga</node_name>
|
117
|
+
<node atlas_node_id = "3556231" ethyl_content_object_id="67561" geo_id = "355623">
|
118
|
+
<node_name>Kruger National Park</node_name>
|
119
|
+
</node>
|
120
|
+
</node>
|
121
|
+
<node atlas_node_id = "3556241" ethyl_content_object_id="" geo_id = "355624">
|
122
|
+
<node_name>The Drakensberg</node_name>
|
123
|
+
<node atlas_node_id = "3556251" ethyl_content_object_id="" geo_id = "355625">
|
124
|
+
<node_name>Royal Natal National Park</node_name>
|
125
|
+
</node>
|
126
|
+
</node>
|
127
|
+
<node atlas_node_id = "3556261" ethyl_content_object_id="" geo_id = "355626">
|
128
|
+
<node_name>The Garden Route</node_name>
|
129
|
+
<node atlas_node_id = "3556271" ethyl_content_object_id="" geo_id = "355627">
|
130
|
+
<node_name>Oudtshoorn</node_name>
|
131
|
+
</node>
|
132
|
+
<node atlas_node_id = "3556281" ethyl_content_object_id="" geo_id = "355628">
|
133
|
+
<node_name>Tsitsikamma Coastal National Park</node_name>
|
134
|
+
</node>
|
135
|
+
</node>
|
136
|
+
</node>
|
137
|
+
<node atlas_node_id = "3556291" ethyl_content_object_id="3263" geo_id = "355629">
|
138
|
+
<node_name>Sudan</node_name>
|
139
|
+
<node atlas_node_id = "3556301" ethyl_content_object_id="" geo_id = "355630">
|
140
|
+
<node_name>Eastern Sudan</node_name>
|
141
|
+
<node atlas_node_id = "3556311" ethyl_content_object_id="" geo_id = "355631">
|
142
|
+
<node_name>Port Sudan</node_name>
|
143
|
+
</node>
|
144
|
+
</node>
|
145
|
+
<node atlas_node_id = "3556321" ethyl_content_object_id="" geo_id = "355632">
|
146
|
+
<node_name>Khartoum</node_name>
|
147
|
+
</node>
|
148
|
+
</node>
|
149
|
+
<node atlas_node_id = "3556331" ethyl_content_object_id="3272" geo_id = "355633">
|
150
|
+
<node_name>Swaziland</node_name>
|
151
|
+
</node>
|
152
|
+
</node>
|
153
|
+
</taxonomy>
|
78
154
|
</taxonomies>
|
data/lib/tax_generator/classes/processor.rb
CHANGED
@@ -178,11 +178,16 @@ module TaxGenerator
|
|
178
178
|
#
|
179
179
|
# @api public
|
180
180
|
def fetch_file_jobs
|
181
|
-
jobs = [
|
182
|
-
|
183
|
-
|
184
|
-
|
181
|
+
jobs = []
|
182
|
+
count = 0
|
183
|
+
@taxonomy.document.xpath('.//taxonomy').pmap do |taxonomy_node|
|
184
|
+
count += 1
|
185
|
+
jobs << { atlas_id: count, taxonomy: @taxonomy, destination: nil, output_folder: output_folder }
|
185
186
|
end
|
187
|
+
nokogiri_xml(destinations_file_path).xpath('//destination').pmap do |destination|
|
188
|
+
atlas_id = destination.attributes['atlas_id']
|
189
|
+
jobs << { atlas_id: atlas_id.value, taxonomy: @taxonomy, destination: destination, output_folder: output_folder }
|
190
|
+
end
|
186
191
|
jobs
|
187
192
|
end
|
188
193
|
|
data/lib/tax_generator/classes/taxonomy_node.rb
CHANGED
@@ -1,6 +1,15 @@
|
|
1
1
|
module TaxGenerator
|
2
2
|
# node from the tree
|
3
3
|
class TaxonomyNode < Tree::TreeNode
|
4
|
+
# prints the entire tree with name and content
|
5
|
+
#
|
6
|
+
# @param [Integer] level the level of the current node, Default 0
|
7
|
+
# @param [Integer] max_depth the maximum depth the tree must be printed. Default nil
|
8
|
+
# @param [Lambda] block the lambda that will be executed for printing node name and content
|
9
|
+
#
|
10
|
+
# @return [String]
|
11
|
+
#
|
12
|
+
# @api public
|
4
13
|
def print_tree(level = 0, max_depth = nil, block = ->(node, prefix) { puts "#{prefix} #{node.respond_to?(:name) ? node.name : node}" })
|
5
14
|
prefix = fetch_prefix_for_printing(level)
|
6
15
|
block.call("#{name}---#{content}", prefix)
|
@@ -9,6 +18,13 @@ module TaxGenerator
|
|
9
18
|
children { |child| child.print_tree(level + 1, max_depth, block) if child } # Child might be 'nil'
|
10
19
|
end
|
11
20
|
|
21
|
+
# builds up the prefix needed to display for current node
|
22
|
+
#
|
23
|
+
# @param [Integer] level the level of the current node
|
24
|
+
#
|
25
|
+
# @return [String]
|
26
|
+
#
|
27
|
+
# @api public
|
12
28
|
def fetch_prefix_for_printing(level)
|
13
29
|
prefix = ''
|
14
30
|
if is_root?
|
data/lib/tax_generator/classes/taxonomy_tree.rb
CHANGED
@@ -13,7 +13,7 @@ module TaxGenerator
|
|
13
13
|
|
14
14
|
# receives a file path that will be parsed and used to build the tree
|
15
15
|
# @see Tree::TreeNode#new
|
16
|
-
# @see #
|
16
|
+
# @see #find_taxonomies
|
17
17
|
#
|
18
18
|
# @param [String] file_path the path to the xml file that will be parsed and used to build the tree
|
19
19
|
#
|
@@ -22,10 +22,25 @@ module TaxGenerator
|
|
22
22
|
# @api public
|
23
23
|
def initialize(file_path)
|
24
24
|
@document = nokogiri_xml(file_path)
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
@root_node = TaxGenerator::TaxonomyNode.new('ROOT', 'ROOT')
|
26
|
+
find_taxonomies
|
27
|
+
end
|
28
|
+
|
29
|
+
# searches all the taxonomy elements in the document and adds them as top level nodes
|
30
|
+
# and then calls method add_node to search inside their children
|
31
|
+
# @see #insert_node
|
32
|
+
# @see #add_node
|
33
|
+
#
|
34
|
+
# @return [void]
|
35
|
+
#
|
36
|
+
# @api public
|
37
|
+
def find_taxonomies
|
38
|
+
count = 0
|
39
|
+
@document.xpath('.//taxonomy').pmap do |taxonomy_node|
|
40
|
+
count += 1
|
41
|
+
taxonomy_name = taxonomy_node.at_xpath('.//taxonomy_name')
|
42
|
+
tax_node = insert_node(count.to_s, taxonomy_name.content, @root_node)
|
43
|
+
add_node(taxonomy_node, tax_node, skip_add: true)
|
29
44
|
end
|
30
45
|
end
|
31
46
|
|
@@ -60,7 +75,8 @@ module TaxGenerator
|
|
60
75
|
def add_taxonomy_node(taxonomy_node, node)
|
61
76
|
atlas_node_id = taxonomy_node.attributes['atlas_node_id']
|
62
77
|
node_name = taxonomy_node.children.find { |child| child.name == 'node_name' }
|
63
|
-
|
78
|
+
return if atlas_node_id.blank? || node_name.blank?
|
79
|
+
insert_node(atlas_node_id.value, node_name.content, node)
|
64
80
|
end
|
65
81
|
|
66
82
|
# inserts a new node in the tree by checking first if atlas_id and node_name are present
|
@@ -76,7 +92,7 @@ module TaxGenerator
|
|
76
92
|
# @api public
|
77
93
|
def insert_node(atlas_node_id, node_name, node)
|
78
94
|
return if atlas_node_id.blank? || node_name.blank?
|
79
|
-
current_node = TaxGenerator::TaxonomyNode.new(atlas_node_id
|
95
|
+
current_node = TaxGenerator::TaxonomyNode.new(atlas_node_id, node_name)
|
80
96
|
node << current_node
|
81
97
|
current_node
|
82
98
|
end
|
@@ -91,8 +107,8 @@ module TaxGenerator
|
|
91
107
|
# @return [void]
|
92
108
|
#
|
93
109
|
# @api public
|
94
|
-
def add_node(taxonomy_node, node)
|
95
|
-
tax_node = add_taxonomy_node(taxonomy_node, node)
|
110
|
+
def add_node(taxonomy_node, node, options = {})
|
111
|
+
tax_node = options[:skip_add].present? ? node : add_taxonomy_node(taxonomy_node, node)
|
96
112
|
return unless taxonomy_node.children.any?
|
97
113
|
taxonomy_node.xpath('./node').pmap do |child_node|
|
98
114
|
add_node(child_node, tax_node) if tax_node.present?
|